Thursday, September 16, 2021

.::: Install PAF (PostgreSQL Automatic Failover) on CentOS 7 (based on Pacemaker, pcs, Corosync) :::.

 
A. Install PAF 

1. Prepare the nodes
Node01 
10.10.10.241

Node02
10.10.10.242

Virtual IP
10.10.10.243
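
Both nodes should resolve each other by name, since node01/node02 are used as the Pacemaker node names throughout this post. A minimal /etc/hosts sketch (an assumption about this lab; adjust to your environment):

10.10.10.241    node01
10.10.10.242    node02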

2. Install pacemaker, corosync, pcs & configuration 
Install pcsd:
https://teguhth.blogspot.com/2018/08/how-to-install-and-configuration-pcs.html

or you can install the default Pacemaker packages recommended by PAF:

# yum install -y pacemaker resource-agents pcs fence-agents-all fence-agents-virsh
# systemctl enable corosync
# systemctl enable pacemaker
# systemctl enable pcsd 
...

Exclude step 6 of that guide (Add ClusterIP/FloatingIP/VirtualIP on Server01), since the virtual IP will be created as a cluster resource in step 6 below.

Follow the blog post until the cluster is up and healthy:
# pcs status 
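
For reference, the core cluster bootstrap covered by that post looks roughly like this (a minimal sketch, assuming the hacluster password is set on both nodes and using the cluster name mycluster seen in the logs below):

# systemctl start pcsd                                (on both nodes)
# passwd hacluster                                    (on both nodes)
# pcs cluster auth node01 node02 -u hacluster
# pcs cluster setup --name mycluster node01 node02
# pcs cluster start --all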

3. Install streaming replication & test 
https://teguhth.blogspot.com/2021/08/how-to-setting-streaminng-replication.html
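
Once streaming replication is running, it can be verified from the primary. Note that PAF expects each standby to connect with application_name equal to its Pacemaker node name (node01/node02); this matters for step 7 below. A quick check, assuming the replicate user from the linked post:

# sudo -u postgres psql -c "SELECT application_name, client_addr, state, sync_state FROM pg_stat_replication;"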

4. Node fencing configuration

pcs cluster cib cluster1.xml

pcs -f cluster1.xml stonith create fence_vm_server1 fence_virsh \
    pcmk_host_check="static-list" pcmk_host_list="node01"        \
    ipaddr="10.10.10.241" login="root" passwd="root" port="node01" \
    identity_file="/root/.ssh/id_rsa" meta provides=unfencing

pcs -f cluster1.xml stonith create fence_vm_server2 fence_virsh \
    pcmk_host_check="static-list" pcmk_host_list="node02"        \
    ipaddr="10.10.10.242" login="root" passwd="root" port="node02" \
    identity_file="/root/.ssh/id_rsa" meta provides=unfencing

pcs -f cluster1.xml constraint location fence_vm_server1 avoids node01=INFINITY
pcs -f cluster1.xml constraint location fence_vm_server2 avoids node02=INFINITY
pcs cluster cib-push cluster1.xml

Log:
[root@node01 data]# pcs cluster cib cluster1.xml
[root@node01 data]# pcs -f cluster1.xml stonith create fence_vm_server1 fence_virsh \
>     pcmk_host_check="static-list" pcmk_host_list="node01"        \
>     ipaddr="10.10.10.241" login="root" passwd="root" port="node01"            \
>     identity_file="/root/.ssh/id_rsa" meta provides=infecing
[root@node01 data]# pcs -f cluster1.xml stonith create fence_vm_server2 fence_virsh \
>     pcmk_host_check="static-list" pcmk_host_list="node02"        \
>     ipaddr="10.10.10.242" login="root" passwd="root" port="node02"            \
>      identity_file="/root/.ssh/id_rsa" meta provides=infecing
[root@node01 data]# pcs -f cluster1.xml constraint location fence_vm_server1 avoids node01=INFINITY
Warning: Validation for node existence in the cluster will be skipped
[root@node01 data]# pcs -f cluster1.xml constraint location fence_vm_server2 avoids node02=INFINITY
Warning: Validation for node existence in the cluster will be skipped
[root@node01 data]# pcs cluster cib-push cluster1.xml
CIB updated
[root@node01 data]# 

5. Check pcs status for the fence devices 
[root@node01 data]# pcs status
Cluster name: mycluster
Stack: corosync
Current DC: node01 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
Last updated: Thu Sep 16 12:12:41 2021
Last change: Thu Sep 16 12:12:14 2021 by root via cibadmin on node01

2 nodes configured
2 resource instances configured

Online: [ node01 node02 ]

Full list of resources:

 fence_vm_server1 (stonith:fence_virsh): Started node02
 fence_vm_server2 (stonith:fence_virsh): Started node01

Daemon Status:
  corosync: active/enabled
  pacemaker: active/enabled
  pcsd: active/enabled
[root@node01 data]# 
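
Optionally, fencing can be verified before creating the PostgreSQL resources by asking the cluster to fence one node (a lab-only sketch: this really does power-cycle node02 through libvirt):

# pcs stonith fence node02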


6. Cluster resource creation

# pgsqld
pcs -f cluster1.xml resource create pgsqld ocf:heartbeat:pgsqlms \
    bindir=/usr/pgsql-13/bin                                    \
    pgdata=/var/lib/pgsql/13/data                               \
    op start timeout=60s                                         \
    op stop timeout=60s                                          \
    op promote timeout=30s                                       \
    op demote timeout=120s                                       \
    op monitor interval=15s timeout=10s role="Master"            \
    op monitor interval=16s timeout=10s role="Slave"             \
    op notify timeout=60s

# pgsql-ha
pcs -f cluster1.xml resource master pgsql-ha pgsqld notify=true

pcs -f cluster1.xml resource create pgsql-pri-ip ocf:heartbeat:IPaddr2 ip=10.10.10.243 nic=eth0 cidr_netmask=24 op monitor interval=10s

pcs --force -f cluster1.xml constraint colocation add pgsql-pri-ip with master pgsql-ha INFINITY;
pcs --force -f cluster1.xml constraint order promote pgsql-ha then start pgsql-pri-ip symmetrical=false kind=Mandatory;
pcs --force -f cluster1.xml constraint order demote pgsql-ha then stop pgsql-pri-ip symmetrical=false kind=Mandatory;

pcs cluster cib-push scope=configuration cluster1.xml;
pcs status 
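
After the configuration is pushed, the promotion can also be followed live with crm_mon; the -A flag additionally shows the node attributes (master scores) PAF uses to pick the promotion candidate. A read-only check, safe to run at any time:

# crm_mon -Afr -1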

Log:

[root@node01 data]# pcs -f cluster1.xml resource create pgsqld ocf:heartbeat:pgsqlms \
>     bindir=/usr/pgsql-13/bin                                    \
>     pgdata=/var/lib/pgsql/13/data                               \
>     op start timeout=60s                                         \
>     op stop timeout=60s                                          \
>     op promote timeout=30s                                       \
>     op demote timeout=120s                                       \
>     op monitor interval=15s timeout=10s role="Master"            \
>     op monitor interval=16s timeout=10s role="Slave"             \
>     op notify timeout=60s
[root@node01 data]# 
[root@node01 data]# pcs -f cluster1.xml resource master pgsql-ha pgsqld notify=true
[root@node01 data]# 
[root@node01 data]# pcs -f cluster1.xml resource create pgsql-pri-ip ocf:heartbeat:IPaddr2 ip=10.10.10.243 nic=eth0 cidr_netmask=24 op monitor interval=10s
[root@node01 data]# 
[root@node01 data]# pcs --force -f cluster1.xml constraint colocation add pgsql-pri-ip with master pgsql-ha INFINITY;
[root@node01 data]#
[root@node01 data]# pcs --force  -f cluster1.xml constraint order promote pgsql-ha then start pgsql-pri-ip symmetrical=false kind=Mandatory;
Adding pgsql-ha pgsql-pri-ip (kind: Mandatory) (Options: first-action=promote then-action=start symmetrical=false)
[root@node01 data]#
[root@node01 data]# pcs  --force -f cluster1.xml constraint order demote pgsql-ha then stop pgsql-pri-ip symmetrical=false kind=Mandatory;
Adding pgsql-ha pgsql-pri-ip (kind: Mandatory) (Options: first-action=demote then-action=stop symmetrical=false)
[root@node01 data]# 
[root@node01 data]# pcs cluster cib-push scope=configuration cluster1.xml;
CIB updated
[root@node01 data]# 

7. Check the cluster status:
[root@node01 data]# pcs status
Cluster name: mycluster
Stack: corosync
Current DC: node01 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
Last updated: Thu Sep 16 12:18:37 2021
Last change: Thu Sep 16 12:18:27 2021 by root via cibadmin on node01

2 nodes configured
5 resource instances configured (2 BLOCKED from further action due to failure)

Online: [ node01 node02 ]

Full list of resources:

 fence_vm_server1 (stonith:fence_virsh): Started node02
 fence_vm_server2 (stonith:fence_virsh): Started node01
 Master/Slave Set: pgsql-ha [pgsqld]
     pgsqld (ocf::heartbeat:pgsqlms): FAILED node02 (blocked)
     pgsqld (ocf::heartbeat:pgsqlms): FAILED node01 (blocked)
 pgsql-pri-ip (ocf::heartbeat:IPaddr2): Stopped

Failed Resource Actions:
* pgsqld_stop_0 on node02 'invalid parameter' (2): call=36, status=complete, exitreason='Parameter "primary_conninfo" MUST contain 'application_name=node02'. It is currently set to 'user=replicate password=admin chann',
    last-rc-change='Thu Sep 16 12:18:32 2021', queued=0ms, exec=357ms
* pgsqld_stop_0 on node01 'invalid parameter' (2): call=36, status=complete, exitreason='Parameter "primary_conninfo" MUST contain 'application_name=node01'. It is currently set to 'host=10.10.10.243 application_name=',
    last-rc-change='Thu Sep 16 12:18:32 2021', queued=0ms, exec=403ms

Daemon Status:
  corosync: active/enabled
  pacemaker: active/enabled
  pcsd: active/enabled
[root@node01 data]# 

8. Still erroring; I will troubleshoot this later, since I am currently focused on my new job. The failed actions above show that PAF requires primary_conninfo on each node to contain application_name set to that node's own name (application_name=node01 on node01, application_name=node02 on node02); a hedged sketch of the fix is below.
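
A minimal sketch of the expected setting on node02 (set in postgresql.conf, or via ALTER SYSTEM; the host, user and password values are assumptions taken from this lab's earlier output, not a verified fix). node01 would use application_name=node01 instead:

primary_conninfo = 'host=10.10.10.243 user=replicate password=admin application_name=node02'

After correcting it on both nodes and reloading PostgreSQL, the failure history can be cleared so Pacemaker retries:

# pcs resource cleanup pgsqld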



B. Delete fence devices & pcs cluster resources 

1. Check pcs status (with errors) 

[root@node01 ~]# pcs status
Cluster name: mycluster
Stack: corosync
Current DC: node01 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
Last updated: Thu Sep 16 11:52:33 2021
Last change: Mon Sep 13 16:38:20 2021 by root via cibadmin on node01

2 nodes configured
6 resource instances configured

Online: [ node01 node02 ]

Full list of resources:

 fence_vm_server1 (stonith:fence_virsh): Started node01
 fence_vm_server2 (stonith:fence_virsh): Started node02
 Master/Slave Set: pgsql-ha [pgsqld]
     pgsqld (ocf::heartbeat:pgsqlms): FAILED node02
     pgsqld (ocf::heartbeat:pgsqlms): FAILED node01
 pgsql-pri-ip (ocf::heartbeat:IPaddr2): Stopped
 ClusterIP (ocf::heartbeat:IPaddr2): Stopped

Failed Resource Actions:
* pgsqld_monitor_0 on node02 'invalid parameter' (2): call=16, status=complete, exitreason='Parameter "primary_conninfo" MUST contain 'application_name=node02'. It is currently set to 'user=replicate password=admin chann',
    last-rc-change='Thu Sep 16 11:52:22 2021', queued=2ms, exec=7249ms
* pgsqld_monitor_0 on node01 'invalid parameter' (2): call=16, status=complete, exitreason='Parameter "primary_conninfo" MUST contain 'application_name=node01'. It is currently set to 'host=10.10.10.243 application_name=',
    last-rc-change='Thu Sep 16 11:52:12 2021', queued=1ms, exec=5753ms

Daemon Status:
  corosync: active/enabled
  pacemaker: active/enabled
  pcsd: active/enabled
[root@node01 ~]# 

2. Delete resources 
[root@node01 ~]# pcs resource delete  ClusterIP
Attempting to stop: ClusterIP... Stopped
[root@node01 ~]#
[root@node01 ~]# pcs resource delete pgsql-pri-ip
Removing Constraint - colocation-pgsql-pri-ip-pgsql-ha-INFINITY
Removing Constraint - order-pgsql-ha-pgsql-pri-ip-Mandatory
Removing Constraint - order-pgsql-ha-pgsql-pri-ip-Mandatory-1
Deleting Resource - pgsql-pri-ip
[root@node01 ~]# pcs resource delete pgsql-ha
Deleting Resource - pgsqld
[root@node01 ~]# pcs resource delete  pgsqld
Error: Resource 'pgsqld' does not exist.
[root@node01 ~]# 
(Deleting the pgsql-ha master resource already removed pgsqld, hence the error above.)
pcs cluster stop node01

3. Restart the cluster to apply the changes 
[root@node01 ~]# pcs cluster stop --all
node02: Stopping Cluster (pacemaker)...
node01: Stopping Cluster (pacemaker)...
node02: Stopping Cluster (corosync)...
node01: Stopping Cluster (corosync)...
[root@node01 ~]# 

[root@node01 ~]# pcs cluster start --all
node01: Starting Cluster (corosync)...
node02: Starting Cluster (corosync)...
node02: Starting Cluster (pacemaker)...
node01: Starting Cluster (pacemaker)...
[root@node01 ~]# pcs status
Cluster name: mycluster
Stack: corosync
Current DC: NONE
Last updated: Thu Sep 16 12:06:00 2021
Last change: Thu Sep 16 12:03:57 2021 by root via cibadmin on node01

2 nodes configured
2 resource instances configured

OFFLINE: [ node01 node02 ]

Full list of resources:

 fence_vm_server1 (stonith:fence_virsh): Stopped
 fence_vm_server2 (stonith:fence_virsh): Stopped

Daemon Status:
  corosync: active/enabled
  pacemaker: active/enabled
  pcsd: active/enabled
[root@node01 ~]# 


4. Delete the STONITH devices  

[root@node01 ~]# pcs stonith show
 fence_vm_server1 (stonith:fence_virsh): Started node01
 fence_vm_server2 (stonith:fence_virsh): Started node02
[root@node01 ~]# 
[root@node01 ~]# pcs stonith show fence_vm_server1
 Resource: fence_vm_server1 (class=stonith type=fence_virsh)
  Attributes: identity_file=/root/.ssh/id_rsa ipaddr=10.10.10.241 login=root passwd=root pcmk_host_check=static-list pcmk_host_list=node01 port=node01
  Meta Attrs: provides=infecing
  Operations: monitor interval=60s (fence_vm_server1-monitor-interval-60s)
[root@node01 ~]# pcs stonith show fence_vm_server2
 Resource: fence_vm_server2 (class=stonith type=fence_virsh)
  Attributes: identity_file=/root/.ssh/id_rsa ipaddr=10.10.10.242 login=root passwd=root pcmk_host_check=static-list pcmk_host_list=node02 port=node02
  Meta Attrs: provides=infecing
  Operations: monitor interval=60s (fence_vm_server2-monitor-interval-60s)
[root@node01 ~]# 

[root@node01 ~]# pcs stonith delete fence_vm_server1
Attempting to stop: fence_vm_server1... Stopped
[root@node01 ~]# pcs stonith delete fence_vm_server2
Attempting to stop: fence_vm_server2... Stopped
[root@node01 ~]# pcs stonith show
NO stonith devices configured
[root@node01 ~]# 

5. Check pcs status 

[root@node01 ~]# pcs status
Cluster name: mycluster
Stack: corosync
Current DC: node01 (version 1.1.23-1.el7_9.1-9acf116022) - partition with quorum
Last updated: Thu Sep 16 12:27:00 2021
Last change: Thu Sep 16 12:25:13 2021 by root via cibadmin on node01

2 nodes configured
0 resource instances configured

Online: [ node01 node02 ]

No resources


Daemon Status:
  corosync: active/enabled
  pacemaker: active/enabled
  pcsd: inactive/enabled
[root@node01 ~]# 
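
As an alternative to removing resources and fence devices one by one, the whole Pacemaker/Corosync configuration can be wiped in a single, destructive step:

# pcs cluster destroy --all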




