Difference between revisions of "PostgreSQL"

From DHVLab

(Created page with "Category:OVirt_Engine_Cluster == Install dependencies ==")
 
 
(6 intermediate revisions by the same user not shown)
Line 1: Line 1:
 
[[Category:OVirt_Engine_Cluster]]
 
[[Category:OVirt_Engine_Cluster]]
== Install dependencies ==
+
HA PostgreSQL stores the OVirt Engine configuration. It is configured with the following features:<br/>
 +
# Fail-over of Master
 +
# If Master breaks down, RA detects this fault and makes Master to stop, and Slave is promoted to new Master(promote).
 +
# Switching between asynchronous and synchronous
 +
# If Slave breaks down or LAN has some troubles, the transaction including Write operation will be stopped during the setting of synchronous replication. This means the stop of service. Therefore, RA switches dynamically from the synchronous to the asynchronous replication for prevented from stopping service.
 +
# Automated discrimination of data old and new when initial starts
 +
# When Pacemaker is started at the same time in the initial starts, RA compares the data of each node using "last xlog replay location" or "last receive replay location" to check which node has the newest data. The node which has the newest data will be Master.
 +
 
 +
== Node 1 ==
 +
<syntaxhighlight lang="bash">
 +
su – postgres
 +
mkdir /var/lib/pgsql/pg_archive
 +
cd /var/lib/pgsql/data
 +
initdb
 +
</syntaxhighlight>
 +
/var/lib/pgsql/data/postgresql.conf
 +
<syntaxhighlight lang="text">
 +
listen_addresses = '*'
 +
max_connections = 200
 +
shared_buffers = 32MB
 +
wal_level = hot_standby
 +
synchronous_commit = on
 +
archive_mode = on   
 +
archive_command = 'cp %p /var/lib/pgsql/pg_archive/%f'
 +
max_wal_senders = 5
 +
wal_keep_segments = 32
 +
replication_timeout = 9000
 +
hot_standby = on
 +
max_standby_archive_delay = -1
 +
max_standby_streaming_delay = -1
 +
wal_receiver_status_interval = 2
 +
hot_standby_feedback = on
 +
logging_collector = on
 +
log_filename = 'postgresql-%a.log'
 +
log_truncate_on_rotation = on
 +
log_rotation_age = 1d 
 +
log_rotation_size = 0
 +
log_timezone = 'Europe/Berlin'
 +
datestyle = 'iso, mdy'
 +
timezone = 'Europe/Berlin'
 +
lc_messages = 'en_US.UTF-8'
 +
lc_monetary = 'en_US.UTF-8'
 +
lc_numeric = 'en_US.UTF-8'
 +
lc_time = 'en_US.UTF-8'
 +
default_text_search_config = 'pg_catalog.english'
 +
restart_after_crash = off
 +
include '/var/lib/pgsql/tmp/rep_mode.conf'
 +
</syntaxhighlight>
 +
Add permissions for PostgreSQL replication and database access over network by editing the<br/>
 +
/var/lib/pgsql/pg_hba.conf
 +
<syntaxhighlight lang="text">
 +
local  all            all                                        trust
 +
host    all            all    127.0.0.1/32                        trust
 +
host    all            all    ::1/128                            trust
 +
host    all            all    127.0.0.1/32                        trust
 +
host    all            all    OVIRTM/32                          md5 
 +
host    all            all    CLUSTERLAN_CLUSTER_N1_IP/32        trust
 +
host    all            all    CLUSTERLAN_CLUSTER_N2_IP/32        trust
 +
host    all            all    CLUSTERLAN_CLUSTER_IP/32            trust
 +
host    replication    all    CLUSTERLAN_CLUSTER_N1_IP/32        trust
 +
host    replication    all    CLUSTERLAN_CLUSTER_N2_IP/32        trust
 +
</syntaxhighlight>
 +
 
 +
== Node 2 ==
 +
<syntaxhighlight lang="bash">
 +
su - postgres
 +
rm -rf /var/lib/pgsql/data/*
 +
pg_basebackup -hCLUSTERLAN_CLUSTER_N1_IP -Upostgres -D/var/lib/pgsql/data -Xstream -P
 +
mkdir /var/lib/pgsql/pg_archive
 +
</syntaxhighlight>
 +
/var/lib/pgsql/data/recover.conf
 +
<syntaxhighlight lang="text">
 +
standby_mode = 'on'
 +
primary_conninfo = 'host=CLUSTERLAN_CLUSTER_N2_IP port=5432 user=postgres
 +
application_name=NODE2'
 +
restore_command = 'cp /var/lib/pgsql/pg_archive/%f %p'
 +
recovery_target_timeline = 'latest'
 +
</syntaxhighlight>
 +
 
 +
== Prepare for Pacemaker ==
 +
<syntaxhighlight lang="bash">
 +
wget https://raw.github.com/ClusterLabs/resource-\
 +
agents/a6f4ddf76cb4bbc1b3df4c9b6632a6351b63c19e/heartbeat/pgsql
 +
cp pgsql /usr/lib/ocf/resource.d/heartbeat/
 +
chmod 755 /usr/lib/ocf/resource.d/heartbeat/pgsql
 +
</syntaxhighlight>
 +
 
 +
For further information see: [http://clusterlabs.org/wiki/PgSQL_Replicated_Cluster clusterlabs]
 +
 
 +
== Adding as cluster resource ==
 +
<syntaxhighlight lang="bash">
 +
#create resource
 +
pcs resource create pgsql pgsql \
 +
                    pgctl="/usr/bin/pg_ctl" \
 +
                    psql="/usr/bin/psql" \
 +
                    pgdata="/var/lib/pgsql/data/" \
 +
                    rep_mode="sync" \
 +
                    node_list="CLUSTER_N1.clan CLUSTER_N2.clan" \
 +
                    restore_command="cp /var/lib/pgsql/pg_archive/%f %p" \
 +
                    primary_conninfo_opt="keepalives_idle=60 keepalives_interval=5 keepalives_count=5" \
 +
                    master_ip="CLUSTERLAN_CLUSTER_IP" \
 +
                    restart_on_promote='true' \
 +
                    op start  timeout="60s" interval="0s" on-fail="restart" \
 +
                    op monitor timeout="60s" interval="4s" on-fail="restart" \
 +
                    op monitor timeout="60s" interval="3s" on-fail="restart" role="Master" \
 +
                    op promote timeout="60s" interval="0s" on-fail="restart" \
 +
                    op demote  timeout="60s" interval="0s" on-fail="stop" \
 +
                    op stop    timeout="60s" interval="0s" on-fail="block" \
 +
                    op notify  timeout="60s" interval="0s"
 +
#create master slave set
 +
pcs resource master msPostgresql pgsql master-max=1 master-node-max=1 \
 +
                    clone-max=2 clone-node-max=1 notify=true
 +
</syntaxhighlight>
 +
 
 +
== Recovery on failure ==
 +
If the resource gets stopped<br/>
 +
Connect to any of the nodes and try to run the following:
 +
<syntaxhighlight lang="bash">
 +
pcs resource disable msPostgresql
 +
#login as postgres
 +
su - postgres
 +
#kill all running instances
 +
killall -sSIGKILL postgres
 +
#try to start it manually
 +
pg_ctl start
 +
</syntaxhighlight>
 +
Read the logs available at /var/lib/pgsql/data/pg_logs/ and check if the startup was successfull.
 +
If not, connect to a different host and try the same thing again.
 +
If it is running, connect to the other host and run the following recovery script.<br/>
 +
recover.sh
 +
<syntaxhighlight lang="text">
 +
su - postgres << 'EOF'
 +
rm -rf /var/lib/pgsql/data/
 +
pg_basebackup -h OTHER_NODE_IP -U postgres -D /var/lib/pgsql/data -X stream -P
 +
rm /var/lib/pgsql/tmp/PGSQL.lock
 +
EOF
 +
pcs resource cleanup msPostgresql
 +
</syntaxhighlight>
 +
'''IF EVERYTHING FAILS, RECOVER FROM BACKUP ;-)'''
 +
 
 +
== Create oVirt Database ==
 +
<syntaxhighlight lang="bash">
 +
#change to postgres user and start postgres console
 +
su - postgres
 +
pgsql
 +
 
 +
#create ovirt user
 +
CREATE USER ovirtusername WITH PASSWORD 'ovirtpassword';
 +
 
 +
#create ovirt database
 +
CREATE DATABASE ovirtdb;
 +
 
 +
#grant all privileges on database
 +
GRANT ALL PRIVILEGES ON DATABASE ovirtdb to ovirtusername;
 +
 
 +
#quit
 +
\q
 +
</syntaxhighlight>

Latest revision as of 16:11, 9 September 2016

HA PostgreSQL stores the OVirt Engine configuration. It is configured with the following features:

  1. Fail-over of Master
  2. If Master breaks down, RA detects this fault and makes Master to stop, and Slave is promoted to new Master(promote).
  3. Switching between asynchronous and synchronous
  4. If Slave breaks down or LAN has some troubles, the transaction including Write operation will be stopped during the setting of synchronous replication. This means the stop of service. Therefore, RA switches dynamically from the synchronous to the asynchronous replication for prevented from stopping service.
  5. Automated discrimination of data old and new when initial starts
  6. When Pacemaker is started at the same time in the initial starts, RA compares the data of each node using "last xlog replay location" or "last receive replay location" to check which node has the newest data. The node which has the newest data will be Master.

Node 1

su – postgres
mkdir /var/lib/pgsql/pg_archive
cd /var/lib/pgsql/data
initdb

/var/lib/pgsql/data/postgresql.conf

listen_addresses = '*'
max_connections = 200
shared_buffers = 32MB
wal_level = hot_standby
synchronous_commit = on 
archive_mode = on    
archive_command = 'cp %p /var/lib/pgsql/pg_archive/%f'
max_wal_senders = 5
wal_keep_segments = 32
replication_timeout = 9000
hot_standby = on 
max_standby_archive_delay = -1
max_standby_streaming_delay = -1
wal_receiver_status_interval = 2
hot_standby_feedback = on 
logging_collector = on
log_filename = 'postgresql-%a.log'
log_truncate_on_rotation = on 
log_rotation_age = 1d  
log_rotation_size = 0
log_timezone = 'Europe/Berlin'
datestyle = 'iso, mdy'
timezone = 'Europe/Berlin'
lc_messages = 'en_US.UTF-8' 
lc_monetary = 'en_US.UTF-8' 
lc_numeric = 'en_US.UTF-8' 
lc_time = 'en_US.UTF-8' 
default_text_search_config = 'pg_catalog.english'
restart_after_crash = off
include '/var/lib/pgsql/tmp/rep_mode.conf'

Add permissions for PostgreSQL replication and database access over network by editing the
/var/lib/pgsql/pg_hba.conf

local   all             all                                         trust
host    all             all     127.0.0.1/32                        trust
host    all             all     ::1/128                             trust
host    all             all     127.0.0.1/32                        trust
host    all             all     OVIRTM/32                           md5  
host    all             all     CLUSTERLAN_CLUSTER_N1_IP/32         trust
host    all             all     CLUSTERLAN_CLUSTER_N2_IP/32         trust
host    all             all     CLUSTERLAN_CLUSTER_IP/32            trust
host    replication     all     CLUSTERLAN_CLUSTER_N1_IP/32         trust
host    replication     all     CLUSTERLAN_CLUSTER_N2_IP/32         trust

Node 2

su - postgres
rm -rf /var/lib/pgsql/data/*
pg_basebackup -hCLUSTERLAN_CLUSTER_N1_IP -Upostgres -D/var/lib/pgsql/data -Xstream -P
mkdir /var/lib/pgsql/pg_archive

/var/lib/pgsql/data/recover.conf

standby_mode = 'on'
primary_conninfo = 'host=CLUSTERLAN_CLUSTER_N2_IP port=5432 user=postgres
application_name=NODE2'
restore_command = 'cp /var/lib/pgsql/pg_archive/%f %p'
recovery_target_timeline = 'latest'

Prepare for Pacemaker

wget https://raw.github.com/ClusterLabs/resource-\
agents/a6f4ddf76cb4bbc1b3df4c9b6632a6351b63c19e/heartbeat/pgsql
cp pgsql /usr/lib/ocf/resource.d/heartbeat/
chmod 755 /usr/lib/ocf/resource.d/heartbeat/pgsql

For further information see: clusterlabs

Adding as cluster resource

#create resource
pcs resource create pgsql pgsql \
                    pgctl="/usr/bin/pg_ctl" \
                    psql="/usr/bin/psql" \
                    pgdata="/var/lib/pgsql/data/" \
                    rep_mode="sync" \
                    node_list="CLUSTER_N1.clan CLUSTER_N2.clan" \
                    restore_command="cp /var/lib/pgsql/pg_archive/%f %p" \
                    primary_conninfo_opt="keepalives_idle=60 keepalives_interval=5 keepalives_count=5" \
                    master_ip="CLUSTERLAN_CLUSTER_IP" \
                    restart_on_promote='true' \
                    op start   timeout="60s" interval="0s" on-fail="restart" \
                    op monitor timeout="60s" interval="4s" on-fail="restart" \
                    op monitor timeout="60s" interval="3s" on-fail="restart" role="Master" \
                    op promote timeout="60s" interval="0s" on-fail="restart" \
                    op demote  timeout="60s" interval="0s" on-fail="stop" \
                    op stop    timeout="60s" interval="0s" on-fail="block" \
                    op notify  timeout="60s" interval="0s"
#create master slave set
pcs resource master msPostgresql pgsql master-max=1 master-node-max=1 \
                    clone-max=2 clone-node-max=1 notify=true

Recovery on failure

If the resource gets stopped
Connect to any of the nodes and try to run the following:

pcs resource disable msPostgresql
#login as postgres
su - postgres
#kill all running instances
killall -sSIGKILL postgres
#try to start it manually
pg_ctl start

Read the logs available at /var/lib/pgsql/data/pg_logs/ and check if the startup was successfull. If not, connect to a different host and try the same thing again. If it is running, connect to the other host and run the following recovery script.
recover.sh

su - postgres << 'EOF'
rm -rf /var/lib/pgsql/data/
pg_basebackup -h OTHER_NODE_IP -U postgres -D /var/lib/pgsql/data -X stream -P
rm /var/lib/pgsql/tmp/PGSQL.lock
EOF
pcs resource cleanup msPostgresql

IF EVERYTHING FAILS, RECOVER FROM BACKUP ;-)

Create oVirt Database

#change to postgres user and start postgres console
su - postgres
pgsql

#create ovirt user
CREATE USER ovirtusername WITH PASSWORD 'ovirtpassword';

#create ovirt database
CREATE DATABASE ovirtdb;

#grant all privileges on database
GRANT ALL PRIVILEGES ON DATABASE ovirtdb to ovirtusername;

#quit
\q