[pgpool-general: 6199] pgpool watchdog with too many masters

mjb2kmn mjb2kmn at gmail.com
Fri Aug 17 22:15:07 JST 2018


I am trying to deploy pgpool in native replication mode with watchdog
to manage 2 nodes for Active-Standby.
When I start pgpool2 on both servers, they each claim to be the only
node alive and become master.

I have set the lifecheck method to heartbeat and, on each host, set the
heartbeat destination to the other host. I confirmed that I can
send UDP messages between the hosts with nc. Am I missing or
misunderstanding something?

Also, I don't understand what the watchdog port actually does. If the
heartbeat port sends and receives the health checks and pcp has its own
port for management, what does the watchdog TCP socket do?

Below is pgpool.conf from 1st server, second server's config is the
same except for the 01/02 in the hostnames. Config has been trimmed
down a little, mostly just comments and unrelated stuff removed for
brevity (logging, cache)


#------------------------------------------------------------------------------
# CONNECTIONS
#------------------------------------------------------------------------------

# - pgpool Connection Settings -
listen_addresses = '*'
port = 6432
socket_dir = '/var/run/postgresql'
listen_backlog_multiplier = 2
serialize_accept = off
pcp_listen_addresses = '*'
pcp_port = 9898
pcp_socket_dir = '/var/run/postgresql'

# - Backend Connection Settings -
backend_hostname0 = 'pgsql01.example.com'
backend_port0 = 5432
backend_weight0 = 1
backend_data_directory0 = '/var/lib/pgsql/data'
backend_flag0 = 'ALLOW_TO_FAILOVER'
backend_hostname1 = 'pgsql02.example.com'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/var/lib/pgsql/data'
backend_flag1 = 'ALLOW_TO_FAILOVER'

# - Authentication -
enable_pool_hba = on
pool_passwd = 'pool_passwd'
authentication_timeout = 60

# - SSL Connections -
ssl = off

#------------------------------------------------------------------------------
# POOLS
#------------------------------------------------------------------------------

# - Concurrent session and pool size -
num_init_children = 32
max_pool = 4
# - Life time -
child_life_time = 300
child_max_connections = 0
connection_life_time = 0
client_idle_limit = 0

#------------------------------------------------------------------------------
# CONNECTION POOLING
#------------------------------------------------------------------------------

connection_cache = on
reset_query_list = 'ABORT; DISCARD ALL'


#------------------------------------------------------------------------------
# REPLICATION MODE
#------------------------------------------------------------------------------

replication_mode = on
replicate_select = off
insert_lock = on
lobj_lock_table = ''
# - Degenerate handling -
replication_stop_on_mismatch = off
failover_if_affected_tuples_mismatch = off


#------------------------------------------------------------------------------
# LOAD BALANCING MODE
#------------------------------------------------------------------------------

load_balance_mode = on
ignore_leading_white_space = on
white_function_list = ''
black_function_list = 'nextval,setval,nextval,setval'
database_redirect_preference_list = ''
app_name_redirect_preference_list = ''
allow_sql_comments = on

#------------------------------------------------------------------------------
# MASTER/SLAVE MODE
#------------------------------------------------------------------------------

master_slave_mode = off
master_slave_sub_mode = 'stream'
# - Streaming -
sr_check_period = 0
sr_check_user = 'nobody'
sr_check_password = ''
sr_check_database = 'postgres'
delay_threshold = 0
# - Special commands -
follow_master_command = ''

#------------------------------------------------------------------------------
# HEALTH CHECK GLOBAL PARAMETERS
#------------------------------------------------------------------------------

health_check_period = 0
health_check_timeout = 20
health_check_user = 'nobody'
health_check_password = ''
health_check_database = ''
health_check_max_retries = 0
health_check_retry_delay = 1
connect_timeout = 10000

#------------------------------------------------------------------------------
# FAILOVER AND FAILBACK
#------------------------------------------------------------------------------

failover_command = ''
failback_command = ''
fail_over_on_backend_error = on
search_primary_node_timeout = 300

#------------------------------------------------------------------------------
# ONLINE RECOVERY
#------------------------------------------------------------------------------

recovery_user = 'nobody'
recovery_password = ''
recovery_1st_stage_command = ''
recovery_2nd_stage_command = ''
recovery_timeout = 90
client_idle_limit_in_recovery = 0

#------------------------------------------------------------------------------
# WATCHDOG
#------------------------------------------------------------------------------

# - Enabling -
use_watchdog = on
trusted_servers = 'pgsql-test.example.com,proxy.example.com,proxy01.example.com,proxy02.example.com'
ping_path = '/bin'
# - Watchdog communication Settings -
wd_hostname = 'pgsql01.example.com'
wd_port = 9000
wd_priority = 2
wd_authkey = 'Hunter1'
wd_ipc_socket_dir = '/tmp'

# - Virtual IP control Setting -
delegate_IP = '10.10.10.92'
if_cmd_path = '/sbin'
if_up_cmd = 'ip addr add $_IP_$/23 dev ens192 label ens192:0'
if_down_cmd = 'ip addr del $_IP_$/23 dev ens192'
arping_path = '/usr/bin'
arping_cmd = 'arping -U $_IP_$ -w 1'

# - Behavior on escalation Setting -
clear_memqcache_on_escalation = on
wd_escalation_command = ''
wd_de_escalation_command = ''

# - Watchdog consensus settings for failover -
failover_when_quorum_exists = on
failover_require_consensus = on
allow_multiple_failover_requests_from_node = off

# -- common --
wd_monitoring_interfaces_list = ''  # Comma separated list of interface names to monitor.
wd_lifecheck_method = 'heartbeat'
wd_interval = 5

# -- heartbeat mode --
wd_heartbeat_port = 9694
wd_heartbeat_keepalive = 2
wd_heartbeat_deadtime = 30
heartbeat_destination0 = 'pgsql02.example.com'
heartbeat_destination_port0 = 9694
heartbeat_device0 = ''

# -- query mode --
wd_life_point = 3
wd_lifecheck_query = 'SELECT 1'
wd_lifecheck_dbname = 'template1'
wd_lifecheck_user = 'nobody'
wd_lifecheck_password = ''

# - Other pgpool Connection Settings -
other_pgpool_hostname0 = 'pgsql02'
other_pgpool_port0 = 6432
other_wd_port0 = 9694


More information about the pgpool-general mailing list