[pgpool-general: 2944] Re: Pgpool cluster with PostgreSQL 9.3

Alexandru Cardaniuc cardaniuc at gmail.com
Sun Jun 22 15:05:12 JST 2014


Also to add: with this configuration and setup, while testing failover by failing the
master database, pgpool correctly identified the master as failed and detached it from
the cluster. But all writes then failed on the slave, since the trigger file was never
created and the slave was not promoted to master (it was still in read-only state).
For some reason,
failover_command = '/store/pgdata/failover_stream.sh %d %P %H %R postgresql.trigger'
was not executed.
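
A quick way to confirm what state the standby ended up in after pgpool reported the
failover (just a minimal check, assuming the standby is 10.0.90.12 and the postgres
user can connect, as in the configs below):

# pg_is_in_recovery() returns 't' while the node is still a read-only standby
# and 'f' once it has actually been promoted to a read/write master.
psql -h 10.0.90.12 -p 5432 -U postgres -At -c "SELECT pg_is_in_recovery();"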

Normally, I expect to see something like this in the logs:
2014-06-03 05:25:33 ERROR: pid 2970: make_persistent_db_connection: connection to 10.0.90.11(5432) failed
2014-06-03 05:25:33 ERROR: pid 2970: health check failed. 0 th host 10.0.90.11 at port 5432 is down
2014-06-03 05:25:33 LOG:   pid 2970: set 0 th backend down status
2014-06-03 05:25:33 LOG:   pid 2970: wd_start_interlock: start interlocking
2014-06-03 05:25:33 LOG:   pid 2970: wd_assume_lock_holder: become a new lock holder
2014-06-03 05:25:37 LOG:   pid 17705: connection received: host=10.0.97.1 port=57218
2014-06-03 05:25:37 ERROR: pid 17705: connect_inet_domain_socket: getsockopt() detected error: Connection refused
2014-06-03 05:25:37 ERROR: pid 17705: connection to 10.0.90.11(5432) failed
2014-06-03 05:25:37 ERROR: pid 17705: new_connection: create_cp() failed
2014-06-03 05:25:37 LOG:   pid 17705: new_connection: do not failover because fail_over_on_backend_error is off
2014-06-03 05:25:44 ERROR: pid 2970: wd_start_interlock: timed out
2014-06-03 05:25:44 LOG:   pid 2970: starting degeneration. shutdown host 10.0.90.11(5432)
2014-06-03 05:25:44 LOG:   pid 2970: Restart all children
2014-06-03 05:25:44 LOG:   pid 2970: execute command: /store/pgdata/failover_stream.sh 0 0 10.0.90.12 /store/pgdata postgresql.trigger
+ FAILED_NODE=0
+ OLD_MASTER=0
+ NEW_MASTER=10.0.90.12
+ PGDATA=/store/pgdata
+ TRIGGER_FILE=postgresql.trigger
+ '[' 0 = 0 ']'
+ /usr/bin/ssh -T 10.0.90.12 /bin/touch /store/pgdata/postgresql.trigger
+ exit 0
2014-06-03 05:25:44 LOG:   pid 2970: find_primary_node_repeatedly: waiting for finding a primary node
2014-06-03 05:25:47 LOG:   pid 2970: find_primary_node: primary node id is 1
2014-06-03 05:25:47 LOG:   pid 2970: starting follow degeneration. shutdown host 10.0.90.11(5432)
2014-06-03 05:25:47 LOG:   pid 2970: failover: 1 follow backends have been degenerated
2014-06-03 05:25:47 LOG:   pid 26259: start triggering follow command.
2014-06-03 05:25:47 LOG:   pid 26259: execute command: echo $(date) host:10.0.90.11, new master id:1, old master id:0 >> /var/log/pgpool/master_failover.log
2014-06-03 05:25:47 LOG:   pid 2970: wd_end_interlock: end interlocking
2014-06-03 05:25:48 LOG:   pid 2970: failover: set new primary node: 1
2014-06-03 05:25:48 LOG:   pid 2970: failover: set new master node: 1
2014-06-03 05:25:48 LOG:   pid 2970: failover done. shutdown host 10.0.90.11(5432)


But in this case, when I shut down 10.0.90.11 (the master) and expect the
failover script to be run, I don't see it being run at all. See the log
snippet below:
2014-06-05 23:38:57 ERROR: pid 2970: health check failed. 0 th host 10.0.90.11 at port 5432 is down
2014-06-05 23:38:57 LOG:   pid 2970: health check retry sleep time: 1 second(s)
2014-06-05 23:38:58 ERROR: pid 2970: s_do_auth: expecting R got E
2014-06-05 23:38:58 ERROR: pid 2970: make_persistent_db_connection: s_do_auth failed
2014-06-05 23:38:58 ERROR: pid 2970: health check failed. 0 th host 10.0.90.11 at port 5432 is down
2014-06-05 23:38:58 LOG:   pid 2970: health check retry sleep time: 1 second(s)
2014-06-05 23:38:59 ERROR: pid 2970: s_do_auth: expecting R got E
2014-06-05 23:38:59 ERROR: pid 2970: make_persistent_db_connection: s_do_auth failed
2014-06-05 23:38:59 ERROR: pid 2970: health check failed. 0 th host 10.0.90.11 at port 5432 is down
2014-06-05 23:38:59 LOG:   pid 2970: health check retry sleep time: 1 second(s)
2014-06-05 23:39:00 LOG:   pid 2982: wd_send_response: WD_STAND_FOR_LOCK_HOLDER received it
2014-06-05 23:39:00 ERROR: pid 2970: s_do_auth: expecting R got E
2014-06-05 23:39:00 ERROR: pid 2970: make_persistent_db_connection: s_do_auth failed
2014-06-05 23:39:00 ERROR: pid 2970: health check failed. 0 th host 10.0.90.11 at port 5432 is down
2014-06-05 23:39:00 LOG:   pid 2970: set 0 th backend down status
2014-06-05 23:39:00 LOG:   pid 2970: wd_start_interlock: start interlocking
2014-06-05 23:39:01 LOG:   pid 2970: starting degeneration. shutdown host 10.0.90.11(5432)
2014-06-05 23:39:01 LOG:   pid 2970: Restart all children
2014-06-05 23:39:02 LOG:   pid 2970: find_primary_node_repeatedly: waiting for finding a primary node
2014-06-05 23:39:12 LOG:   pid 2970: failover: no follow backends are degenerated
2014-06-05 23:39:12 LOG:   pid 2970: wd_end_interlock: end interlocking
2014-06-05 23:39:12 LOG:   pid 2970: failover: set new primary node: -1
2014-06-05 23:39:12 LOG:   pid 2970: failover: set new master node: 1
2014-06-05 23:39:12 LOG:   pid 30220: connection received: host=10.0.97.1 port=45122
2014-06-05 23:39:13 LOG:   pid 30220: pool_send_and_wait: Error or notice message from backend: : DB node id: 1 backend pid: 23115 statement: UPDATE SCGroupRequestStatus SET Status = $1 WHERE Status = $2 and LastUpdateTime < $3 and Owner is NULL message: cannot execute UPDATE in a read-only transaction
2014-06-05 23:39:13 LOG:   pid 30326: do_child: failback event found. restart myself.
2014-06-05 23:39:13 LOG:   pid 30456: do_child: failback event found. restart myself.
2014-06-05 23:39:13 LOG:   pid 30468: do_child: failback event found. restart myself.
2014-06-05 23:39:13 LOG:   pid 2970: failover done. shutdown host 10.0.90.11(5432)
2014-06-05 23:39:13 LOG:   pid 29758: worker process received restart request
2014-06-05 23:39:14 LOG:   pid 29511: pcp child process received restart request
2014-06-05 23:39:14 LOG:   pid 2970: PCP child 29511 exits with status 256 in failover()
2014-06-05 23:39:14 LOG:   pid 2970: fork a new PCP child pid 30470 in failover()
2014-06-05 23:39:14 LOG:   pid 2970: worker child 29758 exits with status 256
2014-06-05 23:39:14 LOG:   pid 2970: fork a new worker child pid 30471
2014-06-05 23:39:25 LOG:   pid 30472: connection received: host=10.0.97.1 port=45123
2014-06-05 23:39:59 LOG:   pid 30466: connection received: host=10.0.70.1 port=50938
2014-06-05 23:40:02 LOG:   pid 30220: pool_send_and_wait: Error or notice message from backend: : DB node id: 1 backend pid: 23115 statement: UPDATE SCGroupRequestStatus SET Status = $1 WHERE Status = $2 and LastUpdateTime < $3 and Owner is NULL message: cannot execute UPDATE in a read-only transaction
2014-06-05 23:41:02 LOG:   pid 30220: pool_send_and_wait: Error or notice message from backend: : DB node id: 1 backend pid: 23115 statement: UPDATE SCGroupRequestStatus SET Status = $1 WHERE Status = $2 and LastUpdateTime < $3 and Owner is NULL message: cannot execute UPDATE in a read-only transaction


As you can see, the node was detached from the cluster correctly, but the
failover script was NOT executed. So the trigger file was not created, and the
slave was not promoted to master.

Why is that? Please see the configurations in the previous email.
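
For reference, this is how I check which node states pgpool itself reports after
the failover (assuming clients can still reach pgpool on the delegate IP 10.0.90.1
and port 9999 from the configs below):

# The detached backend (node 0) should no longer be reported as up here,
# and only the surviving node should be accepting connections.
psql -h 10.0.90.1 -p 9999 -U postgres -c "SHOW pool_nodes;"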

Also, this is the failover script:

#!/bin/bash -x
FAILED_NODE=$1         # %d  id of the failed node
OLD_MASTER=$2          # %P  old primary node id
NEW_MASTER=$3          # %H  host name of the new master
PGDATA=$4              # %R  database cluster path of the new master
TRIGGER_FILE=$5        # trigger file name, passed literally

SSH_OPTIONS='-T -o StrictHostKeyChecking=no'

# Only promote when the node that failed was the old primary.
if [ "$FAILED_NODE" = "$OLD_MASTER" ]; then
        /usr/bin/ssh $SSH_OPTIONS "$NEW_MASTER" /bin/touch "$PGDATA/$TRIGGER_FILE"
fi
exit 0
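
For what it's worth, the log from the working run above shows the exact invocation
pgpool used, so the script can also be re-run by hand with the same arguments to
rule out SSH or permission problems:

# Same arguments pgpool passed on 2014-06-03 (failed node 0, old primary 0, new master 10.0.90.12):
/store/pgdata/failover_stream.sh 0 0 10.0.90.12 /store/pgdata postgresql.trigger

# Then check that the trigger file actually appeared on the standby:
/usr/bin/ssh -T 10.0.90.12 ls -l /store/pgdata/postgresql.trigger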


Sincerely,
Alexandru



On Sat, Jun 21, 2014 at 10:49 PM, Alexandru Cardaniuc <cardaniuc at gmail.com>
wrote:

>
> Hi All,
>
> I need some validation of the configuration I did for a cluster of
> 2 pgpools with a cluster of 2 PostgreSQL 9.3 databases. I went through
> all the documentation, but in some places it's not completely clear, so
> I am not sure whether I misunderstood some things.
>
> Here's the info.
>
> Pgpool version: 3.3.3 and using watchdog (2 pgpools: 1 primary, 1
> standby)
>
> PostgreSQL version 9.3 (cluster of 2 databases with streaming
> replication and hot standby configured).
>
> This is my relevant PostgreSQL 9.3 config:
> ------------------------------------------
> listen_addresses = '*'
> port = 5432
> max_connections = 550
> superuser_reserved_connections = 3
> wal_level = hot_standby
> checkpoint_segments = 10
> checkpoint_completion_target = 0.9
> max_wal_senders = 10
> wal_keep_segments = 100
> hot_standby = on
> ------------------------------------------
> The documentation states that:
> "To enable read-only queries on a standby server, wal_level must be set
> to hot_standby on the primary, and hot_standby must be enabled in the
> standby."
>
> I use the same config above on both the primary and the secondary database,
> with the assumption that 'wal_level = hot_standby' is ignored on the slave
> and 'hot_standby = on' is ignored on the master.
>
> Is that a correct assumption? That assumption shouldn't create any
> issues during pgpool/postgres failover?
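>
> (As a quick sanity check on that assumption, a sketch assuming the postgres
> user can connect directly to both 10.0.90.11 and 10.0.90.12: compare the
> effective settings on each node.)
>
> for host in 10.0.90.11 10.0.90.12; do
>   echo "== $host =="
>   # wal_level matters on the primary; hot_standby is only honoured while in recovery
>   psql -h $host -p 5432 -U postgres -At -c "SHOW wal_level;"
>   psql -h $host -p 5432 -U postgres -At -c "SHOW hot_standby;"
> done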
>
>
> Also, about the pgpool config. This is the pgpool config for the 1st
> pgpool:
> --------------------------------------------------------------------
> listen_addresses = '*'
> port = 9999
> socket_dir = '/var/run/pgpool'
> pcp_port = 9898
> pcp_socket_dir = '/var/run/pgpool'
> num_init_children = 250
> child_life_time = 300
> child_max_connections = 0
> client_idle_limit = 0
> enable_pool_hba = false
> pool_passwd = 'pool_passwd'
> authentication_timeout = 60
> print_timestamp = true
> log_connections = true
> log_hostname = false
> log_statement = true
> log_per_node_statement = false
> debug_level = 0
> pid_file_name = '/var/run/pgpool/pgpool.pid'
> logdir = '/var/run/pgpool'
> connection_cache = false
> health_check_period = 60
> health_check_timeout = 15
> health_check_user = 'postgres'
> health_check_password = 'password'
> health_check_max_retries = 3
> health_check_retry_delay = 1
> search_primary_node_timeout = 10
> failover_command = '/store/pgdata/failover_stream.sh %d %P %H %R postgresql.trigger'
> failback_command = 'echo $(date) host:%h, new master id:%m, old master id:%M >> /var/log/pgpool/failback.log'
> follow_master_command = 'echo $(date) host:%h, new master id:%m, old master id:%M >> /var/log/pgpool/master_failover.log'
> fail_over_on_backend_error = false
> ignore_leading_white_space = true
> backend_hostname0 = '10.0.90.11'
> backend_port0 = 5432
> backend_weight0 = 1
> backend_data_directory0 = '/store/pgdata'
> backend_flag0 = 'ALLOW_TO_FAILOVER'
> backend_hostname1 = '10.0.90.12'
> backend_port1 = 5432
> backend_weight1 = 1
> backend_data_directory1 = '/store/pgdata'
> backend_flag1 = 'ALLOW_TO_FAILOVER'
> ssl = false
> max_pool = 1
> connection_life_time = 0
> reset_query_list = 'ABORT; DISCARD ALL'
> replication_mode = false
> replication_stop_on_mismatch = true
> replicate_select = false
> insert_lock = true
> recovery_user = 'postgres'
> recovery_password = 'password'
> recovery_1st_stage_command = 'base_backup.sh'
> recovery_2nd_stage_command = ''
> recovery_timeout = 120
> client_idle_limit_in_recovery = 1
> lobj_lock_table = ''
> master_slave_mode = true
> load_balance_mode = true
> master_slave_sub_mode = 'stream'
> delay_threshold = 0
> sr_check_period = 0
> sr_check_user = 'postgres'
> sr_check_password = 'password'
> log_standby_delay = 'none'
> parallel_mode = false
> system_db_hostname = 'localhost'
> system_db_port = 5432
> system_db_dbname = 'pgpool'
> system_db_schema = 'pgpool_catalog'
> system_db_user = 'pgpool'
> system_db_password = ''
> pgpool2_hostname = ''
> memory_cache_enabled = off
> use_watchdog = on
> wd_hostname = '10.0.90.11'
> wd_port = 9000
> delegate_IP = '10.0.90.1'
> ifconfig_path = '/store/pgdata/sbin'
> if_up_cmd = 'ifconfig eth0:0 inet $_IP_$ netmask 255.255.0.0'
> if_down_cmd = 'ifconfig eth0:0 down'
> arping_path = '/store/pgdata/sbin'
> arping_cmd = 'arping -U $_IP_$ -w 1'
> wd_lifecheck_method = 'heartbeat'
> wd_interval = 10
> wd_heartbeat_port = 9694
> wd_heartbeat_keepalive = 2
> heartbeat_destination0 = '10.0.90.12'
> heartbeat_destination_port0 = 9694
> other_pgpool_hostname0 = '10.0.90.12'
> other_pgpool_port0 = 9999
> other_wd_port0 = 9000
> --------------------------------------------------------------------
>
> second pgpool config file:
> --------------------------------------------------------------------
> listen_addresses = '*'
> port = 9999
> socket_dir = '/var/run/pgpool'
> pcp_port = 9898
> pcp_socket_dir = '/var/run/pgpool'
> num_init_children = 250
> child_life_time = 300
> child_max_connections = 0
> client_idle_limit = 0
> enable_pool_hba = false
> pool_passwd = 'pool_passwd'
> authentication_timeout = 60
> print_timestamp = true
> log_connections = true
> log_hostname = false
> log_statement = true
> log_per_node_statement = false
> debug_level = 0
> pid_file_name = '/var/run/pgpool/pgpool.pid'
> logdir = '/var/run/pgpool'
> connection_cache = false
> health_check_period = 60
> health_check_timeout = 15
> health_check_user = 'postgres'
> health_check_password = 'password'
> health_check_max_retries = 3
> health_check_retry_delay = 1
> search_primary_node_timeout = 10
> failover_command = '/store/pgdata/failover_stream.sh %d %P %H %R postgresql.trigger'
> failback_command = 'echo $(date) host:%h, new master id:%m, old master id:%M >> /var/log/pgpool/failback.log'
> follow_master_command = 'echo $(date) host:%h, new master id:%m, old master id:%M >> /var/log/pgpool/master_failover.log'
> fail_over_on_backend_error = false
> ignore_leading_white_space = true
> backend_hostname0 = '10.0.90.11'
> backend_port0 = 5432
> backend_weight0 = 1
> backend_data_directory0 = '/store/pgdata'
> backend_flag0 = 'ALLOW_TO_FAILOVER'
> backend_hostname1 = '10.0.90.12'
> backend_port1 = 5432
> backend_weight1 = 1
> backend_data_directory1 = '/store/pgdata'
> backend_flag1 = 'ALLOW_TO_FAILOVER'
> ssl = false
> max_pool = 1
> connection_life_time = 0
> reset_query_list = 'ABORT; DISCARD ALL'
> replication_mode = false
> replication_stop_on_mismatch = true
> replicate_select = false
> insert_lock = true
> recovery_user = 'postgres'
> recovery_password = 'password'
> recovery_1st_stage_command = 'base_backup.sh'
> recovery_2nd_stage_command = ''
> recovery_timeout = 120
> client_idle_limit_in_recovery = 1
> lobj_lock_table = ''
> master_slave_mode = true
> load_balance_mode = true
> master_slave_sub_mode = 'stream'
> delay_threshold = 0
> sr_check_period = 0
> sr_check_user = 'postgres'
> sr_check_password = 'password'
> log_standby_delay = 'none'
> parallel_mode = false
> system_db_hostname = 'localhost'
> system_db_port = 5432
> system_db_dbname = 'pgpool'
> system_db_schema = 'pgpool_catalog'
> system_db_user = 'pgpool'
> system_db_password = ''
> pgpool2_hostname = ''
> memory_cache_enabled = off
> use_watchdog = on
> wd_hostname = '10.0.90.12'
> wd_port = 9000
> delegate_IP = '10.0.90.1'
> ifconfig_path = '/store/pgdata/sbin'
> if_up_cmd = 'ifconfig eth0:0 inet $_IP_$ netmask 255.255.0.0'
> if_down_cmd = 'ifconfig eth0:0 down'
> arping_path = '/store/pgdata/sbin'
> arping_cmd = 'arping -U $_IP_$ -w 1'
> wd_lifecheck_method = 'heartbeat'
> wd_interval = 10
> wd_heartbeat_port = 9694
> wd_heartbeat_keepalive = 2
> heartbeat_destination0 = '10.0.90.11'
> heartbeat_destination_port0 = 9694
> other_pgpool_hostname0 = '10.0.90.11'
> other_pgpool_port0 = 9999
> other_wd_port0 = 9000
> --------------------------------------------------------------------
>
> Is anything wrong or missing in the configs? I need a cluster of pgpools
> with watchdog and health checks monitoring a cluster of PostgreSQL
> databases with a streaming replication setup.
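>
> (A rough way to confirm that streaming replication itself is healthy before
> exercising pgpool failover, assuming the postgres superuser can reach both
> nodes directly:)
>
> # On the primary: one row per connected standby, state should be 'streaming'
> psql -h 10.0.90.11 -p 5432 -U postgres -x -c "SELECT * FROM pg_stat_replication;"
>
> # On the standby: timestamp of the last replayed transaction (rough lag indicator)
> psql -h 10.0.90.12 -p 5432 -U postgres -At -c "SELECT pg_last_xact_replay_timestamp();"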
>
> Any help in review or suggestions based on your knowledge or experience
> will be appreciated.
>
> Sincerely,
> Alexandru
>
> --
> "A journey of a thousand miles begins with a single step."
> - Ancient Proverb
>



-- 
Sincerely yours,
Alexandru Cardaniuc