[pgpool-general: 2986] Re: watchdog escalation between pgpool instances not always working

Yugo Nagata nagata at sraoss.co.jp
Mon Jun 30 10:56:01 JST 2014


Hi,

On Fri, 27 Jun 2014 18:07:43 +0200
pgpool-gbif at gbif.org wrote:

> Hello,
> 
> I've installed pgpoolII version 3.3.2 using RPMS provided by the PGDG 
> repository, on Centos6 64 bit servers.
> 
> On my test rig, I have two pgpool servers in front of 2 postgres servers 
> (replication mode). Connection to the DB servers seems to work fine, but 
> I seem to have some issues using the watchdog/heartbeat.
> 
> Basically when I stop pgpool on one of the pgpool machines (the one 
> having the virtual IP at the moment), the other should escalate to 
> master pgpool. But it only works something like 3 out of 5 attempts - 
> without any changes in the configurations.

How did you stop pgpool: with the pgpool stop command, Ctrl+C, or something
else such as the kill command? In any case, when pgpool stops normally, it
should notify the other pgpool, and escalation should then be triggered. I
looked at your pgpool.conf and I don't see any problem in it, though this is
only one of the two, and the other pgpool.conf might have something wrong.

If possible, could you please show me the pgpool.log from both pgpools?
I might be able to spot messages that should appear but are missing.

> 
> Other times, I stop the pgpool daemon on the master pgpool server, and 
> the other pgpool server does not escalate - nothing strange is shown in 
> the logs, either, as far as I can tell.
> 
> For my testing purposes, I'm running pgpool as root.
> 
> This behaviour is shown regardless of which of the pgpool servers cedes 
> rank.
> 
> Please find below my configuration.
> 
> I hope someone can shed some light on this. Thank you!
> 
> Best regards,
> Andrei
> 
> ------------------------------------
> 
> listen_addresses = '*'
> port = 9999
> socket_dir = '/var/run/pgpool-II-93'
> pcp_port = 9898
> pcp_socket_dir = '/var/run/pgpool-II-93'
> backend_hostname0 = '192.168.122.131'
> backend_port0 = 5432
> backend_weight0 = 1
> backend_data_directory0 = '/data'
> backend_flag0 = 'ALLOW_TO_FAILOVER'
> backend_hostname1 = '192.168.122.132'
> backend_port1 = 5432
> backend_weight1 = 1
> backend_data_directory1 = '/data'
> backend_flag1 = 'ALLOW_TO_FAILOVER'
> enable_pool_hba = on
> pool_passwd = 'pool_passwd'
> authentication_timeout = 60
> ssl = off
> num_init_children = 32
> max_pool = 4
> child_life_time = 300
> child_max_connections = 0
> connection_life_time = 0
> client_idle_limit = 0
> log_destination = 'syslog'
> print_timestamp = on
> log_connections = off
> log_hostname = off
> log_statement = off
> log_per_node_statement = off
> log_standby_delay = 'none'
> syslog_facility = 'LOCAL0'
> syslog_ident = 'pgpool'
> debug_level = 2
> pid_file_name = '/var/run/pgpool-II-93/pgpool.pid'
> logdir = '/var/run/pgpool-II-93'
> connection_cache = on
> reset_query_list = 'ABORT; DISCARD ALL'
> replication_mode = on
> replicate_select = off
> insert_lock = on
> lobj_lock_table = ''
> replication_stop_on_mismatch = off
> failover_if_affected_tuples_mismatch = off
> load_balance_mode = on
> ignore_leading_white_space = on
> white_function_list = ''
> black_function_list = 'nextval,setval'
> master_slave_mode = off
> master_slave_sub_mode = 'slony'
> sr_check_period = 0
> sr_check_user = 'nobody'
> sr_check_password = ''
> delay_threshold = 0
> follow_master_command = ''
> parallel_mode = off
> pgpool2_hostname = ''
> health_check_period = 0
> health_check_timeout = 20
> health_check_user = 'nobody'
> health_check_password = ''
> health_check_max_retries = 0
> health_check_retry_delay = 1
> failover_command = ''
> failback_command = ''
> fail_over_on_backend_error = on
> search_primary_node_timeout = 10
> recovery_user = 'nobody'
> recovery_password = ''
> recovery_1st_stage_command = ''
> recovery_2nd_stage_command = ''
> recovery_timeout = 90
> client_idle_limit_in_recovery = 0
> use_watchdog = on
> trusted_servers = '192.168.122.1'
> ping_path = '/bin'
> wd_hostname = '192.168.122.133'
> wd_port = 9000
> wd_authkey = ''
> delegate_IP = '192.168.122.140'
> ifconfig_path = '/sbin'
> if_up_cmd = 'ifconfig eth0:0 inet $_IP_$ netmask 255.255.255.0'
> if_down_cmd = 'ifconfig eth0:0 down'
> arping_path = '/usr/sbin'
> arping_cmd = 'arping -U $_IP_$ -w 1'
> clear_memqcache_on_escalation = on
> wd_escalation_command = ''
> wd_lifecheck_method = 'heartbeat'
> wd_interval = 3
> wd_heartbeat_port = 9694
> wd_heartbeat_keepalive = 2
> wd_heartbeat_deadtime = 10
> heartbeat_destination0 = '192.168.122.134'
> heartbeat_destination_port0 = 9694
> heartbeat_device0 = 'eth0'
> wd_life_point = 3
> wd_lifecheck_query = 'SELECT 1'
> wd_lifecheck_dbname = 'template1'
> wd_lifecheck_user = 'nobody'
> wd_lifecheck_password = ''
> other_pgpool_hostname0 = '192.168.122.134'
> other_pgpool_port0 = 9999
> other_wd_port0 = 9000
> relcache_expire = 0
> relcache_size = 256
> check_temp_table = on
> memory_cache_enabled = off
> memqcache_method = 'shmem'
> memqcache_memcached_host = 'localhost'
> memqcache_memcached_port = 11211
> memqcache_total_size = 67108864
> memqcache_max_num_cache = 1000000
> memqcache_expire = 0
> memqcache_auto_cache_invalidation = on
> memqcache_maxcache = 409600
> memqcache_cache_block_size = 1048576
> memqcache_oiddir = '/var/log/pgpool/oiddir'
> white_memqcache_table_list = ''
> black_memqcache_table_list = ''
> 
> 
> 
> 
> _______________________________________________
> pgpool-general mailing list
> pgpool-general at pgpool.net
> http://www.pgpool.net/mailman/listinfo/pgpool-general


-- 
Yugo Nagata <nagata at sraoss.co.jp>


More information about the pgpool-general mailing list