[pgpool-general: 2597] Re: connection to pgpool2 waits forever after master failover

Yugo Nagata nagata at sraoss.co.jp
Mon Mar 3 18:06:59 JST 2014


Hi,

I'm not familiar to repmgr, but I'll try to reproduce this.

Before that, could you please send log messages of pgpool and repmgr,
and repmgr.conf?


On Fri, 28 Feb 2014 15:16:50 +0100
Martin Kováčik <kovacik at redbyte.eu> wrote:

> Hi,
> 
> I'm trying to install a simple 2 node pgpool2 cluster with hot standby 
> streaming replication and automatic failover.
> 
> So far the streaming replication between 2 postresql servers is working 
> perfectly. For the failover part I'm using repmgr which also works great.
> 
> I can connect to both postgresql backends and also I can connect to both 
> postgresql backends through pgpool2.
> 
> The automatic failover works. When I stop the master node, I can see 
> that the standby becomes new master. I'm able to directly connect to new 
> master postgresql node and it is no longer in read only mode. However 
> when I want to connect to the cluster (after the failover) through 
> pgpool2 (using psql client) the connection waits forever and I'm no 
> longer to connect. I've turned on connection logging for postgresql and 
> there is no record of an attempted connection. Pgpool2 log also doesn't 
> show incomming connection attempt. To be able to connect through pgpool2 
> again, I must restart it.
> 
> I'm unable to connect only after the master failover. If I stop the 
> standby node, the backend is correctly detected as down, but I'm still 
> able to connect to pgpool2.
> 
> Please help. Thank you.
> 
> My setup is:
> 
> 2x PostgreSQL 9.3.2 on x86_64-unknown-linux-gnu, compiled by gcc 
> (Ubuntu/Linaro 4.6.3-1ubuntu5) 4.6.3, 64-bit
> pgpool-II version 3.3.2 (tokakiboshi) (I've also tried the latest master 
> at 46c9a71813768fa799580f3ae07599b9eaa14fef)
> Linux 3.11.0-12-generic #19-Ubuntu SMP Wed Oct 9 16:20:46 UTC 2013 
> x86_64 x86_64 x86_64 GNU/Linux
> 
> My pgpool.conf is:
> 
> listen_addresses = '*'
> port = 9999
> socket_dir = '/tmp'
> pcp_port = 9898
> pcp_socket_dir = '/tmp'
> backend_hostname0 = 'dbnode1'
> backend_port0 = 5432
> backend_weight0 = 1
> backend_data_directory0 = '/var/lib/postgresql/9.3/main'
> backend_flag0 = 'ALLOW_TO_FAILOVER'
> backend_hostname1 = 'dbnode2'
> backend_port1 = 5432
> backend_weight1 = 1
> backend_data_directory1 = '/var/lib/postgresql/9.3/main'
> backend_flag1 = 'ALLOW_TO_FAILOVER'
> enable_pool_hba = on
> pool_passwd = 'pool_passwd'
> authentication_timeout = 10
> ssl = off
> num_init_children = 8
> max_pool = 4
> child_life_time = 300
> child_max_connections = 0
> connection_life_time = 0
> client_idle_limit = 0
> log_destination = 'stderr'
> print_timestamp = on
> log_connections = on
> log_hostname = on
> log_statement = off
> log_per_node_statement = off
> log_standby_delay = 'none'
> syslog_facility = 'LOCAL0'
> syslog_ident = 'pgpool'
> debug_level = 1
> pid_file_name = '/var/run/pgpool/pgpool.pid'
> logdir = '/tmp'
> connection_cache = off
> reset_query_list = 'ABORT; DISCARD ALL'
> replication_mode = off
> replicate_select = off
> insert_lock = on
> lobj_lock_table = ''
> replication_stop_on_mismatch = off
> failover_if_affected_tuples_mismatch = off
> load_balance_mode = off
> ignore_leading_white_space = on
> white_function_list = ''
> black_function_list = 'nextval,setval'
> master_slave_mode = on
> master_slave_sub_mode = 'stream'
> sr_check_period = 5
> sr_check_user = 'repmgr'
> sr_check_password = ''
> delay_threshold = 0
> follow_master_command = ''
> parallel_mode = off
> pgpool2_hostname = ''
> system_db_hostname  = 'localhost'
> system_db_port = 5432
> system_db_dbname = 'pgpool'
> system_db_schema = 'pgpool_catalog'
> system_db_user = 'pgpool'
> system_db_password = ''
> health_check_period = 5
> health_check_timeout = 30
> health_check_user = 'repmgr'
> health_check_password = ''
> health_check_max_retries = 3
> health_check_retry_delay = 10
> failover_command = 'su postgres -c "ssh postgres@%H repmgr -f 
> /var/lib/postgresql/9.3/repmgr/repmgr.conf --verbose standby promote"'
> failback_command = ''
> fail_over_on_backend_error = on
> search_primary_node_timeout = 10
> recovery_user = 'nobody'
> recovery_password = ''
> recovery_1st_stage_command = ''
> recovery_2nd_stage_command = ''
> recovery_timeout = 90
> client_idle_limit_in_recovery = 0
> use_watchdog = false
> trusted_servers = ''
> ping_path = '/bin'
> wd_hostname = 'dbnode1'
> wd_port = 9000
> wd_authkey = ''
> delegate_IP = '10.5.4.241'
> ifconfig_path = '/sbin'
> if_up_cmd = 'ifconfig eth0:0 inet $_IP_$ netmask 255.255.255.0'
> if_down_cmd = 'ifconfig eth0:0 down'
> arping_path = '/usr/sbin'           # arping command path
> arping_cmd = 'arping -U $_IP_$ -w 1'
> clear_memqcache_on_escalation = on
> wd_escalation_command = ''
> wd_lifecheck_method = 'heartbeat'
> wd_interval = 3
> wd_heartbeat_port = 9694
> wd_heartbeat_keepalive = 2
> wd_heartbeat_deadtime = 30
> heartbeat_destination0 = 'dbnode2'
> heartbeat_destination_port0 = 9694
> heartbeat_device0 = ''
> wd_life_point = 3
> wd_lifecheck_query = 'SELECT 1'
> wd_lifecheck_dbname = 'template1'
> wd_lifecheck_user = 'repmgr'
> wd_lifecheck_password = ''
> other_pgpool_hostname0 = 'dbnode2'
> other_pgpool_port0 = 9999
> other_wd_port0 = 9000
> relcache_expire = 0
> relcache_size = 256
> check_temp_table = on
> memory_cache_enabled = off
> memqcache_method = 'shmem'
> memqcache_memcached_host = 'localhost'
> memqcache_memcached_port = 11211
> memqcache_total_size = 67108864
> memqcache_max_num_cache = 1000000
> memqcache_expire = 0
> memqcache_auto_cache_invalidation = on
> memqcache_maxcache = 409600
> memqcache_cache_block_size = 1048576
> memqcache_oiddir = '/var/log/pgpool/oiddir'
> white_memqcache_table_list = ''
> black_memqcache_table_list = ''
> 
> 
> -- 
> *Martin*


-- 
Yugo Nagata <nagata at sraoss.co.jp>


More information about the pgpool-general mailing list