[pgpool-general: 2824] Re: sql after an inactive psql session causes failover
Gary Fu
gfu at sigmaspace.com
Wed May 14 03:37:51 JST 2014
On 05/13/14 09:01, Yugo Nagata wrote:
> Hi,
>
> On Mon, 5 May 2014 17:23:28 -0400
> Gary Fu <gfu at sigmaspace.com> wrote:
>
>> Hi,
>>
>> We are running pgpool 3.3.2 with replication and load balance both on,
>> on a new server with postgres 9.2.8 on two separated db servers. We
>> noticed that when we issue a simple sql (select * from config) on a psql
>> session (connect to pgpool) without activities (idle) for more than 20
>> or 30 minutes, the sql will hang for about several minutes (maybe more
>> than 5 or 10 minutes) and then come back with a new connection after the
>> pgpool failover. During that period, there is no problem to make new
> Could you please send pgpool.conf and pgpool.log around the failover?
>
>> psql connection. We don't see the problem with an old pgpool server (on
>> different subnet), same pgpool.conf, connects to the same db servers
>> with different dbs. I think it is the network/tcpip configuration
>> and/or firewall issues, but I'm not export on network (the sysctl
>> command shows no difference between the two pgpool servers) and no
>> access to firewall setup. I don't know how to find out the real cause.
>> Can anyone provide any help/hint ?
>>
>> Thanks,
>> Gary
>> _______________________________________________
>>
>>
******************* psql session 1
[sd3j1uat1.j1uat1_admin].sd3j1uat1> show pool_nodes;
node_id | hostname | port | status | lb_weight | role
---------+----------+------+--------+-----------+--------
0 | sd3db3 | 5432 | 2 | 0.500000 | master
1 | sd3db4 | 5432 | 2 | 0.500000 | slave
(2 rows)
[sd3j1uat1.j1uat1_admin].sd3j1uat1> select now();
now
-------------------------------
2014-05-13 14:36:14.360448+00
(1 row)
(at 16:52, after 2:06 inactive, the following sql hangs)
[sd3j1uat1.j1uat1_admin].sd3j1uat1> select now();
(at 17;08, pgpool failover in the log file, and this psql session
reconnected)
The connection to the server was lost. Attempting reset: Succeeded.
[sd3j1uat1.j1uat1_admin].sd3j1uat1> select now();
now
-------------------------------
2014-05-13 17:08:31.836607+00
[sd3j1uat1.j1uat1_admin].sd3j1uat1> show pool_nodes;
node_id | hostname | port | status | lb_weight | role
---------+----------+------+--------+-----------+--------
0 | sd3db3 | 5432 | 3 | 0.500000 | slave
1 | sd3db4 | 5432 | 2 | 0.500000 | master
*********************** psql session 2 (before the psql session1 issued
the 2nd 'select now()'
[sd3j1uat1.j1uat1_admin].sd3j1uat1> select now();
now
-------------------------------
2014-05-13 16:52:52.478893+00
(1 row)
(at this time the pgpool shows both dbs were Up)
[sd3j1uat1.j1uat1_admin].sd3j1uat1> show pool_nodes;
node_id | hostname | port | status | lb_weight | role
---------+----------+------+--------+-----------+--------
0 | sd3db3 | 5432 | 2 | 0.500000 | master
1 | sd3db4 | 5432 | 2 | 0.500000 | slave
(2 rows)
******************* Log file before and after the failover
2014-05-13 14:35:47 LOG: pid 33466: pgpool-II successfully started.
version 3.3.2 (tokakiboshi)
2014-05-13 14:36:02 LOG: pid 33527: connection received:
host=172.28.34.200 port=35171
2014-05-13 16:52:35 LOG: pid 33484: connection received:
host=172.28.34.200 port=35260
2014-05-13 17:08:31 ERROR: pid 33527: pool_read: read failed (Connection
timed out)
2014-05-13 17:08:31 LOG: pid 33527: degenerate_backend_set: 0 fail
over request from pid 33527
2014-05-13 17:08:31 ERROR: pid 33527: pool_flush_it: write failed to
backend (0). reason: Broken pipe offset: 0 wlen: 5
2014-05-13 17:08:31 LOG: pid 33466: starting degeneration. shutdown
host sd3db3(5432)
2014-05-13 17:08:31 LOG: pid 33466: Restart all children
2014-05-13 17:08:31 LOG: pid 33466: failover: set new primary node: -1
2014-05-13 17:08:31 LOG: pid 33466: failover: set new master node: 1
2014-05-13 17:08:31 LOG: pid 37149: connection received:
host=172.28.34.200 port=35269
2014-05-13 17:08:31 LOG: pid 33466: failover done. shutdown host
sd3db3(5432)
2014-05-13 17:08:31 LOG: pid 33529: worker process received restart
request
2014-05-13 17:08:32 LOG: pid 33528: pcp child process received restart
request
2014-05-13 17:08:32 LOG: pid 33466: PCP child 33528 exits with status
256 in failover()
2014-05-13 17:08:32 LOG: pid 33466: fork a new PCP child pid 37198 in
failover()
2014-05-13 17:08:32 LOG: pid 33466: worker child 33529 exits with
status 256
2014-05-13 17:08:32 LOG: pid 33466: fork a new worker child pid 37199
*********************File pgpool.conf
#------------------------------------------------------------------------------
# CONNECTIONS
#------------------------------------------------------------------------------
# - pgpool Connection Settings -
listen_addresses = '*'
port = 5432
socket_dir = '/tmp'
# - pgpool Communication Manager Connection Settings -
pcp_port = 9898
pcp_socket_dir = '/tmp'
# - Backend Connection Settings -
backend_hostname0 = 'sd3db3'
backend_port0 = 5432
backend_weight0 = 1
#backend_data_directory0 = '/data1' # needed only for online recovery
backend_flag0 = 'ALLOW_TO_FAILOVER'
backend_hostname1 = 'sd3db4'
backend_port1 = 5432
backend_weight1 = 1
#backend_data_directory1 = '/data1' # needed only for online recovery
backend_flag1 = 'ALLOW_TO_FAILOVER'
# - Authentication -
enable_pool_hba = off
pool_passwd = ''
authentication_timeout = 60
# - SSL Connections -
ssl = off
#ssl_key = './server.key'
#ssl_cert = './server.cert'
#ssl_ca_cert = ''
#ssl_ca_cert_dir = ''
#------------------------------------------------------------------------------
# POOLS
num_init_children = 60
max_pool = 4
# - Life time -
child_life_time = 300
child_max_connections = 0
connection_life_time = 0
client_idle_limit = 0
#------------------------------------------------------------------------------
# LOGS
# - Where to log -
log_destination = 'stderr'
# - What to log -
print_timestamp = on
log_connections = on
log_hostname = off
log_statement = off
log_per_node_statement = off
log_standby_delay = 'none'
# - Syslog specific -
syslog_facility = 'LOCAL0'
syslog_ident = 'pgpool'
# - Debug -
debug_level = 0
#------------------------------------------------------------------------------
# FILE LOCATIONS
pid_file_name = '/var/run/pgpool/pgpool.pid'
logdir = '/var/log/pgpool'
#------------------------------------------------------------------------------
# CONNECTION POOLING
connection_cache = on
reset_query_list = 'ABORT; DISCARD ALL'
#reset_query_list = 'ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT'
#------------------------------------------------------------------------------
# REPLICATION MODE
replication_mode = on
replicate_select = off
insert_lock = on
lobj_lock_table = ''
# - Degenerate handling -
replication_stop_on_mismatch = off
failover_if_affected_tuples_mismatch = on
#------------------------------------------------------------------------------
# LOAD BALANCING MODE
load_balance_mode = on # (don't set to off, otherwise, the
Select will
ignore_leading_white_space = on
white_function_list = ''
black_function_list = 'nextval,setval,proc_.*'
#------------------------------------------------------------------------------
# MASTER/SLAVE MODE
master_slave_mode = off
master_slave_sub_mode = 'slony'
# - Streaming -
sr_check_period = 0
sr_check_user = 'nobody'
sr_check_password = ''
delay_threshold = 0
# - Special commands -
follow_master_command = ''
#------------------------------------------------------------------------------
# PARALLEL MODE
parallel_mode = off
pgpool2_hostname = ''
# - System DB info -
#system_db_hostname = 'localhost'
#system_db_port = 5432
#system_db_dbname = 'pgpool'
#system_db_schema = 'pgpool_catalog'
#system_db_user = 'pgpool'
#system_db_password = ''
#------------------------------------------------------------------------------
# HEALTH CHECK
health_check_period = 300
health_check_timeout = 20
health_check_user = 'pgpool'
health_check_password = 'pgpool'
health_check_max_retries = 2
health_check_retry_delay = 3
#------------------------------------------------------------------------------
# FAILOVER AND FAILBACK
failover_command = ''
failback_command = ''
fail_over_on_backend_error = on
search_primary_node_timeout = 10
#------------------------------------------------------------------------------
# ONLINE RECOVERY
recovery_user = 'nobody'
recovery_password = ''
recovery_1st_stage_command = ''
recovery_2nd_stage_command = ''
recovery_timeout = 90
client_idle_limit_in_recovery = 0
#------------------------------------------------------------------------------
# WATCHDOG
# - Enabling -
use_watchdog = off
# -Connection to up stream servers -
trusted_servers = ''
ping_path = '/bin'
# - Watchdog communication Settings -
wd_hostname = ''
wd_port = 9000
wd_authkey = ''
# - Virtual IP control Setting -
delegate_IP = ''
ifconfig_path = '/sbin'
if_up_cmd = 'ifconfig eth0:0 inet $_IP_$ netmask 255.255.255.0'
if_down_cmd = 'ifconfig eth0:0 down'
arping_path = '/usr/sbin' # arping command path
arping_cmd = 'arping -U $_IP_$ -w 1'
# - Behaivor on escalation Setting -
clear_memqcache_on_escalation = on
wd_escalation_command = ''
# - Lifecheck Setting -
wd_lifecheck_method = 'heartbeat'
wd_interval = 10
# -- heartbeat mode --
wd_heartbeat_port = 9694
wd_heartbeat_keepalive = 2
wd_heartbeat_deadtime = 30
heartbeat_destination0 = 'host0_ip1'
heartbeat_destination_port0 = 9694
heartbeat_device0 = ''
#heartbeat_destination1 = 'host0_ip2'
#heartbeat_destination_port1 = 9694
#heartbeat_device1 = ''
# -- query mode --
wd_life_point = 3
wd_lifecheck_query = 'SELECT 1'
wd_lifecheck_dbname = 'template1'
wd_lifecheck_user = 'nobody'
wd_lifecheck_password = ''
# - Other pgpool Connection Settings -
#other_pgpool_hostname0 = 'host0'
#other_pgpool_port0 = 5432
#other_wd_port0 = 9000
#other_pgpool_hostname1 = 'host1'
#other_pgpool_port1 = 5432
#other_wd_port1 = 9000
#------------------------------------------------------------------------------
# OTHERS
#------------------------------------------------------------------------------
relcache_expire = 0
relcache_size = 256
check_temp_table = on
#------------------------------------------------------------------------------
# ON MEMORY QUERY MEMORY CACHE
memory_cache_enabled = off
memqcache_method = 'shmem'
memqcache_memcached_host = 'localhost'
memqcache_memcached_port = 11211
memqcache_total_size = 67108864
memqcache_max_num_cache = 1000000
memqcache_expire = 0
memqcache_auto_cache_invalidation = on
memqcache_maxcache = 409600
memqcache_cache_block_size = 1048576
memqcache_oiddir = '/var/log/pgpool/oiddir'
white_memqcache_table_list = ''
black_memqcache_table_list = ''
More information about the pgpool-general
mailing list