[pgpool-general: 3764] Re: Connection Interrupted

Tatsuo Ishii ishii at postgresql.org
Thu May 28 17:29:56 JST 2015


Sorry I forgot to attach the patch.

Best regards,
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese:http://www.sraoss.co.jp

> I suspect that while the health check routine was trying to connect to the
> backend, select(2) was interrupted by some signal. The signal could be
> SIGCHLD, since you have a pretty large number (2000) of child processes,
> plus you have enabled child_life_time (300). This means that after pgpool
> becomes idle, child processes start to exit roughly every 0.15 seconds (=
> 300/2000) on average. If a health check is executed (which happens every 10
> seconds) at the same time as a child exits, select(2) will be
> interrupted.
> 
> The fix is to check whether the signal that interrupted select(2) is
> SIGALRM (which is used for health check by the health check routine
> itself). If it is any other signal, just retry the select(2).
> 
> Attached patch implements it.
> 
> Best regards,
> --
> Tatsuo Ishii
> SRA OSS, Inc. Japan
> English: http://www.sraoss.co.jp/index_en.php
> Japanese:http://www.sraoss.co.jp
> 
>> Hi Tatsuo
>> 
>> Sorry for the late reply. Attached pgpool.cnf, but also in text below for mailing list.
>> 
>> 
>> Postgres: 9.3.5
>> 
>> Pgpool version: pgpool-II-pg93-3.4.0-3pgdg.rhel6.x86_64 (from rpm -qa)
>> 
>> 
>> Pgpool.conf (without comments)
>> 
>> listen_addresses = '*'
>> port = 9999
>> socket_dir = '/tmp'
>> listen_backlog_multiplier = 2
>> pcp_listen_addresses = '*'
>> pcp_port = 9898
>> pcp_socket_dir = '/tmp'
>> backend_hostname0 = 'X.X.X.50'
>> backend_port0 = 5432
>> backend_weight0 = 4
>> backend_data_directory0 = '/var/lib/pgsql/9.3/data'
>> backend_flag0 = 'ALLOW_TO_FAILOVER'
>> backend_hostname1 = 'X.X.X.51'
>> backend_port1 = 5432
>> backend_weight1 = 3
>> backend_data_directory1 = '/var/lib/pgsql/9.3/data'
>> backend_flag1 = 'ALLOW_TO_FAILOVER'
>> backend_hostname2 = 'X.X.X.52'
>> backend_port2 = 5432
>> backend_weight2 = 3
>> backend_data_directory2 = '/var/lib/pgsql/9.3/data'
>> backend_flag2 = 'ALLOW_TO_FAILOVER'
>> enable_pool_hba = on
>> pool_passwd = 'pool_passwd'
>> authentication_timeout = 60
>> ssl = off
>> num_init_children = 2000
>> max_pool = 4
>> child_life_time = 300
>> child_max_connections = 0
>> connection_life_time = 0
>> client_idle_limit = 0
>> log_destination = 'stderr'
>> log_line_prefix = '%p %t '
>> log_standby_delay = 'none'
>> syslog_facility = 'LOCAL0'
>> syslog_ident = 'pgpool'
>> debug_level = 0
>> pid_file_name = '/tmp/pgpool/pgpool.pid'
>> logdir = '/var/log/pgpool'
>> connection_cache = on
>> reset_query_list = 'ABORT; DISCARD ALL'
>> replication_mode = off
>> replicate_select = off
>> insert_lock = on
>> lobj_lock_table = ''
>> 
>> 
>> replication_stop_on_mismatch = off
>> 
>> failover_if_affected_tuples_mismatch = off
>> 
>> 
>> 
>> load_balance_mode = on
>> ignore_leading_white_space = on
>> white_function_list = ''
>> black_function_list = 'breadcrumbtrails,f_round,t_round,fn_CalculateUserStatement,nextval,setval,nextval,setval,fn_settierlili,fn_GetImageForCategory,tblSessions,articles,fn_ManagerPoints,process,fn.*,fn_.*,fn_RepReport'
>> 
>> database_redirect_preference_list = 'otrstest:primary'
>> 
>> app_name_redirect_preference_list = ''
>> allow_sql_comments = off
>> 
>> 
>> master_slave_mode = on
>> master_slave_sub_mode = 'stream'
>> 
>> 
>> sr_check_period = 10
>> sr_check_user = 'postgres'
>> sr_check_password = ''
>> delay_threshold = 1024
>> 
>> 
>> follow_master_command = '/etc/pgpool-II/follow_master.sh %d %M %h'
>> 
>> 
>> health_check_period = 10
>> health_check_timeout = 20
>> health_check_user = 'postgres'
>> health_check_password = ''
>> health_check_max_retries = 2
>> health_check_retry_delay = 1
>> connect_timeout = 10000
>> 
>> 
>> failover_command = '/etc/pgpool-II/failover.sh %d %P %H %R'
>> failback_command = ''
>> 
>> fail_over_on_backend_error = on
>> 
>> search_primary_node_timeout = 10
>> 
>> 
>> recovery_user = 'postgres'
>> recovery_password = ''
>> recovery_1st_stage_command = 'recovery_1st_stage'
>> recovery_2nd_stage_command = ''
>> recovery_timeout = 90
>> client_idle_limit_in_recovery = 0
>> 
>> 
>> 
>> 
>> use_watchdog = on
>> 
>> 
>> trusted_servers = ''
>> ping_path = '/home/apache/sbin'
>> 
>> 
>> wd_hostname = 'UI-IS-POOL1'
>> wd_port = 9000
>> wd_authkey = ''
>> 
>> 
>> delegate_IP = 'X.X.X.60'
>> ifconfig_path = '/home/apache/sbin'
>> if_up_cmd = 'ifconfig ens32:0 inet $_IP_$ netmask 255.255.255.0'
>> if_down_cmd = 'ifconfig ens32:0 down'
>> 
>> 
>> arping_cmd = 'arping -U $_IP_$ -w 1'
>> 
>> 
>> clear_memqcache_on_escalation = on
>> wd_escalation_command = ''
>> 
>> 
>> 
>> wd_lifecheck_method = 'heartbeat'
>> wd_interval = 3
>> 
>> 
>> wd_heartbeat_port = 9694
>> wd_heartbeat_keepalive = 2
>> wd_heartbeat_deadtime = 30
>> 
>> heartbeat_destination0 = 'X.X.X.61'
>> heartbeat_destination_port0 = 9694
>> heartbeat_device0 = ''
>> heartbeat_destination1 = 'X.X.X.62'
>> heartbeat_destination_port1 = 9694
>> heartbeat_device1 = ''
>> 
>> 
>> wd_life_point = 3
>> wd_lifecheck_query = 'SELECT 1'
>> wd_lifecheck_dbname = 'template1'
>> wd_lifecheck_user = 'postgres'
>> wd_lifecheck_password = ''
>> 
>> 
>> other_pgpool_hostname0 = 'X.X.X.62'
>> other_pgpool_port0 = 9999
>> other_wd_port0 = 9000
>> 
>> 
>> relcache_expire = 0
>> 
>> relcache_size = 256
>> 
>> check_temp_table = on
>> 
>> check_unlogged_table = off
>> 
>> memory_cache_enabled = off
>> memqcache_method = 'shmem'
>> memqcache_memcached_host = 'localhost'
>> memqcache_memcached_port = 11211
>> memqcache_total_size = 67108864
>> memqcache_max_num_cache = 1000000
>> memqcache_expire = 0
>> memqcache_auto_cache_invalidation = on
>> memqcache_maxcache = 409600
>> memqcache_cache_block_size = 1048576
>> memqcache_oiddir = '/var/log/pgpool/oiddir'
>> white_memqcache_table_list = ''
>> black_memqcache_table_list = ''
>> 
>> 
>> 
>> Robert Kennedy| Software & Systems Developer|   
>>  
>> Tel: (011) 557-5700  | Ext: 011 557 5729    | Cell: 084 998 9850 
>>  
>>  Email : robert at uwiniwin.co.za | Site: www.uwiniwin.co.za 
>> Address : The Oval Office Park , Ground Floor Kingsmead, Meadowbrook Lane, Bryanston
>> -----Original Message-----
>> From: Tatsuo Ishii [mailto:ishii at postgresql.org] 
>> Sent: Wednesday, May 27, 2015 11:11 AM
>> To: Robert Kennedy
>> Cc: pgpool-general at pgpool.net
>> Subject: Re: [pgpool-general: 3756] Connection Interrupted
>> 
>> Can you please provide details:
>> 
>> - pgpool-II version (and RPM version if any)
>> - PostgreSQL version
>> - pgpool.conf
>> 
>> Best regards,
>> --
>> Tatsuo Ishii
>> SRA OSS, Inc. Japan
>> English: http://www.sraoss.co.jp/index_en.php
>> Japanese:http://www.sraoss.co.jp
>> 
>>> Hi Guys
>>> 
>>> I keep getting this error randomly appearing in my logs.
>>> 
>>> 23734 2015-05-27 10:45:52 LOG:  failed to connect to PostgreSQL server on "x.x.x.x:5432" using INET socket
>>> 23734 2015-05-27 10:45:52 DETAIL:  select() system call interrupted
>>> 23734 2015-05-27 10:45:52 ERROR:  failed to make persistent db connection
>>> 23734 2015-05-27 10:45:52 DETAIL:  connection to host:"x.x.x.x:5432" failed
>>> 
>>> However, I can connect to that server perfectly, and it seems pgpool can connect as well, since no failover event gets triggered.
>>> 
>>> My concern is that I'm losing data, as this is my primary (master) server. I do see the same error for my slave as well, though.
>>> 
>>> Please advise what may be causing this, and let me know if you require any more information.
>>> 
>>> Robert Kennedy| Software & Systems Developer
>>> 
>>> Tel: 011 557 5729 | Ext:229 | Cell: 084 998 9850
>>> 
>>> Email: robert at uwiniwin.co.za | Site: www.uwiniwin.co.za
>>> Address: The Oval Office Park , Ground Floor Kingsmead, Meadowbrook Lane, Bryanston
>>> 
>>> [site-award-logo]
>>> 
>>> [site-aita-logo]
>>> 
>>> [cid:image003.jpg at 01D0986B.205AF960]<https://twitter.com/intent/follow?original_referer=http%3A%2F%2Fwww.linkedin.com%2Fprofile%2Fview%3Fid%3D7202140%26locale%3Den_US%26trk%3Dtyah&region=follow_link&screen_name=sandmandavid&source=followbutton&variant=2.0>
>>> 
>>> [czxc]<http://www.facebook.com/UwinIwinIncentives>
>>> 
>>> [cid:image005.jpg at 01D0986B.205AF960]<http://www.linkedin.com/company/uwin-iwin-incentives?trk=fc_badge>
>>> 
>>> [cid:image006.jpg at 01D0986B.205AF960]<http://www.linkedin.com/company/uwin-iwin-incentives?trk=fc_badge>
>>> 
>>> 
>>> [cid:image007.jpg at 01D0986B.205AF960]<http://www.uwiniwin.net/>
>>> 
>>> 
>>> Please consider the environment before printing this email-
>>> 
>>> The views expressed in this e-mail are, unless otherwise stated, those of the author and not those of Uwin Iwin Incentives (PTY) LTD or its management. The information in this e-mail is confidential and is intended solely for the addressee. Access to this e-mail by anyone else is unauthorised. If you are not the intended recipient, any disclosure, copying, distribution or any action taken or omitted in reliance on this is prohibited and may be unlawful. Whilst all reasonable steps are taken to ensure the accuracy and integrity of information and data transmitted electronically and to preserve the confidentiality thereof, no liability or responsibility whatsoever is accepted if information or data is, for whatever reason, corrupted or does not reach its intended destination.
>>> 
>>> 
>>> 
>>> 
>>> 
> _______________________________________________
> pgpool-general mailing list
> pgpool-general at pgpool.net
> http://www.pgpool.net/mailman/listinfo/pgpool-general
-------------- next part --------------
A non-text attachment was scrubbed...
Name: pool_connection_pool.c.patch
Type: text/x-patch
Size: 1812 bytes
Desc: not available
URL: <http://www.sraoss.jp/pipermail/pgpool-general/attachments/20150528/a4d4c6a1/attachment.bin>


More information about the pgpool-general mailing list