[pgpool-general-jp: 1437] num_init_childrenを増やすとfailoverに失敗する

川口 憲彦 kawaguchi.norihiko @ daitec.co.jp
2016年 12月 7日 (水) 19:30:59 JST


はじめまして、川口と申します。

pgpool-IIでフェイルオーバの検証を行っています。

PostgreSQL2台でStreaming Replicationを構成し、
そこへpgpool-II(こちらも2台体制)から接続しています。
なお、「コネクションプーリング」「負荷分散」「watchdog」「フェイルオーバ」機能はすべてオンにしてあります。

フェイルオーバは、num_init_childrenがデフォルトの32だと成功するのですが、
それを600に増やすと、成功したり失敗したりで安定しません。


このような場合、pgpool.conf設定の何かのタイムアウト時間を伸ばすなどの
「勘所」のようなものはあるのでしょうか?

どなたかご存知の方がいらっしゃれば、お手数ですがお教えください。
よろしくお願いします。


●構成
OSは全てCentOS 7.2
pgpool-II : 3.5.4
PostgreSQL : 9.5.1

マシンは全て仮想マシンで、
PostgreSQL用×2、pgpool用×2の計4台


●設定ファイル(pgpool.conf)

#------------
# CONNECTIONS
#------------

listen_addresses = '*'
port = 9999
socket_dir = '(path)'
listen_backlog_multiplier = 2
serialize_accept = off
pcp_listen_addresses = '*'
pcp_port = 9898
pcp_socket_dir = '(path)'
backend_hostname0 = 'db-server1'
backend_port0 = 5432
backend_weight0 = 1
backend_data_directory0 = '(path)'
backend_flag0 = 'ALLOW_TO_FAILOVER'
backend_hostname1 = 'db-server2'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '(path)'
backend_flag1 = 'ALLOW_TO_FAILOVER'

enable_pool_hba = on
pool_passwd = 'pool_passwd'
authentication_timeout = 60

ssl = off
#ssl_key = '(path)/server.key'
#ssl_cert = '(path)/server.cert'
#ssl_ca_cert = ''
#ssl_ca_cert_dir = ''

#------
# POOLS
#------

num_init_children = 600
max_pool = 1
child_life_time = 300
child_max_connections = 0
connection_life_time = 0
client_idle_limit = 0

#------
# LOGS
#------
log_destination = 'syslog'
log_line_prefix = '%t: pid %p: '
log_connections = on
log_hostname = off
log_statement = off
log_per_node_statement = off
log_standby_delay = 'none'

syslog_facility = 'LOCAL0'
syslog_ident = 'pgpool'

debug_level = 0
log_error_verbosity = verbose
#client_min_messages = notice
log_min_messages = debug5

#---------------
# FILE LOCATIONS
#---------------
pid_file_name = '(path)/pgpool.pid'
logdir = '(path)/pgpool'

#-------------------
# CONNECTION POOLING
#-------------------
connection_cache = on
reset_query_list = 'ABORT; DISCARD ALL'
#reset_query_list = 'ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT'

#------------------
# REPLICATION MODE
#------------------
replication_mode = off
replicate_select = off
insert_lock = on
lobj_lock_table = ''

replication_stop_on_mismatch = off
failover_if_affected_tuples_mismatch = off

#--------------------
# LOAD BALANCING MODE
#--------------------
load_balance_mode = on
ignore_leading_white_space = on
white_function_list = ''
black_function_list = 'nextval,setval,nextval,setval'

database_redirect_preference_list = ''
app_name_redirect_preference_list = ''
allow_sql_comments = off

#------------------
# MASTER/SLAVE MODE
#------------------
master_slave_mode = on
master_slave_sub_mode = 'stream'

sr_check_period = 5
sr_check_user = 'user1'
sr_check_password = 'password'
sr_check_database = 'database1'
delay_threshold = 10000

follow_master_command = ''

#-------------
# HEALTH CHECK
#-------------
health_check_period = 5
health_check_timeout = 20
health_check_user = 'user1'
health_check_password = 'password'
health_check_database = ''
health_check_max_retries = 0
health_check_retry_delay = 1

connect_timeout = 10000

#----------------------
# FAILOVER AND FAILBACK
#----------------------
failover_command = '(path)/failover.sh %H %R'
failback_command = ''
fail_over_on_backend_error = on
search_primary_node_timeout = 10

#----------------
# ONLINE RECOVERY
#----------------

recovery_user = 'user1'
recovery_password = 'password'
recovery_1st_stage_command = ''
recovery_2nd_stage_command = ''
recovery_timeout = 90
client_idle_limit_in_recovery = 0

#---------
# WATCHDOG
#---------

use_watchdog = on

trusted_servers = ''
ping_path = '(path)'
wd_hostname = 'pgpool-server1'
wd_port = 9000
wd_priority = 1
wd_authkey = ''
wd_ipc_socket_dir = '/tmp'


delegate_IP = '172.16.XXX.XXX'
if_cmd_path = '(path)'
if_up_cmd = 'ip addr add $_IP_$/24 dev eth1 label eth1:0'
if_down_cmd = 'ip addr del $_IP_$/24 dev eth1'
arping_path = '(path)'
arping_cmd = 'arping -U $_IP_$ -w 1 -I eth1'

clear_memqcache_on_escalation = on
wd_escalation_command = ''
wd_de_escalation_command = ''


wd_monitoring_interfaces_list = ''
wd_lifecheck_method = 'heartbeat'
wd_interval = 10

wd_heartbeat_port = 9694
wd_heartbeat_keepalive = 2
wd_heartbeat_deadtime = 30
heartbeat_destination0 = 'pgpool-server2'
heartbeat_destination_port0 = 9694
heartbeat_device0 = ''

wd_life_point = 3
wd_lifecheck_query = 'SELECT 1'
wd_lifecheck_dbname = 'template1'
wd_lifecheck_user = 'nobody'
wd_lifecheck_password = ''

other_pgpool_hostname0 = 'pgpool-server2'
other_pgpool_port0 = 9999
other_wd_port0 = 9000

#-------
# OTHERS
#-------
relcache_expire = 0
relcache_size = 256
check_temp_table = on
check_unlogged_table = on

#-----------------------------
# IN MEMORY QUERY MEMORY CACHE
#-----------------------------

memory_cache_enabled = off
memqcache_method = 'shmem'
memqcache_memcached_host = 'localhost'
memqcache_memcached_port = 11211
memqcache_total_size = 67108864
memqcache_max_num_cache = 1000000
memqcache_expire = 0
memqcache_auto_cache_invalidation = on
memqcache_maxcache = 409600
memqcache_cache_block_size = 1048576
memqcache_oiddir = '(path)/oiddir'
white_memqcache_table_list = ''
black_memqcache_table_list = ''


以上です。




pgpool-general-jp メーリングリストの案内