[pgpool-general: 5038] pgpool fail to load balance after database restart

Dylan Luong Dylan.Luong at unisa.edu.au
Tue Oct 11 16:36:19 JST 2016


Hi,
I am new to pgpool. I have just installed pgpool and configured  Master/Slave mode with "stream" sub_mode. I have enabled load balance as that is the main feature we want test out. I have enabled Health Check. The load balance appears to work as I can see connections to both Master and Slave database.
pgpool is installed on a separate server. (linux Redhat 6)
ie: server1 = pgpool
server2 = Master
server3= Slave
(wal replication)

I tested two scenarios and it failing:

1.      When I stop/start the slave database, pgpool stops load balancing. It looks like it doesn't know that it available again after the slave comes backup online. Even after I restart pgpool it doesn't load balance. It just sends all queries to the Master.

2.      I tried shutting down slave db and then shut down master db (no failover) and restart master db and restart slave, pgpool failed to reconnect to the master or the slave. I get database unavailable errors. However, if I restart pgpool for this scenario, its all fine.

It appears that pgpool is not doing the healthcheck correctly?? Or have I missed something here??

Here are my main pgpool.conf changes:

#------------------------------------------------------------------------------
# CONNECTIONS
#------------------------------------------------------------------------------

# - pgpool Connection Settings -

listen_addresses = '*'
                                   # Host name or IP address to listen on:
                                   # '*' for all, '' for no TCP/IP connections
                                   # (change requires restart)
#port = 9999
port = 5432
                                   # Port number
                                   # (change requires restart)
socket_dir = '/var/run/postgresql'
                                   # Unix domain socket path
                                   # The Debian package defaults to
                                   # /var/run/postgresql
                                   # (change requires restart)
listen_backlog_multiplier = 2
                                   # Set the backlog parameter of listen(2) to
                                                                   # num_init_children * listen_backlog_multiplier.
                                   # (change requires restart)
serialize_accept = off
                                   # whether to serialize accept() call to avoid thundering herd problem
                                   # (change requires restart)

# - pgpool Communication Manager Connection Settings -

pcp_listen_addresses = '*'
                                   # Host name or IP address for pcp process to listen on:
                                   # '*' for all, '' for no TCP/IP connections
                                   # (change requires restart)
pcp_port = 9898
                                   # Port number for pcp
                                   # (change requires restart)
pcp_socket_dir = '/var/run/postgresql'
                                   # Unix domain socket path for pcp
                                   # The Debian package defaults to
                                   # /var/run/postgresql
                                   # (change requires restart)

# - Backend Connection Settings -

backend_hostname0 = '10.69.20.11'
                                   # Host name or IP address to connect to for backend 0
backend_port0 = 5432
                                   # Port number for backend 0
backend_weight0 = 1
                                   # Weight for backend 0 (only in load balancing mode)
backend_data_directory0 = '/var/lib/pgsql/9.5/data'
                                   # Data directory for backend 0
backend_flag0 = 'ALLOW_TO_FAILOVER'
                                   # Controls various backend behavior
                                   # ALLOW_TO_FAILOVER or DISALLOW_TO_FAILOVER
backend_hostname1 = '10.68.20.11'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/var/lib/pgsql/9.5/data'
backend_flag1 = 'ALLOW_TO_FAILOVER'


#------------------------------------------------------------------------------
# POOLS
#------------------------------------------------------------------------------

# - Concurrent session and pool size -

num_init_children = 32
                                   # Number of concurrent sessions allowed
                                   # (change requires restart)
max_pool = 4
                                   # Number of connection pool caches per connection
                                   # (change requires restart)

# - Life time -

child_life_time = 300
                                   # Pool exits after being idle for this many seconds
child_max_connections = 0
                                   # Pool exits after receiving that many connections
                                   # 0 means no exit
connection_life_time = 0
                                   # Connection to backend closes after being idle for this many seconds
                                   # 0 means no close
client_idle_limit = 0
                                   # Client is disconnected after being idle for that many seconds
                                   # (even inside an explicit transactions!)
                                   # 0 means no disconnection

#------------------------------------------------------------------------------
# CONNECTION POOLING
#------------------------------------------------------------------------------

connection_cache = on
                                   # Activate connection pools
                                   # (change requires restart)

                                   # Semicolon separated list of queries
                                   # to be issued at the end of a session
                                   # The default is for 8.3 and later
reset_query_list = 'ABORT; DISCARD ALL'
                                   # The following one is for 8.2 and before
#reset_query_list = 'ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT'


#------------------------------------------------------------------------------
# LOAD BALANCING MODE
#------------------------------------------------------------------------------

load_balance_mode = on
                                   # Activate load balancing mode
                                   # (change requires restart)
ignore_leading_white_space = on
                                   # Ignore leading white spaces of each query
white_function_list = ''
                                   # Comma separated list of function names
                                   # that don't write to database
                                   # Regexp are accepted
#black_function_list = 'nextval,setval,nextval,setval'
black_function_list = 'currval,lastval,nextval,setval'
                                   # Comma separated list of function names
                                   # that write to database
                                   # Regexp are accepted

database_redirect_preference_list = ''
                                                                   # comma separated list of pairs of database and node id.
                                                                   # example: postgres:primary,mydb[0-4]:1,mydb[5-9]:2'
                                                                   # valid for streaming replicaton mode only.

app_name_redirect_preference_list = ''
                                                                   # comma separated list of pairs of app name and node id.
                                                                   # example: 'psql:primary,myapp[0-4]:1,myapp[5-9]:standby'
                                                                   # valid for streaming replicaton mode only.
allow_sql_comments = off
                                                                   # if on, ignore SQL comments when judging if load balance or
                                                                   # query cache is possible.
                                                                   # If off, SQL comments effectively prevent the judgment
                                                                   # (pre 3.4 behavior).

#------------------------------------------------------------------------------
# MASTER/SLAVE MODE
#------------------------------------------------------------------------------

master_slave_mode = on
                                   # Activate master/slave mode
                                   # (change requires restart)
master_slave_sub_mode = 'stream'
                                   # Master/slave sub mode
                                   # Valid values are combinations slony or
                                   # stream. Default is slony.
                                   # (change requires restart)

# - Streaming -

sr_check_period = 5
                                   # Streaming replication check period
                                   # Disabled (0) by default
sr_check_user = 'postgres'
                                   # Streaming replication check user
                                   # This is necessary even if you disable
                                   # streaming replication delay check with
                                   # sr_check_period = 0
sr_check_password = 'xxxxxxxxxxxxxxxx'
                                   # Password for streaming replication check user
sr_check_database = 'postgres'
                                   # Database name for streaming replication check
delay_threshold = 10000000
                                   # Threshold before not dispatching query to standby node
                                   # Unit is in bytes
                                   # Disabled (0) by default

# - Special commands -

follow_master_command = ''
                                   # Executes this command after master failover
                                   # Special values:
                                   #   %d = node id
                                   #   %h = host name
                                   #   %p = port number
                                   #   %D = database cluster path
                                   #   %m = new master node id
                                   #   %H = hostname of the new master node
                                   #   %M = old master node id
                                   #   %P = old primary node id
                                   #   %r = new master port number
                                   #   %R = new master database cluster path
                                   #   %% = '%' character

#------------------------------------------------------------------------------
# HEALTH CHECK
#------------------------------------------------------------------------------

health_check_period = 40
                                   # Health check period
                                   # Disabled (0) by default
health_check_timeout = 10
                                   # Health check timeout
                                   # 0 means no timeout
health_check_user = 'postgres'
                                   # Health check user
health_check_password = 'xxxxxxxxxxxxxxxxxx'
                                   # Password for health check user
health_check_database = ''
                                   # Database name for health check. If '', tries 'postgres' frist, then 'template1'

health_check_max_retries = 3
                                   # Maximum number of times to retry a failed health check before giving up.
health_check_retry_delay = 1
                                   # Amount of time to wait (in seconds) between retries.
connect_timeout = 10000
                                   # Timeout value in milliseconds before giving up to connect to backend.
                                                                   # Default is 10000 ms (10 second). Flaky network user may want to increase
                                                                   # the value. 0 means no timeout.
                                                                   # Note that this value is not only used for health check,
                                                                   # but also for ordinary conection to backend.


Thanks.
Dylan

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.sraoss.jp/pipermail/pgpool-general/attachments/20161011/044d9367/attachment-0001.html>


More information about the pgpool-general mailing list