[pgpool-general: 1092] PGPool hangs on pcp_attach/detach

Oleg Mürk oleg.myrk at gmail.com
Wed Oct 17 19:47:30 JST 2012


Hello,

I am observing a situation where pgpool hangs in master/slave mode with
PostgreSQL streaming replication:
* Start pgpool:
      sudo pgpool -f /etc/pgpool2/mx_large-pgpool.conf -F /etc/pgpool2/pcp.conf -a /etc/pgpool2/pool_hba.conf -n -d
* To reproduce:
      $ sudo pcp_attach_node 100 localhost 9899 mx mx 1
      $ sudo pcp_node_info 100 localhost 9899 mx mx 1
      mx-db2 5434 1 0.600000
      $ sudo pcp_detach_node 100 localhost 9899 mx mx 1
      $ sudo pcp_node_info 100 localhost 9899 mx mx 1
      *** hangs here ***
* Also connecting to the port hangs:
      psql -p 6434 -U mx_large mx_large
* This freeze is not deterministic, but attaching/detaching the standby
node and requesting node info a few times will eventually freeze pgpool
* Pgpool version is: pgpool-II version 3.2.1, running on Debian
Wheezy, Postgresql 9.1
* Pgpool's debug output is attached (pgpool.out)
* Pgpool's conf is at the end of this email; changes are marked by #XXX:OM

On a related note, which parts of pgpool.conf are reloaded by "pgpool reload"?
Is it possible to add/remove standby nodes to/from pgpool using "pgpool
reload" or other means without losing existing connections to pgpool?

Thank You,
Oleg Mürk


$ sudo cat /etc/pgpool2/mx_large-pgpool.conf
# ----------------------------
# pgPool-II configuration file
# ----------------------------
#
# This file consists of lines of the form:
#
#   name = value
#
# Whitespace may be used.  Comments are introduced with "#" anywhere on a line.
# The complete list of parameter names and allowed values can be found in the
# pgPool-II documentation.
#
# This file is read on server startup and when the server receives a SIGHUP
# signal.  If you edit the file on a running system, you have to SIGHUP the
# server for the changes to take effect, or use "pgpool reload".  Some
# parameters, which are marked below, require a server shutdown and restart to
# take effect.
#


#------------------------------------------------------------------------------
# CONNECTIONS
#------------------------------------------------------------------------------

# - pgpool Connection Settings -

listen_addresses = '*'
                                   # Host name or IP address to listen on:
                                   # '*' for all, '' for no TCP/IP connections
                                   # (change requires restart)
port = 6434
                                   # Port number
                                   # (change requires restart)
socket_dir = '/var/run/postgresql'
                                   # Unix domain socket path
                                   # The Debian package defaults to
                                   # /var/run/postgresql
                                   # (change requires restart)


# - pgpool Communication Manager Connection Settings -

pcp_port = 9899
                                   # Port number for pcp
                                   # (change requires restart)
#XXX:OM
pcp_socket_dir = '/var/run/pgpool-mx_large'
                                   # Unix domain socket path for pcp
                                   # The Debian package defaults to
                                   # /var/run/postgresql
                                   # (change requires restart)

# - Backend Connection Settings -

backend_hostname0 = 'mx-db1'
                                   # Host name or IP address to
                                   # connect to for backend 0
backend_port0 = 5434
                                   # Port number for backend 0
backend_weight0 = 2
                                   # Weight for backend 0 (only in
                                   # load balancing mode)
#backend_data_directory0 = '/data'
                                   # Data directory for backend 0
backend_flag0 = 'DISALLOW_TO_FAILOVER'
                                   # Controls various backend behavior
                                   # ALLOW_TO_FAILOVER or DISALLOW_TO_FAILOVER
backend_hostname1 = 'mx-db2'
backend_port1 = 5434
backend_weight1 = 3
#backend_data_directory1 = '/data1'
#backend_flag1 = 'DISALLOW_TO_FAILOVER'

# - Authentication -

#XXX:OM
enable_pool_hba = on
                                   # Use pool_hba.conf for client authentication
authentication_timeout = 60
                                   # Delay in seconds to complete
                                   # client authentication
                                   # 0 means no timeout.

#XXX:OM
pool_passwd = 'mx_large-pool_passwd'

# - SSL Connections -

ssl = off
                                   # Enable SSL support
                                   # (change requires restart)
#ssl_key = './server.key'
                                   # Path to the SSL private key file
                                   # (change requires restart)
#ssl_cert = './server.cert'
                                   # Path to the SSL public certificate file
                                   # (change requires restart)
#ssl_ca_cert = ''
                                   # Path to a single PEM format file
                                   # containing CA root certificate(s)
                                   # (change requires restart)
#ssl_ca_cert_dir = ''
                                   # Directory containing CA root certificate(s)
                                   # (change requires restart)


#------------------------------------------------------------------------------
# POOLS
#------------------------------------------------------------------------------

# - Pool size -

#XXX:OM
num_init_children = 32
                                   # Number of pools
                                   # (change requires restart)
max_pool = 4
                                   # Number of connections per pool
                                   # (change requires restart)

# - Life time -

child_life_time = 300
                                   # Pool exits after being idle for
                                   # this many seconds
child_max_connections = 0
                                   # Pool exits after receiving that
                                   # many connections
                                   # 0 means no exit
connection_life_time = 0
                                   # Connection to backend closes
                                   # after being idle for this many seconds
                                   # 0 means no close
client_idle_limit = 0
                                   # Client is disconnected after
                                   # being idle for that many seconds
                                   # (even inside an explicit transaction!)
                                   # 0 means no disconnection


#------------------------------------------------------------------------------
# LOGS
#------------------------------------------------------------------------------

# - Where to log -

log_destination = 'stderr'
                                   # Where to log
                                   # Valid values are combinations of stderr,
                                   # and syslog. Default to stderr.

# - What to log -

print_timestamp = on
                                   # Print timestamp on each line
                                   # (change requires restart)

#XXX:OM
log_connections = on
                                   # Log connections
#XXX:OM
log_hostname = on
                                   # Hostname will be shown in ps status
                                   # and in logs if connections are logged
log_statement = off
                                   # Log all statements
log_per_node_statement = off
                                   # Log all statements
                                   # with node and backend information
log_standby_delay = 'none'
                                   # Log standby delay
                                   # Valid values are combinations of always,
                                   # if_over_threshold, none

# - Syslog specific -

syslog_facility = 'LOCAL0'
                                   # Syslog local facility. Default to LOCAL0
syslog_ident = 'pgpool'
                                   # Syslog program identification string
                                   # Default to 'pgpool'

# - Debug -

debug_level = 0
                                   # Debug message verbosity level
                                   # 0 means no message, 1 or more mean verbose


#------------------------------------------------------------------------------
# FILE LOCATIONS
#------------------------------------------------------------------------------

#XXX:OM
pid_file_name = '/var/run/pgpool-mx_large/pgpool.pid'
                                   # PID file name
                                   # (change requires restart)
#XXX:OM
logdir = '/var/run/pgpool-mx_large/'
                                   # Directory of pgPool status file
                                   # (change requires restart)


#------------------------------------------------------------------------------
# CONNECTION POOLING
#------------------------------------------------------------------------------

connection_cache = on
                                   # Activate connection pools
                                   # (change requires restart)

                                   # Semicolon separated list of queries
                                   # to be issued at the end of a session
                                   # The default is for 8.3 and later
reset_query_list = 'ABORT; DISCARD ALL'
                                   # The following one is for 8.2 and before
#reset_query_list = 'ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT'


#------------------------------------------------------------------------------
# REPLICATION MODE
#------------------------------------------------------------------------------

replication_mode = off
                                   # Activate replication mode
                                   # (change requires restart)
replicate_select = off
                                   # Replicate SELECT statements
                                   # when in replication or parallel mode
                                   # replicate_select is higher priority than
                                   # load_balance_mode.

insert_lock = on
                                   # Automatically locks a dummy row or a table
                                   # with INSERT statements to keep SERIAL data
                                   # consistency
                                   # Without SERIAL, no lock will be issued
lobj_lock_table = ''
                                   # When rewriting lo_creat command in
                                   # replication mode, specify table name to
                                   # lock

# - Degenerate handling -

replication_stop_on_mismatch = off
                                   # On disagreement with the packet kind
                                   # sent from backend, degenerate the node
                                   # which is most likely "minority"
                                   # If off, just force to exit this session

failover_if_affected_tuples_mismatch = off
                                   # On disagreement with the number of affected
                                   # tuples in UPDATE/DELETE queries, then
                                   # degenerate the node which is most likely
                                   # "minority".
                                   # If off, just abort the transaction to
                                   # keep the consistency


#------------------------------------------------------------------------------
# LOAD BALANCING MODE
#------------------------------------------------------------------------------

#XXX:OM
load_balance_mode = on
                                   # Activate load balancing mode
                                   # (change requires restart)
ignore_leading_white_space = on
                                   # Ignore leading white spaces of each query
white_function_list = ''
                                   # Comma separated list of function names
                                   # that don't write to database
                                   # Regexp are accepted
black_function_list = 'nextval,setval'
                                   # Comma separated list of function names
                                   # that write to database
                                   # Regexp are accepted


#------------------------------------------------------------------------------
# MASTER/SLAVE MODE
#------------------------------------------------------------------------------

#XXX:OM
master_slave_mode = on
                                   # Activate master/slave mode
                                   # (change requires restart)
#XXX:OM
master_slave_sub_mode = 'stream'
                                   # Master/slave sub mode
                                   # Valid values are slony or
                                   # stream. Default is slony.
                                   # (change requires restart)

# - Streaming -

#XXX:OM
sr_check_period = 60
                                   # Streaming replication check period
                                   # Disabled (0) by default
#XXX:OM
sr_check_user = 'mx_large'
                                   # Streaming replication check user
                                   # This is necessary even if you disable
                                   # streaming replication delay check with
                                   # sr_check_period = 0
#XXX:OM
sr_check_password = 'mx_large'
                                   # Password for streaming
                                   # replication check user
delay_threshold = 0
                                   # Threshold before not dispatching
                                   # query to standby node
                                   # Unit is in bytes
                                   # Disabled (0) by default

# - Special commands -

follow_master_command = ''
                                   # Executes this command after master failover
                                   # Special values:
                                   #   %d = node id
                                   #   %h = host name
                                   #   %p = port number
                                   #   %D = database cluster path
                                   #   %m = new master node id
                                   #   %H = hostname of the new master node
                                   #   %M = old master node id
                                   #   %P = old primary node id
                                   #   %% = '%' character


#------------------------------------------------------------------------------
# PARALLEL MODE AND QUERY CACHE
#------------------------------------------------------------------------------

parallel_mode = off
                                   # Activates parallel query mode
                                   # (change requires restart)
enable_query_cache = off
                                   # Activates query cache
                                   # (change requires restart)

pgpool2_hostname = ''
                                   # Set pgpool2 hostname
                                   # (change requires restart)

# - System DB info -

system_db_hostname  = 'localhost'
                                   # (change requires restart)
system_db_port = 5432
                                   # (change requires restart)
system_db_dbname = 'pgpool'
                                   # (change requires restart)
system_db_schema = 'pgpool_catalog'
                                   # (change requires restart)
system_db_user = 'pgpool'
                                   # (change requires restart)
system_db_password = ''
                                   # (change requires restart)


#------------------------------------------------------------------------------
# HEALTH CHECK
#------------------------------------------------------------------------------

health_check_period = 0
                                   # Health check period
                                   # Disabled (0) by default
health_check_timeout = 20
                                   # Health check timeout
                                   # 0 means no timeout
health_check_user = 'nobody'
                                   # Health check user
health_check_password = ''
                                   # This parameter is not yet implemented.
                                   # Password for health check user


#------------------------------------------------------------------------------
# FAILOVER AND FAILBACK
#------------------------------------------------------------------------------

failover_command = ''
                                   # Executes this command at failover
                                   # Special values:
                                   #   %d = node id
                                   #   %h = host name
                                   #   %p = port number
                                   #   %D = database cluster path
                                   #   %m = new master node id
                                   #   %H = hostname of the new master node
                                   #   %M = old master node id
                                   #   %P = old primary node id
                                   #   %% = '%' character
failback_command = ''
                                   # Executes this command at failback.
                                   # Special values:
                                   #   %d = node id
                                   #   %h = host name
                                   #   %p = port number
                                   #   %D = database cluster path
                                   #   %m = new master node id
                                   #   %H = hostname of the new master node
                                   #   %M = old master node id
                                   #   %P = old primary node id
                                   #   %% = '%' character

fail_over_on_backend_error = on
                                   # Initiates failover when writing to the
                                   # backend communication socket fails
                                   # This is the same behaviour as pgpool-II
                                   # 2.2.x and previous releases
                                   # If set to off, pgpool will report an
                                   # error and disconnect the session.


#------------------------------------------------------------------------------
# ONLINE RECOVERY
#------------------------------------------------------------------------------

recovery_user = 'nobody'
                                   # Online recovery user
recovery_password = ''
                                   # Online recovery password
recovery_1st_stage_command = ''
                                   # Executes a command in first stage
recovery_2nd_stage_command = ''
                                   # Executes a command in second stage
recovery_timeout = 90
                                   # Timeout in seconds to wait for the
                                   # recovering node's postmaster to start up
                                   # 0 means no wait
client_idle_limit_in_recovery = 0
                                   # Client is disconnected after being idle
                                   # for that many seconds in the second stage
                                   # of online recovery
                                   # 0 means no disconnection
                                   # -1 means immediate disconnection


#------------------------------------------------------------------------------
# OTHERS
#------------------------------------------------------------------------------

relcache_expire = 0
                                   # Lifetime of relation cache in seconds.
                                   # 0 means no cache expiration (the default).
                                   # The relation cache is used to cache the
                                   # query results against the PostgreSQL system
                                   # catalog to obtain various information,
                                   # including table structures or whether a
                                   # table is temporary. The cache is
                                   # maintained in a pgpool child's local memory
                                   # and is kept as long as it survives.
                                   # If someone modifies the table by using
                                   # ALTER TABLE or the like, the relcache is
                                   # no longer consistent.
                                   # For this purpose, relcache_expire
                                   # controls the lifetime of the cache.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: pgpool.out
Type: application/octet-stream
Size: 33581 bytes
Desc: not available
URL: <http://www.sraoss.jp/pipermail/pgpool-general/attachments/20121017/f9bac377/attachment-0001.obj>


More information about the pgpool-general mailing list