View Issue Details

IDProjectCategoryView StatusLast Update
0000789Pgpool-IIBugpublic2023-04-05 16:50
Reportercobolbaby Assigned Topengbo  
PriorityurgentSeveritymajorReproducibilitysometimes
Status assignedResolutionopen 
Product Version4.3.5 
Summary0000789: pgpool-II 请求阻塞
Description目前我采用 pgpool-II 做数据库的负载均衡,并没有使用其他功能,但我发现当遇到某些异常情况时,pgpool-II 的请求会发生堆积,然后客户端请求会发生严重阻塞。

配置文件:

```
#------------------------------------------------------------------------------
# CONNECTIONS
#------------------------------------------------------------------------------

# - pgpool Connection Settings -

listen_addresses = '0.0.0.0'
                                   # Host name or IP address to listen on:
                                   # '*' for all, '' for no TCP/IP connections
                                   # (change requires restart)
port = 5432
                                   # Port number
                                   # (change requires restart)
#socket_dir = '/tmp'
                                   # Unix domain socket path
                                   # The Debian package defaults to
                                   # /var/run/postgresql
                                   # (change requires restart)
reserved_connections = 20
                                   # Number of reserved connections.
                                   # Pgpool-II does not accept connections if over
                                   # num_init_chidlren - reserved_connections.

# - Authentication -

enable_pool_hba = off
                                   # Use pool_hba.conf for client authentication
pool_passwd = 'pool_passwd'
                                   # File name of pool_passwd for md5 authentication.
                                   # "" disables pool_passwd.
                                   # (change requires restart)
#authentication_timeout = 1min
                                   # Delay in seconds to complete client authentication
                                   # 0 means no timeout.

allow_clear_text_frontend_auth = on
                                   # Allow Pgpool-II to use clear text password authentication
                                   # with clients, when pool_passwd does not
                                   # contain the user password

#------------------------------------------------------------------------------
# POOLS
#------------------------------------------------------------------------------

# - Concurrent session and pool size -

num_init_children = 600
                                   # Number of concurrent sessions allowed
                                   # (change requires restart)
max_pool = 1
                                   # Number of connection pool caches per connection
                                   # (change requires restart)

# - Life time -

#child_life_time = 5min
                                   # Pool exits after being idle for this many seconds
#child_max_connections = 0
                                   # Pool exits after receiving that many connections
                                   # 0 means no exit
#connection_life_time = 0
                                   # Connection to backend closes after being idle for this many seconds
                                   # 0 means no close
client_idle_limit = 30min
                                   # Client is disconnected after being idle for that many seconds
                                   # (even inside an explicit transactions!)
                                   # 0 means no disconnection

#------------------------------------------------------------------------------
# LOGS
#------------------------------------------------------------------------------

# - Where to log -

log_destination = 'stderr'
                                   # Where to log
                                   # Valid values are combinations of stderr,
                                   # and syslog. Default to stderr.

# - What to log -

log_line_prefix = '%t: pid %p: ' # printf-style string to output at beginning of each log line.

log_connections = on
                                   # Log connections
log_disconnections = off
                                   # Log disconnections
log_hostname = off
                                   # Hostname will be shown in ps status
                                   # and in logs if connections are logged
log_statement = on
                                   # Log all statements
log_per_node_statement = on
                                   # Log all statements
                                   # with node and backend informations
log_client_messages = off
                                   # Log any client messages
log_standby_delay = 'if_over_threshold'
                                   # Log standby delay
                                   # Valid values are combinations of always,
                                   # if_over_threshold, none

# - Syslog specific -

syslog_facility = 'LOCAL0'
                                   # Syslog local facility. Default to LOCAL0
syslog_ident = 'pgpool'
                                   # Syslog program identification string
                                   # Default to 'pgpool'

# - Debug -

#log_error_verbosity = default # terse, default, or verbose messages

#client_min_messages = notice # values in order of decreasing detail:
                                        # debug5
                                        # debug4
                                        # debug3
                                        # debug2
                                        # debug1
                                        # log
                                        # notice
                                        # warning
                                        # error

#log_min_messages = warning # values in order of decreasing detail:
                                        # debug5
                                        # debug4
                                        # debug3
                                        # debug2
                                        # debug1
                                        # info
                                        # notice
                                        # warning
                                        # error
                                        # log
                                        # fatal
                                        # panic

# This is used when logging to stderr:
logging_collector = on
                                        # Enable capturing of stderr
                                        # into log files.
                                        # (change requires restart)

# -- Only used if logging_collector is on ---

#log_directory = '/tmp/pgpool_logs'
                                        # directory where log files are written,
                                        # can be absolute
#log_filename = 'pgpool-%Y-%m-%d_%H%M%S.log'
                                        # log file name pattern,
                                        # can include strftime() escapes

#log_file_mode = 0600
                                        # creation mode for log files,
                                        # begin with 0 to use octal notation

#log_truncate_on_rotation = off
                                        # If on, an existing log file with the
                                        # same name as the new log file will be
                                        # truncated rather than appended to.
                                        # But such truncation only occurs on
                                        # time-driven rotation, not on restarts
                                        # or size-driven rotation. Default is
                                        # off, meaning append to existing files
                                        # in all cases.

#log_rotation_age = 1d
                                        # Automatic rotation of logfiles will
                                        # happen after that (minutes)time.
                                        # 0 disables time based rotation.
#log_rotation_size = 10MB
                                        # Automatic rotation of logfiles will
                                        # happen after that much (KB) log output.
                                        # 0 disables size based rotation.

#------------------------------------------------------------------------------
# LOAD BALANCING MODE
#------------------------------------------------------------------------------

load_balance_mode = on
                                   # Activate load balancing mode
                                   # (change requires restart)
ignore_leading_white_space = on
                                   # Ignore leading white spaces of each query
read_only_function_list = ''
                                   # Comma separated list of function names
                                   # that don't write to database
                                   # Regexp are accepted
write_function_list = 'nextval,setval,.*\.func_insert_.*,.*\.func_delete_.*,.*\.func_update_.*,.*\.func_nlb_.*,(scc|report_query)\..*'
                                   # Comma separated list of function names
                                   # that write to database
                                   # Regexp are accepted
                                   # If both read_only_function_list and write_function_list
                                   # is empty, function's volatile property is checked.
                                   # If it's volatile, the function is regarded as a
                                   # writing function.

primary_routing_query_pattern_list = ''
                                   # Semicolon separated list of query patterns
                                   # that should be sent to primary node
                                   # Regexp are accepted
                                   # valid for streaming replicaton mode only.

database_redirect_preference_list = ''
                                   # comma separated list of pairs of database and node id.
                                   # example: postgres:primary,mydb[0-4]:1,mydb[5-9]:2'
                                   # valid for streaming replicaton mode only.

app_name_redirect_preference_list = ''
                                   # comma separated list of pairs of app name and node id.
                                   # example: 'psql:primary,myapp[0-4]:1,myapp[5-9]:standby'
                                   # valid for streaming replicaton mode only.
allow_sql_comments = off
                                   # if on, ignore SQL comments when judging if load balance or
                                   # query cache is possible.
                                   # If off, SQL comments effectively prevent the judgment
                                   # (pre 3.4 behavior).

disable_load_balance_on_write = 'transaction'
                                   # Load balance behavior when write query is issued
                                   # in an explicit transaction.
                                   #
                                   # Valid values:
                                   #
                                   # 'transaction' (default):
                                   # if a write query is issued, subsequent
                                   # read queries will not be load balanced
                                   # until the transaction ends.
                                   #
                                   # 'trans_transaction':
                                   # if a write query is issued, subsequent
                                   # read queries in an explicit transaction
                                   # will not be load balanced until the session ends.
                                   #
                                   # 'dml_adaptive':
                                   # Queries on the tables that have already been
                                   # modified within the current explicit transaction will
                                   # not be load balanced until the end of the transaction.
                                   #
                                   # 'always':
                                   # if a write query is issued, read queries will
                                   # not be load balanced until the session ends.
                                   #
                                   # Note that any query not in an explicit transaction
                                   # is not affected by the parameter except 'always'.

dml_adaptive_object_relationship_list= ''
                                   # comma separated list of object pairs
                                   # [object]:[dependent-object], to disable load balancing
                                   # of dependent objects within the explicit transaction
                                   # after WRITE statement is issued on (depending-on) object.
                                   #
                                   # example: 'tb_t1:tb_t2,insert_tb_f_func():tb_f,tb_v:my_view'
                                   # Note: function name in this list must also be present in
                                   # the write_function_list
                                   # only valid for disable_load_balance_on_write = 'dml_adaptive'.

statement_level_load_balance = on
                                   # Enables statement level load balancing

#------------------------------------------------------------------------------
# STREAMING REPLICATION MODE
#------------------------------------------------------------------------------

# - Streaming -

sr_check_period = 10
                                   # Streaming replication check period
                                   # Disabled (0) by default
sr_check_user = 'pgpool'
                                   # Streaming replication check user
                                   # This is neccessary even if you disable streaming
                                   # replication delay check by sr_check_period = 0
sr_check_password = ''
                                   # Password for streaming replication check user
                                   # Leaving it empty will make Pgpool-II to first look for the
                                   # Password in pool_passwd file before using the empty password

#------------------------------------------------------------------------------
# HEALTH CHECK GLOBAL PARAMETERS
#------------------------------------------------------------------------------

health_check_period = 0
                                   # Health check period
                                   # Disabled (0) by default
health_check_timeout = 20
                                   # Health check timeout
                                   # 0 means no timeout
health_check_user = 'nobody'
                                   # Health check user
health_check_password = ''
                                   # Password for health check user
                                   # Leaving it empty will make Pgpool-II to first look for the
                                   # Password in pool_passwd file before using the empty password

#------------------------------------------------------------------------------
# FAILOVER AND FAILBACK
#------------------------------------------------------------------------------

#failover_command = ''
                                   # Executes this command at failover
                                   # Special values:
                                   # %d = failed node id
                                   # %h = failed node host name
                                   # %p = failed node port number
                                   # %D = failed node database cluster path
                                   # %m = new main node id
                                   # %H = new main node hostname
                                   # %M = old main node id
                                   # %P = old primary node id
                                   # %r = new main port number
                                   # %R = new main database cluster path
                                   # %N = old primary node hostname
                                   # %S = old primary node port number
                                   # %% = '%' character
#failback_command = ''
                                   # Executes this command at failback.
                                   # Special values:
                                   # %d = failed node id
                                   # %h = failed node host name
                                   # %p = failed node port number
                                   # %D = failed node database cluster path
                                   # %m = new main node id
                                   # %H = new main node hostname
                                   # %M = old main node id
                                   # %P = old primary node id
                                   # %r = new main port number
                                   # %R = new main database cluster path
                                   # %N = old primary node hostname
                                   # %S = old primary node port number
                                   # %% = '%' character

failover_on_backend_error = off
                                   # Initiates failover when reading/writing to the
                                   # backend communication socket fails
                                   # If set to off, pgpool will report an
                                   # error and disconnect the session.

#failover_on_backend_shutdown = off
                                   # Initiates failover when backend is shutdown,
                                   # or backend process is killed.
                                   # If set to off, pgpool will report an
                                   # error and disconnect the session.

#detach_false_primary = off
                                   # Detach false primary if on. Only
                                   # valid in streaming replicaton
                                   # mode and with PostgreSQL 9.6 or
                                   # after.

#search_primary_node_timeout = 5min
                                   # Timeout in seconds to search for the
                                   # primary node when a failover occurs.
                                   # 0 means no timeout, keep searching
                                   # for a primary node forever.

#------------------------------------------------------------------------------
# ONLINE RECOVERY
#------------------------------------------------------------------------------

#recovery_user = 'nobody'
                                   # Online recovery user
#recovery_password = ''
                                   # Online recovery password
                                   # Leaving it empty will make Pgpool-II to first look for the
                                   # Password in pool_passwd file before using the empty password

#recovery_1st_stage_command = ''
                                   # Executes a command in first stage
#recovery_2nd_stage_command = ''
                                   # Executes a command in second stage
#recovery_timeout = 90
                                   # Timeout in seconds to wait for the
                                   # recovering node's postmaster to start up
                                   # 0 means no wait
#client_idle_limit_in_recovery = 0
                                   # Client is disconnected after being idle
                                   # for that many seconds in the second stage
                                   # of online recovery
                                   # 0 means no disconnection
                                   # -1 means immediate disconnection
#auto_failback = off
                                   # https://b-peng.blogspot.com/2022/02/auto-failback.html
                                   # Dettached backend node reattach automatically
                                   # if replication_state is 'streaming'.
#auto_failback_interval = 1min
                                   # Min interval of executing auto_failback in
                                   # seconds.

#------------------------------------------------------------------------------
# OTHERS
#------------------------------------------------------------------------------
relcache_expire = 3600
                                   # Life time of relation cache in seconds.
                                   # 0 means no cache expiration(the default).
                                   # The relation cache is used for cache the
                                   # query result against PostgreSQL system
                                   # catalog to obtain various information
                                   # including table structures or if it's a
                                   # temporary table or not. The cache is
                                   # maintained in a pgpool child local memory
                                   # and being kept as long as it survives.
                                   # If someone modify the table by using
                                   # ALTER TABLE or some such, the relcache is
                                   # not consistent anymore.
                                   # For this purpose, cache_expiration
                                   # controls the life time of the cache.
relcache_size = 1024
                                   # Number of relation cache
                                   # entry. If you see frequently:
                                   # "pool_search_relcache: cache replacement happend"
                                   # in the pgpool log, you might want to increate this number.

check_temp_table = trace
                                   # Temporary table check method. catalog, trace or none.
                                   # Default is catalog.

check_unlogged_table = off
                                   # If on, enable unlogged table check in SELECT statements.
                                   # This initiates queries against system catalog of primary/main
                                   # thus increases load of primary.
                                   # If you are absolutely sure that your system never uses unlogged tables
                                   # and you want to save access to primary/main, you could turn this off.
                                   # Default is on.
enable_shared_relcache = on
                                   # If on, relation cache stored in memory cache,
                                   # the cache is shared among child process.
                                   # Default is on.
                                   # (change requires restart)

relcache_query_target = primary
                                   # Target node to send relcache queries. Default is primary node.
                                   # If load_balance_node is specified, queries will be sent to load balance node.

#------------------------------------------------------------------------------
# IN MEMORY QUERY MEMORY CACHE
#------------------------------------------------------------------------------
memory_cache_enabled = off
                                   # If on, use the memory cache functionality, off by default
                                   # (change requires restart)
memqcache_method = 'shmem'
                                   # Cache storage method. either 'shmem'(shared memory) or
                                   # 'memcached'. 'shmem' by default
                                   # (change requires restart)
memqcache_oiddir = '/tmp/oiddir'
                                   # Temporary work directory to record table oids
                                   # (change requires restart)

#------------------------------------------------------------------------------
# BACKENDS
#------------------------------------------------------------------------------
# https://www.pgpool.net/docs/42/en/html/runtime-config-backend-settings.html
# New nodes can be added by adding parameter rows and reloading a configuration file.
# However, the existing values cannot be updated, so you must restart Pgpool-II in that case.
# Generated by confd
{{range $index, $member := (gets "/members/*")}}
{{$data := json $member.Value}}
{{if eq $data.state "running"}}
backend_hostname{{$index}} = '{{base $member.Key}}'
backend_port{{$index}} = 5432
backend_weight{{$index}} = {{ if eq $data.role "master" }} 0 {{ else }} 1 {{ end }}
{{end}}
{{end}}

```

每当遇到阻塞问题时,查看 pcp_proc_info,可以看到所有的进程都处于 'Execute command' 状态,而数据库端其实已经都是 idle 状态了,此时查看 pgpool-II 的日志可以看到类似如下异常:

```
bash-5.0$ grep 12233 pgpool-2023-03-16_000000.log
2023-03-16 07:53:15: pid 12233: LOG: new connection received
2023-03-16 07:53:15: pid 12233: DETAIL: connecting host=10.0.0.20 port=50010
2023-03-16 07:53:15: pid 12233: LOG: using clear text authentication with frontend
2023-03-16 07:53:15: pid 12233: DETAIL: backend will still use md5 auth
2023-03-16 07:53:15: pid 12233: HINT: you can disable this behavior by setting allow_clear_text_frontend_auth to off
2023-03-16 07:53:15: pid 12233: LOG: statement:
2023-03-16 07:53:15: pid 12233: LOG: DB node id: 1 backend pid: 3396839 statement: SELECT version()
2023-03-16 09:33:41: pid 12233: ERROR: unable to read data from DB node 1
2023-03-16 09:33:41: pid 12233: DETAIL: socket read failed with error "Connection reset by peer"
2023-03-16 09:33:41: pid 12233: CONTEXT: while searching system catalog, When relcache is missed
2023-03-16 09:33:41: pid 12233: LOG: statement: DISCARD ALL
2023-03-16 09:33:41: pid 12233: LOG: DB node id: 1 backend pid: 3396839 statement: SELECT version()
2023-03-16 09:33:41: pid 12233: WARNING: write on backend 1 failed with error :"Broken pipe"
2023-03-16 09:33:41: pid 12233: DETAIL: while trying to write data from offset: 0 wlen: 22
2023-03-16 09:33:41: pid 12233: CONTEXT: while searching system catalog, When relcache is missed
2023-03-16 09:33:41: pid 12233: WARNING: write on backend 1 failed with error :"Broken pipe"
2023-03-16 09:33:41: pid 12233: DETAIL: while trying to write data from offset: 0 wlen: 5
2023-03-16 09:38:42: pid 9368: LOG: child process with pid: 12233 exits with status 256
```

理论上如果连接数据库的 TCP 连接被中断之后,请求应该会直接返回异常,或者交给别的子进程去处理,为什么会发生阻塞呢?
TagsNo tags attached.

Activities

cobolbaby

2023-03-22 12:14

reporter   ~0004304

cobolbaby

2023-03-22 12:30

reporter   ~0004305

At present, I only use pgpool-II to achieve load balancing of the database without using other functions, but after using it in the production environment, I found that when some some abnormal problems occur, pgpool-II requests will accumulate, and then client requests will occur Seriously blocked.

Whenever I encounter a blocking problem, check pcp_proc_info, you can see that all the processes are in the 'Execute command' state, and the database side is actually in the idle state. Check the log of pgpool-II at this time, and you can see an exception similar to the above.

In theory, if the TCP connection to the database is interrupted, the request should return an exception directly, or be handed over to another sub-process for processing. Why is there blocking?

Ps: How can I modify the submitted title and content?

cobolbaby

2023-03-23 13:14

reporter   ~0004307

```
failover_on_backend_error = off
                                   # Initiates failover when reading/writing to the
                                   # backend communication socket fails
                                   # If set to off, pgpool will report an
                                   # error and disconnect the session.

```

It seems that `failover_on_backend_error = off` doesn't work as expected.

Code: https://github.com/pgpool/pgpool2/blob/f18b58afc6af2625e815c9ab9fb114f02f25b76e/src/utils/pool_stream.c#L240-L255

pengbo

2023-03-24 23:33

developer   ~0004308

Last edited: 2023-04-05 16:50

Does this issue occur when PostgreSQL goes down?
Could you provide detailed steps to reproduce this issue?

pengbo

2023-03-24 23:36

developer   ~0004309

I think you can change the issue title by clicking "Edit" button.

cobolbaby

2023-03-26 13:29

reporter   ~0004311

> Does this issue occur when PostgreSQL goes down? Could you provide detailed steps to reproduce this issue?

No, PG does not restart or stop. It seems that the existing tcp connection between the pgpool container and the postgres container is reset, but the reconnection can actually be successful.
I don't know what caused the interruption, so I can't construct a reproducible case.

> It seems that `failover_on_backend_error = off` doesn't work as expected.

Because the PG HA environment is built based on Patroni with Etcd, and pgpool-II is only used for read-write separation. Is it not enough to just configure `failover_on_backend_error = off`?
In addition, if failover_on_backend_error is set to off, pgpool should disconnect the session. Why are subsequent requests still blocked?

cobolbaby

2023-03-26 13:31

reporter   ~0004312

> I think you can change the issue title by clicking "Edit" button.

Could you take a screenshot, I didn't find this button.

cobolbaby

2023-03-26 15:19

reporter   ~0004313

> No, PG does not restart or stop. It seems that the existing tcp connection between the pgpool container and the postgres container is reset, but the reconnection can actually be successful.
I don't know what caused the interruption, so I can't construct a reproducible case.

`Connection reset` happen from time to time, but not every time the execution state stays in the `Execute Command` for a long time.

cobolbaby

2023-03-26 15:33

reporter   ~0004314

Will Pgpool-II mark the pg node as "down" in its internal state when a backend node's connection has been reset ?

cobolbaby

2023-03-27 11:43

reporter   ~0004319

Accident Scene Analysis:

```
bash-5.0$ pcp_node_info -U pgpool
Password:
pg01 5432 2 1.000000 up up standby standby 160 streaming quorum 2023-03-23 11:17:50
pg02 5432 2 0.000000 up up primary primary 0 none none 2023-03-23 11:17:50
bash-5.0$ pcp_proc_info -U pgpool
Password:
dwf_tpms tpmuser 2023-03-26 16:25:07 0 3 0 2023-03-27 09:15:28 2023-03-27 09:15:28 0 (30:00 before client disconnected) 1 1030733 1 18067 0 Execute command
dwf_tpms tpmuser 2023-03-26 16:25:07 0 3 0 2023-03-27 09:15:28 2023-03-27 09:15:28 0 (30:00 before client disconnected) 1 2541701 1 18067 1 Execute command
dwf_tpms tpmuser 2023-03-27 07:40:03 0 3 0 2023-03-27 09:00:24 2023-03-27 09:00:24 0 (30:00 before client disconnected) 1 1030459 1 19187 0 Execute command
dwf_tpms tpmuser 2023-03-27 07:40:03 0 3 0 2023-03-27 09:00:24 2023-03-27 09:00:24 0 (30:00 before client disconnected) 1 2529422 1 19187 1 Execute command
dwf_tpms tpmuser 2023-03-27 08:50:21 0 3 0 2023-03-27 10:05:40 2023-03-27 10:05:40 0 (30:00 before client disconnected) 1 1031645 1 19201 0 Execute command
dwf_tpms tpmuser 2023-03-27 08:50:21 0 3 0 2023-03-27 10:05:40 2023-03-27 10:05:40 0 (30:00 before client disconnected) 1 2593215 1 19201 1 Execute command
dwf_tpms tpmuser 2023-03-26 23:35:10 0 3 0 2023-03-27 09:05:25 2023-03-27 09:05:25 0 (30:00 before client disconnected) 1 1030550 1 19047 0 Execute command
dwf_tpms tpmuser 2023-03-26 23:35:10 0 3 0 2023-03-27 09:05:25 2023-03-27 09:05:25 0 (30:00 before client disconnected) 1 2533056 1 19047 1 Execute command
dwf_tpms tpmuser 2023-03-27 07:50:06 0 3 0 2023-03-27 09:50:37 2023-03-27 09:50:37 0 (30:00 before client disconnected) 1 1031371 1 19189 0 Execute command
dwf_tpms tpmuser 2023-03-27 07:50:06 0 3 0 2023-03-27 09:50:37 2023-03-27 09:50:37 0 (30:00 before client disconnected) 1 2573501 1 19189 1 Execute command
dwf_tpms tpmuser 2023-03-26 20:05:09 0 3 0 2023-03-27 09:40:34 2023-03-27 09:40:34 0 (30:00 before client disconnected) 1 1031189 1 18569 0 Execute command
dwf_tpms tpmuser 2023-03-26 20:05:09 0 3 0 2023-03-27 09:40:34 2023-03-27 09:40:34 0 (30:00 before client disconnected) 1 2561202 1 18569 1 Execute command
dwf_tpms tpmuser 2023-03-27 01:44:17 0 3 0 2023-03-27 09:20:29 2023-03-27 09:20:29 0 (30:00 before client disconnected) 1 1030824 1 19134 0 Execute command
dwf_tpms tpmuser 2023-03-27 01:44:17 0 3 0 2023-03-27 09:20:29 2023-03-27 09:20:29 0 (30:00 before client disconnected) 1 2545352 1 19134 1 Execute command
dwf_tpms tpmuser 2023-03-26 20:15:08 0 3 0 2023-03-27 10:00:39 2023-03-27 10:00:39 0 (30:00 before client disconnected) 1 1031553 1 18591 0 Execute command
dwf_tpms tpmuser 2023-03-26 20:15:08 0 3 0 2023-03-27 10:00:39 2023-03-27 10:00:39 0 (30:00 before client disconnected) 1 2586086 1 18591 1 Execute command
dwf_tpms tpmuser 2023-03-27 02:09:24 0 3 0 2023-03-27 08:55:22 2023-03-27 08:55:22 0 (30:00 before client disconnected) 1 1030368 1 19139 0 Execute command
dwf_tpms tpmuser 2023-03-27 02:09:24 0 3 0 2023-03-27 08:55:22 2023-03-27 08:55:22 0 (30:00 before client disconnected) 1 2524205 1 19139 1 Execute command
dwf_tpms tpmuser 2023-03-27 06:19:42 0 3 0 2023-03-27 09:30:31 2023-03-27 09:30:31 0 (30:00 before client disconnected) 1 1031006 1 19171 0 Execute command
dwf_tpms tpmuser 2023-03-27 06:19:42 0 3 0 2023-03-27 09:30:31 2023-03-27 09:30:31 0 (30:00 before client disconnected) 1 2552466 1 19171 1 Execute command
dwf_tpms tpmuser 2023-03-26 15:55:10 0 3 0 2023-03-27 09:55:38 2023-03-27 09:55:38 0 (30:00 before client disconnected) 1 1031462 1 18012 0 Execute command
dwf_tpms tpmuser 2023-03-26 15:55:10 0 3 0 2023-03-27 09:55:38 2023-03-27 09:55:38 0 (30:00 before client disconnected) 1 2579484 1 18012 1 Execute command
dwf_tpms tpmuser 2023-03-27 04:40:02 0 3 0 2023-03-27 09:45:35 2023-03-27 09:45:35 0 (30:00 before client disconnected) 1 1031280 1 19169 0 Execute command
dwf_tpms tpmuser 2023-03-27 04:40:02 0 3 0 2023-03-27 09:45:35 2023-03-27 09:45:35 0 (30:00 before client disconnected) 1 2567322 1 19169 1 Execute command
dwf_tpms tpmuser 2023-03-26 22:15:12 0 3 0 2023-03-27 10:10:42 2023-03-27 10:10:42 0 (30:00 before client disconnected) 1 1031736 1 18867 0 Execute command
dwf_tpms tpmuser 2023-03-26 22:15:12 0 3 0 2023-03-27 10:10:42 2023-03-27 10:10:42 0 (30:00 before client disconnected) 1 2599789 1 18867 1 Execute command
postgres postgres 2023-03-26 23:05:10 0 3 0 2023-03-27 10:29:06 2023-03-27 10:29:06 4 (29:56 before client disconnected) 1 1032078 1 18980 0 Execute command
postgres postgres 2023-03-26 23:05:10 0 3 0 2023-03-27 10:29:06 2023-03-27 10:29:06 4 (29:56 before client disconnected) 1 2621579 1 18980 1 Execute command
dwf_tpms tpmuser 2023-03-26 08:05:08 0 3 0 2023-03-27 10:15:43 2023-03-27 10:15:43 0 (30:00 before client disconnected) 1 1031829 1 16906 0 Execute command
dwf_tpms tpmuser 2023-03-26 08:05:08 0 3 0 2023-03-27 10:15:43 2023-03-27 10:15:43 0 (30:00 before client disconnected) 1 2606140 1 16906 1 Execute command
dwf_tpms tpmuser 2023-03-26 19:58:46 0 3 0 2023-03-27 09:10:26 2023-03-27 09:10:26 0 (30:00 before client disconnected) 1 1030641 1 18542 0 Execute command
dwf_tpms tpmuser 2023-03-26 19:58:46 0 3 0 2023-03-27 09:10:26 2023-03-27 09:10:26 0 (30:00 before client disconnected) 1 2537139 1 18542 1 Execute command
dwf_tpms tpmuser 2023-03-26 22:19:31 0 3 0 2023-03-27 09:25:30 2023-03-27 09:25:30 0 (30:00 before client disconnected) 1 1030915 1 18876 0 Execute command
dwf_tpms tpmuser 2023-03-26 22:19:31 0 3 0 2023-03-27 09:25:30 2023-03-27 09:25:30 0 (30:00 before client disconnected) 1 2548862 1 18876 1 Execute command
dwf_tpms tpmuser 2023-03-26 22:35:09 0 3 0 2023-03-27 09:35:33 2023-03-27 09:35:33 0 (30:00 before client disconnected) 1 1031098 1 18912 0 Execute command
dwf_tpms tpmuser 2023-03-26 22:35:09 0 3 0 2023-03-27 09:35:33 2023-03-27 09:35:33 0 (30:00 before client disconnected) 1 2556747 1 18912 1 Execute command
bash-5.0$
bash-5.0$
bash-5.0$
bash-5.0$ ps -ef | grep -v wait
PID USER TIME COMMAND
    1 postgres 0:00 {entrypoint.sh} /bin/bash /opt/confd/bin/entrypoint.sh
   34 postgres 6:48 confd -prefix=pgcluster/pgcluster_f6_12 -interval=10 -backend etcdv3 -node http://etcd1:8917 -node http://etcd2:8917 -node http://etcd3:8917
 7535 postgres 0:20 pgpool -f /opt/pgpool-II/etc/pgpool.conf -F /opt/pgpool-II/etc/pcp.conf -D
 7536 postgres 0:43 pgpool: PgpoolLogger
 8139 postgres 2:09 pgpool: worker process
 8140 postgres 0:45 pgpool: health check process(0)
 8141 postgres 0:50 pgpool: health check process(1)
16906 postgres 0:05 pgpool: tpmuser dwf_tpms 10.0.0.20(36812) SELECT
17508 postgres 0:00 bash
18012 postgres 0:03 pgpool: tpmuser dwf_tpms 10.0.0.20(54384) SELECT
18067 postgres 0:03 pgpool: tpmuser dwf_tpms 10.0.0.20(58112) WITH
18542 postgres 0:02 pgpool: tpmuser dwf_tpms 10.0.0.20(47512) SELECT
18569 postgres 0:02 pgpool: tpmuser dwf_tpms 10.0.0.20(38654) SELECT
18591 postgres 0:03 pgpool: tpmuser dwf_tpms 10.0.0.20(49124) SELECT
18867 postgres 0:02 pgpool: tpmuser dwf_tpms 10.0.0.20(41608) SELECT
18876 postgres 0:02 pgpool: tpmuser dwf_tpms 10.0.0.20(44918) SELECT
18912 postgres 0:02 pgpool: tpmuser dwf_tpms 10.0.0.20(51010) SELECT
18980 postgres 0:02 pgpool: postgres postgres SHOW
19047 postgres 0:01 pgpool: tpmuser dwf_tpms 10.0.0.20(41388) WITH
19134 postgres 0:01 pgpool: tpmuser dwf_tpms 10.0.0.20(38774) SELECT
19139 postgres 0:01 pgpool: tpmuser dwf_tpms 10.0.0.20(36572) SELECT
19169 postgres 0:01 pgpool: tpmuser dwf_tpms 10.0.0.20(55924) SELECT
19171 postgres 0:00 pgpool: tpmuser dwf_tpms 10.0.0.20(52084) SELECT
19187 postgres 0:00 pgpool: tpmuser dwf_tpms 10.0.0.20(51066) SELECT
19189 postgres 0:00 pgpool: tpmuser dwf_tpms 10.0.0.20(54678) SELECT
19201 postgres 0:00 pgpool: tpmuser dwf_tpms 10.0.0.20(37050) SELECT
19219 postgres 0:00 bash
19260 postgres 0:00 psql
19272 postgres 0:00 ps -ef
bash-5.0$ pwd
/tmp/pgpool_logs
bash-5.0$ ls -ltr |tail -n 10
-rw------- 1 postgres postgres 10485966 Mar 25 12:00 pgpool-2023-03-25_060009.log
-rw------- 1 postgres postgres 10486450 Mar 25 17:50 pgpool-2023-03-25_120007.log
-rw------- 1 postgres postgres 10486162 Mar 25 23:40 pgpool-2023-03-25_175007.log
-rw------- 1 postgres postgres 571352 Mar 25 23:59 pgpool-2023-03-25_234006.log
-rw------- 1 postgres postgres 10486648 Mar 26 05:40 pgpool-2023-03-26_000000.log
-rw------- 1 postgres postgres 10486276 Mar 26 11:20 pgpool-2023-03-26_054007.log
-rw------- 1 postgres postgres 10485778 Mar 26 16:50 pgpool-2023-03-26_112005.log
-rw------- 1 postgres postgres 10485877 Mar 26 22:30 pgpool-2023-03-26_165009.log
-rw------- 1 postgres postgres 2539797 Mar 26 23:58 pgpool-2023-03-26_223006.log
-rw------- 1 postgres postgres 794130 Mar 27 10:35 pgpool-2023-03-27_000000.log
bash-5.0$ grep 16906 pgpool-2023-03-27_000000.log
2023-03-27 10:15:43: pid 16906: LOG: new connection received
2023-03-27 10:15:43: pid 16906: DETAIL: connecting host=10.0.0.20 port=36812
2023-03-27 10:15:43: pid 16906: LOG: using clear text authentication with frontend
2023-03-27 10:15:43: pid 16906: DETAIL: backend will still use md5 auth
2023-03-27 10:15:43: pid 16906: HINT: you can disable this behavior by setting allow_clear_text_frontend_auth to off
2023-03-27 10:15:43: pid 16906: LOG: statement: select --'2023-03-26T21:03:42.944Z' at time zone 'Asia/Shanghai' as time,
2023-03-27 10:15:43: pid 16906: LOG: DB node id: 1 backend pid: 2606140 statement: SELECT version()
bash-5.0$ grep 18067 pgpool-2023-03-27_000000.log
2023-03-27 09:15:28: pid 18067: LOG: new connection received
2023-03-27 09:15:28: pid 18067: DETAIL: connecting host=10.0.0.20 port=58112
2023-03-27 09:15:28: pid 18067: LOG: using clear text authentication with frontend
2023-03-27 09:15:28: pid 18067: DETAIL: backend will still use md5 auth
2023-03-27 09:15:28: pid 18067: HINT: you can disable this behavior by setting allow_clear_text_frontend_auth to off
2023-03-27 09:15:28: pid 18067: LOG: statement: with temp_fixure as
2023-03-27 09:15:28: pid 18067: LOG: DB node id: 1 backend pid: 2541701 statement: SELECT version()
bash-5.0$ date
Mon Mar 27 10:38:52 CST 2023
```

pengbo

2023-03-27 14:22

developer   ~0004321

Thank you for sharing the information.

> Because the PG HA environment is built based on Patroni with Etcd, and pgpool-II is only used for read-write separation. Is it not enough to just configure `failover_on_backend_error = off`?
> In addition, if failover_on_backend_error is set to off, pgpool should disconnect the session. Why are subsequent requests still blocked?

According to the screenshot of 2023-03-22 12:14, you can connect to Pgpool-II using psql,
but when you run "show pool_nodes" command, it hangs up.
When "show pool_nodes" command hangs up, any error displayed in pgpool logs?

I don't have a Patroni environment, it is difficult to confirm this issue.
Cloud you verify if the same issue occurs without using pgpool and postgresql container?

If it is possible to use GDB, could you attach to the process which is running "show pool_nodes" command and print backtrace?

pengbo

2023-03-27 14:25

developer   ~0004323

> Could you take a screenshot, I didn't find this button.
Attach screenshot.
edit-button.png (137,563 bytes)   
edit-button.png (137,563 bytes)   

cobolbaby

2023-03-27 16:51

reporter   ~0004324

> According to the screenshot of 2023-03-22 12:14, you can connect to Pgpool-II using psql,
> but when you run "show pool_nodes" command, it hangs up.

Yes, you are right.

> Cloud you verify if the same issue occurs without using pgpool and postgresql container?

This is a bit difficult, because all our services are deployed in containers.

> Could you take a screenshot, I didn't find this button.

It seems that I don't even have the basic permission to modify it.

cobolbaby

2023-03-27 16:52

reporter   ~0004325

> If it is possible to use GDB, could you attach to the process which is running "show pool_nodes" command and print backtrace?

I will have a try.

cobolbaby

2023-03-27 17:00

reporter   ~0004326

> Could you take a screenshot, I didn't find this button.

It seems that I don't even have the basic permission to modify it.

pengbo

2023-04-05 16:49

developer   ~0004344

In my previous reply, I said you should set backend_flagX = 'DISALLOW_TO_FAILOVER'.

If the failover is performed by Patroni and the primary and standby nodes change dynamically, you should not set DISALLOW_TO_FAILOVER,
because pgpool need to perform failover and find the new primary.

If you are using Patroni to handle failover, just disable failover_command and follow_primary_command:

failover_command = ''
follow_primary_command = ''
failover_on_backend_error = off

Issue History

Date Modified Username Field Change
2023-03-22 12:08 cobolbaby New Issue
2023-03-22 12:14 cobolbaby Note Added: 0004304
2023-03-22 12:14 cobolbaby File Added: 截屏2023-02-27_22.26.35.png
2023-03-22 12:30 cobolbaby Note Added: 0004305
2023-03-23 09:18 pengbo Assigned To => pengbo
2023-03-23 09:18 pengbo Status new => assigned
2023-03-23 13:14 cobolbaby Note Added: 0004307
2023-03-24 23:33 pengbo Note Added: 0004308
2023-03-24 23:36 pengbo Note Added: 0004309
2023-03-24 23:36 pengbo Status assigned => feedback
2023-03-26 13:29 cobolbaby Note Added: 0004311
2023-03-26 13:29 cobolbaby Status feedback => assigned
2023-03-26 13:31 cobolbaby Note Added: 0004312
2023-03-26 15:19 cobolbaby Note Added: 0004313
2023-03-26 15:33 cobolbaby Note Added: 0004314
2023-03-27 11:43 cobolbaby Note Added: 0004319
2023-03-27 11:43 cobolbaby File Added: pgpool-2023-03-27_000000.log
2023-03-27 14:22 pengbo Note Added: 0004321
2023-03-27 14:25 pengbo Note Added: 0004323
2023-03-27 14:25 pengbo File Added: edit-button.png
2023-03-27 16:51 cobolbaby Note Added: 0004324
2023-03-27 16:51 cobolbaby File Added: 2023-03-27 15-47-34屏幕截图.png
2023-03-27 16:52 cobolbaby Note Added: 0004325
2023-03-27 17:00 cobolbaby Note Added: 0004326
2023-03-27 17:00 cobolbaby File Added: 2023-03-27 16-00-01屏幕截图.png
2023-04-05 16:49 pengbo Note Added: 0004344
2023-04-05 16:50 pengbo Note Edited: 0004308