[pgpool-general: 8012] Pool Nodes are shown as down even if PG and PGPOOL are up

Wed Feb 2 06:45:26 JST 2022

Hi,

I’m migrating my database cluster to PostgreSQL 14 and have been trying Pgpool-II, for its load-balancing feature only.

At the moment I have two servers, and even though Pgpool-II and PostgreSQL are up and running on both of them, “show pool_nodes” reports one of the backend nodes as down.

server1 pgpool.conf:

listen_addresses = '*'
port = 9999
socket_dir = '/var/run/postgresql'
listen_backlog_multiplier = 2
serialize_accept = off
reserved_connections = 0
pcp_listen_addresses = '*'
pcp_port = 9898
pcp_socket_dir = '/var/run/postgresql'
backend_clustering_mode = 'streaming_replication'
backend_hostname0 = 'server1'
backend_port0 = 5432
backend_weight0 = 1
backend_data_directory0 = '/pgsql/14/main'
backend_flag0 = 'ALLOW_TO_FAILOVER'
backend_application_name0 = 'server1'
backend_hostname1 = 'server2'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/pgsql/14/main'
backend_flag1 = 'ALLOW_TO_FAILOVER'
backend_application_name1 = 'server2'
enable_pool_hba = off
pool_passwd = 'pool_passwd'
authentication_timeout = 60
allow_clear_text_frontend_auth = off
num_init_children = 32
max_pool = 4
child_life_time = 300
child_max_connections = 0
connection_life_time = 0
client_idle_limit = 0
syslog_facility = 'LOCAL0'
syslog_ident = 'pgpool'
pid_file_name = '/var/run/postgresql/pgpool.pid'
logdir = '/var/log/postgresql'
connection_cache = on
reset_query_list = 'ABORT; DISCARD ALL'
replication_mode = off
replicate_select = off
insert_lock = on
lobj_lock_table = ''
replication_stop_on_mismatch = off
failover_if_affected_tuples_mismatch = off
load_balance_mode = off
ignore_leading_white_space = on
white_function_list = ''
black_function_list = 'currval,lastval,nextval,setval'
black_query_pattern_list = ''
database_redirect_preference_list = ''
app_name_redirect_preference_list = ''
allow_sql_comments = off
disable_load_balance_on_write = 'transaction'
statement_level_load_balance = off
master_slave_mode = off
master_slave_sub_mode = 'stream'
sr_check_period = 0
sr_check_user = 'pgpool'
sr_check_password = ''
sr_check_database = 'geoop_live'
delay_threshold = 0
follow_master_command = '/etc/pgpool2/follow_primary.sh %d %h %p %D %m %H %M %P %r %R'
health_check_period = 5            # Seconds
health_check_timeout = 20
health_check_user = 'pgpool'
health_check_password = ''
health_check_database = ''
health_check_max_retries = 3
health_check_retry_delay = 1
connect_timeout = 10000
failover_command = '/etc/pgpool2/failover.sh %d %h %p %D %m %H %M %P %r %R %N %S'
failback_command = ''
failover_on_backend_error = on
detach_false_primary = off
search_primary_node_timeout = 300
auto_failback = off
auto_failback_interval = 60
recovery_user = 'pgpoolrecovery'
recovery_password = ''
recovery_1st_stage_command = 'recovery_1st_stage'
recovery_2nd_stage_command = ''
recovery_timeout = 90
client_idle_limit_in_recovery = 0
use_watchdog = on
trusted_servers = ''
ping_path = '/bin'
wd_hostname = 'server1'
wd_port = 9000
wd_priority = 1
wd_authkey = ''
wd_ipc_socket_dir = '/tmp'
delegate_IP = '192.168.137.150'
if_cmd_path = '/sbin'
if_up_cmd = '/usr/bin/sudo /sbin/ip addr add $_IP_$/24 dev ens5 label ens5:0'
if_down_cmd = '/usr/bin/sudo /sbin/ip addr del $_IP_$/24 dev ens5'
arping_path = '/usr/sbin'
arping_cmd = '/usr/bin/sudo /usr/sbin/arping -U $_IP_$ -w 1 -I ens5'
clear_memqcache_on_escalation = on
wd_escalation_command = '/etc/pgpool2/escalation.sh'
wd_de_escalation_command = ''
failover_when_quorum_exists = on
failover_require_consensus = on
allow_multiple_failover_requests_from_node = off
enable_consensus_with_half_votes = off
wd_monitoring_interfaces_list = ''  # Comma separated list of interfaces names to monitor.
wd_lifecheck_method = 'heartbeat'
wd_interval = 10
wd_heartbeat_port = 9694
wd_heartbeat_keepalive = 2
wd_heartbeat_deadtime = 30
heartbeat_destination0 = 'server2'
heartbeat_destination_port0 = 9694
heartbeat_device0 = ''
wd_life_point = 3
wd_lifecheck_query = 'SELECT 1'
wd_lifecheck_dbname = 'template1'
wd_lifecheck_user = 'nobody'
wd_lifecheck_password = ''
other_pgpool_hostname0 = 'server2'
other_pgpool_port0 = 9999
other_wd_port0 = 9000

server2 pgpool.conf:

listen_addresses = '*'
port = 9999
socket_dir = '/var/run/postgresql'
listen_backlog_multiplier = 2
serialize_accept = off
reserved_connections = 0
pcp_listen_addresses = '*'
pcp_port = 9898
pcp_socket_dir = '/var/run/postgresql'
backend_clustering_mode = 'streaming_replication'
backend_hostname0 = 'server2'
backend_port0 = 5432
backend_weight0 = 1
backend_data_directory0 = '/pgsql/14/main'
backend_flag0 = 'ALLOW_TO_FAILOVER'
backend_application_name0 = 'server2'
backend_hostname1 = 'server1'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/pgsql/14/main'
backend_flag1 = 'ALLOW_TO_FAILOVER'
backend_application_name1 = 'server1'
enable_pool_hba = off
pool_passwd = 'pool_passwd'
authentication_timeout = 60
allow_clear_text_frontend_auth = off
num_init_children = 32
max_pool = 4
child_life_time = 300
child_max_connections = 0
connection_life_time = 0
client_idle_limit = 0
syslog_facility = 'LOCAL0'
syslog_ident = 'pgpool'
pid_file_name = '/var/run/postgresql/pgpool.pid'
logdir = '/var/log/postgresql'
connection_cache = on
reset_query_list = 'ABORT; DISCARD ALL'
replication_mode = off
replicate_select = off
insert_lock = on
lobj_lock_table = ''
replication_stop_on_mismatch = off
failover_if_affected_tuples_mismatch = off
load_balance_mode = off
ignore_leading_white_space = on
white_function_list = ''
black_function_list = 'currval,lastval,nextval,setval'
black_query_pattern_list = ''
database_redirect_preference_list = ''
app_name_redirect_preference_list = ''
allow_sql_comments = off
disable_load_balance_on_write = 'transaction'
statement_level_load_balance = off
master_slave_mode = off
master_slave_sub_mode = 'stream'
sr_check_period = 0
sr_check_user = 'pgpool'
sr_check_password = ''
sr_check_database = 'geoop_live'
delay_threshold = 0
follow_master_command = '/etc/pgpool2/follow_primary.sh %d %h %p %D %m %H %M %P %r %R'
health_check_period = 5            # Seconds
health_check_timeout = 20
health_check_user = 'pgpool'
health_check_password = ''
health_check_database = ''
health_check_max_retries = 3
health_check_retry_delay = 1
connect_timeout = 10000
failover_command = '/etc/pgpool2/failover.sh %d %h %p %D %m %H %M %P %r %R %N %S'
failback_command = ''
failover_on_backend_error = on
detach_false_primary = off
search_primary_node_timeout = 300
auto_failback = off
auto_failback_interval = 60
recovery_user = 'pgpoolrecovery'
recovery_password = ''
recovery_1st_stage_command = 'recovery_1st_stage'
recovery_2nd_stage_command = ''
recovery_timeout = 90
client_idle_limit_in_recovery = 0
use_watchdog = on
trusted_servers = ''
ping_path = '/bin'
wd_hostname = 'server2'
wd_port = 9000
wd_priority = 1
wd_authkey = ''
wd_ipc_socket_dir = '/tmp'
delegate_IP = '192.168.137.150'
if_cmd_path = '/sbin'
if_up_cmd = '/usr/bin/sudo /sbin/ip addr add $_IP_$/24 dev ens5 label ens5:0'
if_down_cmd = '/usr/bin/sudo /sbin/ip addr del $_IP_$/24 dev ens5'
arping_path = '/usr/sbin'
arping_cmd = '/usr/bin/sudo /usr/sbin/arping -U $_IP_$ -w 1 -I ens5'
clear_memqcache_on_escalation = on
wd_escalation_command = '/etc/pgpool2/escalation.sh'
wd_de_escalation_command = ''
failover_when_quorum_exists = on
failover_require_consensus = on
allow_multiple_failover_requests_from_node = off
enable_consensus_with_half_votes = off
wd_monitoring_interfaces_list = ''  # Comma separated list of interfaces names to monitor.
wd_lifecheck_method = 'heartbeat'
wd_interval = 10
wd_heartbeat_port = 9694
wd_heartbeat_keepalive = 2
wd_heartbeat_deadtime = 30
heartbeat_destination0 = 'server1'
heartbeat_destination_port0 = 9694
heartbeat_device0 = ''
wd_life_point = 3
wd_lifecheck_query = 'SELECT 1'
wd_lifecheck_dbname = 'template1'
wd_lifecheck_user = 'nobody'
wd_lifecheck_password = ''
other_pgpool_hostname0 = 'server1'
other_pgpool_port0 = 9999
other_wd_port0 = 9000

Running “show pool_nodes”:

postgres@server1:~$ psql -p 9999 -U pgpool postgres -c "show pool_nodes"
Password for user pgpool:
 node_id | hostname | port | status | lb_weight |  role  | select_cnt | load_balance_node | replication_delay | replication_state | replication_sync_state | last_status_change
---------+----------+------+--------+-----------+--------+------------+-------------------+-------------------+-------------------+------------------------+---------------------
 0       | server1  | 5432 | down   | 0.500000  | slave  | 0          | false             | 0                 |                   |                        | 2022-02-01 21:32:24
 1       | server2  | 5432 | up     | 0.500000  | master | 397        | true              | 0                 |                   |                        | 2022-02-01 21:32:24
(2 rows)

What am I missing here? Any help is much appreciated. 

Thanks!
Lucas
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://www.pgpool.net/pipermail/pgpool-general/attachments/20220202/a59cf9f6/attachment-0001.htm>