[pgpool-general: 2171] Re: read_startup_packet: incorrect packet length What does that mean?
Mistina Michal
Michal.Mistina at virte.sk
Tue Oct 8 06:08:47 JST 2013
Hi Tatsuo.
Sorry for the delayed answer.
In the meantime I've changed the configuration, but the message
"read_startup_packet: incorrect packet length" didn't go away. I am attaching
configuration at the end of the e-mail and also a log file right after I
started Pgpool.
I think I understand the issue I've described in the last e-mail. I mean the
psql -c "show pool_nodes;" -d postgres -U postgres -p 9898 -> the port should
be 9999. Just silly typo.
I would like to summary of the issues I am experiencing now:
1. There are periodic ERROR messages in the Pgpool log file -
"read_startup_packet: incorrect packet length (-1656006549)".
2. There are periodic messages in the PostgreSQL log file - "incomplete
startup packet".
3. If I halt or reboot primary node (soft shutdown), the secondary node
triggers fail-over immediatelly. --> NOT DESIRED BEHAVIOUR
4. If I power off the primary node (unexpectedly), the secondary node applies
health_check and triggers fail-over after health_check time-out. --> DESIRED
BEHAVIOUR
I think, the first two issues are related. But don't know what could cause it.
We are using special services which connects to the Pgpool by using JDBC. Can
the services be the source of these error messages?
According to issue number 3.. I can see in the log file, what happened:
"postmaster on DB node 0 was shutdown by administrative command".
How can I force Pgpool to not fail-over after the administrative command was
issued? I would like to apply same health_check timouts also in the situation
when the administrative command was issued. It is because in our environment
one node is represented by more hosts where the Postgresql services are
migrating from one host to another automatically. It occurs also if the "halt"
or "reboot" command is issued on that one node. This is not errorneous
condition for our environment, because if the "halt" is issued on one host,
PostgreSQL service will be brought up on another node. Virtual IP address is
in front of it.
THE ENVIRONMENT:
Node 1:
- Installed PostgreSQL 9.2 and Pgpool 3.3.1 - Primary
- Streaming replication Primary
Node 2:
- Installed PostgreSQL 9.2 and Pgpool 3.3.1 - Secondary
- Streaming replication Secondary
CONFIG FILE (same for both nodes):
listen_addresses = '*'
port = 9999
socket_dir = '/tmp'
pcp_port = 9898
pcp_socket_dir = '/tmp'
backend_hostname0 = 'node1'
backend_port0 = 5432
backend_weight0 = 1
backend_data_directory0 = '/var/lib/pgsql/9.2/data'
backend_flag0 = 'ALLOW_TO_FAILOVER'
backend_hostname1 = 'node2'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/var/lib/pgsql/9.2/data'
backend_flag1 = 'ALLOW_TO_FAILOVER'
enable_pool_hba = on
pool_passwd = 'pool_passwd'
authentication_timeout = 60
ssl = off
num_init_children = 32
max_pool = 4
child_life_time = 300
child_max_connections = 0
connection_life_time = 0
client_idle_limit = 0
log_destination = 'stderr'
print_timestamp = on
log_connections = off
log_hostname = off
log_statement = off
log_per_node_statement = off
log_standby_delay = 'none'
syslog_facility = 'LOCAL0'
syslog_ident = 'pgpool'
debug_level = 0
pid_file_name = '/var/run/pgpool/pgpool.pid'
logdir = '/var/log/pgpool'
connection_cache = on
reset_query_list = 'ABORT; DISCARD ALL'
replication_mode = off
replicate_select = off
insert_lock = on
lobj_lock_table = ''
replication_stop_on_mismatch = off
failover_if_affected_tuples_mismatch = off
load_balance_mode = off
ignore_leading_white_space = on
white_function_list = ''
black_function_list = 'nextval,setval'
master_slave_mode = on
master_slave_sub_mode = 'stream'
sr_check_period = 10
sr_check_user = 'pgpool'
sr_check_password = 'pgpool'
delay_threshold = 10000000
follow_master_command = ''
parallel_mode = off
pgpool2_hostname = ''
system_db_hostname = 'localhost'
system_db_port = 5432
system_db_dbname = 'pgpool'
system_db_schema = 'pgpool_catalog'
system_db_user = 'pgpool'
system_db_password = ''
health_check_period = 70
health_check_timeout = 30
health_check_user = 'pgpool'
health_check_password = 'pgpool'
health_check_max_retries = 3
health_check_retry_delay = 10
failover_command = '/usr/pgsql-9.2/bin/failover_stream.sh %d %H
/tmp/trigger_file'
failback_command = ''
fail_over_on_backend_error = off
search_primary_node_timeout = 10
recovery_user = 'nobody'
recovery_password = ''
recovery_1st_stage_command = ''
recovery_2nd_stage_command = ''
recovery_timeout = 90
client_idle_limit_in_recovery = 0
use_watchdog = off
trusted_servers = '192.168.x.x'
ping_path = '/bin'
wd_hostname = ''
wd_port = 9000
wd_authkey = ''
delegate_IP = ''
ifconfig_path = '/sbin'
if_up_cmd = 'ifconfig eth0:0 inet $_IP_$ netmask 255.255.255.0'
if_down_cmd = 'ifconfig eth0:0 down'
arping_path = '/usr/sbin'
arping_cmd = 'arping -U $_IP_$ -w 1'
clear_memqcache_on_escalation = on
wd_escalation_command = ''
wd_lifecheck_method = 'heartbeat'
wd_interval = 5
wd_heartbeat_port = 9694
wd_heartbeat_keepalive = 2
wd_heartbeat_deadtime = 30
heartbeat_destination0 = 'node2'
heartbeat_destination_port0 = 9694
heartbeat_device0 = ''
wd_life_point = 3
wd_lifecheck_query = 'SELECT 1'
wd_lifecheck_dbname = 'template1'
wd_lifecheck_user = 'replication'
wd_lifecheck_password = 'replicate'
relcache_expire = 0
relcache_size = 256
check_temp_table = on
memory_cache_enabled = off
memqcache_method = 'shmem'
memqcache_memcached_host = 'localhost'
memqcache_memcached_port = 11211
memqcache_total_size = 67108864
memqcache_max_num_cache = 1000000
memqcache_expire = 0
memqcache_auto_cache_invalidation = on
memqcache_maxcache = 409600
memqcache_cache_block_size = 1048576
memqcache_oiddir = '/var/log/pgpool/oiddir'
white_memqcache_table_list = ''
black_memqcache_table_list = ''
PGPOOL.LOG - NODE1:
2013-10-07 22:37:28 LOG: pid 19961: received fast shutdown request
2013-10-07 22:37:37 LOG: pid 21563: read_status_file: 1 th backend is set to
down status
2013-10-07 22:37:37 LOG: pid 21563: pgpool-II successfully started. version
3.3.1 (tokakiboshi)
2013-10-07 22:37:37 LOG: pid 21563: find_primary_node: primary node id is 0
2013-10-07 22:37:47 ERROR: pid 21574: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:38:01 ERROR: pid 21574: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:38:17 ERROR: pid 21574: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:38:28 LOG: pid 21606: send_failback_request: fail back 1 th
node request from pid 21606
2013-10-07 22:38:28 LOG: pid 21563: starting fail back. reconnect host
node2(5432)
2013-10-07 22:38:28 LOG: pid 21563: Do not restart children because we are
failbacking node id 1 hostnode2 port:5432 and we are in streaming replication
mode
2013-10-07 22:38:28 LOG: pid 21563: find_primary_node_repeatedly: waiting
for finding a primary node
2013-10-07 22:38:28 LOG: pid 21563: find_primary_node: primary node id is 0
2013-10-07 22:38:28 LOG: pid 21563: failover: set new primary node: 0
2013-10-07 22:38:28 LOG: pid 21563: failover: set new master node: 0
2013-10-07 22:38:28 LOG: pid 21563: failback done. reconnect host
node2(5432)
2013-10-07 22:38:28 LOG: pid 21607: worker process received restart request
2013-10-07 22:38:29 LOG: pid 21606: pcp child process received restart
request
2013-10-07 22:38:29 LOG: pid 21563: PCP child 21606 exits with status 256 in
failover()
2013-10-07 22:38:29 LOG: pid 21563: fork a new PCP child pid 22141 in
failover()
2013-10-07 22:38:29 LOG: pid 21563: worker child 21607 exits with status 256
2013-10-07 22:38:29 LOG: pid 21563: fork a new worker child pid 22142
2013-10-07 22:38:31 ERROR: pid 21574: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:38:31 LOG: pid 21574: do_child: failback event found. restart
myself.
2013-10-07 22:38:47 ERROR: pid 22144: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:39:01 LOG: pid 21602: do_child: failback event found. restart
myself.
2013-10-07 22:39:01 ERROR: pid 22144: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:39:17 LOG: pid 21591: do_child: failback event found. restart
myself.
2013-10-07 22:39:17 ERROR: pid 22480: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:39:31 LOG: pid 21586: do_child: failback event found. restart
myself.
2013-10-07 22:39:31 ERROR: pid 22633: read_startup_packet: incorrect packet
length (-189809103)
PGPOOL.LOG - NODE2:
2013-10-07 22:37:28 LOG: pid 19961: received fast shutdown request
2013-10-07 22:37:37 LOG: pid 21563: read_status_file: 1 th backend is set to
down status
2013-10-07 22:37:37 LOG: pid 21563: pgpool-II successfully started. version
3.3.1 (tokakiboshi)
2013-10-07 22:37:37 LOG: pid 21563: find_primary_node: primary node id is 0
2013-10-07 22:37:47 ERROR: pid 21574: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:38:01 ERROR: pid 21574: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:38:17 ERROR: pid 21574: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:38:28 LOG: pid 21606: send_failback_request: fail back 1 th
node request from pid 21606
2013-10-07 22:38:28 LOG: pid 21563: starting fail back. reconnect host
node2(5432)
2013-10-07 22:38:28 LOG: pid 21563: Do not restart children because we are
failbacking node id 1 hostnode2 port:5432 and we are in streaming replication
mode
2013-10-07 22:38:28 LOG: pid 21563: find_primary_node_repeatedly: waiting
for finding a primary node
2013-10-07 22:38:28 LOG: pid 21563: find_primary_node: primary node id is 0
2013-10-07 22:38:28 LOG: pid 21563: failover: set new primary node: 0
2013-10-07 22:38:28 LOG: pid 21563: failover: set new master node: 0
2013-10-07 22:38:28 LOG: pid 21563: failback done. reconnect host
node2(5432)
2013-10-07 22:38:28 LOG: pid 21607: worker process received restart request
2013-10-07 22:38:29 LOG: pid 21606: pcp child process received restart
request
2013-10-07 22:38:29 LOG: pid 21563: PCP child 21606 exits with status 256 in
failover()
2013-10-07 22:38:29 LOG: pid 21563: fork a new PCP child pid 22141 in
failover()
2013-10-07 22:38:29 LOG: pid 21563: worker child 21607 exits with status 256
2013-10-07 22:38:29 LOG: pid 21563: fork a new worker child pid 22142
2013-10-07 22:38:31 ERROR: pid 21574: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:38:31 LOG: pid 21574: do_child: failback event found. restart
myself.
2013-10-07 22:38:47 ERROR: pid 22144: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:39:01 LOG: pid 21602: do_child: failback event found. restart
myself.
2013-10-07 22:39:01 ERROR: pid 22144: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:39:17 LOG: pid 21591: do_child: failback event found. restart
myself.
2013-10-07 22:39:17 ERROR: pid 22480: read_startup_packet: incorrect packet
length (-189809103)
2013-10-07 22:39:31 LOG: pid 21586: do_child: failback event found. restart
myself.
2013-10-07 22:39:31 ERROR: pid 22633: read_startup_packet: incorrect packet
length (-189809103)
POSTGRESQL LOG - NODE1:
2013-10-07 22:36:45 CEST LOG: shutting down
2013-10-07 22:36:45 CEST LOG: database system is shut down
2013-10-07 22:37:53 CEST LOG: database system was interrupted; last known up
at 2013-10-07 22:37:01 CEST
2013-10-07 22:37:53 CEST LOG: entering standby mode
2013-10-07 22:37:53 CEST LOG: streaming replication successfully connected to
primary
2013-10-07 22:37:55 CEST LOG: redo starts at 1/A5000020
2013-10-07 22:37:55 CEST LOG: consistent recovery state reached at 1/A50000E0
2013-10-07 22:37:55 CEST LOG: database system is ready to accept read only
connections
2013-10-07 22:37:57 CEST LOG: incomplete startup packet
2013-10-07 22:38:05 CEST LOG: incomplete startup packet
2013-10-07 22:38:27 CEST LOG: incomplete startup packet
2013-10-07 22:38:35 CEST LOG: incomplete startup packet
2013-10-07 22:38:57 CEST LOG: incomplete startup packet
2013-10-07 22:39:05 CEST LOG: incomplete startup packet
2013-10-07 22:39:27 CEST LOG: incomplete startup packet
2013-10-07 22:39:35 CEST LOG: incomplete startup packet
PGPOOL.LOG - NODE2 - after the fail-over:
2013-10-07 22:54:09 LOG: pid 1651: postmaster on DB node 0 was shutdown by
administrative command
2013-10-07 22:54:09 LOG: pid 1651: degenerate_backend_set: 0 fail over
request from pid 1651
2013-10-07 22:54:09 LOG: pid 843: starting degeneration. shutdown host
node1(5432)
2013-10-07 22:54:09 LOG: pid 843: Restart all children
2013-10-07 22:54:09 LOG: pid 856: postmaster on DB node 0 was shutdown by
administrative command
2013-10-07 22:54:09 LOG: pid 843: execute command:
/usr/pgsql-9.2/bin/failover_stream.sh 0 node2 /tmp/trigger_file
2013-10-07 22:54:09 LOG: pid 843: find_primary_node_repeatedly: waiting for
finding a primary node
Best regards,
Michal Mistina
-----Original Message-----
From: Tatsuo Ishii [mailto:ishii at postgresql.org]
Sent: Saturday, September 21, 2013 5:43 AM
To: Mistina Michal
Cc: pgpool-general at pgpool.net
Subject: Re: [pgpool-general: 2141] Re: read_startup_packet: incorrect packet
length What does that mean?
Please show pgpool log.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp
> In addition.
>
> The errors described in the post could cause also something weird what
> I am experiencing.
>
> I hit following to show cluster nodes:
>
> psql -c "show pool_nodes;" -d postgres -U postgres -p 9898
>
>
>
> The terminal session is stucked then. I cannot cancel the command even
> with
> CTRL+C.
>
> However I can attach node successfully with command:
>
> pcp_attach_node -d 5 tc1 9898 postgres postgres 1
>
>
>
> Best regards,
>
> Michal Mistina
>
>
>
> From: pgpool-general-bounces at pgpool.net
> [mailto:pgpool-general-bounces at pgpool.net] On Behalf Of Mistina Michal
> Sent: Tuesday, September 17, 2013 1:54 PM
> To: pgpool-general at pgpool.net
> Subject: [pgpool-general: 2140] read_startup_packet: incorrect packet
> length What does that mean?
>
>
>
> Dear all.
>
> I don't understand what I see in the log files. Following keeps
> happening all the time after I start pgpool:
>
> 2013-09-17 13:26:31 ERROR: pid 5697: read_startup_packet: incorrect
> packet length (983664419)
>
> 2013-09-17 13:26:49 LOG: pid 3608: do_child: failback event found. restart
> myself.
>
> 2013-09-17 13:26:49 ERROR: pid 5833: read_startup_packet: incorrect
> packet length (983664419)
>
> 2013-09-17 13:27:01 LOG: pid 3591: do_child: failback event found. restart
> myself.
>
> 2013-09-17 13:27:01 ERROR: pid 6000: read_startup_packet: incorrect
> packet length (983664419)
>
> 2013-09-17 13:27:19 ERROR: pid 5372: read_startup_packet: incorrect
> packet length (983664419)
>
> 2013-09-17 13:27:31 LOG: pid 3586: do_child: failback event found. restart
> myself.
>
> 2013-09-17 13:27:31 ERROR: pid 5372: read_startup_packet: incorrect
> packet length (983664419)
>
> 2013-09-17 13:27:49 ERROR: pid 6151: read_startup_packet: incorrect
> packet length (983664419)
>
> 2013-09-17 13:28:01 ERROR: pid 3590: read_startup_packet: incorrect
> packet length (983664419)
>
>
>
> Versions:
>
> pgpool-II-3.3.1-1.pgdg.x86_64
>
> postgresql92-9.2.4-1PGDG.rhel6.x86_64
>
> postgresql92-libs-9.2.4-1PGDG.rhel6.x86_64
>
> postgresql92-server-9.2.4-1PGDG.rhel6.x86_64
>
>
>
> Where should I look at? When do occure aforementioned errors?
>
>
>
> Probably I set something wrong in the config file. Or permissions on
> some files are wrong.
>
>
>
> The pgpool.conf follows.
>
> listen_addresses = '*'
>
> port = 9999
>
> socket_dir = '/tmp'
>
> pcp_port = 9898
>
> pcp_socket_dir = '/tmp'
>
> backend_hostname0 = 'tc1'
>
> backend_port0 = 5432
>
> backend_weight0 = 1
>
> backend_data_directory0 = '/var/lib/pgsql/9.2/data'
>
> backend_flag0 = 'ALLOW_TO_FAILOVER'
>
> backend_hostname1 = 'tc2'
>
> backend_port1 = 5432
>
> backend_weight1 = 1
>
> backend_data_directory1 = '/var/lib/pgsql/9.2/data'
>
> backend_flag1 = 'ALLOW_TO_FAILOVER'
>
> enable_pool_hba = off
>
> pool_passwd = 'pool_passwd'
>
> authentication_timeout = 60
>
> ssl = off
>
> num_init_children = 32
>
> max_pool = 4
>
> child_life_time = 300
>
> child_max_connections = 0
>
> connection_life_time = 0
>
> client_idle_limit = 0
>
> log_destination = 'stderr'
>
> print_timestamp = on
>
> log_connections = off
>
> log_hostname = off
>
> log_statement = off
>
> log_per_node_statement = off
>
> log_standby_delay = 'none'
>
> syslog_facility = 'LOCAL0'
>
> syslog_ident = 'pgpool'
>
> debug_level = 0
>
> pid_file_name = '/var/run/pgpool/pgpool.pid'
>
> logdir = '/var/log/pgpool'
>
> connection_cache = on
>
> reset_query_list = 'ABORT; DISCARD ALL'
>
> replication_mode = off
>
> replicate_select = off
>
> insert_lock = on
>
> lobj_lock_table = ''
>
> replication_stop_on_mismatch = off
>
> failover_if_affected_tuples_mismatch = off
>
> load_balance_mode = off
>
> ignore_leading_white_space = on
>
> white_function_list = ''
>
> black_function_list = 'nextval,setval'
>
> master_slave_mode = on
>
> master_slave_sub_mode = 'stream'
>
> sr_check_period = 10
>
> sr_check_user = 'replication'
>
> sr_check_password = 'replicate'
>
> delay_threshold = 10000000
>
> follow_master_command = ''
>
> parallel_mode = off
>
> pgpool2_hostname = ''
>
> system_db_hostname = 'localhost'
>
> system_db_port = 5432
>
> system_db_dbname = 'pgpool'
>
> system_db_schema = 'pgpool_catalog'
>
> system_db_user = 'pgpool'
>
> system_db_password = ''
>
> health_check_period = 20
>
> health_check_timeout = 20
>
> health_check_user = 'replication'
>
> health_check_password = 'replicate'
>
> health_check_max_retries = 5
>
> health_check_retry_delay = 1
>
> failover_command = '/usr/pgsql-9.2/bin/failover_stream.sh %d %H
> /tmp/trigger_file'
>
> failback_command = ''
>
> fail_over_on_backend_error = on
>
> search_primary_node_timeout = 10
>
> recovery_user = 'nobody'
>
> recovery_password = ''
>
> recovery_1st_stage_command = ''
>
> recovery_2nd_stage_command = ''
>
> recovery_timeout = 90
>
> client_idle_limit_in_recovery = 0
>
> use_watchdog = off
>
> trusted_servers = '192.168.105.254'
>
> ping_path = '/bin'
>
> wd_hostname = ''
>
> wd_port = 9000
>
> wd_authkey = ''
>
> delegate_IP = ''
>
> ifconfig_path = '/sbin'
>
> if_up_cmd = 'ifconfig eth0:0 inet $_IP_$ netmask 255.255.255.0'
>
> if_down_cmd = 'ifconfig eth0:0 down'
>
> arping_path = '/usr/sbin'
>
> arping_cmd = 'arping -U $_IP_$ -w 1'
>
> clear_memqcache_on_escalation = on
>
> wd_escalation_command = ''
>
> wd_lifecheck_method = 'heartbeat'
>
> wd_interval = 5
>
> wd_heartbeat_port = 9694
>
> wd_heartbeat_keepalive = 2
>
> wd_heartbeat_deadtime = 30
>
> heartbeat_destination0 = 'tc2'
>
> heartbeat_destination_port0 = 9694
>
> heartbeat_device0 = ''
>
> wd_life_point = 3
>
> wd_lifecheck_query = 'SELECT 1'
>
> wd_lifecheck_dbname = 'template1'
>
> wd_lifecheck_user = 'replication'
>
> wd_lifecheck_password = 'replicate'
>
> relcache_expire = 0
>
> relcache_size = 256
>
> check_temp_table = on
>
> memory_cache_enabled = off
>
> memqcache_method = 'shmem'
>
> memqcache_memcached_host = 'localhost'
>
> memqcache_memcached_port = 11211
>
> memqcache_total_size = 67108864
>
> memqcache_max_num_cache = 1000000
>
> memqcache_expire = 0
>
> memqcache_auto_cache_invalidation = on
>
> memqcache_maxcache = 409600
>
> memqcache_cache_block_size = 1048576
>
> memqcache_oiddir = '/var/log/pgpool/oiddir'
>
> white_memqcache_table_list = ''
>
> black_memqcache_table_list = ''
>
>
>
>
>
> Best regards,
>
> Michal Mistina
>
>
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 3076 bytes
Desc: not available
URL: <http://www.pgpool.net/pipermail/pgpool-general/attachments/20131007/9ae94b9d/attachment.p7s>
More information about the pgpool-general
mailing list