[Pgpool-general] pgpool master/slave with streaming replication failover

Wed Nov 17 17:02:05 UTC 2010

Hi.
I have two machines with PostgreSQL-9.0.1 with a Hot Standby/Streaming
replication situation and a third machine with pgpool-II-3.0.1 with
master/slave-streaming replication configuration.

Summarizing:
primary: 192.168.0.128
standby: 192.168.0.127

Everything works fine:
# psql -p 9999 -U user -d database -h localhost -c "SELECT COUNT(*) FROM table;"
 count 
-------
  10

But if I kill the standby this is the result:
# psql -p 9999 -U user -d database -h localhost -c "SELECT COUNT(*) FROM table;"
psql: server closed the connection unexpectedly
	This probably means the server terminated abnormally
	before or while processing the request.

...and the pgpool log says:
2010-11-17 17:50:23 LOG:   pid 26343: pgpool-II successfully started. version
3.0.1 (umiyameboshi)
2010-11-17 17:50:44 LOG:   pid 26368: connection closed. retry to create new
connection pool.
2010-11-17 17:50:44 ERROR: pid 26368: connect_inet_domain_socket: connect()
failed: Connection refused
2010-11-17 17:50:44 ERROR: pid 26368: connection to 192.168.0.127(5433) failed
2010-11-17 17:50:44 ERROR: pid 26368: new_connection: create_cp() failed
2010-11-17 17:50:44 LOG:   pid 26368: notice_backend_error: 1 fail over request
from pid 26368
2010-11-17 17:50:44 LOG:   pid 26343: starting degeneration. shutdown host
192.168.0.127(5433)
2010-11-17 17:50:44 LOG:   pid 26343: execute command:
/usr/lib/pgpool-II/3.0.1/bin/failover.sh 192.168.0.127 192.168.0.128
/tmp/trigger_file0 0 0 1
Old master node ID: 0
New master node ID: 0
Backend ID of failed node: 1
2010-11-17 17:50:44 LOG:   pid 26343: failover_handler: set new master node: 0
2010-11-17 17:50:44 LOG:   pid 26343: failover done. shutdown host
192.168.0.127(5433)

If I perform the query again, leaving the standby down, now it works again:
# psql -p 9999 -U user -d database -h localhost -c "SELECT COUNT(*) FROM table;"
 count 
-------
  10

I suppose that the failover configuration is wrong somewhere...this is my
pgpool.conf:
listen_addresses = 'localhost'
port = 9999
pcp_port = 9898
socket_dir = '/tmp'
pcp_socket_dir = '/tmp'
backend_socket_dir = '/tmp'
pcp_timeout = 10
num_init_children = 32
max_pool = 4
child_life_time = 300
connection_life_time = 0
child_max_connections = 0
client_idle_limit = 0
authentication_timeout = 60
logdir = '/var/log/pgpool'
pid_file_name = '/var/log/pgpool/pgpool.pid'
replication_mode = false
load_balance_mode = true
replication_stop_on_mismatch = false
failover_if_affected_tuples_mismatch = false
replicate_select = false
reset_query_list = 'ABORT; DISCARD ALL'
white_function_list = ''
black_function_list = 'nextval,setval'
print_timestamp = true
master_slave_mode = true
master_slave_sub_mode = 'stream'
delay_threshold = 10000000
log_standby_delay = 'if_over_threshold'
connection_cache = true
health_check_timeout = 20
health_check_period = 3600
health_check_user = 'postgis'
failover_command = '/usr/lib/pgpool-II/3.0.1/bin/failover.sh %h %H
/tmp/trigger_file0 %M %m %d'
failback_command = ''
fail_over_on_backend_error = true
insert_lock = false
ignore_leading_white_space = true
log_statement = false
log_per_node_statement = false
log_connections = false
log_hostname = false
parallel_mode = false
enable_query_cache = false
pgpool2_hostname = ''
system_db_hostname = 'localhost'
system_db_port = 5432
system_db_dbname = 'pgpool'
system_db_schema = 'pgpool_catalog'
system_db_user = 'pgpool'
system_db_password = ''
backend_hostname0 = '192.168.0.128'
backend_port0 = 5433
backend_weight0 = 1
backend_hostname1 = '192.168.0.127'
backend_port1 = 5433
backend_weight1 = 1
enable_pool_hba = false
recovery_user = 'nobody'
recovery_password = ''
recovery_1st_stage_command = ''
recovery_2nd_stage_command = ''
recovery_timeout = 90
client_idle_limit_in_recovery = 0
lobj_lock_table = ''
ssl = false
debug_level = 0

and this is my failover script:
# IP address of failed machine
failed_node="${1}"

# The ip adress of new master machine
# (if failed_node is master, it is the standby,
#  if failed_node is standby, it is the master)
new_master="${2}"

# The trigger file touched on standby machine
trigger_file="${3}"

# Printing some values into pgpool log file
echo "Old master node ID: ${4}"
echo "New master node ID: ${5}"
echo "Backend ID of failed node: ${6}"

node=`echo "${failed_node}" | cut -d '.' -f 4`
# If standby goes down, do nothing
if [ ${node} -eq 127 ] ; then
	exit 0 ;
fi

# If master goes down, standby becomes the new master
if [ ${node} -eq 128 ] ; then
	# Create trigger file.
	/usr/bin/ssh -T postgres@${new_master} /bin/touch ${trigger_file}
fi

exit 0 ;

Is it possible to obtain the result of the query without performing it twice?
Where am I wrong?

Thanks in advance,

Alessandro