[pgpool-general: 5975] Automatic Failover Randomly does not work

Pankaj Joshi pankajjo02 at gmail.com
Mon Mar 5 18:40:56 JST 2018


Hello Team,

We have a 2-node setup running pgpool version 3.7.2 (amefuriboshi), PostgreSQL
9.6.7, and repmgr. Both nodes run pgpool and PostgreSQL together in HA mode.
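
For reference, this is roughly how I verify which node is the primary before
and after each test (assuming the postgres user can connect to both backends
and to pgpool on port 9999):

    # ask each backend whether it is in recovery (f = primary, t = standby)
    psql -h osboxes44 -p 5432 -U postgres -Atc 'SELECT pg_is_in_recovery();'
    psql -h osboxes75 -p 5432 -U postgres -Atc 'SELECT pg_is_in_recovery();'

    # ask pgpool for its view of the backend nodes and their roles
    psql -h localhost -p 9999 -U postgres -c 'SHOW pool_nodes;'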

In production as well as in the test environment, when the primary pgpool and
the primary PostgreSQL are on the same node and that node is shut down or
restarted, the automatic failover sporadically fails. In my testing, out of 20
primary node failures the auto failover failed 3-4 times; the rest of the time
it worked fine.
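
Each test iteration is essentially the following (a sketch; the exact timing
and commands vary slightly between runs):

    # on the node currently hosting the primary pgpool + primary PostgreSQL:
    shutdown -r now

    # then, from the surviving node, watch whether failover completes:
    tail -f /var/log/messages | grep pgpool
    psql -h localhost -p 9999 -U postgres -c 'SHOW pool_nodes;'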

We have 2 nodes, osboxes44 and osboxes75. On checking the logs, each time the
failover fails, the failover command that gets executed is incorrect:

execute command: /etc/pgpool-II-96/failover.sh 1 ""
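
Our failover_command passes pgpool's standard placeholders for the failed node
id and the new master hostname, something along these lines (a sketch; the
actual script contents are omitted here):

    # pgpool.conf (sketch)
    failover_command = '/etc/pgpool-II-96/failover.sh %d %H'
                       # %d = id of the detached node
                       # %H = hostname of the new master node

So the second argument arriving as "" means pgpool expanded %H to an empty
string, i.e. it could not determine a new master to promote, which matches the
"failover: no valid backends node found" messages in the logs below.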


I am including the pgpool logs for 3 instances where the failover failed and
pgpool kept looking for the primary node; in all three cases the secondary
PostgreSQL node was up and running. I am including pgpool.conf as well, after
the incident logs.


*Instance 1*

Feb  3 20:22:55 osboxes44 pgpool[5288]: [11-1] 2018-02-03 20:22:55: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:22:55 osboxes44 pgpool[5288]: [12-1] 2018-02-03 20:22:55: pid
5288: LOG:  health check retrying on DB node: 1 (round:1)
Feb  3 20:22:55 osboxes44 pgpool[5287]: [11-1] 2018-02-03 20:22:55: pid
5287: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:22:56 osboxes44 pgpool[5288]: [13-1] 2018-02-03 20:22:56: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:22:56 osboxes44 pgpool[5288]: [14-1] 2018-02-03 20:22:56: pid
5288: LOG:  health check retrying on DB node: 1 (round:2)
Feb  3 20:22:57 osboxes44 pgpool[5288]: [15-1] 2018-02-03 20:22:57: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:22:57 osboxes44 pgpool[5288]: [16-1] 2018-02-03 20:22:57: pid
5288: LOG:  health check retrying on DB node: 1 (round:3)
Feb  3 20:22:58 osboxes44 pgpool[5288]: [17-1] 2018-02-03 20:22:58: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:22:58 osboxes44 pgpool[5288]: [18-1] 2018-02-03 20:22:58: pid
5288: LOG:  health check retrying on DB node: 1 (round:4)
Feb  3 20:22:59 osboxes44 pgpool[5288]: [19-1] 2018-02-03 20:22:59: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:22:59 osboxes44 pgpool[5288]: [20-1] 2018-02-03 20:22:59: pid
5288: LOG:  health check retrying on DB node: 1 (round:5)
Feb  3 20:23:00 osboxes44 pgpool[5288]: [21-1] 2018-02-03 20:23:00: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:23:00 osboxes44 pgpool[5288]: [22-1] 2018-02-03 20:23:00: pid
5288: LOG:  health check retrying on DB node: 1 (round:6)
Feb  3 20:23:01 osboxes44 pgpool[5288]: [23-1] 2018-02-03 20:23:01: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:23:01 osboxes44 pgpool[5288]: [24-1] 2018-02-03 20:23:01: pid
5288: LOG:  health check retrying on DB node: 1 (round:7)
Feb  3 20:23:02 osboxes44 pgpool[5288]: [25-1] 2018-02-03 20:23:02: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:23:02 osboxes44 pgpool[5288]: [26-1] 2018-02-03 20:23:02: pid
5288: LOG:  health check retrying on DB node: 1 (round:8)
Feb  3 20:23:03 osboxes44 pgpool[5288]: [27-1] 2018-02-03 20:23:03: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:23:03 osboxes44 pgpool[5288]: [28-1] 2018-02-03 20:23:03: pid
5288: LOG:  health check retrying on DB node: 1 (round:9)
Feb  3 20:23:04 osboxes44 pgpool[5288]: [29-1] 2018-02-03 20:23:04: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:23:04 osboxes44 pgpool[5288]: [30-1] 2018-02-03 20:23:04: pid
5288: LOG:  health check retrying on DB node: 1 (round:10)
Feb  3 20:23:05 osboxes44 pgpool[5287]: [12-1] 2018-02-03 20:23:05: pid
5287: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:23:05 osboxes44 pgpool[5288]: [31-1] 2018-02-03 20:23:05: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:23:05 osboxes44 pgpool[5288]: [32-1] 2018-02-03 20:23:05: pid
5288: LOG:  health check failed on node 1 (timeout:0)
Feb  3 20:23:05 osboxes44 pgpool[5288]: [33-1] 2018-02-03 20:23:05: pid
5288: LOG:  received degenerate backend request for node_id: 1 from pid
[5288]
Feb  3 20:23:05 osboxes44 pgpool[5246]: [21-1] 2018-02-03 20:23:05: pid
5246: LOG:  new IPC connection received
Feb  3 20:23:05 osboxes44 pgpool[5246]: [22-1] 2018-02-03 20:23:05: pid
5246: LOG:  failover request from local pgpool-II node received on IPC
interface is forwarded to master watchdog node "osboxes75:5431 Linux
osboxes75"
Feb  3 20:23:05 osboxes44 pgpool[5246]: [22-2] 2018-02-03 20:23:05: pid
5246: DETAIL:  waiting for the reply...
Feb  3 20:23:10 osboxes44 pgpool[5246]: [23-1] 2018-02-03 20:23:10: pid
5246: LOG:  remote node "osboxes75:5431 Linux osboxes75" is not replying..
Feb  3 20:23:10 osboxes44 pgpool[5246]: [23-2] 2018-02-03 20:23:10: pid
5246: DETAIL:  marking the node as lost
Feb  3 20:23:10 osboxes44 pgpool[5246]: [24-1] 2018-02-03 20:23:10: pid
5246: LOG:  remote node "osboxes75:5431 Linux osboxes75" is lost
Feb  3 20:23:10 osboxes44 pgpool[5246]: [25-1] 2018-02-03 20:23:10: pid
5246: LOG:  watchdog cluster has lost the coordinator node
Feb  3 20:23:10 osboxes44 pgpool[5246]: [26-1] 2018-02-03 20:23:10: pid
5246: LOG:  unassigning the remote node "osboxes75:5431 Linux osboxes75"
from watchdog cluster master
Feb  3 20:23:10 osboxes44 pgpool[5246]: [27-1] 2018-02-03 20:23:10: pid
5246: LOG:  We have lost the cluster master node "osboxes75:5431 Linux
osboxes75"
Feb  3 20:23:10 osboxes44 pgpool[5246]: [28-1] 2018-02-03 20:23:10: pid
5246: LOG:  watchdog node state changed from [STANDBY] to [JOINING]
Feb  3 20:23:10 osboxes44 pgpool[5246]: [29-1] 2018-02-03 20:23:10: pid
5246: LOG:  connect on socket failed
Feb  3 20:23:10 osboxes44 pgpool[5246]: [29-2] 2018-02-03 20:23:10: pid
5246: DETAIL:  connect failed with error: "Network is unreachable"
Feb  3 20:23:10 osboxes44 pgpool[5288]: [34-1] 2018-02-03 20:23:10: pid
5288: LOG:  degenerate backend request for 1 node(s) from pid [5288] is
canceled by other pgpool
Feb  3 20:23:14 osboxes44 pgpool[5246]: [30-1] 2018-02-03 20:23:14: pid
5246: LOG:  watchdog node state changed from [JOINING] to [INITIALIZING]
Feb  3 20:23:15 osboxes44 pgpool[5287]: [13-1] 2018-02-03 20:23:15: pid
5287: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432" with
error "Network is unreachable"
Feb  3 20:23:15 osboxes44 pgpool[5246]: [31-1] 2018-02-03 20:23:15: pid
5246: LOG:  I am the only alive node in the watchdog cluster
Feb  3 20:23:15 osboxes44 pgpool[5246]: [31-2] 2018-02-03 20:23:15: pid
5246: HINT:  skiping stand for coordinator state
Feb  3 20:23:15 osboxes44 pgpool[5246]: [32-1] 2018-02-03 20:23:15: pid
5246: LOG:  watchdog node state changed from [INITIALIZING] to [MASTER]
Feb  3 20:23:15 osboxes44 pgpool[5246]: [33-1] 2018-02-03 20:23:15: pid
5246: LOG:  I am announcing my self as master/coordinator watchdog node
Feb  3 20:23:19 osboxes44 pgpool[5246]: [34-1] 2018-02-03 20:23:19: pid
5246: LOG:  I am the cluster leader node
Feb  3 20:23:19 osboxes44 pgpool[5246]: [34-2] 2018-02-03 20:23:19: pid
5246: DETAIL:  our declare coordinator message is accepted by all nodes
Feb  3 20:23:19 osboxes44 pgpool[5246]: [35-1] 2018-02-03 20:23:19: pid
5246: LOG:  setting the local node "osboxes44:5431 Linux osboxes44" as
watchdog cluster master
Feb  3 20:23:19 osboxes44 pgpool[5246]: [36-1] 2018-02-03 20:23:19: pid
5246: LOG:  I am the cluster leader node. Starting escalation process
Feb  3 20:23:19 osboxes44 pgpool[5246]: [37-1] 2018-02-03 20:23:19: pid
5246: LOG:  escalation process started with PID:31303
Feb  3 20:23:19 osboxes44 pgpool[5246]: [38-1] 2018-02-03 20:23:19: pid
5246: LOG:  new IPC connection received
Feb  3 20:23:19 osboxes44 pgpool[31303]: [37-1] 2018-02-03 20:23:19: pid
31303: LOG:  watchdog: escalation started
Feb  3 20:23:19 osboxes44 pgpool[31303]: [38-1] 2018-02-03 20:23:19: pid
31303: LOG:  failed to acquire the delegate IP address
Feb  3 20:23:19 osboxes44 pgpool[31303]: [38-2] 2018-02-03 20:23:19: pid
31303: DETAIL:  'if_up_cmd' failed
Feb  3 20:23:19 osboxes44 pgpool[5246]: [39-1] 2018-02-03 20:23:19: pid
5246: LOG:  watchdog escalation process with pid: 31303 exit with SUCCESS.
Feb  3 20:23:22 osboxes44 pgpool[5288]: [35-1] 2018-02-03 20:23:22: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:22 osboxes44 pgpool[5288]: [36-1] 2018-02-03 20:23:22: pid
5288: LOG:  health check retrying on DB node: 1 (round:1)
Feb  3 20:23:25 osboxes44 pgpool[5288]: [37-1] 2018-02-03 20:23:25: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:25 osboxes44 pgpool[5288]: [38-1] 2018-02-03 20:23:25: pid
5288: LOG:  health check retrying on DB node: 1 (round:2)
Feb  3 20:23:25 osboxes44 pgpool[5287]: [14-1] 2018-02-03 20:23:25: pid
5287: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:28 osboxes44 pgpool[5288]: [39-1] 2018-02-03 20:23:28: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:28 osboxes44 pgpool[5288]: [40-1] 2018-02-03 20:23:28: pid
5288: LOG:  health check retrying on DB node: 1 (round:3)
Feb  3 20:23:31 osboxes44 pgpool[5288]: [41-1] 2018-02-03 20:23:31: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:31 osboxes44 pgpool[5288]: [42-1] 2018-02-03 20:23:31: pid
5288: LOG:  health check retrying on DB node: 1 (round:4)
Feb  3 20:23:34 osboxes44 pgpool[5288]: [43-1] 2018-02-03 20:23:34: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:37 osboxes44 pgpool[5287]: [15-1] 2018-02-03 20:23:37: pid
5287: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:37 osboxes44 pgpool[5288]: [45-1] 2018-02-03 20:23:37: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:37 osboxes44 pgpool[5288]: [46-1] 2018-02-03 20:23:37: pid
5288: LOG:  health check retrying on DB node: 1 (round:6)
Feb  3 20:23:41 osboxes44 pgpool[5288]: [47-1] 2018-02-03 20:23:41: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:41 osboxes44 pgpool[5288]: [48-1] 2018-02-03 20:23:41: pid
5288: LOG:  health check retrying on DB node: 1 (round:7)
Feb  3 20:23:44 osboxes44 pgpool[5288]: [49-1] 2018-02-03 20:23:44: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:44 osboxes44 pgpool[5288]: [50-1] 2018-02-03 20:23:44: pid
5288: LOG:  health check retrying on DB node: 1 (round:8)
Feb  3 20:23:47 osboxes44 pgpool[5288]: [51-1] 2018-02-03 20:23:47: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:47 osboxes44 pgpool[5288]: [52-1] 2018-02-03 20:23:47: pid
5288: LOG:  health check retrying on DB node: 1 (round:9)
Feb  3 20:23:50 osboxes44 pgpool[5288]: [53-1] 2018-02-03 20:23:50: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:50 osboxes44 pgpool[5288]: [54-1] 2018-02-03 20:23:50: pid
5288: LOG:  health check retrying on DB node: 1 (round:10)
Feb  3 20:23:50 osboxes44 pgpool[5287]: [16-1] 2018-02-03 20:23:50: pid
5287: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:53 osboxes44 pgpool[5288]: [55-1] 2018-02-03 20:23:53: pid
5288: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:23:53 osboxes44 pgpool[5288]: [56-1] 2018-02-03 20:23:53: pid
5288: LOG:  health check failed on node 1 (timeout:0)
Feb  3 20:23:53 osboxes44 pgpool[5288]: [57-1] 2018-02-03 20:23:53: pid
5288: LOG:  received degenerate backend request for node_id: 1 from pid
[5288]
Feb  3 20:23:53 osboxes44 pgpool[5246]: [40-1] 2018-02-03 20:23:53: pid
5246: LOG:  new IPC connection received
Feb  3 20:23:53 osboxes44 pgpool[5246]: [41-1] 2018-02-03 20:23:53: pid
5246: LOG:  watchdog received the failover command from local pgpool-II on
IPC interface
Feb  3 20:23:53 osboxes44 pgpool[5246]: [42-1] 2018-02-03 20:23:53: pid
5246: LOG:  watchdog is processing the failover command
[DEGENERATE_BACKEND_REQUEST] received from local pgpool-II on IPC interface
Feb  3 20:23:53 osboxes44 pgpool[5246]: [43-1] 2018-02-03 20:23:53: pid
5246: LOG:  we have got the consensus to perform the failover
Feb  3 20:23:53 osboxes44 pgpool[5246]: [43-2] 2018-02-03 20:23:53: pid
5246: DETAIL:  1 node(s) voted in the favor
Feb  3 20:23:53 osboxes44 pgpool[5244]: [12-1] 2018-02-03 20:23:53: pid
5244: LOG:  Pgpool-II parent process has received failover request
Feb  3 20:23:53 osboxes44 pgpool[5246]: [44-1] 2018-02-03 20:23:53: pid
5246: LOG:  new IPC connection received
Feb  3 20:23:53 osboxes44 pgpool[5246]: [45-1] 2018-02-03 20:23:53: pid
5246: LOG:  received the failover indication from Pgpool-II on IPC interface
Feb  3 20:23:53 osboxes44 pgpool[5246]: [46-1] 2018-02-03 20:23:53: pid
5246: LOG:  watchdog is informed of failover end by the main process
Feb  3 20:23:53 osboxes44 pgpool[5244]: [13-1] 2018-02-03 20:23:53: pid
5244: LOG:  starting degeneration. shutdown host osboxes75(5432)
Feb  3 20:23:53 osboxes44 pgpool[5244]: [14-1] 2018-02-03 20:23:53: pid
5244: LOG:  failover: no valid backends node found
Feb  3 20:23:53 osboxes44 pgpool[5244]: [15-1] 2018-02-03 20:23:53: pid
5244: LOG:  Restart all children
*Feb  3 20:23:53 osboxes44 pgpool[5244]: [16-1] 2018-02-03 20:23:53: pid
5244: LOG:  execute command: /etc/pgpool-II-96/failover.sh 1 ""*
Feb  3 20:24:00 osboxes44 pgpool[5287]: [17-1] 2018-02-03 20:24:00: pid
5287: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:24:12 osboxes44 pgpool[5287]: [18-1] 2018-02-03 20:24:12: pid
5287: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb  3 20:24:13 osboxes44 pgpool[5244]: [17-1] 2018-02-03 20:24:13: pid
5244: LOG:  find_primary_node_repeatedly: waiting for finding a primary node
Feb  3 20:24:13 osboxes44 pgpool[5244]: [17-1] 2018-02-03 20:24:13: pid
5244: LOG:  find_primary_node_repeatedly: waiting for finding a primary node
Feb  3 20:24:13 osboxes44 pgpool[5244]: [18-1] 2018-02-03 20:24:13: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:13 osboxes44 pgpool[5244]: [19-1] 2018-02-03 20:24:13: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:14 osboxes44 pgpool[5244]: [20-1] 2018-02-03 20:24:14: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:14 osboxes44 pgpool[5244]: [21-1] 2018-02-03 20:24:14: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:15 osboxes44 pgpool[5244]: [22-1] 2018-02-03 20:24:15: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:15 osboxes44 pgpool[5244]: [23-1] 2018-02-03 20:24:15: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:16 osboxes44 pgpool[5244]: [24-1] 2018-02-03 20:24:16: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:16 osboxes44 pgpool[5244]: [25-1] 2018-02-03 20:24:16: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:17 osboxes44 pgpool[5244]: [26-1] 2018-02-03 20:24:17: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:17 osboxes44 pgpool[5244]: [27-1] 2018-02-03 20:24:17: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:18 osboxes44 pgpool[5244]: [28-1] 2018-02-03 20:24:18: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:18 osboxes44 pgpool[5244]: [29-1] 2018-02-03 20:24:18: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:19 osboxes44 pgpool[5244]: [30-1] 2018-02-03 20:24:19: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:19 osboxes44 pgpool[5244]: [31-1] 2018-02-03 20:24:19: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:20 osboxes44 pgpool[5244]: [32-1] 2018-02-03 20:24:20: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:20 osboxes44 pgpool[5244]: [33-1] 2018-02-03 20:24:20: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:21 osboxes44 pgpool[5244]: [34-1] 2018-02-03 20:24:21: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:21 osboxes44 pgpool[5244]: [35-1] 2018-02-03 20:24:21: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:22 osboxes44 pgpool[5244]: [36-1] 2018-02-03 20:24:22: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:22 osboxes44 pgpool[5244]: [37-1] 2018-02-03 20:24:22: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:23 osboxes44 pgpool[5244]: [38-1] 2018-02-03 20:24:23: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:23 osboxes44 pgpool[5244]: [39-1] 2018-02-03 20:24:23: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:24 osboxes44 pgpool[5244]: [40-1] 2018-02-03 20:24:24: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:24 osboxes44 pgpool[5244]: [41-1] 2018-02-03 20:24:24: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:25 osboxes44 pgpool[5244]: [42-1] 2018-02-03 20:24:25: pid
5244: LOG:  find_primary_node: checking backend no 0
Feb  3 20:24:25 osboxes44 pgpool[5244]: [43-1] 2018-02-03 20:24:25: pid
5244: LOG:  find_primary_node: checking backend no 1
Feb  3 20:24:25 osboxes44 pgpool[5287]: [19-1] 2018-02-03 20:24:25: pid
5287: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"




*Instance 2*


Feb 26 13:46:30 osboxes44 pgpool[4398]: [56-1] 2018-02-26 13:46:30: pid
4398: LOG:  remote node "osboxes75:5431 Linux osboxes75" is shutting down
Feb 26 13:46:30 osboxes44 pgpool[4398]: [57-1] 2018-02-26 13:46:30: pid
4398: LOG:  watchdog cluster has lost the coordinator node
Feb 26 13:46:30 osboxes44 pgpool[4398]: [58-1] 2018-02-26 13:46:30: pid
4398: LOG:  unassigning the remote node "osboxes75:5431 Linux osboxes75"
from watchdog cluster master
Feb 26 13:46:30 osboxes44 pgpool[4398]: [59-1] 2018-02-26 13:46:30: pid
4398: LOG:  We have lost the cluster master node "osboxes75:5431 Linux
osboxes75"
Feb 26 13:46:30 osboxes44 pgpool[4398]: [60-1] 2018-02-26 13:46:30: pid
4398: LOG:  watchdog node state changed from [STANDBY] to [JOINING]
Feb 26 13:46:34 osboxes44 pgpool[4398]: [61-1] 2018-02-26 13:46:34: pid
4398: LOG:  watchdog node state changed from [JOINING] to [INITIALIZING]
Feb 26 13:46:35 osboxes44 pgpool[4398]: [62-1] 2018-02-26 13:46:35: pid
4398: LOG:  I am the only alive node in the watchdog cluster
Feb 26 13:46:35 osboxes44 pgpool[4398]: [62-2] 2018-02-26 13:46:35: pid
4398: HINT:  skipping stand for coordinator state
Feb 26 13:46:35 osboxes44 pgpool[4398]: [63-1] 2018-02-26 13:46:35: pid
4398: LOG:  watchdog node state changed from [INITIALIZING] to [MASTER]
Feb 26 13:46:35 osboxes44 pgpool[4398]: [64-1] 2018-02-26 13:46:35: pid
4398: LOG:  I am announcing my self as master/coordinator watchdog node
Feb 26 13:46:39 osboxes44 pgpool[4398]: [65-1] 2018-02-26 13:46:39: pid
4398: LOG:  I am the cluster leader node
Feb 26 13:46:39 osboxes44 pgpool[4398]: [65-2] 2018-02-26 13:46:39: pid
4398: DETAIL:  our declare coordinator message is accepted by all nodes
Feb 26 13:46:39 osboxes44 pgpool[4398]: [66-1] 2018-02-26 13:46:39: pid
4398: LOG:  setting the local node "osboxes44:5431 Linux osboxes44" as
watchdog cluster master
Feb 26 13:46:39 osboxes44 pgpool[4398]: [67-1] 2018-02-26 13:46:39: pid
4398: LOG:  I am the cluster leader node. Starting escalation process
Feb 26 13:46:39 osboxes44 pgpool[4398]: [68-1] 2018-02-26 13:46:39: pid
4398: LOG:  escalation process started with PID:5976
Feb 26 13:46:39 osboxes44 pgpool[4398]: [69-1] 2018-02-26 13:46:39: pid
4398: LOG:  new IPC connection received
Feb 26 13:46:39 osboxes44 pgpool[5976]: [68-1] 2018-02-26 13:46:39: pid
5976: LOG:  watchdog: escalation started
Feb 26 13:46:39 osboxes44 pgpool[5976]: [69-1] 2018-02-26 13:46:39: pid
5976: LOG:  failed to acquire the delegate IP address
Feb 26 13:46:39 osboxes44 pgpool[5976]: [69-2] 2018-02-26 13:46:39: pid
5976: DETAIL:  'if_up_cmd' failed
Feb 26 13:46:39 osboxes44 pgpool[4398]: [70-1] 2018-02-26 13:46:39: pid
4398: LOG:  watchdog escalation process with pid: 5976 exit with SUCCESS.
Feb 26 13:47:05 osboxes44 pgpool[4401]: [8-1] 2018-02-26 13:47:05: pid
4401: LOG:  informing the node status change to watchdog
Feb 26 13:47:05 osboxes44 pgpool[4401]: [8-2] 2018-02-26 13:47:05: pid
4401: DETAIL:  node id :1 status = "NODE DEAD" message:"No heartbeat signal
from node"
Feb 26 13:47:05 osboxes44 pgpool[4398]: [71-1] 2018-02-26 13:47:05: pid
4398: LOG:  new IPC connection received
Feb 26 13:47:05 osboxes44 pgpool[4398]: [72-1] 2018-02-26 13:47:05: pid
4398: LOG:  received node status change ipc message
Feb 26 13:47:05 osboxes44 pgpool[4398]: [72-2] 2018-02-26 13:47:05: pid
4398: DETAIL:  No heartbeat signal from node
Feb 26 13:47:05 osboxes44 pgpool[4398]: [73-1] 2018-02-26 13:47:05: pid
4398: LOG:  remote node "osboxes75:5431 Linux osboxes75" is shutting down
Feb 26 13:48:06 osboxes44 pgpool[4434]: [11-1] 2018-02-26 13:48:06: pid
4434: LOG:  forked new pcp worker, pid=5996 socket=8
Feb 26 13:48:06 osboxes44 pgpool[5996]: [11-1] 2018-02-26 13:48:06: pid
5996: FATAL:  authentication failed for user "pgpool"
Feb 26 13:48:06 osboxes44 pgpool[5996]: [11-2] 2018-02-26 13:48:06: pid
5996: DETAIL:  username and/or password does not match
Feb 26 13:48:06 osboxes44 pgpool[4434]: [12-1] 2018-02-26 13:48:06: pid
4434: LOG:  PCP process with pid: 5996 exit with SUCCESS.
Feb 26 13:48:06 osboxes44 pgpool[4434]: [13-1] 2018-02-26 13:48:06: pid
4434: LOG:  PCP process with pid: 5996 exits with status 256
Feb 26 13:48:09 osboxes44 pgpool[4434]: [14-1] 2018-02-26 13:48:09: pid
4434: LOG:  forked new pcp worker, pid=5999 socket=8
Feb 26 13:48:09 osboxes44 pgpool[4398]: [74-1] 2018-02-26 13:48:09: pid
4398: LOG:  new IPC connection received
Feb 26 13:48:09 osboxes44 pgpool[4434]: [15-1] 2018-02-26 13:48:09: pid
4434: LOG:  PCP process with pid: 5999 exit with SUCCESS.
Feb 26 13:48:09 osboxes44 pgpool[4434]: [16-1] 2018-02-26 13:48:09: pid
4434: LOG:  PCP process with pid: 5999 exits with status 0
Feb 26 13:48:20 osboxes44 pgpool[5951]: [201-1] 2018-02-26 13:48:20: pid
5951: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "No route to host"
Feb 26 13:48:20 osboxes44 pgpool[5951]: [202-1] 2018-02-26 13:48:20: pid
5951: LOG:  received degenerate backend request for node_id: 1 from pid
[5951]
Feb 26 13:48:20 osboxes44 pgpool[4398]: [75-1] 2018-02-26 13:48:20: pid
4398: LOG:  new IPC connection received
Feb 26 13:48:20 osboxes44 pgpool[4398]: [76-1] 2018-02-26 13:48:20: pid
4398: LOG:  watchdog received the failover command from local pgpool-II on
IPC interface
Feb 26 13:48:20 osboxes44 pgpool[4398]: [77-1] 2018-02-26 13:48:20: pid
4398: LOG:  watchdog is processing the failover command
[DEGENERATE_BACKEND_REQUEST] received from local pgpool-II on IPC interface
Feb 26 13:48:20 osboxes44 pgpool[4398]: [78-1] 2018-02-26 13:48:20: pid
4398: LOG:  we have got the consensus to perform the failover
Feb 26 13:48:20 osboxes44 pgpool[4398]: [78-2] 2018-02-26 13:48:20: pid
4398: DETAIL:  1 node(s) voted in the favor
Feb 26 13:48:20 osboxes44 pgpool[5951]: [203-1] 2018-02-26 13:48:20: pid
5951: FATAL:  failed to create a backend connection
Feb 26 13:48:20 osboxes44 pgpool[5951]: [203-2] 2018-02-26 13:48:20: pid
5951: DETAIL:  executing failover on backend
Feb 26 13:48:20 osboxes44 pgpool[4395]: [230-1] 2018-02-26 13:48:20: pid
4395: LOG:  Pgpool-II parent process has received failover request
Feb 26 13:48:20 osboxes44 pgpool[4398]: [79-1] 2018-02-26 13:48:20: pid
4398: LOG:  new IPC connection received
Feb 26 13:48:20 osboxes44 pgpool[4398]: [80-1] 2018-02-26 13:48:20: pid
4398: LOG:  received the failover indication from Pgpool-II on IPC interface
Feb 26 13:48:20 osboxes44 pgpool[4398]: [81-1] 2018-02-26 13:48:20: pid
4398: LOG:  watchdog is informed of failover end by the main process
Feb 26 13:48:20 osboxes44 pgpool[4395]: [231-1] 2018-02-26 13:48:20: pid
4395: LOG:  starting degeneration. shutdown host osboxes75(5432)
Feb 26 13:48:20 osboxes44 pgpool[4395]: [232-1] 2018-02-26 13:48:20: pid
4395: LOG:  failover: no valid backends node found
Feb 26 13:48:20 osboxes44 pgpool[4395]: [233-1] 2018-02-26 13:48:20: pid
4395: LOG:  Restart all children
*Feb 26 13:48:20 osboxes44 pgpool[4395]: [234-1] 2018-02-26 13:48:20: pid
4395: LOG:  execute command: /etc/pgpool-II-96/failover.sh 1 ""*
Feb 26 13:48:21 osboxes44 pgpool[4395]: [235-1] 2018-02-26 13:48:21: pid
4395: LOG:  find_primary_node_repeatedly: waiting for finding a primary node
Feb 26 13:48:21 osboxes44 pgpool[4395]: [236-1] 2018-02-26 13:48:21: pid
4395: LOG:  find_primary_node: checking backend no 0
Feb 26 13:48:21 osboxes44 pgpool[4395]: [237-1] 2018-02-26 13:48:21: pid
4395: LOG:  find_primary_node: checking backend no 1
Feb 26 13:48:22 osboxes44 pgpool[4395]: [238-1] 2018-02-26 13:48:22: pid
4395: LOG:  find_primary_node: checking backend no 0
Feb 26 13:48:22 osboxes44 pgpool[4395]: [239-1] 2018-02-26 13:48:22: pid
4395: LOG:  find_primary_node: checking backend no 1
Feb 26 13:48:23 osboxes44 pgpool[4395]: [240-1] 2018-02-26 13:48:23: pid
4395: LOG:  find_primary_node: checking backend no 0
Feb 26 13:48:23 osboxes44 pgpool[4395]: [241-1] 2018-02-26 13:48:23: pid
4395: LOG:  find_primary_node: checking backend no 1
Feb 26 13:48:24 osboxes44 pgpool[4395]: [242-1] 2018-02-26 13:48:24: pid
4395: LOG:  find_primary_node: checking backend no 0
Feb 26 13:48:24 osboxes44 pgpool[4395]: [243-1] 2018-02-26 13:48:24: pid
4395: LOG:  find_primary_node: checking backend no 1
Feb 26 13:48:25 osboxes44 pgpool[4395]: [244-1] 2018-02-26 13:48:25: pid
4395: LOG:  find_primary_node: checking backend no 0
Feb 26 13:48:25 osboxes44 pgpool[4395]: [245-1] 2018-02-26 13:48:25: pid
4395: LOG:  find_primary_node: checking backend no 1
Feb 26 13:48:26 osboxes44 pgpool[4395]: [246-1] 2018-02-26 13:48:26: pid
4395: LOG:  find_primary_node: checking backend no 0
Feb 26 13:48:26 osboxes44 pgpool[4395]: [247-1] 2018-02-26 13:48:26: pid
4395: LOG:  find_primary_node: checking backend no 1
Feb 26 13:48:27 osboxes44 pgpool[4395]: [248-1] 2018-02-26 13:48:27: pid
4395: LOG:  find_primary_node: checking backend no 0
Feb 26 13:48:27 osboxes44 pgpool[4395]: [249-1] 2018-02-26 13:48:27: pid
4395: LOG:  find_primary_node: checking backend no 1
Feb 26 13:48:28 osboxes44 pgpool[4395]: [250-1] 2018-02-26 13:48:28: pid
4395: LOG:  find_primary_node: checking backend no 0
Feb 26 13:48:28 osboxes44 pgpool[4395]: [251-1] 2018-02-26 13:48:28: pid
4395: LOG:  find_primary_node: checking backend no 1


*Instance 3*


Mar  1 00:43:22 osboxes44 pgpool[948]: [30-1] 2018-03-01 00:43:22: pid 948:
LOG:  remote node "osboxes75:5431 Linux osboxes75" is shutting down
Mar  1 00:43:22 osboxes44 pgpool[948]: [31-1] 2018-03-01 00:43:22: pid 948:
LOG:  watchdog cluster has lost the coordinator node
Mar  1 00:43:22 osboxes44 pgpool[948]: [32-1] 2018-03-01 00:43:22: pid 948:
LOG:  unassigning the remote node "osboxes75:5431 Linux osboxes75" from
watchdog cluster master
Mar  1 00:43:22 osboxes44 pgpool[948]: [33-1] 2018-03-01 00:43:22: pid 948:
LOG:  We have lost the cluster master node "osboxes75:5431 Linux osboxes75"
Mar  1 00:43:22 osboxes44 pgpool[948]: [34-1] 2018-03-01 00:43:22: pid 948:
LOG:  watchdog node state changed from [STANDBY] to [JOINING]
Mar  1 00:43:23 osboxes44 pgpool[1089]: [13-1] 2018-03-01 00:43:23: pid
1089: ERROR:  failed to authenticate
Mar  1 00:43:23 osboxes44 pgpool[1089]: [13-2] 2018-03-01 00:43:23: pid
1089: DETAIL:  the database system is shutting down
Mar  1 00:43:23 osboxes44 pgpool[1090]: [13-1] 2018-03-01 00:43:23: pid
1090: ERROR:  failed to authenticate
Mar  1 00:43:23 osboxes44 pgpool[1090]: [13-2] 2018-03-01 00:43:23: pid
1090: DETAIL:  the database system is shutting down
Mar  1 00:43:23 osboxes44 pgpool[1090]: [14-1] 2018-03-01 00:43:23: pid
1090: LOG:  health check retrying on DB node: 1 (round:1)
Mar  1 00:43:24 osboxes44 pgpool[1090]: [15-1] 2018-03-01 00:43:24: pid
1090: ERROR:  failed to authenticate
Mar  1 00:43:24 osboxes44 pgpool[1090]: [15-2] 2018-03-01 00:43:24: pid
1090: DETAIL:  the database system is shutting down
Mar  1 00:43:24 osboxes44 pgpool[1090]: [16-1] 2018-03-01 00:43:24: pid
1090: LOG:  health check retrying on DB node: 1 (round:2)
Mar  1 00:43:25 osboxes44 pgpool[1090]: [17-1] 2018-03-01 00:43:25: pid
1090: ERROR:  failed to authenticate
Mar  1 00:43:25 osboxes44 pgpool[1090]: [17-2] 2018-03-01 00:43:25: pid
1090: DETAIL:  the database system is shutting down
Mar  1 00:43:25 osboxes44 pgpool[1090]: [18-1] 2018-03-01 00:43:25: pid
1090: LOG:  health check retrying on DB node: 1 (round:3)
Mar  1 00:43:26 osboxes44 pgpool[948]: [35-1] 2018-03-01 00:43:26: pid 948:
LOG:  watchdog node state changed from [JOINING] to [INITIALIZING]
Mar  1 00:43:26 osboxes44 pgpool[1090]: [19-1] 2018-03-01 00:43:26: pid
1090: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "Connection refused"
Mar  1 00:43:26 osboxes44 pgpool[1090]: [20-1] 2018-03-01 00:43:26: pid
1090: ERROR:  failed to make persistent db connection
Mar  1 00:43:26 osboxes44 pgpool[1090]: [20-2] 2018-03-01 00:43:26: pid
1090: DETAIL:  connection to host:"osboxes75:5432" failed
Mar  1 00:43:26 osboxes44 pgpool[1090]: [21-1] 2018-03-01 00:43:26: pid
1090: LOG:  health check retrying on DB node: 1 (round:4)
Mar  1 00:43:27 osboxes44 pgpool[948]: [36-1] 2018-03-01 00:43:27: pid 948:
LOG:  I am the only alive node in the watchdog cluster
Mar  1 00:43:27 osboxes44 pgpool[948]: [36-2] 2018-03-01 00:43:27: pid 948:
HINT:  skipping stand for coordinator state
Mar  1 00:43:27 osboxes44 pgpool[948]: [37-1] 2018-03-01 00:43:27: pid 948:
LOG:  watchdog node state changed from [INITIALIZING] to [MASTER]
Mar  1 00:43:27 osboxes44 pgpool[948]: [38-1] 2018-03-01 00:43:27: pid 948:
LOG:  I am announcing my self as master/coordinator watchdog node
Mar  1 00:43:27 osboxes44 pgpool[1090]: [22-1] 2018-03-01 00:43:27: pid
1090: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "Connection refused"
Mar  1 00:43:27 osboxes44 pgpool[1090]: [23-1] 2018-03-01 00:43:27: pid
1090: ERROR:  failed to make persistent db connection
Mar  1 00:43:27 osboxes44 pgpool[1090]: [23-2] 2018-03-01 00:43:27: pid
1090: DETAIL:  connection to host:"osboxes75:5432" failed
Mar  1 00:43:27 osboxes44 pgpool[1090]: [24-1] 2018-03-01 00:43:27: pid
1090: LOG:  health check retrying on DB node: 1 (round:5)
Mar  1 00:43:31 osboxes44 pgpool[948]: [39-1] 2018-03-01 00:43:31: pid 948:
LOG:  I am the cluster leader node
Mar  1 00:43:31 osboxes44 pgpool[948]: [39-2] 2018-03-01 00:43:31: pid 948:
DETAIL:  our declare coordinator message is accepted by all nodes
Mar  1 00:43:31 osboxes44 pgpool[948]: [40-1] 2018-03-01 00:43:31: pid 948:
LOG:  setting the local node "osboxes44:5431 Linux osboxes44" as watchdog
cluster master
Mar  1 00:43:31 osboxes44 pgpool[948]: [41-1] 2018-03-01 00:43:31: pid 948:
LOG:  I am the cluster leader node. Starting escalation process
Mar  1 00:43:31 osboxes44 pgpool[948]: [42-1] 2018-03-01 00:43:31: pid 948:
LOG:  escalation process started with PID:5329
Mar  1 00:43:31 osboxes44 pgpool[948]: [43-1] 2018-03-01 00:43:31: pid 948:
LOG:  new IPC connection received
Mar  1 00:43:31 osboxes44 pgpool[5329]: [42-1] 2018-03-01 00:43:31: pid
5329: LOG:  watchdog: escalation started
Mar  1 00:43:31 osboxes44 pgpool[5329]: [43-1] 2018-03-01 00:43:31: pid
5329: LOG:  failed to acquire the delegate IP address
Mar  1 00:43:31 osboxes44 pgpool[5329]: [43-2] 2018-03-01 00:43:31: pid
5329: DETAIL:  'if_up_cmd' failed
Mar  1 00:43:31 osboxes44 pgpool[5329]: [44-1] 2018-03-01 00:43:31: pid
5329: WARNING:  watchdog escalation failed to acquire delegate IP
Mar  1 00:43:31 osboxes44 pgpool[948]: [44-1] 2018-03-01 00:43:31: pid 948:
LOG:  watchdog escalation process with pid: 5329 exit with SUCCESS.
Mar  1 00:43:38 osboxes44 pgpool[1090]: [25-1] 2018-03-01 00:43:38: pid
1090: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
timed out
Mar  1 00:43:38 osboxes44 pgpool[1090]: [26-1] 2018-03-01 00:43:38: pid
1090: ERROR:  failed to make persistent db connection
Mar  1 00:43:38 osboxes44 pgpool[1090]: [26-2] 2018-03-01 00:43:38: pid
1090: DETAIL:  connection to host:"osboxes75:5432" failed
Mar  1 00:43:38 osboxes44 pgpool[1090]: [27-1] 2018-03-01 00:43:38: pid
1090: LOG:  health check retrying on DB node: 1 (round:6)
Mar  1 00:43:41 osboxes44 pgpool[1084]: [13-1] 2018-03-01 00:43:41: pid
1084: LOG:  trying connecting to PostgreSQL server on "osboxes75:5432" by
INET socket
Mar  1 00:43:41 osboxes44 pgpool[1084]: [13-2] 2018-03-01 00:43:41: pid
1084: DETAIL:  timed out. retrying...
Mar  1 00:43:43 osboxes44 pgpool[1089]: [14-1] 2018-03-01 00:43:43: pid
1089: LOG:  trying connecting to PostgreSQL server on "osboxes75:5432" by
INET socket
Mar  1 00:43:43 osboxes44 pgpool[1089]: [14-2] 2018-03-01 00:43:43: pid
1089: DETAIL:  timed out. retrying...
Mar  1 00:43:49 osboxes44 pgpool[1090]: [28-1] 2018-03-01 00:43:49: pid
1090: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
timed out
Mar  1 00:43:49 osboxes44 pgpool[1090]: [29-1] 2018-03-01 00:43:49: pid
1090: ERROR:  failed to make persistent db connection
Mar  1 00:43:49 osboxes44 pgpool[1090]: [29-2] 2018-03-01 00:43:49: pid
1090: DETAIL:  connection to host:"osboxes75:5432" failed
Mar  1 00:43:49 osboxes44 pgpool[1090]: [30-1] 2018-03-01 00:43:49: pid
1090: LOG:  health check retrying on DB node: 1 (round:7)
Mar  1 00:43:51 osboxes44 pgpool[1084]: [14-1] 2018-03-01 00:43:51: pid
1084: LOG:  trying connecting to PostgreSQL server on "osboxes75:5432" by
INET socket
Mar  1 00:43:51 osboxes44 pgpool[1084]: [14-2] 2018-03-01 00:43:51: pid
1084: DETAIL:  timed out. retrying...
Mar  1 00:43:53 osboxes44 pgpool[1046]: [12-1] 2018-03-01 00:43:53: pid
1046: LOG:  informing the node status change to watchdog
Mar  1 00:43:53 osboxes44 pgpool[1046]: [12-2] 2018-03-01 00:43:53: pid
1046: DETAIL:  node id :1 status = "NODE DEAD" message:"No heartbeat signal
from node"
Mar  1 00:43:53 osboxes44 pgpool[948]: [45-1] 2018-03-01 00:43:53: pid 948:
LOG:  new IPC connection received
Mar  1 00:43:53 osboxes44 pgpool[948]: [46-1] 2018-03-01 00:43:53: pid 948:
LOG:  received node status change ipc message
Mar  1 00:43:53 osboxes44 pgpool[948]: [46-2] 2018-03-01 00:43:53: pid 948:
DETAIL:  No heartbeat signal from node
Mar  1 00:43:53 osboxes44 pgpool[948]: [47-1] 2018-03-01 00:43:53: pid 948:
LOG:  remote node "osboxes75:5431 Linux osboxes75" is shutting down
Mar  1 00:43:53 osboxes44 pgpool[1089]: [15-1] 2018-03-01 00:43:53: pid
1089: LOG:  trying connecting to PostgreSQL server on "osboxes75:5432" by
INET socket
Mar  1 00:43:53 osboxes44 pgpool[1089]: [15-2] 2018-03-01 00:43:53: pid
1089: DETAIL:  timed out. retrying...
Mar  1 00:43:57 osboxes44 pgpool[1090]: [31-1] 2018-03-01 00:43:57: pid
1090: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "Connection refused"
Mar  1 00:43:57 osboxes44 pgpool[1090]: [32-1] 2018-03-01 00:43:57: pid
1090: ERROR:  failed to make persistent db connection
Mar  1 00:43:57 osboxes44 pgpool[1090]: [32-2] 2018-03-01 00:43:57: pid
1090: DETAIL:  connection to host:"osboxes75:5432" failed
Mar  1 00:43:57 osboxes44 pgpool[1090]: [33-1] 2018-03-01 00:43:57: pid
1090: LOG:  health check retrying on DB node: 1 (round:8)
Mar  1 00:43:58 osboxes44 pgpool[1090]: [34-1] 2018-03-01 00:43:58: pid
1090: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "Connection refused"
Mar  1 00:43:58 osboxes44 pgpool[1090]: [35-1] 2018-03-01 00:43:58: pid
1090: ERROR:  failed to make persistent db connection
Mar  1 00:43:58 osboxes44 pgpool[1090]: [35-2] 2018-03-01 00:43:58: pid
1090: DETAIL:  connection to host:"osboxes75:5432" failed
Mar  1 00:43:58 osboxes44 pgpool[1090]: [36-1] 2018-03-01 00:43:58: pid
1090: LOG:  health check retrying on DB node: 1 (round:9)
Mar  1 00:43:59 osboxes44 pgpool[1090]: [37-1] 2018-03-01 00:43:59: pid
1090: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "Connection refused"
Mar  1 00:43:59 osboxes44 pgpool[1090]: [38-1] 2018-03-01 00:43:59: pid
1090: ERROR:  failed to make persistent db connection
Mar  1 00:43:59 osboxes44 pgpool[1090]: [38-2] 2018-03-01 00:43:59: pid
1090: DETAIL:  connection to host:"osboxes75:5432" failed
Mar  1 00:43:59 osboxes44 pgpool[1090]: [39-1] 2018-03-01 00:43:59: pid
1090: LOG:  health check retrying on DB node: 1 (round:10)
Mar  1 00:44:00 osboxes44 pgpool[1090]: [40-1] 2018-03-01 00:44:00: pid
1090: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "Connection refused"
Mar  1 00:44:00 osboxes44 pgpool[1090]: [41-1] 2018-03-01 00:44:00: pid
1090: ERROR:  failed to make persistent db connection
Mar  1 00:44:00 osboxes44 pgpool[1090]: [41-2] 2018-03-01 00:44:00: pid
1090: DETAIL:  connection to host:"osboxes75:5432" failed
Mar  1 00:44:00 osboxes44 pgpool[1090]: [42-1] 2018-03-01 00:44:00: pid
1090: LOG:  health check failed on node 1 (timeout:0)
Mar  1 00:44:00 osboxes44 pgpool[1090]: [43-1] 2018-03-01 00:44:00: pid
1090: LOG:  received degenerate backend request for node_id: 1 from pid
[1090]
Mar  1 00:44:00 osboxes44 pgpool[948]: [48-1] 2018-03-01 00:44:00: pid 948:
LOG:  new IPC connection received
Mar  1 00:44:00 osboxes44 pgpool[948]: [49-1] 2018-03-01 00:44:00: pid 948:
LOG:  watchdog received the failover command from local pgpool-II on IPC
interface
Mar  1 00:44:00 osboxes44 pgpool[948]: [50-1] 2018-03-01 00:44:00: pid 948:
LOG:  watchdog is processing the failover command
[DEGENERATE_BACKEND_REQUEST] received from local pgpool-II on IPC interface
Mar  1 00:44:00 osboxes44 pgpool[948]: [51-1] 2018-03-01 00:44:00: pid 948:
LOG:  we have got the consensus to perform the failover
Mar  1 00:44:00 osboxes44 pgpool[948]: [51-2] 2018-03-01 00:44:00: pid 948:
DETAIL:  1 node(s) voted in the favor
Mar  1 00:44:00 osboxes44 pgpool[905]: [20-1] 2018-03-01 00:44:00: pid 905:
LOG:  Pgpool-II parent process has received failover request
Mar  1 00:44:00 osboxes44 pgpool[948]: [52-1] 2018-03-01 00:44:00: pid 948:
LOG:  new IPC connection received
Mar  1 00:44:00 osboxes44 pgpool[948]: [53-1] 2018-03-01 00:44:00: pid 948:
LOG:  received the failover indication from Pgpool-II on IPC interface
Mar  1 00:44:00 osboxes44 pgpool[948]: [54-1] 2018-03-01 00:44:00: pid 948:
LOG:  watchdog is informed of failover end by the main process
Mar  1 00:44:00 osboxes44 pgpool[905]: [21-1] 2018-03-01 00:44:00: pid 905:
LOG:  starting degeneration. shutdown host osboxes75(5432)
Mar  1 00:44:00 osboxes44 pgpool[905]: [22-1] 2018-03-01 00:44:00: pid 905:
WARNING:  All the DB nodes are in down status and skip writing status file.
Mar  1 00:44:00 osboxes44 pgpool[905]: [23-1] 2018-03-01 00:44:00: pid 905:
LOG:  failover: no valid backends node found
Mar  1 00:44:00 osboxes44 pgpool[905]: [24-1] 2018-03-01 00:44:00: pid 905:
LOG:  Restart all children
*Mar  1 00:44:00 osboxes44 pgpool[905]: [25-1] 2018-03-01 00:44:00: pid
905: LOG:  execute command: /etc/pgpool-II-96/failover.sh 1 ""*
Mar  1 00:44:01 osboxes44 pgpool[905]: [26-1] 2018-03-01 00:44:01: pid 905:
LOG:  find_primary_node_repeatedly: waiting for finding a primary node
Mar  1 00:44:01 osboxes44 pgpool[905]: [27-1] 2018-03-01 00:44:01: pid 905:
LOG:  find_primary_node: checking backend no 0
Mar  1 00:44:01 osboxes44 pgpool[905]: [28-1] 2018-03-01 00:44:01: pid 905:
LOG:  find_primary_node: checking backend no 1
Mar  1 00:44:02 osboxes44 pgpool[905]: [29-1] 2018-03-01 00:44:02: pid 905:
LOG:  find_primary_node: checking backend no 0
Mar  1 00:44:02 osboxes44 pgpool[905]: [30-1] 2018-03-01 00:44:02: pid 905:
LOG:  find_primary_node: checking backend no 1
Mar  1 00:44:03 osboxes44 pgpool[905]: [31-1] 2018-03-01 00:44:03: pid 905:
LOG:  find_primary_node: checking backend no 0
Mar  1 00:44:03 osboxes44 pgpool[905]: [32-1] 2018-03-01 00:44:03: pid 905:
LOG:  find_primary_node: checking backend no 1
Mar  1 00:44:03 osboxes44 pgpool[1089]: [16-1] 2018-03-01 00:44:03: pid
1089: LOG:  trying connecting to PostgreSQL server on "osboxes75:5432" by
INET socket
Mar  1 00:44:03 osboxes44 pgpool[1089]: [16-2] 2018-03-01 00:44:03: pid
1089: DETAIL:  timed out. retrying...
Mar  1 00:44:04 osboxes44 pgpool[905]: [33-1] 2018-03-01 00:44:04: pid 905:
LOG:  find_primary_node: checking backend no 0
Mar  1 00:44:04 osboxes44 pgpool[905]: [34-1] 2018-03-01 00:44:04: pid 905:
LOG:  find_primary_node: checking backend no 1
Mar  1 00:44:04 osboxes44 pgpool[1089]: [17-1] 2018-03-01 00:44:04: pid
1089: LOG:  failed to connect to PostgreSQL server on "osboxes75:5432",
getsockopt() detected error "Connection refused"
Mar  1 00:44:04 osboxes44 pgpool[1089]: [18-1] 2018-03-01 00:44:04: pid
1089: ERROR:  failed to make persistent db connection
Mar  1 00:44:04 osboxes44 pgpool[1089]: [18-2] 2018-03-01 00:44:04: pid
1089: DETAIL:  connection to host:"osboxes75:5432" failed
Mar  1 00:44:05 osboxes44 pgpool[948]: [55-1] 2018-03-01 00:44:05: pid 948:
LOG:  new watchdog node connection is received from "192.168.0.6:52436"
Mar  1 00:44:05 osboxes44 pgpool[948]: [56-1] 2018-03-01 00:44:05: pid 948:
LOG:  new node joined the cluster hostname:"osboxes75" port:9000
pgpool_port:5431
Mar  1 00:44:05 osboxes44 pgpool[948]: [57-1] 2018-03-01 00:44:05: pid 948:
LOG:  new outbound connection to osboxes75:9000
Mar  1 00:44:05 osboxes44 pgpool[905]: [35-1] 2018-03-01 00:44:05: pid 905:
LOG:  find_primary_node: checking backend no 0
Mar  1 00:44:05 osboxes44 pgpool[905]: [36-1] 2018-03-01 00:44:05: pid 905:
LOG:  find_primary_node: checking backend no 1
Mar  1 00:44:06 osboxes44 pgpool[948]: [58-1] 2018-03-01 00:44:06: pid 948:
LOG:  adding watchdog node "osboxes75:5431 Linux osboxes75" to the standby
list
Mar  1 00:44:06 osboxes44 pgpool[905]: [37-1] 2018-03-01 00:44:06: pid 905:
LOG:  find_primary_node: checking backend no 0
Mar  1 00:44:06 osboxes44 pgpool[905]: [38-1] 2018-03-01 00:44:06: pid 905:
LOG:  find_primary_node: checking backend no 1
Mar  1 00:44:06 osboxes44 pgpool[905]: [39-1] 2018-03-01 00:44:06: pid 905:
LOG:  Pgpool-II parent process received watchdog quorum change signal from
watchdog
Mar  1 00:44:06 osboxes44 pgpool[948]: [59-1] 2018-03-01 00:44:06: pid 948:
LOG:  new IPC connection received
Mar  1 00:44:06 osboxes44 pgpool[905]: [40-1] 2018-03-01 00:44:06: pid 905:
LOG:  watchdog cluster now holds the quorum
Mar  1 00:44:06 osboxes44 pgpool[905]: [40-2] 2018-03-01 00:44:06: pid 905:
DETAIL:  updating the state of quarantine backend nodes
Mar  1 00:44:06 osboxes44 pgpool[948]: [60-1] 2018-03-01 00:44:06: pid 948:
LOG:  new IPC connection received




The pgpool.conf is included below:


# ----------------------------
# pgPool-II configuration file
# ----------------------------
#
# This file consists of lines of the form:
#
#   name = value
#
# Whitespace may be used.  Comments are introduced with "#" anywhere on a line.
# The complete list of parameter names and allowed values can be found in the
# pgPool-II documentation.
#
# This file is read on server startup and when the server receives a SIGHUP
# signal.  If you edit the file on a running system, you have to SIGHUP the
# server for the changes to take effect, or use "pgpool reload".  Some
# parameters, which are marked below, require a server shutdown and restart to
# take effect.
#


#------------------------------------------------------------------------------
# CONNECTIONS
#------------------------------------------------------------------------------

# - pgpool Connection Settings -

listen_addresses = 'localhost'
                                   # Host name or IP address to listen on:
                                   # '*' for all, '' for no TCP/IP connections
                                   # (change requires restart)
port = 9999
                                   # Port number
                                   # (change requires restart)
socket_dir = '/tmp'
                                   # Unix domain socket path
                                   # The Debian package defaults to
                                   # /var/run/postgresql
                                   # (change requires restart)


# - pgpool Communication Manager Connection Settings -

pcp_listen_addresses = '*'
                                   # Host name or IP address for pcp process to listen on:
                                   # '*' for all, '' for no TCP/IP connections
                                   # (change requires restart)
pcp_port = 9898
                                   # Port number for pcp
                                   # (change requires restart)
pcp_socket_dir = '/tmp'
                                   # Unix domain socket path for pcp
                                   # The Debian package defaults to
                                   # /var/run/postgresql
                                   # (change requires restart)
listen_backlog_multiplier = 2
                                   # Set the backlog parameter of listen(2) to
                                   # num_init_children * listen_backlog_multiplier.
                                   # (change requires restart)
serialize_accept = off
                                   # whether to serialize accept() call to avoid thundering herd problem
                                   # (change requires restart)

# - Backend Connection Settings -

backend_hostname0 = 'host1'
                                   # Host name or IP address to connect to for backend 0
backend_port0 = 5432
                                   # Port number for backend 0
backend_weight0 = 1
                                   # Weight for backend 0 (only in load balancing mode)
backend_data_directory0 = '/data'
                                   # Data directory for backend 0
backend_flag0 = 'ALLOW_TO_FAILOVER'
                                   # Controls various backend behavior
                                   # ALLOW_TO_FAILOVER, DISALLOW_TO_FAILOVER
                                   # or ALWAYS_MASTER
#backend_hostname1 = 'host2'
#backend_port1 = 5433
#backend_weight1 = 1
#backend_data_directory1 = '/data1'
#backend_flag1 = 'ALLOW_TO_FAILOVER'

# - Authentication -

enable_pool_hba = off
                                   # Use pool_hba.conf for client authentication
pool_passwd = 'pool_passwd'
                                   # File name of pool_passwd for md5 authentication.
                                   # "" disables pool_passwd.
                                   # (change requires restart)
authentication_timeout = 60
                                   # Delay in seconds to complete client authentication
                                   # 0 means no timeout.

# - SSL Connections -

ssl = off
                                   # Enable SSL support
                                   # (change requires restart)
#ssl_key = './server.key'
                                   # Path to the SSL private key file
                                   # (change requires restart)
#ssl_cert = './server.cert'
                                   # Path to the SSL public certificate file
                                   # (change requires restart)
#ssl_ca_cert = ''
                                   # Path to a single PEM format file
                                   # containing CA root certificate(s)
                                   # (change requires restart)
#ssl_ca_cert_dir = ''
                                   # Directory containing CA root certificate(s)
                                   # (change requires restart)


#------------------------------------------------------------------------------
# POOLS
#------------------------------------------------------------------------------

# - Concurrent session and pool size -

num_init_children = 32
                                   # Number of concurrent sessions allowed
                                   # (change requires restart)
max_pool = 4
                                   # Number of connection pool caches per connection
                                   # (change requires restart)

# - Life time -

child_life_time = 300
                                   # Pool exits after being idle for this many seconds
child_max_connections = 0
                                   # Pool exits after receiving that many connections
                                   # 0 means no exit
connection_life_time = 0
                                   # Connection to backend closes after being idle for this many seconds
                                   # 0 means no close
client_idle_limit = 0
                                   # Client is disconnected after being idle for that many seconds
                                   # (even inside an explicit transactions!)
                                   # 0 means no disconnection


#------------------------------------------------------------------------------
# LOGS
#------------------------------------------------------------------------------

# - Where to log -

log_destination = 'stderr'
                                   # Where to log
                                   # Valid values are combinations of stderr,
                                   # and syslog. Default to stderr.

# - What to log -

log_line_prefix = '%t: pid %p: '   # printf-style string to output at beginning of each log line.

log_connections = off
                                   # Log connections
log_hostname = off
                                   # Hostname will be shown in ps status
                                   # and in logs if connections are logged
log_statement = off
                                   # Log all statements
log_per_node_statement = off
                                   # Log all statements
                                   # with node and backend informations
log_standby_delay = 'if_over_threshold'
                                   # Log standby delay
                                   # Valid values are combinations of always,
                                   # if_over_threshold, none

# - Syslog specific -

syslog_facility = 'LOCAL0'
                                   # Syslog local facility. Default to LOCAL0
syslog_ident = 'pgpool'
                                   # Syslog program identification string
                                   # Default to 'pgpool'

# - Debug -

#log_error_verbosity = default          # terse, default, or verbose messages

#client_min_messages = notice           # values in order of decreasing detail:
                                        #   debug5
                                        #   debug4
                                        #   debug3
                                        #   debug2
                                        #   debug1
                                        #   log
                                        #   notice
                                        #   warning
                                        #   error

#log_min_messages = warning             # values in order of decreasing detail:
                                        #   debug5
                                        #   debug4
                                        #   debug3
                                        #   debug2
                                        #   debug1
                                        #   info
                                        #   notice
                                        #   warning
                                        #   error
                                        #   log
                                        #   fatal
                                        #   panic

#------------------------------------------------------------------------------
# FILE LOCATIONS
#------------------------------------------------------------------------------

pid_file_name = '/var/run/pgpool/pgpool.pid'
                                   # PID file name
                                   # Can be specified as relative to the
                                   # location of pgpool.conf file or
                                   # as an absolute path
                                   # (change requires restart)
logdir = '/tmp'
                                   # Directory of pgPool status file
                                   # (change requires restart)


#------------------------------------------------------------------------------
# CONNECTION POOLING
#------------------------------------------------------------------------------

connection_cache = on
                                   # Activate connection pools
                                   # (change requires restart)

                                   # Semicolon separated list of queries
                                   # to be issued at the end of a session
                                   # The default is for 8.3 and later
reset_query_list = 'ABORT; DISCARD ALL'
                                   # The following one is for 8.2 and before
#reset_query_list = 'ABORT; RESET ALL; SET SESSION AUTHORIZATION DEFAULT'


#------------------------------------------------------------------------------
# REPLICATION MODE
#------------------------------------------------------------------------------

replication_mode = off
                                   # Activate replication mode
                                   # (change requires restart)
replicate_select = off
                                   # Replicate SELECT statements
                                   # when in replication mode
                                   # replicate_select is higher priority than
                                   # load_balance_mode.

insert_lock = off
                                   # Automatically locks a dummy row or a table
                                   # with INSERT statements to keep SERIAL data
                                   # consistency
                                   # Without SERIAL, no lock will be issued
lobj_lock_table = ''
                                   # When rewriting lo_creat command in
                                   # replication mode, specify table name to
                                   # lock

# - Degenerate handling -

replication_stop_on_mismatch = off
                                   # On disagreement with the packet kind
                                   # sent from backend, degenerate the node
                                   # which is most likely "minority"
                                   # If off, just force to exit this session

failover_if_affected_tuples_mismatch = off
                                   # On disagreement with the number of affected
                                   # tuples in UPDATE/DELETE queries, then
                                   # degenerate the node which is most likely
                                   # "minority".
                                   # If off, just abort the transaction to
                                   # keep the consistency


#------------------------------------------------------------------------------
# LOAD BALANCING MODE
#------------------------------------------------------------------------------

load_balance_mode = on
                                   # Activate load balancing mode
                                   # (change requires restart)
ignore_leading_white_space = on
                                   # Ignore leading white spaces of each query
white_function_list = ''
                                   # Comma separated list of function names
                                   # that don't write to database
                                   # Regexps are accepted
black_function_list = 'currval,lastval,nextval,setval'
                                   # Comma separated list of function names
                                   # that write to database
                                   # Regexps are accepted

database_redirect_preference_list = ''
                                   # comma separated list of pairs of database and node id.
                                   # example: 'postgres:primary,mydb[0-4]:1,mydb[5-9]:2'
                                   # valid for streaming replication mode only.

app_name_redirect_preference_list = ''
                                   # comma separated list of pairs of app name and node id.
                                   # example: 'psql:primary,myapp[0-4]:1,myapp[5-9]:standby'
                                   # valid for streaming replication mode only.
allow_sql_comments = off
                                   # if on, ignore SQL comments when judging if load balance or
                                   # query cache is possible.
                                   # If off, SQL comments effectively prevent the judgment
                                   # (pre 3.4 behavior).

#------------------------------------------------------------------------------
# MASTER/SLAVE MODE
#------------------------------------------------------------------------------

master_slave_mode = on
                                   # Activate master/slave mode
                                   # (change requires restart)
master_slave_sub_mode = 'stream'
                                   # Master/slave sub mode
                                   # Valid values are 'stream', 'slony'
                                   # or 'logical'. Default is 'stream'.
                                   # (change requires restart)

# - Streaming -

sr_check_period = 10
                                   # Streaming replication check period
                                   # Disabled (0) by default
sr_check_user = 'nobody'
                                   # Streaming replication check user
                                   # This is necessary even if you disable
                                   # streaming replication delay check by
                                   # sr_check_period = 0
sr_check_password = ''
                                   # Password for streaming replication check user
sr_check_database = 'postgres'
                                   # Database name for streaming replication check
delay_threshold = 10000000
                                   # Threshold before not dispatching query
                                   # to standby node
                                   # Unit is in bytes
                                   # Disabled (0) by default

# - Special commands -

follow_master_command = ''
                                   # Executes this command after master failover
                                   # Special values:
                                   #   %d = node id
                                   #   %h = host name
                                   #   %p = port number
                                   #   %D = database cluster path
                                   #   %m = new master node id
                                   #   %H = hostname of the new master node
                                   #   %M = old master node id
                                   #   %P = old primary node id
                                   #   %r = new master port number
                                   #   %R = new master database cluster path
                                   #   %% = '%' character

#------------------------------------------------------------------------------
# HEALTH CHECK GLOBAL PARAMETERS
#------------------------------------------------------------------------------

health_check_period = 0
                                   # Health check period
                                   # Disabled (0) by default
health_check_timeout = 20
                                   # Health check timeout
                                   # 0 means no timeout
health_check_user = 'nobody'
                                   # Health check user
health_check_password = ''
                                   # Password for health check user
health_check_database = ''
                                   # Database name for health check. If '',
                                   # tries 'postgres' first, then 'template1'
health_check_max_retries = 0
                                   # Maximum number of times to retry a
                                   # failed health check before giving up.
health_check_retry_delay = 1
                                   # Amount of time to wait (in seconds)
                                   # between retries.
connect_timeout = 10000
                                   # Timeout value in milliseconds before
                                   # giving up connecting to a backend.
                                   # Default is 10000 ms (10 seconds). Users on
                                   # flaky networks may want to increase the
                                   # value. 0 means no timeout.
                                   # Note that this value is not only used for
                                   # health check, but also for ordinary
                                   # connections to backends.
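
For what it's worth, with the overrides near the end of this file
(health_check_period = 10, health_check_max_retries = 10,
health_check_retry_delay = 1, health_check_timeout = 20), the worst case
before pgpool gives up on a node and runs failover is roughly

    health_check_period + max_retries * (health_check_timeout + retry_delay)
    = 10 + 10 * (20 + 1) = 220 seconds

while attempts that fail immediately (e.g. "Network is unreachable", as in
the logs above) cost only the retry delay, so detection there is closer to
10 + 10 * 1 = 20 seconds, matching the ~1 second retry spacing in the log
excerpts.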

#------------------------------------------------------------------------------
# HEALTH CHECK PER NODE PARAMETERS (OPTIONAL)
#------------------------------------------------------------------------------
health_check_period0 = 0
health_check_timeout0 = 20
health_check_user0 = 'nobody'
health_check_password0 = ''
health_check_database0 = ''
health_check_max_retries0 = 0
health_check_retry_delay0 = 1
connect_timeout0 = 10000

#------------------------------------------------------------------------------
# FAILOVER AND FAILBACK
#------------------------------------------------------------------------------

failover_command = ''
                                   # Executes this command at failover
                                   # Special values:
                                   #   %d = node id
                                   #   %h = host name
                                   #   %p = port number
                                   #   %D = database cluster path
                                   #   %m = new master node id
                                   #   %H = hostname of the new master node
                                   #   %M = old master node id
                                   #   %P = old primary node id
                                   #   %r = new master port number
                                   #   %R = new master database cluster path
                                   #   %% = '%' character
failback_command = ''
                                   # Executes this command at failback.
                                   # Special values:
                                   #   %d = node id
                                   #   %h = host name
                                   #   %p = port number
                                   #   %D = database cluster path
                                   #   %m = new master node id
                                   #   %H = hostname of the new master node
                                   #   %M = old master node id
                                   #   %P = old primary node id
                                   #   %r = new master port number
                                   #   %R = new master database cluster path
                                   #   %% = '%' character

fail_over_on_backend_error = on
                                   # Initiates failover when reading/writing
                                   # to the backend communication socket fails
                                   # If set to off, pgpool will report an
                                   # error and disconnect the session.

search_primary_node_timeout = 300
                                   # Timeout in seconds to search for the
                                   # primary node when a failover occurs.
                                   # 0 means no timeout, keep searching
                                   # for a primary node forever.
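
For reference, failover_command is overridden near the end of this file to
'/etc/pgpool-II-96/failover.sh %d %H', so the script receives the failed node
id and the new master hostname. Our actual failover.sh is not reproduced
here; a minimal sketch of a script with that interface (the ssh + pg_ctl
promote step below is an illustrative assumption, not our exact script)
would be:

    #!/bin/bash
    # Hypothetical sketch only. pgpool invokes this as:
    #   /etc/pgpool-II-96/failover.sh <failed_node_id> <new_master_host>
    FAILED_NODE_ID="$1"
    NEW_MASTER_HOST="$2"

    logger "failover.sh: failed node=$FAILED_NODE_ID new master=$NEW_MASTER_HOST"

    # %H expands to an empty string when pgpool has no new master candidate,
    # which matches the bad invocation seen in the logs: failover.sh 1 ""
    if [ -z "$NEW_MASTER_HOST" ]; then
        logger "failover.sh: no new master host supplied, nothing to do"
        exit 1
    fi

    # Promote the surviving standby (assumes passwordless ssh as postgres and
    # the data directory from backend_data_directory0/1 below).
    ssh postgres@"$NEW_MASTER_HOST" \
        "/usr/pgsql-9.6/bin/pg_ctl -D /var/lib/pgsql/9.6/data promote"

The failed invocation in the logs above, failover.sh 1 "", corresponds to %H
expanding to an empty string, i.e. pgpool could not determine a new master
node at that point.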

#------------------------------------------------------------------------------
# ONLINE RECOVERY
#------------------------------------------------------------------------------

recovery_user = 'nobody'
                                   # Online recovery user
recovery_password = ''
                                   # Online recovery password
recovery_1st_stage_command = ''
                                   # Executes a command in first stage
recovery_2nd_stage_command = ''
                                   # Executes a command in second stage
recovery_timeout = 90
                                   # Timeout in seconds to wait for the
                                   # recovering node's postmaster to start up
                                   # 0 means no wait
client_idle_limit_in_recovery = 0
                                   # Client is disconnected after being idle
                                   # for that many seconds in the second stage
                                   # of online recovery
                                   # 0 means no disconnection
                                   # -1 means immediate disconnection


#------------------------------------------------------------------------------
# WATCHDOG
#------------------------------------------------------------------------------

# - Enabling -

use_watchdog = off
                                    # Activates watchdog
                                    # (change requires restart)

# - Connection to upstream servers -

trusted_servers = ''
                                    # trusted server list which are used
                                    # to confirm network connection
                                    # (hostA,hostB,hostC,...)
                                    # (change requires restart)
ping_path = '/bin'
                                    # ping command path
                                    # (change requires restart)

# - Watchdog communication Settings -

wd_hostname = ''
                                    # Host name or IP address of this watchdog
                                    # (change requires restart)
wd_port = 9000
                                    # port number for watchdog service
                                    # (change requires restart)
wd_priority = 1
                                    # priority of this watchdog in leader election
                                    # (change requires restart)

wd_authkey = ''
                                    # Authentication key for watchdog communication
                                    # (change requires restart)

wd_ipc_socket_dir = '/tmp'
                                    # Unix domain socket path for watchdog IPC socket
                                    # The Debian package defaults to
                                    # /var/run/postgresql
                                    # (change requires restart)


# - Virtual IP control Setting -

delegate_IP = ''
                                    # delegate IP address
                                    # If this is empty, the virtual IP is
                                    # never brought up.
                                    # (change requires restart)
if_cmd_path = '/sbin'
                                    # path to the directory where
                                    # if_up/down_cmd exists
                                    # (change requires restart)
if_up_cmd = 'ip addr add $_IP_$/24 dev eth0 label eth0:0'
                                    # startup delegate IP command
                                    # (change requires restart)
if_down_cmd = 'ip addr del $_IP_$/24 dev eth0'
                                    # shutdown delegate IP command
                                    # (change requires restart)
arping_path = '/usr/sbin'
                                    # arping command path
                                    # (change requires restart)
arping_cmd = 'arping -U $_IP_$ -w 1'
                                    # arping command
                                    # (change requires restart)
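
As a side note, the VIP commands above can be exercised by hand to rule out
interface problems. With the delegate_IP from the overrides at the end of
this file (run as root; the enp0s3 device name comes from our if_up_cmd
override, and the label form here follows the stock template rather than our
exact override):

    ip addr add 192.168.0.200/24 dev enp0s3 label enp0s3:0
    arping -U 192.168.0.200 -w 1
    ip addr del 192.168.0.200/24 dev enp0s3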

# - Behavior on escalation Setting -

clear_memqcache_on_escalation = on
                                    # Clear all the query cache on shared memory
                                    # when a standby pgpool escalates to the
                                    # active pgpool (= virtual IP holder).
                                    # This should be off if clients connect to
                                    # pgpool without using the virtual IP.
                                    # (change requires restart)
wd_escalation_command = ''
                                    # Executes this command at escalation
                                    # on the new active pgpool.
                                    # (change requires restart)
wd_de_escalation_command = ''
                                    # Executes this command when the master
                                    # pgpool resigns from being master.
                                    # (change requires restart)

# - Watchdog consensus settings for failover -

failover_when_quorum_exists = on
                                    # Only perform backend node failover
                                    # when the watchdog cluster holds the quorum
                                    # (change requires restart)

failover_require_consensus = on
                                    # Perform failover when the majority of
                                    # Pgpool-II nodes agrees on the backend
                                    # node status change
                                    # (change requires restart)

allow_multiple_failover_requests_from_node = off
                                    # A Pgpool-II node can cast multiple votes
                                    # for building the consensus on failover
                                    # (change requires restart)


# - Lifecheck Setting -

# -- common --

wd_monitoring_interfaces_list = ''
                                    # Comma separated list of interface names
                                    # to monitor.
                                    # If any interface from the list is active,
                                    # the watchdog considers the network fine.
                                    # 'any' to enable monitoring on all
                                    # interfaces except loopback
                                    # '' to disable monitoring
                                    # (change requires restart)

wd_lifecheck_method = 'heartbeat'
                                    # Method of watchdog lifecheck
                                    # ('heartbeat' or 'query' or 'external')
                                    # (change requires restart)
wd_interval = 10
                                    # lifecheck interval (sec) > 0
                                    # (change requires restart)

# -- heartbeat mode --

wd_heartbeat_port = 9694
                                    # Port number for receiving heartbeat signal
                                    # (change requires restart)
wd_heartbeat_keepalive = 2
                                    # Interval time of sending heartbeat signal (sec)
                                    # (change requires restart)
wd_heartbeat_deadtime = 30
                                    # Deadtime interval for heartbeat signal (sec)
                                    # (change requires restart)
heartbeat_destination0 = 'host0_ip1'
                                    # Host name or IP address of destination 0
                                    # for sending heartbeat signal.
                                    # (change requires restart)
heartbeat_destination_port0 = 9694
                                    # Port number of destination 0 for sending
                                    # heartbeat signal. Usually this is the
                                    # same as wd_heartbeat_port.
                                    # (change requires restart)
heartbeat_device0 = ''
                                    # Name of NIC device (such as 'eth0')
                                    # used for sending/receiving heartbeat
                                    # signal to/from destination 0.
                                    # This works only when this is not empty
                                    # and pgpool has root privilege.
                                    # (change requires restart)

#heartbeat_destination1 = 'host0_ip2'
#heartbeat_destination_port1 = 9694
#heartbeat_device1 = ''
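
To confirm the heartbeat actually flows between the two nodes, a capture
along these lines can be run on each host (tcpdump and the enp0s3 device
name from the overrides below are assumptions about the environment):

    # Watchdog heartbeat is UDP; with the settings below it uses port 9694.
    tcpdump -n -i enp0s3 udp port 9694

Each node should see periodic packets from its peer roughly every
wd_heartbeat_keepalive seconds.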

# -- query mode --

wd_life_point = 3
                                    # lifecheck retry times
                                    # (change requires restart)
wd_lifecheck_query = 'SELECT 1'
                                    # lifecheck query to pgpool from watchdog
                                    # (change requires restart)
wd_lifecheck_dbname = 'template1'
                                    # Database name connected for lifecheck
                                    # (change requires restart)
wd_lifecheck_user = 'nobody'
                                    # watchdog user monitoring pgpools in lifecheck
                                    # (change requires restart)
wd_lifecheck_password = ''
                                    # Password for watchdog user in lifecheck
                                    # (change requires restart)

# - Other pgpool Connection Settings -

#other_pgpool_hostname0 = 'host0'
                                    # Host name or IP address to connect to
                                    # for other pgpool 0
                                    # (change requires restart)
#other_pgpool_port0 = 5432
                                    # Port number for other pgpool 0
                                    # (change requires restart)
#other_wd_port0 = 9000
                                    # Port number for other watchdog 0
                                    # (change requires restart)
#other_pgpool_hostname1 = 'host1'
#other_pgpool_port1 = 5432
#other_wd_port1 = 9000


#------------------------------------------------------------------------------
# OTHERS
#------------------------------------------------------------------------------
relcache_expire = 0
                                   # Life time of relation cache in seconds.
                                   # 0 means no cache expiration (the default).
                                   # The relation cache is used to cache the
                                   # query results against the PostgreSQL system
                                   # catalog to obtain various information
                                   # including table structures or whether a
                                   # table is temporary or not. The cache is
                                   # maintained in pgpool child local memory
                                   # and kept as long as the child survives.
                                   # If someone modifies the table by using
                                   # ALTER TABLE or the like, the relcache is
                                   # no longer consistent.
                                   # For this purpose, relcache_expire
                                   # controls the life time of the cache.
relcache_size = 256
                                   # Number of relation cache entries.
                                   # If you frequently see
                                   # "pool_search_relcache: cache replacement happend"
                                   # in the pgpool log, you might want to
                                   # increase this number.

check_temp_table = on
                                   # If on, enable temporary table check in
                                   # SELECT statements. This initiates queries
                                   # against the system catalog of the
                                   # primary/master, thus increasing load on
                                   # the master.
                                   # If you are absolutely sure that your
                                   # system never uses temporary tables and you
                                   # want to save access to the primary/master,
                                   # you could turn this off.
                                   # Default is on.

check_unlogged_table = on
                                   # If on, enable unlogged table check in
                                   # SELECT statements. This initiates queries
                                   # against the system catalog of the
                                   # primary/master, thus increasing load on
                                   # the master.
                                   # If you are absolutely sure that your
                                   # system never uses unlogged tables and you
                                   # want to save access to the primary/master,
                                   # you could turn this off.
                                   # Default is on.

#------------------------------------------------------------------------------
# IN MEMORY QUERY MEMORY CACHE
#------------------------------------------------------------------------------
memory_cache_enabled = off
                                   # If on, use the memory cache functionality,
                                   # off by default
memqcache_method = 'shmem'
                                   # Cache storage method. Either 'shmem'
                                   # (shared memory) or 'memcached'.
                                   # 'shmem' by default
                                   # (change requires restart)
memqcache_memcached_host = 'localhost'
                                   # Memcached host name or IP address.
                                   # Mandatory if memqcache_method = 'memcached'.
                                   # Defaults to localhost.
                                   # (change requires restart)
memqcache_memcached_port = 11211
                                   # Memcached port number.
                                   # Mandatory if memqcache_method = 'memcached'.
                                   # Defaults to 11211.
                                   # (change requires restart)
memqcache_total_size = 67108864
                                   # Total memory size in bytes for storing
                                   # memory cache.
                                   # Mandatory if memqcache_method = 'shmem'.
                                   # Defaults to 64MB.
                                   # (change requires restart)
memqcache_max_num_cache = 1000000
                                   # Total number of cache entries.
                                   # Mandatory if memqcache_method = 'shmem'.
                                   # Each cache entry consumes 48 bytes on
                                   # shared memory.
                                   # Defaults to 1,000,000 (45.8MB).
                                   # (change requires restart)
memqcache_expire = 0
                                   # Memory cache entry life time specified
                                   # in seconds.
                                   # 0 means infinite life time. 0 by default.
                                   # (change requires restart)
memqcache_auto_cache_invalidation = on
                                   # If on, invalidation of query cache is
                                   # triggered by corresponding DDL/DML/DCL
                                   # (and memqcache_expire). If off, it is
                                   # only triggered by memqcache_expire.
                                   # on by default.
                                   # (change requires restart)
memqcache_maxcache = 409600
                                   # Maximum SELECT result size in bytes.
                                   # Must be smaller than
                                   # memqcache_cache_block_size.
                                   # Defaults to 400KB.
                                   # (change requires restart)
memqcache_cache_block_size = 1048576
                                   # Cache block size in bytes.
                                   # Mandatory if memqcache_method = 'shmem'.
                                   # Defaults to 1MB.
                                   # (change requires restart)
memqcache_oiddir = '/var/log/pgpool/oiddir'
                                   # Temporary work directory to record
                                   # table oids
                                   # (change requires restart)
white_memqcache_table_list = ''
                                   # Comma separated list of table names
                                   # to memcache
                                   # that don't write to database
                                   # Regexps are accepted
black_memqcache_table_list = ''
                                   # Comma separated list of table names
                                   # not to memcache
                                   # that don't write to database
                                   # Regexps are accepted

listen_addresses = '*'
port = 5431
backend_hostname0 = 'osboxes44'
backend_port0 = 5432
backend_weight0 = 1
backend_data_directory0 = '/var/lib/pgsql/9.6/data'
backend_flag0 = 'ALLOW_TO_FAILOVER'
backend_hostname1 = 'osboxes75'
backend_port1 = 5432
backend_weight1 = 1
backend_data_directory1 = '/var/lib/pgsql/9.6/data'
backend_flag1 = 'ALLOW_TO_FAILOVER'
enable_pool_hba = on
pid_file_name = '/var/run/pgpool-II-96/pgpool.pid'
sr_check_user = 'pgpool'
sr_check_password = 'secret'
health_check_period = 10
health_check_user = 'pgpool'
health_check_password = 'secret'
failover_command = '/etc/pgpool-II-96/failover.sh %d %H'
recovery_user = 'pgpool'
recovery_password = 'secret'
recovery_1st_stage_command = 'basebackup.sh'
log_destination = 'syslog,stderr'
client_min_messages = log
log_min_messages = info
health_check_max_retries = 10
socket_dir = '/var/run/pgpool-II-96'

use_watchdog = on
delegate_IP = '192.168.0.200'
wd_hostname = 'osboxes44'
wd_port = 9000
ifconfig_path = '/usr/sbin'
arping_path = '/usr/sbin'
wd_lifecheck_method = 'heartbeat'
wd_interval = 5
wd_heartbeat_port = 9694
heartbeat_destination0 = 'osboxes75'
heartbeat_destination_port0 = 9694
other_pgpool_hostname0 = 'osboxes75'
other_pgpool_port0 = 5431
other_wd_port0 = 9000
load_balance_mode = off
if_up_cmd = 'ip addr add $_IP_$/24 dev enp0s3:1 label enp0s3:1'
if_down_cmd = 'ip addr del $_IP_$/24 dev enp0s3:1'
pcp_socket_dir = '/var/run/pgpool-II-96'
wd_ipc_socket_dir = '/var/run/pgpool-II-96'
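
After each failover test, the way I check which node pgpool now considers
primary is "show pool_nodes" through pgpool itself (connection details taken
from this config; adjust user/auth as needed for your setup):

    psql -h 192.168.0.200 -p 5431 -U pgpool -c "show pool_nodes"

When failover works, the surviving node is reported with the primary role;
in the failing instances above, pgpool keeps searching and no primary is
reported.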



Thanks
Pankaj Joshi

