diff --git a/src/config/pool_config_variables.c b/src/config/pool_config_variables.c index a1be26f4..cde97094 100644 --- a/src/config/pool_config_variables.c +++ b/src/config/pool_config_variables.c @@ -413,6 +413,15 @@ static struct config_bool ConfigureNamesBool[] = false, NULL, NULL, NULL }, + { + {"wd_remove_shutdown_nodes", CFGCXT_RELOAD, WATCHDOG_CONFIG, + "Discount properly shutdown watchdog nodes from quorum computation.", + CONFIG_VAR_TYPE_BOOL, false, 0 + }, + &g_pool_config.wd_remove_shutdown_nodes, + false, + NULL, NULL, NULL + }, { {"log_connections", CFGCXT_RELOAD, LOGGING_CONFIG, "Logs each successful connection.", @@ -1983,6 +1992,26 @@ static struct config_int ConfigureNamesInt[] = 0, INT_MAX, NULL, NULL, NULL }, + { + {"wd_lost_node_removal_timeout", CFGCXT_RELOAD, WATCHDOG_CONFIG, + "Time in seconds to remove LOST watchdog nodes from cluster's quorum computation.", + CONFIG_VAR_TYPE_INT, false, GUC_UNIT_S + }, + &g_pool_config.wd_lost_node_removal_timeout, + 0, + 0, INT_MAX, + NULL, NULL, NULL + }, + { + {"wd_initial_node_showup_time", CFGCXT_RELOAD, WATCHDOG_CONFIG, + "Time in seconds to remove NO-SHOW watchdog nodes from cluster's quorum computation.", + CONFIG_VAR_TYPE_INT, false, GUC_UNIT_S + }, + &g_pool_config.wd_initial_node_showup_time, + 0, + 0, INT_MAX, + NULL, NULL, NULL + }, { {"wd_life_point", CFGCXT_INIT, WATCHDOG_CONFIG, diff --git a/src/include/pcp/pcp.h b/src/include/pcp/pcp.h index d4fd8832..96222893 100644 --- a/src/include/pcp/pcp.h +++ b/src/include/pcp/pcp.h @@ -38,9 +38,11 @@ typedef struct PCPWDNodeInfo { int state; + int quorum_state; char nodeName[WD_MAX_HOST_NAMELEN]; char hostName[WD_MAX_HOST_NAMELEN]; /* host name */ char stateName[WD_MAX_HOST_NAMELEN]; /* state name */ + char quorum_state_string[WD_MAX_HOST_NAMELEN]; /* quorum state of this node */ int wd_port; /* watchdog port */ int wd_priority; /* node priority in leader election */ int pgpool_port; /* pgpool port */ @@ -51,6 +53,8 @@ typedef struct
PCPWDNodeInfo typedef struct PCPWDClusterInfo { int remoteNodeCount; + int validRemoteNodeCount; /* remote nodes counted towards quorum; -1 when the server did not report it */ + int nodesRequiredForQuorum; /* -1 when the server did not report it */ int quorumStatus; int aliveNodeCount; bool escalated; diff --git a/src/include/pool_config.h b/src/include/pool_config.h index 8ec4da35..9509c492 100644 --- a/src/include/pool_config.h +++ b/src/include/pool_config.h @@ -538,6 +538,9 @@ typedef struct * votes in a cluster with an even * number of nodes. */ + bool wd_remove_shutdown_nodes; /* discount properly shut down watchdog nodes from quorum computation */ + int wd_lost_node_removal_timeout; /* seconds after which LOST nodes are removed from quorum computation; 0 never removes them */ + int wd_initial_node_showup_time; /* seconds to wait for nodes to show up before removing them from quorum computation; 0 waits forever */ WdLifeCheckMethod wd_lifecheck_method; /* method of lifecheck. * 'heartbeat' or 'query' */ diff --git a/src/include/watchdog/watchdog.h b/src/include/watchdog/watchdog.h index 65ba3189..fa34313e 100644 --- a/src/include/watchdog/watchdog.h +++ b/src/include/watchdog/watchdog.h @@ -130,6 +130,13 @@ typedef enum { NODE_LOST_SHUTDOWN } WD_NODE_LOST_REASONS; +typedef enum { + WD_NODE_PART_OF_QUORUM, /* node counts towards the quorum */ + WD_NODE_SHUTDOWN_REMOVED, /* removed from quorum after a proper shutdown */ + WD_NODE_NO_SHOW_REMOVED, /* removed from quorum because it did not show up in time */ + WD_NODE_LOST_REMOVED /* removed from quorum after staying LOST past the timeout */ +}WD_NODE_QUORUM_STATE; + typedef struct SocketConnection { int sock; /* socket descriptor */ @@ -148,6 +155,11 @@ typedef struct WatchdogNode * from the node */ struct timeval last_sent_time; /* timestamp when last packet was sent on * the node */ + struct timeval lost_time; /* timestamp when the remote node was lost on coordinator + */ + WD_NODE_QUORUM_STATE quorum_state; /* state of node towards watchdog cluster + * quorum calculations + * Only valid for remote nodes */ bool has_lost_us; /* * True when this remote node thinks * we are lost diff --git a/src/include/watchdog/wd_commands.h b/src/include/watchdog/wd_commands.h index 8e43c5bd..9a68fdc4 100644 --- a/src/include/watchdog/wd_commands.h +++ b/src/include/watchdog/wd_commands.h @@ -32,6 +32,8 @@ typedef struct WDNodeInfo { int state; + int quorum_state; + char quorum_state_string[WD_MAX_HOST_NAMELEN]; char nodeName[WD_MAX_HOST_NAMELEN]; char hostName[WD_MAX_HOST_NAMELEN]; /* host name */ char
stateName[WD_MAX_HOST_NAMELEN]; /* watchdog state name */ diff --git a/src/libs/pcp/pcp.c b/src/libs/pcp/pcp.c index 1c5ef57e..1863d5a8 100644 --- a/src/libs/pcp/pcp.c +++ b/src/libs/pcp/pcp.c @@ -1518,6 +1518,15 @@ process_watchdog_info_response(PCPConnInfo * pcpConn, char *buf, int len) json_value_free(root); goto INVALID_RESPONSE; } + if (json_get_int_value_for_key(root, "ValidRemoteNodeCount", &wd_cluster_info->validRemoteNodeCount)) + { + wd_cluster_info->validRemoteNodeCount = -1; + } + if (json_get_int_value_for_key(root, "NodesRequireForQuorum", &wd_cluster_info->nodesRequiredForQuorum)) + { + wd_cluster_info->nodesRequiredForQuorum = -1; + } + if (json_get_int_value_for_key(root, "QuorumStatus", &wd_cluster_info->quorumStatus)) { json_value_free(root); @@ -1594,6 +1603,20 @@ process_watchdog_info_response(PCPConnInfo * pcpConn, char *buf, int len) } strncpy(wdNodeInfo->delegate_ip, ptr, sizeof(wdNodeInfo->delegate_ip) - 1); + if (json_get_int_value_for_key(nodeInfoValue, "QuorumState", &wdNodeInfo->quorum_state)) + { + /* would be from the older version. No need to panic */ + wdNodeInfo->quorum_state = 0; + } + + ptr = json_get_string_value_for_key(nodeInfoValue, "QuorumStateString"); + if (ptr == NULL) + { + strncpy(wdNodeInfo->quorum_state_string, "NOT-Available", sizeof(wdNodeInfo->quorum_state_string) - 1); + } + else + strncpy(wdNodeInfo->quorum_state_string, ptr, sizeof(wdNodeInfo->quorum_state_string) - 1); + if (json_get_int_value_for_key(nodeInfoValue, "WdPort", &wdNodeInfo->wd_port)) { json_value_free(root); diff --git a/src/sample/pgpool.conf.sample-logical b/src/sample/pgpool.conf.sample-logical index a2684e2c..cd71af84 100644 --- a/src/sample/pgpool.conf.sample-logical +++ b/src/sample/pgpool.conf.sample-logical @@ -684,7 +684,8 @@ backend_clustering_mode = 'logical_replication' #wd_de_escalation_command = '' # Executes this command when leader pgpool resigns from being leader. 
# (change requires restart) -# - Watchdog consensus settings for failover - + +# - Watchdog consensus settings for failover and delegate IP - #failover_when_quorum_exists = on # Only perform backend node failover @@ -708,6 +709,25 @@ backend_clustering_mode = 'logical_replication' # half of the total votes. # (change requires restart) +#wd_remove_shutdown_nodes = off + # when enabled, properly shut down watchdog nodes get + # removed from the cluster and do not count towards + # the quorum and consensus computations + +#wd_lost_node_removal_timeout = 0s + # Time after which the LOST watchdog nodes get + # removed from the cluster and do not count towards + # the quorum and consensus computations + # setting it to 0 will never remove the LOST nodes + +#wd_initial_node_showup_time = 0s + # Time to wait for Watchdog nodes to connect to the cluster. + # After that time the nodes are considered to be not part of + # the cluster and will not count towards + # the quorum and consensus computations + # setting it to 0 will wait forever + + # - Lifecheck Setting - # -- common -- diff --git a/src/sample/pgpool.conf.sample-raw b/src/sample/pgpool.conf.sample-raw index fe0d6749..a794ee03 100644 --- a/src/sample/pgpool.conf.sample-raw +++ b/src/sample/pgpool.conf.sample-raw @@ -723,7 +723,7 @@ backend_clustering_mode = 'raw' # Executes this command when leader pgpool resigns from being leader.
# (change requires restart) +#wd_remove_shutdown_nodes = off + # when enabled, properly shut down watchdog nodes get + # removed from the cluster and do not count towards + # the quorum and consensus computations + +#wd_lost_node_removal_timeout = 0s + # Time after which the LOST watchdog nodes get + # removed from the cluster and do not count towards + # the quorum and consensus computations + # setting it to 0 will never remove the LOST nodes + +#wd_initial_node_showup_time = 0s + # Time to wait for Watchdog nodes to connect to the cluster. + # After that time the nodes are considered to be not part of + # the cluster and will not count towards + # the quorum and consensus computations + # setting it to 0 will wait forever + + # - Lifecheck Setting - # -- common -- diff --git a/src/sample/pgpool.conf.sample-replication b/src/sample/pgpool.conf.sample-replication index b980d576..fbc07934 100644 --- a/src/sample/pgpool.conf.sample-replication +++ b/src/sample/pgpool.conf.sample-replication @@ -721,7 +721,7 @@ backend_clustering_mode = 'native_replication' # Executes this command when leader pgpool resigns from being leader. # (change requires restart) -# - Watchdog consensus settings for failover - +# - Watchdog consensus settings for failover and delegate IP - #failover_when_quorum_exists = off # Only perform backend node failover @@ -751,6 +751,25 @@ backend_clustering_mode = 'native_replication' # half of the total votes.
# (change requires restart) +#wd_remove_shutdown_nodes = off + # when enabled, properly shut down watchdog nodes get + # removed from the cluster and do not count towards + # the quorum and consensus computations + +#wd_lost_node_removal_timeout = 0s + # Time after which the LOST watchdog nodes get + # removed from the cluster and do not count towards + # the quorum and consensus computations + # setting it to 0 will never remove the LOST nodes + +#wd_initial_node_showup_time = 0s + # Time to wait for Watchdog nodes to connect to the cluster. + # After that time the nodes are considered to be not part of + # the cluster and will not count towards + # the quorum and consensus computations + # setting it to 0 will wait forever + + # - Lifecheck Setting - # -- common -- diff --git a/src/sample/pgpool.conf.sample-slony b/src/sample/pgpool.conf.sample-slony index eecce973..9b212091 100644 --- a/src/sample/pgpool.conf.sample-slony +++ b/src/sample/pgpool.conf.sample-slony @@ -721,7 +721,7 @@ log_destination = 'stderr' # Executes this command when leader pgpool resigns from being leader. # (change requires restart) -# - Watchdog consensus settings for failover - +# - Watchdog consensus settings for failover and delegate IP - #failover_when_quorum_exists = on # Only perform backend node failover @@ -750,6 +750,26 @@ log_destination = 'stderr' # half of the total votes. # (change requires restart) +#wd_remove_shutdown_nodes = off + # when enabled, properly shut down watchdog nodes get + # removed from the cluster and do not count towards + # the quorum and consensus computations + +#wd_lost_node_removal_timeout = 0s + # Time after which the LOST watchdog nodes get + # removed from the cluster and do not count towards + # the quorum and consensus computations + # setting it to 0 will never remove the LOST nodes + +#wd_initial_node_showup_time = 0s + # Time to wait for Watchdog nodes to connect to the cluster.
+ # After that time the nodes are considered to be not part of + # the cluster and will not count towards + # the quorum and consensus computations + # setting it to 0 will wait forever + + + # - Lifecheck Setting - # -- common -- diff --git a/src/sample/pgpool.conf.sample-snapshot b/src/sample/pgpool.conf.sample-snapshot index 84a0b24b..644ece5e 100644 --- a/src/sample/pgpool.conf.sample-snapshot +++ b/src/sample/pgpool.conf.sample-snapshot @@ -719,7 +719,7 @@ backend_clustering_mode = 'snapshot_isolation' # Executes this command when leader pgpool resigns from being leader. # (change requires restart) -# - Watchdog consensus settings for failover - +# - Watchdog consensus settings for failover and delegate IP - #failover_when_quorum_exists = off # Only perform backend node failover @@ -749,6 +749,25 @@ backend_clustering_mode = 'snapshot_isolation' # half of the total votes. # (change requires restart) +#wd_remove_shutdown_nodes = off + # when enabled, properly shut down watchdog nodes get + # removed from the cluster and do not count towards + # the quorum and consensus computations + +#wd_lost_node_removal_timeout = 0s + # Time after which the LOST watchdog nodes get + # removed from the cluster and do not count towards + # the quorum and consensus computations + # setting it to 0 will never remove the LOST nodes + +#wd_initial_node_showup_time = 0s + # Time to wait for Watchdog nodes to connect to the cluster. + # After that time the nodes are considered to be not part of + # the cluster and will not count towards + # the quorum and consensus computations + # setting it to 0 will wait forever + + # - Lifecheck Setting - # -- common -- diff --git a/src/sample/pgpool.conf.sample-stream b/src/sample/pgpool.conf.sample-stream index 8222fd35..d55428b1 100644 --- a/src/sample/pgpool.conf.sample-stream +++ b/src/sample/pgpool.conf.sample-stream @@ -723,7 +723,7 @@ hostname0 = '' # Executes this command when leader pgpool resigns from being leader.
# (change requires restart) -# - Watchdog consensus settings for failover - +# - Watchdog consensus settings for failover and delegate IP - #failover_when_quorum_exists = on # Only perform backend node failover @@ -751,6 +751,25 @@ hostname0 = '' # half of the total votes. # (change requires restart) +#wd_remove_shutdown_nodes = off + # when enabled properly shutdown watchdog nodes gets + # removed from the cluster and does not count towards + # the quorum and consensus computations + +#wd_lost_node_removal_timeout = 0s + # Time after which the LOST watchdog nodes gets + # removed from the cluster and does not count towards + # the quorum and consensus computations + # setting it to 0 will never remove the LOST nodes + +#wd_initial_node_showup_time = 0s + # Time to wait for Watchdog nodes to connect to the cluster. + # After that time the nodes are considered to be not part of + # the cluster and will not count towards + # the quorum and consensus computations + # setting it to 0 will wait forever + + # - Lifecheck Setting - # -- common -- diff --git a/src/tools/pcp/pcp_frontend_client.c b/src/tools/pcp/pcp_frontend_client.c index 235fcf67..94d8c844 100644 --- a/src/tools/pcp/pcp_frontend_client.c +++ b/src/tools/pcp/pcp_frontend_client.c @@ -775,13 +775,15 @@ output_watchdog_info_result(PCPResultInfo * pcpResInfo, bool verbose) quorumStatus = "UNKNOWN"; printf("Watchdog Cluster Information \n"); - printf("Total Nodes : %d\n", cluster->remoteNodeCount + 1); - printf("Remote Nodes : %d\n", cluster->remoteNodeCount); - printf("Quorum state : %s\n", quorumStatus); - printf("Alive Remote Nodes : %d\n", cluster->aliveNodeCount); - printf("VIP up on local node : %s\n", cluster->escalated ? 
"YES" : "NO"); - printf("Leader Node Name : %s\n", cluster->leaderNodeName); - printf("Leader Host Name : %s\n\n", cluster->leaderHostName); + printf("Total Nodes : %d\n", cluster->remoteNodeCount + 1); + printf("Remote Nodes : %d\n", cluster->remoteNodeCount); + printf("Valid Remote Nodes : %d\n", cluster->validRemoteNodeCount); + printf("Alive Remote Nodes : %d\n", cluster->aliveNodeCount); + printf("Nodes required for quorum: %d\n", cluster->nodesRequiredForQuorum); + printf("Quorum state : %s\n", quorumStatus); + printf("VIP up on local node : %s\n", cluster->escalated ? "YES" : "NO"); + printf("Leader Node Name : %s\n", cluster->leaderNodeName); + printf("Leader Host Name : %s\n\n", cluster->leaderHostName); printf("Watchdog Node Information \n"); for (i = 0; i < cluster->nodeCount; i++) @@ -795,7 +797,8 @@ output_watchdog_info_result(PCPResultInfo * pcpResInfo, bool verbose) printf("Watchdog port : %d\n", watchdog_info->wd_port); printf("Node priority : %d\n", watchdog_info->wd_priority); printf("Status : %d\n", watchdog_info->state); - printf("Status Name : %s\n\n", watchdog_info->stateName); + printf("Status Name : %s\n", watchdog_info->stateName); + printf("Quorum State : %s\n\n", watchdog_info->quorum_state_string); } } else diff --git a/src/watchdog/watchdog.c b/src/watchdog/watchdog.c index d00fa1d3..6184cfc9 100644 --- a/src/watchdog/watchdog.c +++ b/src/watchdog/watchdog.c @@ -250,6 +250,12 @@ char *wd_node_lost_reasons[] = { "SHUTDOWN" }; +char *wd_node_quorum_states[] = { + "ACTIVE", + "REMOVED-SHUTDOWN", + "REMOVED-NO-SHOW", + "REMOVED-LOST" +}; /* * Command packet definition. 
*/ @@ -362,6 +368,7 @@ typedef struct wd_cluster WatchdogNode *remoteNodes; WDClusterLeaderInfo clusterLeaderInfo; int remoteNodeCount; + int validRemoteNodeCount; /* no of nodes that count towards quorum and consensus */ int quorum_status; unsigned int nextCommandID; pid_t escalation_pid; @@ -594,6 +601,10 @@ static void set_cluster_leader_node(WatchdogNode * wdNode); static void clear_standby_nodes_list(void); static int standby_node_left_cluster(WatchdogNode * wdNode); static int standby_node_join_cluster(WatchdogNode * wdNode); +static void reset_lost_timers(void); +static int remove_node_from_cluster_definition(WatchdogNode* wdNode, WD_NODE_QUORUM_STATE remove_state); +static int update_valid_cluster_definition(void); +static int add_node_to_cluster_definition(WatchdogNode* wdNode); static void update_missed_beacon_count(WDCommandData* ipcCommand, bool clear); static void wd_execute_cluster_command_processor(WatchdogNode * wdNode, WDPacketData * pkt); @@ -761,10 +772,10 @@ wd_cluster_initialize(void) /* initialize remote nodes */ g_cluster.remoteNodeCount = pool_config->wd_nodes.num_wd - 1; + g_cluster.validRemoteNodeCount = g_cluster.remoteNodeCount; if (g_cluster.remoteNodeCount == 0) ereport(ERROR, (errmsg("invalid watchdog configuration. 
other pgpools setting is not defined"))); - ereport(LOG, (errmsg("watchdog cluster is configured with %d remote nodes", g_cluster.remoteNodeCount))); g_cluster.remoteNodes = palloc0((sizeof(WatchdogNode) * g_cluster.remoteNodeCount)); @@ -1638,6 +1649,7 @@ read_sockets(fd_set *rmask, int pending_fds_count) } if (found) { + add_node_to_cluster_definition(wdNode); /* reply with node info message */ ereport(LOG, (errmsg("new node joined the cluster hostname:\"%s\" port:%d pgpool_port:%d", wdNode->hostname, @@ -3633,6 +3645,8 @@ add_nodeinfo_to_json(JsonNode * jNode, WatchdogNode * node) jw_put_int(jNode, "ID", nodeIfNull_int(pgpool_node_id, -1)); jw_put_int(jNode, "State", nodeIfNull_int(state, -1)); + jw_put_int(jNode, "QuorumState", nodeIfNull_int(quorum_state, -1)); + jw_put_string(jNode, "QuorumStateString", node ? wd_node_quorum_states[node->quorum_state] : NotSet); jw_put_string(jNode, "NodeName", nodeIfNull_str(nodeName, NotSet)); jw_put_string(jNode, "HostName", nodeIfNull_str(hostname, NotSet)); jw_put_string(jNode, "StateName", node ? wd_state_names[node->state] : NotSet); @@ -3652,6 +3666,8 @@ static JsonNode * get_node_list_json(int id) JsonNode *jNode = jw_create_with_object(true); jw_put_int(jNode, "RemoteNodeCount", g_cluster.remoteNodeCount); + jw_put_int(jNode, "ValidRemoteNodeCount", g_cluster.validRemoteNodeCount); + jw_put_int(jNode, "NodesRequireForQuorum", get_minimum_votes_to_resolve_consensus()); jw_put_int(jNode, "QuorumStatus", WD_LEADER_NODE ? WD_LEADER_NODE->quorum_status : -2); jw_put_int(jNode, "AliveNodeCount", WD_LEADER_NODE ? 
WD_LEADER_NODE->standby_nodes_count : 0); jw_put_int(jNode, "Escalated", g_cluster.localNode->escalated); @@ -4755,6 +4771,34 @@ service_unreachable_nodes(void) { WatchdogNode *wdNode = &(g_cluster.remoteNodes[i]); + if (wdNode->state == WD_LOST && wdNode->quorum_state == WD_NODE_PART_OF_QUORUM + && pool_config->wd_lost_node_removal_timeout) + { + int lost_seconds = WD_TIME_DIFF_SEC(currTime, wdNode->lost_time); + if (lost_seconds >= pool_config->wd_lost_node_removal_timeout) + { + ereport(LOG, + (errmsg("remote node \"%s\" is lost for %d seconds", wdNode->nodeName,lost_seconds), + errdetail("removing the node from quorum"))); + remove_node_from_cluster_definition(wdNode,WD_NODE_LOST_REMOVED); + } + continue; + } + + if (wdNode->state == WD_DEAD && wdNode->quorum_state == WD_NODE_PART_OF_QUORUM + && pool_config->wd_initial_node_showup_time) + { + int no_show_seconds = WD_TIME_DIFF_SEC(currTime, g_cluster.localNode->startup_time); + if (no_show_seconds >= pool_config->wd_initial_node_showup_time) + { + ereport(LOG, + (errmsg("remote node \"%s\" didn't showed-up in %d seconds", wdNode->nodeName,no_show_seconds), + errdetail("removing the node from quorum"))); + remove_node_from_cluster_definition(wdNode,WD_NODE_NO_SHOW_REMOVED); + } + continue; + } + if (is_node_active(wdNode) == false) continue; @@ -5402,6 +5446,8 @@ watchdog_state_machine(WD_EVENTS event, WatchdogNode * wdNode, WDPacketData * pk { ereport(LOG, (errmsg("remote node \"%s\" is shutting down", wdNode->nodeName))); + if (pool_config->wd_remove_shutdown_nodes) + remove_node_from_cluster_definition(wdNode,WD_NODE_SHUTDOWN_REMOVED); } else { @@ -5440,6 +5486,8 @@ watchdog_state_machine(WD_EVENTS event, WatchdogNode * wdNode, WDPacketData * pk wdNode->node_lost_reason = NODE_LOST_UNKNOWN_REASON; wdNode->state = WD_LOADING; send_cluster_service_message(wdNode, pkt, CLUSTER_NODE_APPEARING_FOUND); + /* if this node was kicked out of quorum calculation. 
add it back */ + add_node_to_cluster_definition(wdNode); } else if (event == WD_EVENT_PACKET_RCV) { @@ -6004,8 +6052,9 @@ watchdog_state_machine_coordinator(WD_EVENTS event, WatchdogNode * wdNode, WDPac if (clusterCommand->commandStatus == COMMAND_FINISHED_ALL_REPLIED || clusterCommand->commandStatus == COMMAND_FINISHED_TIMEOUT) { + update_valid_cluster_definition(); update_quorum_status(); - + reset_lost_timers(); ereport(DEBUG1, (errmsg("declare coordinator command finished with status:[%s]", clusterCommand->commandStatus == COMMAND_FINISHED_ALL_REPLIED ? @@ -7060,7 +7109,7 @@ update_quorum_status(void) } else if (g_cluster.clusterLeaderInfo.standby_nodes_count == get_minimum_remote_nodes_required_for_quorum()) { - if (g_cluster.remoteNodeCount % 2 != 0) + if (g_cluster.validRemoteNodeCount % 2 != 0) { if (pool_config->enable_consensus_with_half_votes) g_cluster.quorum_status = 0; /* on the edge */ @@ -7091,14 +7140,14 @@ get_minimum_remote_nodes_required_for_quorum(void) * Even number of remote nodes, That means total number of nodes are odd, * so minimum quorum is just remote/2. 
*/ - if (g_cluster.remoteNodeCount % 2 == 0) - return (g_cluster.remoteNodeCount / 2); + if (g_cluster.validRemoteNodeCount % 2 == 0) + return (g_cluster.validRemoteNodeCount / 2); /* * Total nodes including self are even, So we return 50% nodes as quorum * requirements */ - return ((g_cluster.remoteNodeCount - 1) / 2); + return ((g_cluster.validRemoteNodeCount - 1) / 2); } /* @@ -7143,7 +7192,7 @@ get_minimum_votes_to_resolve_consensus(void) * So for even number of nodes when enable_consensus_with_half_votes is * not allowed than we would add one more vote than exact 50% */ - if (g_cluster.remoteNodeCount % 2 != 0) + if (g_cluster.validRemoteNodeCount % 2 != 0) { if (pool_config->enable_consensus_with_half_votes == false) required_node_count += 1; @@ -7922,18 +7971,88 @@ set_cluster_leader_node(WatchdogNode * wdNode) } } -static WatchdogNode * getLeaderWatchdogNode(void) +static WatchdogNode* +getLeaderWatchdogNode(void) { return g_cluster.clusterLeaderInfo.leaderNode; } +static int +update_valid_cluster_definition(void) +{ + int i; + g_cluster.validRemoteNodeCount = g_cluster.remoteNodeCount; + for (i = 0; i < g_cluster.remoteNodeCount; i++) + { + WatchdogNode *wdNode = &(g_cluster.remoteNodes[i]); + if (wdNode->quorum_state != WD_NODE_PART_OF_QUORUM) + g_cluster.validRemoteNodeCount--; + } + return g_cluster.validRemoteNodeCount; +} + +static int +remove_node_from_cluster_definition(WatchdogNode* wdNode, WD_NODE_QUORUM_STATE remove_state) +{ + if (wdNode->quorum_state == WD_NODE_PART_OF_QUORUM) + { + wdNode->quorum_state = remove_state; + ereport(LOG, + (errmsg("removing [%s] node:\"%s\" [node_id:%d] from the quorum", + wd_state_names[wdNode->state], wdNode->nodeName,wdNode->pgpool_node_id), + errdetail("the node was removed because it was \"%s\"", + wd_node_quorum_states[wdNode->quorum_state]))); + + g_cluster.validRemoteNodeCount--; + } + return g_cluster.validRemoteNodeCount; +} + +/* + * Re-admit a previously removed node into the quorum computation and clear its lost time. + * Returns the number of remote nodes that count towards the quorum. + */ +static int +add_node_to_cluster_definition(WatchdogNode* wdNode) +{ + if
(wdNode->quorum_state != WD_NODE_PART_OF_QUORUM) + { + ereport(LOG, + (errmsg("Adding node:\"%s\" to the quorum",wdNode->nodeName), + errdetail("the node was removed because it was \"%s\"", + wd_node_quorum_states[wdNode->quorum_state]))); + + wdNode->quorum_state = WD_NODE_PART_OF_QUORUM; + /* reset the lost time on the node */ + wdNode->lost_time.tv_sec = 0; + wdNode->lost_time.tv_usec = 0; + g_cluster.validRemoteNodeCount++; + } + return g_cluster.validRemoteNodeCount; +} + +static void +reset_lost_timers(void) +{ + int i; + for (i = 0; i < g_cluster.remoteNodeCount; i++) + { + WatchdogNode *wdNode = &(g_cluster.remoteNodes[i]); + wdNode->lost_time.tv_sec = 0; + wdNode->lost_time.tv_usec = 0; + } +} + static int standby_node_join_cluster(WatchdogNode * wdNode) { if (get_local_node_state() == WD_COORDINATOR) { int i; - + /* Just reset the lost time stamp */ + /* the node has joined as standby, so it is no longer tracked as lost */ + wdNode->lost_time.tv_sec = 0; + wdNode->lost_time.tv_usec = 0; /* First check if the node is already in the List */ for (i = 0; i < g_cluster.clusterLeaderInfo.standby_nodes_count; i++) { @@ -7983,7 +8102,8 @@ standby_node_left_cluster(WatchdogNode * wdNode) */ ereport(LOG, (errmsg("removing watchdog node \"%s\" from the standby list", wdNode->nodeName))); - + /* set the timestamp on node to track for how long this node is lost */ + gettimeofday(&wdNode->lost_time, NULL); g_cluster.clusterLeaderInfo.standbyNodes[i] = NULL; g_cluster.clusterLeaderInfo.standby_nodes_count--; removed = true; diff --git a/src/watchdog/wd_commands.c b/src/watchdog/wd_commands.c index e1aae9e0..d4a25a4b 100644 --- a/src/watchdog/wd_commands.c +++ b/src/watchdog/wd_commands.c @@ -346,6 +346,20 @@ parse_watchdog_node_info_from_wd_node_json(json_value * source)
(errmsg("invalid json data"), errdetail("unable to find Watchdog Node ID"))); } + if (json_get_int_value_for_key(source, "QuorumState", &wdNodeInfo->quorum_state)) + { + /* would be from the older version. No need to panic */ + wdNodeInfo->quorum_state = WD_NODE_PART_OF_QUORUM; + } + + ptr = json_get_string_value_for_key(source, "QuorumStateString"); + if (ptr == NULL) + { + strncpy(wdNodeInfo->quorum_state_string, "NOT-Available", sizeof(wdNodeInfo->quorum_state_string) - 1); + } + else + strncpy(wdNodeInfo->quorum_state_string, ptr, sizeof(wdNodeInfo->quorum_state_string) - 1); + ptr = json_get_string_value_for_key(source, "NodeName"); if (ptr == NULL)