diff --git a/doc.ja/src/sgml/stream-check.sgml b/doc.ja/src/sgml/stream-check.sgml
index caaae13..f5df81d 100644
--- a/doc.ja/src/sgml/stream-check.sgml
+++ b/doc.ja/src/sgml/stream-check.sgml
@@ -251,6 +251,39 @@
+
+ prefer_lower_delay_standby (boolean)
+
+
+ prefer_lower_delay_standby 設定パラメータ
+
+
+
+
+
+ このパラメータはdelay_thresholdを使用しているときに有効です。
+ onに設定すると、負荷分散先のスタンバイサーバがdelay_thresholdを超えて遅延したときに、プライマリサーバでなく、
+ backend_weightが0より大きくて最も遅延の少ないスタンバイサーバを負荷分散ノードにします。
+ 全てのスタンバイサーバがdelay_thresholdを超えて遅延している場合はプライマリサーバに送ります。
+ デフォルトはoffです。
+
+
+
+ このパラメータはPgpool-IIの設定を再読み込みすることで変更可能です。
+
+
+
+
log_standby_delay (string)
diff --git a/doc/src/sgml/stream-check.sgml b/doc/src/sgml/stream-check.sgml
index d4cef8a..0c9817c 100644
--- a/doc/src/sgml/stream-check.sgml
+++ b/doc/src/sgml/stream-check.sgml
@@ -199,6 +199,26 @@
+
+ prefer_lower_delay_standby (boolean)
+
+ prefer_lower_delay_standby configuration parameter
+
+
+
+
+ This parameter is valid only when delay_threshold is set to a value greater than 0.
+ When set to on, if the delay of the load balancing node is greater than delay_threshold,
+ Pgpool-II sends read queries not to the primary node but to the least delayed standby whose backend_weight is greater than 0.
+ If the delay of all standby nodes is greater than delay_threshold, Pgpool-II sends read queries to the primary.
+ Default is off.
+
+
+ This parameter can be changed by reloading the Pgpool-II configurations.
+
+
+
+
log_standby_delay (string)
diff --git a/src/config/pool_config_variables.c b/src/config/pool_config_variables.c
index cd42b1f..2e67e2a 100644
--- a/src/config/pool_config_variables.c
+++ b/src/config/pool_config_variables.c
@@ -503,6 +503,16 @@ static struct config_bool ConfigureNamesBool[] =
},
{
+ {"prefer_lower_delay_standby", CFGCXT_RELOAD, STREAMING_REPLICATION_CONFIG,
+ "If the load balance node is delayed over delay_threshold on SR, pgpool find another standby node which is lower delayed.",
+ CONFIG_VAR_TYPE_BOOL, false, 0
+ },
+ &g_pool_config.prefer_lower_delay_standby,
+ false,
+ NULL, NULL, NULL
+ },
+
+ {
{"connection_cache", CFGCXT_INIT, CONNECTION_POOL_CONFIG,
"Caches connections to backends.",
CONFIG_VAR_TYPE_BOOL, false, 0
diff --git a/src/context/pool_query_context.c b/src/context/pool_query_context.c
index 6635369..f60dc5a 100644
--- a/src/context/pool_query_context.c
+++ b/src/context/pool_query_context.c
@@ -547,8 +547,10 @@ pool_where_to_send(POOL_QUERY_CONTEXT * query_context, char *query, Node *node)
*/
/*
- * If replication delay is too much, we prefer to send to
- * the primary.
+ * If streaming replication delay is too much and
+ * prefer_lower_delay_standby is true, elect a new
+ * load balance node which is less delayed;
+ * otherwise send to the primary.
*/
if (STREAM &&
pool_config->delay_threshold &&
@@ -558,7 +560,20 @@ pool_where_to_send(POOL_QUERY_CONTEXT * query_context, char *query, Node *node)
(errmsg("could not load balance because of too much replication delay"),
errdetail("destination = %d for query= \"%s\"", dest, query)));
- pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID);
+ if (pool_config->prefer_lower_delay_standby)
+ {
+ int new_load_balancing_node = select_load_balancing_node(); /* re-run node selection; may yield the primary */
+ ereport(DEBUG1,
+ (errmsg("select new load balancing node which is the most lower delay standby"),
+ errdetail("new load balancing node is node %d", new_load_balancing_node)));
+ session_context->load_balance_node_id = new_load_balancing_node; /* record the new node in the session context */
+ session_context->query_context->load_balance_node_id = session_context->load_balance_node_id;
+ pool_set_node_to_be_sent(query_context, session_context->query_context->load_balance_node_id);
+ }
+ else
+ {
+ pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID); /* option off: send to the primary */
+ }
}
/*
diff --git a/src/include/pool_config.h b/src/include/pool_config.h
index 08c9b1c..58f0758 100644
--- a/src/include/pool_config.h
+++ b/src/include/pool_config.h
@@ -304,6 +304,9 @@ typedef struct
* that health_check_period required to be
* greater than 0 to enable the
* functionality. */
+
+ bool prefer_lower_delay_standby; /* prefer the least delayed standby over the primary when over delay_threshold */
+
LogStandbyDelayModes log_standby_delay; /* how to log standby lag */
bool connection_cache; /* cache connection pool? */
int health_check_timeout; /* health check timeout */
diff --git a/src/parser/gram_minimal.c b/src/parser/gram_minimal.c
index 48dbaf1..dfea558 100644
--- a/src/parser/gram_minimal.c
+++ b/src/parser/gram_minimal.c
@@ -26931,7 +26931,7 @@ yyreduce:
#line 1460 "gram_minimal.y" /* yacc.c:1646 */
{
VariableSetStmt *n = (yyvsp[0].vsetstmt);
- n->type = T_PgpoolVariableSetStmt; /* Hack to keep changes minumum */
+ n->type = T_PgpoolVariableSetStmt; /* Hack to keep changes minimum */
n->is_local = false;
(yyval.node) = (Node *) n;
}
@@ -27356,7 +27356,7 @@ yyreduce:
#line 1720 "gram_minimal.y" /* yacc.c:1646 */
{
VariableSetStmt *n = (yyvsp[0].vsetstmt);
- n->type = T_PgpoolVariableSetStmt; /* Hack to keep the changes minumum */
+ n->type = T_PgpoolVariableSetStmt; /* Hack to keep the changes minimum */
(yyval.node) = (Node *) n;
}
#line 27363 "gram_minimal.c" /* yacc.c:1646 */
diff --git a/src/protocol/pool_pg_utils.c b/src/protocol/pool_pg_utils.c
index 59ba540..8a8d14b 100644
--- a/src/protocol/pool_pg_utils.c
+++ b/src/protocol/pool_pg_utils.c
@@ -313,6 +313,8 @@ select_load_balancing_node(void)
POOL_SESSION_CONTEXT *ses = pool_get_session_context(false);
int tmp;
int no_load_balance_node_id = -2;
+ uint64 lowest_delay;
+ int lowest_delay_nodes[NUM_BACKENDS];
/*
* -2 indicates there's no database_redirect_preference_list. -1 indicates
@@ -399,6 +401,90 @@ select_load_balancing_node(void)
if (suggested_node_id >= 0)
{
/*
+ * If Streaming Replication mode and delay_threshold and
+ * prefer_lower_delay_standby is true, we choose the least delayed
+ * node if suggested_node is standby and delayed over delay_threshold.
+ */
+		if (STREAM &&
+			pool_config->delay_threshold &&
+			pool_config->prefer_lower_delay_standby &&
+			(suggested_node_id != PRIMARY_NODE_ID) &&
+			(BACKEND_INFO(suggested_node_id).standby_delay > pool_config->delay_threshold))
+		{
+			ereport(DEBUG1,
+					(errmsg("selecting load balance node"),
+					 errdetail("suggested backend %d is streaming delayed over delay_threshold", suggested_node_id)));
+
+			/*
+			 * Candidates are the standbys sharing the lowest delay. Seeding
+			 * with delay_threshold excludes standbys delayed beyond it.
+			 */
+			lowest_delay = pool_config->delay_threshold;
+
+			/* Start with no candidates and no accumulated weight */
+			total_weight = 0.0;
+			for (i = 0; i < NUM_BACKENDS; i++)
+			{
+				lowest_delay_nodes[i] = 0;
+			}
+			/* Only valid, weighted, non-primary nodes can become candidates */
+			for (i = 0; i < NUM_BACKENDS; i++)
+			{
+				if (VALID_BACKEND_RAW(i) &&
+					(i != PRIMARY_NODE_ID) &&
+					(BACKEND_INFO(i).backend_weight > 0.0))
+				{
+					if (lowest_delay == BACKEND_INFO(i).standby_delay)
+					{
+						lowest_delay_nodes[i] = 1;
+						total_weight += BACKEND_INFO(i).backend_weight;
+					}
+					else if (lowest_delay > BACKEND_INFO(i).standby_delay)
+					{
+						int			ii;
+						lowest_delay = BACKEND_INFO(i).standby_delay;	/* new minimum: drop stale candidates */
+						for (ii = 0; ii < NUM_BACKENDS; ii++)
+						{
+							lowest_delay_nodes[ii] = 0;
+						}
+						lowest_delay_nodes[i] = 1;
+						total_weight = BACKEND_INFO(i).backend_weight;
+					}
+				}
+			}
+			/* Pick a random point within the candidates' total weight */
+#if defined(sun) || defined(__sun)
+			r = (((double) rand()) / RAND_MAX) * total_weight;
+#else
+			r = (((double) random()) / RAND_MAX) * total_weight;
+#endif
+			/* Stays at the primary if no standby qualified as a candidate */
+			selected_slot = PRIMARY_NODE_ID;
+			total_weight = 0.0;
+			for (i = 0; i < NUM_BACKENDS; i++)
+			{
+				if (lowest_delay_nodes[i] == 0)
+					continue;
+
+				if (selected_slot == PRIMARY_NODE_ID)
+					selected_slot = i;
+				/* Advance while r lies at or beyond the weight accumulated so far */
+				if (r >= total_weight)
+					selected_slot = i;
+				else
+					break;
+
+				total_weight += BACKEND_INFO(i).backend_weight;
+
+			}
+
+			ereport(DEBUG1,
+					(errmsg("selecting load balance node"),
+					 errdetail("selected backend id is %d", selected_slot)));
+			return selected_slot;
+		}
+
+ /*
* If the weight is bigger than random rate then send to
* suggested_node_id. If the weight is less than random rate then
* choose load balance node from other nodes.
@@ -470,6 +556,81 @@ select_load_balancing_node(void)
total_weight += BACKEND_INFO(i).backend_weight;
}
}
+
+ /*
+ * If Streaming Replication mode and delay_threshold and
+ * prefer_lower_delay_standby is true, we elect the least delayed
+ * node if the selected node is delayed over delay_threshold.
+ */
+	if (STREAM &&
+		pool_config->delay_threshold &&
+		pool_config->prefer_lower_delay_standby &&
+		(BACKEND_INFO(selected_slot).standby_delay > pool_config->delay_threshold))
+	{
+		ereport(DEBUG1,
+				(errmsg("selecting load balance node"),
+				 errdetail("backend id %d is streaming delayed over delay_threshold", selected_slot)));
+		/* Seeding with delay_threshold excludes standbys delayed beyond it */
+		lowest_delay = pool_config->delay_threshold;
+		total_weight = 0.0;
+		for (i = 0; i < NUM_BACKENDS; i++)
+		{
+			lowest_delay_nodes[i] = 0;
+		}
+		/* Only valid, weighted, non-primary nodes can become candidates */
+		for (i = 0; i < NUM_BACKENDS; i++)
+		{
+			if ((i != PRIMARY_NODE_ID) &&
+				VALID_BACKEND_RAW(i) &&
+				(BACKEND_INFO(i).backend_weight > 0.0))
+			{
+				if (lowest_delay == BACKEND_INFO(i).standby_delay)
+				{
+					lowest_delay_nodes[i] = 1;
+					total_weight += BACKEND_INFO(i).backend_weight;
+				}
+				else if (lowest_delay > BACKEND_INFO(i).standby_delay)
+				{
+					int			ii;
+					lowest_delay = BACKEND_INFO(i).standby_delay;	/* new minimum: drop stale candidates */
+					for (ii = 0; ii < NUM_BACKENDS; ii++)
+					{
+						lowest_delay_nodes[ii] = 0;
+					}
+					lowest_delay_nodes[i] = 1;
+					total_weight = BACKEND_INFO(i).backend_weight;
+				}
+			}
+		}
+		/* Pick a random point within the candidates' total weight */
+#if defined(sun) || defined(__sun)
+		r = (((double) rand()) / RAND_MAX) * total_weight;
+#else
+		r = (((double) random()) / RAND_MAX) * total_weight;
+#endif
+		/* Stays at the primary if no standby qualified as a candidate */
+		selected_slot = PRIMARY_NODE_ID;
+
+		total_weight = 0.0;
+		for (i = 0; i < NUM_BACKENDS; i++)
+		{
+			if (lowest_delay_nodes[i] == 0)
+				continue;
+
+			if (selected_slot == PRIMARY_NODE_ID)
+			{
+				selected_slot = i;
+			}
+			/* Advance while r lies at or beyond the weight accumulated so far */
+			if (r >= total_weight)
+				selected_slot = i;
+			else
+				break;
+
+			total_weight += BACKEND_INFO(i).backend_weight;
+		}
+	}
+
ereport(DEBUG1,
(errmsg("selecting load balance node"),
errdetail("selected backend id is %d", selected_slot)));
diff --git a/src/sample/pgpool.conf.sample-logical b/src/sample/pgpool.conf.sample-logical
index b156250..14d5f55 100644
--- a/src/sample/pgpool.conf.sample-logical
+++ b/src/sample/pgpool.conf.sample-logical
@@ -426,6 +426,11 @@ delay_threshold = 10000000
# Threshold before not dispatching query to standby node
# Unit is in bytes
# Disabled (0) by default
+prefer_lower_delay_standby = off
+ # If delay_threshold is set larger than 0, Pgpool-II sends queries to
+ # the primary when the selected node is delayed over delay_threshold.
+ # If this is set to on, Pgpool-II sends queries to another standby
+ # with lower delay instead.
# - Special commands -
diff --git a/src/sample/pgpool.conf.sample-raw b/src/sample/pgpool.conf.sample-raw
index 77cc94f..e806595 100644
--- a/src/sample/pgpool.conf.sample-raw
+++ b/src/sample/pgpool.conf.sample-raw
@@ -466,6 +466,11 @@ delay_threshold = 10000000
# Threshold before not dispatching query to standby node
# Unit is in bytes
# Disabled (0) by default
+prefer_lower_delay_standby = off
+ # If delay_threshold is set larger than 0, Pgpool-II sends queries to
+ # the primary when the selected node is delayed over delay_threshold.
+ # If this is set to on, Pgpool-II sends queries to another standby
+ # with lower delay instead.
# - Special commands -
diff --git a/src/sample/pgpool.conf.sample-replication b/src/sample/pgpool.conf.sample-replication
index b72dc90..8544535 100644
--- a/src/sample/pgpool.conf.sample-replication
+++ b/src/sample/pgpool.conf.sample-replication
@@ -462,6 +462,11 @@ delay_threshold = 0
# Threshold before not dispatching query to standby node
# Unit is in bytes
# Disabled (0) by default
+prefer_lower_delay_standby = off
+ # If delay_threshold is set larger than 0, Pgpool-II sends queries to
+ # the primary when the selected node is delayed over delay_threshold.
+ # If this is set to on, Pgpool-II sends queries to another standby
+ # with lower delay instead.
# - Special commands -
diff --git a/src/sample/pgpool.conf.sample-slony b/src/sample/pgpool.conf.sample-slony
index ce802c4..a61af0e 100644
--- a/src/sample/pgpool.conf.sample-slony
+++ b/src/sample/pgpool.conf.sample-slony
@@ -463,6 +463,11 @@ delay_threshold = 0
# Threshold before not dispatching query to standby node
# Unit is in bytes
# Disabled (0) by default
+prefer_lower_delay_standby = off
+ # If delay_threshold is set larger than 0, Pgpool-II sends queries to
+ # the primary when the selected node is delayed over delay_threshold.
+ # If this is set to on, Pgpool-II sends queries to another standby
+ # with lower delay instead.
# - Special commands -
diff --git a/src/sample/pgpool.conf.sample-snapshot b/src/sample/pgpool.conf.sample-snapshot
index 60499bd..cf489f7 100644
--- a/src/sample/pgpool.conf.sample-snapshot
+++ b/src/sample/pgpool.conf.sample-snapshot
@@ -460,6 +460,11 @@ delay_threshold = 0
# Threshold before not dispatching query to standby node
# Unit is in bytes
# Disabled (0) by default
+prefer_lower_delay_standby = off
+ # If delay_threshold is set larger than 0, Pgpool-II sends queries to
+ # the primary when the selected node is delayed over delay_threshold.
+ # If this is set to on, Pgpool-II sends queries to another standby
+ # with lower delay instead.
# - Special commands -
diff --git a/src/sample/pgpool.conf.sample-stream b/src/sample/pgpool.conf.sample-stream
index 84fdce3..4097a58 100644
--- a/src/sample/pgpool.conf.sample-stream
+++ b/src/sample/pgpool.conf.sample-stream
@@ -465,6 +465,11 @@ delay_threshold = 10000000
# Threshold before not dispatching query to standby node
# Unit is in bytes
# Disabled (0) by default
+prefer_lower_delay_standby = off
+ # If delay_threshold is set larger than 0, Pgpool-II sends queries to
+ # the primary when the selected node is delayed over delay_threshold.
+ # If this is set to on, Pgpool-II sends queries to another standby
+ # with lower delay instead.
# - Special commands -
diff --git a/src/test/regression/tests/033.prefer_lower_standby_delay/test.sh b/src/test/regression/tests/033.prefer_lower_standby_delay/test.sh
index e69de29..a4d1744 100644
--- a/src/test/regression/tests/033.prefer_lower_standby_delay/test.sh
+++ b/src/test/regression/tests/033.prefer_lower_standby_delay/test.sh
@@ -0,0 +1,103 @@
+#!/usr/bin/env bash
+#-------------------------------------------------------------------
+# test script for load balancing with prefer_lower_delay_standby.
+#
+source $TESTLIBS
+TESTDIR=testdir
+PG_CTL=$PGBIN/pg_ctl
+PSQL="$PGBIN/psql -X "
+
+version=`$PSQL --version|awk '{print $3}'`
+major_version=${version%.*}
+
+result=`echo "$major_version >= 10"|bc`
+if [ $result == 1 ];then
+ REPLAY_PAUSE="SELECT pg_wal_replay_pause();"
+ REPLAY_RESUME="SELECT pg_wal_replay_resume();"
+else
+ REPLAY_PAUSE="SELECT pg_xlog_replay_pause();"
+ REPLAY_RESUME="SELECT pg_xlog_replay_resume();"
+fi
+
+for mode in s
+do
+ rm -fr $TESTDIR
+ mkdir $TESTDIR
+ cd $TESTDIR
+
+ # create test environment
+ echo -n "creating test environment..."
+ $PGPOOL_SETUP -m s -n 3 || exit 1
+ echo "done."
+
+ source ./bashrc.ports
+ echo "app_name_redirect_preference_list = 'psql:1'" >> etc/pgpool.conf
+ echo "delay_threshold = 10" >> etc/pgpool.conf
+ echo "prefer_lower_delay_standby = on" >> etc/pgpool.conf
+ echo "sr_check_period = 3" >> etc/pgpool.conf
+
+ ./startall
+
+ export PGPORT=$PGPOOL_PORT
+
+ wait_for_pgpool_startup
+
+ $PSQL test </dev/null 2>&1
+ if [ $? != 0 ];then
+ # expected result not found
+ echo fail: query is sent to primary node.
+ ./shutdownall
+ exit 1
+ fi
+
+ echo ok: query is sent to another standby node.
+
+ $PSQL -p 11003 -c "$REPLAY_RESUME"
+
+ echo start: prefer_lower_delay_standby is off.
+
+ echo "prefer_lower_delay_standby = off" >> etc/pgpool.conf
+
+ $PGPOOL_INSTALL_DIR/bin/pcp_reload_config -w -h localhost -p $PCP_PORT
+
+ wait_for_pgpool_startup
+
+ $PSQL -p 11003 -c "$REPLAY_PAUSE"
+
+ $PSQL test </dev/null 2>&1
+ if [ $? != 0 ];then
+ # expected result not found
+ echo fail: query is sent to standby node.
+ ./shutdownall
+ exit 1
+ fi
+
+ echo ok: prefer lower delay standby works.
+
+ ./shutdownall
+
+done
+exit 0
\ No newline at end of file
diff --git a/src/utils/pool_process_reporting.c b/src/utils/pool_process_reporting.c
index 429864b..62da4de 100644
--- a/src/utils/pool_process_reporting.c
+++ b/src/utils/pool_process_reporting.c
@@ -574,6 +574,11 @@ get_config(int *nrows)
StrNCpy(status[i].desc, "standby delay threshold", POOLCONFIG_MAXDESCLEN);
i++;
+ StrNCpy(status[i].name, "prefer_lower_delay_standby", POOLCONFIG_MAXNAMELEN);
+ snprintf(status[i].value, POOLCONFIG_MAXVALLEN, "%d", pool_config->prefer_lower_delay_standby);
+ StrNCpy(status[i].desc, "load balancing considering streaming delay", POOLCONFIG_MAXDESCLEN);
+ i++;
+
/* - Special commands - */
StrNCpy(status[i].name, "follow_primary_command", POOLCONFIG_MAXNAMELEN);
snprintf(status[i].value, POOLCONFIG_MAXVALLEN, "%s", pool_config->follow_primary_command);