diff --git a/doc.ja/src/sgml/stream-check.sgml b/doc.ja/src/sgml/stream-check.sgml index caaae13..f5df81d 100644 --- a/doc.ja/src/sgml/stream-check.sgml +++ b/doc.ja/src/sgml/stream-check.sgml @@ -251,6 +251,39 @@ + + prefer_lower_delay_standby (boolean) + + + prefer_lower_delay_standby 設定パラメータ + + + + + + このパラメータはdelay_thresholdを使用しているときに有効です。 + onに設定すると、負荷分散先のスタンバイサーバがdelay_thresholdを超えて遅延したときに、プライマリサーバでなく、 + backend_weightが0より大きくて1番遅延の少ないスタンバイサーバを負荷分散ノードにします。 + 全てのスタンバイサーバがdelay_thresholdを超えて遅延している場合はプライマリサーバに送ります。 + デフォルトはoffです。 + + + + このパラメータはPgpool-IIの設定を再読み込みすることで変更可能です。 + + + + log_standby_delay (string) diff --git a/doc/src/sgml/stream-check.sgml b/doc/src/sgml/stream-check.sgml index d4cef8a..0c9817c 100644 --- a/doc/src/sgml/stream-check.sgml +++ b/doc/src/sgml/stream-check.sgml @@ -199,6 +199,26 @@ + + prefer_lower_delay_standby (boolean) + + prefer_lower_delay_standby configuration parameter + + + + + This parameter is valid only when delay_threshold is set to a value greater than 0. + When set to on, if the delay of the load balancing node is greater than delay_threshold, + Pgpool-II sends read queries not to the primary node but to the least delayed standby whose backend_weight is greater than 0. + If the delay of every standby node is greater than delay_threshold, Pgpool-II sends the queries to the primary. + Default is off. + + + This parameter can be changed by reloading the Pgpool-II configurations. 
+ + + + log_standby_delay (string) diff --git a/src/config/pool_config_variables.c b/src/config/pool_config_variables.c index cd42b1f..2e67e2a 100644 --- a/src/config/pool_config_variables.c +++ b/src/config/pool_config_variables.c @@ -503,6 +503,16 @@ static struct config_bool ConfigureNamesBool[] = }, { + {"prefer_lower_delay_standby", CFGCXT_RELOAD, STREAMING_REPLICATION_CONFIG, + "If the load balance node is delayed over delay_threshold on SR, pgpool find another standby node which is lower delayed.", + CONFIG_VAR_TYPE_BOOL, false, 0 + }, + &g_pool_config.prefer_lower_delay_standby, + false, + NULL, NULL, NULL + }, + + { {"connection_cache", CFGCXT_INIT, CONNECTION_POOL_CONFIG, "Caches connections to backends.", CONFIG_VAR_TYPE_BOOL, false, 0 diff --git a/src/context/pool_query_context.c b/src/context/pool_query_context.c index 6635369..f60dc5a 100644 --- a/src/context/pool_query_context.c +++ b/src/context/pool_query_context.c @@ -547,8 +547,10 @@ pool_where_to_send(POOL_QUERY_CONTEXT * query_context, char *query, Node *node) */ /* - * If replication delay is too much, we prefer to send to - * the primary. + * As streaming replication delay is too much, if + * prefer_lower_delay_standby is true then elect new + * load balance node which is lower delayed, + * false then send to the primary. 
*/ if (STREAM && pool_config->delay_threshold && @@ -558,7 +560,20 @@ pool_where_to_send(POOL_QUERY_CONTEXT * query_context, char *query, Node *node) (errmsg("could not load balance because of too much replication delay"), errdetail("destination = %d for query= \"%s\"", dest, query))); - pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID); + if (pool_config->prefer_lower_delay_standby) + { + int new_load_balancing_node = select_load_balancing_node(); + ereport(DEBUG1, + (errmsg("select new load balancing node which is the most lower delay standby"), + errdetail("new load balancing node is node %d", new_load_balancing_node))); + session_context->load_balance_node_id = new_load_balancing_node; + session_context->query_context->load_balance_node_id = session_context->load_balance_node_id; + pool_set_node_to_be_sent(query_context, session_context->query_context->load_balance_node_id); + } + else + { + pool_set_node_to_be_sent(query_context, PRIMARY_NODE_ID); + } } /* diff --git a/src/include/pool_config.h b/src/include/pool_config.h index 08c9b1c..58f0758 100644 --- a/src/include/pool_config.h +++ b/src/include/pool_config.h @@ -304,6 +304,9 @@ typedef struct * that health_check_period required to be * greater than 0 to enable the * functionality. */ + + bool prefer_lower_delay_standby; + LogStandbyDelayModes log_standby_delay; /* how to log standby lag */ bool connection_cache; /* cache connection pool? 
*/ int health_check_timeout; /* health check timeout */ diff --git a/src/parser/gram_minimal.c b/src/parser/gram_minimal.c index 48dbaf1..dfea558 100644 --- a/src/parser/gram_minimal.c +++ b/src/parser/gram_minimal.c @@ -26931,7 +26931,7 @@ yyreduce: #line 1460 "gram_minimal.y" /* yacc.c:1646 */ { VariableSetStmt *n = (yyvsp[0].vsetstmt); - n->type = T_PgpoolVariableSetStmt; /* Hack to keep changes minumum */ + n->type = T_PgpoolVariableSetStmt; /* Hack to keep changes minimum */ n->is_local = false; (yyval.node) = (Node *) n; } @@ -27356,7 +27356,7 @@ yyreduce: #line 1720 "gram_minimal.y" /* yacc.c:1646 */ { VariableSetStmt *n = (yyvsp[0].vsetstmt); - n->type = T_PgpoolVariableSetStmt; /* Hack to keep the changes minumum */ + n->type = T_PgpoolVariableSetStmt; /* Hack to keep the changes minimum */ (yyval.node) = (Node *) n; } #line 27363 "gram_minimal.c" /* yacc.c:1646 */ diff --git a/src/protocol/pool_pg_utils.c b/src/protocol/pool_pg_utils.c index 59ba540..8a8d14b 100644 --- a/src/protocol/pool_pg_utils.c +++ b/src/protocol/pool_pg_utils.c @@ -313,6 +313,8 @@ select_load_balancing_node(void) POOL_SESSION_CONTEXT *ses = pool_get_session_context(false); int tmp; int no_load_balance_node_id = -2; + uint64 lowest_delay; + int lowest_delay_nodes[NUM_BACKENDS]; /* * -2 indicates there's no database_redirect_preference_list. -1 indicates @@ -399,6 +401,90 @@ select_load_balancing_node(void) if (suggested_node_id >= 0) { /* + * If Streaming Replication mode and delay_threshold and + * prefer_lower_delay_standby is true, we choose the least delayed + * node if suggested_node is standby and delayed over delay_threshold. 
+ */ + if (STREAM && + pool_config->delay_threshold && + pool_config->prefer_lower_delay_standby && + (suggested_node_id != PRIMARY_NODE_ID) && + (BACKEND_INFO(suggested_node_id).standby_delay > pool_config->delay_threshold)) + { + ereport(DEBUG1, + (errmsg("selecting load balance node"), + errdetail("suggested backend %d is streaming delayed over delay_threshold", suggested_node_id))); + + /* + * The new load balancing node is seleted from the + * nodes which have the lowest delay. + */ + lowest_delay = pool_config->delay_threshold; + + /* Initialize */ + total_weight = 0.0; + for (i = 0; i < NUM_BACKENDS; i++) + { + lowest_delay_nodes[i] = 0; + } + + for (i = 0; i < NUM_BACKENDS; i++) + { + if (VALID_BACKEND_RAW(i) && + (i != PRIMARY_NODE_ID) && + (BACKEND_INFO(i).backend_weight > 0.0)) + { + if (lowest_delay == BACKEND_INFO(i).standby_delay) + { + lowest_delay_nodes[i] = 1; + total_weight += BACKEND_INFO(i).backend_weight; + } + else if (lowest_delay > BACKEND_INFO(i).standby_delay) + { + int ii; + lowest_delay = BACKEND_INFO(i).standby_delay; + for (ii = 0; ii > NUM_BACKENDS; ii++) + { + lowest_delay_nodes[ii] = 0; + } + lowest_delay_nodes[i] = 1; + total_weight = BACKEND_INFO(i).backend_weight; + } + } + } + +#if defined(sun) || defined(__sun) + r = (((double) rand()) / RAND_MAX) * total_weight; +#else + r = (((double) random()) / RAND_MAX) * total_weight; +#endif + + selected_slot = PRIMARY_NODE_ID; + total_weight = 0.0; + for (i = 0; i < NUM_BACKENDS; i++) + { + if (lowest_delay_nodes[i] == 0) + continue; + + if (selected_slot == PRIMARY_NODE_ID) + selected_slot = i; + + if (r >= total_weight) + selected_slot = i; + else + break; + + total_weight += BACKEND_INFO(i).backend_weight; + + } + + ereport(DEBUG1, + (errmsg("selecting load balance node"), + errdetail("selected backend id is %d", selected_slot))); + return selected_slot; + } + + /* * If the weight is bigger than random rate then send to * suggested_node_id. 
If the weight is less than random rate then * choose load balance node from other nodes. @@ -470,6 +556,81 @@ select_load_balancing_node(void) total_weight += BACKEND_INFO(i).backend_weight; } } + + /* + * If Streaming Replication mode and delay_threshold and + * prefer_lower_delay_standby is true, we elect the most lower delayed + * node if suggested_node is standby and delayed over delay_threshold. + */ + if (STREAM && + pool_config->delay_threshold && + pool_config->prefer_lower_delay_standby && + (BACKEND_INFO(selected_slot).standby_delay > pool_config->delay_threshold)) + { + ereport(DEBUG1, + (errmsg("selecting load balance node"), + errdetail("backend id %d is streaming delayed over delay_threshold", selected_slot))); + + lowest_delay = pool_config->delay_threshold; + total_weight = 0.0; + for (i = 0; i < NUM_BACKENDS; i++) + { + lowest_delay_nodes[i] = 0; + } + + for (i = 0; i < NUM_BACKENDS; i++) + { + if ((i != PRIMARY_NODE_ID) && + VALID_BACKEND_RAW(i) && + (BACKEND_INFO(i).backend_weight > 0.0)) + { + if (lowest_delay == BACKEND_INFO(i).standby_delay) + { + lowest_delay_nodes[i] = 1; + total_weight += BACKEND_INFO(i).backend_weight; + } + else if (lowest_delay > BACKEND_INFO(i).standby_delay) + { + int ii; + lowest_delay = BACKEND_INFO(i).standby_delay; + for (ii = 0; ii > NUM_BACKENDS; ii++) + { + lowest_delay_nodes[ii] = 0; + } + lowest_delay_nodes[i] = 1; + total_weight = BACKEND_INFO(i).backend_weight; + } + } + } + +#if defined(sun) || defined(__sun) + r = (((double) rand()) / RAND_MAX) * total_weight; +#else + r = (((double) random()) / RAND_MAX) * total_weight; +#endif + + selected_slot = PRIMARY_NODE_ID; + + total_weight = 0.0; + for (i = 0; i < NUM_BACKENDS; i++) + { + if (lowest_delay_nodes[i] == 0) + continue; + + if (selected_slot == PRIMARY_NODE_ID) + { + selected_slot = i; + } + + if (r >= total_weight) + selected_slot = i; + else + break; + + total_weight += BACKEND_INFO(i).backend_weight; + } + } + ereport(DEBUG1, (errmsg("selecting load 
balance node"), errdetail("selected backend id is %d", selected_slot))); diff --git a/src/sample/pgpool.conf.sample-logical b/src/sample/pgpool.conf.sample-logical index b156250..14d5f55 100644 --- a/src/sample/pgpool.conf.sample-logical +++ b/src/sample/pgpool.conf.sample-logical @@ -426,6 +426,11 @@ delay_threshold = 10000000 # Threshold before not dispatching query to standby node # Unit is in bytes # Disabled (0) by default +prefer_lower_delay_standby = off + # If delay_threshold is set larger than 0, Pgpool-II sends queries to + # the primary when the selected node is delayed over delay_threshold. + # If this is set to on, Pgpool-II sends queries to another standby + # with lower delay instead. # - Special commands - diff --git a/src/sample/pgpool.conf.sample-raw b/src/sample/pgpool.conf.sample-raw index 77cc94f..e806595 100644 --- a/src/sample/pgpool.conf.sample-raw +++ b/src/sample/pgpool.conf.sample-raw @@ -466,6 +466,11 @@ delay_threshold = 10000000 # Threshold before not dispatching query to standby node # Unit is in bytes # Disabled (0) by default +prefer_lower_delay_standby = off + # If delay_threshold is set larger than 0, Pgpool-II sends queries to + # the primary when the selected node is delayed over delay_threshold. + # If this is set to on, Pgpool-II sends queries to another standby + # with lower delay instead. # - Special commands - diff --git a/src/sample/pgpool.conf.sample-replication b/src/sample/pgpool.conf.sample-replication index b72dc90..8544535 100644 --- a/src/sample/pgpool.conf.sample-replication +++ b/src/sample/pgpool.conf.sample-replication @@ -462,6 +462,11 @@ delay_threshold = 0 # Threshold before not dispatching query to standby node # Unit is in bytes # Disabled (0) by default +prefer_lower_delay_standby = off + # If delay_threshold is set larger than 0, Pgpool-II sends queries to + # the primary when the selected node is delayed over delay_threshold. + # If this is set to on, Pgpool-II sends queries to another standby + # with lower delay instead. 
# - Special commands - diff --git a/src/sample/pgpool.conf.sample-slony b/src/sample/pgpool.conf.sample-slony index ce802c4..a61af0e 100644 --- a/src/sample/pgpool.conf.sample-slony +++ b/src/sample/pgpool.conf.sample-slony @@ -463,6 +463,11 @@ delay_threshold = 0 # Threshold before not dispatching query to standby node # Unit is in bytes # Disabled (0) by default +prefer_lower_delay_standby = off + # If delay_threshold is set larger than 0, Pgpool-II sends queries to + # the primary when the selected node is delayed over delay_threshold. + # If this is set to on, Pgpool-II sends queries to another standby + # with lower delay instead. # - Special commands - diff --git a/src/sample/pgpool.conf.sample-snapshot b/src/sample/pgpool.conf.sample-snapshot index 60499bd..cf489f7 100644 --- a/src/sample/pgpool.conf.sample-snapshot +++ b/src/sample/pgpool.conf.sample-snapshot @@ -460,6 +460,11 @@ delay_threshold = 0 # Threshold before not dispatching query to standby node # Unit is in bytes # Disabled (0) by default +prefer_lower_delay_standby = off + # If delay_threshold is set larger than 0, Pgpool-II sends queries to + # the primary when the selected node is delayed over delay_threshold. + # If this is set to on, Pgpool-II sends queries to another standby + # with lower delay instead. # - Special commands - diff --git a/src/sample/pgpool.conf.sample-stream b/src/sample/pgpool.conf.sample-stream index 84fdce3..4097a58 100644 --- a/src/sample/pgpool.conf.sample-stream +++ b/src/sample/pgpool.conf.sample-stream @@ -465,6 +465,11 @@ delay_threshold = 10000000 # Threshold before not dispatching query to standby node # Unit is in bytes # Disabled (0) by default +prefer_lower_delay_standby = off + # If delay_threshold is set larger than 0, Pgpool-II sends queries to + # the primary when the selected node is delayed over delay_threshold. + # If this is set to on, Pgpool-II sends queries to another standby + # with lower delay instead. 
# - Special commands - diff --git a/src/test/regression/tests/033.prefer_lower_standby_delay/test.sh b/src/test/regression/tests/033.prefer_lower_standby_delay/test.sh index e69de29..a4d1744 100644 --- a/src/test/regression/tests/033.prefer_lower_standby_delay/test.sh +++ b/src/test/regression/tests/033.prefer_lower_standby_delay/test.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +#------------------------------------------------------------------- +# test script for load balancing. +# +source $TESTLIBS +TESTDIR=testdir +PG_CTL=$PGBIN/pg_ctl +PSQL="$PGBIN/psql -X " + +version=`$PSQL --version|awk '{print $3}'` +major_version=${version%.*} + +result=`echo "$major_version >= 10"|bc` +if [ $result == 1 ];then + REPLAY_PAUSE="SELECT pg_wal_replay_pause();" + REPLAY_RESUME="SELECT pg_wal_replay_resume();" +else + REPLAY_PAUSE="SELECT pg_xlog_replay_pause();" + REPLAY_RESUME="SELECT pg_xlog_replay_resume();" +fi + +for mode in s +do + rm -fr $TESTDIR + mkdir $TESTDIR + cd $TESTDIR + + # create test environment + echo -n "creating test environment..." + $PGPOOL_SETUP -m s -n 3 || exit 1 + echo "done." + + source ./bashrc.ports + echo "app_name_redirect_preference_list = 'psql:1'" >> etc/pgpool.conf + echo "delay_threshold = 10" >> etc/pgpool.conf + echo "prefer_lower_delay_standby = on" >> etc/pgpool.conf + echo "sr_check_period = 3" >> etc/pgpool.conf + + ./startall + + export PGPORT=$PGPOOL_PORT + + wait_for_pgpool_startup + + $PSQL test </dev/null 2>&1 + if [ $? != 0 ];then + # expected result not found + echo fail: query is sent to primary node. + ./shutdownall + exit 1 + fi + + echo ok: query is sent to another standby node. + + $PSQL -p 11003 -c "$REPLAY_RESUME" + + echo start: prefer_lower_delay_standby is off. + + echo "prefer_lower_delay_standby = off" >> etc/pgpool.conf + + $PGPOOL_INSTALL_DIR/bin/pcp_reload_config -w -h localhost -p $PCP_PORT + + wait_for_pgpool_startup + + $PSQL -p 11003 -c "$REPLAY_PAUSE" + + $PSQL test </dev/null 2>&1 + if [ $? 
!= 0 ];then + # expected result not found + echo fail: query is sent to standby node. + ./shutdownall + exit 1 + fi + + echo ok: prefer lower delay standby works. + + ./shutdownall + +done +exit 0 \ No newline at end of file diff --git a/src/utils/pool_process_reporting.c b/src/utils/pool_process_reporting.c index 429864b..62da4de 100644 --- a/src/utils/pool_process_reporting.c +++ b/src/utils/pool_process_reporting.c @@ -574,6 +574,11 @@ get_config(int *nrows) StrNCpy(status[i].desc, "standby delay threshold", POOLCONFIG_MAXDESCLEN); i++; + StrNCpy(status[i].name, "prefer_lower_delay_standby", POOLCONFIG_MAXNAMELEN); + snprintf(status[i].value, POOLCONFIG_MAXVALLEN, "%d", pool_config->prefer_lower_delay_standby); + StrNCpy(status[i].desc, "load balancing considering streaming delay", POOLCONFIG_MAXDESCLEN); + i++; + /* - Special commands - */ StrNCpy(status[i].name, "follow_primary_command", POOLCONFIG_MAXNAMELEN); snprintf(status[i].value, POOLCONFIG_MAXVALLEN, "%s", pool_config->follow_primary_command);