[pgpool-general-jp: 388] Re: pgpool 3.4.1のhealth checkについて
Kenichi Sawada
k @ sawada.cc
2008年 2月 26日 (火) 00:07:40 JST
澤田です。
パッチの件、ご検討ありがとうございます。メールを書いてから
よく考えてみたのですが、health_checkは主に親プロセスから呼ばれるため、
child.cからmain.cに移してしまった方が、health_check_timer_expiredを
グローバル変数にする必要がなく、筋が良いかと思いましたがいかがでしょうか?
(添付は元の3.4.1のコードに対する差分です)
> > また、health checkなしでも上記状況の場合に縮退させるには
> > どうしたらよいでしょうか?
>
> 今のところ手段は無いです。単なるロック待ちという可能性もあるので、異常
> かどうかを判断するのが難しいためです。health check を有効にしていただ
> くか、別の監視ツール等で異常な PostgreSQL をシャットダウンさせるような
> 仕組みが必要になります。
こちらもありがとうございます。health checkが良さそうですね。
--澤田 研一
--k @ sawada.cc
From: Yoshiyuki Asaba <y-asaba @ sraoss.co.jp>
Subject: [pgpool-general-jp: 387] Re: pgpool 3.4.1のhealth checkについて
Date: Mon, 25 Feb 2008 19:15:16 +0900 (JST)
> 浅羽です。
>
> From: Kenichi Sawada <k @ sawada.cc>
> Subject: [pgpool-general-jp: 386] pgpool 3.4.1のhealth checkについて
> Date: Mon, 25 Feb 2008 11:31:25 +0900 (JST)
>
> > pgpool-3.4.1をhealth checkありで使っているのですが、
> > 「バックエンドのPostgreSQLが何らかの原因で動作しなくなったが
> > portはlistenしており、TCPレベルではconnectするものの、返答が一切ない」
> > という状況で、health checkが動作(タイムアウト)しないようです。
> > これはおそらくhealth_check()においてALARMシグナルがブロックされている
> > ためではないかと思い、添付の通りパッチを作成してみたところ
> > 一応動作するようですが、他部分への影響など何かありますでしょうか?
>
> パッチありがとうございます。取り込ませていただきます。
>
>
> > また、health checkなしでも上記状況の場合に縮退させるには
> > どうしたらよいでしょうか?
>
> 今のところ手段は無いです。単なるロック待ちという可能性もあるので、異常
> かどうかを判断するのが難しいためです。health check を有効にしていただ
> くか、別の監視ツール等で異常な PostgreSQL をシャットダウンさせるような
> 仕組みが必要になります。
>
> --
> Yoshiyuki Asaba
> y-asaba @ sraoss.co.jp
> _______________________________________________
> pgpool-general-jp mailing list
> pgpool-general-jp @ sraoss.jp
> http://www.sraoss.jp/mailman/listinfo/pgpool-general-jp
>
-------------- next part --------------
*** main.c 2007-08-10 12:57:44.000000000 +0900
--- main.c.new 2008-02-25 23:54:04.000000000 +0900
***************
*** 360,366 ****
/*
* set health checker timeout. we want to detect
* commnuication path failure much earlier before
! * TCP/IP statck detects it.
*/
pool_signal(SIGALRM, health_check_timer_handler);
alarm(pool_config.health_check_timeout);
--- 360,366 ----
/*
* set health checker timeout. we want to detect
* commnuication path failure much earlier before
! * TCP/IP stack detects it.
*/
pool_signal(SIGALRM, health_check_timer_handler);
alarm(pool_config.health_check_timeout);
***************
*** 371,377 ****
--- 371,379 ----
*/
errno = 0;
health_check_timer_expired = 0;
+ POOL_SETMASK(&UnBlockSig);
sts = health_check();
+ POOL_SETMASK(&BlockSig);
if (errno != EINTR || (errno == EINTR && health_check_timer_expired))
{
***************
*** 1134,1136 ****
--- 1136,1253 ----
}
POOL_SETMASK(&BlockSig);
}
+
+ /*
+ * check if we can connect to the backend
+ * returns 0 for ok. -1 for master down, -2 for secondary down.
+ */
+ int health_check(void)
+ {
+ int fd;
+
+ /* V2 startup packet */
+ typedef struct {
+ int len; /* startup packet length */
+ StartupPacket_v2 sp;
+ } MySp;
+ MySp mysp;
+ char kind;
+
+ memset(&mysp, 0, sizeof(mysp));
+ mysp.len = htonl(296);
+ mysp.sp.protoVersion = htonl(PROTO_MAJOR_V2 << 16);
+ strcpy(mysp.sp.database, "template1");
+ strncpy(mysp.sp.user, pool_config.health_check_user, sizeof(mysp.sp.user) - 1);
+ *mysp.sp.options = '\0';
+ *mysp.sp.unused = '\0';
+ *mysp.sp.tty = '\0';
+
+ if (*pool_config.current_backend_host_name == '\0')
+ fd = connect_unix_domain_socket(0);
+ else
+ fd = connect_inet_domain_socket(0);
+
+ if (fd < 0)
+ {
+ pool_error("health check failed. master %s at port %d is down",
+ pool_config.current_backend_host_name,
+ pool_config.current_backend_port);
+ return -1;
+ }
+
+ if (write(fd, &mysp, sizeof(mysp)) < 0)
+ {
+ pool_error("health check failed during write. master %s at port %d is down",
+ pool_config.current_backend_host_name,
+ pool_config.current_backend_port);
+ close(fd);
+ return -1;
+ }
+
+ read(fd, &kind, 1);
+ if (health_check_timer_expired == 1) {
+ pool_error("health check failed during read. master %s at port %d is open but no response",
+ pool_config.current_backend_host_name,
+ pool_config.current_backend_port);
+ close(fd);
+ return -1;
+ }
+
+ if (write(fd, "X", 1) < 0)
+ {
+ pool_error("health check failed during write. master %s at port %d is down",
+ pool_config.current_backend_host_name,
+ pool_config.current_backend_port);
+ close(fd);
+ return -1;
+ }
+
+ close(fd);
+
+ if (!DUAL_MODE)
+ return 0;
+
+ if (*pool_config.secondary_backend_host_name == '\0')
+ fd = connect_unix_domain_socket(1);
+ else
+ fd = connect_inet_domain_socket(1);
+
+ if (fd < 0)
+ {
+ pool_error("health check failed. secondary %s at port %d is down",
+ pool_config.secondary_backend_host_name,
+ pool_config.secondary_backend_port);
+ return -2;
+ }
+
+ if (write(fd, &mysp, sizeof(mysp)) < 0)
+ {
+ pool_error("health check failed during write. secondary %s at port %d is down",
+ pool_config.secondary_backend_host_name,
+ pool_config.secondary_backend_port);
+ close(fd);
+ return -2;
+ }
+
+ read(fd, &kind, 1);
+ if (health_check_timer_expired == 1) {
+ pool_error("health check failed during read. secondary %s at port %d is open but no response",
+ pool_config.secondary_backend_host_name,
+ pool_config.secondary_backend_port);
+ close(fd);
+ return -2;
+ }
+
+ if (write(fd, "X", 1) < 0)
+ {
+ pool_error("health check failed during write. secondary %s at port %d is down",
+ pool_config.secondary_backend_host_name,
+ pool_config.secondary_backend_port);
+ close(fd);
+ return -2;
+ }
+
+ close(fd);
+
+ return 0;
+ }
-------------- next part --------------
*** child.c 2007-08-01 13:25:40.000000000 +0900
--- child.c.new 2008-02-25 23:52:15.000000000 +0900
***************
*** 1128,1231 ****
free(sp);
}
}
-
- /*
- * check if we can connect to the backend
- * returns 0 for ok. -1 for master down, -2 for secondary down.
- */
- int health_check(void)
- {
- int fd;
-
- /* V2 startup packet */
- typedef struct {
- int len; /* startup packet length */
- StartupPacket_v2 sp;
- } MySp;
- MySp mysp;
- char kind;
-
- memset(&mysp, 0, sizeof(mysp));
- mysp.len = htonl(296);
- mysp.sp.protoVersion = htonl(PROTO_MAJOR_V2 << 16);
- strcpy(mysp.sp.database, "template1");
- strncpy(mysp.sp.user, pool_config.health_check_user, sizeof(mysp.sp.user) - 1);
- *mysp.sp.options = '\0';
- *mysp.sp.unused = '\0';
- *mysp.sp.tty = '\0';
-
- if (*pool_config.current_backend_host_name == '\0')
- fd = connect_unix_domain_socket(0);
- else
- fd = connect_inet_domain_socket(0);
-
- if (fd < 0)
- {
- pool_error("health check failed. master %s at port %d is down",
- pool_config.current_backend_host_name,
- pool_config.current_backend_port);
- return -1;
- }
-
- if (write(fd, &mysp, sizeof(mysp)) < 0)
- {
- pool_error("health check failed during write. master %s at port %d is down",
- pool_config.current_backend_host_name,
- pool_config.current_backend_port);
- close(fd);
- return -1;
- }
-
- read(fd, &kind, 1);
-
- if (write(fd, "X", 1) < 0)
- {
- pool_error("health check failed during write. master %s at port %d is down",
- pool_config.current_backend_host_name,
- pool_config.current_backend_port);
- close(fd);
- return -1;
- }
-
- close(fd);
-
- if (!DUAL_MODE)
- return 0;
-
- if (*pool_config.secondary_backend_host_name == '\0')
- fd = connect_unix_domain_socket(1);
- else
- fd = connect_inet_domain_socket(1);
-
- if (fd < 0)
- {
- pool_error("health check failed. secondary %s at port %d is down",
- pool_config.secondary_backend_host_name,
- pool_config.secondary_backend_port);
- return -2;
- }
-
- if (write(fd, &mysp, sizeof(mysp)) < 0)
- {
- pool_error("health check failed during write. secondary %s at port %d is down",
- pool_config.secondary_backend_host_name,
- pool_config.secondary_backend_port);
- close(fd);
- return -2;
- }
-
- read(fd, &kind, 1);
-
- if (write(fd, "X", 1) < 0)
- {
- pool_error("health check failed during write. secondary %s at port %d is down",
- pool_config.secondary_backend_host_name,
- pool_config.secondary_backend_port);
- close(fd);
- return -2;
- }
-
- close(fd);
-
- return 0;
- }
--- 1128,1130 ----
pgpool-general-jp メーリングリストの案内