[Pgpool-hackers] retry of health check

Tatsuo Ishii ishii at sraoss.co.jp
Tue Mar 9 06:07:58 UTC 2010


> The health_check() in main.c retries the health check from the
> beginning only when a message other than ErrorResponse arrives
> from the backend and sending the Terminate message to the backend
> fails. Why do we need to retry only in that case?
> The retry with template1 seems useless. Am I missing something?

Thanks for the report. You are right! Here is the patch which should
fix the problem.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp
-------------- next part --------------
Index: main.c
===================================================================
RCS file: /cvsroot/pgpool/pgpool-II/main.c,v
retrieving revision 1.63
diff -c -r1.63 main.c
*** main.c	3 Mar 2010 00:31:39 -0000	1.63
--- main.c	9 Mar 2010 06:06:09 -0000
***************
*** 1544,1562 ****
  
  		if (fd < 0)
  		{
! 			pool_error("health check failed. %d th host %s at port %d is down",
  					   i,
  					   BACKEND_INFO(i).backend_hostname,
! 					   BACKEND_INFO(i).backend_port);
  
  			return i+1;
  		}
  
  		if (write(fd, &mysp, sizeof(mysp)) < 0)
  		{
! 			pool_error("health check failed during write. host %s at port %d is down. reason: %s",
  					   BACKEND_INFO(i).backend_hostname,
  					   BACKEND_INFO(i).backend_port,
  					   strerror(errno));
  			close(fd);
  			return i+1;
--- 1544,1564 ----
  
  		if (fd < 0)
  		{
! 			pool_error("health check failed. %d th host %s at port %d DB %s is down",
  					   i,
  					   BACKEND_INFO(i).backend_hostname,
! 					   BACKEND_INFO(i).backend_port,
! 					   dbname);
  
  			return i+1;
  		}
  
  		if (write(fd, &mysp, sizeof(mysp)) < 0)
  		{
! 			pool_error("health check failed during write. host %s at port %d DB %s is down. reason: %s",
  					   BACKEND_INFO(i).backend_hostname,
  					   BACKEND_INFO(i).backend_port,
+ 					   dbname,
  					   strerror(errno));
  			close(fd);
  			return i+1;
***************
*** 1578,1586 ****
  		}
  		else if (sts == 0)
  		{
! 			pool_error("health check failed. EOF encountered. host %s at port %d is down",
  					   BACKEND_INFO(i).backend_hostname,
! 					   BACKEND_INFO(i).backend_port);
  			close(fd);
  			return i+1;
  		}
--- 1580,1589 ----
  		}
  		else if (sts == 0)
  		{
! 			pool_error("health check failed. EOF encountered. host %s at port %d DB %s is down",
  					   BACKEND_INFO(i).backend_hostname,
! 					   BACKEND_INFO(i).backend_port,
! 					   dbname);
  			close(fd);
  			return i+1;
  		}
***************
*** 1589,1611 ****
  			is_first = false;
  
  		/*
! 		 * If a backend raised a FATAL error(max connections error or
! 		 * starting up error?), do not send a Terminate message.
  		 */
! 		if ((kind != 'E') && (write(fd, "X", 1) < 0))
  		{
! 			if (!strcmp(dbname, "postgres"))
! 			{
! 				/*
! 				 * Retry with template1
! 				 */
! 				dbname = "template1";
! 				goto Retry;
! 			}
  
! 			pool_error("health check failed during write. host %s at port %d is down. reason: %s. Perhaps wrong health check user?",
  					   BACKEND_INFO(i).backend_hostname,
  					   BACKEND_INFO(i).backend_port,
  					   strerror(errno));
  			close(fd);
  			return i+1;
--- 1592,1622 ----
  			is_first = false;
  
  		/*
! 		 * If a backend raised FATAL error(max connections error or
! 		 * starting up error?) and dbname is "postgres", do not send a
! 		 * Terminate message and retry with template1 database.
  		 */
! 		if (kind == 'E' && !strcmp(dbname, "postgres"))
  		{
! 			/*
! 			 * Retry with template1
! 			 */
! 			dbname = "template1";
! 			goto Retry;
! 		}
  
! 		/*
! 		 * Do not retry and return with failed node number if:
! 		 * backend was ok but failed to send terminate message or
! 		 * backend raised FATAL error and dbname is "template1".
! 		 */
! 		else if (((kind != 'E') && (write(fd, "X", 1) < 0)) ||
! 				 (kind == 'E' && !strcmp(dbname, "template1")))
! 		{
! 			pool_error("health check failed during write. host %s at port %d DB %s is down. reason: %s. Perhaps wrong health check user?",
  					   BACKEND_INFO(i).backend_hostname,
  					   BACKEND_INFO(i).backend_port,
+ 					   dbname,
  					   strerror(errno));
  			close(fd);
  			return i+1;


More information about the Pgpool-hackers mailing list