[Pgpool-general] Consistency issues

Tatsuo Ishii ishii at sraoss.co.jp
Wed Dec 23 09:32:13 UTC 2009


Done and committed to CVS HEAD.
For your convenience I included patches for this.
--
Tatsuo Ishii
SRA OSS, Inc. Japan

> Hi Fernando,
> 
> Thanks for the info. Oh, pgpool's recovery process connects to
> template1. That was the cause of the problem. Pgpool should connects
> to "postgres" database instead of template1, I think. Will fix.
> --
> Tatsuo Ishii
> SRA OSS, Inc. Japan
> 
> > Hi Tatsuo,
> > 
> > Thanks for your answer. Looking at the logs i found that the error was:
> > 
> > ERROR:  source database "template1" is being accessed by other users
> > DETAIL:  There are 1 other session(s) using the database.
> > STATEMENT:  create database test123;
> > 
> > So i just have to take care for not running CREATE TABLE when a node  
> > is being recovered.
> > 
> > Regards,
> > ---
> > 
> > Fernando Marcelo
> > www.consultorpc.com
> > fernando at consultorpc.com
> > Tel: +34 902 998971
> > Fax: +34 91 7903701
> > 
> > ## legal disclaimer
> > 
> > The information contained in this email is confidential. It is  
> > intended only
> > for the stated addressee(s) and access to it by any other person is
> > unauthorized. If you are not an addressee, you must not disclose, copy,
> > circulate or in any other way use or rely on the information contained  
> > in
> > this email. Such unauthorized use may be unlawful. If you have  
> > received this
> > email in error, please inform us immediately by emailing admin at consultorpc.com
> > and delete it and all copies from your system.
> > 
> > ## end mail
> > 
> > Em 22/12/2009, às 01:57, Tatsuo Ishii escreveu:
> > 
> > >> Hello,
> > >>
> > >> I have pgpool setup with 4 postgres nodes. I tried to run the
> > >> following test with:
> > >>
> > >> 3 Active nodes
> > >> 1 Failed nodes
> > >>
> > >> While recovering the failed node i decided to connect on pgpool and
> > >> run a simple create database. What i got was:
> > >>
> > >> # create database test123;
> > >> ERROR:  kind mismatch among backends. Possible last query was:  
> > >> "create
> > >> database test123;" kind details are: 0[E] 1[C] 3[C]
> > >
> > > You should find something in the PostgreSQL log of node 0.
> > > (0[E] means node 0 PostgreSQL reported error condition, while 1[C]
> > > means the operation completed noramly).
> > >
> > > Note that pgpool-II 2.3.1 reports PostgreSQL error in case [E], which
> > > should make admin's life a little bit easier.
> > > --
> > > Tatsuo Ishii
> > > SRA OSS, Inc. Japan
> > >
> > >> HINT:  check data consistency among db nodes
> > >> server closed the connection unexpectedly
> > >> 	This probably means the server terminated abnormally
> > >> 	before or while processing the request.
> > >> The connection to the server was lost. Attempting reset: Succeeded.
> > >>
> > >> An this caused one of the nodes to be at an inconsistency state
> > >> ( without test123 database ).
> > >>
> > >> Do you know what could have caused this problem? Do i have to stop
> > >> pgpool when running pcp_recovery_node?
> > >>
> > >> Thanks,
> > >> ---
> > >>
> > >> Fernando Marcelo
> > >> www.consultorpc.com
> > >> fernando at consultorpc.com
> > >> Tel: +34 902 998971
> > >> Fax: +34 91 7903701
> > >>
> > >> ## legal disclaimer
> > >>
> > >> The information contained in this email is confidential. It is
> > >> intended only
> > >> for the stated addressee(s) and access to it by any other person is
> > >> unauthorized. If you are not an addressee, you must not disclose,  
> > >> copy,
> > >> circulate or in any other way use or rely on the information  
> > >> contained
> > >> in
> > >> this email. Such unauthorized use may be unlawful. If you have
> > >> received this
> > >> email in error, please inform us immediately by emailing admin at consultorpc.com
> > >> and delete it and all copies from your system.
> > >>
> > >> ## end mail
> > >>
> > >> _______________________________________________
> > >> Pgpool-general mailing list
> > >> Pgpool-general at pgfoundry.org
> > >> http://pgfoundry.org/mailman/listinfo/pgpool-general
> > 
> > _______________________________________________
> > Pgpool-general mailing list
> > Pgpool-general at pgfoundry.org
> > http://pgfoundry.org/mailman/listinfo/pgpool-general
> _______________________________________________
> Pgpool-general mailing list
> Pgpool-general at pgfoundry.org
> http://pgfoundry.org/mailman/listinfo/pgpool-general
-------------- next part --------------
Index: main.c
===================================================================
RCS file: /cvsroot/pgpool/pgpool-II/main.c,v
retrieving revision 1.57
diff -c -r1.57 main.c
*** main.c	16 Dec 2009 09:35:41 -0000	1.57
--- main.c	23 Dec 2009 09:19:14 -0000
***************
*** 1476,1481 ****
--- 1476,1483 ----
  {
  	int fd;
  	int sts;
+ 	static bool is_first = true;
+ 	static char *dbname;
  
  	/* V2 startup packet */
  	typedef struct {
***************
*** 1486,1498 ****
  	char kind;
  	int i;
  
  	if (*InRecovery)
  		return 0;
  
  	memset(&mysp, 0, sizeof(mysp));
  	mysp.len = htonl(296);
  	mysp.sp.protoVersion = htonl(PROTO_MAJOR_V2 << 16);
! 	strcpy(mysp.sp.database, "template1");
  	strncpy(mysp.sp.user, pool_config->health_check_user, sizeof(mysp.sp.user) - 1);
  	*mysp.sp.options = '\0';
  	*mysp.sp.unused = '\0';
--- 1488,1508 ----
  	char kind;
  	int i;
  
+ 	/* Do health check during recovery */
  	if (*InRecovery)
  		return 0;
  
+  Retry:
+ 	/*
+ 	 * First we try with "postgres" database.
+ 	 */
+ 	if (is_first)
+ 		dbname = "postgres";
+ 
  	memset(&mysp, 0, sizeof(mysp));
  	mysp.len = htonl(296);
  	mysp.sp.protoVersion = htonl(PROTO_MAJOR_V2 << 16);
! 	strcpy(mysp.sp.database, dbname);
  	strncpy(mysp.sp.user, pool_config->health_check_user, sizeof(mysp.sp.user) - 1);
  	*mysp.sp.options = '\0';
  	*mysp.sp.unused = '\0';
***************
*** 1554,1565 ****
--- 1564,1587 ----
  			return i+1;
  		}
  
+ 		if (is_first)
+ 			is_first = false;
+ 
  		/*
  		 * If a backend raised a FATAL error(max connections error or
  		 * starting up error?), do not send a Terminate message.
  		 */
  		if ((kind != 'E') && (write(fd, "X", 1) < 0))
  		{
+ 			if (!strcmp(dbname, "postgres"))
+ 			{
+ 				/*
+ 				 * Retry with template1
+ 				 */
+ 				dbname = "template1";
+ 				goto Retry;
+ 			}
+ 
  			pool_error("health check failed during write. host %s at port %d is down. reason: %s. Perhaps wrong health check user?",
  					   BACKEND_INFO(i).backend_hostname,
  					   BACKEND_INFO(i).backend_port,
Index: recovery.c
===================================================================
RCS file: /cvsroot/pgpool/pgpool-II/recovery.c,v
retrieving revision 1.13
diff -c -r1.13 recovery.c
*** recovery.c	22 Aug 2009 04:04:21 -0000	1.13
--- recovery.c	23 Dec 2009 09:19:14 -0000
***************
*** 268,290 ****
  	int i = 0;
  	char port_str[16];
  	PGconn *conn;
  
  	snprintf(port_str, sizeof(port_str),
  			 "%d", backend->backend_port);
  	do {
  		ConnStatusType r;
  		conn = PQsetdbLogin(backend->backend_hostname,
  							port_str,
  							NULL,
  							NULL,
! 							"template1",
  							pool_config->recovery_user,
  							pool_config->recovery_password);
  		r = PQstatus(conn);
  		PQfinish(conn);
  		if (r == CONNECTION_OK)
  			return 0;
  
  		if (WAIT_RETRY_COUNT != 0)
  			sleep(3);
  	} while (i++ < WAIT_RETRY_COUNT);
--- 268,310 ----
  	int i = 0;
  	char port_str[16];
  	PGconn *conn;
+ 	static bool is_first = true;
+ 	static char *dbname;
  
  	snprintf(port_str, sizeof(port_str),
  			 "%d", backend->backend_port);
+ 
+  Retry:
+ 	/*
+ 	 * First we try with "postgres" database.
+ 	 */
+ 	if (is_first)
+ 		dbname = "postgres";
+ 
  	do {
  		ConnStatusType r;
  		conn = PQsetdbLogin(backend->backend_hostname,
  							port_str,
  							NULL,
  							NULL,
! 							dbname,
  							pool_config->recovery_user,
  							pool_config->recovery_password);
+ 
+ 		if (is_first)
+ 			is_first = false;
+ 
  		r = PQstatus(conn);
  		PQfinish(conn);
  		if (r == CONNECTION_OK)
  			return 0;
  
+ 		/*
+ 		 * Retry with template1
+ 		 */
+ 		dbname = "template1";
+ 		goto Retry;
+ 
  		if (WAIT_RETRY_COUNT != 0)
  			sleep(3);
  	} while (i++ < WAIT_RETRY_COUNT);


More information about the Pgpool-general mailing list