[Pgpool-general] Consistency issues
Tatsuo Ishii
ishii at sraoss.co.jp
Wed Dec 23 09:32:13 UTC 2009
Done and committed to CVS HEAD.
For your convenience I included patches for this.
--
Tatsuo Ishii
SRA OSS, Inc. Japan
> Hi Fernando,
>
> Thanks for the info. Oh, pgpool's recovery process connects to
> template1. That was the cause of the problem. Pgpool should connects
> to "postgres" database instead of template1, I think. Will fix.
> --
> Tatsuo Ishii
> SRA OSS, Inc. Japan
>
> > Hi Tatsuo,
> >
> > Thanks for your answer. Looking at the logs i found that the error was:
> >
> > ERROR: source database "template1" is being accessed by other users
> > DETAIL: There are 1 other session(s) using the database.
> > STATEMENT: create database test123;
> >
> > So i just have to take care for not running CREATE TABLE when a node
> > is being recovered.
> >
> > Regards,
> > ---
> >
> > Fernando Marcelo
> > www.consultorpc.com
> > fernando at consultorpc.com
> > Tel: +34 902 998971
> > Fax: +34 91 7903701
> >
> > ## legal disclaimer
> >
> > The information contained in this email is confidential. It is
> > intended only
> > for the stated addressee(s) and access to it by any other person is
> > unauthorized. If you are not an addressee, you must not disclose, copy,
> > circulate or in any other way use or rely on the information contained
> > in
> > this email. Such unauthorized use may be unlawful. If you have
> > received this
> > email in error, please inform us immediately by emailing admin at consultorpc.com
> > and delete it and all copies from your system.
> >
> > ## end mail
> >
> > Em 22/12/2009, às 01:57, Tatsuo Ishii escreveu:
> >
> > >> Hello,
> > >>
> > >> I have pgpool setup with 4 postgres nodes. I tried to run the
> > >> following test with:
> > >>
> > >> 3 Active nodes
> > >> 1 Failed nodes
> > >>
> > >> While recovering the failed node i decided to connect on pgpool and
> > >> run a simple create database. What i got was:
> > >>
> > >> # create database test123;
> > >> ERROR: kind mismatch among backends. Possible last query was:
> > >> "create
> > >> database test123;" kind details are: 0[E] 1[C] 3[C]
> > >
> > > You should find something in the PostgreSQL log of node 0.
> > > (0[E] means node 0 PostgreSQL reported error condition, while 1[C]
> > > means the operation completed noramly).
> > >
> > > Note that pgpool-II 2.3.1 reports PostgreSQL error in case [E], which
> > > should make admin's life a little bit easier.
> > > --
> > > Tatsuo Ishii
> > > SRA OSS, Inc. Japan
> > >
> > >> HINT: check data consistency among db nodes
> > >> server closed the connection unexpectedly
> > >> This probably means the server terminated abnormally
> > >> before or while processing the request.
> > >> The connection to the server was lost. Attempting reset: Succeeded.
> > >>
> > >> An this caused one of the nodes to be at an inconsistency state
> > >> ( without test123 database ).
> > >>
> > >> Do you know what could have caused this problem? Do i have to stop
> > >> pgpool when running pcp_recovery_node?
> > >>
> > >> Thanks,
> > >> ---
> > >>
> > >> Fernando Marcelo
> > >> www.consultorpc.com
> > >> fernando at consultorpc.com
> > >> Tel: +34 902 998971
> > >> Fax: +34 91 7903701
> > >>
> > >> ## legal disclaimer
> > >>
> > >> The information contained in this email is confidential. It is
> > >> intended only
> > >> for the stated addressee(s) and access to it by any other person is
> > >> unauthorized. If you are not an addressee, you must not disclose,
> > >> copy,
> > >> circulate or in any other way use or rely on the information
> > >> contained
> > >> in
> > >> this email. Such unauthorized use may be unlawful. If you have
> > >> received this
> > >> email in error, please inform us immediately by emailing admin at consultorpc.com
> > >> and delete it and all copies from your system.
> > >>
> > >> ## end mail
> > >>
> > >> _______________________________________________
> > >> Pgpool-general mailing list
> > >> Pgpool-general at pgfoundry.org
> > >> http://pgfoundry.org/mailman/listinfo/pgpool-general
> >
> > _______________________________________________
> > Pgpool-general mailing list
> > Pgpool-general at pgfoundry.org
> > http://pgfoundry.org/mailman/listinfo/pgpool-general
> _______________________________________________
> Pgpool-general mailing list
> Pgpool-general at pgfoundry.org
> http://pgfoundry.org/mailman/listinfo/pgpool-general
-------------- next part --------------
Index: main.c
===================================================================
RCS file: /cvsroot/pgpool/pgpool-II/main.c,v
retrieving revision 1.57
diff -c -r1.57 main.c
*** main.c 16 Dec 2009 09:35:41 -0000 1.57
--- main.c 23 Dec 2009 09:19:14 -0000
***************
*** 1476,1481 ****
--- 1476,1483 ----
{
int fd;
int sts;
+ static bool is_first = true;
+ static char *dbname;
/* V2 startup packet */
typedef struct {
***************
*** 1486,1498 ****
char kind;
int i;
if (*InRecovery)
return 0;
memset(&mysp, 0, sizeof(mysp));
mysp.len = htonl(296);
mysp.sp.protoVersion = htonl(PROTO_MAJOR_V2 << 16);
! strcpy(mysp.sp.database, "template1");
strncpy(mysp.sp.user, pool_config->health_check_user, sizeof(mysp.sp.user) - 1);
*mysp.sp.options = '\0';
*mysp.sp.unused = '\0';
--- 1488,1508 ----
char kind;
int i;
+ /* Do health check during recovery */
if (*InRecovery)
return 0;
+ Retry:
+ /*
+ * First we try with "postgres" database.
+ */
+ if (is_first)
+ dbname = "postgres";
+
memset(&mysp, 0, sizeof(mysp));
mysp.len = htonl(296);
mysp.sp.protoVersion = htonl(PROTO_MAJOR_V2 << 16);
! strcpy(mysp.sp.database, dbname);
strncpy(mysp.sp.user, pool_config->health_check_user, sizeof(mysp.sp.user) - 1);
*mysp.sp.options = '\0';
*mysp.sp.unused = '\0';
***************
*** 1554,1565 ****
--- 1564,1587 ----
return i+1;
}
+ if (is_first)
+ is_first = false;
+
/*
* If a backend raised a FATAL error(max connections error or
* starting up error?), do not send a Terminate message.
*/
if ((kind != 'E') && (write(fd, "X", 1) < 0))
{
+ if (!strcmp(dbname, "postgres"))
+ {
+ /*
+ * Retry with template1
+ */
+ dbname = "template1";
+ goto Retry;
+ }
+
pool_error("health check failed during write. host %s at port %d is down. reason: %s. Perhaps wrong health check user?",
BACKEND_INFO(i).backend_hostname,
BACKEND_INFO(i).backend_port,
Index: recovery.c
===================================================================
RCS file: /cvsroot/pgpool/pgpool-II/recovery.c,v
retrieving revision 1.13
diff -c -r1.13 recovery.c
*** recovery.c 22 Aug 2009 04:04:21 -0000 1.13
--- recovery.c 23 Dec 2009 09:19:14 -0000
***************
*** 268,290 ****
int i = 0;
char port_str[16];
PGconn *conn;
snprintf(port_str, sizeof(port_str),
"%d", backend->backend_port);
do {
ConnStatusType r;
conn = PQsetdbLogin(backend->backend_hostname,
port_str,
NULL,
NULL,
! "template1",
pool_config->recovery_user,
pool_config->recovery_password);
r = PQstatus(conn);
PQfinish(conn);
if (r == CONNECTION_OK)
return 0;
if (WAIT_RETRY_COUNT != 0)
sleep(3);
} while (i++ < WAIT_RETRY_COUNT);
--- 268,310 ----
int i = 0;
char port_str[16];
PGconn *conn;
+ static bool is_first = true;
+ static char *dbname;
snprintf(port_str, sizeof(port_str),
"%d", backend->backend_port);
+
+ Retry:
+ /*
+ * First we try with "postgres" database.
+ */
+ if (is_first)
+ dbname = "postgres";
+
do {
ConnStatusType r;
conn = PQsetdbLogin(backend->backend_hostname,
port_str,
NULL,
NULL,
! dbname,
pool_config->recovery_user,
pool_config->recovery_password);
+
+ if (is_first)
+ is_first = false;
+
r = PQstatus(conn);
PQfinish(conn);
if (r == CONNECTION_OK)
return 0;
+ /*
+ * Retry with template1
+ */
+ dbname = "template1";
+ goto Retry;
+
if (WAIT_RETRY_COUNT != 0)
sleep(3);
} while (i++ < WAIT_RETRY_COUNT);
More information about the Pgpool-general
mailing list