[pgpool-general-jp: 1124] Re: オンラインリカバリ後にゾンビプロセスになる

Tatsuo Ishii ishii @ sraoss.co.jp
2012年 11月 23日 (金) 00:07:33 JST


石井です。

ご報告ありがとうございます。すでにgit上ではこの問題は修正されていると思うのですが、
ご提案いただいたパッチも適用したほうが良いような気もするので、検討してみます。
--
Tatsuo Ishii
SRA OSS, Inc. Japan
English: http://www.sraoss.co.jp/index_en.php
Japanese: http://www.sraoss.co.jp

> こんにちは。後藤と申します。
> 
> 同じような現象が再現しましたのでご報告いたします。
> 
> --
> postgresql9系の評価をするために以下の環境で構築しています。
> 
> ・windows7のVMware player上の仮想マシン環境
> ・CentOS release 6.2 (Final)
> ・postgresql91-9.1.6-1PGDG.rhel6.x86_64
> ・pgpool-II-91-3.1.3-2.rhel6.x86_64
> （postgresql91 と pgpool-II-91 は yum で
>  pgdg91 レポジトリからインストールしました）
> 
> マシンＡ(DB21)，Ｂ(DB22)の２台構成で、
> それぞれpostgresql、pgpoolをインストールしてあります。
> （マシンＢのpgpoolは起動していません）
> 
> pgpool.conf はレプリケーションモードで運用しています。
> つまり
> replication_mode = on 
> master_slave_mode = off
> parallel_mode = off
> です。
> 
> この構成で同期がとれている状態からpcp_detach_nodeをしたり、
> 片系を切り離してオンラインリカバリや pcp_attach_node を実施すると、
> PCPの子プロセスがゾンビとなり、以降pcpのコマンドの応答がなくなる場合があります。
> 
> --
> ■子プロセスの確認
> [root @ DB21 9.1]# ps ax | grep pgpool
> 24848 ?        Ss     0:00 /usr/pgpool-9.1/bin/pgpool -f /etc/pgpool-II-91/pgpool.conf
> 25054 ?        S      0:00 pgpool: wait for connection request
> 25055 ?        S      0:00 pgpool: wait for connection request
> ...
> 25084 ?        S      0:00 pgpool: wait for connection request
> 25085 ?        S      0:00 pgpool: wait for connection request
> 25086 ?        S      0:00 pgpool: worker process
> 25088 ?        S      0:00 pgpool: PCP: wait for connection request
> 25092 pts/1    S+     0:00 grep pgpool
> 
> ■別のターミナルでdetach/attachを繰り返し
> [root @ DB21 ~]# pcp_node_info 10 192.168.68.151 9898 postgres postgres 0
> 192.168.68.151 5433 1 0.500000
> [root @ DB21 ~]# pcp_node_info 10 192.168.68.151 9898 postgres postgres 1
> 192.168.68.152 5433 1 0.500000
> [root @ DB21 ~]# pcp_detach_node 10 192.168.68.151 9898 postgres postgres 0
> [root @ DB21 ~]# pcp_node_info 10 192.168.68.151 9898 postgres postgres 0
> (応答なし。１０秒以上経っても復帰せず)
> 
> --
> ■その時のプロセス状態
> 
> [root @ DB21 9.1]#  ps ax | grep pgpool
> 24848 ?        Ss     0:00 /usr/pgpool-9.1/bin/pgpool -f /etc/pgpool-II-91/pgpool.conf
> 25088 ?        Z      0:00 [pgpool] <defunct>
> 25099 ?        S      0:00 pgpool: wait for connection request
> 25100 ?        S      0:00 pgpool: wait for connection request
> ...
> 25129 ?        S      0:00 pgpool: wait for connection request
> 25130 ?        S      0:00 pgpool: wait for connection request
> 25131 ?        S      0:00 pgpool: worker process
> 25139 pts/1    S+     0:00 grep pgpool
> 
> 
> ■その時のpgpoolログ(別ターミナル)
> 
> [root @ DB21 log]# tail -f /var/log/messages
> Nov 21 19:27:35 DB21 pgpool[25088]: degenerate_backend_set: 0 fail over request from pid 25088
> Nov 21 19:27:35 DB21 pgpool[24848]: starting degeneration. shutdown host 192.168.68.151(5433)
> Nov 21 19:27:35 DB21 pgpool[24848]: Restart all children
> Nov 21 19:27:35 DB21 pgpool[24848]: execute command: echo failover 0 192.168.68.151 5433 /var/lib/pgsql/9.1/data 1 192.168.68.152 0 0
> Nov 21 19:27:35 DB21 pgpool[24848]: failover: set new primary node: -1
> Nov 21 19:27:35 DB21 pgpool[24848]: failover: set new master node: 1
> Nov 21 19:27:35 DB21 pgpool[25086]: worker process received restart request
> Nov 21 19:27:35 DB21 pgpool[24848]: failover done. shutdown host 192.168.68.151(5433)
> Nov 21 19:27:36 DB21 pgpool[25088]: pcp child process received restart request
> Nov 21 19:27:36 DB21 pgpool[24848]: worker child 25086 exits with status 256
> Nov 21 19:27:36 DB21 pgpool[24848]: fork a new worker child pid 25131
> 
> 
> ■応答が無くなってから、親プロセスに SIGCHLD を投げるとpcpコマンドが復帰する。
> [root @ DB21 9.1]# kill -SIGCHLD 24848
> [root @ DB21 9.1]# ps ax | grep pgpool
> 24848 ?        Ss     0:00 /usr/pgpool-9.1/bin/pgpool -f /etc/pgpool-II-91/pgpool.conf
> 25099 ?        S      0:00 pgpool: wait for connection request
> 25100 ?        S      0:00 pgpool: wait for connection request
> ...
> 25129 ?        S      0:00 pgpool: wait for connection request
> 25130 ?        S      0:00 pgpool: wait for connection request
> 25131 ?        S      0:00 pgpool: worker process
> 25143 ?        S      0:00 pgpool: PCP: wait for connection request
> 25147 pts/1    S+     0:00 grep pgpool
> 
> ■上記のmessagesの続き
> 
> Nov 21 19:29:28 DB21 pgpool[24848]: PCP child 25088 exits with status 256
> Nov 21 19:29:28 DB21 pgpool[24848]: fork a new PCP child pid 25143
> 
> 
> ■さっき応答がなくてフリーズしていたコマンドが復帰して結果が出力されている
> [root @ DB21 ~]# pcp_node_info 10 192.168.68.151 9898 postgres postgres 0
> 192.168.68.151 5433 3 0.500000
> 
> --
> 
> うちの環境では再現率はかなり高い感じでした。
> （7〜8割くらい）
> 
> waitpid周りが怪しいと思い、以下のようにソースを修正したところ、
> この現象は再現しなくなりました。
> 
> [root @ jtn-test tr]# diff -Naru main.c.org main.c
> --- main.c.org  2012-11-21 18:47:55.000000000 +0900
> +++ main.c      2012-11-21 18:48:39.000000000 +0900
> @@ -2134,7 +2134,8 @@
> 
>                         pcp_pid = pcp_fork_a_child(pcp_unix_fd, pcp_inet_fd, pcp_conf_file);
>                         pool_log("fork a new PCP child pid %d", pcp_pid);
> -                       break;
> +                       //break;
> +                       continue;
>                 }
> 
>                 /* exiting process was worker process */
> @@ -2149,7 +2150,8 @@
>                                 worker_pid = worker_fork_a_child();
> 
>                         pool_log("fork a new worker child pid %d", worker_pid);
> -                       break;
> +                       //break;
> +                       continue;
>                 } else
>                 {
>                         if (WIFSIGNALED(status))
> 
> --
> 
> 以上です。
> 
> --
> s-fukudaさん<s-fukuda @ acs21.co.jp>wrote:
>> はじめまして。
>> 福田と申します。
>> 
>> 現在、PGPOOL-Ⅱ(3.1.3)＋PostgreSQL(8.4.12-1)を利用
>> しております。
>> 
>> レプリケーションモードで運用し、オンラインリカバリの
>> 設定を行い、pcp_recovery_nodeを実行したところ、コマンド
>> は正常に終了するのですが、プロセスのリスタートがかかった
>> 際にプロセスがゾンビプロセスとなってしまいます。
>> 
>> ＜実行前＞
>> postgres 18045 17008 0 10:13 ? 00:00:00 pgpool: PCP: wait for connection request
>> 
>> ＜実行後＞
>> postgres 19114 17008 0 15:52 ? 00:00:00 [pgpool] <defunct>
>> 
>> ■実行したコマンド
>> 
>> pcp_recovery_node 100 localhost ポート ユーザ名 パスワード ノードID
>> 
>> 
>> ■環境
>>  RHEL 5.6 (64bit)
>>  - pgpool-Ⅱ 3.1.3
>> 
>>  RHEL 5.6 (64bit)
>>  - PostgreSQL 8.4.12-1
>> 
>> 
>> 対処法がわからず困っております。
>> 
>> どなたか対処法をご存知な方、ご教授願えませんでしょうか。
>> 
>> 以上、よろしくお願いいたします。
>> 
> 
> 
> -- 
>  後藤 大輔<gotoh @ m-design.com>
>  株式会社エム・ディー・シー
>  〒212-0012 川崎市幸区中幸町３丁目２
>  Tel. 044-555-3185 Fax. 044-555-5700


pgpool-general-jp メーリングリストの案内