[pgpool-general-jp: 1515] pgpoolでエラーが発生します「child process with pid: 28828 was terminated by segmentation fault」

KUROSAWA Hiroshi hiroshi.kurosawa @ und-meer.com
2018年 4月 3日 (火) 12:09:52 JST


お世話になります
黒沢と申します
pgpoolで確認させてください
CakePHP3からテーブルデータを書き写す際にエラーが発生します
問題はSQLにあるのでしょうか
データ量は10万レコードほどでCTEでselectで加工しつつinsertしています
WITH RECURSIVE のネストが問題になるでしょうか
それだとpostgresでエラーがでてよさそうなんですが、
pgpoolで制限があるでしょうか?

どうぞよろしくお願いします。

環境は
CentOS Linux 7.4
pgpool-II 3.7.2
postgresql 9.6.8
です
pgpool.netとpostgresql.orgのリポジトリからyumで入れました

coreダンプをgdbで見てみました

# gdb /usr/src/debug/pgpool-II-3.7.2 /core.28828
GNU gdb (GDB) Red Hat Enterprise Linux 7.6.1-100.el7_4.1
Copyright (C) 2013 Free Software Foundation, Inc.
License GPLv3+: GNU GPL version 3 or later  
<http://gnu.org/licenses/gpl.html>
This is free software: you are free to change and redistribute it.
There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
and "show warranty" for details.
This GDB was configured as "x86_64-redhat-linux-gnu".
For bug reporting instructions, please see:
<http://www.gnu.org/software/gdb/bugs/>...
/usr/src/debug/pgpool-II-3.7.2: Success.
[New LWP 28828]
Reading symbols from /usr/bin/pgpool...Reading symbols from  
/usr/lib/debug/usr/bin/pgpool.debug...done.
done.
[Thread debugging using libthread_db enabled]
Using host libthread_db library "/lib64/libthread_db.so.1".
Core was generated by `pgpool: postgres test2 m-yyweb(43928) idle in tra'.
Program terminated with signal 11, Segmentation fault.
#0  0x00007ff9e4c296e4 in _IO_vfprintf_internal (s=s @ entry=0x7ffef4a6e020,
     format=<optimized out>, format @ entry=0x4b9426 "Execute: %s",
     ap=ap @ entry=0x7ffef4a6e198) at vfprintf.c:1635
1635              process_string_arg (((struct printf_spec *) NULL));
(gdb) bt
#0  0x00007ff9e4c296e4 in _IO_vfprintf_internal (s=s @ entry=0x7ffef4a6e020,
     format=<optimized out>, format @ entry=0x4b9426 "Execute: %s",
     ap=ap @ entry=0x7ffef4a6e198) at vfprintf.c:1635
#1  0x00007ff9e4cee615 in ___vsnprintf_chk (s=0x7ffef4a6e2d0 "Execute: ",
     maxlen=<optimized out>, flags=1, slen=<optimized out>,
     format=0x4b9426 "Execute: %s", args=args @ entry=0x7ffef4a6e198)
     at vsnprintf_chk.c:63
#2  0x00007ff9e4cee578 in ___snprintf_chk (
     s=s @ entry=0x7ffef4a6e2d0 "Execute: ", maxlen=maxlen @ entry=1024,
     flags=flags @ entry=1, slen=slen @ entry=1024, format=<optimized out>)
     at snprintf_chk.c:35
#3  0x0000000000446d2b in snprintf (__fmt=<optimized out>,
     __n=<optimized out>, __s=<optimized out>) at  
/usr/include/bits/stdio2.h:64
#4  pool_extended_send_and_wait  
(query_context=query_context @ entry=0x1779128,
     kind=kind @ entry=0x52eb1a "E", len=len @ entry=5,
     contents=contents @ entry=0x1776908 "", send_type=send_type @ entry=1,
     node_id=0, nowait=nowait @ entry=0 '\000')
     at context/pool_query_context.c:953
#5  0x0000000000436150 in Execute (frontend=frontend @ entry=0x17704d8,
     backend=backend @ entry=0x7ff9e6c66cd0, len=<optimized out>,
     contents=contents @ entry=0x1776908 "") at  
protocol/pool_proto_modules.c:855
#6  0x000000000043aa1e in ProcessFrontendResponse (
     frontend=frontend @ entry=0x17704d8,  
backend=backend @ entry=0x7ff9e6c66cd0)
---Type <return> to continue, or q <return> to quit---
     at protocol/pool_proto_modules.c:2377
#7  0x000000000042e180 in pool_process_query (frontend=0x17704d8,
     backend=0x7ff9e6c66cd0, reset_request=reset_request @ entry=0)
     at protocol/pool_process_query.c:247
#8  0x0000000000428721 in do_child (fds=fds @ entry=0x1769a00)
     at protocol/child.c:383
#9  0x0000000000409b35 in fork_a_child (fds=0x1769a00, id=0)
     at main/pgpool_main.c:611
#10 0x000000000040f85b in PgpoolMain (
     discard_status=discard_status @ entry=0 '\000',
     clear_memcache_oidmaps=clear_memcache_oidmaps @ entry=0 '\000')
     at main/pgpool_main.c:367
#11 0x000000000040812b in main (argc=<optimized out>, argv=<optimized out>)
     at main/main.c:318
(gdb)

straceもとってみました

20:32:57.915851 select(10,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], {30, 0}) = 1 (in  
[9], left {29, 996087}) <0.003972>
20:32:57.919941 select(10,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL) = 1 (in  
[9]) <0.000032>
20:32:57.920078  
read(9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>, "1\0\0\0\4",  
1024) = 5 <0.000033>
20:32:57.920201  
write(7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>, "1\0\0\0\4",  
5) = 5 <0.000069>
20:32:57.920348 select(10,  
[7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>  
8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>  
9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], [],  
[7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>  
8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>  
9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], {1, 0}) = 1 (in  
[7], left {0, 999996}) <0.000034>
20:32:57.920493 select(8,  
[7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>], NULL,  
[7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>], NULL) = 1 (in  
[7]) <0.000031>
20:32:57.920600  
read(7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>, "S\0\0\0\4",  
1024) = 5 <0.000034>
20:32:57.920704  
write(8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>, "S\0\0\0\4",  
5) = 5 <0.000147>
20:32:57.920928  
write(9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>, "S\0\0\0\4",  
5) = 5 <0.000058>
20:32:57.921067 select(9,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL) = 1 (in  
[8]) <0.000031>
20:32:57.921212  
read(8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>, "Z\0\0\0\5T",  
1024) = 6 <0.000033>
20:32:57.921320 select(10,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL) = 1 (in  
[9]) <0.000031>
20:32:57.921429  
read(9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>, "Z\0\0\0\5T",  
1024) = 6 <0.000031>
20:32:57.921528 rt_sigprocmask(SIG_SETMASK, ~[ILL TRAP ABRT BUS FPE SEGV  
CONT SYS RTMIN RT_1], [], 8) = 0 <0.000030>
20:32:57.921623 rt_sigprocmask(SIG_SETMASK, [], NULL, 8) = 0 <0.000028>
20:32:57.921713  
write(7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>,  
"Z\0\0\0\5T", 6) = 6 <0.000076>
20:32:57.921879 select(10,  
[7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>  
8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>  
9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], [],  
[7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>  
8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>  
9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], {1, 0}) = 1 (in  
[7], left {0, 999912}) <0.000121>
20:32:57.922120 select(8,  
[7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>], NULL,  
[7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>], NULL) = 1 (in  
[7]) <0.000031>
20:32:57.922234  
read(7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>,  
"B\0\0\0\37\0pdo_stmt_00000022\0\0\0\0\0\0\1\0\0D\0\0\0\6P\0E\0\0\0\t\0\0\0\0\0S\0\0\0\4",  
1024) = 54 <0.000032>
20:32:57.922348  
write(8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>,  
"P\0\0\0\37pgpool28828\0SELECT  
now()\0\0\0B\0\0\0$pgpool28828\0pgpool28828\0\0\0\0\0\0\1\0\0C\0\0\0\21Spgpool28828\0E\0\0\0\24pgpool28828\0\0\0\0\0C\0\0\0\21Ppgpool28828\0S\0\0\0\4",  
131) = 131 <0.000373>
20:32:57.922841 select(9,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL) = 1 (in  
[8]) <0.000034>
20:32:57.922971  
read(8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>,  
"1\0\0\0\0042\0\0\0\0043\0\0\0\4D\0\0\0&\0\1\0\0\0\0342018-03-30  
11:32:57.90247+00C\0\0\0\rSELECT 1\0003\0\0\0\4Z\0\0\0\5T", 1024) = 79  
<0.000032>
20:32:57.923102 stat("/etc/localtime", {st_mode=S_IFREG|0644,  
st_size=292, ...}) = 0 <0.000033>
20:32:57.923221 write(2<UNIX:[1175988->1175989]>, "2018-03-30 20:32:57:  
pid 28828: LOG:  DB node id: 0 backend pid: 28922 statement: SELECT 1  
FROM pgpool_catalog.insert_lock WHERE reloid =  
to_regclass('\"work_card_linkage2\"') FOR UPDATE\n", 185) = 185 <0.000053>
20:32:57.923361  
write(8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>,  
"P\0\0\0ypgpool28828\0SELECT 1 FROM pgpool_catalog.insert_lock WHERE  
reloid = to_regclass('\"work_card_linkage2\"') FOR  
UPDATE\0\0\0B\0\0\0$pgpool28828\0pgpool28828\0\0\0\0\0\0\1\0\0C\0\0\0\21Spgpool28828\0E\0\0\0\24pgpool28828\0\0\0\0\0C\0\0\0\21Ppgpool28828\0S\0\0\0\4",  
221) = 221 <0.000680>
20:32:57.924129 select(9,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL) = 1 (in  
[8]) <0.000032>
20:32:57.924254  
read(8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>,  
"1\0\0\0\0042\0\0\0\0043\0\0\0\4D\0\0\0\v\0\1\0\0\0\0011C\0\0\0\rSELECT  
1\0003\0\0\0\4Z\0\0\0\5T", 1024) = 52 <0.000032>
20:32:57.924371 stat("/etc/localtime", {st_mode=S_IFREG|0644,  
st_size=292, ...}) = 0 <0.000033>
20:32:57.924485 write(2<UNIX:[1175988->1175989]>, "2018-03-30 20:32:57:  
pid 28828: LOG:  DB node id: 0 backend pid: 28922 statement: B  
message\n", 92) = 92 <0.000045>
20:32:57.924604  
write(8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>,  
"B\0\0\0?\0pdo_stmt_00000022\0\0\0\0\1\0\0\0\0342018-03-30  
11:32:57.90247+00\0\1\0\0H\0\0\0\4", 69) = 69 <0.001256>
20:32:57.925984 select(9,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], {30, 0}) = 1 (in  
[8], left {29, 997622}) <0.002420>
20:32:57.928555 select(9,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL) = 1 (in  
[8]) <0.000032>
20:32:57.928695  
read(8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>, "2\0\0\0\4",  
1024) = 5 <0.000033>
20:32:57.928888 stat("/etc/localtime", {st_mode=S_IFREG|0644,  
st_size=292, ...}) = 0 <0.000035>
20:32:57.929019 write(2<UNIX:[1175988->1175989]>, "2018-03-30 20:32:57:  
pid 28828: LOG:  DB node id: 1 backend pid: 13799 statement: B  
message\n", 92) = 92 <0.000024>
20:32:57.929082  
write(9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>,  
"B\0\0\0?\0pdo_stmt_00000022\0\0\0\0\1\0\0\0\0342018-03-30  
11:32:57.90247+00\0\1\0\0H\0\0\0\4", 69) = 69 <0.000037>
20:32:57.929158 select(10,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], {30, 0}) = 1 (in  
[9], left {29, 992739}) <0.007306>
20:32:57.936572 select(10,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL) = 1 (in  
[9]) <0.000016>
20:32:57.936671  
read(9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>, "2\0\0\0\4",  
1024) = 5 <0.000018>
20:32:57.936777 brk(NULL)               = 0x2356000 <0.000027>
20:32:57.936836 brk(NULL)               = 0x2356000 <0.000014>
20:32:57.936883 brk(0x2234000)          = 0x2234000 <0.000129>
20:32:57.937040 brk(NULL)               = 0x2234000 <0.000013>
20:32:57.937128 brk(NULL)               = 0x2234000 <0.000014>
20:32:57.937167 brk(NULL)               = 0x2234000 <0.000013>
20:32:57.937206 brk(0x1d3e000)          = 0x1d3e000 <0.000595>
20:32:57.937830 brk(NULL)               = 0x1d3e000 <0.000014>
20:32:57.937882  
write(7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>, "2\0\0\0\4",  
5) = 5 <0.000050>
20:32:57.937993 stat("/etc/localtime", {st_mode=S_IFREG|0644,  
st_size=292, ...}) = 0 <0.000016>
20:32:57.938056 write(2<UNIX:[1175988->1175989]>, "2018-03-30 20:32:57:  
pid 28828: LOG:  DB node id: 0 backend pid: 28922 statement: D  
message\n", 92) = 92 <0.000026>
20:32:57.938121  
write(8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>,  
"D\0\0\0\6P\0H\0\0\0\4", 12) = 12 <0.000072>
20:32:57.938233 select(9,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], {30, 0}) = 1 (in  
[8], left {29, 999998}) <0.000017>
20:32:57.938296 select(9,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL,  
[8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>], NULL) = 1 (in  
[8]) <0.000016>
20:32:57.938361  
read(8<TCP:[192.168.100.154:39526->192.168.100.154:5432]>, "n\0\0\0\4",  
1024) = 5 <0.000015>
20:32:57.938418 stat("/etc/localtime", {st_mode=S_IFREG|0644,  
st_size=292, ...}) = 0 <0.000016>
20:32:57.938472 write(2<UNIX:[1175988->1175989]>, "2018-03-30 20:32:57:  
pid 28828: LOG:  DB node id: 1 backend pid: 13799 statement: D  
message\n", 92) = 92 <0.000017>
20:32:57.938524  
write(9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>,  
"D\0\0\0\6P\0H\0\0\0\4", 12) = 12 <0.000032>
20:32:57.938592 select(10,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], {30, 0}) = 1 (in  
[9], left {29, 999868}) <0.000148>
20:32:57.938791 select(10,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL,  
[9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>], NULL) = 1 (in  
[9]) <0.000034>
20:32:57.938885  
read(9<TCP:[192.168.100.154:35352->192.168.100.180:5432]>, "n\0\0\0\4",  
1024) = 5 <0.000016>
20:32:57.938964  
write(7<TCP:[192.168.100.199:9999->192.168.100.180:43928]>, "n\0\0\0\4",  
5) = 5 <0.000061>
20:32:57.939120 --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_MAPERR,  
si_addr=0x2123158} ---
20:32:57.961227 +++ killed by SIGSEGV (core dumped) +++


CakePHPで出たエラーは以下です

SQLSTATE[HY000]: General error: 7 no connection to the server

postgresで出たログは以下です

< 2018-03-30 20:32:57.902 JST > LOG:  statement: BEGIN
< 2018-03-30 20:32:57.907 JST > LOG:  execute pgpool28828/pgpool28828:  
SELECT count(*) FROM pg_catalog.pg_attrdef AS d, pg_catalog.pg_class AS  
c WHERE d.adrelid = c.oid AND d.adsrc ~ 'nextval' AND c.oid =  
to_regclass('"work_card_linkage2"')
< 2018-03-30 20:32:57.909 JST > LOG:  execute pgpool28828/pgpool28828:  
SELECT count(*) FROM pg_catalog.pg_class c JOIN pg_catalog.pg_namespace  
n ON (c.relnamespace = n.oid) WHERE nspname = 'pgpool_catalog' AND  
relname = 'insert_lock'
< 2018-03-30 20:32:57.910 JST > LOG:  execute pgpool28828/pgpool28828:  
SELECT 1 FROM pgpool_catalog.insert_lock WHERE reloid =  
to_regclass('"work_card_linkage2"') FOR UPDATE
< 2018-03-30 20:32:57.922 JST > LOG:  execute pgpool28828/pgpool28828:  
SELECT now()
< 2018-03-30 20:32:57.923 JST > LOG:  execute pgpool28828/pgpool28828:  
SELECT 1 FROM pgpool_catalog.insert_lock WHERE reloid =  
to_regclass('"work_card_linkage2"') FOR UPDATE
< 2018-03-30 20:32:57.961 JST > LOG:  unexpected EOF on client  
connection with an open transaction
< 2018-03-30 20:32:57.961 JST > LOG:  disconnection: session time:  
0:00:54.192 user=postgres database=test2 host=192.168.100.154 port=39526
< 2018-03-30 20:35:01.556 JST > LOG:  disconnection: session time:  
0:05:00.155 user=postgres database=test host=192.168.100.154 port=39498


pgpoolで出たログは以下です

Mar 30 20:32:57 m-yydb pgpool: 2018-03-30 20:32:57: pid 28828: LOG:  DB  
node id: 1 backend pid: 13799 statement: Parse:  WITH RECURSIVE "src" AS  
(( SELECT "w1"."card_no" AS "card_no"  FROM "work_card_linkage" AS "w1")  
  UNION ( SELECT "w2"."prev_card_no" AS "card_no"  FROM  
"work_card_linkage" AS "w2" WHERE "w2"."prev_card_no" IS NOT NULL)  
),"src2" AS ( SELECT  
"w3"."point","w3"."required_promotion","w3"."birthday","w3"."point_expire","w3"."gender","w3"."rank","w3"."coming_rank","w3"."usage_status","w3"."postal_code","w3"."card_no","w3"."prev_card_no","w4"."card_no"  
AS "newer_card_no" ,"w5"."prev_card_no" AS "prev_prev_card_no"  
,"src"."card_no" AS "root_card_no" ,0::int4 AS "lvl"  FROM  
"work_card_linkage" AS "w3" LEFT OUTER JOIN "work_card_linkage" AS "w4"  
ON  ("w4"."prev_card_no"="w3"."card_no" ) LEFT OUTER JOIN  
"work_card_linkage" AS "w5" ON  ("w5"."card_no"="w3"."prev_card_no" )  
LEFT OUTER JOIN "src" ON  ("src"."card_no"="w3"."prev_card_no" ) WHERE  
("w3"."prev_card_no" IS NOT NULL AND "w5"."prev_card_no" IS  
NULL)),"src3" AS (( SELECT * FROM "src2")  UNION ALL ( SELECT  
"w6"."point","w6"."required_promotion","w6"."birth
Mar 30 20:32:57 m-yydb pgpool: 2018-03-30 20:32:57: pid 28828: LOG:  DB  
node id: 0 backend pid: 28922 statement: SELECT 1 FROM  
pgpool_catalog.insert_lock WHERE reloid =  
to_regclass('"work_card_linkage2"') FOR UPDATE
Mar 30 20:32:57 m-yydb pgpool: 2018-03-30 20:32:57: pid 28828: LOG:  DB  
node id: 0 backend pid: 28922 statement: B message
Mar 30 20:32:57 m-yydb pgpool: 2018-03-30 20:32:57: pid 28828: LOG:  DB  
node id: 1 backend pid: 13799 statement: B message
Mar 30 20:32:57 m-yydb pgpool: 2018-03-30 20:32:57: pid 28828: LOG:  DB  
node id: 0 backend pid: 28922 statement: D message
Mar 30 20:32:57 m-yydb pgpool: 2018-03-30 20:32:57: pid 28828: LOG:  DB  
node id: 1 backend pid: 13799 statement: D message
Mar 30 20:32:57 m-yydb pgpool: 2018-03-30 20:32:57: pid 28820: WARNING:  
  child process with pid: 28828 was terminated by segmentation fault
Mar 30 20:32:57 m-yydb pgpool: 2018-03-30 20:32:57: pid 28820: LOG:  
fork a new child process with pid: 28931



pgpool-general-jp メーリングリストの案内