I am also experiencing crashes of stonithd: twice yesterday alone, and always on both nodes at the same time. Here is the stack trace:
root@kjp03:/var/crash# apport-retrace -Rs _usr_lib_pacemaker_stonithd.0.crash E: Can not find version '1.1.10+git20130802-1ubuntu2.2' of package 'pacemaker' E: Quellpaket für pacemaker kann nicht gefunden werden. --- stack trace --- #0 0x00007ffa6f17abb9 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56 resultvar = 0 pid = 40008 selftid = 40008 #1 0x00007ffa6f17dfc8 in __GI_abort () at abort.c:89 save_stage = 2 act = {__sigaction_handler = {sa_handler = 0x0, sa_sigaction = 0x0}, sa_mask = {__val = {0, 17179869185, 140713634797360, 140713634496512, 0, 140734633026224, 140713582943175, 140713586093704, 140734633026160, 397168, 32, 140713586088608, 0, 140713586088608, 140713582942786, 140713579551566}}, sa_flags = 1876903824, sa_restorer = 0x3f} sigs = {__val = {32, 0 <repeats 15 times>}} #2 0x00007ffa6fdcf6c9 in crm_abort (file=0x7ffa6fdf34bb "logging.c", function=0x7ffa6fdf4790 <__PRETTY_FUNCTION__.22958> "crm_glib_handler", line=63, assert_condition=0x7ffa72376ce0 "Source ID 541 was not found when attempting to remove it", do_core=<optimized out>, do_fork=<optimized out>) at utils.c:1118 rc = 0 pid = <optimized out> status = 0 __func__ = "crm_abort" #3 0x00007ffa6ee8bae1 in g_logv () from /lib/x86_64-linux-gnu/libglib-2.0.so.0 No symbol table info available. #4 0x00007ffa6ee8bd72 in g_log () from /lib/x86_64-linux-gnu/libglib-2.0.so.0 No symbol table info available. #5 0x00007ffa6ee83c5c in g_source_remove () from /lib/x86_64-linux-gnu/libglib-2.0.so.0 No symbol table info available. #6 0x00007ffa6f999ef5 in stonith_action_clear_tracking_data (action=action@entry=0x7ffa723350b0) at st_client.c:536 No locals. #7 0x00007ffa6f999f2d in stonith_action_destroy (action=0x7ffa723350b0) at st_client.c:557 No locals. 
#8 0x00007ffa6fde7cd9 in child_waitpid (child=child@entry=0x7ffa7236bb20, flags=flags@entry=1) at mainloop.c:948 rc = <optimized out> core = <optimized out> signo = 0 status = 0 exitcode = 0 __func__ = "child_waitpid" #9 0x00007ffa6fde7fce in child_death_dispatch (signal=<optimized out>) at mainloop.c:962 saved = 0x0 child = 0x7ffa7236bb20 iter = 0x7ffa7222d200 exited = <optimized out> __func__ = "child_death_dispatch" #10 0x00007ffa6fde6de7 in crm_signal_dispatch (source=0x7ffa7236ba50, callback=<optimized out>, userdata=<optimized out>) at mainloop.c:275 __func__ = "crm_signal_dispatch" #11 0x00007ffa6ee84e04 in g_main_context_dispatch () from /lib/x86_64-linux-gnu/libglib-2.0.so.0 No symbol table info available. #12 0x00007ffa6ee85048 in ?? () from /lib/x86_64-linux-gnu/libglib-2.0.so.0 No symbol table info available. #13 0x00007ffa6ee8530a in g_main_loop_run () from /lib/x86_64-linux-gnu/libglib-2.0.so.0 No symbol table info available. #14 0x00007ffa702282a9 in main (argc=<optimized out>, argv=<optimized out>) at main.c:1136 flag = <optimized out> lpc = 0 argerr = <optimized out> option_index = 0 cluster = {uuid = 0x7ffa7222fba0 "167772162", uname = 0x7ffa72230280 "kjp03", nodeid = 167772162, destroy = 0x7ffa70229b40 <stonith_peer_cs_destroy>, hb_conn = 0x0, hb_dispatch = 0x7ffa702299c0 <stonith_peer_hb_callback>, group = {length = 128, value = "stonith-ng", '\000' <repeats 117 times>}, cpg = {cpg_deliver_fn = 0x7ffa702298e0 <stonith_peer_ais_callback>, cpg_confchg_fn = 0x7ffa6fbb04a0 <pcmk_cpg_membership>}, cpg_handle = 7749363892505018368} actions = {0x7ffa70236d7d "reboot", 0x7ffa70236d84 "off", 0x7ffa7023893f "list", 0x7ffa70236d88 "monitor", 0x7ffa70236d90 "status"} __func__ = "main"
I am also attaching the crash report.
Peter
I am also experiencing crashes of stonithd: twice yesterday alone, and always on both nodes at the same time. Here is the stack trace:
root@kjp03:/var/crash# apport-retrace -Rs _usr_lib_pacemaker_stonithd.0.crash
E: Can not find version '1.1.10+git20130802-1ubuntu2.2' of package 'pacemaker'
E: Quellpaket für pacemaker kann nicht gefunden werden.
--- stack trace ---
#0 0x00007ffa6f17abb9 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
resultvar = 0
pid = 40008
selftid = 40008
#1 0x00007ffa6f17dfc8 in __GI_abort () at abort.c:89
save_stage = 2
act = {__sigaction_handler = {sa_handler = 0x0, sa_sigaction = 0x0}, sa_mask = {__val = {0, 17179869185, 140713634797360, 140713634496512, 0, 140734633026224, 140713582943175, 140713586093704, 140734633026160, 397168, 32, 140713586088608, 0, 140713586088608, 140713582942786, 140713579551566}}, sa_flags = 1876903824, sa_restorer = 0x3f}
sigs = {__val = {32, 0 <repeats 15 times>}}
#2 0x00007ffa6fdcf6c9 in crm_abort (file=0x7ffa6fdf34bb "logging.c", function=0x7ffa6fdf4790 <__PRETTY_FUNCTION__.22958> "crm_glib_handler", line=63, assert_condition=0x7ffa72376ce0 "Source ID 541 was not found when attempting to remove it", do_core=<optimized out>, do_fork=<optimized out>) at utils.c:1118
rc = 0
pid = <optimized out>
status = 0
__func__ = "crm_abort"
#3 0x00007ffa6ee8bae1 in g_logv () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#4 0x00007ffa6ee8bd72 in g_log () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#5 0x00007ffa6ee83c5c in g_source_remove () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#6 0x00007ffa6f999ef5 in stonith_action_clear_tracking_data (action=action@entry=0x7ffa723350b0) at st_client.c:536
No locals.
#7 0x00007ffa6f999f2d in stonith_action_destroy (action=0x7ffa723350b0) at st_client.c:557
No locals.
#8 0x00007ffa6fde7cd9 in child_waitpid (child=child@entry=0x7ffa7236bb20, flags=flags@entry=1) at mainloop.c:948
rc = <optimized out>
core = <optimized out>
signo = 0
status = 0
exitcode = 0
__func__ = "child_waitpid"
#9 0x00007ffa6fde7fce in child_death_dispatch (signal=<optimized out>) at mainloop.c:962
saved = 0x0
child = 0x7ffa7236bb20
iter = 0x7ffa7222d200
exited = <optimized out>
__func__ = "child_death_dispatch"
#10 0x00007ffa6fde6de7 in crm_signal_dispatch (source=0x7ffa7236ba50, callback=<optimized out>, userdata=<optimized out>) at mainloop.c:275
__func__ = "crm_signal_dispatch"
#11 0x00007ffa6ee84e04 in g_main_context_dispatch () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#12 0x00007ffa6ee85048 in ?? () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#13 0x00007ffa6ee8530a in g_main_loop_run () from /lib/x86_64-linux-gnu/libglib-2.0.so.0
No symbol table info available.
#14 0x00007ffa702282a9 in main (argc=<optimized out>, argv=<optimized out>) at main.c:1136
flag = <optimized out>
lpc = 0
argerr = <optimized out>
option_index = 0
cluster = {uuid = 0x7ffa7222fba0 "167772162", uname = 0x7ffa72230280 "kjp03", nodeid = 167772162, destroy = 0x7ffa70229b40 <stonith_peer_cs_destroy>, hb_conn = 0x0, hb_dispatch = 0x7ffa702299c0 <stonith_peer_hb_callback>, group = {length = 128, value = "stonith-ng", '\000' <repeats 117 times>}, cpg = {cpg_deliver_fn = 0x7ffa702298e0 <stonith_peer_ais_callback>, cpg_confchg_fn = 0x7ffa6fbb04a0 <pcmk_cpg_membership>}, cpg_handle = 7749363892505018368}
actions = {0x7ffa70236d7d "reboot", 0x7ffa70236d84 "off", 0x7ffa7023893f "list", 0x7ffa70236d88 "monitor", 0x7ffa70236d90 "status"}
__func__ = "main"
I am also attaching the crash report.
Peter