It is the systemd-udevd process that is sending SIGKILL to the worker
systemd-udevd process, which is waiting for finit_module() to complete.
We can see that systemd-udevd has a hard-coded 30-second timeout.
        if ((now(CLOCK_MONOTONIC) - worker->event_start_usec) > 30 * 1000 * 1000) {
                log_error("worker [%u] %s timeout; kill it\n", worker->pid,
                          worker->event ? worker->event->devpath : "<idle>");
                kill(worker->pid, SIGKILL);
                worker->state = WORKER_KILLED;
                /* drop reference taken for state 'running' */
                worker_unref(worker);
                if (worker->event) {
                        log_error("seq %llu '%s' killed\n",
                                  udev_device_get_seqnum(worker->event->dev), worker->event->devpath);
                        worker->event->exitcode = -64;
                        event_queue_delete(worker->event, true);
                        worker->event = NULL;
                }
        }
}
(...snipped...)
---------- systemd-204/src/udev/udevd.c end ----------
Therefore, we cannot override this timeout without modifying udevd.c.
It is unfortunate, but we have to let kthread_create() ignore SIGKILL
(unless killed by the OOM killer).
I reproduced a similar result using the test patch shown below.
---------- test patch start ----------
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 5653e50..eaaa5e2 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -1412,6 +1412,9 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
}
+ printk(KERN_WARNING "Sleep injection start\n");
+ ssleep(40);
+ printk(KERN_WARNING "Sleep injection end\n");
sh = scsi_host_alloc(&mptspi_driver_template, sizeof(MPT_SCSI_HOST));
if (!sh) {
diff --git a/kernel/signal.c b/kernel/signal.c
index 52f881d..52ec166 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1051,6 +1051,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
int override_rlimit;
int ret = 0, result;
+ WARN_ON(sig == SIGKILL);
assert_spin_locked(&t->sighand->siglock);
result = TRACE_SIGNAL_IGNORED;
---------- test patch end ----------
---------- dmesg start ----------
[ 2.665872] Fusion MPT base driver 3.04.20
[ 2.666876] Copyright (c) 1999-2008 LSI Corporation
[ 2.676334] Fusion MPT SPI Host driver 3.04.20
[ 2.679817] mptbase: ioc0: Initiating bringup
[ 2.692059] e1000: Intel(R) PRO/1000 Network Driver - version 7.3.21-k8-NAPI
[ 2.693733] e1000: Copyright (c) 1999-2006 Intel Corporation.
[ 2.761570] ioc0: LSI53C1030 B0: Capabilities={Initiator}
[ 2.919766] Sleep injection start
[ 3.062094] e1000 0000:02:00.0 eth0: (PCI:66MHz:32-bit) 00:0c:29:d7:20:d6
[ 3.064980] e1000 0000:02:00.0 eth0: Intel(R) PRO/1000 Network Connection
[ 35.716117] ------------[ cut here ]------------
[ 35.718364] WARNING: CPU: 2 PID: 174 at kernel/signal.c:1054 __send_signal+0x476/0x4b0()
[ 35.722092] Modules linked in: e1000 mptspi(+) mptscsih mptbase floppy
[ 35.725666] CPU: 2 PID: 174 Comm: systemd-udevd Not tainted 3.14.0-rc5+ #267
[ 35.729478] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/20/2012
[ 35.734379] 0000000000000009 ffff8800377c5d50 ffffffff816f9669 0000000000000000
[ 35.738226] ffff8800377c5d88 ffffffff8106970d ffff88007a470000 0000000000000009
[ 35.742039] ffff8800377c5ec0 0000000000000001 0000000000000003 ffff8800377c5d98
[ 35.745852] Call Trace:
[ 35.747083] [<ffffffff816f9669>] dump_stack+0x45/0x56
[ 35.749482] [<ffffffff8106970d>] warn_slowpath_common+0x7d/0xa0
[ 35.752228] [<ffffffff810697ea>] warn_slowpath_null+0x1a/0x20
[ 35.754904] [<ffffffff8107ac66>] __send_signal+0x476/0x4b0
[ 35.757467] [<ffffffff8107acde>] send_signal+0x3e/0x80
[ 35.759889] [<ffffffff8107b753>] do_send_sig_info+0x43/0x80
[ 35.762490] [<ffffffff8107bc16>] group_send_sig_info+0x46/0x50
[ 35.764040] [<ffffffff8107bd24>] kill_pid_info+0x34/0x50
[ 35.765244] [<ffffffff8107bdee>] SYSC_kill+0x8e/0x1a0
[ 35.766360] [<ffffffff810a18db>] ? account_user_time+0x8b/0xa0
[ 35.767654] [<ffffffff810a1ef4>] ? vtime_account_user+0x54/0x60
[ 35.768946] [<ffffffff81022635>] ? syscall_trace_enter+0x145/0x250
[ 35.770298] [<ffffffff8107d9be>] SyS_kill+0xe/0x10
[ 35.771373] [<ffffffff8170aabf>] tracesys+0xe1/0xe6
[ 35.772455] ---[ end trace aeaeb4f8a60584a0 ]---
[ 42.921677] Sleep injection end
[ 42.923315] scsi2: error handler thread failed to spawn, error = -12
[ 42.926270] mptspi: ioc0: WARNING - Unable to register controller with SCSI subsystem
[ 42.929990] BUG: unable to handle kernel NULL pointer dereference at 0000000000000060
[ 42.933846] IP: [<ffffffff816fff62>] mutex_lock+0x12/0x2f
[ 42.936484] PGD 7a453067 PUD 7a454067 PMD 0
[ 42.939242] Oops: 0002 [#1] SMP
[ 42.940874] Modules linked in: e1000 mptspi(+) mptscsih mptbase floppy
[ 42.944277] CPU: 0 PID: 181 Comm: systemd-udevd Tainted: G W 3.14.0-rc5+ #267
[ 42.947652] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/20/2012
[ 42.952471] task: ffff88007a470000 ti: ffff88007a478000 task.ti: ffff88007a478000
[ 42.954281] RIP: 0010:[<ffffffff816fff62>] [<ffffffff816fff62>] mutex_lock+0x12/0x2f
[ 42.955924] RSP: 0018:ffff88007a479b20 EFLAGS: 00010246
[ 42.957009] RAX: 0000000000000000 RBX: 0000000000000060 RCX: 0000000000001a42
[ 42.958398] RDX: 0000000000004090 RSI: 0000000040924090 RDI: 0000000000000060
[ 42.959853] RBP: ffff88007a479b28 R08: 0000000000000082 R09: 000000000000050c
[ 42.961323] R10: ffffffff8185b080 R11: 2049534353206874 R12: 0000000000000060
[ 42.962752] R13: ffff88007be19000 R14: 00000000ffffffff R15: 0000000000000001
[ 42.964164] FS: 00007f79226bd880(0000) GS:ffff88007fa00000(0000) knlGS:0000000000000000
[ 42.965755] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 42.966899] CR2: 0000000000000060 CR3: 000000007a452000 CR4: 00000000000407f0
[ 42.968333] Stack:
[ 42.969259] 0000000000000000 ffff88007a479b50 ffffffff814cd54d ffff88007abcf000
[ 42.970922] 0000000000000000 ffff88007be19000 ffff88007a479b80 ffffffffa003a2a1
[ 42.972674] ffff88007abcf000 ffff88007be19000 0000000000000000 00000000ffffffff
[ 42.974337] Call Trace:
[ 42.974868] [<ffffffff814cd54d>] scsi_remove_host+0x1d/0x120
[ 42.976106] [<ffffffffa003a2a1>] mptscsih_remove+0x31/0xc0 [mptscsih]
[ 42.977686] [<ffffffffa003423a>] mptspi_probe+0xfa/0x400 [mptspi]
[ 42.978964] [<ffffffff813a08f5>] local_pci_probe+0x45/0xa0
[ 42.980094] [<ffffffff813a1b95>] ? pci_match_device+0xc5/0xd0
[ 42.981276] [<ffffffff813a1cb9>] pci_device_probe+0xd9/0x130
[ 42.982410] [<ffffffff8148cfd5>] driver_probe_device+0x125/0x3b0
[ 42.983599] [<ffffffff8148d333>] __driver_attach+0x93/0xa0
[ 42.984770] [<ffffffff8148d2a0>] ? __device_attach+0x40/0x40
[ 42.985913] [<ffffffff8148af23>] bus_for_each_dev+0x63/0xa0
[ 42.987066] [<ffffffff8148c98e>] driver_attach+0x1e/0x20
[ 42.988167] [<ffffffff8148c570>] bus_add_driver+0x180/0x250
[ 42.989318] [<ffffffffa0045000>] ? 0xffffffffa0044fff
[ 42.990384] [<ffffffff8148d9b4>] driver_register+0x64/0xf0
[ 42.991888] [<ffffffffa0045000>] ? 0xffffffffa0044fff
[ 42.992944] [<ffffffff813a028c>] __pci_register_driver+0x4c/0x50
[ 42.994162] [<ffffffffa00450d7>] mptspi_init+0xd7/0x1000 [mptspi]
[ 42.995435] [<ffffffff81002142>] do_one_initcall+0xd2/0x180
[ 42.996594] [<ffffffff8105b743>] ? set_memory_nx+0x43/0x50
[ 42.997718] [<ffffffff810e7046>] load_module+0x1c86/0x26c0
[ 42.998834] [<ffffffff810e29e0>] ? store_uevent+0x40/0x40
[ 43.000119] [<ffffffff810e35ba>] ? copy_module_from_fd.isra.46+0x12a/0x190
[ 43.001528] [<ffffffff810e7bf6>] SyS_finit_module+0x86/0xb0
[ 43.002680] [<ffffffff8170aabf>] tracesys+0xe1/0xe6
[ 43.003695] Code: 65 6e 9b ff e9 40 ff ff ff b8 01 00 00 00 e9 8c fe ff ff 66 0f 1f 44 00 00 66 66 66 66 90 55 48 89 e5 53 48 89 fb e8 9e df ff ff <f0> ff 0b 79 08 48 89 df e8 31 fe ff ff 65 48 8b 04 25 00 c8 00
[ 43.010886] RIP [<ffffffff816fff62>] mutex_lock+0x12/0x2f
[ 43.012085] RSP <ffff88007a479b20>
[ 43.012823] CR2: 0000000000000060
[ 43.013556] ---[ end trace aeaeb4f8a60584a1 ]---
---------- dmesg end ----------
It is the systemd-udevd process that is sending SIGKILL to the worker
systemd-udevd process, which is waiting for finit_module() to complete.
We can see that systemd-udevd has a hard-coded 30-second timeout.
---------- systemd-204/src/udev/udevd.c start ----------
/* check for hanging events */
udev_list_node_foreach(loop, &worker_list) {
struct worker *worker = node_to_worker(loop);
(...snipped...)
(...snipped...)
---------- systemd-204/src/udev/udevd.c end ----------
Therefore, we cannot override this timeout without modifying udevd.c.
It is unfortunate, but we have to let kthread_create() ignore SIGKILL
(unless killed by the OOM killer).