Comment 50 for bug 1276705

Revision history for this message
Tetsuo Handa (9-launchpad-i-love-sakura-ne-jp) wrote :

I reproduced a similar result using the test patch shown below.

---------- test patch start ----------
diff --git a/drivers/message/fusion/mptspi.c b/drivers/message/fusion/mptspi.c
index 5653e50..eaaa5e2 100644
--- a/drivers/message/fusion/mptspi.c
+++ b/drivers/message/fusion/mptspi.c
@@ -1412,6 +1412,9 @@ mptspi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
   return 0;
  }

+ printk(KERN_WARNING "Sleep injection start\n");
+ ssleep(40);
+ printk(KERN_WARNING "Sleep injection end\n");
  sh = scsi_host_alloc(&mptspi_driver_template, sizeof(MPT_SCSI_HOST));

  if (!sh) {
diff --git a/kernel/signal.c b/kernel/signal.c
index 52f881d..52ec166 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1051,6 +1051,7 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t,
  int override_rlimit;
  int ret = 0, result;

+ WARN_ON(sig == SIGKILL);
  assert_spin_locked(&t->sighand->siglock);

  result = TRACE_SIGNAL_IGNORED;
---------- test patch end ----------

---------- dmesg start ----------
[ 2.665872] Fusion MPT base driver 3.04.20
[ 2.666876] Copyright (c) 1999-2008 LSI Corporation
[ 2.676334] Fusion MPT SPI Host driver 3.04.20
[ 2.679817] mptbase: ioc0: Initiating bringup
[ 2.692059] e1000: Intel(R) PRO/1000 Network Driver - version 7.3.21-k8-NAPI
[ 2.693733] e1000: Copyright (c) 1999-2006 Intel Corporation.
[ 2.761570] ioc0: LSI53C1030 B0: Capabilities={Initiator}
[ 2.919766] Sleep injection start
[ 3.062094] e1000 0000:02:00.0 eth0: (PCI:66MHz:32-bit) 00:0c:29:d7:20:d6
[ 3.064980] e1000 0000:02:00.0 eth0: Intel(R) PRO/1000 Network Connection
[ 35.716117] ------------[ cut here ]------------
[ 35.718364] WARNING: CPU: 2 PID: 174 at kernel/signal.c:1054 __send_signal+0x476/0x4b0()
[ 35.722092] Modules linked in: e1000 mptspi(+) mptscsih mptbase floppy
[ 35.725666] CPU: 2 PID: 174 Comm: systemd-udevd Not tainted 3.14.0-rc5+ #267
[ 35.729478] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/20/2012
[ 35.734379] 0000000000000009 ffff8800377c5d50 ffffffff816f9669 0000000000000000
[ 35.738226] ffff8800377c5d88 ffffffff8106970d ffff88007a470000 0000000000000009
[ 35.742039] ffff8800377c5ec0 0000000000000001 0000000000000003 ffff8800377c5d98
[ 35.745852] Call Trace:
[ 35.747083] [<ffffffff816f9669>] dump_stack+0x45/0x56
[ 35.749482] [<ffffffff8106970d>] warn_slowpath_common+0x7d/0xa0
[ 35.752228] [<ffffffff810697ea>] warn_slowpath_null+0x1a/0x20
[ 35.754904] [<ffffffff8107ac66>] __send_signal+0x476/0x4b0
[ 35.757467] [<ffffffff8107acde>] send_signal+0x3e/0x80
[ 35.759889] [<ffffffff8107b753>] do_send_sig_info+0x43/0x80
[ 35.762490] [<ffffffff8107bc16>] group_send_sig_info+0x46/0x50
[ 35.764040] [<ffffffff8107bd24>] kill_pid_info+0x34/0x50
[ 35.765244] [<ffffffff8107bdee>] SYSC_kill+0x8e/0x1a0
[ 35.766360] [<ffffffff810a18db>] ? account_user_time+0x8b/0xa0
[ 35.767654] [<ffffffff810a1ef4>] ? vtime_account_user+0x54/0x60
[ 35.768946] [<ffffffff81022635>] ? syscall_trace_enter+0x145/0x250
[ 35.770298] [<ffffffff8107d9be>] SyS_kill+0xe/0x10
[ 35.771373] [<ffffffff8170aabf>] tracesys+0xe1/0xe6
[ 35.772455] ---[ end trace aeaeb4f8a60584a0 ]---
[ 42.921677] Sleep injection end
[ 42.923315] scsi2: error handler thread failed to spawn, error = -12
[ 42.926270] mptspi: ioc0: WARNING - Unable to register controller with SCSI subsystem
[ 42.929990] BUG: unable to handle kernel NULL pointer dereference at 0000000000000060
[ 42.933846] IP: [<ffffffff816fff62>] mutex_lock+0x12/0x2f
[ 42.936484] PGD 7a453067 PUD 7a454067 PMD 0
[ 42.939242] Oops: 0002 [#1] SMP
[ 42.940874] Modules linked in: e1000 mptspi(+) mptscsih mptbase floppy
[ 42.944277] CPU: 0 PID: 181 Comm: systemd-udevd Tainted: G W 3.14.0-rc5+ #267
[ 42.947652] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 09/20/2012
[ 42.952471] task: ffff88007a470000 ti: ffff88007a478000 task.ti: ffff88007a478000
[ 42.954281] RIP: 0010:[<ffffffff816fff62>] [<ffffffff816fff62>] mutex_lock+0x12/0x2f
[ 42.955924] RSP: 0018:ffff88007a479b20 EFLAGS: 00010246
[ 42.957009] RAX: 0000000000000000 RBX: 0000000000000060 RCX: 0000000000001a42
[ 42.958398] RDX: 0000000000004090 RSI: 0000000040924090 RDI: 0000000000000060
[ 42.959853] RBP: ffff88007a479b28 R08: 0000000000000082 R09: 000000000000050c
[ 42.961323] R10: ffffffff8185b080 R11: 2049534353206874 R12: 0000000000000060
[ 42.962752] R13: ffff88007be19000 R14: 00000000ffffffff R15: 0000000000000001
[ 42.964164] FS: 00007f79226bd880(0000) GS:ffff88007fa00000(0000) knlGS:0000000000000000
[ 42.965755] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 42.966899] CR2: 0000000000000060 CR3: 000000007a452000 CR4: 00000000000407f0
[ 42.968333] Stack:
[ 42.969259] 0000000000000000 ffff88007a479b50 ffffffff814cd54d ffff88007abcf000
[ 42.970922] 0000000000000000 ffff88007be19000 ffff88007a479b80 ffffffffa003a2a1
[ 42.972674] ffff88007abcf000 ffff88007be19000 0000000000000000 00000000ffffffff
[ 42.974337] Call Trace:
[ 42.974868] [<ffffffff814cd54d>] scsi_remove_host+0x1d/0x120
[ 42.976106] [<ffffffffa003a2a1>] mptscsih_remove+0x31/0xc0 [mptscsih]
[ 42.977686] [<ffffffffa003423a>] mptspi_probe+0xfa/0x400 [mptspi]
[ 42.978964] [<ffffffff813a08f5>] local_pci_probe+0x45/0xa0
[ 42.980094] [<ffffffff813a1b95>] ? pci_match_device+0xc5/0xd0
[ 42.981276] [<ffffffff813a1cb9>] pci_device_probe+0xd9/0x130
[ 42.982410] [<ffffffff8148cfd5>] driver_probe_device+0x125/0x3b0
[ 42.983599] [<ffffffff8148d333>] __driver_attach+0x93/0xa0
[ 42.984770] [<ffffffff8148d2a0>] ? __device_attach+0x40/0x40
[ 42.985913] [<ffffffff8148af23>] bus_for_each_dev+0x63/0xa0
[ 42.987066] [<ffffffff8148c98e>] driver_attach+0x1e/0x20
[ 42.988167] [<ffffffff8148c570>] bus_add_driver+0x180/0x250
[ 42.989318] [<ffffffffa0045000>] ? 0xffffffffa0044fff
[ 42.990384] [<ffffffff8148d9b4>] driver_register+0x64/0xf0
[ 42.991888] [<ffffffffa0045000>] ? 0xffffffffa0044fff
[ 42.992944] [<ffffffff813a028c>] __pci_register_driver+0x4c/0x50
[ 42.994162] [<ffffffffa00450d7>] mptspi_init+0xd7/0x1000 [mptspi]
[ 42.995435] [<ffffffff81002142>] do_one_initcall+0xd2/0x180
[ 42.996594] [<ffffffff8105b743>] ? set_memory_nx+0x43/0x50
[ 42.997718] [<ffffffff810e7046>] load_module+0x1c86/0x26c0
[ 42.998834] [<ffffffff810e29e0>] ? store_uevent+0x40/0x40
[ 43.000119] [<ffffffff810e35ba>] ? copy_module_from_fd.isra.46+0x12a/0x190
[ 43.001528] [<ffffffff810e7bf6>] SyS_finit_module+0x86/0xb0
[ 43.002680] [<ffffffff8170aabf>] tracesys+0xe1/0xe6
[ 43.003695] Code: 65 6e 9b ff e9 40 ff ff ff b8 01 00 00 00 e9 8c fe ff ff 66 0f 1f 44 00 00 66 66 66 66 90 55 48 89 e5 53 48 89 fb e8 9e df ff ff <f0> ff 0b 79 08 48 89 df e8 31 fe ff ff 65 48 8b 04 25 00 c8 00
[ 43.010886] RIP [<ffffffff816fff62>] mutex_lock+0x12/0x2f
[ 43.012085] RSP <ffff88007a479b20>
[ 43.012823] CR2: 0000000000000060
[ 43.013556] ---[ end trace aeaeb4f8a60584a1 ]---
---------- dmesg end ----------

It is a systemd-udevd process that sends SIGKILL to the worker
systemd-udevd process, which is waiting for finit_module() to complete.
As the excerpt below shows, systemd-udevd has a hard-coded 30-second timeout.

---------- systemd-204/src/udev/udevd.c start ----------
(...snipped...)
                        /* check for hanging events */
                        udev_list_node_foreach(loop, &worker_list) {
                                struct worker *worker = node_to_worker(loop);

                                if (worker->state != WORKER_RUNNING)
                                        continue;

                                if ((now(CLOCK_MONOTONIC) - worker->event_start_usec) > 30 * 1000 * 1000) {
                                        log_error("worker [%u] %s timeout; kill it\n", worker->pid,
                                            worker->event ? worker->event->devpath : "<idle>");
                                        kill(worker->pid, SIGKILL);
                                        worker->state = WORKER_KILLED;
                                        /* drop reference taken for state 'running' */
                                        worker_unref(worker);
                                        if (worker->event) {
                                                log_error("seq %llu '%s' killed\n",
                                                          udev_device_get_seqnum(worker->event->dev), worker->event->devpath);
                                                worker->event->exitcode = -64;
                                                event_queue_delete(worker->event, true);
                                                worker->event = NULL;
                                        }
                                }
                        }
(...snipped...)
---------- systemd-204/src/udev/udevd.c end ----------

Therefore, we cannot override this timeout without modifying udevd.c.
It is unfortunate, but we have to let kthread_create() ignore SIGKILL
(unless the process is being killed by the OOM killer).