commit 56912e432e0684f6fce3562fe6d677ff3d21f597 Author: Paul E. McKenney Date: Fri Aug 11 16:22:33 2017 -0700 rcu: Migrate callbacks earlier in the CPU-offline timeline RCU callbacks must be migrated away from an outgoing CPU, and this is done near the end of the CPU-hotplug operation, after the outgoing CPU is long gone. Unfortunately, this means that other CPU-hotplug callbacks can execute while the outgoing CPU's callbacks are still immobilized on the long-gone CPU's callback lists. If any of these CPU-hotplug callbacks must wait, either directly or indirectly, for the invocation of any of the immobilized RCU callbacks, the system will hang. This commit avoids such hangs by migrating the callbacks away from the outgoing CPU immediately upon its departure, shortly after the return from __cpu_die() in takedown_cpu(). Thus, RCU is able to advance these callbacks and invoke them, which allows all the after-the-fact CPU-hotplug callbacks to wait on these RCU callbacks without risk of a hang. While in the neighborhood, this commit also moves rcu_send_cbs_to_orphanage() and rcu_adopt_orphan_cbs() under a pre-existing #ifdef to avoid including dead code on the one hand and to avoid define-without-use warnings on the other hand. Reported-by: Jeffrey Hugo Link: http://lkml.kernel.org/r/db9c91f6-1b17-6136-84f0-03c3c2581ab4@codeaurora.org Signed-off-by: Paul E. McKenney Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Cc: Ingo Molnar Cc: Anna-Maria Gleixner Cc: Boris Ostrovsky Cc: Richard Weinberger [ paulmck: Backported to linux 4.11 based source ] diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index de88b33c0974..183d69438776 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -295,6 +295,7 @@ void rcu_bh_qs(void); void rcu_check_callbacks(int user); void rcu_report_dead(unsigned int cpu); void rcu_cpu_starting(unsigned int cpu); +void rcutree_migrate_callbacks(int cpu); #ifndef CONFIG_TINY_RCU void rcu_end_inkernel_boot(void); diff --git a/kernel/cpu.c b/kernel/cpu.c index 37b223e4fc05..21be6ab54ea2 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -729,6 +729,7 @@ static int takedown_cpu(unsigned int cpu) __cpu_die(cpu); tick_cleanup_dead_cpu(cpu); + rcutree_migrate_callbacks(cpu); return 0; } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 50fee7689e71..2e1caf87b6af 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2640,7 +2640,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) * specified CPU must be offline, and the caller must hold the * ->orphan_lock. */ -static void +static void __maybe_unused rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp) { @@ -2698,7 +2698,8 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp, * Adopt the RCU callbacks from the specified rcu_state structure's * orphanage. The caller must hold the ->orphan_lock. */ -static void rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) +static void __maybe_unused +rcu_adopt_orphan_cbs(struct rcu_state *rsp, unsigned long flags) { int i; struct rcu_data *rdp = raw_cpu_ptr(rsp->rda); @@ -2805,14 +2806,12 @@ static void rcu_cleanup_dead_rnp(struct rcu_node *rnp_leaf) /* * The CPU has been completely removed, and some other CPU is reporting - * this fact from process context. Do the remainder of the cleanup, - * including orphaning the outgoing CPU's RCU callbacks, and also - * adopting them. There can only be one CPU hotplug operation at a time, - * so no other CPU can be attempting to update rcu_cpu_kthread_task. + * this fact from process context. Do the remainder of the cleanup. + * There can only be one CPU hotplug operation at a time, so no other CPU + * can be attempting to update rcu_cpu_kthread_task. */ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) { - unsigned long flags; struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ @@ -2821,16 +2820,6 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp) /* Adjust any no-longer-needed kthreads. */ rcu_boost_kthread_setaffinity(rnp, -1); - - /* Orphan the dead CPU's callbacks, and adopt them if appropriate. */ - raw_spin_lock_irqsave(&rsp->orphan_lock, flags); - rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); - rcu_adopt_orphan_cbs(rsp, flags); - raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); - - WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, - "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", - cpu, rdp->qlen, rdp->nxtlist); } /* @@ -4011,6 +4000,36 @@ void rcu_report_dead(unsigned int cpu) for_each_rcu_flavor(rsp) rcu_cleanup_dying_idle_cpu(cpu, rsp); } + +/* Orphan the dead CPU's callbacks, and then adopt them. */ +static void rcu_migrate_callbacks(int cpu, struct rcu_state *rsp) +{ + unsigned long flags; + struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu); + struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rdp & rnp. */ + + raw_spin_lock_irqsave(&rsp->orphan_lock, flags); + rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp); + rcu_adopt_orphan_cbs(rsp, flags); + raw_spin_unlock_irqrestore(&rsp->orphan_lock, flags); + + WARN_ONCE(rdp->qlen != 0 || rdp->nxtlist != NULL, + "rcu_cleanup_dead_cpu: Callbacks on offline CPU %d: qlen=%lu, nxtlist=%p\n", + cpu, rdp->qlen, rdp->nxtlist); +} + +/* + * The outgoing CPU has just passed through the dying-idle state, + * and we are being invoked from the CPU that was IPIed to continue the + * offline operation. We need to migrate the outgoing CPU's callbacks. + */ +void rcutree_migrate_callbacks(int cpu) +{ + struct rcu_state *rsp; + + for_each_rcu_flavor(rsp) + rcu_migrate_callbacks(cpu, rsp); +} #endif static int rcu_pm_notify(struct notifier_block *self,