Skip to content

Commit 0784181

Browse files
dwmw2 authored and fbq committed
lockdep: Add lockdep_cleanup_dead_cpu()
Add a function to check that an offline CPU has left the tracing infrastructure in a sane state.

Commit 9bb69ba ("ACPI: processor_idle: use raw_safe_halt() in acpi_idle_play_dead()") fixed an issue where the acpi_idle_play_dead() function called safe_halt() instead of raw_safe_halt(), which had the side-effect of setting the hardirqs_enabled flag for the offline CPU. On x86 this triggered warnings from lockdep_assert_irqs_disabled() when the CPU was brought back online again later. These warnings were too early for the exception to be handled correctly, leading to a triple-fault.

Add lockdep_cleanup_dead_cpu() to check for this kind of failure mode, print the events leading up to it, and correct it so that the CPU can come online again correctly. Re-introducing the original bug now merely results in this warning instead:

[ 61.556652] smpboot: CPU 1 is now offline
[ 61.556769] CPU 1 left hardirqs enabled!
[ 61.556915] irq event stamp: 128149
[ 61.556965] hardirqs last enabled at (128149): [<ffffffff81720a36>] acpi_idle_play_dead+0x46/0x70
[ 61.557055] hardirqs last disabled at (128148): [<ffffffff81124d50>] do_idle+0x90/0xe0
[ 61.557117] softirqs last enabled at (128078): [<ffffffff81cec74c>] __do_softirq+0x31c/0x423
[ 61.557199] softirqs last disabled at (128065): [<ffffffff810baae1>] __irq_exit_rcu+0x91/0x100

[boqun: Capitalize the title and reword the message a bit]

Signed-off-by: David Woodhouse <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Signed-off-by: Boqun Feng <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 87347f1 commit 0784181

File tree

3 files changed

+31
-0
lines changed

3 files changed

+31
-0
lines changed

include/linux/irqflags.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,25 @@
1818
#include <asm/irqflags.h>
1919
#include <asm/percpu.h>
2020

21+
struct task_struct;
22+
2123
/* Currently lockdep_softirqs_on/off is used only by lockdep */
2224
#ifdef CONFIG_PROVE_LOCKING
2325
extern void lockdep_softirqs_on(unsigned long ip);
2426
extern void lockdep_softirqs_off(unsigned long ip);
2527
extern void lockdep_hardirqs_on_prepare(void);
2628
extern void lockdep_hardirqs_on(unsigned long ip);
2729
extern void lockdep_hardirqs_off(unsigned long ip);
30+
extern void lockdep_cleanup_dead_cpu(unsigned int cpu,
31+
struct task_struct *idle);
2832
#else
2933
static inline void lockdep_softirqs_on(unsigned long ip) { }
3034
static inline void lockdep_softirqs_off(unsigned long ip) { }
3135
static inline void lockdep_hardirqs_on_prepare(void) { }
3236
static inline void lockdep_hardirqs_on(unsigned long ip) { }
3337
static inline void lockdep_hardirqs_off(unsigned long ip) { }
38+
static inline void lockdep_cleanup_dead_cpu(unsigned int cpu,
39+
struct task_struct *idle) {}
3440
#endif
3541

3642
#ifdef CONFIG_TRACE_IRQFLAGS

kernel/cpu.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1338,6 +1338,7 @@ static int takedown_cpu(unsigned int cpu)
13381338

13391339
cpuhp_bp_sync_dead(cpu);
13401340

1341+
lockdep_cleanup_dead_cpu(cpu, idle_thread_get(cpu));
13411342
tick_cleanup_dead_cpu(cpu);
13421343

13431344
/*

kernel/locking/lockdep.c

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4586,6 +4586,30 @@ void lockdep_softirqs_off(unsigned long ip)
45864586
debug_atomic_inc(redundant_softirqs_off);
45874587
}
45884588

4589+
/**
4590+
* lockdep_cleanup_dead_cpu - Ensure CPU lockdep state is cleanly stopped
4591+
*
4592+
* @cpu: index of offlined CPU
4593+
* @idle: task pointer for offlined CPU's idle thread
4594+
*
4595+
* Invoked after the CPU is dead. Ensures that the tracing infrastructure
4596+
* is left in a suitable state for the CPU to be subsequently brought
4597+
* online again.
4598+
*/
4599+
void lockdep_cleanup_dead_cpu(unsigned int cpu, struct task_struct *idle)
4600+
{
4601+
if (unlikely(!debug_locks))
4602+
return;
4603+
4604+
if (unlikely(per_cpu(hardirqs_enabled, cpu))) {
4605+
pr_warn("CPU %u left hardirqs enabled!", cpu);
4606+
if (idle)
4607+
print_irqtrace_events(idle);
4608+
/* Clean it up for when the CPU comes online again. */
4609+
per_cpu(hardirqs_enabled, cpu) = 0;
4610+
}
4611+
}
4612+
45894613
static int
45904614
mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
45914615
{

0 commit comments

Comments
 (0)