Skip to content

Commit 8a8109f

Browse files
Muchun Song authored and pmladek committed
printk: fix deadlock when kernel panic
printk_safe_flush_on_panic() caused the following deadlock on our server:

CPU0:                                         CPU1:
panic                                         rcu_dump_cpu_stacks
  kdump_nmi_shootdown_cpus                      nmi_trigger_cpumask_backtrace
    register_nmi_handler(crash_nmi_callback)      printk_safe_flush
                                                    __printk_safe_flush
                                                      raw_spin_lock_irqsave(&read_lock)
    // send NMI to other processors
    apic_send_IPI_allbutself(NMI_VECTOR)
                                                        // NMI interrupt, dead loop
                                                        crash_nmi_callback
  printk_safe_flush_on_panic
    printk_safe_flush
      __printk_safe_flush
        // deadlock
        raw_spin_lock_irqsave(&read_lock)

DEADLOCK: read_lock is taken on CPU1 and will never get released.

It happens when panic() stops a CPU by NMI while it has been in the middle of printk_safe_flush().

Handle the lock the same way as logbuf_lock. The printk_safe buffers are flushed only when both locks can be safely taken. It can avoid the deadlock _in this particular case_ at expense of losing contents of printk_safe buffers.

Note: It would actually be safe to re-init the locks when all CPUs were stopped by NMI. But it would require passing this information from arch-specific code. It is not worth the complexity. Especially because logbuf_lock and printk_safe buffers have been obsoleted by the lockless ring buffer.

Fixes: cf9b110 ("printk/nmi: flush NMI messages on the system panic")
Signed-off-by: Muchun Song <[email protected]>
Reviewed-by: Petr Mladek <[email protected]>
Cc: <[email protected]>
Acked-by: Sergey Senozhatsky <[email protected]>
Signed-off-by: Petr Mladek <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 61bb17d commit 8a8109f

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

kernel/printk/printk_safe.c

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,8 @@ struct printk_safe_seq_buf {
4545
static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq);
4646
static DEFINE_PER_CPU(int, printk_context);
4747

48+
static DEFINE_RAW_SPINLOCK(safe_read_lock);
49+
4850
#ifdef CONFIG_PRINTK_NMI
4951
static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq);
5052
#endif
@@ -180,8 +182,6 @@ static void report_message_lost(struct printk_safe_seq_buf *s)
180182
*/
181183
static void __printk_safe_flush(struct irq_work *work)
182184
{
183-
static raw_spinlock_t read_lock =
184-
__RAW_SPIN_LOCK_INITIALIZER(read_lock);
185185
struct printk_safe_seq_buf *s =
186186
container_of(work, struct printk_safe_seq_buf, work);
187187
unsigned long flags;
@@ -195,7 +195,7 @@ static void __printk_safe_flush(struct irq_work *work)
195195
* different CPUs. This is especially important when printing
196196
* a backtrace.
197197
*/
198-
raw_spin_lock_irqsave(&read_lock, flags);
198+
raw_spin_lock_irqsave(&safe_read_lock, flags);
199199

200200
i = 0;
201201
more:
@@ -232,7 +232,7 @@ static void __printk_safe_flush(struct irq_work *work)
232232

233233
out:
234234
report_message_lost(s);
235-
raw_spin_unlock_irqrestore(&read_lock, flags);
235+
raw_spin_unlock_irqrestore(&safe_read_lock, flags);
236236
}
237237

238238
/**
@@ -278,6 +278,14 @@ void printk_safe_flush_on_panic(void)
278278
raw_spin_lock_init(&logbuf_lock);
279279
}
280280

281+
if (raw_spin_is_locked(&safe_read_lock)) {
282+
if (num_online_cpus() > 1)
283+
return;
284+
285+
debug_locks_off();
286+
raw_spin_lock_init(&safe_read_lock);
287+
}
288+
281289
printk_safe_flush();
282290
}
283291

0 commit comments

Comments
 (0)