Skip to content

Commit 571c1ea

Browse files
jogness authored and pmladek committed
printk: nbcon: Allow reacquire during panic
If a console printer is interrupted during panic, it will never be able to reacquire ownership in order to perform any cleanup. That in itself is not a problem, since the non-panic CPU will simply quiesce in an endless loop within nbcon_reacquire_nobuf(). However, in this state, platforms that do not support a true NMI to interrupt the quiesced CPU will not be able to shut down that CPU from within panic(). This then causes problems such as being unable to load and run a kdump kernel. Fix this by allowing non-panic CPUs to reacquire ownership using a direct acquire. Then the non-panic CPUs can successfully exit the nbcon_reacquire_nobuf() loop and the console driver can perform any necessary cleanup. But more importantly, the CPU is no longer quiesced and is free to process any interrupts necessary for panic() to shut down the CPU. All other forms of acquire are still not allowed for non-panic CPUs since it is safer to have them avoid gaining console ownership that is not strictly necessary. Reported-by: Michael Kelley <[email protected]> Closes: https://lore.kernel.org/r/SN6PR02MB4157A4C5E8CB219A75263A17D46DA@SN6PR02MB4157.namprd02.prod.outlook.com Signed-off-by: John Ogness <[email protected]> Reviewed-by: Petr Mladek <[email protected]> Tested-by: Michael Kelley <[email protected]> Link: https://patch.msgid.link/[email protected] Signed-off-by: Petr Mladek <[email protected]>
1 parent af54a3a commit 571c1ea

File tree

1 file changed

+41
-22
lines changed

1 file changed

+41
-22
lines changed

kernel/printk/nbcon.c

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,9 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
214214

215215
/**
216216
* nbcon_context_try_acquire_direct - Try to acquire directly
217-
* @ctxt: The context of the caller
218-
* @cur: The current console state
217+
* @ctxt: The context of the caller
218+
* @cur: The current console state
219+
* @is_reacquire: This acquire is a reacquire
219220
*
220221
* Acquire the console when it is released. Also acquire the console when
221222
* the current owner has a lower priority and the console is in a safe state.
@@ -225,32 +226,38 @@ static void nbcon_seq_try_update(struct nbcon_context *ctxt, u64 new_seq)
225226
*
226227
* Errors:
227228
*
228-
* -EPERM: A panic is in progress and this is not the panic CPU.
229-
* Or the current owner or waiter has the same or higher
230-
* priority. No acquire method can be successful in
231-
* this case.
229+
* -EPERM: A panic is in progress and this is neither the panic
230+
* CPU nor is this a reacquire. Or the current owner or
231+
* waiter has the same or higher priority. No acquire
232+
* method can be successful in these cases.
232233
*
233234
* -EBUSY: The current owner has a lower priority but the console
234235
* in an unsafe state. The caller should try using
235236
* the handover acquire method.
236237
*/
237238
static int nbcon_context_try_acquire_direct(struct nbcon_context *ctxt,
238-
struct nbcon_state *cur)
239+
struct nbcon_state *cur, bool is_reacquire)
239240
{
240241
unsigned int cpu = smp_processor_id();
241242
struct console *con = ctxt->console;
242243
struct nbcon_state new;
243244

244245
do {
245246
/*
246-
* Panic does not imply that the console is owned. However, it
247-
* is critical that non-panic CPUs during panic are unable to
248-
* acquire ownership in order to satisfy the assumptions of
249-
* nbcon_waiter_matches(). In particular, the assumption that
250-
* lower priorities are ignored during panic.
247+
* Panic does not imply that the console is owned. However,
248+
* since all non-panic CPUs are stopped during panic(), it
249+
* is safer to have them avoid gaining console ownership.
250+
*
251+
* If this acquire is a reacquire (and an unsafe takeover
252+
* has not previously occurred) then it is allowed to attempt
253+
* a direct acquire in panic. This gives console drivers an
254+
* opportunity to perform any necessary cleanup if they were
255+
* interrupted by the panic CPU while printing.
251256
*/
252-
if (other_cpu_in_panic())
257+
if (other_cpu_in_panic() &&
258+
(!is_reacquire || cur->unsafe_takeover)) {
253259
return -EPERM;
260+
}
254261

255262
if (ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio)
256263
return -EPERM;
@@ -301,8 +308,9 @@ static bool nbcon_waiter_matches(struct nbcon_state *cur, int expected_prio)
301308
* Event #1 implies this context is EMERGENCY.
302309
* Event #2 implies the new context is PANIC.
303310
* Event #3 occurs when panic() has flushed the console.
304-
* Events #4 and #5 are not possible due to the other_cpu_in_panic()
305-
* check in nbcon_context_try_acquire_direct().
311+
* Event #4 occurs when a non-panic CPU reacquires.
312+
* Event #5 is not possible due to the other_cpu_in_panic() check
313+
* in nbcon_context_try_acquire_handover().
306314
*/
307315

308316
return (cur->req_prio == expected_prio);
@@ -431,6 +439,16 @@ static int nbcon_context_try_acquire_handover(struct nbcon_context *ctxt,
431439
WARN_ON_ONCE(ctxt->prio <= cur->prio || ctxt->prio <= cur->req_prio);
432440
WARN_ON_ONCE(!cur->unsafe);
433441

442+
/*
443+
* Panic does not imply that the console is owned. However, it
444+
* is critical that non-panic CPUs during panic are unable to
445+
* wait for a handover in order to satisfy the assumptions of
446+
* nbcon_waiter_matches(). In particular, the assumption that
447+
* lower priorities are ignored during panic.
448+
*/
449+
if (other_cpu_in_panic())
450+
return -EPERM;
451+
434452
/* Handover is not possible on the same CPU. */
435453
if (cur->cpu == cpu)
436454
return -EBUSY;
@@ -558,7 +576,8 @@ static struct printk_buffers panic_nbcon_pbufs;
558576

559577
/**
560578
* nbcon_context_try_acquire - Try to acquire nbcon console
561-
* @ctxt: The context of the caller
579+
* @ctxt: The context of the caller
580+
* @is_reacquire: This acquire is a reacquire
562581
*
563582
* Context: Under @ctxt->con->device_lock() or local_irq_save().
564583
* Return: True if the console was acquired. False otherwise.
@@ -568,7 +587,7 @@ static struct printk_buffers panic_nbcon_pbufs;
568587
* in an unsafe state. Otherwise, on success the caller may assume
569588
* the console is not in an unsafe state.
570589
*/
571-
static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
590+
static bool nbcon_context_try_acquire(struct nbcon_context *ctxt, bool is_reacquire)
572591
{
573592
unsigned int cpu = smp_processor_id();
574593
struct console *con = ctxt->console;
@@ -577,7 +596,7 @@ static bool nbcon_context_try_acquire(struct nbcon_context *ctxt)
577596

578597
nbcon_state_read(con, &cur);
579598
try_again:
580-
err = nbcon_context_try_acquire_direct(ctxt, &cur);
599+
err = nbcon_context_try_acquire_direct(ctxt, &cur, is_reacquire);
581600
if (err != -EBUSY)
582601
goto out;
583602

@@ -913,7 +932,7 @@ void nbcon_reacquire_nobuf(struct nbcon_write_context *wctxt)
913932
{
914933
struct nbcon_context *ctxt = &ACCESS_PRIVATE(wctxt, ctxt);
915934

916-
while (!nbcon_context_try_acquire(ctxt))
935+
while (!nbcon_context_try_acquire(ctxt, true))
917936
cpu_relax();
918937

919938
nbcon_write_context_set_buf(wctxt, NULL, 0);
@@ -1101,7 +1120,7 @@ static bool nbcon_emit_one(struct nbcon_write_context *wctxt, bool use_atomic)
11011120
cant_migrate();
11021121
}
11031122

1104-
if (!nbcon_context_try_acquire(ctxt))
1123+
if (!nbcon_context_try_acquire(ctxt, false))
11051124
goto out;
11061125

11071126
/*
@@ -1486,7 +1505,7 @@ static int __nbcon_atomic_flush_pending_con(struct console *con, u64 stop_seq,
14861505
ctxt->prio = nbcon_get_default_prio();
14871506
ctxt->allow_unsafe_takeover = allow_unsafe_takeover;
14881507

1489-
if (!nbcon_context_try_acquire(ctxt))
1508+
if (!nbcon_context_try_acquire(ctxt, false))
14901509
return -EPERM;
14911510

14921511
while (nbcon_seq_read(con) < stop_seq) {
@@ -1762,7 +1781,7 @@ bool nbcon_device_try_acquire(struct console *con)
17621781
ctxt->console = con;
17631782
ctxt->prio = NBCON_PRIO_NORMAL;
17641783

1765-
if (!nbcon_context_try_acquire(ctxt))
1784+
if (!nbcon_context_try_acquire(ctxt, false))
17661785
return false;
17671786

17681787
if (!nbcon_context_enter_unsafe(ctxt))

0 commit comments

Comments
 (0)