Skip to content

Commit 0d00449

Browse files
Peter Zijlstra authored and KAGA-KOKO committed
x86: Replace ist_enter() with nmi_enter()
A few exceptions (like #DB and #BP) can happen at any location in the code; this means that tracers should treat events from these exceptions as NMI-like. The interrupted context could be holding locks with interrupts disabled, for instance.

Similarly, #MC is an actual NMI-like exception.

All of them use ist_enter(), which only concerns itself with RCU but does not do any of the other setup that NMIs need. This means things like:

    printk()
      raw_spin_lock_irq(&logbuf_lock);
      <#DB/#BP/#MC>
        printk()
          raw_spin_lock_irq(&logbuf_lock);

are entirely possible (well, not really, since printk tries hard to play nice, but the concept stands).

So replace ist_enter() with nmi_enter(). Also observe that any nmi_enter() caller must be both notrace and NOKPROBE, or in the noinstr text section.

Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Reviewed-by: Alexandre Chartre <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 5567d11 commit 0d00449

File tree

5 files changed

+24
-65
lines changed

5 files changed

+24
-65
lines changed

arch/x86/include/asm/traps.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,6 @@ void smp_spurious_interrupt(struct pt_regs *regs);
118118
void smp_error_interrupt(struct pt_regs *regs);
119119
asmlinkage void smp_irq_move_cleanup_interrupt(void);
120120

121-
extern void ist_enter(struct pt_regs *regs);
122-
extern void ist_exit(struct pt_regs *regs);
123-
124121
#ifdef CONFIG_VMAP_STACK
125122
void __noreturn handle_stack_overflow(const char *message,
126123
struct pt_regs *regs,

arch/x86/kernel/cpu/mce/core.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include <linux/jump_label.h>
4444
#include <linux/set_memory.h>
4545
#include <linux/task_work.h>
46+
#include <linux/hardirq.h>
4647

4748
#include <asm/intel-family.h>
4849
#include <asm/processor.h>
@@ -1266,7 +1267,7 @@ void noinstr do_machine_check(struct pt_regs *regs, long error_code)
12661267
if (__mc_check_crashing_cpu(cpu))
12671268
return;
12681269

1269-
ist_enter(regs);
1270+
nmi_enter();
12701271

12711272
this_cpu_inc(mce_exception_count);
12721273

@@ -1374,7 +1375,7 @@ void noinstr do_machine_check(struct pt_regs *regs, long error_code)
13741375
}
13751376

13761377
out_ist:
1377-
ist_exit(regs);
1378+
nmi_exit();
13781379
}
13791380
EXPORT_SYMBOL_GPL(do_machine_check);
13801381

arch/x86/kernel/cpu/mce/p5.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <linux/kernel.h>
88
#include <linux/types.h>
99
#include <linux/smp.h>
10+
#include <linux/hardirq.h>
1011

1112
#include <asm/processor.h>
1213
#include <asm/traps.h>
@@ -24,7 +25,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
2425
{
2526
u32 loaddr, hi, lotype;
2627

27-
ist_enter(regs);
28+
nmi_enter();
2829

2930
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
3031
rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi);
@@ -39,7 +40,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code)
3940

4041
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
4142

42-
ist_exit(regs);
43+
nmi_exit();
4344
}
4445

4546
/* Set up machine check reporting for processors with Intel style MCE: */

arch/x86/kernel/cpu/mce/winchip.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <linux/interrupt.h>
77
#include <linux/kernel.h>
88
#include <linux/types.h>
9+
#include <linux/hardirq.h>
910

1011
#include <asm/processor.h>
1112
#include <asm/traps.h>
@@ -18,12 +19,12 @@
1819
/* Machine check handler for WinChip C6: */
1920
static void winchip_machine_check(struct pt_regs *regs, long error_code)
2021
{
21-
ist_enter(regs);
22+
nmi_enter();
2223

2324
pr_emerg("CPU0: Machine Check Exception.\n");
2425
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
2526

26-
ist_exit(regs);
27+
nmi_exit();
2728
}
2829

2930
/* Set up machine check reporting on the Winchip C6 series */

arch/x86/kernel/traps.c

Lines changed: 15 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,12 @@
3737
#include <linux/mm.h>
3838
#include <linux/smp.h>
3939
#include <linux/io.h>
40+
#include <linux/hardirq.h>
41+
#include <linux/atomic.h>
42+
4043
#include <asm/stacktrace.h>
4144
#include <asm/processor.h>
4245
#include <asm/debugreg.h>
43-
#include <linux/atomic.h>
4446
#include <asm/text-patching.h>
4547
#include <asm/ftrace.h>
4648
#include <asm/traps.h>
@@ -82,41 +84,6 @@ static inline void cond_local_irq_disable(struct pt_regs *regs)
8284
local_irq_disable();
8385
}
8486

85-
/*
86-
* In IST context, we explicitly disable preemption. This serves two
87-
* purposes: it makes it much less likely that we would accidentally
88-
* schedule in IST context and it will force a warning if we somehow
89-
* manage to schedule by accident.
90-
*/
91-
void ist_enter(struct pt_regs *regs)
92-
{
93-
if (user_mode(regs)) {
94-
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
95-
} else {
96-
/*
97-
* We might have interrupted pretty much anything. In
98-
* fact, if we're a machine check, we can even interrupt
99-
* NMI processing. We don't want in_nmi() to return true,
100-
* but we need to notify RCU.
101-
*/
102-
rcu_nmi_enter();
103-
}
104-
105-
preempt_disable();
106-
107-
/* This code is a bit fragile. Test it. */
108-
RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work");
109-
}
110-
NOKPROBE_SYMBOL(ist_enter);
111-
112-
void ist_exit(struct pt_regs *regs)
113-
{
114-
preempt_enable_no_resched();
115-
116-
if (!user_mode(regs))
117-
rcu_nmi_exit();
118-
}
119-
12087
int is_valid_bugaddr(unsigned long addr)
12188
{
12289
unsigned short ud;
@@ -326,7 +293,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
326293
* The net result is that our #GP handler will think that we
327294
* entered from usermode with the bad user context.
328295
*
329-
* No need for ist_enter here because we don't use RCU.
296+
* No need for nmi_enter() here because we don't use RCU.
330297
*/
331298
if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
332299
regs->cs == __KERNEL_CS &&
@@ -361,7 +328,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign
361328
}
362329
#endif
363330

364-
ist_enter(regs);
331+
nmi_enter();
365332
notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
366333

367334
tsk->thread.error_code = error_code;
@@ -555,19 +522,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
555522
return;
556523

557524
/*
558-
* Unlike any other non-IST entry, we can be called from a kprobe in
559-
* non-CONTEXT_KERNEL kernel mode or even during context tracking
560-
* state changes. Make sure that we wake up RCU even if we're coming
561-
* from kernel code.
562-
*
563-
* This means that we can't schedule even if we came from a
564-
* preemptible kernel context. That's okay.
525+
* Unlike any other non-IST entry, we can be called from pretty much
526+
* any location in the kernel through kprobes -- text_poke() will most
527+
* likely be handled by poke_int3_handler() above. This means this
528+
* handler is effectively NMI-like.
565529
*/
566-
if (!user_mode(regs)) {
567-
rcu_nmi_enter();
568-
preempt_disable();
569-
}
570-
RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU");
530+
if (!user_mode(regs))
531+
nmi_enter();
571532

572533
#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
573534
if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
@@ -589,10 +550,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code)
589550
cond_local_irq_disable(regs);
590551

591552
exit:
592-
if (!user_mode(regs)) {
593-
preempt_enable_no_resched();
594-
rcu_nmi_exit();
595-
}
553+
if (!user_mode(regs))
554+
nmi_exit();
596555
}
597556
NOKPROBE_SYMBOL(do_int3);
598557

@@ -696,7 +655,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
696655
unsigned long dr6;
697656
int si_code;
698657

699-
ist_enter(regs);
658+
nmi_enter();
700659

701660
get_debugreg(dr6, 6);
702661
/*
@@ -789,7 +748,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
789748
debug_stack_usage_dec();
790749

791750
exit:
792-
ist_exit(regs);
751+
nmi_exit();
793752
}
794753
NOKPROBE_SYMBOL(do_debug);
795754

0 commit comments

Comments
 (0)