Skip to content

Commit a5497ba

Browse files
committed
entry: Provide generic interrupt entry/exit code
Like the syscall entry/exit code interrupt/exception entry after the real low level ASM bits should not be different across architectures. Provide a generic version based on the x86 code. irqentry_enter() is called after the low level entry code and irqentry_exit() must be invoked right before returning to the low level code which just contains the actual return logic. The code before irqentry_enter() and irqentry_exit() must not be instrumented. Code after irqentry_enter() and before irqentry_exit() can be instrumented. irqentry_enter() invokes irqentry_enter_from_user_mode() if the interrupt/exception came from user mode. If it entered from kernel mode it handles the kernel mode variant of establishing state for lockdep, RCU and tracing depending on the kernel context it interrupted (idle, non-idle). Signed-off-by: Thomas Gleixner <[email protected]> Link: https://lkml.kernel.org/r/[email protected]
1 parent a9f3a74 commit a5497ba

File tree

2 files changed

+179
-0
lines changed

2 files changed

+179
-0
lines changed

include/linux/entry-common.h

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,4 +307,66 @@ void irqentry_enter_from_user_mode(struct pt_regs *regs);
307307
*/
308308
void irqentry_exit_to_user_mode(struct pt_regs *regs);
309309

310+
#ifndef irqentry_state
311+
typedef struct irqentry_state {
312+
bool exit_rcu;
313+
} irqentry_state_t;
314+
#endif
315+
316+
/**
317+
* irqentry_enter - Handle state tracking on ordinary interrupt entries
318+
* @regs: Pointer to pt_regs of interrupted context
319+
*
320+
* Invokes:
321+
* - lockdep irqflag state tracking as low level ASM entry disabled
322+
* interrupts.
323+
*
324+
* - Context tracking if the exception hit user mode.
325+
*
326+
* - The hardirq tracer to keep the state consistent as low level ASM
327+
* entry disabled interrupts.
328+
*
329+
* As a precondition, this requires that the entry came from user mode,
330+
* idle, or a kernel context in which RCU is watching.
331+
*
332+
* For kernel mode entries RCU handling is done conditional. If RCU is
333+
* watching then the only RCU requirement is to check whether the tick has
334+
* to be restarted. If RCU is not watching then rcu_irq_enter() has to be
335+
* invoked on entry and rcu_irq_exit() on exit.
336+
*
337+
* Avoiding the rcu_irq_enter/exit() calls is an optimization but also
338+
* solves the problem of kernel mode pagefaults which can schedule, which
339+
* is not possible after invoking rcu_irq_enter() without undoing it.
340+
*
341+
* For user mode entries irqentry_enter_from_user_mode() is invoked to
342+
* establish the proper context for NOHZ_FULL. Otherwise scheduling on exit
343+
* would not be possible.
344+
*
345+
* Returns: An opaque object that must be passed to idtentry_exit()
346+
*/
347+
irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs);
348+
349+
/**
350+
* irqentry_exit_cond_resched - Conditionally reschedule on return from interrupt
351+
*
352+
* Conditional reschedule with additional sanity checks.
353+
*/
354+
void irqentry_exit_cond_resched(void);
355+
356+
/**
357+
* irqentry_exit - Handle return from exception that used irqentry_enter()
358+
* @regs: Pointer to pt_regs (exception entry regs)
359+
* @state: Return value from matching call to irqentry_enter()
360+
*
361+
* Depending on the return target (kernel/user) this runs the necessary
362+
* preemption and work checks if possible and reguired and returns to
363+
* the caller with interrupts disabled and no further work pending.
364+
*
365+
* This is the last action before returning to the low level ASM code which
366+
* just needs to return to the appropriate context.
367+
*
368+
* Counterpart to irqentry_enter().
369+
*/
370+
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
371+
310372
#endif

kernel/entry/common.c

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,3 +255,120 @@ noinstr void irqentry_exit_to_user_mode(struct pt_regs *regs)
255255
instrumentation_end();
256256
exit_to_user_mode();
257257
}
258+
259+
irqentry_state_t noinstr irqentry_enter(struct pt_regs *regs)
{
	irqentry_state_t ret = {
		.exit_rcu = false,
	};

	/* User mode entries take the same path as syscall entry */
	if (user_mode(regs)) {
		irqentry_enter_from_user_mode(regs);
		return ret;
	}

	/*
	 * If this entry hit the idle task invoke rcu_irq_enter() whether
	 * RCU is watching or not.
	 *
	 * Interrupts can nest when the first interrupt invokes softirq
	 * processing on return which enables interrupts.
	 *
	 * Scheduler ticks in the idle task can mark quiescent state and
	 * terminate a grace period, if and only if the timer interrupt is
	 * not nested into another interrupt.
	 *
	 * Checking for __rcu_is_watching() here would prevent the nesting
	 * interrupt to invoke rcu_irq_enter(). If that nested interrupt is
	 * the tick then rcu_flavor_sched_clock_irq() would wrongfully
	 * assume that it is the first interrupt and eventually claim
	 * quiescent state and end grace periods prematurely.
	 *
	 * Unconditionally invoke rcu_irq_enter() so RCU state stays
	 * consistent.
	 *
	 * TINY_RCU does not support EQS, so let the compiler eliminate
	 * this part when enabled.
	 */
	if (!IS_ENABLED(CONFIG_TINY_RCU) && is_idle_task(current)) {
		/*
		 * If RCU is not watching then the same careful
		 * sequence vs. lockdep and tracing is required
		 * as in irq_enter_from_user_mode().
		 */
		lockdep_hardirqs_off(CALLER_ADDR0);
		rcu_irq_enter();
		instrumentation_begin();
		trace_hardirqs_off_finish();
		instrumentation_end();

		ret.exit_rcu = true;
		return ret;
	}

	/*
	 * If RCU is watching then RCU only wants to check whether it needs
	 * to restart the tick in NOHZ mode. rcu_irq_enter_check_tick()
	 * already contains a warning when RCU is not watching, so no point
	 * in having another one here.
	 */
	instrumentation_begin();
	rcu_irq_enter_check_tick();
	/* Use the combo lockdep/tracing function */
	trace_hardirqs_off();
	instrumentation_end();

	return ret;
}
323+
324+
void irqentry_exit_cond_resched(void)
325+
{
326+
if (!preempt_count()) {
327+
/* Sanity check RCU and thread stack */
328+
rcu_irq_exit_check_preempt();
329+
if (IS_ENABLED(CONFIG_DEBUG_ENTRY))
330+
WARN_ON_ONCE(!on_thread_stack());
331+
if (need_resched())
332+
preempt_schedule_irq();
333+
}
334+
}
335+
336+
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state)
{
	lockdep_assert_irqs_disabled();

	/* Check whether this returns to user mode */
	if (user_mode(regs)) {
		irqentry_exit_to_user_mode(regs);
	} else if (!regs_irqs_disabled(regs)) {
		/*
		 * If RCU was not watching on entry this needs to be done
		 * carefully and needs the same ordering of lockdep/tracing
		 * and RCU as the return to user mode path.
		 */
		if (state.exit_rcu) {
			instrumentation_begin();
			/* Tell the tracer that IRET will enable interrupts */
			trace_hardirqs_on_prepare();
			lockdep_hardirqs_on_prepare(CALLER_ADDR0);
			instrumentation_end();
			rcu_irq_exit();
			lockdep_hardirqs_on(CALLER_ADDR0);
			return;
		}

		instrumentation_begin();
		if (IS_ENABLED(CONFIG_PREEMPTION))
			irqentry_exit_cond_resched();
		/* Covers both tracing and lockdep */
		trace_hardirqs_on();
		instrumentation_end();
	} else {
		/*
		 * IRQ flags state is correct already. Just tell RCU if it
		 * was not watching on entry.
		 */
		if (state.exit_rcu)
			rcu_irq_exit();
	}
}

0 commit comments

Comments
 (0)