Skip to content

Commit a7ef071

Browse files
KAGA-KOKOopsiff
authored andcommitted
x86/irq: Plug vector setup race
[ Upstream commit ce0b5eedcb753697d43f61dd2e27d68eb5d3150f ] Hogan reported a vector setup race, which overwrites the interrupt descriptor in the per CPU vector array resulting in a disfunctional device. CPU0 CPU1 interrupt is raised in APIC IRR but not handled free_irq() per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN; request_irq() common_interrupt() d = this_cpu_read(vector_irq[vector]); per_cpu(vector_irq, CPU1)[vector] = desc; if (d == VECTOR_SHUTDOWN) this_cpu_write(vector_irq[vector], VECTOR_UNUSED); free_irq() cannot observe the pending vector in the CPU1 APIC as there is no way to query the remote CPUs APIC IRR. This requires that request_irq() uses the same vector/CPU as the one which was freed, but this also can be triggered by a spurious interrupt. Interestingly enough this problem managed to be hidden for more than a decade. Prevent this by reevaluating vector_irq under the vector lock, which is held by the interrupt activation code when vector_irq is updated. To avoid ifdeffery or IS_ENABLED() nonsense, move the [un]lock_vector_lock() declarations out under the CONFIG_IRQ_DOMAIN_HIERARCHY guard as it's only provided when CONFIG_X86_LOCAL_APIC=y. The current CONFIG_IRQ_DOMAIN_HIERARCHY guard is selected by CONFIG_X86_LOCAL_APIC, but can also be selected by other parts of the Kconfig system, which makes 32-bit UP builds with CONFIG_X86_LOCAL_APIC=n fail. Can we just get rid of this !APIC nonsense once and forever? Fixes: 9345005 ("x86/irq: Fix do_IRQ() interrupt warning for cpu hotplug retriggered irqs") Reported-by: Hogan Wang <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Tested-by: Hogan Wang <[email protected]> Link: https://lore.kernel.org/all/draft-87ikjhrhhh.ffs@tglx Signed-off-by: Sasha Levin <[email protected]> (cherry picked from commit 788c5e28cf48a24d1a704955264daf6f5fa81cfa)
1 parent f1069ce commit a7ef071

File tree

2 files changed

+55
-20
lines changed

2 files changed

+55
-20
lines changed

arch/x86/include/asm/hw_irq.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,21 +92,23 @@ struct irq_cfg {
9292

9393
extern struct irq_cfg *irq_cfg(unsigned int irq);
9494
extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
95-
extern void lock_vector_lock(void);
96-
extern void unlock_vector_lock(void);
9795
#ifdef CONFIG_SMP
9896
extern void vector_schedule_cleanup(struct irq_cfg *);
9997
extern void irq_complete_move(struct irq_cfg *cfg);
10098
#else
10199
static inline void vector_schedule_cleanup(struct irq_cfg *c) { }
102100
static inline void irq_complete_move(struct irq_cfg *c) { }
103101
#endif
104-
105102
extern void apic_ack_edge(struct irq_data *data);
106-
#else /* CONFIG_IRQ_DOMAIN_HIERARCHY */
103+
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
104+
105+
#ifdef CONFIG_X86_LOCAL_APIC
106+
extern void lock_vector_lock(void);
107+
extern void unlock_vector_lock(void);
108+
#else
107109
static inline void lock_vector_lock(void) {}
108110
static inline void unlock_vector_lock(void) {}
109-
#endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */
111+
#endif
110112

111113
/* Statistics */
112114
extern atomic_t irq_err_count;

arch/x86/kernel/irq.c

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -251,26 +251,59 @@ static __always_inline void handle_irq(struct irq_desc *desc,
251251
__handle_irq(desc, regs);
252252
}
253253

254-
static __always_inline int call_irq_handler(int vector, struct pt_regs *regs)
254+
static struct irq_desc *reevaluate_vector(int vector)
255255
{
256-
struct irq_desc *desc;
257-
int ret = 0;
256+
struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);
257+
258+
if (!IS_ERR_OR_NULL(desc))
259+
return desc;
260+
261+
if (desc == VECTOR_UNUSED)
262+
pr_emerg_ratelimited("No irq handler for %d.%u\n", smp_processor_id(), vector);
263+
else
264+
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
265+
return NULL;
266+
}
267+
268+
static __always_inline bool call_irq_handler(int vector, struct pt_regs *regs)
269+
{
270+
struct irq_desc *desc = __this_cpu_read(vector_irq[vector]);
258271

259-
desc = __this_cpu_read(vector_irq[vector]);
260272
if (likely(!IS_ERR_OR_NULL(desc))) {
261273
handle_irq(desc, regs);
262-
} else {
263-
ret = -EINVAL;
264-
if (desc == VECTOR_UNUSED) {
265-
pr_emerg_ratelimited("%s: %d.%u No irq handler for vector\n",
266-
__func__, smp_processor_id(),
267-
vector);
268-
} else {
269-
__this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
270-
}
274+
return true;
271275
}
272276

273-
return ret;
277+
/*
278+
* Reevaluate with vector_lock held to prevent a race against
279+
* request_irq() setting up the vector:
280+
*
281+
* CPU0 CPU1
282+
* interrupt is raised in APIC IRR
283+
* but not handled
284+
* free_irq()
285+
* per_cpu(vector_irq, CPU1)[vector] = VECTOR_SHUTDOWN;
286+
*
287+
* request_irq() common_interrupt()
288+
* d = this_cpu_read(vector_irq[vector]);
289+
*
290+
* per_cpu(vector_irq, CPU1)[vector] = desc;
291+
*
292+
* if (d == VECTOR_SHUTDOWN)
293+
* this_cpu_write(vector_irq[vector], VECTOR_UNUSED);
294+
*
295+
* This requires that the same vector on the same target CPU is
296+
* handed out or that a spurious interrupt hits that CPU/vector.
297+
*/
298+
lock_vector_lock();
299+
desc = reevaluate_vector(vector);
300+
unlock_vector_lock();
301+
302+
if (!desc)
303+
return false;
304+
305+
handle_irq(desc, regs);
306+
return true;
274307
}
275308

276309
/*
@@ -284,7 +317,7 @@ DEFINE_IDTENTRY_IRQ(common_interrupt)
284317
/* entry code tells RCU that we're not quiescent. Check it. */
285318
RCU_LOCKDEP_WARN(!rcu_is_watching(), "IRQ failed to wake up RCU");
286319

287-
if (unlikely(call_irq_handler(vector, regs)))
320+
if (unlikely(!call_irq_handler(vector, regs)))
288321
apic_eoi();
289322

290323
set_irq_regs(old_regs);

0 commit comments

Comments
 (0)