Skip to content

Commit 4b44a21

Browse files
Peter Zijlstra authored and Ingo Molnar committed
irq_work, smp: Allow irq_work on call_single_queue
Currently irq_work_queue_on() will issue an unconditional arch_send_call_function_single_ipi() and has the handler do irq_work_run(). This is unfortunate in that it makes the IPI handler look at a second cacheline and it misses the opportunity to avoid the IPI. Instead note that struct irq_work and struct __call_single_data are very similar in layout, so use a few bits in the flags word to encode a type and stick the irq_work on the call_single_queue list. Signed-off-by: Peter Zijlstra (Intel) <[email protected]> Signed-off-by: Ingo Molnar <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent b2a02fc commit 4b44a21

File tree

4 files changed

+130
-72
lines changed

4 files changed

+130
-72
lines changed

include/linux/irq_work.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@
1313
* busy NULL, 2 -> {free, claimed} : callback in progress, can be claimed
1414
*/
1515

16+
/* flags share CSD_FLAG_ space */
17+
1618
#define IRQ_WORK_PENDING BIT(0)
1719
#define IRQ_WORK_BUSY BIT(1)
1820

@@ -23,9 +25,12 @@
2325

2426
#define IRQ_WORK_CLAIMED (IRQ_WORK_PENDING | IRQ_WORK_BUSY)
2527

28+
/*
29+
* structure shares layout with single_call_data_t.
30+
*/
2631
struct irq_work {
27-
atomic_t flags;
2832
struct llist_node llnode;
33+
atomic_t flags;
2934
void (*func)(struct irq_work *);
3035
};
3136

include/linux/smp.h

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,38 @@
1616

1717
typedef void (*smp_call_func_t)(void *info);
1818
typedef bool (*smp_cond_func_t)(int cpu, void *info);
19+
20+
enum {
21+
CSD_FLAG_LOCK = 0x01,
22+
23+
/* IRQ_WORK_flags */
24+
25+
CSD_TYPE_ASYNC = 0x00,
26+
CSD_TYPE_SYNC = 0x10,
27+
CSD_TYPE_IRQ_WORK = 0x20,
28+
CSD_FLAG_TYPE_MASK = 0xF0,
29+
};
30+
31+
/*
32+
* structure shares (partial) layout with struct irq_work
33+
*/
1934
struct __call_single_data {
2035
struct llist_node llist;
36+
unsigned int flags;
2137
smp_call_func_t func;
2238
void *info;
23-
unsigned int flags;
2439
};
2540

2641
/* Use __aligned() to avoid to use 2 cache lines for 1 csd */
2742
typedef struct __call_single_data call_single_data_t
2843
__aligned(sizeof(struct __call_single_data));
2944

45+
/*
46+
* Enqueue a llist_node on the call_single_queue; be very careful, read
47+
* flush_smp_call_function_queue() in detail.
48+
*/
49+
extern void __smp_call_single_queue(int cpu, struct llist_node *node);
50+
3051
/* total number of cpus in this system (may exceed NR_CPUS) */
3152
extern unsigned int total_cpus;
3253

kernel/irq_work.c

Lines changed: 29 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ static bool irq_work_claim(struct irq_work *work)
3131
{
3232
int oflags;
3333

34-
oflags = atomic_fetch_or(IRQ_WORK_CLAIMED, &work->flags);
34+
oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->flags);
3535
/*
3636
* If the work is already pending, no need to raise the IPI.
3737
* The pairing atomic_fetch_andnot() in irq_work_run() makes sure
@@ -102,8 +102,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
102102
if (cpu != smp_processor_id()) {
103103
/* Arch remote IPI send/receive backend aren't NMI safe */
104104
WARN_ON_ONCE(in_nmi());
105-
if (llist_add(&work->llnode, &per_cpu(raised_list, cpu)))
106-
arch_send_call_function_single_ipi(cpu);
105+
__smp_call_single_queue(cpu, &work->llnode);
107106
} else {
108107
__irq_work_queue_local(work);
109108
}
@@ -131,6 +130,31 @@ bool irq_work_needs_cpu(void)
131130
return true;
132131
}
133132

133+
void irq_work_single(void *arg)
134+
{
135+
struct irq_work *work = arg;
136+
int flags;
137+
138+
/*
139+
* Clear the PENDING bit, after this point the @work
140+
* can be re-used.
141+
* Make it immediately visible so that other CPUs trying
142+
* to claim that work don't rely on us to handle their data
143+
* while we are in the middle of the func.
144+
*/
145+
flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
146+
147+
lockdep_irq_work_enter(work);
148+
work->func(work);
149+
lockdep_irq_work_exit(work);
150+
/*
151+
* Clear the BUSY bit and return to the free state if
152+
* no-one else claimed it meanwhile.
153+
*/
154+
flags &= ~IRQ_WORK_PENDING;
155+
(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
156+
}
157+
134158
static void irq_work_run_list(struct llist_head *list)
135159
{
136160
struct irq_work *work, *tmp;
@@ -142,27 +166,8 @@ static void irq_work_run_list(struct llist_head *list)
142166
return;
143167

144168
llnode = llist_del_all(list);
145-
llist_for_each_entry_safe(work, tmp, llnode, llnode) {
146-
int flags;
147-
/*
148-
* Clear the PENDING bit, after this point the @work
149-
* can be re-used.
150-
* Make it immediately visible so that other CPUs trying
151-
* to claim that work don't rely on us to handle their data
152-
* while we are in the middle of the func.
153-
*/
154-
flags = atomic_fetch_andnot(IRQ_WORK_PENDING, &work->flags);
155-
156-
lockdep_irq_work_enter(work);
157-
work->func(work);
158-
lockdep_irq_work_exit(work);
159-
/*
160-
* Clear the BUSY bit and return to the free state if
161-
* no-one else claimed it meanwhile.
162-
*/
163-
flags &= ~IRQ_WORK_PENDING;
164-
(void)atomic_cmpxchg(&work->flags, flags, flags & ~IRQ_WORK_BUSY);
165-
}
169+
llist_for_each_entry_safe(work, tmp, llnode, llnode)
170+
irq_work_single(work);
166171
}
167172

168173
/*

kernel/smp.c

Lines changed: 73 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,8 @@
2323

2424
#include "smpboot.h"
2525

26-
enum {
27-
CSD_FLAG_LOCK = 0x01,
28-
CSD_FLAG_SYNCHRONOUS = 0x02,
29-
};
26+
27+
#define CSD_TYPE(_csd) ((_csd)->flags & CSD_FLAG_TYPE_MASK)
3028

3129
struct call_function_data {
3230
call_single_data_t __percpu *csd;
@@ -137,15 +135,33 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(call_single_data_t, csd_data);
137135

138136
extern void send_call_function_single_ipi(int cpu);
139137

138+
void __smp_call_single_queue(int cpu, struct llist_node *node)
139+
{
140+
/*
141+
* The list addition should be visible before sending the IPI
142+
* handler locks the list to pull the entry off it because of
143+
* normal cache coherency rules implied by spinlocks.
144+
*
145+
* If IPIs can go out of order to the cache coherency protocol
146+
* in an architecture, sufficient synchronisation should be added
147+
* to arch code to make it appear to obey cache coherency WRT
148+
* locking and barrier primitives. Generic code isn't really
149+
* equipped to do the right thing...
150+
*/
151+
if (llist_add(node, &per_cpu(call_single_queue, cpu)))
152+
send_call_function_single_ipi(cpu);
153+
}
154+
140155
/*
141156
* Insert a previously allocated call_single_data_t element
142157
* for execution on the given CPU. data must already have
143158
* ->func, ->info, and ->flags set.
144159
*/
145-
static int generic_exec_single(int cpu, call_single_data_t *csd,
146-
smp_call_func_t func, void *info)
160+
static int generic_exec_single(int cpu, call_single_data_t *csd)
147161
{
148162
if (cpu == smp_processor_id()) {
163+
smp_call_func_t func = csd->func;
164+
void *info = csd->info;
149165
unsigned long flags;
150166

151167
/*
@@ -159,28 +175,12 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
159175
return 0;
160176
}
161177

162-
163178
if ((unsigned)cpu >= nr_cpu_ids || !cpu_online(cpu)) {
164179
csd_unlock(csd);
165180
return -ENXIO;
166181
}
167182

168-
csd->func = func;
169-
csd->info = info;
170-
171-
/*
172-
* The list addition should be visible before sending the IPI
173-
* handler locks the list to pull the entry off it because of
174-
* normal cache coherency rules implied by spinlocks.
175-
*
176-
* If IPIs can go out of order to the cache coherency protocol
177-
* in an architecture, sufficient synchronisation should be added
178-
* to arch code to make it appear to obey cache coherency WRT
179-
* locking and barrier primitives. Generic code isn't really
180-
* equipped to do the right thing...
181-
*/
182-
if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
183-
send_call_function_single_ipi(cpu);
183+
__smp_call_single_queue(cpu, &csd->llist);
184184

185185
return 0;
186186
}
@@ -194,16 +194,10 @@ static int generic_exec_single(int cpu, call_single_data_t *csd,
194194
void generic_smp_call_function_single_interrupt(void)
195195
{
196196
flush_smp_call_function_queue(true);
197-
198-
/*
199-
* Handle irq works queued remotely by irq_work_queue_on().
200-
* Smp functions above are typically synchronous so they
201-
* better run first since some other CPUs may be busy waiting
202-
* for them.
203-
*/
204-
irq_work_run();
205197
}
206198

199+
extern void irq_work_single(void *);
200+
207201
/**
208202
* flush_smp_call_function_queue - Flush pending smp-call-function callbacks
209203
*
@@ -241,26 +235,39 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
241235
* We don't have to use the _safe() variant here
242236
* because we are not invoking the IPI handlers yet.
243237
*/
244-
llist_for_each_entry(csd, entry, llist)
245-
pr_warn("IPI callback %pS sent to offline CPU\n",
246-
csd->func);
238+
llist_for_each_entry(csd, entry, llist) {
239+
switch (CSD_TYPE(csd)) {
240+
case CSD_TYPE_ASYNC:
241+
case CSD_TYPE_SYNC:
242+
case CSD_TYPE_IRQ_WORK:
243+
pr_warn("IPI callback %pS sent to offline CPU\n",
244+
csd->func);
245+
break;
246+
247+
default:
248+
pr_warn("IPI callback, unknown type %d, sent to offline CPU\n",
249+
CSD_TYPE(csd));
250+
break;
251+
}
252+
}
247253
}
248254

249255
/*
250256
* First; run all SYNC callbacks, people are waiting for us.
251257
*/
252258
prev = NULL;
253259
llist_for_each_entry_safe(csd, csd_next, entry, llist) {
254-
smp_call_func_t func = csd->func;
255-
void *info = csd->info;
256-
257260
/* Do we wait until *after* callback? */
258-
if (csd->flags & CSD_FLAG_SYNCHRONOUS) {
261+
if (CSD_TYPE(csd) == CSD_TYPE_SYNC) {
262+
smp_call_func_t func = csd->func;
263+
void *info = csd->info;
264+
259265
if (prev) {
260266
prev->next = &csd_next->llist;
261267
} else {
262268
entry = &csd_next->llist;
263269
}
270+
264271
func(info);
265272
csd_unlock(csd);
266273
} else {
@@ -272,11 +279,17 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
272279
* Second; run all !SYNC callbacks.
273280
*/
274281
llist_for_each_entry_safe(csd, csd_next, entry, llist) {
275-
smp_call_func_t func = csd->func;
276-
void *info = csd->info;
282+
int type = CSD_TYPE(csd);
277283

278-
csd_unlock(csd);
279-
func(info);
284+
if (type == CSD_TYPE_ASYNC) {
285+
smp_call_func_t func = csd->func;
286+
void *info = csd->info;
287+
288+
csd_unlock(csd);
289+
func(info);
290+
} else if (type == CSD_TYPE_IRQ_WORK) {
291+
irq_work_single(csd);
292+
}
280293
}
281294
}
282295

@@ -305,7 +318,7 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
305318
{
306319
call_single_data_t *csd;
307320
call_single_data_t csd_stack = {
308-
.flags = CSD_FLAG_LOCK | CSD_FLAG_SYNCHRONOUS,
321+
.flags = CSD_FLAG_LOCK | CSD_TYPE_SYNC,
309322
};
310323
int this_cpu;
311324
int err;
@@ -339,7 +352,10 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
339352
csd_lock(csd);
340353
}
341354

342-
err = generic_exec_single(cpu, csd, func, info);
355+
csd->func = func;
356+
csd->info = info;
357+
358+
err = generic_exec_single(cpu, csd);
343359

344360
if (wait)
345361
csd_lock_wait(csd);
@@ -385,7 +401,7 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
385401
csd->flags = CSD_FLAG_LOCK;
386402
smp_wmb();
387403

388-
err = generic_exec_single(cpu, csd, csd->func, csd->info);
404+
err = generic_exec_single(cpu, csd);
389405

390406
out:
391407
preempt_enable();
@@ -500,7 +516,7 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
500516

501517
csd_lock(csd);
502518
if (wait)
503-
csd->flags |= CSD_FLAG_SYNCHRONOUS;
519+
csd->flags |= CSD_TYPE_SYNC;
504520
csd->func = func;
505521
csd->info = info;
506522
if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
@@ -632,6 +648,17 @@ void __init smp_init(void)
632648
{
633649
int num_nodes, num_cpus;
634650

651+
/*
652+
* Ensure struct irq_work layout matches so that
653+
* flush_smp_call_function_queue() can do horrible things.
654+
*/
655+
BUILD_BUG_ON(offsetof(struct irq_work, llnode) !=
656+
offsetof(struct __call_single_data, llist));
657+
BUILD_BUG_ON(offsetof(struct irq_work, func) !=
658+
offsetof(struct __call_single_data, func));
659+
BUILD_BUG_ON(offsetof(struct irq_work, flags) !=
660+
offsetof(struct __call_single_data, flags));
661+
635662
idle_threads_init();
636663
cpuhp_threads_init();
637664

0 commit comments

Comments (0)