
Commit dc82dad

image-dragon authored and Kernel Patches Daemon committed
sched: make migrate_enable/migrate_disable inline
Currently, migrate_enable and migrate_disable are out-of-line functions, which
makes them hotspots in some cases. Take BPF for example: the calls to
migrate_enable and migrate_disable in the BPF trampoline introduce significant
overhead. The following is the 'perf top' output of the FENTRY benchmark
(./tools/testing/selftests/bpf/bench trig-fentry):

  54.63% bpf_prog_2dcccf652aac1793_bench_trigger_fentry [k] bpf_prog_2dcccf652aac1793_bench_trigger_fentry
  10.43% [kernel]                    [k] migrate_enable
  10.07% bpf_trampoline_6442517037   [k] bpf_trampoline_6442517037
   8.06% [kernel]                    [k] __bpf_prog_exit_recur
   4.11% libc.so.6                   [.] syscall
   2.15% [kernel]                    [k] entry_SYSCALL_64
   1.48% [kernel]                    [k] memchr_inv
   1.32% [kernel]                    [k] fput
   1.16% [kernel]                    [k] _copy_to_user
   0.73% [kernel]                    [k] bpf_prog_test_run_raw_tp

So in this commit, we make migrate_enable/migrate_disable inline to obtain
better performance.

struct rq is defined internally in kernel/sched/sched.h, and its "nr_pinned"
field is accessed by migrate_enable/migrate_disable, which makes them hard to
inline. Alexei Starovoitov suggested generating the offset of "nr_pinned" in
[1], so we can define migrate_enable/migrate_disable in include/linux/sched.h
and access "this_rq()->nr_pinned" as "(void *)this_rq() + RQ_nr_pinned". The
offset of "nr_pinned" is generated into include/generated/rq-offsets.h by
kernel/sched/rq-offsets.c.

In short, we move the definitions of migrate_enable and migrate_disable from
kernel/sched/core.c to include/linux/sched.h. The call to
__set_cpus_allowed_ptr() is left in the out-of-line __migrate_enable().

"struct rq" is not fully defined in include/linux/sched.h, so we can't access
"runqueues" with this_cpu_ptr(); compilation would fail in
this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr() on:

  typeof((ptr) + 0)

So we introduce this_rq_raw() and access the runqueues with arch_raw_cpu_ptr()
directly.

Before this patch, the performance of BPF FENTRY is:

  fentry : 113.030 ± 0.149M/s
  fentry : 112.501 ± 0.187M/s
  fentry : 112.828 ± 0.267M/s
  fentry : 115.287 ± 0.241M/s

After this patch, the performance of BPF FENTRY increases to:

  fentry : 143.644 ± 0.670M/s
  fentry : 149.764 ± 0.362M/s
  fentry : 149.642 ± 0.156M/s
  fentry : 145.263 ± 0.221M/s

Link: https://lore.kernel.org/bpf/CAADnVQ+5sEDKHdsJY5ZsfGDO_1SEhhQWHrt2SMBG5SYyQ+jt7w@mail.gmail.com/ [1]
Signed-off-by: Menglong Dong <[email protected]>
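To make the offsets mechanism concrete, here is a rough sketch (not part of the commit) of what the generated include/generated/rq-offsets.h amounts to and how the open-coded access lines up with the old field access. The numeric value is a placeholder; the real one is produced at build time and depends on the kernel configuration.

  /* Sketch of include/generated/rq-offsets.h as emitted by the
   * filechk,offsets rule; 2048 is made up, not a real offset. */
  #ifndef __RQ_OFFSETS_H__
  #define __RQ_OFFSETS_H__

  #define RQ_nr_pinned 2048 /* offsetof(struct rq, nr_pinned) */

  #endif

  /* With that macro, code in include/linux/sched.h can bump the per-CPU
   * pin count without the full definition of struct rq:
   *
   *	(*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))++;
   *
   * which is equivalent to this_rq()->nr_pinned++ inside kernel/sched/.
   */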
1 parent 03f336b commit dc82dad

File tree

6 files changed: +106 -58 lines


Kbuild

Lines changed: 12 additions & 1 deletion
@@ -34,13 +34,24 @@ arch/$(SRCARCH)/kernel/asm-offsets.s: $(timeconst-file) $(bounds-file)
 $(offsets-file): arch/$(SRCARCH)/kernel/asm-offsets.s FORCE
 	$(call filechk,offsets,__ASM_OFFSETS_H__)
 
+# Generate rq-offsets.h
+
+rq-offsets-file := include/generated/rq-offsets.h
+
+targets += kernel/sched/rq-offsets.s
+
+kernel/sched/rq-offsets.s: $(offsets-file)
+
+$(rq-offsets-file): kernel/sched/rq-offsets.s FORCE
+	$(call filechk,offsets,__RQ_OFFSETS_H__)
+
 # Check for missing system calls
 
 quiet_cmd_syscalls = CALL    $<
       cmd_syscalls = $(CONFIG_SHELL) $< $(CC) $(c_flags) $(missing_syscalls_flags)
 
 PHONY += missing-syscalls
-missing-syscalls: scripts/checksyscalls.sh $(offsets-file)
+missing-syscalls: scripts/checksyscalls.sh $(rq-offsets-file)
	$(call cmd,syscalls)
 
 # Check the manual modification of atomic headers

include/linux/preempt.h

Lines changed: 0 additions & 3 deletions
@@ -424,8 +424,6 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
  * work-conserving schedulers.
  *
  */
-extern void migrate_disable(void);
-extern void migrate_enable(void);
 
 /**
  * preempt_disable_nested - Disable preemption inside a normally preempt disabled section
@@ -471,7 +469,6 @@ static __always_inline void preempt_enable_nested(void)
 
 DEFINE_LOCK_GUARD_0(preempt, preempt_disable(), preempt_enable())
 DEFINE_LOCK_GUARD_0(preempt_notrace, preempt_disable_notrace(), preempt_enable_notrace())
-DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
 
 #ifdef CONFIG_PREEMPT_DYNAMIC

include/linux/sched.h

Lines changed: 77 additions & 0 deletions
@@ -49,6 +49,9 @@
 #include <linux/tracepoint-defs.h>
 #include <linux/unwind_deferred_types.h>
 #include <asm/kmap_size.h>
+#ifndef COMPILE_OFFSETS
+#include <generated/rq-offsets.h>
+#endif
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -2307,4 +2310,78 @@ static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct allo
 #define alloc_tag_restore(_tag, _old) do {} while (0)
 #endif
 
+#ifndef COMPILE_OFFSETS
+
+extern void __migrate_enable(void);
+
+struct rq;
+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+
+#ifdef CONFIG_SMP
+#define this_rq_raw() arch_raw_cpu_ptr(&runqueues)
+#else
+#define this_rq_raw() PERCPU_PTR(&runqueues)
+#endif
+
+static inline void migrate_enable(void)
+{
+	struct task_struct *p = current;
+
+#ifdef CONFIG_DEBUG_PREEMPT
+	/*
+	 * Check both overflow from migrate_disable() and superfluous
+	 * migrate_enable().
+	 */
+	if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
+		return;
+#endif
+
+	if (p->migration_disabled > 1) {
+		p->migration_disabled--;
+		return;
+	}
+
+	/*
+	 * Ensure stop_task runs either before or after this, and that
+	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
+	 */
+	guard(preempt)();
+	if (unlikely(p->cpus_ptr != &p->cpus_mask))
+		__migrate_enable();
+	/*
+	 * Mustn't clear migration_disabled() until cpus_ptr points back at the
+	 * regular cpus_mask, otherwise things that race (eg.
+	 * select_fallback_rq) get confused.
+	 */
+	barrier();
+	p->migration_disabled = 0;
+	(*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))--;
+}
+
+static inline void migrate_disable(void)
+{
+	struct task_struct *p = current;
+
+	if (p->migration_disabled) {
+#ifdef CONFIG_DEBUG_PREEMPT
+		/*
+		 * Warn about overflow half-way through the range.
+		 */
+		WARN_ON_ONCE((s16)p->migration_disabled < 0);
+#endif
+		p->migration_disabled++;
+		return;
+	}
+
+	guard(preempt)();
+	(*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))++;
+	p->migration_disabled = 1;
+}
+#else
+static inline void migrate_disable(void) { }
+static inline void migrate_enable(void) { }
+#endif
+
+DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())
+
 #endif
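Nothing changes for callers: migrate_disable()/migrate_enable() and the migrate scope guard keep their names; only the implementation moves into the header and becomes inline. A minimal, hypothetical usage sketch (demo_counter and demo_update are made-up names, not part of this commit):

  #include <linux/percpu.h>
  #include <linux/sched.h>

  static DEFINE_PER_CPU(unsigned long, demo_counter);	/* hypothetical per-CPU counter */

  static void demo_update(void)
  {
  	/* Pin the task to its current CPU; with this patch the fast path is inline. */
  	migrate_disable();
  	this_cpu_inc(demo_counter);	/* safe: the task cannot migrate here */
  	/* Unpin; the out-of-line __migrate_enable() runs only if the
  	 * affinity mask was narrowed while migration was disabled. */
  	migrate_enable();
  }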

kernel/bpf/verifier.c

Lines changed: 1 addition & 2 deletions
@@ -23870,8 +23870,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log,
 BTF_SET_START(btf_id_deny)
 BTF_ID_UNUSED
 #ifdef CONFIG_SMP
-BTF_ID(func, migrate_disable)
-BTF_ID(func, migrate_enable)
+BTF_ID(func, __migrate_enable)
 #endif
 #if !defined CONFIG_PREEMPT_RCU && !defined CONFIG_TINY_RCU
 BTF_ID(func, rcu_read_unlock_strict)

kernel/sched/core.c

Lines changed: 4 additions & 52 deletions
@@ -119,6 +119,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_update_nr_running_tp);
 EXPORT_TRACEPOINT_SYMBOL_GPL(sched_compute_energy_tp);
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+EXPORT_SYMBOL_GPL(runqueues);
 
 #ifdef CONFIG_SCHED_PROXY_EXEC
 DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
@@ -2381,66 +2382,17 @@ static void migrate_disable_switch(struct rq *rq, struct task_struct *p)
 	__do_set_cpus_allowed(p, &ac);
 }
 
-void migrate_disable(void)
-{
-	struct task_struct *p = current;
-
-	if (p->migration_disabled) {
-#ifdef CONFIG_DEBUG_PREEMPT
-		/*
-		 * Warn about overflow half-way through the range.
-		 */
-		WARN_ON_ONCE((s16)p->migration_disabled < 0);
-#endif
-		p->migration_disabled++;
-		return;
-	}
-
-	guard(preempt)();
-	this_rq()->nr_pinned++;
-	p->migration_disabled = 1;
-}
-EXPORT_SYMBOL_GPL(migrate_disable);
-
-void migrate_enable(void)
+void __migrate_enable(void)
 {
 	struct task_struct *p = current;
 	struct affinity_context ac = {
 		.new_mask = &p->cpus_mask,
 		.flags = SCA_MIGRATE_ENABLE,
 	};
 
-#ifdef CONFIG_DEBUG_PREEMPT
-	/*
-	 * Check both overflow from migrate_disable() and superfluous
-	 * migrate_enable().
-	 */
-	if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
-		return;
-#endif
-
-	if (p->migration_disabled > 1) {
-		p->migration_disabled--;
-		return;
-	}
-
-	/*
-	 * Ensure stop_task runs either before or after this, and that
-	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
-	 */
-	guard(preempt)();
-	if (p->cpus_ptr != &p->cpus_mask)
-		__set_cpus_allowed_ptr(p, &ac);
-	/*
-	 * Mustn't clear migration_disabled() until cpus_ptr points back at the
-	 * regular cpus_mask, otherwise things that race (eg.
-	 * select_fallback_rq) get confused.
-	 */
-	barrier();
-	p->migration_disabled = 0;
-	this_rq()->nr_pinned--;
+	__set_cpus_allowed_ptr(p, &ac);
 }
-EXPORT_SYMBOL_GPL(migrate_enable);
+EXPORT_SYMBOL_GPL(__migrate_enable);
 
 static inline bool rq_has_pinned_tasks(struct rq *rq)
 {

kernel/sched/rq-offsets.c

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#define COMPILE_OFFSETS
+#include <linux/kbuild.h>
+#include <linux/types.h>
+#include "sched.h"
+
+int main(void)
+{
+	DEFINE(RQ_nr_pinned, offsetof(struct rq, nr_pinned));
+
+	return 0;
+}
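For reference (not shown in this diff), DEFINE() comes from include/linux/kbuild.h; in current kernels it is essentially the snippet below. It emits the compile-time constant into rq-offsets.s as a "->NAME value" marker string, which the filechk,offsets rule added to Kbuild above then rewrites into the #define consumed by include/linux/sched.h:

  /* Paraphrased from include/linux/kbuild.h */
  #define DEFINE(sym, val) \
  	asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val))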
