Skip to content

Commit 057b63c

Browse files
Tao Chen authored and Kernel Patches Daemon committed
bpf: Use per-cpu BPF callchain entry to save callchain
As Alexei noted, get_perf_callchain() return values may be reused if a task is preempted after the BPF program enters migrate disable mode. Drawing on the per-cpu design of bpf_bprintf_buffers, per-cpu BPF callchain entry is used here. Signed-off-by: Tao Chen <[email protected]>
1 parent acda230 commit 057b63c

File tree

1 file changed

+74
-24
lines changed

1 file changed

+74
-24
lines changed

kernel/bpf/stackmap.c

Lines changed: 74 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,52 @@ struct bpf_stack_map {
3131
struct stack_map_bucket *buckets[] __counted_by(n_buckets);
3232
};
3333

34+
struct bpf_perf_callchain_entry {
35+
u64 nr;
36+
u64 ip[PERF_MAX_STACK_DEPTH];
37+
};
38+
39+
#define MAX_PERF_CALLCHAIN_PREEMPT 3
40+
static DEFINE_PER_CPU(struct bpf_perf_callchain_entry[MAX_PERF_CALLCHAIN_PREEMPT],
41+
bpf_perf_callchain_entries);
42+
static DEFINE_PER_CPU(int, bpf_perf_callchain_preempt_cnt);
43+
44+
static int bpf_get_perf_callchain_or_entry(struct perf_callchain_entry **entry,
45+
struct pt_regs *regs, bool kernel,
46+
bool user, u32 max_stack, bool crosstack,
47+
bool add_mark, bool get_callchain)
48+
{
49+
struct bpf_perf_callchain_entry *bpf_entry;
50+
struct perf_callchain_entry *perf_entry;
51+
int preempt_cnt;
52+
53+
preempt_cnt = this_cpu_inc_return(bpf_perf_callchain_preempt_cnt);
54+
if (WARN_ON_ONCE(preempt_cnt > MAX_PERF_CALLCHAIN_PREEMPT)) {
55+
this_cpu_dec(bpf_perf_callchain_preempt_cnt);
56+
return -EBUSY;
57+
}
58+
59+
bpf_entry = this_cpu_ptr(&bpf_perf_callchain_entries[preempt_cnt - 1]);
60+
if (!get_callchain) {
61+
*entry = (struct perf_callchain_entry *)bpf_entry;
62+
return 0;
63+
}
64+
65+
perf_entry = get_perf_callchain(regs, (struct perf_callchain_entry *)bpf_entry,
66+
kernel, user, max_stack,
67+
crosstack, add_mark);
68+
*entry = perf_entry;
69+
70+
return 0;
71+
}
72+
73+
static void bpf_put_perf_callchain(void)
74+
{
75+
if (WARN_ON_ONCE(this_cpu_read(bpf_perf_callchain_preempt_cnt) == 0))
76+
return;
77+
this_cpu_dec(bpf_perf_callchain_preempt_cnt);
78+
}
79+
3480
static inline bool stack_map_use_build_id(struct bpf_map *map)
3581
{
3682
return (map->map_flags & BPF_F_STACK_BUILD_ID);
@@ -192,11 +238,11 @@ get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
192238
{
193239
#ifdef CONFIG_STACKTRACE
194240
struct perf_callchain_entry *entry;
195-
int rctx;
196-
197-
entry = get_callchain_entry(&rctx);
241+
int ret;
198242

199-
if (!entry)
243+
ret = bpf_get_perf_callchain_or_entry(&entry, NULL, false, false, 0, false, false,
244+
false);
245+
if (ret)
200246
return NULL;
201247

202248
entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
@@ -216,7 +262,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
216262
to[i] = (u64)(from[i]);
217263
}
218264

219-
put_callchain_entry(rctx);
265+
bpf_put_perf_callchain();
220266

221267
return entry;
222268
#else /* CONFIG_STACKTRACE */
@@ -305,6 +351,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
305351
bool user = flags & BPF_F_USER_STACK;
306352
struct perf_callchain_entry *trace;
307353
bool kernel = !user;
354+
int err;
308355

309356
if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
310357
BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
@@ -314,14 +361,15 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
314361
if (max_depth > sysctl_perf_event_max_stack)
315362
max_depth = sysctl_perf_event_max_stack;
316363

317-
trace = get_perf_callchain(regs, NULL, kernel, user, max_depth,
318-
false, false);
364+
err = bpf_get_perf_callchain_or_entry(&trace, regs, kernel, user, max_depth,
365+
false, false, true);
366+
if (err)
367+
return err;
319368

320-
if (unlikely(!trace))
321-
/* couldn't fetch the stack trace */
322-
return -EFAULT;
369+
err = __bpf_get_stackid(map, trace, flags);
370+
bpf_put_perf_callchain();
323371

324-
return __bpf_get_stackid(map, trace, flags);
372+
return err;
325373
}
326374

327375
const struct bpf_func_proto bpf_get_stackid_proto = {
@@ -443,20 +491,23 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
443491
if (sysctl_perf_event_max_stack < max_depth)
444492
max_depth = sysctl_perf_event_max_stack;
445493

446-
if (may_fault)
447-
rcu_read_lock(); /* need RCU for perf's callchain below */
448-
449494
if (trace_in)
450495
trace = trace_in;
451-
else if (kernel && task)
496+
else if (kernel && task) {
452497
trace = get_callchain_entry_for_task(task, max_depth);
453-
else
454-
trace = get_perf_callchain(regs, NULL, kernel, user, max_depth,
455-
crosstask, false);
498+
} else {
499+
err = bpf_get_perf_callchain_or_entry(&trace, regs, kernel, user, max_depth,
500+
false, false, true);
501+
if (err)
502+
return err;
503+
}
504+
505+
if (unlikely(!trace))
506+
goto err_fault;
456507

457-
if (unlikely(!trace) || trace->nr < skip) {
458-
if (may_fault)
459-
rcu_read_unlock();
508+
if (trace->nr < skip) {
509+
if (!trace_in)
510+
bpf_put_perf_callchain();
460511
goto err_fault;
461512
}
462513

@@ -475,9 +526,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
475526
memcpy(buf, ips, copy_len);
476527
}
477528

478-
/* trace/ips should not be dereferenced after this point */
479-
if (may_fault)
480-
rcu_read_unlock();
529+
if (!trace_in)
530+
bpf_put_perf_callchain();
481531

482532
if (user_build_id)
483533
stack_map_get_build_id_offset(buf, trace_nr, user, may_fault);

0 commit comments

Comments
 (0)