Skip to content

Commit 3b5554d

Browse files
Tao Chen and Kernel Patches Daemon
authored and committed
perf: Refactor get_perf_callchain
In the BPF stack map, we want to use our own buffers to avoid an unnecessary copy and to ensure that the buffer will not be overwritten by other tasks that preempt the current one. Peter suggested providing more flexible stack-sampling APIs that can be used from BPF, while still allowing the perf callchain entry to be used with the help of these APIs. The next patch will modify the BPF part. Signed-off-by: Peter Zijlstra <[email protected]> Signed-off-by: Tao Chen <[email protected]>
1 parent 5c24747 commit 3b5554d

File tree

4 files changed

+61
-31
lines changed

4 files changed

+61
-31
lines changed

include/linux/perf_event.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ struct perf_callchain_entry_ctx {
6767
u32 nr;
6868
short contexts;
6969
bool contexts_maxed;
70+
bool add_mark;
7071
};
7172

7273
typedef unsigned long (*perf_copy_f)(void *dst, const void *src,
@@ -1718,9 +1719,17 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
17181719

17191720
extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
17201721
extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
1722+
1723+
extern void __init_perf_callchain_ctx(struct perf_callchain_entry_ctx *ctx,
1724+
struct perf_callchain_entry *entry,
1725+
u32 max_stack, bool add_mark);
1726+
1727+
extern void __get_perf_callchain_kernel(struct perf_callchain_entry_ctx *ctx, struct pt_regs *regs);
1728+
extern void __get_perf_callchain_user(struct perf_callchain_entry_ctx *ctx, struct pt_regs *regs);
1729+
17211730
extern struct perf_callchain_entry *
17221731
get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
1723-
u32 max_stack, bool crosstask, bool add_mark);
1732+
u32 max_stack, bool crosstask);
17241733
extern int get_callchain_buffers(int max_stack);
17251734
extern void put_callchain_buffers(void);
17261735
extern struct perf_callchain_entry *get_callchain_entry(int *rctx);

kernel/bpf/stackmap.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -315,7 +315,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
315315
max_depth = sysctl_perf_event_max_stack;
316316

317317
trace = get_perf_callchain(regs, kernel, user, max_depth,
318-
false, false);
318+
false);
319319

320320
if (unlikely(!trace))
321321
/* couldn't fetch the stack trace */
@@ -452,7 +452,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
452452
trace = get_callchain_entry_for_task(task, max_depth);
453453
else
454454
trace = get_perf_callchain(regs, kernel, user, max_depth,
455-
crosstask, false);
455+
crosstask);
456456

457457
if (unlikely(!trace) || trace->nr < skip) {
458458
if (may_fault)

kernel/events/callchain.c

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -216,13 +216,54 @@ static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entr
216216
#endif
217217
}
218218

219+
void __init_perf_callchain_ctx(struct perf_callchain_entry_ctx *ctx,
220+
struct perf_callchain_entry *entry,
221+
u32 max_stack, bool add_mark)
222+
223+
{
224+
ctx->entry = entry;
225+
ctx->max_stack = max_stack;
226+
ctx->nr = entry->nr = 0;
227+
ctx->contexts = 0;
228+
ctx->contexts_maxed = false;
229+
ctx->add_mark = add_mark;
230+
}
231+
232+
void __get_perf_callchain_kernel(struct perf_callchain_entry_ctx *ctx, struct pt_regs *regs)
233+
{
234+
if (user_mode(regs))
235+
return;
236+
237+
if (ctx->add_mark)
238+
perf_callchain_store_context(ctx, PERF_CONTEXT_KERNEL);
239+
perf_callchain_kernel(ctx, regs);
240+
}
241+
242+
void __get_perf_callchain_user(struct perf_callchain_entry_ctx *ctx, struct pt_regs *regs)
243+
{
244+
int start_entry_idx;
245+
246+
if (!user_mode(regs)) {
247+
if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
248+
return;
249+
regs = task_pt_regs(current);
250+
}
251+
252+
if (ctx->add_mark)
253+
perf_callchain_store_context(ctx, PERF_CONTEXT_USER);
254+
255+
start_entry_idx = ctx->nr;
256+
perf_callchain_user(ctx, regs);
257+
fixup_uretprobe_trampoline_entries(ctx->entry, start_entry_idx);
258+
}
259+
219260
struct perf_callchain_entry *
220261
get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
221-
u32 max_stack, bool crosstask, bool add_mark)
262+
u32 max_stack, bool crosstask)
222263
{
223264
struct perf_callchain_entry *entry;
224265
struct perf_callchain_entry_ctx ctx;
225-
int rctx, start_entry_idx;
266+
int rctx;
226267

227268
/* crosstask is not supported for user stacks */
228269
if (crosstask && user && !kernel)
@@ -232,34 +273,14 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
232273
if (!entry)
233274
return NULL;
234275

235-
ctx.entry = entry;
236-
ctx.max_stack = max_stack;
237-
ctx.nr = entry->nr = 0;
238-
ctx.contexts = 0;
239-
ctx.contexts_maxed = false;
276+
__init_perf_callchain_ctx(&ctx, entry, max_stack, true);
240277

241-
if (kernel && !user_mode(regs)) {
242-
if (add_mark)
243-
perf_callchain_store_context(&ctx, PERF_CONTEXT_KERNEL);
244-
perf_callchain_kernel(&ctx, regs);
245-
}
246-
247-
if (user && !crosstask) {
248-
if (!user_mode(regs)) {
249-
if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
250-
goto exit_put;
251-
regs = task_pt_regs(current);
252-
}
278+
if (kernel)
279+
__get_perf_callchain_kernel(&ctx, regs);
253280

254-
if (add_mark)
255-
perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);
256-
257-
start_entry_idx = entry->nr;
258-
perf_callchain_user(&ctx, regs);
259-
fixup_uretprobe_trampoline_entries(entry, start_entry_idx);
260-
}
281+
if (user && !crosstask)
282+
__get_perf_callchain_user(&ctx, regs);
261283

262-
exit_put:
263284
put_callchain_entry(rctx);
264285

265286
return entry;

kernel/events/core.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8218,7 +8218,7 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
82188218
return &__empty_callchain;
82198219

82208220
callchain = get_perf_callchain(regs, kernel, user,
8221-
max_stack, crosstask, true);
8221+
max_stack, crosstask);
82228222
return callchain ?: &__empty_callchain;
82238223
}
82248224

0 commit comments

Comments
 (0)