Conversation

kernel-patches-daemon-bpf[bot]

Pull request for series with
subject: Pass external callchain entry to get_perf_callchain
version: 3
url: https://patchwork.kernel.org/project/netdevbpf/list/?series=1013305

Tao Chen added 2 commits October 19, 2025 10:07
From the BPF stack map, we want to use our own buffers to avoid an unnecessary
copy, so let callers pass the entry directly. BPF will use this in the next patch.

Signed-off-by: Tao Chen <[email protected]>
As Alexei noted, the get_perf_callchain() return value may be reused
if a task is preempted after the BPF program enters migrate-disable
mode. Drawing on the per-CPU design of bpf_bprintf_buffers, a per-CPU
BPF callchain entry is used here.

Signed-off-by: Tao Chen <[email protected]>
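
For context, the bpf_bprintf_buffers design referenced above pairs a small
per-CPU array of buffers with a per-CPU nesting counter, so a bounded number
of nested or preempting users on the same CPU each get their own slot. A
minimal sketch of that pattern follows; identifiers and sizes are illustrative
only, not the actual kernel/bpf/helpers.c code:

	#include <linux/percpu.h>
	#include <linux/errno.h>

	struct my_buf {
		char data[256];
	};

	#define MAX_NEST_LEVEL 3

	static DEFINE_PER_CPU(struct my_buf[MAX_NEST_LEVEL], my_bufs);
	static DEFINE_PER_CPU(int, my_nest_level);

	static int try_get_buf(struct my_buf **buf)
	{
		int nest = this_cpu_inc_return(my_nest_level);

		if (nest > MAX_NEST_LEVEL) {	/* too many users on this CPU */
			this_cpu_dec(my_nest_level);
			return -EBUSY;
		}
		*buf = this_cpu_ptr(&my_bufs[nest - 1]);
		return 0;
	}

	static void put_buf(void)
	{
		this_cpu_dec(my_nest_level);
	}

The patch below applies the same get/put-with-counter idea to perf callchain
entries.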
@kernel-patches-daemon-bpf
Author

Upstream branch: 7361c86
series: https://patchwork.kernel.org/project/netdevbpf/list/?series=1013305
version: 3

@kernel-patches-review-bot

commit d8e76e700a50de3165213495d434390d670007bf
Author: Tao Chen <[email protected]>

bpf: Use per-cpu BPF callchain entry to save callchain

This patch introduces per-CPU BPF callchain entries to prevent reuse
issues when a task is preempted after entering migrate-disable mode.
It replaces get_callchain_entry()/put_callchain_entry() with a new
counter-based resource management scheme.
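
As I read the patch, the intended contract is that every successful
bpf_get_perf_callchain_or_entry() call is balanced by exactly one
bpf_put_perf_callchain() on the same CPU. A caller-side sketch (not code
from the patch; regs, kernel, user and max_depth mirror __bpf_get_stack()'s
locals):

	struct perf_callchain_entry *trace;
	int err;

	/* reserve a per-CPU entry and, with get_callchain == true, fill it */
	err = bpf_get_perf_callchain_or_entry(&trace, regs, kernel, user,
					      max_depth, false, false, true);
	if (err)
		return err;

	/* ... copy ips out of trace ... */

	bpf_put_perf_callchain();	/* release the per-CPU slot exactly once */

The double-put concern discussed below is about exactly this pairing.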

> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 94e46b7f3..3513077c5 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -31,6 +31,52 @@ struct bpf_stack_map {
>  	struct stack_map_bucket *buckets[] __counted_by(n_buckets);
>  };
>
> +struct bpf_perf_callchain_entry {
> +	u64 nr;
> +	u64 ip[PERF_MAX_STACK_DEPTH];
> +};
> +
> +#define MAX_PERF_CALLCHAIN_PREEMPT 3
> +static DEFINE_PER_CPU(struct bpf_perf_callchain_entry[MAX_PERF_CALLCHAIN_PREEMPT],
> +		      bpf_perf_callchain_entries);
> +static DEFINE_PER_CPU(int, bpf_perf_callchain_preempt_cnt);
> +
> +static int bpf_get_perf_callchain_or_entry(struct perf_callchain_entry **entry,
> +					   struct pt_regs *regs, bool kernel,
> +					   bool user, u32 max_stack, bool crosstack,
> +					   bool add_mark, bool get_callchain)
> +{
> +	struct bpf_perf_callchain_entry *bpf_entry;
> +	struct perf_callchain_entry *perf_entry;
> +	int preempt_cnt;
> +
> +	preempt_cnt = this_cpu_inc_return(bpf_perf_callchain_preempt_cnt);
> +	if (WARN_ON_ONCE(preempt_cnt > MAX_PERF_CALLCHAIN_PREEMPT)) {
> +		this_cpu_dec(bpf_perf_callchain_preempt_cnt);
> +		return -EBUSY;
> +	}
> +
> +	bpf_entry = this_cpu_ptr(&bpf_perf_callchain_entries[preempt_cnt - 1]);
> +	if (!get_callchain) {
> +		*entry = (struct perf_callchain_entry *)bpf_entry;
> +		return 0;
> +	}
> +
> +	perf_entry = get_perf_callchain(regs, (struct perf_callchain_entry *)bpf_entry,
> +					kernel, user, max_stack,
> +					crosstack, add_mark);
> +	*entry = perf_entry;
> +
> +	return 0;
> +}
> +
> +static void bpf_put_perf_callchain(void)
> +{
> +	if (WARN_ON_ONCE(this_cpu_read(bpf_perf_callchain_preempt_cnt) == 0))
> +		return;
> +	this_cpu_dec(bpf_perf_callchain_preempt_cnt);
> +}
> +
>  static inline bool stack_map_use_build_id(struct bpf_map *map)
>  {
>  	return (map->map_flags & BPF_F_STACK_BUILD_ID);
> @@ -192,11 +238,11 @@ get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
>  {
>  #ifdef CONFIG_STACKTRACE
>  	struct perf_callchain_entry *entry;
> -	int rctx;
> -
> -	entry = get_callchain_entry(&rctx);
> +	int ret;
>
> -	if (!entry)
> +	ret = bpf_get_perf_callchain_or_entry(&entry, NULL, false, false, 0, false, false,
> +					      false);
> +	if (ret)
>  		return NULL;
>
>  	entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
> @@ -216,7 +262,7 @@ get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
>  			to[i] = (u64)(from[i]);
>  	}
>
> -	put_callchain_entry(rctx);
> +	bpf_put_perf_callchain();
>
>  	return entry;
                   ^^^^

Does get_callchain_entry_for_task() need to call bpf_put_perf_callchain()
before returning the entry? The resource tracking counter gets decremented
here, but then the caller in __bpf_get_stack() calls bpf_put_perf_callchain()
again at the end of the function.

This creates a double-put scenario:
  bpf_get_task_stack()->__bpf_get_task_stack()->__bpf_get_stack():
    - calls get_callchain_entry_for_task() which does get+put
    - then does its own put at line 529 or 510
    - triggers WARN_ON_ONCE in bpf_put_perf_callchain()

The other path through __bpf_get_stack() that calls
bpf_get_perf_callchain_or_entry() directly expects the caller to do the
put, but get_callchain_entry_for_task() does the put internally.

>  #else /* CONFIG_STACKTRACE */
>  	return NULL;
>  #endif

[ ... ]

> @@ -443,20 +491,23 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
>  	if (sysctl_perf_event_max_stack < max_depth)
>  		max_depth = sysctl_perf_event_max_stack;
>
> -	if (may_fault)
> -		rcu_read_lock(); /* need RCU for perf's callchain below */
> -
>  	if (trace_in)
>  		trace = trace_in;
> -	else if (kernel && task)
> +	else if (kernel && task) {
>  		trace = get_callchain_entry_for_task(task, max_depth);
> -	else
> -		trace = get_perf_callchain(regs, NULL, kernel, user, max_depth,
> -					   crosstask, false);
> +	} else {
> +		err = bpf_get_perf_callchain_or_entry(&trace, regs, kernel, user, max_depth,
> +						      false, false, true);
> +		if (err)
> +			return err;
> +	}
> +
> +	if (unlikely(!trace))
> +		goto err_fault;
>
> -	if (unlikely(!trace) || trace->nr < skip) {
> -		if (may_fault)
> -			rcu_read_unlock();
> +	if (trace->nr < skip) {
> +		if (!trace_in)
> +			bpf_put_perf_callchain();
                        ^^^^

When the "kernel && task" path is taken above, get_callchain_entry_for_task()
already called bpf_put_perf_callchain() before returning. This second
bpf_put_perf_callchain() call will trigger the WARN_ON_ONCE since the
counter is already zero.

>  		goto err_fault;
>  	}
>
> @@ -475,9 +526,8 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
>  		memcpy(buf, ips, copy_len);
>  	}
>
> -	/* trace/ips should not be dereferenced after this point */
> -	if (may_fault)
> -		rcu_read_unlock();
> +	if (!trace_in)
> +		bpf_put_perf_callchain();
                ^^^^

Same double-put issue here on the success path when "kernel && task" was
used.

Should get_callchain_entry_for_task() avoid calling bpf_put_perf_callchain()
and leave it to the caller, similar to how the other path through
bpf_get_perf_callchain_or_entry() works?
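
One possible shape of that fix, as a minimal sketch only (assuming the caller
owns the put, and eliding the 32-bit ip[] fixup shown in the hunk above):

	static struct perf_callchain_entry *
	get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
	{
	#ifdef CONFIG_STACKTRACE
		struct perf_callchain_entry *entry;

		/* reserve a per-CPU entry; no callchain capture on the task path */
		if (bpf_get_perf_callchain_or_entry(&entry, NULL, false, false, 0,
						    false, false, false))
			return NULL;

		entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
						 max_depth, 0);
		/* ... 32-bit ip[] fixup as in the existing code ... */

		/*
		 * No bpf_put_perf_callchain() here: __bpf_get_stack() releases
		 * the slot on every exit path that did not use trace_in, so each
		 * get is paired with exactly one put.
		 */
		return entry;
	#else /* CONFIG_STACKTRACE */
		return NULL;
	#endif
	}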



AI reviewed your patch. Please fix the bug or reply by email explaining why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md

In-Reply-To-Subject: bpf: Use per-cpu BPF callchain entry to save callchain
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/18633525045

@kernel-patches-daemon-bpf
Author

Forwarding comment 3419821144 via email
In-Reply-To: [email protected]
Patch: https://patchwork.kernel.org/project/netdevbpf/patch/[email protected]/
