From 6fd60f33971dca32060696860856acefe7e986ac Mon Sep 17 00:00:00 2001
From: Tao Chen
Date: Wed, 10 Sep 2025 00:32:21 +0800
Subject: [PATCH 1/2] bpf: Add lookup_and_delete_elem for
 BPF_MAP_TYPE_STACK_TRACE

The stacktrace map can easily fill up, which leads to failures when
obtaining new stack ids. Besides increasing the size of the map,
another solution is to let user space delete a stack_id after looking
it up, so extend the existing bpf_map_lookup_and_delete_elem()
functionality to the stacktrace map type.

Signed-off-by: Tao Chen
---
 include/linux/bpf.h   |  2 +-
 kernel/bpf/stackmap.c | 16 ++++++++++++++--
 kernel/bpf/syscall.c  |  8 +++++---
 3 files changed, 20 insertions(+), 6 deletions(-)
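
(Reviewer note: a minimal user-space sketch of the extended interface,
placed here in the commentary area; the helper name drain_stack and the
map fd / stack id plumbing are illustrative, not part of this series.)

#include <errno.h>
#include <bpf/bpf.h>
#include <linux/perf_event.h>	/* PERF_MAX_STACK_DEPTH */

/* Drain one stack from a BPF_MAP_TYPE_STACK_TRACE map created without
 * BPF_F_STACK_BUILD_ID, so values are arrays of instruction pointers.
 * map_fd and stack_id come from elsewhere, e.g. a stack id that a BPF
 * prog stored after calling bpf_get_stackid().
 */
static int drain_stack(int map_fd, __u32 stack_id)
{
	__u64 ips[PERF_MAX_STACK_DEPTH] = {};

	/* Copy the trace out and free its bucket in one syscall. */
	if (bpf_map_lookup_and_delete_elem(map_fd, &stack_id, ips))
		return -errno;

	/* The id is gone now: repeating bpf_map_lookup_elem() on it
	 * fails with ENOENT, and the freed slot becomes available to
	 * the next bpf_get_stackid() call.
	 */
	return 0;
}
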
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index dfc1a27b56d55..ab6cf5e76fd27 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2712,7 +2712,7 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 			    u64 flags);
 
-int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
+int bpf_stackmap_copy_and_delete(struct bpf_map *map, void *key, void *value, bool delete);
 
 int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
 				 void *key, void *value, u64 map_flags);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 3615c06b7dfa9..bb63a74db7bdd 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -646,7 +646,15 @@ static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
 }
 
 /* Called from syscall */
-int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
+static int stack_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+					    void *value, u64 flags)
+{
+	return bpf_stackmap_copy_and_delete(map, key, value, true);
+}
+
+/* Called from syscall */
+int bpf_stackmap_copy_and_delete(struct bpf_map *map, void *key, void *value,
+				 bool delete)
 {
 	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
 	struct stack_map_bucket *bucket, *old_bucket;
@@ -663,7 +671,10 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
 	memcpy(value, bucket->data, trace_len);
 	memset(value + trace_len, 0, map->value_size - trace_len);
 
-	old_bucket = xchg(&smap->buckets[id], bucket);
+	if (delete)
+		old_bucket = bucket;
+	else
+		old_bucket = xchg(&smap->buckets[id], bucket);
 	if (old_bucket)
 		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
 	return 0;
@@ -754,6 +765,7 @@ const struct bpf_map_ops stack_trace_map_ops = {
 	.map_free = stack_map_free,
 	.map_get_next_key = stack_map_get_next_key,
 	.map_lookup_elem = stack_map_lookup_elem,
+	.map_lookup_and_delete_elem = stack_map_lookup_and_delete_elem,
 	.map_update_elem = stack_map_update_elem,
 	.map_delete_elem = stack_map_delete_elem,
 	.map_check_btf = map_check_no_btf,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index cf7173b1bb83d..641a18a7bf4d8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -319,7 +319,7 @@ static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 	} else if (map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
 		err = bpf_percpu_cgroup_storage_copy(map, key, value);
 	} else if (map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
-		err = bpf_stackmap_copy(map, key, value);
+		err = bpf_stackmap_copy_and_delete(map, key, value, false);
 	} else if (IS_FD_ARRAY(map) || IS_FD_PROG_ARRAY(map)) {
 		err = bpf_fd_array_map_lookup_elem(map, key, value);
 	} else if (IS_FD_HASH(map)) {
@@ -1651,7 +1651,8 @@ struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map)
 }
 EXPORT_SYMBOL_GPL(bpf_map_inc_not_zero);
 
-int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
+int __weak bpf_stackmap_copy_and_delete(struct bpf_map *map, void *key, void *value,
+					bool delete)
 {
 	return -ENOTSUPP;
 }
@@ -2182,7 +2183,8 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
 		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
 		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
-		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+		   map->map_type == BPF_MAP_TYPE_STACK_TRACE) {
 		if (!bpf_map_is_offloaded(map)) {
 			bpf_disable_instrumentation();
 			rcu_read_lock();

From d23138a87167a6efc45badd1c032c3648a57bbfb Mon Sep 17 00:00:00 2001
From: Tao Chen
Date: Wed, 10 Sep 2025 00:32:22 +0800
Subject: [PATCH 2/2] selftests/bpf: Add stacktrace map lookup_and_delete_elem
 test case

Add a test case for bpf_map_lookup_and_delete_elem() on the stacktrace
map; the new checks pass:

...
test_stacktrace_map:PASS:compare_stack_ips stackmap vs. stack_amap 0 nsec
test_stacktrace_map:PASS:stack_key_map lookup 0 nsec
test_stacktrace_map:PASS:stackmap lookup and delete 0 nsec
test_stacktrace_map:PASS:stackmap lookup deleted stack_id 0 nsec
#397 stacktrace_map:OK
...

Signed-off-by: Tao Chen
---
 .../selftests/bpf/prog_tests/stacktrace_map.c | 22 ++++++++++++++++++-
 .../selftests/bpf/progs/test_stacktrace_map.c |  8 +++++++
 2 files changed, 29 insertions(+), 1 deletion(-)
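
(Reviewer note: the flow under test is that the tracepoint prog records
the most recent stack_id in the new stack_key_map, user space reads that
id back, calls bpf_map_lookup_and_delete_elem() on the stackmap, and a
repeated plain lookup of the same id must then fail with ENOENT.
Assuming a normal selftests build, the case can be run from
tools/testing/selftests/bpf with "./test_progs -t stacktrace_map".)
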
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 84a7e405e9129..7d38afe5cfcaf 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -3,7 +3,7 @@
 
 void test_stacktrace_map(void)
 {
-	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd, stack_key_map_fd;
 	const char *prog_name = "oncpu";
 	int err, prog_fd, stack_trace_len;
 	const char *file = "./test_stacktrace_map.bpf.o";
@@ -11,6 +11,9 @@ void test_stacktrace_map(void)
 	struct bpf_program *prog;
 	struct bpf_object *obj;
 	struct bpf_link *link;
+	__u32 stack_id;
+	char val_buf[PERF_MAX_STACK_DEPTH *
+		     sizeof(struct bpf_stack_build_id)];
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
@@ -41,6 +44,10 @@ void test_stacktrace_map(void)
 	if (CHECK_FAIL(stack_amap_fd < 0))
 		goto disable_pmu;
 
+	stack_key_map_fd = bpf_find_map(__func__, obj, "stack_key_map");
+	if (CHECK_FAIL(stack_key_map_fd < 0))
+		goto disable_pmu;
+
 	/* give some time for bpf program run */
 	sleep(1);
 
@@ -68,6 +75,19 @@ void test_stacktrace_map(void)
 		  "err %d errno %d\n", err, errno))
 		goto disable_pmu;
 
+	err = bpf_map_lookup_elem(stack_key_map_fd, &key, &stack_id);
+	if (CHECK(err, "stack_key_map lookup", "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, &val_buf);
+	if (CHECK(err, "stackmap lookup and delete",
+		  "err %d errno %d\n", err, errno))
+		goto disable_pmu;
+
+	err = bpf_map_lookup_elem(stackmap_fd, &stack_id, &val_buf);
+	CHECK((!err || errno != ENOENT), "stackmap lookup deleted stack_id",
+	      "err %d errno %d\n", err, errno);
+
 disable_pmu:
 	bpf_link__destroy(link);
 close_prog:
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index 47568007b6683..3bede76c15136 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -38,6 +38,13 @@ struct {
 	__type(value, stack_trace_t);
 } stack_amap SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} stack_key_map SEC(".maps");
+
 /* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
 struct sched_switch_args {
 	unsigned long long pad;
@@ -64,6 +71,7 @@ int oncpu(struct sched_switch_args *ctx)
 	/* The size of stackmap and stackid_hmap should be the same */
 	key = bpf_get_stackid(ctx, &stackmap, 0);
 	if ((int)key >= 0) {
+		bpf_map_update_elem(&stack_key_map, &val, &key, 0);
 		bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
 		stack_p = bpf_map_lookup_elem(&stack_amap, &key);
 		if (stack_p)