
Commit 9f317bd

Merge branch 'bpf-free-special-fields-when-update-hash-and-local-storage-maps'
Leon Hwang says:

====================
bpf: Free special fields when updating hash and local storage maps

In the discussion thread "[PATCH bpf-next v9 0/7] bpf: Introduce BPF_F_CPU
and BPF_F_ALL_CPUS flags for percpu maps"[1], it was pointed out that
missing calls to bpf_obj_free_fields() could lead to memory leaks.

A selftest was added to confirm that this is indeed a real issue: the
refcount of a BPF_KPTR_REF field is not decremented when
bpf_obj_free_fields() is missing after copy_map_value[,_long]().

Further inspection of the copy_map_value[,_long]() call sites revealed two
locations affected by this issue:

1. pcpu_copy_value()
2. htab_map_update_elem() when used with BPF_F_LOCK

Similar cases occur when updating local storage maps.

This series fixes the issues by properly calling bpf_obj_free_fields()
(or check_and_free_fields()) after copy_map_value[,_long]() and adds
selftests to verify the fix.

Changes:

v2 -> v3:
* Free special fields when updating local storage maps without BPF_F_LOCK.
* Add a test to verify that the refcount is decremented when updating
  cgroup local storage maps without BPF_F_LOCK.
* Address review from AI bot:
  * Slow path with BPF_F_LOCK (around lines 642-646) in
    'bpf_local_storage.c'.
* https://lore.kernel.org/bpf/[email protected]/

v1 -> v2:
* Add a test to verify that the refcount is decremented when updating
  cgroup local storage maps with BPF_F_LOCK.
* Address review from AI bot:
  * Fast path without the bucket lock (around line 610) in
    'bpf_local_storage.c'.
* https://lore.kernel.org/bpf/[email protected]/
====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Andrii Nakryiko <[email protected]>
2 parents 54c134f + d5a7e7a commit 9f317bd
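Why the fix works: copy_map_value() and copy_map_value_long() copy only the plain data of a map value and skip the special fields recorded in the map's btf_record (kptrs, bpf_spin_lock, and similar). After an in-place update the destination therefore still holds its old BPF_KPTR_REF pointer, and unless bpf_obj_free_fields() runs afterwards the reference behind that pointer is never dropped. The standalone userspace sketch below only models this pattern; struct obj, copy_plain_fields() and the other names are illustrative stand-ins, not the kernel implementation.

/* Standalone userspace analogy, not kernel code: all names are illustrative.
 * It models why a value copy that skips "special" fields must be followed by
 * releasing whatever the destination's special fields still hold.
 */
#include <stdio.h>
#include <stdlib.h>

struct obj {
        int refcnt;
};

static struct obj *obj_get(struct obj *o)
{
        o->refcnt++;
        return o;
}

static void obj_put(struct obj *o)
{
        if (o && --o->refcnt == 0) {
                printf("old object released\n");
                free(o);
        }
}

struct map_value {
        struct obj *kptr;       /* stands in for a BPF_KPTR_REF field */
        int data;               /* plain field */
};

/* Mirrors what copy_map_value() does: plain fields are copied, special
 * fields are skipped, so dst->kptr still points at the old object. */
static void copy_plain_fields(struct map_value *dst, const struct map_value *src)
{
        dst->data = src->data;
}

/* Plays the role of bpf_obj_free_fields(): drop the reference still held
 * by the destination's special fields. Without this step the old object
 * is never freed, which is the leak the commit fixes. */
static void free_special_fields(struct map_value *v)
{
        obj_put(v->kptr);
        v->kptr = NULL;
}

int main(void)
{
        struct obj *o = calloc(1, sizeof(*o));
        struct map_value stored = { .kptr = obj_get(o), .data = 1 };
        struct map_value incoming = { .kptr = NULL, .data = 2 };

        copy_plain_fields(&stored, &incoming);  /* in-place update */
        free_special_fields(&stored);           /* prints "old object released" */
        return 0;
}

In the kernel, this same "copy, then release the old special fields" ordering is what the hunks below add to pcpu_copy_value(), htab_map_update_elem() and bpf_local_storage_update().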

File tree

4 files changed: +343 lines, -1 line (the fourth file's diff is not expanded below)


kernel/bpf/bpf_local_storage.c

Lines changed: 2 additions & 0 deletions

@@ -609,6 +609,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
                 if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
                         copy_map_value_locked(&smap->map, old_sdata->data,
                                               value, false);
+                        bpf_obj_free_fields(smap->map.record, old_sdata->data);
                         return old_sdata;
                 }
         }
@@ -641,6 +642,7 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
         if (old_sdata && (map_flags & BPF_F_LOCK)) {
                 copy_map_value_locked(&smap->map, old_sdata->data, value,
                                       false);
+                bpf_obj_free_fields(smap->map.record, old_sdata->data);
                 selem = SELEM(old_sdata);
                 goto unlock;
         }

kernel/bpf/hashtab.c

Lines changed: 4 additions & 0 deletions

@@ -937,12 +937,14 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
         if (!onallcpus) {
                 /* copy true value_size bytes */
                 copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
+                bpf_obj_free_fields(htab->map.record, this_cpu_ptr(pptr));
         } else {
                 u32 size = round_up(htab->map.value_size, 8);
                 int off = 0, cpu;

                 for_each_possible_cpu(cpu) {
                         copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
+                        bpf_obj_free_fields(htab->map.record, per_cpu_ptr(pptr, cpu));
                         off += size;
                 }
         }
@@ -1108,6 +1110,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
                 copy_map_value_locked(map,
                                       htab_elem_value(l_old, key_size),
                                       value, false);
+                check_and_free_fields(htab, l_old);
                 return 0;
         }
         /* fall through, grab the bucket lock and lookup again.
@@ -1136,6 +1139,7 @@ static long htab_map_update_elem(struct bpf_map *map, void *key, void *value,
                 copy_map_value_locked(map,
                                       htab_elem_value(l_old, key_size),
                                       value, false);
+                check_and_free_fields(htab, l_old);
                 ret = 0;
                 goto err;
         }
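From userspace, the htab_map_update_elem() path fixed above is the one taken when an existing element is updated with BPF_F_LOCK: the new value is copied into the old element in place under its spin lock, and with this commit the old element's special fields are released as well. Below is a minimal sketch of such an update, assuming map_fd refers to an already-loaded hash map whose value type embeds struct bpf_spin_lock (like lock_hash in the selftest that follows); map_fd, the key type and the value layout are assumptions for illustration, not part of this commit.

/* Hedged sketch: in-place, locked update of a hash map element from
 * userspace. Assumes `map_fd` and an int key; uses only the documented
 * libbpf bpf_map_update_elem() API. */
#include <string.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

struct lock_map_value {
        __u64 kptr;             /* BPF_KPTR_REF slot, managed by the kernel */
        struct bpf_spin_lock lock;
        int value;
};

static int update_locked(int map_fd, int key)
{
        struct lock_map_value v;

        memset(&v, 0, sizeof(v));
        /* With BPF_F_LOCK an existing element is overwritten in place under
         * its spin lock via copy_map_value_locked(); after this commit the
         * kernel also frees the old element's special fields, so a kptr
         * stored there by a BPF program is no longer leaked by this call. */
        return bpf_map_update_elem(map_fd, &key, &v, BPF_F_LOCK);
}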

tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c

Lines changed: 177 additions & 1 deletion

@@ -3,7 +3,7 @@
 
 #include <test_progs.h>
 #include <network_helpers.h>
-
+#include "cgroup_helpers.h"
 #include "refcounted_kptr.skel.h"
 #include "refcounted_kptr_fail.skel.h"
 
@@ -44,3 +44,179 @@ void test_refcounted_kptr_wrong_owner(void)
         ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
         refcounted_kptr__destroy(skel);
 }
+
+static void test_refcnt_leak(void *values, size_t values_sz, u64 flags, struct bpf_map *map,
+                             struct bpf_program *prog_leak, struct bpf_program *prog_check)
+{
+        int ret, fd, key = 0;
+        LIBBPF_OPTS(bpf_test_run_opts, opts,
+                    .data_in = &pkt_v4,
+                    .data_size_in = sizeof(pkt_v4),
+                    .repeat = 1,
+        );
+
+        ret = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, flags);
+        if (!ASSERT_OK(ret, "bpf_map__update_elem init"))
+                return;
+
+        fd = bpf_program__fd(prog_leak);
+        ret = bpf_prog_test_run_opts(fd, &opts);
+        if (!ASSERT_OK(ret, "test_run_opts"))
+                return;
+        if (!ASSERT_EQ(opts.retval, 2, "retval refcount"))
+                return;
+
+        ret = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, flags);
+        if (!ASSERT_OK(ret, "bpf_map__update_elem dec refcount"))
+                return;
+
+        fd = bpf_program__fd(prog_check);
+        ret = bpf_prog_test_run_opts(fd, &opts);
+        ASSERT_OK(ret, "test_run_opts");
+        ASSERT_EQ(opts.retval, 1, "retval");
+}
+
+static void test_percpu_hash_refcount_leak(void)
+{
+        struct refcounted_kptr *skel;
+        size_t values_sz;
+        u64 *values;
+        int cpu_nr;
+
+        cpu_nr = libbpf_num_possible_cpus();
+        if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
+                return;
+
+        values = calloc(cpu_nr, sizeof(u64));
+        if (!ASSERT_OK_PTR(values, "calloc values"))
+                return;
+
+        skel = refcounted_kptr__open_and_load();
+        if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
+                free(values);
+                return;
+        }
+
+        values_sz = cpu_nr * sizeof(u64);
+        memset(values, 0, values_sz);
+
+        test_refcnt_leak(values, values_sz, 0, skel->maps.pcpu_hash,
+                         skel->progs.pcpu_hash_refcount_leak,
+                         skel->progs.check_pcpu_hash_refcount);
+
+        refcounted_kptr__destroy(skel);
+        free(values);
+}
+
+struct lock_map_value {
+        u64 kptr;
+        struct bpf_spin_lock lock;
+        int value;
+};
+
+static void test_hash_lock_refcount_leak(void)
+{
+        struct lock_map_value value = {};
+        struct refcounted_kptr *skel;
+
+        skel = refcounted_kptr__open_and_load();
+        if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load"))
+                return;
+
+        test_refcnt_leak(&value, sizeof(value), BPF_F_LOCK, skel->maps.lock_hash,
+                         skel->progs.hash_lock_refcount_leak,
+                         skel->progs.check_hash_lock_refcount);
+
+        refcounted_kptr__destroy(skel);
+}
+
+static void test_cgrp_storage_refcount_leak(u64 flags)
+{
+        int server_fd = -1, client_fd = -1;
+        struct lock_map_value value = {};
+        struct refcounted_kptr *skel;
+        struct bpf_link *link;
+        struct bpf_map *map;
+        int cgroup, err;
+
+        cgroup = test__join_cgroup("/cg_refcount_leak");
+        if (!ASSERT_GE(cgroup, 0, "test__join_cgroup"))
+                return;
+
+        skel = refcounted_kptr__open_and_load();
+        if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load"))
+                goto out;
+
+        link = bpf_program__attach_cgroup(skel->progs.cgroup_storage_refcount_leak, cgroup);
+        if (!ASSERT_OK_PTR(link, "bpf_program__attach_cgroup"))
+                goto out;
+        skel->links.cgroup_storage_refcount_leak = link;
+
+        server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+        if (!ASSERT_GE(server_fd, 0, "start_server"))
+                goto out;
+
+        client_fd = connect_to_fd(server_fd, 0);
+        if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+                goto out;
+
+        map = skel->maps.cgrp_strg;
+        err = bpf_map__lookup_elem(map, &cgroup, sizeof(cgroup), &value, sizeof(value), flags);
+        if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
+                goto out;
+
+        ASSERT_EQ(value.value, 2, "refcount");
+
+        err = bpf_map__update_elem(map, &cgroup, sizeof(cgroup), &value, sizeof(value), flags);
+        if (!ASSERT_OK(err, "bpf_map__update_elem"))
+                goto out;
+
+        err = bpf_link__detach(skel->links.cgroup_storage_refcount_leak);
+        if (!ASSERT_OK(err, "bpf_link__detach"))
+                goto out;
+
+        link = bpf_program__attach(skel->progs.check_cgroup_storage_refcount);
+        if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+                goto out;
+        skel->links.check_cgroup_storage_refcount = link;
+
+        close(client_fd);
+        client_fd = connect_to_fd(server_fd, 0);
+        if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+                goto out;
+
+        err = bpf_map__lookup_elem(map, &cgroup, sizeof(cgroup), &value, sizeof(value), flags);
+        if (!ASSERT_OK(err, "bpf_map__lookup_elem"))
+                goto out;
+
+        ASSERT_EQ(value.value, 1, "refcount");
+out:
+        close(cgroup);
+        refcounted_kptr__destroy(skel);
+        if (client_fd >= 0)
+                close(client_fd);
+        if (server_fd >= 0)
+                close(server_fd);
+}
+
+static void test_cgroup_storage_refcount_leak(void)
+{
+        test_cgrp_storage_refcount_leak(0);
+}
+
+static void test_cgroup_storage_lock_refcount_leak(void)
+{
+        test_cgrp_storage_refcount_leak(BPF_F_LOCK);
+}
+
+void test_kptr_refcount_leak(void)
+{
+        if (test__start_subtest("percpu_hash_refcount_leak"))
+                test_percpu_hash_refcount_leak();
+        if (test__start_subtest("hash_lock_refcount_leak"))
+                test_hash_lock_refcount_leak();
+        if (test__start_subtest("cgroup_storage_refcount_leak"))
+                test_cgroup_storage_refcount_leak();
+        if (test__start_subtest("cgroup_storage_lock_refcount_leak"))
+                test_cgroup_storage_lock_refcount_leak();
+}
