Skip to content

Commit 9fd0606

Browse files
author
Alexei Starovoitov
committed
Merge branch 'bpf-support-bpf-rbtree-traversal-and-list-peeking'
Martin KaFai Lau says: ==================== bpf: Support bpf rbtree traversal and list peeking From: Martin KaFai Lau <[email protected]> The RFC v1 [1] showed a fq qdisc implementation in bpf that is much closer to the kernel sch_fq.c. The fq example and bpf qdisc changes are separated out from this set. This set is to focus on the kfunc and verifier changes that enable the bpf rbtree traversal and list peeking. v2: - Added tests to check that the return value of the bpf_rbtree_{root,left,right} and bpf_list_{front,back} is marked as a non_own_ref node pointer. (Kumar) - Added tests to ensure that the bpf_rbtree_{root,left,right} and bpf_list_{front,back} must be called after holding the spinlock. - Squashed the selftests adjustment to the corresponding verifier changes to avoid bisect failure. (Kumar) - Separated the bpf qdisc specific changes and fq selftest example from this set. [1]: https://lore.kernel.org/bpf/[email protected]/ ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 62e23f1 + 29318b4 commit 9fd0606

File tree

7 files changed

+445
-31
lines changed

7 files changed

+445
-31
lines changed

kernel/bpf/helpers.c

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2293,6 +2293,26 @@ __bpf_kfunc struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head)
22932293
return __bpf_list_del(head, true);
22942294
}
22952295

2296+
__bpf_kfunc struct bpf_list_node *bpf_list_front(struct bpf_list_head *head)
2297+
{
2298+
struct list_head *h = (struct list_head *)head;
2299+
2300+
if (list_empty(h) || unlikely(!h->next))
2301+
return NULL;
2302+
2303+
return (struct bpf_list_node *)h->next;
2304+
}
2305+
2306+
__bpf_kfunc struct bpf_list_node *bpf_list_back(struct bpf_list_head *head)
2307+
{
2308+
struct list_head *h = (struct list_head *)head;
2309+
2310+
if (list_empty(h) || unlikely(!h->next))
2311+
return NULL;
2312+
2313+
return (struct bpf_list_node *)h->prev;
2314+
}
2315+
22962316
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
22972317
struct bpf_rb_node *node)
22982318
{
@@ -2366,6 +2386,33 @@ __bpf_kfunc struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root)
23662386
return (struct bpf_rb_node *)rb_first_cached(r);
23672387
}
23682388

2389+
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_root(struct bpf_rb_root *root)
2390+
{
2391+
struct rb_root_cached *r = (struct rb_root_cached *)root;
2392+
2393+
return (struct bpf_rb_node *)r->rb_root.rb_node;
2394+
}
2395+
2396+
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_left(struct bpf_rb_root *root, struct bpf_rb_node *node)
2397+
{
2398+
struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
2399+
2400+
if (READ_ONCE(node_internal->owner) != root)
2401+
return NULL;
2402+
2403+
return (struct bpf_rb_node *)node_internal->rb_node.rb_left;
2404+
}
2405+
2406+
__bpf_kfunc struct bpf_rb_node *bpf_rbtree_right(struct bpf_rb_root *root, struct bpf_rb_node *node)
2407+
{
2408+
struct bpf_rb_node_kern *node_internal = (struct bpf_rb_node_kern *)node;
2409+
2410+
if (READ_ONCE(node_internal->owner) != root)
2411+
return NULL;
2412+
2413+
return (struct bpf_rb_node *)node_internal->rb_node.rb_right;
2414+
}
2415+
23692416
/**
23702417
* bpf_task_acquire - Acquire a reference to a task. A task acquired by this
23712418
* kfunc which is not stored in a map as a kptr, must be released by calling
@@ -3209,11 +3256,16 @@ BTF_ID_FLAGS(func, bpf_list_push_front_impl)
32093256
BTF_ID_FLAGS(func, bpf_list_push_back_impl)
32103257
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
32113258
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
3259+
BTF_ID_FLAGS(func, bpf_list_front, KF_RET_NULL)
3260+
BTF_ID_FLAGS(func, bpf_list_back, KF_RET_NULL)
32123261
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
32133262
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
32143263
BTF_ID_FLAGS(func, bpf_rbtree_remove, KF_ACQUIRE | KF_RET_NULL)
32153264
BTF_ID_FLAGS(func, bpf_rbtree_add_impl)
32163265
BTF_ID_FLAGS(func, bpf_rbtree_first, KF_RET_NULL)
3266+
BTF_ID_FLAGS(func, bpf_rbtree_root, KF_RET_NULL)
3267+
BTF_ID_FLAGS(func, bpf_rbtree_left, KF_RET_NULL)
3268+
BTF_ID_FLAGS(func, bpf_rbtree_right, KF_RET_NULL)
32173269

32183270
#ifdef CONFIG_CGROUPS
32193271
BTF_ID_FLAGS(func, bpf_cgroup_acquire, KF_ACQUIRE | KF_RCU | KF_RET_NULL)

kernel/bpf/verifier.c

Lines changed: 47 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11987,6 +11987,16 @@ static bool is_kfunc_arg_res_spin_lock(const struct btf *btf, const struct btf_p
1198711987
return __is_kfunc_ptr_arg_type(btf, arg, KF_ARG_RES_SPIN_LOCK_ID);
1198811988
}
1198911989

11990+
static bool is_rbtree_node_type(const struct btf_type *t)
11991+
{
11992+
return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_RB_NODE_ID]);
11993+
}
11994+
11995+
static bool is_list_node_type(const struct btf_type *t)
11996+
{
11997+
return t == btf_type_by_id(btf_vmlinux, kf_arg_btf_ids[KF_ARG_LIST_NODE_ID]);
11998+
}
11999+
1199012000
static bool is_kfunc_arg_callback(struct bpf_verifier_env *env, const struct btf *btf,
1199112001
const struct btf_param *arg)
1199212002
{
@@ -12069,13 +12079,18 @@ enum special_kfunc_type {
1206912079
KF_bpf_list_push_back_impl,
1207012080
KF_bpf_list_pop_front,
1207112081
KF_bpf_list_pop_back,
12082+
KF_bpf_list_front,
12083+
KF_bpf_list_back,
1207212084
KF_bpf_cast_to_kern_ctx,
1207312085
KF_bpf_rdonly_cast,
1207412086
KF_bpf_rcu_read_lock,
1207512087
KF_bpf_rcu_read_unlock,
1207612088
KF_bpf_rbtree_remove,
1207712089
KF_bpf_rbtree_add_impl,
1207812090
KF_bpf_rbtree_first,
12091+
KF_bpf_rbtree_root,
12092+
KF_bpf_rbtree_left,
12093+
KF_bpf_rbtree_right,
1207912094
KF_bpf_dynptr_from_skb,
1208012095
KF_bpf_dynptr_from_xdp,
1208112096
KF_bpf_dynptr_slice,
@@ -12111,11 +12126,16 @@ BTF_ID(func, bpf_list_push_front_impl)
1211112126
BTF_ID(func, bpf_list_push_back_impl)
1211212127
BTF_ID(func, bpf_list_pop_front)
1211312128
BTF_ID(func, bpf_list_pop_back)
12129+
BTF_ID(func, bpf_list_front)
12130+
BTF_ID(func, bpf_list_back)
1211412131
BTF_ID(func, bpf_cast_to_kern_ctx)
1211512132
BTF_ID(func, bpf_rdonly_cast)
1211612133
BTF_ID(func, bpf_rbtree_remove)
1211712134
BTF_ID(func, bpf_rbtree_add_impl)
1211812135
BTF_ID(func, bpf_rbtree_first)
12136+
BTF_ID(func, bpf_rbtree_root)
12137+
BTF_ID(func, bpf_rbtree_left)
12138+
BTF_ID(func, bpf_rbtree_right)
1211912139
#ifdef CONFIG_NET
1212012140
BTF_ID(func, bpf_dynptr_from_skb)
1212112141
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12144,13 +12164,18 @@ BTF_ID(func, bpf_list_push_front_impl)
1214412164
BTF_ID(func, bpf_list_push_back_impl)
1214512165
BTF_ID(func, bpf_list_pop_front)
1214612166
BTF_ID(func, bpf_list_pop_back)
12167+
BTF_ID(func, bpf_list_front)
12168+
BTF_ID(func, bpf_list_back)
1214712169
BTF_ID(func, bpf_cast_to_kern_ctx)
1214812170
BTF_ID(func, bpf_rdonly_cast)
1214912171
BTF_ID(func, bpf_rcu_read_lock)
1215012172
BTF_ID(func, bpf_rcu_read_unlock)
1215112173
BTF_ID(func, bpf_rbtree_remove)
1215212174
BTF_ID(func, bpf_rbtree_add_impl)
1215312175
BTF_ID(func, bpf_rbtree_first)
12176+
BTF_ID(func, bpf_rbtree_root)
12177+
BTF_ID(func, bpf_rbtree_left)
12178+
BTF_ID(func, bpf_rbtree_right)
1215412179
#ifdef CONFIG_NET
1215512180
BTF_ID(func, bpf_dynptr_from_skb)
1215612181
BTF_ID(func, bpf_dynptr_from_xdp)
@@ -12579,14 +12604,19 @@ static bool is_bpf_list_api_kfunc(u32 btf_id)
1257912604
return btf_id == special_kfunc_list[KF_bpf_list_push_front_impl] ||
1258012605
btf_id == special_kfunc_list[KF_bpf_list_push_back_impl] ||
1258112606
btf_id == special_kfunc_list[KF_bpf_list_pop_front] ||
12582-
btf_id == special_kfunc_list[KF_bpf_list_pop_back];
12607+
btf_id == special_kfunc_list[KF_bpf_list_pop_back] ||
12608+
btf_id == special_kfunc_list[KF_bpf_list_front] ||
12609+
btf_id == special_kfunc_list[KF_bpf_list_back];
1258312610
}
1258412611

1258512612
static bool is_bpf_rbtree_api_kfunc(u32 btf_id)
1258612613
{
1258712614
return btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
1258812615
btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
12589-
btf_id == special_kfunc_list[KF_bpf_rbtree_first];
12616+
btf_id == special_kfunc_list[KF_bpf_rbtree_first] ||
12617+
btf_id == special_kfunc_list[KF_bpf_rbtree_root] ||
12618+
btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
12619+
btf_id == special_kfunc_list[KF_bpf_rbtree_right];
1259012620
}
1259112621

1259212622
static bool is_bpf_iter_num_api_kfunc(u32 btf_id)
@@ -12686,7 +12716,9 @@ static bool check_kfunc_is_graph_node_api(struct bpf_verifier_env *env,
1268612716
break;
1268712717
case BPF_RB_NODE:
1268812718
ret = (kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
12689-
kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl]);
12719+
kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_add_impl] ||
12720+
kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_left] ||
12721+
kfunc_btf_id == special_kfunc_list[KF_bpf_rbtree_right]);
1269012722
break;
1269112723
default:
1269212724
verbose(env, "verifier internal error: unexpected graph node argument type %s\n",
@@ -13200,22 +13232,22 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
1320013232
return ret;
1320113233
break;
1320213234
case KF_ARG_PTR_TO_RB_NODE:
13203-
if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_remove]) {
13204-
if (!type_is_non_owning_ref(reg->type) || reg->ref_obj_id) {
13205-
verbose(env, "rbtree_remove node input must be non-owning ref\n");
13235+
if (meta->func_id == special_kfunc_list[KF_bpf_rbtree_add_impl]) {
13236+
if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13237+
verbose(env, "arg#%d expected pointer to allocated object\n", i);
1320613238
return -EINVAL;
1320713239
}
13208-
if (in_rbtree_lock_required_cb(env)) {
13209-
verbose(env, "rbtree_remove not allowed in rbtree cb\n");
13240+
if (!reg->ref_obj_id) {
13241+
verbose(env, "allocated object must be referenced\n");
1321013242
return -EINVAL;
1321113243
}
1321213244
} else {
13213-
if (reg->type != (PTR_TO_BTF_ID | MEM_ALLOC)) {
13214-
verbose(env, "arg#%d expected pointer to allocated object\n", i);
13245+
if (!type_is_non_owning_ref(reg->type) && !reg->ref_obj_id) {
13246+
verbose(env, "%s can only take non-owning or refcounted bpf_rb_node pointer\n", func_name);
1321513247
return -EINVAL;
1321613248
}
13217-
if (!reg->ref_obj_id) {
13218-
verbose(env, "allocated object must be referenced\n");
13249+
if (in_rbtree_lock_required_cb(env)) {
13250+
verbose(env, "%s not allowed in rbtree cb\n", func_name);
1321913251
return -EINVAL;
1322013252
}
1322113253
}
@@ -13745,13 +13777,11 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
1374513777
insn_aux->kptr_struct_meta =
1374613778
btf_find_struct_meta(meta.arg_btf,
1374713779
meta.arg_btf_id);
13748-
} else if (meta.func_id == special_kfunc_list[KF_bpf_list_pop_front] ||
13749-
meta.func_id == special_kfunc_list[KF_bpf_list_pop_back]) {
13780+
} else if (is_list_node_type(ptr_type)) {
1375013781
struct btf_field *field = meta.arg_list_head.field;
1375113782

1375213783
mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
13753-
} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_remove] ||
13754-
meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
13784+
} else if (is_rbtree_node_type(ptr_type)) {
1375513785
struct btf_field *field = meta.arg_rbtree_root.field;
1375613786

1375713787
mark_reg_graph_node(regs, BPF_REG_0, &field->graph_root);
@@ -13881,7 +13911,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
1388113911
if (is_kfunc_ret_null(&meta))
1388213912
regs[BPF_REG_0].id = id;
1388313913
regs[BPF_REG_0].ref_obj_id = id;
13884-
} else if (meta.func_id == special_kfunc_list[KF_bpf_rbtree_first]) {
13914+
} else if (is_rbtree_node_type(ptr_type) || is_list_node_type(ptr_type)) {
1388513915
ref_set_non_owning(env, &regs[BPF_REG_0]);
1388613916
}
1388713917

tools/testing/selftests/bpf/prog_tests/linked_list.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "linked_list.skel.h"
99
#include "linked_list_fail.skel.h"
10+
#include "linked_list_peek.skel.h"
1011

1112
static char log_buf[1024 * 1024];
1213

@@ -805,3 +806,8 @@ void test_linked_list(void)
805806
test_linked_list_success(LIST_IN_LIST, true);
806807
test_linked_list_success(TEST_ALL, false);
807808
}
809+
810+
void test_linked_list_peek(void)
811+
{
812+
RUN_TESTS(linked_list_peek);
813+
}

tools/testing/selftests/bpf/prog_tests/rbtree.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "rbtree_fail.skel.h"
99
#include "rbtree_btf_fail__wrong_node_type.skel.h"
1010
#include "rbtree_btf_fail__add_wrong_type.skel.h"
11+
#include "rbtree_search.skel.h"
1112

1213
static void test_rbtree_add_nodes(void)
1314
{
@@ -187,3 +188,8 @@ void test_rbtree_fail(void)
187188
{
188189
RUN_TESTS(rbtree_fail);
189190
}
191+
192+
void test_rbtree_search(void)
193+
{
194+
RUN_TESTS(rbtree_search);
195+
}
tools/testing/selftests/bpf/progs/linked_list_peek.c

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
3+
4+
#include <vmlinux.h>
5+
#include <bpf/bpf_helpers.h>
6+
#include "bpf_misc.h"
7+
#include "bpf_experimental.h"
8+
9+
/* Test node: embeds the bpf list link plus an integer key used to verify
 * ordering (front peek sees the smallest key, back peek the largest).
 */
struct node_data {
10+
struct bpf_list_node l;
11+
int key;
12+
};
13+
14+
/* Place the lock and list head in a named .data section so they live in
 * the same global-data map.
 */
#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
15+
private(A) struct bpf_spin_lock glock;
16+
private(A) struct bpf_list_head ghead __contains(node_data, l);
17+
18+
#define list_entry(ptr, type, member) container_of(ptr, type, member)
19+
#define NR_NODES 16
20+
21+
/* NOTE(review): presumably a non-const zero so the loop bound below is
 * not a compile-time constant for the verifier -- TODO confirm.
 */
int zero = 0;
22+
23+
/* Populate ghead with keys 0..NR_NODES-1, then verify bpf_list_front()
 * sees key 0 and bpf_list_back() sees NR_NODES-1.  Returns 0 on success
 * or the failing source line (__LINE__) as a diagnostic error code.
 */
SEC("syscall")
24+
__retval(0)
25+
long list_peek(void *ctx)
26+
{
27+
struct bpf_list_node *l_n;
28+
struct node_data *n;
29+
int i, err = 0;
30+
31+
/* Peeking an empty list must return NULL from both ends. */
bpf_spin_lock(&glock);
32+
l_n = bpf_list_front(&ghead);
33+
bpf_spin_unlock(&glock);
34+
if (l_n)
35+
return __LINE__;
36+
37+
bpf_spin_lock(&glock);
38+
l_n = bpf_list_back(&ghead);
39+
bpf_spin_unlock(&glock);
40+
if (l_n)
41+
return __LINE__;
42+
43+
/* Push keys in ascending order; can_loop bounds the loop for the
 * verifier.
 */
for (i = zero; i < NR_NODES && can_loop; i++) {
44+
n = bpf_obj_new(typeof(*n));
45+
if (!n)
46+
return __LINE__;
47+
n->key = i;
48+
bpf_spin_lock(&glock);
49+
bpf_list_push_back(&ghead, &n->l);
50+
bpf_spin_unlock(&glock);
51+
}
52+
53+
/* Both peeks stay under a single lock section: the returned pointers
 * are non-owning references, only usable while glock is held.
 */
bpf_spin_lock(&glock);
54+
55+
l_n = bpf_list_front(&ghead);
56+
if (!l_n) {
57+
err = __LINE__;
58+
goto done;
59+
}
60+
61+
/* Front of the list must be the first key pushed (0). */
n = list_entry(l_n, struct node_data, l);
62+
if (n->key != 0) {
63+
err = __LINE__;
64+
goto done;
65+
}
66+
67+
l_n = bpf_list_back(&ghead);
68+
if (!l_n) {
69+
err = __LINE__;
70+
goto done;
71+
}
72+
73+
/* Back of the list must be the last key pushed (NR_NODES - 1). */
n = list_entry(l_n, struct node_data, l);
74+
if (n->key != NR_NODES - 1) {
75+
err = __LINE__;
76+
goto done;
77+
}
78+
79+
done:
80+
bpf_spin_unlock(&glock);
81+
return err;
82+
83+
83+
84+
/* Expand to one "syscall" prog that peeks at ghead with bpf_list_<op>(),
 * optionally while holding glock.  All four expansions below are
 * __failure tests whose expected verifier log text is taken from MSG at
 * expansion time.
 * NOTE(review): the dolock=true variants appear to fail on the helper
 * call made while the spinlock is held, which is what lets the test
 * observe the non_own_ref marking of R0 in the log -- confirm against
 * verifier behavior.
 */
#define TEST_FB(op, dolock) \
85+
SEC("syscall") \
86+
__failure __msg(MSG) \
87+
long test_##op##_spinlock_##dolock(void *ctx) \
88+
{ \
89+
struct bpf_list_node *l_n; \
90+
__u64 jiffies = 0; \
91+
\
92+
if (dolock) \
93+
bpf_spin_lock(&glock); \
94+
l_n = bpf_list_##op(&ghead); \
95+
if (l_n) \
96+
jiffies = bpf_jiffies64(); \
97+
if (dolock) \
98+
bpf_spin_unlock(&glock); \
99+
\
100+
return !!jiffies; \
101+
}
102+
103+
/* Lock held: the peek result must be marked as a possibly-NULL,
 * non-owning node pointer in the verifier log.
 */
#define MSG "call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
104+
TEST_FB(front, true)
105+
TEST_FB(back, true)
106+
#undef MSG
107+
108+
/* Lock not held: the peek kfunc itself must be rejected. */
#define MSG "bpf_spin_lock at off=0 must be held for bpf_list_head"
109+
TEST_FB(front, false)
110+
TEST_FB(back, false)
111+
#undef MSG
112+
113+
char _license[] SEC("license") = "GPL";

0 commit comments

Comments
 (0)