From f3589ef5a389788ea7dbcc916eba1b88d43eb8c9 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 7 Oct 2025 15:37:40 +0200 Subject: [PATCH 1/5] bpf: Refactor cleanup of bpf_prog_test_run_skb This bit of refactoring aims to simplify how we free memory in bpf_prog_test_run_skb to avoid code duplication. Tested-by: syzbot@syzkaller.appspotmail.com Signed-off-by: Paul Chaignon --- net/bpf/test_run.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dfb03ee0bb62a..a39b26739a1ef 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -990,10 +990,10 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, u32 size = kattr->test.data_size_in; u32 repeat = kattr->test.repeat; struct __sk_buff *ctx = NULL; + struct sk_buff *skb = NULL; + struct sock *sk = NULL; u32 retval, duration; int hh_len = ETH_HLEN; - struct sk_buff *skb; - struct sock *sk; void *data; int ret; @@ -1012,8 +1012,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); if (IS_ERR(ctx)) { - kfree(data); - return PTR_ERR(ctx); + ret = PTR_ERR(ctx); + ctx = NULL; + goto out; } switch (prog->type) { @@ -1033,21 +1034,20 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1); if (!sk) { - kfree(data); - kfree(ctx); - return -ENOMEM; + ret = -ENOMEM; + goto out; } sock_init_data(NULL, sk); skb = slab_build_skb(data); if (!skb) { - kfree(data); - kfree(ctx); - sk_free(sk); - return -ENOMEM; + ret = -ENOMEM; + goto out; } skb->sk = sk; + data = NULL; /* data released via kfree_skb */ + skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); __skb_put(skb, size); @@ -1142,7 +1142,9 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (dev && dev != net->loopback_dev) dev_put(dev); kfree_skb(skb); - sk_free(sk); + kfree(data); + if (sk) + sk_free(sk); kfree(ctx); return ret; } From 8120ef3c98df3e807db5f7079913bdd918efa19a Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 7 Oct 2025 15:37:58 +0200 Subject: [PATCH 2/5] bpf: Reorder bpf_prog_test_run_skb initialization This patch reorders the initialization of bpf_prog_test_run_skb to simplify the subsequent patch. Program types are checked first, followed by the ctx init, and finally the data init. With the subsequent patch, program types and the ctx init provide information that is used in the data init. Thus, we need the data init to happen last. Signed-off-by: Paul Chaignon --- net/bpf/test_run.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index a39b26739a1ef..b9b49d0c70146 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -1004,19 +1004,6 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (size < ETH_HLEN) return -EINVAL; - data = bpf_test_init(kattr, kattr->test.data_size_in, - size, NET_SKB_PAD + NET_IP_ALIGN, - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); - if (IS_ERR(data)) - return PTR_ERR(data); - - ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); - if (IS_ERR(ctx)) { - ret = PTR_ERR(ctx); - ctx = NULL; - goto out; - } - switch (prog->type) { case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: @@ -1032,6 +1019,19 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, break; } + ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff)); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + data = bpf_test_init(kattr, kattr->test.data_size_in, + size, NET_SKB_PAD + NET_IP_ALIGN, + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (IS_ERR(data)) { + ret = PTR_ERR(data); + data = NULL; + goto out; + } + sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1); if (!sk) { ret = -ENOMEM; From bf4f64de17086139ecfe9ec0e61b4606edd5d8af Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 7 Oct 2025 15:38:14 +0200 Subject: [PATCH 3/5] bpf: Craft non-linear skbs in BPF_PROG_TEST_RUN This patch adds support for crafting non-linear skbs in BPF test runs for tc programs. The size of the linear area is given by ctx->data_end, with a minimum of ETH_HLEN always pulled in the linear area. If ctx or ctx->data_end are null, a linear skb is used. This is particularly useful to test support for non-linear skbs in large codebases such as Cilium. We've had multiple bugs in the past few years where we were missing calls to bpf_skb_pull_data(). This support in BPF_PROG_TEST_RUN would allow us to automatically cover this case in our BPF tests. In addition to the selftests introduced later in the series, this patch was tested by setting enabling non-linear skbs for all tc selftests programs and checking test failures were expected. Tested-by: syzbot@syzkaller.appspotmail.com Suggested-by: Daniel Borkmann Signed-off-by: Paul Chaignon --- net/bpf/test_run.c | 82 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 74 insertions(+), 8 deletions(-) diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index b9b49d0c70146..0cdf894c1d057 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -910,6 +910,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) /* cb is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb), + offsetof(struct __sk_buff, data_end))) + return -EINVAL; + + /* data_end is allowed, but not copied to skb */ + + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, data_end), offsetof(struct __sk_buff, tstamp))) return -EINVAL; @@ -984,9 +990,12 @@ static struct proto bpf_dummy_proto = { int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) { - bool is_l2 = false, is_direct_pkt_access = false; + bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false; + u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct net *net = current->nsproxy->net_ns; struct net_device *dev = net->loopback_dev; + u32 headroom = NET_SKB_PAD + NET_IP_ALIGN; + u32 linear_sz = kattr->test.data_size_in; u32 size = kattr->test.data_size_in; u32 repeat = kattr->test.repeat; struct __sk_buff *ctx = NULL; @@ -1007,11 +1016,14 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, switch (prog->type) { case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: + is_direct_pkt_access = true; is_l2 = true; - fallthrough; + break; case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_OUT: case BPF_PROG_TYPE_LWT_XMIT: + is_lwt = true; + fallthrough; case BPF_PROG_TYPE_CGROUP_SKB: is_direct_pkt_access = true; break; @@ -1023,9 +1035,24 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (IS_ERR(ctx)) return PTR_ERR(ctx); - data = bpf_test_init(kattr, kattr->test.data_size_in, - size, NET_SKB_PAD + NET_IP_ALIGN, - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))); + if (ctx) { + if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) { + ret = -EINVAL; + goto out; + } + if (ctx->data_end) { + /* Non-linear LWT test_run is unsupported for now. */ + if (is_lwt) { + ret = -EINVAL; + goto out; + } + linear_sz = max(ETH_HLEN, ctx->data_end); + } + } + + linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom); + + data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom); if (IS_ERR(data)) { ret = PTR_ERR(data); data = NULL; @@ -1044,12 +1071,49 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, ret = -ENOMEM; goto out; } + skb->sk = sk; data = NULL; /* data released via kfree_skb */ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - __skb_put(skb, size); + __skb_put(skb, linear_sz); + + if (unlikely(kattr->test.data_size_in > linear_sz)) { + void __user *data_in = u64_to_user_ptr(kattr->test.data_in); + struct skb_shared_info *sinfo = skb_shinfo(skb); + + size = linear_sz; + while (size < kattr->test.data_size_in) { + struct page *page; + u32 data_len; + + if (sinfo->nr_frags == MAX_SKB_FRAGS) { + ret = -ENOMEM; + goto out; + } + + page = alloc_page(GFP_KERNEL); + if (!page) { + ret = -ENOMEM; + goto out; + } + + data_len = min_t(u32, kattr->test.data_size_in - size, + PAGE_SIZE); + skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len); + + if (copy_from_user(page_address(page), data_in + size, + data_len)) { + ret = -EFAULT; + goto out; + } + skb->data_len += data_len; + skb->truesize += PAGE_SIZE; + skb->len += data_len; + size += data_len; + } + } if (ctx && ctx->ifindex > 1) { dev = dev_get_by_index(net, ctx->ifindex); @@ -1130,9 +1194,11 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, convert_skb_to___skb(skb, ctx); size = skb->len; - /* bpf program can never convert linear skb to non-linear */ - if (WARN_ON_ONCE(skb_is_nonlinear(skb))) + if (skb_is_nonlinear(skb)) { + /* bpf program can never convert linear skb to non-linear */ + WARN_ON_ONCE(linear_sz == size); size = skb_headlen(skb); + } ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval, duration); if (!ret) From f7dbfaf8f9b885b56b55a5de02a8c8bbba232453 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 7 Oct 2025 15:38:36 +0200 Subject: [PATCH 4/5] selftests/bpf: Support non-linear flag in test loader This patch adds support for a new tag __linear_size in the test loader, to specify the size of the linear area in case of non-linear skbs. If the tag is absent or null, a linear skb is crafted. Signed-off-by: Paul Chaignon --- tools/testing/selftests/bpf/progs/bpf_misc.h | 4 ++++ .../bpf/progs/verifier_direct_packet_access.c | 1 + tools/testing/selftests/bpf/test_loader.c | 19 +++++++++++++++++-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index a7a1a684eed11..c9bfbe1bafc12 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -126,6 +126,9 @@ * Several __arch_* annotations could be specified at once. * When test case is not run on current arch it is marked as skipped. * __caps_unpriv Specify the capabilities that should be set when running the test. + * + * __linear_size Specify the size of the linear area of non-linear skbs, or + * 0 for linear skbs. */ #define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg))) #define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg))) @@ -159,6 +162,7 @@ #define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg))) #define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg))) #define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg))) +#define __linear_size(sz) __attribute__((btf_decl_tag("comment:test_linear_size=" XSTR(sz)))) /* Define common capabilities tested using __caps_unpriv */ #define CAP_NET_ADMIN 12 diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c index 28b602ac9cbe2..a61897e01a50d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Converted from tools/testing/selftests/bpf/verifier/direct_packet_access.c */ +#include #include #include #include "bpf_misc.h" diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 74ecc281bb8c1..690181617f048 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -43,6 +43,7 @@ #define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv=" #define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout=" #define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv=" +#define TEST_TAG_LINEAR_SIZE "comment:test_linear_size=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xbadcafe @@ -89,6 +90,7 @@ struct test_spec { int mode_mask; int arch_mask; int load_mask; + int linear_sz; bool auxiliary; bool valid; }; @@ -633,6 +635,11 @@ static int parse_test_spec(struct test_loader *tester, &spec->unpriv.stdout); if (err) goto cleanup; + } else if (str_has_pfx(s, TEST_TAG_LINEAR_SIZE)) { + val = s + sizeof(TEST_TAG_LINEAR_SIZE) - 1; + err = parse_int(val, &spec->linear_sz, "test linear size"); + if (err) + goto cleanup; } } @@ -1007,10 +1014,11 @@ static bool is_unpriv_capable_map(struct bpf_map *map) } } -static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts) +static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts, int linear_sz) { __u8 tmp_out[TEST_DATA_LEN << 2] = {}; __u8 tmp_in[TEST_DATA_LEN] = {}; + struct __sk_buff ctx = {}; int err, saved_errno; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = tmp_in, @@ -1020,6 +1028,12 @@ static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts) .repeat = 1, ); + if (linear_sz) { + ctx.data_end = linear_sz; + topts.ctx_in = &ctx; + topts.ctx_size_in = sizeof(ctx); + } + if (empty_opts) { memset(&topts, 0, sizeof(struct bpf_test_run_opts)); topts.sz = sizeof(struct bpf_test_run_opts); @@ -1269,7 +1283,8 @@ void run_subtest(struct test_loader *tester, } err = do_prog_test_run(bpf_program__fd(tprog), &retval, - bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); + bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false, + spec->linear_sz); if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) { PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval); goto tobj_cleanup; From 5156da710e7a7f1b0acd7919d2a0fec839c09664 Mon Sep 17 00:00:00 2001 From: Paul Chaignon Date: Tue, 7 Oct 2025 15:38:51 +0200 Subject: [PATCH 5/5] selftests/bpf: Test direct packet access on non-linear skbs This patch adds new selftests in the direct packet access suite, to cover the non-linear case. The first six tests cover the behavior of the bounds check with a non-linear skb. The last test adds a call to bpf_skb_pull_data() to be able to access the packet. Note that the size of the linear area includes the L2 header, but for some program types like cgroup_skb, ctx->data points to the L3 header. Therefore, a linear area of 22 bytes will have only 8 bytes accessible to the BPF program (22 - ETH_HLEN). For that reason, the cgroup_skb test cases access the packet at an offset of 8 bytes. Signed-off-by: Paul Chaignon --- .../bpf/progs/verifier_direct_packet_access.c | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c index a61897e01a50d..16019eb58eb4d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c @@ -801,4 +801,62 @@ l0_%=: /* exit(0) */ \ : __clobber_all); } +#define access_test_non_linear(name, type, desc, retval, linear_sz, off) \ + SEC(type) \ + __description("direct packet access: " #name " (non-linear, " type ", " desc ")") \ + __success __retval(retval) \ + __linear_size(linear_sz) \ + __naked void access_non_linear_##name(void) \ + { \ + asm volatile (" \ + r2 = *(u32*)(r1 + %[skb_data]); \ + r3 = *(u32*)(r1 + %[skb_data_end]); \ + r0 = r2; \ + r0 += %[offset]; \ + if r0 > r3 goto l0_%=; \ + r0 = *(u8*)(r0 - 1); \ + r0 = 0; \ + exit; \ + l0_%=: r0 = 1; \ + exit; \ + " : \ + : __imm_const(skb_data, offsetof(struct __sk_buff, data)), \ + __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)), \ + __imm_const(offset, off) \ + : __clobber_all); \ + } + +access_test_non_linear(test31, "tc", "too short eth", 1, ETH_HLEN, 22); +access_test_non_linear(test32, "tc", "too short 1", 1, 1, 22); +access_test_non_linear(test33, "tc", "long enough", 0, 22, 22); +access_test_non_linear(test34, "cgroup_skb/ingress", "too short eth", 1, ETH_HLEN, 8); +access_test_non_linear(test35, "cgroup_skb/ingress", "too short 1", 1, 1, 8); +access_test_non_linear(test36, "cgroup_skb/ingress", "long enough", 0, 22, 8); + +SEC("tc") +__description("direct packet access: test36 (non-linear, linearized)") +__success __retval(0) +__linear_size(ETH_HLEN) +__naked void access_non_linear_linearized(void) +{ + asm volatile (" \ + r6 = r1; \ + r2 = 22; \ + call %[bpf_skb_pull_data]; \ + r2 = *(u32*)(r6 + %[skb_data]); \ + r3 = *(u32*)(r6 + %[skb_data_end]); \ + r0 = r2; \ + r0 += 22; \ + if r0 > r3 goto l0_%=; \ + r0 = *(u8*)(r0 - 1); \ + exit; \ +l0_%=: r0 = 1; \ + exit; \ +" : + : __imm(bpf_skb_pull_data), + __imm_const(skb_data, offsetof(struct __sk_buff, data)), + __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL";