Skip to content

Commit 7dc484f

Browse files
author
Martin KaFai Lau
committed
Merge branch 'support-non-linear-skbs-for-bpf_prog_test_run'
Paul Chaignon says: ==================== Support non-linear skbs for BPF_PROG_TEST_RUN This patchset adds support for non-linear skbs when running tc programs with BPF_PROG_TEST_RUN. We've had multiple bugs in the past few years in Cilium caused by missing calls to bpf_skb_pull_data(). Daniel suggested to support non linear skb in BPF_PROG_TEST_RUN to uncover these bugs in our BPF tests. Changes in v8: - Fix uninitialized data pointer spotted by Martin. - Error out in test_loader if __linear_size tag is used on unsupported program types. Changes in v7: - Refactor use of 'size' variable as suggested by Martin. - Support copying back the non-linear area to data_out. - Minor code changes for readability, suggested by Martin. Changes in v6: - Disallow non-linear skb in prog_run_skb only for LWT programs instead of all non-L2 program types, on suggestion from Martin. - Reject non-null ctx->data and ctx->data_meta, as suggested by Amery. - Bound linear_size to 'PAGE_SIZE - headroom - tailroom' to be consistent with prog_run_xdp, as suggested by Martin. - Allocate exactly linear_size bytes in bpf_test_init, spotted by Martin. - Fix wrong conflict resolution on double-free fix, spotted by Amery. - Rebased. Changes in v5: - Fix double free on data in first patch. Changes in v4: - Per Martin's suggestion, follow the XDP code pattern and use bpf_test_init only to initialize the linear area. That way data is directly copied to the right areas and we avoid the call to __pskb_pull_tail. - Fixed outdated commit descriptions. - Rebased. Changes in v3: - Dropped BPF_F_TEST_SKB_NON_LINEAR and used the ctx->data_end to determine if the user wants non-linear skb, as suggested by Amery. - Introduced a second commit with a bit of refactoring to allow for the above requested change. - Fix bug found by syzkaller on third commit. - Rebased. Changes in v2: - Made the linear size configurable via ctx->data_end, as suggested by Amery. - Reworked the selftests to allow testing the configurable linear size. - Fix warnings reported by kernel test robot on first commit. - Rebased. ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Martin KaFai Lau <[email protected]>
2 parents 2e36338 + bc3eeb4 commit 7dc484f

File tree

4 files changed

+193
-42
lines changed

4 files changed

+193
-42
lines changed

net/bpf/test_run.c

Lines changed: 103 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,7 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
447447

448448
static int bpf_test_finish(const union bpf_attr *kattr,
449449
union bpf_attr __user *uattr, const void *data,
450-
struct skb_shared_info *sinfo, u32 size,
450+
struct skb_shared_info *sinfo, u32 size, u32 frag_size,
451451
u32 retval, u32 duration)
452452
{
453453
void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
@@ -464,7 +464,7 @@ static int bpf_test_finish(const union bpf_attr *kattr,
464464
}
465465

466466
if (data_out) {
467-
int len = sinfo ? copy_size - sinfo->xdp_frags_size : copy_size;
467+
int len = sinfo ? copy_size - frag_size : copy_size;
468468

469469
if (len < 0) {
470470
err = -ENOSPC;
@@ -910,6 +910,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
910910
/* cb is allowed */
911911

912912
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
913+
offsetof(struct __sk_buff, data_end)))
914+
return -EINVAL;
915+
916+
/* data_end is allowed, but not copied to skb */
917+
918+
if (!range_is_zero(__skb, offsetofend(struct __sk_buff, data_end),
913919
offsetof(struct __sk_buff, tstamp)))
914920
return -EINVAL;
915921

@@ -984,72 +990,128 @@ static struct proto bpf_dummy_proto = {
984990
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
985991
union bpf_attr __user *uattr)
986992
{
987-
bool is_l2 = false, is_direct_pkt_access = false;
993+
bool is_l2 = false, is_direct_pkt_access = false, is_lwt = false;
994+
u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
988995
struct net *net = current->nsproxy->net_ns;
989996
struct net_device *dev = net->loopback_dev;
990-
u32 size = kattr->test.data_size_in;
997+
u32 headroom = NET_SKB_PAD + NET_IP_ALIGN;
998+
u32 linear_sz = kattr->test.data_size_in;
991999
u32 repeat = kattr->test.repeat;
9921000
struct __sk_buff *ctx = NULL;
1001+
struct sk_buff *skb = NULL;
1002+
struct sock *sk = NULL;
9931003
u32 retval, duration;
9941004
int hh_len = ETH_HLEN;
995-
struct sk_buff *skb;
996-
struct sock *sk;
997-
void *data;
1005+
void *data = NULL;
9981006
int ret;
9991007

10001008
if ((kattr->test.flags & ~BPF_F_TEST_SKB_CHECKSUM_COMPLETE) ||
10011009
kattr->test.cpu || kattr->test.batch_size)
10021010
return -EINVAL;
10031011

1004-
if (size < ETH_HLEN)
1012+
if (kattr->test.data_size_in < ETH_HLEN)
10051013
return -EINVAL;
10061014

1007-
data = bpf_test_init(kattr, kattr->test.data_size_in,
1008-
size, NET_SKB_PAD + NET_IP_ALIGN,
1009-
SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
1010-
if (IS_ERR(data))
1011-
return PTR_ERR(data);
1012-
1013-
ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
1014-
if (IS_ERR(ctx)) {
1015-
kfree(data);
1016-
return PTR_ERR(ctx);
1017-
}
1018-
10191015
switch (prog->type) {
10201016
case BPF_PROG_TYPE_SCHED_CLS:
10211017
case BPF_PROG_TYPE_SCHED_ACT:
1018+
is_direct_pkt_access = true;
10221019
is_l2 = true;
1023-
fallthrough;
1020+
break;
10241021
case BPF_PROG_TYPE_LWT_IN:
10251022
case BPF_PROG_TYPE_LWT_OUT:
10261023
case BPF_PROG_TYPE_LWT_XMIT:
1024+
is_lwt = true;
1025+
fallthrough;
10271026
case BPF_PROG_TYPE_CGROUP_SKB:
10281027
is_direct_pkt_access = true;
10291028
break;
10301029
default:
10311030
break;
10321031
}
10331032

1033+
ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
1034+
if (IS_ERR(ctx))
1035+
return PTR_ERR(ctx);
1036+
1037+
if (ctx) {
1038+
if (ctx->data_end > kattr->test.data_size_in || ctx->data || ctx->data_meta) {
1039+
ret = -EINVAL;
1040+
goto out;
1041+
}
1042+
if (ctx->data_end) {
1043+
/* Non-linear LWT test_run is unsupported for now. */
1044+
if (is_lwt) {
1045+
ret = -EINVAL;
1046+
goto out;
1047+
}
1048+
linear_sz = max(ETH_HLEN, ctx->data_end);
1049+
}
1050+
}
1051+
1052+
linear_sz = min_t(u32, linear_sz, PAGE_SIZE - headroom - tailroom);
1053+
1054+
data = bpf_test_init(kattr, linear_sz, linear_sz, headroom, tailroom);
1055+
if (IS_ERR(data)) {
1056+
ret = PTR_ERR(data);
1057+
data = NULL;
1058+
goto out;
1059+
}
1060+
10341061
sk = sk_alloc(net, AF_UNSPEC, GFP_USER, &bpf_dummy_proto, 1);
10351062
if (!sk) {
1036-
kfree(data);
1037-
kfree(ctx);
1038-
return -ENOMEM;
1063+
ret = -ENOMEM;
1064+
goto out;
10391065
}
10401066
sock_init_data(NULL, sk);
10411067

10421068
skb = slab_build_skb(data);
10431069
if (!skb) {
1044-
kfree(data);
1045-
kfree(ctx);
1046-
sk_free(sk);
1047-
return -ENOMEM;
1070+
ret = -ENOMEM;
1071+
goto out;
10481072
}
10491073
skb->sk = sk;
10501074

1075+
data = NULL; /* data released via kfree_skb */
1076+
10511077
skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
1052-
__skb_put(skb, size);
1078+
__skb_put(skb, linear_sz);
1079+
1080+
if (unlikely(kattr->test.data_size_in > linear_sz)) {
1081+
void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
1082+
struct skb_shared_info *sinfo = skb_shinfo(skb);
1083+
u32 copied = linear_sz;
1084+
1085+
while (copied < kattr->test.data_size_in) {
1086+
struct page *page;
1087+
u32 data_len;
1088+
1089+
if (sinfo->nr_frags == MAX_SKB_FRAGS) {
1090+
ret = -ENOMEM;
1091+
goto out;
1092+
}
1093+
1094+
page = alloc_page(GFP_KERNEL);
1095+
if (!page) {
1096+
ret = -ENOMEM;
1097+
goto out;
1098+
}
1099+
1100+
data_len = min_t(u32, kattr->test.data_size_in - copied,
1101+
PAGE_SIZE);
1102+
skb_fill_page_desc(skb, sinfo->nr_frags, page, 0, data_len);
1103+
1104+
if (copy_from_user(page_address(page), data_in + copied,
1105+
data_len)) {
1106+
ret = -EFAULT;
1107+
goto out;
1108+
}
1109+
skb->data_len += data_len;
1110+
skb->truesize += PAGE_SIZE;
1111+
skb->len += data_len;
1112+
copied += data_len;
1113+
}
1114+
}
10531115

10541116
if (ctx && ctx->ifindex > 1) {
10551117
dev = dev_get_by_index(net, ctx->ifindex);
@@ -1129,20 +1191,21 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
11291191

11301192
convert_skb_to___skb(skb, ctx);
11311193

1132-
size = skb->len;
1133-
/* bpf program can never convert linear skb to non-linear */
1134-
if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
1135-
size = skb_headlen(skb);
1136-
ret = bpf_test_finish(kattr, uattr, skb->data, NULL, size, retval,
1137-
duration);
1194+
if (skb_is_nonlinear(skb))
1195+
/* bpf program can never convert linear skb to non-linear */
1196+
WARN_ON_ONCE(linear_sz == kattr->test.data_size_in);
1197+
ret = bpf_test_finish(kattr, uattr, skb->data, skb_shinfo(skb), skb->len,
1198+
skb->data_len, retval, duration);
11381199
if (!ret)
11391200
ret = bpf_ctx_finish(kattr, uattr, ctx,
11401201
sizeof(struct __sk_buff));
11411202
out:
11421203
if (dev && dev != net->loopback_dev)
11431204
dev_put(dev);
11441205
kfree_skb(skb);
1145-
sk_free(sk);
1206+
kfree(data);
1207+
if (sk)
1208+
sk_free(sk);
11461209
kfree(ctx);
11471210
return ret;
11481211
}
@@ -1340,7 +1403,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
13401403
goto out;
13411404

13421405
size = xdp.data_end - xdp.data_meta + sinfo->xdp_frags_size;
1343-
ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size,
1406+
ret = bpf_test_finish(kattr, uattr, xdp.data_meta, sinfo, size, sinfo->xdp_frags_size,
13441407
retval, duration);
13451408
if (!ret)
13461409
ret = bpf_ctx_finish(kattr, uattr, ctx,
@@ -1431,7 +1494,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
14311494
goto out;
14321495

14331496
ret = bpf_test_finish(kattr, uattr, &flow_keys, NULL,
1434-
sizeof(flow_keys), retval, duration);
1497+
sizeof(flow_keys), 0, retval, duration);
14351498
if (!ret)
14361499
ret = bpf_ctx_finish(kattr, uattr, user_ctx,
14371500
sizeof(struct bpf_flow_keys));
@@ -1532,7 +1595,7 @@ int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kat
15321595
user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
15331596
}
15341597

1535-
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
1598+
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
15361599
if (!ret)
15371600
ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
15381601

@@ -1732,7 +1795,7 @@ int bpf_prog_test_run_nf(struct bpf_prog *prog,
17321795
if (ret)
17331796
goto out;
17341797

1735-
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, retval, duration);
1798+
ret = bpf_test_finish(kattr, uattr, NULL, NULL, 0, 0, retval, duration);
17361799

17371800
out:
17381801
kfree(user_ctx);

tools/testing/selftests/bpf/progs/bpf_misc.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,9 @@
126126
* Several __arch_* annotations could be specified at once.
127127
* When test case is not run on current arch it is marked as skipped.
128128
* __caps_unpriv Specify the capabilities that should be set when running the test.
129+
*
130+
* __linear_size Specify the size of the linear area of non-linear skbs, or
131+
* 0 for linear skbs.
129132
*/
130133
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
131134
#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg)))
@@ -159,6 +162,7 @@
159162
#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
160163
#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
161164
#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
165+
#define __linear_size(sz) __attribute__((btf_decl_tag("comment:test_linear_size=" XSTR(sz))))
162166

163167
/* Define common capabilities tested using __caps_unpriv */
164168
#define CAP_NET_ADMIN 12

tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// SPDX-License-Identifier: GPL-2.0
22
/* Converted from tools/testing/selftests/bpf/verifier/direct_packet_access.c */
33

4+
#include <linux/if_ether.h>
45
#include <linux/bpf.h>
56
#include <bpf/bpf_helpers.h>
67
#include "bpf_misc.h"
@@ -800,4 +801,62 @@ l0_%=: /* exit(0) */ \
800801
: __clobber_all);
801802
}
802803

804+
#define access_test_non_linear(name, type, desc, retval, linear_sz, off) \
805+
SEC(type) \
806+
__description("direct packet access: " #name " (non-linear, " type ", " desc ")") \
807+
__success __retval(retval) \
808+
__linear_size(linear_sz) \
809+
__naked void access_non_linear_##name(void) \
810+
{ \
811+
asm volatile (" \
812+
r2 = *(u32*)(r1 + %[skb_data]); \
813+
r3 = *(u32*)(r1 + %[skb_data_end]); \
814+
r0 = r2; \
815+
r0 += %[offset]; \
816+
if r0 > r3 goto l0_%=; \
817+
r0 = *(u8*)(r0 - 1); \
818+
r0 = 0; \
819+
exit; \
820+
l0_%=: r0 = 1; \
821+
exit; \
822+
" : \
823+
: __imm_const(skb_data, offsetof(struct __sk_buff, data)), \
824+
__imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)), \
825+
__imm_const(offset, off) \
826+
: __clobber_all); \
827+
}
828+
829+
access_test_non_linear(test31, "tc", "too short eth", 1, ETH_HLEN, 22);
830+
access_test_non_linear(test32, "tc", "too short 1", 1, 1, 22);
831+
access_test_non_linear(test33, "tc", "long enough", 0, 22, 22);
832+
access_test_non_linear(test34, "cgroup_skb/ingress", "too short eth", 1, ETH_HLEN, 8);
833+
access_test_non_linear(test35, "cgroup_skb/ingress", "too short 1", 1, 1, 8);
834+
access_test_non_linear(test36, "cgroup_skb/ingress", "long enough", 0, 22, 8);
835+
836+
SEC("tc")
837+
__description("direct packet access: test37 (non-linear, linearized)")
838+
__success __retval(0)
839+
__linear_size(ETH_HLEN)
840+
__naked void access_non_linear_linearized(void)
841+
{
842+
asm volatile (" \
843+
r6 = r1; \
844+
r2 = 22; \
845+
call %[bpf_skb_pull_data]; \
846+
r2 = *(u32*)(r6 + %[skb_data]); \
847+
r3 = *(u32*)(r6 + %[skb_data_end]); \
848+
r0 = r2; \
849+
r0 += 22; \
850+
if r0 > r3 goto l0_%=; \
851+
r0 = *(u8*)(r0 - 1); \
852+
exit; \
853+
l0_%=: r0 = 1; \
854+
exit; \
855+
" :
856+
: __imm(bpf_skb_pull_data),
857+
__imm_const(skb_data, offsetof(struct __sk_buff, data)),
858+
__imm_const(skb_data_end, offsetof(struct __sk_buff, data_end))
859+
: __clobber_all);
860+
}
861+
803862
char _license[] SEC("license") = "GPL";

0 commit comments

Comments
 (0)