
Commit 71295ac

q2ven authored and Martin KaFai Lau committed
selftests/bpf: Add test for sk->sk_bypass_prot_mem.
The test does the following for IPv4/IPv6 x TCP/UDP sockets
with/without sk->sk_bypass_prot_mem, which can be turned on by
net.core.bypass_prot_mem or bpf_setsockopt(SK_BPF_BYPASS_PROT_MEM).

  1. Create socket pairs
  2. Send NR_PAGES (32) of data (TCP consumes around 35 pages,
     and UDP consumes 66 pages due to skb overhead)
  3. Read memory_allocated from sk->sk_prot->memory_allocated and
     sk->sk_prot->memory_per_cpu_fw_alloc
  4. Check if unread data is charged to memory_allocated

If sk->sk_bypass_prot_mem is set, memory_allocated should not be
changed, but we allow a small error (up to 10 pages) in case other
processes on the host use some amount of TCP/UDP memory.

The amount of allocated pages is buffered in the per-cpu variable
{tcp,udp}_memory_per_cpu_fw_alloc, up to +/- net.core.mem_pcpu_rsv,
before being reported to {tcp,udp}_memory_allocated.

In step 3, memory_allocated is calculated from these two variables
at fentry of the socket create function.

We drain the receive queue only for UDP before close() because the
UDP receive queue is destroyed after an RCU grace period. When I
printed memory_allocated, UDP bypass cases sometimes saw the
no-bypass case's leftover, but it was still within the small error
range (< 10 pages).

  bpf_trace_printk: memory_allocated: 0    <-- TCP no-bypass
  bpf_trace_printk: memory_allocated: 35
  bpf_trace_printk: memory_allocated: 0    <-- TCP w/ sysctl
  bpf_trace_printk: memory_allocated: 0
  bpf_trace_printk: memory_allocated: 0    <-- TCP w/ bpf
  bpf_trace_printk: memory_allocated: 0
  bpf_trace_printk: memory_allocated: 0    <-- UDP no-bypass
  bpf_trace_printk: memory_allocated: 66
  bpf_trace_printk: memory_allocated: 2    <-- UDP w/ sysctl (2 pages leftover)
  bpf_trace_printk: memory_allocated: 2
  bpf_trace_printk: memory_allocated: 2    <-- UDP w/ bpf (2 pages leftover)
  bpf_trace_printk: memory_allocated: 2

We prefer finishing the test fast over sleeping long enough for
call_rcu() + sk_destruct(). The test completes within 2s on QEMU
(64 CPUs) w/ KVM:

  # time ./test_progs -t sk_bypass
  #371/1   sk_bypass_prot_mem/TCP  :OK
  #371/2   sk_bypass_prot_mem/UDP  :OK
  #371/3   sk_bypass_prot_mem/TCPv6:OK
  #371/4   sk_bypass_prot_mem/UDPv6:OK
  #371     sk_bypass_prot_mem:OK
  Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED

  real    0m1.481s
  user    0m0.181s
  sys     0m0.441s

Signed-off-by: Kuniyuki Iwashima <[email protected]>
Signed-off-by: Martin KaFai Lau <[email protected]>
Acked-by: Roman Gushchin <[email protected]>
Link: https://patch.msgid.link/[email protected]
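The semantics under test boil down to the following sketch, condensed from the commit message above. This is an illustration, not the kernel patch itself: sk_charge() is a hypothetical helper invented here, while sk->sk_bypass_prot_mem and sk_memory_allocated_add() are names used by the series and the existing accounting code.

	/* Hypothetical helper for illustration only; the real charging
	 * paths live in the TCP/UDP memory-accounting code.
	 */
	static void sk_charge(struct sock *sk, int pages)
	{
		if (sk->sk_bypass_prot_mem)
			return;	/* bypass: {tcp,udp}_memory_allocated stays untouched */

		sk_memory_allocated_add(sk, pages);	/* charged: the no-bypass case expects > +NR_PAGES */
	}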
1 parent 58e1d04 commit 71295ac

2 files changed: +396 −0 lines changed
tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
Lines changed: 292 additions & 0 deletions
@@ -0,0 +1,292 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright 2025 Google LLC */

#include <test_progs.h>
#include "sk_bypass_prot_mem.skel.h"
#include "network_helpers.h"

#define NR_PAGES	32
#define NR_SOCKETS	2
#define BUF_TOTAL	(NR_PAGES * 4096 / NR_SOCKETS)	/* 65536 bytes per socket */
#define BUF_SINGLE	1024
#define NR_SEND		(BUF_TOTAL / BUF_SINGLE)	/* 64 sends per socket */

struct test_case {
	char name[8];
	int family;
	int type;
	int (*create_sockets)(struct test_case *test_case, int sk[], int len);
	long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel);
};

static int tcp_create_sockets(struct test_case *test_case, int sk[], int len)
{
	int server, i, err = 0;

	server = start_server(test_case->family, test_case->type, NULL, 0, 0);
	if (!ASSERT_GE(server, 0, "start_server"))
		return server;

	/* Keep the for-loop so we can change NR_SOCKETS easily. */
	for (i = 0; i < len; i += 2) {
		sk[i] = connect_to_fd(server, 0);
		if (sk[i] < 0) {
			ASSERT_GE(sk[i], 0, "connect_to_fd");
			err = sk[i];
			break;
		}

		sk[i + 1] = accept(server, NULL, NULL);
		if (sk[i + 1] < 0) {
			ASSERT_GE(sk[i + 1], 0, "accept");
			err = sk[i + 1];
			break;
		}
	}

	close(server);

	return err;
}

static int udp_create_sockets(struct test_case *test_case, int sk[], int len)
{
	int i, j, err, rcvbuf = BUF_TOTAL;

	/* Keep the for-loop so we can change NR_SOCKETS easily. */
	for (i = 0; i < len; i += 2) {
		sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0);
		if (sk[i] < 0) {
			ASSERT_GE(sk[i], 0, "start_server");
			return sk[i];
		}

		sk[i + 1] = connect_to_fd(sk[i], 0);
		if (sk[i + 1] < 0) {
			ASSERT_GE(sk[i + 1], 0, "connect_to_fd");
			return sk[i + 1];
		}

		err = connect_fd_to_fd(sk[i], sk[i + 1], 0);
		if (err) {
			ASSERT_EQ(err, 0, "connect_fd_to_fd");
			return err;
		}

		/* Make sure the receive queues can hold all the data we send. */
		for (j = 0; j < 2; j++) {
			err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
			if (err) {
				ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)");
				return err;
			}
		}
	}

	return 0;
}

static long get_memory_allocated(struct test_case *test_case,
				 bool *activated, long *memory_allocated)
{
	int sk;

	*activated = true;

	/* AF_INET and AF_INET6 share the same memory_allocated.
	 * tcp_init_sock() is called by AF_INET and AF_INET6,
	 * but udp_lib_init_sock() is inlined, so an AF_INET socket is
	 * created here to trigger the fentry progs that snapshot
	 * memory_allocated into the skeleton's globals.
	 */
	sk = socket(AF_INET, test_case->type, 0);
	if (!ASSERT_GE(sk, 0, "get_memory_allocated"))
		return -1;

	close(sk);

	return *memory_allocated;
}

static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
{
	return get_memory_allocated(test_case,
				    &skel->bss->tcp_activated,
				    &skel->bss->tcp_memory_allocated);
}

static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
{
	return get_memory_allocated(test_case,
				    &skel->bss->udp_activated,
				    &skel->bss->udp_memory_allocated);
}

static int check_bypass(struct test_case *test_case,
			struct sk_bypass_prot_mem *skel, bool bypass)
{
	char buf[BUF_SINGLE] = {};
	long memory_allocated[2];
	int sk[NR_SOCKETS];
	int err, i, j;

	for (i = 0; i < ARRAY_SIZE(sk); i++)
		sk[i] = -1;

	err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk));
	if (err)
		goto close;

	memory_allocated[0] = test_case->get_memory_allocated(test_case, skel);

	/* allocate pages >= NR_PAGES */
	for (i = 0; i < ARRAY_SIZE(sk); i++) {
		for (j = 0; j < NR_SEND; j++) {
			int bytes = send(sk[i], buf, sizeof(buf), 0);

			/* Avoid too noisy logs when something fails. */
			if (bytes != sizeof(buf)) {
				ASSERT_EQ(bytes, sizeof(buf), "send");
				if (bytes < 0) {
					err = bytes;
					goto drain;
				}
			}
		}
	}

	memory_allocated[1] = test_case->get_memory_allocated(test_case, skel);

	if (bypass)
		ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass");
	else
		ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass");

drain:
	if (test_case->type == SOCK_DGRAM) {
		/* UDP starts purging sk->sk_receive_queue after one RCU
		 * grace period, then udp_memory_allocated goes down,
		 * so drain the queue before close().
		 */
		for (i = 0; i < ARRAY_SIZE(sk); i++) {
			for (j = 0; j < NR_SEND; j++) {
				/* MSG_TRUNC makes recv() return the full datagram
				 * length even though only one byte is copied out.
				 */
				int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC);

				if (bytes == sizeof(buf))
					continue;

				if (bytes != -1 || errno != EAGAIN)
					PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno));

				break;
			}
		}
	}

close:
	for (i = 0; i < ARRAY_SIZE(sk); i++) {
		if (sk[i] < 0)
			break;

		close(sk[i]);
	}

	return err;
}

static void run_test(struct test_case *test_case)
{
	struct sk_bypass_prot_mem *skel;
	struct nstoken *nstoken;
	int cgroup, err;

	skel = sk_bypass_prot_mem__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open_and_load"))
		return;

	skel->bss->nr_cpus = libbpf_num_possible_cpus();

	err = sk_bypass_prot_mem__attach(skel);
	if (!ASSERT_OK(err, "attach"))
		goto destroy_skel;

	cgroup = test__join_cgroup("/sk_bypass_prot_mem");
	if (!ASSERT_GE(cgroup, 0, "join_cgroup"))
		goto destroy_skel;

	err = make_netns("sk_bypass_prot_mem");
	if (!ASSERT_EQ(err, 0, "make_netns"))
		goto close_cgroup;

	nstoken = open_netns("sk_bypass_prot_mem");
	if (!ASSERT_OK_PTR(nstoken, "open_netns"))
		goto remove_netns;

	err = check_bypass(test_case, skel, false);
	if (!ASSERT_EQ(err, 0, "test_bypass(false)"))
		goto close_netns;

	err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1");
	if (!ASSERT_EQ(err, 0, "write_sysctl(1)"))
		goto close_netns;

	err = check_bypass(test_case, skel, true);
	if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)"))
		goto close_netns;

	err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0");
	if (!ASSERT_EQ(err, 0, "write_sysctl(0)"))
		goto close_netns;

	skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup);
	if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)"))
		goto close_netns;

	err = check_bypass(test_case, skel, true);
	ASSERT_EQ(err, 0, "test_bypass(true by bpf)");

close_netns:
	close_netns(nstoken);
remove_netns:
	remove_netns("sk_bypass_prot_mem");
close_cgroup:
	close(cgroup);
destroy_skel:
	sk_bypass_prot_mem__destroy(skel);
}

static struct test_case test_cases[] = {
	{
		.name = "TCP  ",
		.family = AF_INET,
		.type = SOCK_STREAM,
		.create_sockets = tcp_create_sockets,
		.get_memory_allocated = tcp_get_memory_allocated,
	},
	{
		.name = "UDP  ",
		.family = AF_INET,
		.type = SOCK_DGRAM,
		.create_sockets = udp_create_sockets,
		.get_memory_allocated = udp_get_memory_allocated,
	},
	{
		.name = "TCPv6",
		.family = AF_INET6,
		.type = SOCK_STREAM,
		.create_sockets = tcp_create_sockets,
		.get_memory_allocated = tcp_get_memory_allocated,
	},
	{
		.name = "UDPv6",
		.family = AF_INET6,
		.type = SOCK_DGRAM,
		.create_sockets = udp_create_sockets,
		.get_memory_allocated = udp_get_memory_allocated,
	},
};

void serial_test_sk_bypass_prot_mem(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
		if (test__start_subtest(test_cases[i].name))
			run_test(&test_cases[i]);
	}
}
tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright 2025 Google LLC */

#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <errno.h>

extern int tcp_memory_per_cpu_fw_alloc __ksym;
extern int udp_memory_per_cpu_fw_alloc __ksym;

int nr_cpus;
bool tcp_activated, udp_activated;
long tcp_memory_allocated, udp_memory_allocated;

struct sk_prot {
	long *memory_allocated;
	int *memory_per_cpu_fw_alloc;
};

static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx)
{
	int *memory_per_cpu_fw_alloc;

	memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i);
	if (memory_per_cpu_fw_alloc)
		*sk_prot_ctx->memory_allocated += *memory_per_cpu_fw_alloc;

	return 0;
}

static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc)
{
	struct sock *sk = bpf_core_cast(_sk, struct sock);
	struct sk_prot sk_prot_ctx;
	long memory_allocated;

	/* net_aligned_data.{tcp,udp}_memory_allocated was not available. */
	memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter;

	sk_prot_ctx.memory_allocated = &memory_allocated;
	sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc;

	/* Fold each CPU's forward-alloc delta into the snapshot. */
	bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0);

	return memory_allocated;
}

static void fentry_init_sock(struct sock *sk, bool *activated,
			     long *memory_allocated, int *memory_per_cpu_fw_alloc)
{
	if (!*activated)
		return;

	*memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc);
	*activated = false;
}

SEC("fentry/tcp_init_sock")
int BPF_PROG(fentry_tcp_init_sock, struct sock *sk)
{
	fentry_init_sock(sk, &tcp_activated,
			 &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc);
	return 0;
}

SEC("fentry/udp_init_sock")
int BPF_PROG(fentry_udp_init_sock, struct sock *sk)
{
	fentry_init_sock(sk, &udp_activated,
			 &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc);
	return 0;
}

SEC("cgroup/sock_create")
int sock_create(struct bpf_sock *ctx)
{
	int err, val = 1;

	err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
			     &val, sizeof(val));
	if (err)
		goto err;

	/* Read the value back to verify the setsockopt took effect. */
	val = 0;

	err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
			     &val, sizeof(val));
	if (err)
		goto err;

	if (val != 1) {
		err = -EINVAL;
		goto err;
	}

	return 1;

err:
	/* Reject socket creation and surface err via bpf_set_retval(). */
	bpf_set_retval(err);
	return 0;
}

char LICENSE[] SEC("license") = "GPL";
