Skip to content

Commit 58c2ebb

Browse files
q2ven and Kernel Patches Daemon
authored and committed
selftest: bpf: Add test for SK_BPF_MEMCG_SOCK_ISOLATED.
The test does the following for IPv4/IPv6 x TCP/UDP sockets, with and without SK_BPF_MEMCG_SOCK_ISOLATED:

1. Create socket pairs
2. Send a bunch of data that requires more than 1024 pages
3. Read memory_allocated from sk->sk_prot->memory_allocated and sk->sk_prot->memory_per_cpu_fw_alloc
4. Check if unread data is charged to memory_allocated

If SK_BPF_MEMCG_SOCK_ISOLATED is set, memory_allocated should not change, but we allow a small error (up to 10 pages) in case other processes on the host use some amount of TCP/UDP memory.

The number of allocated pages is buffered in the per-cpu variable {tcp,udp}_memory_per_cpu_fw_alloc, up to +/- net.core.mem_pcpu_rsv, before being reported to {tcp,udp}_memory_allocated.

At 3., memory_allocated is calculated from the 2 variables twice, at fentry and fexit of the socket create function, to check if the per-cpu value is drained during the calculation. In that case, 3. is retried.

We use kern_sync_rcu() for UDP because the UDP recv queue is destroyed after an RCU grace period.

The test takes ~2s on QEMU (64 CPUs) w/ KVM but takes 6s w/o KVM.

# time ./test_progs -t sk_memcg
#370/1 sk_memcg/TCP :OK
#370/2 sk_memcg/UDP :OK
#370/3 sk_memcg/TCPv6:OK
#370/4 sk_memcg/UDPv6:OK
#370 sk_memcg:OK
Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED

real 0m1.623s
user 0m0.165s
sys 0m0.366s

Signed-off-by: Kuniyuki Iwashima <[email protected]>
1 parent 3681b34 commit 58c2ebb

File tree

2 files changed

+382
-0
lines changed

2 files changed

+382
-0
lines changed
Lines changed: 236 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,236 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright 2025 Google LLC */
3+
4+
#include <test_progs.h>
5+
#include "sk_memcg.skel.h"
6+
#include "network_helpers.h"
7+
8+
#define NR_SOCKETS 64
9+
#define NR_SEND 128
10+
#define BUF_SINGLE 1024
11+
#define BUF_TOTAL (BUF_SINGLE * NR_SEND)
12+
13+
/* Describes one protocol/family combination exercised by the test. */
struct test_case {
	/* Subtest name handed to test__start_subtest(). */
	char name[8];

	/* Address family passed to start_server(). */
	int family;

	/* Socket type passed to start_server() and socket(). */
	int type;

	/* Fills sk[0..len) with connected sockets; returns 0 on success. */
	int (*create_sockets)(struct test_case *test_case, int sk[], int len);

	/* Returns the memory_allocated value sampled by the BPF programs. */
	long (*get_memory_allocated)(struct test_case *test_case, struct sk_memcg *skel);
};
20+
21+
static int tcp_create_sockets(struct test_case *test_case, int sk[], int len)
22+
{
23+
int server, i;
24+
25+
server = start_server(test_case->family, test_case->type, NULL, 0, 0);
26+
ASSERT_GE(server, 0, "start_server_str");
27+
28+
for (i = 0; i < len / 2; i++) {
29+
sk[i * 2] = connect_to_fd(server, 0);
30+
if (!ASSERT_GE(sk[i * 2], 0, "connect_to_fd"))
31+
return sk[i * 2];
32+
33+
sk[i * 2 + 1] = accept(server, NULL, NULL);
34+
if (!ASSERT_GE(sk[i * 2 + 1], 0, "accept"))
35+
return sk[i * 2 + 1];
36+
}
37+
38+
close(server);
39+
40+
return 0;
41+
}
42+
43+
static int udp_create_sockets(struct test_case *test_case, int sk[], int len)
44+
{
45+
int i, err, rcvbuf = BUF_TOTAL;
46+
47+
for (i = 0; i < len / 2; i++) {
48+
sk[i * 2] = start_server(test_case->family, test_case->type, NULL, 0, 0);
49+
if (!ASSERT_GE(sk[i * 2], 0, "start_server"))
50+
return sk[i * 2];
51+
52+
sk[i * 2 + 1] = connect_to_fd(sk[i * 2], 0);
53+
if (!ASSERT_GE(sk[i * 2 + 1], 0, "connect_to_fd"))
54+
return sk[i * 2 + 1];
55+
56+
err = connect_fd_to_fd(sk[i * 2], sk[i * 2 + 1], 0);
57+
if (!ASSERT_EQ(err, 0, "connect_fd_to_fd"))
58+
return err;
59+
60+
err = setsockopt(sk[i * 2], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
61+
if (!ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)"))
62+
return err;
63+
64+
err = setsockopt(sk[i * 2 + 1], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
65+
if (!ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)"))
66+
return err;
67+
}
68+
69+
return 0;
70+
}
71+
72+
static long get_memory_allocated(struct test_case *test_case,
73+
bool *activated, bool *stable,
74+
long *memory_allocated)
75+
{
76+
*stable = false;
77+
78+
do {
79+
*activated = true;
80+
81+
/* AF_INET and AF_INET6 share the same memory_allocated.
82+
* tcp_init_sock() is called by AF_INET and AF_INET6,
83+
* but udp_lib_init_sock() is inline.
84+
*/
85+
socket(AF_INET, test_case->type, 0);
86+
} while (!*stable);
87+
88+
return *memory_allocated;
89+
}
90+
91+
static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_memcg *skel)
92+
{
93+
return get_memory_allocated(test_case,
94+
&skel->bss->tcp_activated,
95+
&skel->bss->tcp_stable,
96+
&skel->bss->tcp_memory_allocated);
97+
}
98+
99+
static long udp_get_memory_allocated(struct test_case *test_case, struct sk_memcg *skel)
100+
{
101+
return get_memory_allocated(test_case,
102+
&skel->bss->udp_activated,
103+
&skel->bss->udp_stable,
104+
&skel->bss->udp_memory_allocated);
105+
}
106+
107+
static int check_isolated(struct test_case *test_case,
108+
struct sk_memcg *skel, bool isolated)
109+
{
110+
char buf[BUF_SINGLE] = {};
111+
long memory_allocated[2];
112+
int sk[NR_SOCKETS] = {};
113+
int err, i, j;
114+
115+
err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk));
116+
if (err)
117+
goto close;
118+
119+
memory_allocated[0] = test_case->get_memory_allocated(test_case, skel);
120+
121+
/* allocate pages >= 1024 */
122+
for (i = 0; i < ARRAY_SIZE(sk); i++) {
123+
for (j = 0; j < NR_SEND; j++) {
124+
int bytes = send(sk[i], buf, sizeof(buf), 0);
125+
126+
/* Avoid too noisy logs when something failed. */
127+
if (bytes != sizeof(buf)) {
128+
ASSERT_EQ(bytes, sizeof(buf), "send");
129+
if (bytes < 0) {
130+
err = bytes;
131+
goto close;
132+
}
133+
}
134+
}
135+
}
136+
137+
memory_allocated[1] = test_case->get_memory_allocated(test_case, skel);
138+
139+
if (isolated)
140+
ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "isolated");
141+
else
142+
ASSERT_GT(memory_allocated[1], memory_allocated[0] + 1024, "not isolated");
143+
144+
close:
145+
for (i = 0; i < ARRAY_SIZE(sk); i++)
146+
close(sk[i]);
147+
148+
if (test_case->type == SOCK_DGRAM) {
149+
/* UDP recv queue is destroyed after RCU grace period.
150+
* With one kern_sync_rcu(), memory_allocated[0] of the
151+
* isoalted case often matches with memory_allocated[1]
152+
* of the preceding non-isolated case.
153+
*/
154+
kern_sync_rcu();
155+
kern_sync_rcu();
156+
}
157+
158+
return err;
159+
}
160+
161+
void run_test(struct test_case *test_case)
162+
{
163+
struct sk_memcg *skel;
164+
int cgroup, err;
165+
166+
skel = sk_memcg__open_and_load();
167+
if (!ASSERT_OK_PTR(skel, "open_and_load"))
168+
return;
169+
170+
skel->bss->nr_cpus = libbpf_num_possible_cpus();
171+
172+
err = sk_memcg__attach(skel);
173+
if (!ASSERT_OK(err, "attach"))
174+
goto destroy_skel;
175+
176+
cgroup = test__join_cgroup("/sk_memcg");
177+
if (!ASSERT_GE(cgroup, 0, "join_cgroup"))
178+
goto destroy_skel;
179+
180+
err = check_isolated(test_case, skel, false);
181+
if (!ASSERT_EQ(err, 0, "test_isolated(false)"))
182+
goto close_cgroup;
183+
184+
skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup);
185+
if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)"))
186+
goto close_cgroup;
187+
188+
err = check_isolated(test_case, skel, true);
189+
ASSERT_EQ(err, 0, "test_isolated(false)");
190+
191+
close_cgroup:
192+
close(cgroup);
193+
destroy_skel:
194+
sk_memcg__destroy(skel);
195+
}
196+
197+
struct test_case test_cases[] = {
198+
{
199+
.name = "TCP ",
200+
.family = AF_INET,
201+
.type = SOCK_STREAM,
202+
.create_sockets = tcp_create_sockets,
203+
.get_memory_allocated = tcp_get_memory_allocated,
204+
},
205+
{
206+
.name = "UDP ",
207+
.family = AF_INET,
208+
.type = SOCK_DGRAM,
209+
.create_sockets = udp_create_sockets,
210+
.get_memory_allocated = udp_get_memory_allocated,
211+
},
212+
{
213+
.name = "TCPv6",
214+
.family = AF_INET6,
215+
.type = SOCK_STREAM,
216+
.create_sockets = tcp_create_sockets,
217+
.get_memory_allocated = tcp_get_memory_allocated,
218+
},
219+
{
220+
.name = "UDPv6",
221+
.family = AF_INET6,
222+
.type = SOCK_DGRAM,
223+
.create_sockets = udp_create_sockets,
224+
.get_memory_allocated = udp_get_memory_allocated,
225+
},
226+
};
227+
228+
void serial_test_sk_memcg(void)
229+
{
230+
int i;
231+
232+
for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
233+
test__start_subtest(test_cases[i].name);
234+
run_test(&test_cases[i]);
235+
}
236+
}
Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright 2025 Google LLC */
3+
4+
#include "bpf_tracing_net.h"
5+
#include <bpf/bpf_helpers.h>
6+
#include <bpf/bpf_tracing.h>
7+
#include <errno.h>
8+
9+
/* Kernel per-cpu variables, resolved as ksyms. */
extern int tcp_memory_per_cpu_fw_alloc __ksym;
extern int udp_memory_per_cpu_fw_alloc __ksym;

/* Number of CPUs to iterate over; written by userspace before attach. */
int nr_cpus;

/* *_activated arms the fentry hook for one sample; *_stable is set by the
 * fexit hook when the value did not change between fentry and fexit.
 */
bool tcp_activated, tcp_stable, udp_activated, udp_stable;

/* Value sampled at fentry. */
long tcp_memory_allocated, udp_memory_allocated;

/* Socket currently being sampled, or NULL when no sample is in flight. */
static struct sock *tcp_sk_tracing, *udp_sk_tracing;
16+
17+
/* Context handed to the bpf_loop() callback. */
struct sk_prot {
	/* Running total that the callback adds each per-cpu value to. */
	long *memory_allocated;

	/* Per-cpu variable read by the callback. */
	int *memory_per_cpu_fw_alloc;
};
21+
22+
/* bpf_loop() callback: add the per-cpu value looked up with index @i to the
 * running total in @sk_prot_ctx.  Indexes for which bpf_per_cpu_ptr()
 * returns NULL are skipped.  Always returns 0.
 */
static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx)
{
	int *pcpu_val = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i);

	if (!pcpu_val)
		return 0;

	*sk_prot_ctx->memory_allocated += *pcpu_val;

	return 0;
}
32+
33+
static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc)
34+
{
35+
struct sock *sk = bpf_core_cast(_sk, struct sock);
36+
struct sk_prot sk_prot_ctx;
37+
long memory_allocated;
38+
39+
/* net_aligned_data.{tcp,udp}_memory_allocated was not available. */
40+
memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter;
41+
42+
sk_prot_ctx.memory_allocated = &memory_allocated;
43+
sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc;
44+
45+
bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0);
46+
47+
return memory_allocated;
48+
}
49+
50+
/* Common fentry body: take the first sample for one armed request.
 *
 * Does nothing unless *activated is set and *sk_tracing could be switched
 * from NULL to @sk.  Otherwise it clears *activated and stores the current
 * value in *memory_allocated.
 */
static void fentry_init_sock(struct sock *sk, struct sock **sk_tracing,
			     long *memory_allocated, int *memory_per_cpu_fw_alloc,
			     bool *activated)
{
	/* The compare-and-swap is only attempted when a sample was requested;
	 * a non-NULL result means another socket is already being traced.
	 */
	if (!*activated || __sync_val_compare_and_swap(sk_tracing, NULL, sk))
		return;

	*activated = false;
	*memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc);
}
63+
64+
/* Common fexit body: take the second sample for the traced socket.
 *
 * Ignores sockets other than *sk_tracing.  Sets *stable when the value
 * equals the one stored at fentry, then clears *sk_tracing.
 */
static void fexit_init_sock(struct sock *sk, struct sock **sk_tracing,
			    long *memory_allocated, int *memory_per_cpu_fw_alloc,
			    bool *stable)
{
	if (*sk_tracing != sk)
		return;

	if (get_memory_allocated(sk, memory_per_cpu_fw_alloc) == *memory_allocated)
		*stable = true;

	*sk_tracing = NULL;
}
79+
80+
/* fentry hook on tcp_init_sock(): first sample, using the TCP variables. */
SEC("fentry/tcp_init_sock")
int BPF_PROG(fentry_tcp_init_sock, struct sock *sk)
{
	fentry_init_sock(sk, &tcp_sk_tracing, &tcp_memory_allocated,
			 &tcp_memory_per_cpu_fw_alloc, &tcp_activated);

	return 0;
}
88+
89+
/* fexit hook on tcp_init_sock(): second sample, using the TCP variables. */
SEC("fexit/tcp_init_sock")
int BPF_PROG(fexit_tcp_init_sock, struct sock *sk)
{
	fexit_init_sock(sk, &tcp_sk_tracing, &tcp_memory_allocated,
			&tcp_memory_per_cpu_fw_alloc, &tcp_stable);

	return 0;
}
97+
98+
/* fentry hook on udp_init_sock(): first sample, using the UDP variables. */
SEC("fentry/udp_init_sock")
int BPF_PROG(fentry_udp_init_sock, struct sock *sk)
{
	fentry_init_sock(sk, &udp_sk_tracing, &udp_memory_allocated,
			 &udp_memory_per_cpu_fw_alloc, &udp_activated);

	return 0;
}
106+
107+
/* fexit hook on udp_init_sock(): second sample, using the UDP variables. */
SEC("fexit/udp_init_sock")
int BPF_PROG(fexit_udp_init_sock, struct sock *sk)
{
	fexit_init_sock(sk, &udp_sk_tracing, &udp_memory_allocated,
			&udp_memory_per_cpu_fw_alloc, &udp_stable);

	return 0;
}
115+
116+
/* cgroup/sock_create program: set SK_BPF_MEMCG_SOCK_ISOLATED on the new
 * socket and read the flags back to confirm they were stored.
 *
 * Returns 1 on success.  On any failure the error is passed to
 * bpf_set_retval() and 0 is returned.
 */
SEC("cgroup/sock_create")
int sock_create(struct bpf_sock *ctx)
{
	u32 flags = SK_BPF_MEMCG_SOCK_ISOLATED;
	int ret;

	ret = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_MEMCG_FLAGS,
			     &flags, sizeof(flags));
	if (!ret) {
		/* Clear the local copy so the read-back below is meaningful. */
		flags = 0;

		ret = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_MEMCG_FLAGS,
				     &flags, sizeof(flags));
		if (!ret && flags != SK_BPF_MEMCG_SOCK_ISOLATED)
			ret = -EINVAL;
	}

	if (ret) {
		bpf_set_retval(ret);
		return 0;
	}

	return 1;
}
145+
146+
char LICENSE[] SEC("license") = "GPL";

0 commit comments

Comments
 (0)