
Commit 0ff043d

Geliang Tang authored and matttbe committed
selftests/bpf: Add bpf_burst scheduler & test
This patch implements the burst BPF MPTCP scheduler, named bpf_burst, which is the default scheduler in protocol.c. bpf_burst_get_send() uses the same logic as mptcp_subflow_get_send() and bpf_burst_get_retrans() uses the same logic as mptcp_subflow_get_retrans().

Use the MPTCP_SCHED_TEST macro to add a new test for this bpf_burst scheduler; the arguments "1 1" mean data has been sent on both net devices. Run this test with the RUN_MPTCP_TEST macro.

Signed-off-by: Geliang Tang <[email protected]>
Reviewed-by: Mat Martineau <[email protected]>
Reviewed-by: Matthieu Baerts (NGI0) <[email protected]>
1 parent 855f3f9 commit 0ff043d

3 files changed: +200 -0 lines changed
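Before the per-file diffs, a quick usage note. Once built, a struct_ops scheduler such as bpf_burst is registered from user space and then selected by name. The sketch below is illustrative only and is not part of this commit: it assumes the bpftool-generated skeleton (mptcp_bpf_burst.skel.h, produced from the program added below) and the net.mptcp.scheduler sysctl as the knob that picks an MPTCP scheduler by name.

/* Minimal sketch (not part of this commit): load bpf_burst and select it.
 * Assumes the bpftool-generated skeleton and the net.mptcp.scheduler sysctl.
 */
#include <stdio.h>
#include <linux/bpf.h>
#include <bpf/libbpf.h>
#include "mptcp_bpf_burst.skel.h"

int main(void)
{
        struct mptcp_bpf_burst *skel;
        struct bpf_link *link = NULL;
        FILE *f;
        int err = 1;

        skel = mptcp_bpf_burst__open();
        if (!skel)
                return 1;

        /* Mirror the selftest below: mark get_retrans sleepable before loading. */
        if (bpf_program__set_flags(skel->progs.bpf_burst_get_retrans,
                                   BPF_F_SLEEPABLE) ||
            mptcp_bpf_burst__load(skel))
                goto out;

        /* Register the "bpf_burst" mptcp_sched_ops with the kernel. */
        link = bpf_map__attach_struct_ops(skel->maps.burst);
        if (!link)
                goto out;

        /* Select it by name (assumed sysctl, not added by this patch). */
        f = fopen("/proc/sys/net/mptcp/scheduler", "w");
        if (f) {
                fputs("bpf_burst", f);
                fclose(f);
        }

        /* ... run MPTCP traffic; the scheduler stays registered while the
         * struct_ops link is held ... */

        err = 0;
out:
        bpf_link__destroy(link);
        mptcp_bpf_burst__destroy(skel);
        return err;
}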

tools/testing/selftests/bpf/prog_tests/mptcp.c

Lines changed: 25 additions & 0 deletions
@@ -15,6 +15,7 @@
 #include "mptcp_bpf_bkup.skel.h"
 #include "mptcp_bpf_rr.skel.h"
 #include "mptcp_bpf_red.skel.h"
+#include "mptcp_bpf_burst.skel.h"
 
 #define NS_TEST "mptcp_ns"
 #define ADDR_1 "10.0.1.1"
@@ -652,6 +653,28 @@ static void test_red(void)
         mptcp_bpf_red__destroy(skel);
 }
 
+static void test_burst(void)
+{
+        struct mptcp_bpf_burst *skel;
+        int err;
+
+        skel = mptcp_bpf_burst__open();
+        if (!ASSERT_OK_PTR(skel, "open: burst"))
+                return;
+
+        err = bpf_program__set_flags(skel->progs.bpf_burst_get_retrans,
+                                     BPF_F_SLEEPABLE);
+        if (!ASSERT_OK(err, "set sleepable flags"))
+                goto skel_destroy;
+
+        if (!ASSERT_OK(mptcp_bpf_burst__load(skel), "load: burst"))
+                goto skel_destroy;
+
+        test_bpf_sched(skel->maps.burst, "burst", WITH_DATA, WITH_DATA);
+skel_destroy:
+        mptcp_bpf_burst__destroy(skel);
+}
+
 void test_mptcp(void)
 {
         if (test__start_subtest("base"))
@@ -670,4 +693,6 @@ void test_mptcp(void)
                 test_rr();
         if (test__start_subtest("red"))
                 test_red();
+        if (test__start_subtest("burst"))
+                test_burst();
 }
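
Note: test_bpf_sched(), the WITH_DATA flag and the ASSERT_*() checks used in test_burst() are existing helpers, defined earlier in this file and in the selftest framework, and are not part of this diff. The two WITH_DATA arguments correspond to the commit message's "1 1": data is expected to have been sent on both test net devices. Once the BPF selftests are built, the new subtest can presumably be run on its own with something like ./test_progs -t mptcp/burst.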

tools/testing/selftests/bpf/progs/mptcp_bpf.h

Lines changed: 2 additions & 0 deletions
@@ -42,6 +42,8 @@ mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow)
 /* ksym */
 extern struct mptcp_subflow_context *
 bpf_mptcp_subflow_ctx(const struct sock *sk) __ksym;
+extern struct sock *
+bpf_mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) __ksym;
 
 extern void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow,
                                         bool scheduled) __ksym;
tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c

Lines changed: 173 additions & 0 deletions
@@ -0,0 +1,173 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023, SUSE. */
+
+#include "mptcp_bpf.h"
+#include <bpf/bpf_tracing.h>
+#include <limits.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define MPTCP_SEND_BURST_SIZE 65428
+
+#define SSK_MODE_ACTIVE 0
+#define SSK_MODE_BACKUP 1
+#define SSK_MODE_MAX 2
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym;
+extern void mptcp_set_timeout(struct sock *sk) __ksym;
+extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym;
+extern bool bpf_sk_stream_memory_free(const struct sock *sk) __ksym;
+extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym;
+extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym;
+
+static __always_inline __u64 div_u64(__u64 dividend, __u32 divisor)
+{
+        return dividend / divisor;
+}
+
+static __always_inline bool tcp_write_queue_empty(struct sock *sk)
+{
+        const struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);
+
+        return tp ? tp->write_seq == tp->snd_nxt : true;
+}
+
+static __always_inline bool tcp_rtx_and_write_queues_empty(struct sock *sk)
+{
+        return bpf_mptcp_subflow_queues_empty(sk) && tcp_write_queue_empty(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk)
+{
+}
+
+SEC("struct_ops")
+void BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk)
+{
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk)
+{
+        struct subflow_send_info send_info[SSK_MODE_MAX];
+        struct mptcp_subflow_context *subflow;
+        struct sock *sk = (struct sock *)msk;
+        __u32 pace, burst, wmem;
+        int i, nr_active = 0;
+        __u64 linger_time;
+        struct sock *ssk;
+
+        /* pick the subflow with the lower wmem/wspace ratio */
+        for (i = 0; i < SSK_MODE_MAX; ++i) {
+                send_info[i].ssk = NULL;
+                send_info[i].linger_time = -1;
+        }
+
+        bpf_for_each(mptcp_subflow, subflow, sk) {
+                bool backup = subflow->backup || subflow->request_bkup;
+
+                ssk = mptcp_subflow_tcp_sock(subflow);
+                if (!mptcp_subflow_active(subflow))
+                        continue;
+
+                nr_active += !backup;
+                pace = subflow->avg_pacing_rate;
+                if (!pace) {
+                        /* init pacing rate from socket */
+                        subflow->avg_pacing_rate = ssk->sk_pacing_rate;
+                        pace = subflow->avg_pacing_rate;
+                        if (!pace)
+                                continue;
+                }
+
+                linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace);
+                if (linger_time < send_info[backup].linger_time) {
+                        send_info[backup].ssk = ssk;
+                        send_info[backup].linger_time = linger_time;
+                }
+        }
+        mptcp_set_timeout(sk);
+
+        /* pick the best backup if no other subflow is active */
+        if (!nr_active)
+                send_info[SSK_MODE_ACTIVE].ssk = send_info[SSK_MODE_BACKUP].ssk;
+
+        ssk = send_info[SSK_MODE_ACTIVE].ssk;
+        if (!ssk || !bpf_sk_stream_memory_free(ssk))
+                return -1;
+
+        subflow = bpf_mptcp_subflow_ctx(ssk);
+        if (!subflow)
+                return -1;
+
+        burst = min(MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
+        ssk = bpf_core_cast(ssk, struct sock);
+        wmem = ssk->sk_wmem_queued;
+        if (!burst)
+                goto out;
+
+        subflow->avg_pacing_rate = div_u64((__u64)subflow->avg_pacing_rate * wmem +
+                                           ssk->sk_pacing_rate * burst,
+                                           burst + wmem);
+        msk->snd_burst = burst;
+
+out:
+        mptcp_subflow_set_scheduled(subflow, true);
+        return 0;
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_burst_get_retrans, struct mptcp_sock *msk)
+{
+        struct sock *backup = NULL, *pick = NULL;
+        struct mptcp_subflow_context *subflow;
+        int min_stale_count = INT_MAX;
+
+        bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) {
+                struct sock *ssk = bpf_mptcp_subflow_tcp_sock(subflow);
+
+                if (!ssk || !mptcp_subflow_active(subflow))
+                        continue;
+
+                /* still data outstanding at TCP level? skip this */
+                if (!tcp_rtx_and_write_queues_empty(ssk)) {
+                        mptcp_pm_subflow_chk_stale(msk, ssk);
+                        min_stale_count = min(min_stale_count, subflow->stale_count);
+                        continue;
+                }
+
+                if (subflow->backup || subflow->request_bkup) {
+                        if (!backup)
+                                backup = ssk;
+                        continue;
+                }
+
+                if (!pick)
+                        pick = ssk;
+        }
+
+        if (pick)
+                goto out;
+        pick = min_stale_count > 1 ? backup : NULL;
+
+out:
+        if (!pick)
+                return -1;
+        subflow = bpf_mptcp_subflow_ctx(pick);
+        if (!subflow)
+                return -1;
+        mptcp_subflow_set_scheduled(subflow, true);
+        return 0;
+}
+
+SEC(".struct_ops.link")
+struct mptcp_sched_ops burst = {
+        .init = (void *)mptcp_sched_burst_init,
+        .release = (void *)mptcp_sched_burst_release,
+        .get_send = (void *)bpf_burst_get_send,
+        .get_retrans = (void *)bpf_burst_get_retrans,
+        .name = "bpf_burst",
+};
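
To make the send-path selection concrete: bpf_burst_get_send() computes, for each active subflow, linger_time = (sk_wmem_queued << 32) / pacing_rate and keeps the lowest value per class (active vs. backup). After picking a subflow and clamping the burst to MPTCP_SEND_BURST_SIZE and the remaining send window, it updates avg_pacing_rate as a weighted average: the old average is weighted by the bytes already queued (wmem), and the socket's current pacing rate by the new burst. A standalone arithmetic sketch with made-up numbers (not taken from the patch or from any measurement):

/* Illustrative only: the avg_pacing_rate update from bpf_burst_get_send(),
 * evaluated with made-up numbers. */
#include <inttypes.h>
#include <stdio.h>

int main(void)
{
        uint64_t avg_pacing_rate = 1000000;     /* previous average, bytes/s */
        uint64_t sk_pacing_rate = 3000000;      /* current TCP pacing rate */
        uint64_t wmem = 30000;                  /* bytes already queued */
        uint64_t burst = 60000;                 /* bytes about to be sent */

        /* new average = (old * wmem + current * burst) / (wmem + burst) */
        avg_pacing_rate = (avg_pacing_rate * wmem + sk_pacing_rate * burst) /
                          (burst + wmem);

        printf("new avg_pacing_rate: %" PRIu64 " bytes/s\n", avg_pacing_rate);
        /* prints 2333333: biased toward the faster current rate because the
         * new burst outweighs the bytes still queued at the old rate */
        return 0;
}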
