Skip to content

Commit 30bebe4

Browse files
Geliang Tang authored and matttbe committed
selftests/bpf: Add bpf_burst scheduler & test
This patch implements the burst BPF MPTCP scheduler, named bpf_burst, which is the default scheduler in protocol.c. bpf_burst_get_send() uses the same logic as mptcp_subflow_get_send() and bpf_burst_get_retrans uses the same logic as mptcp_subflow_get_retrans(). Using MPTCP_SCHED_TEST macro to add a new test for this bpf_burst scheduler, the arguments "1 1" means data has been sent on both net devices. Run this test by RUN_MPTCP_TEST macro. Signed-off-by: Geliang Tang <[email protected]> Reviewed-by: Mat Martineau <[email protected]> Reviewed-by: Matthieu Baerts (NGI0) <[email protected]>
1 parent 920af8a commit 30bebe4

File tree

2 files changed

+226
-0
lines changed

2 files changed

+226
-0
lines changed

tools/testing/selftests/bpf/prog_tests/mptcp.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "mptcp_bpf_bkup.skel.h"
1717
#include "mptcp_bpf_rr.skel.h"
1818
#include "mptcp_bpf_red.skel.h"
19+
#include "mptcp_bpf_burst.skel.h"
1920

2021
#define NS_TEST "mptcp_ns"
2122
#define ADDR_1 "10.0.1.1"
@@ -722,6 +723,28 @@ static void test_red(void)
722723
mptcp_bpf_red__destroy(skel);
723724
}
724725

726+
static void test_burst(void)
727+
{
728+
struct mptcp_bpf_burst *skel;
729+
int err;
730+
731+
skel = mptcp_bpf_burst__open();
732+
if (!ASSERT_OK_PTR(skel, "open: burst"))
733+
return;
734+
735+
err = bpf_program__set_flags(skel->progs.bpf_burst_get_retrans,
736+
BPF_F_SLEEPABLE);
737+
if (!ASSERT_OK(err, "set sleepable flags"))
738+
goto skel_destroy;
739+
740+
if (!ASSERT_OK(mptcp_bpf_burst__load(skel), "load: burst"))
741+
goto skel_destroy;
742+
743+
test_bpf_sched(skel->obj, "burst", WITH_DATA, WITH_DATA);
744+
skel_destroy:
745+
mptcp_bpf_burst__destroy(skel);
746+
}
747+
725748
void test_mptcp(void)
726749
{
727750
if (test__start_subtest("base"))
@@ -742,4 +765,6 @@ void test_mptcp(void)
742765
test_rr();
743766
if (test__start_subtest("red"))
744767
test_red();
768+
if (test__start_subtest("burst"))
769+
test_burst();
745770
}
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright (c) 2023, SUSE. */
3+
4+
#include "mptcp_bpf.h"
5+
#include <bpf/bpf_tracing.h>
6+
#include <limits.h>
7+
8+
char _license[] SEC("license") = "GPL";
9+
10+
#define MPTCP_SEND_BURST_SIZE 65428
11+
12+
#define min(a, b) ((a) < (b) ? (a) : (b))
13+
14+
/* Best send candidate found so far, one slot per mode (active/backup). */
struct bpf_subflow_send_info {
	__u8 subflow_id;	/* position in the sched data; MPTCP_SUBFLOWS_MAX = none */
	__u64 linger_time;	/* queued bytes / pacing rate, <<32 fixed point */
};
18+
19+
extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym;
20+
extern void mptcp_set_timeout(struct sock *sk) __ksym;
21+
extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym;
22+
extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym;
23+
extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym;
24+
extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym;
25+
26+
#define SSK_MODE_ACTIVE 0
27+
#define SSK_MODE_BACKUP 1
28+
#define SSK_MODE_MAX 2
29+
30+
/* Stand-in for the kernel's div_u64(): unsigned 64-by-32 division. */
static __always_inline __u64 div_u64(__u64 dividend, __u32 divisor)
{
	__u64 result = dividend;

	result /= divisor;
	return result;
}
34+
35+
/* True when no data is queued for (re)transmission on this TCP socket,
 * i.e. everything written has already been pushed to the network.
 * A socket that cannot be cast to a tcp_sock is treated as empty.
 */
static __always_inline bool tcp_write_queue_empty(struct sock *sk)
{
	const struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);

	if (!tp)
		return true;

	return tp->write_seq == tp->snd_nxt;
}
41+
42+
/* True only when both the retransmit and the write queues are empty. */
static __always_inline bool tcp_rtx_and_write_queues_empty(struct sock *sk)
{
	if (!bpf_mptcp_subflow_queues_empty(sk))
		return false;

	return tcp_write_queue_empty(sk);
}
46+
47+
/* Can more data be queued on this socket? Cheap wmem-vs-sndbuf check
 * first, then the TCP-specific stream memory check.
 */
static __always_inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
	return sk->sk_wmem_queued < sk->sk_sndbuf &&
	       tcp_stream_memory_free(sk, wake);
}
54+
55+
/* No-wake convenience wrapper around __sk_stream_memory_free(). */
static __always_inline bool sk_stream_memory_free(const struct sock *sk)
{
	return __sk_stream_memory_free(sk, 0);
}
59+
60+
/* .init hook: this scheduler keeps no private state, nothing to set up. */
SEC("struct_ops")
void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk)
{
}
64+
65+
/* .release hook: nothing allocated in init, so nothing to tear down. */
SEC("struct_ops")
void BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk)
{
}
69+
70+
/* Send-path scheduler hook, same logic as the in-kernel
 * mptcp_subflow_get_send(): pick the subflow whose queued data would
 * linger the shortest time at its pacing rate, preferring active
 * subflows and falling back to the best backup when none is active.
 * On success the chosen subflow is marked scheduled and a send burst is
 * set up; returns 0 on success, -1 when no usable subflow exists.
 */
SEC("struct_ops")
int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk,
	     struct mptcp_sched_data *data)
{
	struct bpf_subflow_send_info send_info[SSK_MODE_MAX];
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	__u32 pace, burst, wmem;
	int i, nr_active = 0;
	__u64 linger_time;
	struct sock *ssk;

	/* pick the subflow with the lower wmem/wspace ratio */
	for (i = 0; i < SSK_MODE_MAX; ++i) {
		send_info[i].subflow_id = MPTCP_SUBFLOWS_MAX;
		/* -1 wraps to the max __u64: any real linger time beats it */
		send_info[i].linger_time = -1;
	}

	for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
		bool backup;

		subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
		if (!subflow)
			break;

		backup = subflow->backup || subflow->request_bkup;

		ssk = mptcp_subflow_tcp_sock(subflow);
		if (!mptcp_subflow_active(subflow))
			continue;

		nr_active += !backup;

		pace = subflow->avg_pacing_rate;
		if (!pace) {
			/* init pacing rate from socket */
			subflow->avg_pacing_rate = ssk->sk_pacing_rate;
			pace = subflow->avg_pacing_rate;
			if (!pace)
				continue;
		}

		/* time to drain the queued bytes at this pace, <<32 fixed point */
		linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace);
		/* keep the best candidate per mode: [0] active, [1] backup */
		if (linger_time < send_info[backup].linger_time) {
			send_info[backup].subflow_id = i;
			send_info[backup].linger_time = linger_time;
		}
	}
	mptcp_set_timeout(sk);

	/* pick the best backup if no other subflow is active */
	if (!nr_active)
		send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id;

	/* no candidate found, or the winner has no send memory left: give up */
	subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id);
	if (!subflow)
		return -1;
	ssk = mptcp_subflow_tcp_sock(subflow);
	if (!ssk || !sk_stream_memory_free(ssk))
		return -1;

	/* cap the burst to the MPTCP-level send window still open */
	burst = min(MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
	wmem = ssk->sk_wmem_queued;
	if (!burst)
		goto out;

	/* blend the old average (weighted by queued bytes) with the ssk's
	 * current pacing rate (weighted by the new burst)
	 */
	subflow->avg_pacing_rate = div_u64((__u64)subflow->avg_pacing_rate * wmem +
					   ssk->sk_pacing_rate * burst,
					   burst + wmem);
	msk->snd_burst = burst;

out:
	mptcp_subflow_set_scheduled(subflow, true);
	return 0;
}
144+
145+
/* Retransmit-path scheduler hook, same logic as the in-kernel
 * mptcp_subflow_get_retrans(): pick the first active non-backup subflow
 * with no data pending at the TCP level; otherwise fall back to the
 * first idle backup, but only when every busy subflow looked stale
 * (min stale_count > 1). Returns 0 on success, -1 when nothing fits.
 * NOTE(review): the selftest loads this program with BPF_F_SLEEPABLE,
 * presumably because mptcp_pm_subflow_chk_stale() may sleep — confirm.
 */
SEC("struct_ops")
int BPF_PROG(bpf_burst_get_retrans, struct mptcp_sock *msk,
	     struct mptcp_sched_data *data)
{
	int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id;
	struct mptcp_subflow_context *subflow;
	int min_stale_count = INT_MAX;
	struct sock *ssk;

	for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
		subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
		if (!subflow)
			break;

		if (!mptcp_subflow_active(subflow))
			continue;

		ssk = mptcp_subflow_tcp_sock(subflow);
		/* still data outstanding at TCP level? skip this */
		if (!tcp_rtx_and_write_queues_empty(ssk)) {
			mptcp_pm_subflow_chk_stale(msk, ssk);
			min_stale_count = min(min_stale_count, subflow->stale_count);
			continue;
		}

		/* remember the first idle backup as a fallback */
		if (subflow->backup || subflow->request_bkup) {
			if (backup == MPTCP_SUBFLOWS_MAX)
				backup = i;
			continue;
		}

		/* first idle non-backup subflow wins */
		if (pick == MPTCP_SUBFLOWS_MAX)
			pick = i;
	}

	if (pick < MPTCP_SUBFLOWS_MAX) {
		subflow_id = pick;
		goto out;
	}
	/* only use the backup when no busy subflow had a low stale count */
	subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX;

out:
	subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id);
	if (!subflow)
		return -1;
	mptcp_subflow_set_scheduled(subflow, true);
	return 0;
}
193+
194+
/* struct_ops map registering this scheduler with the kernel as "bpf_burst". */
SEC(".struct_ops")
struct mptcp_sched_ops burst = {
	.init = (void *)mptcp_sched_burst_init,
	.release = (void *)mptcp_sched_burst_release,
	.get_send = (void *)bpf_burst_get_send,
	.get_retrans = (void *)bpf_burst_get_retrans,
	.name = "bpf_burst",
};

0 commit comments

Comments
 (0)