
Commit 866376d

Geliang Tang authored and matttbe committed
selftests/bpf: Add bpf_burst scheduler & test
This patch implements the burst BPF MPTCP scheduler, named bpf_burst, which mirrors the default scheduler from protocol.c: bpf_burst_get_send() uses the same logic as mptcp_subflow_get_send(), and bpf_burst_get_retrans() uses the same logic as mptcp_subflow_get_retrans().

A new "burst" subtest is added for this scheduler: it loads the BPF object and calls test_bpf_sched() with both arguments set to WITH_DATA, meaning data is expected to have been sent on both net devices.

Signed-off-by: Geliang Tang <[email protected]>
Reviewed-by: Mat Martineau <[email protected]>
Reviewed-by: Matthieu Baerts (NGI0) <[email protected]>
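(With the in-tree BPF selftest runner, the new case is just another subtest of the existing mptcp test, so it can typically be exercised with something like ./test_progs -t mptcp; that invocation is mentioned here for context only and is not part of this commit.)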
1 parent 9b8f536 commit 866376d

2 files changed: +222 -0 lines changed

tools/testing/selftests/bpf/prog_tests/mptcp.c

Lines changed: 15 additions & 0 deletions
@@ -16,6 +16,7 @@
 #include "mptcp_bpf_bkup.skel.h"
 #include "mptcp_bpf_rr.skel.h"
 #include "mptcp_bpf_red.skel.h"
+#include "mptcp_bpf_burst.skel.h"
 
 #define NS_TEST "mptcp_ns"
 #define ADDR_1 "10.0.1.1"
@@ -740,6 +741,18 @@ static void test_red(void)
 	mptcp_bpf_red__destroy(skel);
 }
 
+static void test_burst(void)
+{
+	struct mptcp_bpf_burst *skel;
+
+	skel = mptcp_bpf_burst__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "open_and_load: burst"))
+		return;
+
+	test_bpf_sched(skel->obj, "burst", WITH_DATA, WITH_DATA);
+	mptcp_bpf_burst__destroy(skel);
+}
+
 void test_mptcp(void)
 {
 	if (test__start_subtest("base"))
@@ -760,4 +773,6 @@ void test_mptcp(void)
 		test_rr();
 	if (test__start_subtest("red"))
 		test_red();
+	if (test__start_subtest("burst"))
+		test_burst();
 }
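For context on the helper used above: test_bpf_sched() is defined earlier in this file and is not part of these hunks. Below is a minimal sketch of the struct_ops attach step such a helper has to perform; the function name and error handling are assumptions for illustration, not code from this commit (the map name "burst" does come from the scheduler file added below).

#include <bpf/libbpf.h>

/* Hypothetical helper: register a BPF packet scheduler laid out as a
 * struct_ops map (like "burst" below) with the kernel; the caller then
 * transfers data and checks which subflows carried it.
 */
static struct bpf_link *attach_mptcp_sched(struct bpf_object *obj,
					   const char *map_name)
{
	struct bpf_map *map;

	/* the struct_ops map is named after the global variable in the
	 * BPF object, e.g. "burst" for the scheduler added by this commit
	 */
	map = bpf_object__find_map_by_name(obj, map_name);
	if (!map)
		return NULL;

	/* attaching a struct_ops map registers the mptcp_sched_ops instance */
	return bpf_map__attach_struct_ops(map);
}

The real helper additionally runs the data transfer inside the test namespace, presumably verifies (per the two WITH_DATA arguments) that both subflows sent data, and finally detaches the link with bpf_link__destroy().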
tools/testing/selftests/bpf/progs/mptcp_bpf_burst.c (new file)

Lines changed: 207 additions & 0 deletions
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2023, SUSE. */

#include "mptcp_bpf.h"
#include <bpf/bpf_tracing.h>
#include <limits.h>

char _license[] SEC("license") = "GPL";

#define MPTCP_SEND_BURST_SIZE 65428

#define min(a, b) ((a) < (b) ? (a) : (b))

struct bpf_subflow_send_info {
	__u8 subflow_id;
	__u64 linger_time;
};

extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym;
extern void mptcp_set_timeout(struct sock *sk) __ksym;
extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym;
extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym;
extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym;
extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym;

#define SSK_MODE_ACTIVE 0
#define SSK_MODE_BACKUP 1
#define SSK_MODE_MAX 2

static __always_inline __u64 div_u64(__u64 dividend, __u32 divisor)
{
	return dividend / divisor;
}

static __always_inline bool tcp_write_queue_empty(struct sock *sk)
{
	const struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);

	return tp ? tp->write_seq == tp->snd_nxt : true;
}

static __always_inline bool tcp_rtx_and_write_queues_empty(struct sock *sk)
{
	return bpf_mptcp_subflow_queues_empty(sk) && tcp_write_queue_empty(sk);
}

static __always_inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
{
	if (sk->sk_wmem_queued >= sk->sk_sndbuf)
		return false;

	return tcp_stream_memory_free(sk, wake);
}

static __always_inline bool sk_stream_memory_free(const struct sock *sk)
{
	return __sk_stream_memory_free(sk, 0);
}

SEC("struct_ops")
void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk)
{
}

SEC("struct_ops")
void BPF_PROG(mptcp_sched_burst_release, struct mptcp_sock *msk)
{
}

static int bpf_burst_get_send(struct mptcp_sock *msk,
			      struct mptcp_sched_data *data)
{
	struct bpf_subflow_send_info send_info[SSK_MODE_MAX];
	struct mptcp_subflow_context *subflow;
	struct sock *sk = (struct sock *)msk;
	__u32 pace, burst, wmem;
	int i, nr_active = 0;
	__u64 linger_time;
	struct sock *ssk;

	/* pick the subflow with the lower wmem/wspace ratio */
	for (i = 0; i < SSK_MODE_MAX; ++i) {
		send_info[i].subflow_id = MPTCP_SUBFLOWS_MAX;
		send_info[i].linger_time = -1;
	}

	for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
		bool backup;

		subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
		if (!subflow)
			break;

		backup = subflow->backup || subflow->request_bkup;

		ssk = mptcp_subflow_tcp_sock(subflow);
		if (!mptcp_subflow_active(subflow))
			continue;

		nr_active += !backup;
		pace = subflow->avg_pacing_rate;
		if (!pace) {
			/* init pacing rate from socket */
			subflow->avg_pacing_rate = ssk->sk_pacing_rate;
			pace = subflow->avg_pacing_rate;
			if (!pace)
				continue;
		}

		linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace);
		if (linger_time < send_info[backup].linger_time) {
			send_info[backup].subflow_id = i;
			send_info[backup].linger_time = linger_time;
		}
	}
	mptcp_set_timeout(sk);

	/* pick the best backup if no other subflow is active */
	if (!nr_active)
		send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id;

	subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id);
	if (!subflow)
		return -1;
	ssk = mptcp_subflow_tcp_sock(subflow);
	if (!ssk || !sk_stream_memory_free(ssk))
		return -1;

	burst = min(MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
	wmem = ssk->sk_wmem_queued;
	if (!burst)
		goto out;

	subflow->avg_pacing_rate = div_u64((__u64)subflow->avg_pacing_rate * wmem +
					   ssk->sk_pacing_rate * burst,
					   burst + wmem);
	msk->snd_burst = burst;

out:
	mptcp_subflow_set_scheduled(subflow, true);
	return 0;
}

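To make the two pieces of arithmetic in bpf_burst_get_send() concrete, here is a small stand-alone user-space sketch of the same formulas with made-up numbers; the values (and the fact that this runs in user space) are illustrative assumptions, only the formulas mirror the BPF code above. It is not part of mptcp_bpf_burst.c.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical per-subflow state: queued bytes and pacing rate (bytes/s) */
	uint64_t wmem_a = 32768, pace_a = 1250000;	/* ~10 Mbit/s */
	uint64_t wmem_b = 16384, pace_b = 250000;	/* ~2 Mbit/s  */

	/* linger_time = (wmem << 32) / pace: a fixed-point estimate of how long
	 * already-queued data will linger on the subflow; the scheduler keeps
	 * the subflow with the smaller value per class (active/backup)
	 */
	uint64_t linger_a = (wmem_a << 32) / pace_a;
	uint64_t linger_b = (wmem_b << 32) / pace_b;

	printf("linger A=%llu B=%llu -> send on %c\n",
	       (unsigned long long)linger_a, (unsigned long long)linger_b,
	       linger_a < linger_b ? 'A' : 'B');

	/* after picking A, avg_pacing_rate becomes the old average and the
	 * socket's current rate, weighted by queued bytes vs. the burst size
	 */
	uint64_t burst = 65428, sk_rate = 2500000, avg = pace_a;

	avg = (avg * wmem_a + sk_rate * burst) / (burst + wmem_a);
	printf("new avg_pacing_rate=%llu bytes/s\n", (unsigned long long)avg);

	return 0;
}

With these numbers subflow A is preferred even though it has more bytes queued, because its higher pacing rate means the queued data lingers for less time.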
static int bpf_burst_get_retrans(struct mptcp_sock *msk,
				 struct mptcp_sched_data *data)
{
	int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id;
	struct mptcp_subflow_context *subflow;
	int min_stale_count = INT_MAX;
	struct sock *ssk;

	for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
		subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
		if (!subflow)
			break;

		if (!mptcp_subflow_active(subflow))
			continue;

		ssk = mptcp_subflow_tcp_sock(subflow);
		/* still data outstanding at TCP level? skip this */
		if (!tcp_rtx_and_write_queues_empty(ssk)) {
			mptcp_pm_subflow_chk_stale(msk, ssk);
			min_stale_count = min(min_stale_count, subflow->stale_count);
			continue;
		}

		if (subflow->backup || subflow->request_bkup) {
			if (backup == MPTCP_SUBFLOWS_MAX)
				backup = i;
			continue;
		}

		if (pick == MPTCP_SUBFLOWS_MAX)
			pick = i;
	}

	if (pick < MPTCP_SUBFLOWS_MAX) {
		subflow_id = pick;
		goto out;
	}
	subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX;

out:
	subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id);
	if (!subflow)
		return -1;
	mptcp_subflow_set_scheduled(subflow, true);
	return 0;
}

SEC("struct_ops")
int BPF_PROG(bpf_burst_get_subflow, struct mptcp_sock *msk,
	     struct mptcp_sched_data *data)
{
	if (data->reinject)
		return bpf_burst_get_retrans(msk, data);
	return bpf_burst_get_send(msk, data);
}

SEC(".struct_ops")
struct mptcp_sched_ops burst = {
	.init = (void *)mptcp_sched_burst_init,
	.release = (void *)mptcp_sched_burst_release,
	.get_subflow = (void *)bpf_burst_get_subflow,
	.name = "bpf_burst",
};
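Once the test loads this object, attaching the "burst" struct_ops map is what registers the scheduler with the kernel under the name "bpf_burst". In the MPTCP development tree a registered BPF scheduler is then selected for the test namespace much like the built-in one, typically via the net.mptcp.scheduler knob; that selection step lives in the test_bpf_sched() helper and is outside this diff, so treat the sysctl name as an assumption here.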
