@@ -11,15 +11,10 @@ char _license[] SEC("license") = "GPL";
 
 #define min(a, b) ((a) < (b) ? (a) : (b))
 
-struct bpf_subflow_send_info {
-	__u8 subflow_id;
-	__u64 linger_time;
-};
-
 extern bool mptcp_subflow_active(struct mptcp_subflow_context *subflow) __ksym;
 extern void mptcp_set_timeout(struct sock *sk) __ksym;
 extern __u64 mptcp_wnd_end(const struct mptcp_sock *msk) __ksym;
-extern bool tcp_stream_memory_free(const struct sock *sk, int wake) __ksym;
+extern bool bpf_sk_stream_memory_free(const struct sock *sk) __ksym;
 extern bool bpf_mptcp_subflow_queues_empty(struct sock *sk) __ksym;
 extern void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk) __ksym;
 
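Note on the hunk above: the removed struct bpf_subflow_send_info was a local copy keyed by subflow index; the code now uses the in-kernel struct subflow_send_info directly. A sketch of the shape the new code assumes, inferred from the .ssk/.linger_time assignments later in this diff (not copied from kernel headers):

struct subflow_send_info {
	struct sock *ssk;	/* pointer replaces the old __u8 subflow_id */
	__u64 linger_time;	/* queued bytes / pacing rate, 32.32 fixed point */
};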
@@ -44,19 +39,6 @@ static __always_inline bool tcp_rtx_and_write_queues_empty(struct sock *sk)
 	return bpf_mptcp_subflow_queues_empty(sk) && tcp_write_queue_empty(sk);
 }
 
-static __always_inline bool __sk_stream_memory_free(const struct sock *sk, int wake)
-{
-	if (sk->sk_wmem_queued >= sk->sk_sndbuf)
-		return false;
-
-	return tcp_stream_memory_free(sk, wake);
-}
-
-static __always_inline bool sk_stream_memory_free(const struct sock *sk)
-{
-	return __sk_stream_memory_free(sk, 0);
-}
-
 SEC("struct_ops")
 void BPF_PROG(mptcp_sched_burst_init, struct mptcp_sock *msk)
 {
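The helpers removed above open-coded sk_stream_memory_free() on top of the tcp_stream_memory_free() kfunc; both are now replaced by one call to the bpf_sk_stream_memory_free() kfunc declared earlier. A plain-C sketch of the first test they performed (the struct is a hypothetical stand-in for the two fields used):

#include <stdbool.h>

/* Hypothetical stand-in for the struct sock fields involved. */
struct sock_sketch {
	int sk_wmem_queued;	/* bytes already queued for transmit */
	int sk_sndbuf;		/* send-buffer limit */
};

/* The removed helper bailed out once the queue reached the buffer limit,
 * then deferred to tcp_stream_memory_free(sk, 0); the kfunc now runs the
 * whole check on the kernel side. */
static bool stream_memory_free_sketch(const struct sock_sketch *sk)
{
	return sk->sk_wmem_queued < sk->sk_sndbuf;
}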
@@ -71,7 +53,7 @@ SEC("struct_ops")
 int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk,
	     struct mptcp_sched_data *data)
 {
-	struct bpf_subflow_send_info send_info[SSK_MODE_MAX];
+	struct subflow_send_info send_info[SSK_MODE_MAX];
 	struct mptcp_subflow_context *subflow;
 	struct sock *sk = (struct sock *)msk;
 	__u32 pace, burst, wmem;
@@ -81,18 +63,12 @@ int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk,
 
 	/* pick the subflow with the lower wmem/wspace ratio */
 	for (i = 0; i < SSK_MODE_MAX; ++i) {
-		send_info[i].subflow_id = MPTCP_SUBFLOWS_MAX;
+		send_info[i].ssk = NULL;
 		send_info[i].linger_time = -1;
 	}
 
-	for (i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
-		bool backup;
-
-		subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
-		if (!subflow)
-			break;
-
-		backup = subflow->backup || subflow->request_bkup;
+	bpf_for_each(mptcp_subflow, subflow, sk) {
+		bool backup = subflow->backup || subflow->request_bkup;
 
 		ssk = mptcp_subflow_tcp_sock(subflow);
 		if (!mptcp_subflow_active(subflow))
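The bounded, index-based loop is replaced by the bpf_for_each() open-coded iterator, so positions no longer need per-index NULL checks. Roughly, the macro expands to the standard BPF iterator new/next/destroy triple; the kfunc names below follow the usual bpf_iter_<type> convention and are an assumption of this sketch, not quoted from the patch:

/* Approximate expansion of bpf_for_each(mptcp_subflow, subflow, sk): */
struct bpf_iter_mptcp_subflow it;

bpf_iter_mptcp_subflow_new(&it, sk);
while ((subflow = bpf_iter_mptcp_subflow_next(&it)) != NULL) {
	/* ... loop body from the hunk above ... */
}
bpf_iter_mptcp_subflow_destroy(&it);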
@@ -110,24 +86,26 @@ int BPF_PROG(bpf_burst_get_send, struct mptcp_sock *msk,
 
 		linger_time = div_u64((__u64)ssk->sk_wmem_queued << 32, pace);
 		if (linger_time < send_info[backup].linger_time) {
-			send_info[backup].subflow_id = i;
+			send_info[backup].ssk = ssk;
 			send_info[backup].linger_time = linger_time;
 		}
 	}
 	mptcp_set_timeout(sk);
 
 	/* pick the best backup if no other subflow is active */
 	if (!nr_active)
-		send_info[SSK_MODE_ACTIVE].subflow_id = send_info[SSK_MODE_BACKUP].subflow_id;
+		send_info[SSK_MODE_ACTIVE].ssk = send_info[SSK_MODE_BACKUP].ssk;
 
-	subflow = bpf_mptcp_subflow_ctx_by_pos(data, send_info[SSK_MODE_ACTIVE].subflow_id);
-	if (!subflow)
+	ssk = send_info[SSK_MODE_ACTIVE].ssk;
+	if (!ssk || !bpf_sk_stream_memory_free(ssk))
 		return -1;
-	ssk = mptcp_subflow_tcp_sock(subflow);
-	if (!ssk || !sk_stream_memory_free(ssk))
+
+	subflow = bpf_mptcp_subflow_ctx(ssk);
+	if (!subflow)
 		return -1;
 
 	burst = min(MPTCP_SEND_BURST_SIZE, mptcp_wnd_end(msk) - msk->snd_nxt);
+	ssk = bpf_core_cast(ssk, struct sock);
 	wmem = ssk->sk_wmem_queued;
 	if (!burst)
 		goto out;
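Two details in this hunk: linger_time is queued bytes over pacing rate in 32.32 fixed point, so the comparison picks the subflow whose queue drains soonest, and the added bpf_core_cast() (a CO-RE cast) makes the kfunc-returned socket pointer directly readable again. A standalone, runnable sketch of the fixed-point arithmetic with made-up numbers:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's div_u64(). */
static uint64_t div_u64(uint64_t dividend, uint32_t divisor)
{
	return dividend / divisor;
}

int main(void)
{
	uint64_t wmem = 65536;	/* hypothetical: 64 KiB queued */
	uint32_t pace = 125000;	/* hypothetical pacing rate, bytes/s */

	/* wmem/pace as a 32.32 fixed-point ratio; a smaller value means
	 * the queue drains sooner, so that subflow wins the comparison. */
	uint64_t linger_time = div_u64(wmem << 32, pace);

	printf("linger_time = %llu (32.32 fixed point)\n",
	       (unsigned long long)linger_time);
	return 0;
}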
@@ -146,20 +124,16 @@ SEC("struct_ops")
 int BPF_PROG(bpf_burst_get_retrans, struct mptcp_sock *msk,
	     struct mptcp_sched_data *data)
 {
-	int backup = MPTCP_SUBFLOWS_MAX, pick = MPTCP_SUBFLOWS_MAX, subflow_id;
+	struct sock *backup = NULL, *pick = NULL;
 	struct mptcp_subflow_context *subflow;
 	int min_stale_count = INT_MAX;
-	struct sock *ssk;
 
-	for (int i = 0; i < data->subflows && i < MPTCP_SUBFLOWS_MAX; i++) {
-		subflow = bpf_mptcp_subflow_ctx_by_pos(data, i);
-		if (!subflow)
-			break;
+	bpf_for_each(mptcp_subflow, subflow, (struct sock *)msk) {
+		struct sock *ssk = bpf_mptcp_subflow_tcp_sock(subflow);
 
-		if (!mptcp_subflow_active(subflow))
+		if (!ssk || !mptcp_subflow_active(subflow))
 			continue;
 
-		ssk = mptcp_subflow_tcp_sock(subflow);
 		/* still data outstanding at TCP level? skip this */
 		if (!tcp_rtx_and_write_queues_empty(ssk)) {
 			mptcp_pm_subflow_chk_stale(msk, ssk);
@@ -168,23 +142,23 @@ int BPF_PROG(bpf_burst_get_retrans, struct mptcp_sock *msk,
 		}
 
 		if (subflow->backup || subflow->request_bkup) {
-			if (backup == MPTCP_SUBFLOWS_MAX)
-				backup = i;
+			if (!backup)
+				backup = ssk;
 			continue;
 		}
 
-		if (pick == MPTCP_SUBFLOWS_MAX)
-			pick = i;
+		if (!pick)
+			pick = ssk;
 	}
 
-	if (pick < MPTCP_SUBFLOWS_MAX) {
-		subflow_id = pick;
+	if (pick)
 		goto out;
-	}
-	subflow_id = min_stale_count > 1 ? backup : MPTCP_SUBFLOWS_MAX;
+	pick = min_stale_count > 1 ? backup : NULL;
 
 out:
-	subflow = bpf_mptcp_subflow_ctx_by_pos(data, subflow_id);
+	if (!pick)
+		return -1;
+	subflow = bpf_mptcp_subflow_ctx(pick);
 	if (!subflow)
 		return -1;
 	mptcp_subflow_set_scheduled(subflow, true);
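With indices gone, the retransmit path carries struct sock pointers end to end: the first active non-backup subflow wins, a backup is used only when at most one subflow looked stale, and a NULL pick makes the program return -1. A small sketch of that selection order (the function name is ours, for illustration):

#include <stddef.h>

struct sock;	/* opaque here; the scheduler uses the kernel definition */

/* Mirrors the fallthrough at the end of bpf_burst_get_retrans():
 * returning NULL corresponds to the -1 "nothing to schedule" path. */
static struct sock *pick_retrans_sketch(struct sock *pick, struct sock *backup,
					int min_stale_count)
{
	if (pick)
		return pick;
	return min_stale_count > 1 ? backup : NULL;
}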