Skip to content

Commit 9ee9262

Browse files
committed
Merge branch 'mptcp-fallback-to-tcp-after-3-mpc-drop-cache'
Matthieu Baerts says:

====================
mptcp: fallback to TCP after 3 MPC drop + cache

The SYN + MPTCP_CAPABLE packets could be explicitly dropped by firewalls somewhere in the network, e.g. if they decide to drop packets based on the TCP options, instead of stripping them off.

The idea of this series is to fall back to TCP after 3 SYN+MPC drops (patch 2). If the connection succeeds after the fallback, it very likely means a blackhole has been detected.

In this case (patch 3), MPTCP can be disabled for a certain period of time, 1h by default. If, after this period, MPTCP is still blocked, the period is doubled. This technique is inspired by the one used by TCP FastOpen.

This should help applications which want to use MPTCP by default on the client side if available.
====================

Link: https://patch.msgid.link/20240909-net-next-mptcp-fallback-x-mpc-v1-0-da7ebb4cd2a3@kernel.org
Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 8b5d2e5 + 27069e7 commit 9ee9262

File tree

9 files changed

+182
-11
lines changed

9 files changed

+182
-11
lines changed

Documentation/networking/mptcp-sysctl.rst

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,17 @@ available_schedulers - STRING
3434
Shows the available schedulers choices that are registered. More packet
3535
schedulers may be available, but not loaded.
3636

37+
blackhole_timeout - INTEGER (seconds)
38+
Initial time period in seconds to disable MPTCP on active MPTCP sockets
39+
when an MPTCP firewall blackhole issue happens. This time period will
40+
grow exponentially when more blackhole issues get detected right after
41+
MPTCP is re-enabled and will reset to the initial value when the
42+
blackhole issue goes away.
43+
44+
0 to disable the blackhole detection.
45+
46+
Default: 3600
47+
3748
checksum_enabled - BOOLEAN
3849
Control whether DSS checksum can be enabled.
3950

include/net/mptcp.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,8 @@ static inline __be32 mptcp_reset_option(const struct sk_buff *skb)
223223

224224
return htonl(0u);
225225
}
226+
227+
void mptcp_active_detect_blackhole(struct sock *sk, bool expired);
226228
#else
227229

228230
static inline void mptcp_init(void)
@@ -307,6 +309,8 @@ static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct reques
307309
}
308310

309311
static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); }
312+
313+
static inline void mptcp_active_detect_blackhole(struct sock *sk, bool expired) { }
310314
#endif /* CONFIG_MPTCP */
311315

312316
#if IS_ENABLED(CONFIG_MPTCP_IPV6)

net/ipv4/tcp_timer.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,7 @@ static int tcp_write_timeout(struct sock *sk)
282282
expired = retransmits_timed_out(sk, retry_until,
283283
READ_ONCE(icsk->icsk_user_timeout));
284284
tcp_fastopen_active_detect_blackhole(sk, expired);
285+
mptcp_active_detect_blackhole(sk, expired);
285286

286287
if (BPF_SOCK_OPS_TEST_FLAG(tp, BPF_SOCK_OPS_RTO_CB_FLAG))
287288
tcp_call_bpf_3arg(sk, BPF_SOCK_OPS_RTO_CB,

net/mptcp/ctrl.c

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <net/netns/generic.h>
1313

1414
#include "protocol.h"
15+
#include "mib.h"
1516

1617
#define MPTCP_SYSCTL_PATH "net/mptcp"
1718

@@ -27,8 +28,11 @@ struct mptcp_pernet {
2728
#endif
2829

2930
unsigned int add_addr_timeout;
31+
unsigned int blackhole_timeout;
3032
unsigned int close_timeout;
3133
unsigned int stale_loss_cnt;
34+
atomic_t active_disable_times;
35+
unsigned long active_disable_stamp;
3236
u8 mptcp_enabled;
3337
u8 checksum_enabled;
3438
u8 allow_join_initial_addr_port;
@@ -87,6 +91,8 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet)
8791
{
8892
pernet->mptcp_enabled = 1;
8993
pernet->add_addr_timeout = TCP_RTO_MAX;
94+
pernet->blackhole_timeout = 3600;
95+
atomic_set(&pernet->active_disable_times, 0);
9096
pernet->close_timeout = TCP_TIMEWAIT_LEN;
9197
pernet->checksum_enabled = 0;
9298
pernet->allow_join_initial_addr_port = 1;
@@ -151,6 +157,20 @@ static int proc_available_schedulers(const struct ctl_table *ctl,
151157
return ret;
152158
}
153159

160+
static int proc_blackhole_detect_timeout(const struct ctl_table *table,
161+
int write, void *buffer, size_t *lenp,
162+
loff_t *ppos)
163+
{
164+
struct mptcp_pernet *pernet = mptcp_get_pernet(current->nsproxy->net_ns);
165+
int ret;
166+
167+
ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
168+
if (write && ret == 0)
169+
atomic_set(&pernet->active_disable_times, 0);
170+
171+
return ret;
172+
}
173+
154174
static struct ctl_table mptcp_sysctl_table[] = {
155175
{
156176
.procname = "enabled",
@@ -217,6 +237,13 @@ static struct ctl_table mptcp_sysctl_table[] = {
217237
.mode = 0644,
218238
.proc_handler = proc_dointvec_jiffies,
219239
},
240+
{
241+
.procname = "blackhole_timeout",
242+
.maxlen = sizeof(unsigned int),
243+
.mode = 0644,
244+
.proc_handler = proc_blackhole_detect_timeout,
245+
.extra1 = SYSCTL_ZERO,
246+
},
220247
};
221248

222249
static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
@@ -240,6 +267,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet)
240267
table[6].data = &pernet->scheduler;
241268
/* table[7] is for available_schedulers which is read-only info */
242269
table[8].data = &pernet->close_timeout;
270+
table[9].data = &pernet->blackhole_timeout;
243271

244272
hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table,
245273
ARRAY_SIZE(mptcp_sysctl_table));
@@ -277,6 +305,111 @@ static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {}
277305

278306
#endif /* CONFIG_SYSCTL */
279307

308+
/* The following code block is to deal with middle box issues with MPTCP,
309+
* similar to what is done with TFO.
310+
* The proposed solution is to disable active MPTCP globally when SYN+MPC are
311+
* dropped, while SYN without MPC aren't. In this case, active side MPTCP is
312+
* disabled globally for 1hr at first. Then if it happens again, it is disabled
313+
* for 2h, then 4h, 8h, ...
314+
* The timeout is reset back to 1hr when a successful active MPTCP connection is
315+
* fully established.
316+
*/
317+
318+
/* Disable active MPTCP and record current jiffies and active_disable_times */
319+
void mptcp_active_disable(struct sock *sk)
320+
{
321+
struct net *net = sock_net(sk);
322+
struct mptcp_pernet *pernet;
323+
324+
pernet = mptcp_get_pernet(net);
325+
326+
if (!READ_ONCE(pernet->blackhole_timeout))
327+
return;
328+
329+
/* Paired with READ_ONCE() in mptcp_active_should_disable() */
330+
WRITE_ONCE(pernet->active_disable_stamp, jiffies);
331+
332+
/* Paired with smp_rmb() in mptcp_active_should_disable().
333+
* We want pernet->active_disable_stamp to be updated first.
334+
*/
335+
smp_mb__before_atomic();
336+
atomic_inc(&pernet->active_disable_times);
337+
338+
MPTCP_INC_STATS(net, MPTCP_MIB_BLACKHOLE);
339+
}
340+
341+
/* Calculate timeout for MPTCP active disable
342+
* Return true if we are still in the active MPTCP disable period
343+
* Return false if timeout already expired and we should use active MPTCP
344+
*/
345+
bool mptcp_active_should_disable(struct sock *ssk)
346+
{
347+
struct net *net = sock_net(ssk);
348+
unsigned int blackhole_timeout;
349+
struct mptcp_pernet *pernet;
350+
unsigned long timeout;
351+
int disable_times;
352+
int multiplier;
353+
354+
pernet = mptcp_get_pernet(net);
355+
blackhole_timeout = READ_ONCE(pernet->blackhole_timeout);
356+
357+
if (!blackhole_timeout)
358+
return false;
359+
360+
disable_times = atomic_read(&pernet->active_disable_times);
361+
if (!disable_times)
362+
return false;
363+
364+
/* Paired with smp_mb__before_atomic() in mptcp_active_disable() */
365+
smp_rmb();
366+
367+
/* Limit timeout to max: 2^6 * initial timeout */
368+
multiplier = 1 << min(disable_times - 1, 6);
369+
370+
/* Paired with the WRITE_ONCE() in mptcp_active_disable(). */
371+
timeout = READ_ONCE(pernet->active_disable_stamp) +
372+
multiplier * blackhole_timeout * HZ;
373+
374+
return time_before(jiffies, timeout);
375+
}
376+
377+
/* Enable active MPTCP and reset active_disable_times if needed */
378+
void mptcp_active_enable(struct sock *sk)
379+
{
380+
struct mptcp_pernet *pernet = mptcp_get_pernet(sock_net(sk));
381+
382+
if (atomic_read(&pernet->active_disable_times)) {
383+
struct dst_entry *dst = sk_dst_get(sk);
384+
385+
if (dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))
386+
atomic_set(&pernet->active_disable_times, 0);
387+
}
388+
}
389+
390+
/* Check the number of retransmissions, and fallback to TCP if needed */
391+
void mptcp_active_detect_blackhole(struct sock *ssk, bool expired)
392+
{
393+
struct mptcp_subflow_context *subflow;
394+
u32 timeouts;
395+
396+
if (!sk_is_mptcp(ssk))
397+
return;
398+
399+
timeouts = inet_csk(ssk)->icsk_retransmits;
400+
subflow = mptcp_subflow_ctx(ssk);
401+
402+
if (subflow->request_mptcp && ssk->sk_state == TCP_SYN_SENT) {
403+
if (timeouts == 2 || (timeouts < 2 && expired)) {
404+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDROP);
405+
subflow->mpc_drop = 1;
406+
mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow);
407+
} else {
408+
subflow->mpc_drop = 0;
409+
}
410+
}
411+
}
412+
280413
static int __net_init mptcp_net_init(struct net *net)
281414
{
282415
struct mptcp_pernet *pernet = mptcp_get_pernet(net);

net/mptcp/mib.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ static const struct snmp_mib mptcp_snmp_list[] = {
1515
SNMP_MIB_ITEM("MPCapableACKRX", MPTCP_MIB_MPCAPABLEPASSIVEACK),
1616
SNMP_MIB_ITEM("MPCapableFallbackACK", MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK),
1717
SNMP_MIB_ITEM("MPCapableFallbackSYNACK", MPTCP_MIB_MPCAPABLEACTIVEFALLBACK),
18+
SNMP_MIB_ITEM("MPCapableSYNTXDrop", MPTCP_MIB_MPCAPABLEACTIVEDROP),
19+
SNMP_MIB_ITEM("MPCapableSYNTXDisabled", MPTCP_MIB_MPCAPABLEACTIVEDISABLED),
1820
SNMP_MIB_ITEM("MPFallbackTokenInit", MPTCP_MIB_TOKENFALLBACKINIT),
1921
SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS),
2022
SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN),
@@ -73,6 +75,7 @@ static const struct snmp_mib mptcp_snmp_list[] = {
7375
SNMP_MIB_ITEM("RcvWndConflictUpdate", MPTCP_MIB_RCVWNDCONFLICTUPDATE),
7476
SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT),
7577
SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB),
78+
SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE),
7679
SNMP_MIB_SENTINEL
7780
};
7881

net/mptcp/mib.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ enum linux_mptcp_mib_field {
1010
MPTCP_MIB_MPCAPABLEPASSIVEACK, /* Received third ACK with MP_CAPABLE */
1111
MPTCP_MIB_MPCAPABLEPASSIVEFALLBACK,/* Server-side fallback during 3-way handshake */
1212
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK, /* Client-side fallback during 3-way handshake */
13+
MPTCP_MIB_MPCAPABLEACTIVEDROP, /* Client-side fallback due to a MPC drop */
14+
MPTCP_MIB_MPCAPABLEACTIVEDISABLED, /* Client-side disabled due to past issues */
1315
MPTCP_MIB_TOKENFALLBACKINIT, /* Could not init/allocate token */
1416
MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */
1517
MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */
@@ -74,6 +76,7 @@ enum linux_mptcp_mib_field {
7476
*/
7577
MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */
7678
MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */
79+
MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */
7780
__MPTCP_MIB_MAX
7881
};
7982

net/mptcp/protocol.c

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3717,13 +3717,6 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
37173717
return 0;
37183718
}
37193719

3720-
static void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
3721-
struct mptcp_subflow_context *subflow)
3722-
{
3723-
subflow->request_mptcp = 0;
3724-
__mptcp_do_fallback(msk);
3725-
}
3726-
37273720
static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
37283721
{
37293722
struct mptcp_subflow_context *subflow;
@@ -3744,9 +3737,14 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
37443737
if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info))
37453738
mptcp_subflow_early_fallback(msk, subflow);
37463739
#endif
3747-
if (subflow->request_mptcp && mptcp_token_new_connect(ssk)) {
3748-
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
3749-
mptcp_subflow_early_fallback(msk, subflow);
3740+
if (subflow->request_mptcp) {
3741+
if (mptcp_active_should_disable(sk)) {
3742+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED);
3743+
mptcp_subflow_early_fallback(msk, subflow);
3744+
} else if (mptcp_token_new_connect(ssk) < 0) {
3745+
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT);
3746+
mptcp_subflow_early_fallback(msk, subflow);
3747+
}
37503748
}
37513749

37523750
WRITE_ONCE(msk->write_seq, subflow->idsn);

net/mptcp/protocol.h

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,8 @@ struct mptcp_subflow_context {
531531
valid_csum_seen : 1, /* at least one csum validated */
532532
is_mptfo : 1, /* subflow is doing TFO */
533533
close_event_done : 1, /* has done the post-closed part */
534-
__unused : 9;
534+
mpc_drop : 1, /* the MPC option has been dropped in a rtx */
535+
__unused : 8;
535536
bool data_avail;
536537
bool scheduled;
537538
u32 remote_nonce;
@@ -697,6 +698,11 @@ unsigned int mptcp_stale_loss_cnt(const struct net *net);
697698
unsigned int mptcp_close_timeout(const struct sock *sk);
698699
int mptcp_get_pm_type(const struct net *net);
699700
const char *mptcp_get_scheduler(const struct net *net);
701+
702+
void mptcp_active_disable(struct sock *sk);
703+
bool mptcp_active_should_disable(struct sock *ssk);
704+
void mptcp_active_enable(struct sock *sk);
705+
700706
void mptcp_get_available_schedulers(char *buf, size_t maxlen);
701707
void __mptcp_subflow_fully_established(struct mptcp_sock *msk,
702708
struct mptcp_subflow_context *subflow,
@@ -1215,6 +1221,14 @@ static inline void mptcp_do_fallback(struct sock *ssk)
12151221

12161222
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
12171223

1224+
static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
1225+
struct mptcp_subflow_context *subflow)
1226+
{
1227+
pr_fallback(msk);
1228+
subflow->request_mptcp = 0;
1229+
__mptcp_do_fallback(msk);
1230+
}
1231+
12181232
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)
12191233
{
12201234
struct mptcp_ext *mpext;

net/mptcp/subflow.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,7 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
546546
subflow->mp_capable = 1;
547547
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPCAPABLEACTIVEACK);
548548
mptcp_finish_connect(sk);
549+
mptcp_active_enable(parent);
549550
mptcp_propagate_state(parent, sk, subflow, &mp_opt);
550551
} else if (subflow->request_join) {
551552
u8 hmac[SHA256_DIGEST_SIZE];
@@ -591,6 +592,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
591592
MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINPORTSYNACKRX);
592593
}
593594
} else if (mptcp_check_fallback(sk)) {
595+
/* It looks like MPTCP is blocked, while TCP is not */
596+
if (subflow->mpc_drop)
597+
mptcp_active_disable(parent);
594598
fallback:
595599
mptcp_propagate_state(parent, sk, subflow, NULL);
596600
}

0 commit comments

Comments
 (0)