Skip to content

Commit dae7f9c

Browse files
committed
Merge branch 'mptcp-fix-fallback-related-races'
Matthieu Baerts says: ==================== mptcp: fix fallback-related races This series contains 3 fixes somewhat related to various races we have while handling fallback. The root cause of the issues addressed here is that the check for "we can fallback to tcp now" and the related action are not atomic. That also applies to fallback due to MP_FAIL -- where the window race is even wider. Address the issue introducing an additional spinlock to bundle together all the relevant events, as per patch 1 and 2. These fixes can be backported up to v5.19 and v5.15. Note that mptcp_disconnect() unconditionally clears the fallback status (zeroing msk->flags) but doesn't touch the `allow_infinite_fallback` flag. Such an issue is addressed in patch 3, and can be backported up to v5.17. ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 2b30a3d + da9b2fc commit dae7f9c

File tree

5 files changed

+98
-28
lines changed

5 files changed

+98
-28
lines changed

net/mptcp/options.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -978,8 +978,9 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk,
978978
if (subflow->mp_join)
979979
goto reset;
980980
subflow->mp_capable = 0;
981+
if (!mptcp_try_fallback(ssk))
982+
goto reset;
981983
pr_fallback(msk);
982-
mptcp_do_fallback(ssk);
983984
return false;
984985
}
985986

net/mptcp/pm.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -765,8 +765,14 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq)
765765

766766
pr_debug("fail_seq=%llu\n", fail_seq);
767767

768-
if (!READ_ONCE(msk->allow_infinite_fallback))
768+
/* After accepting the fail, we can't create any other subflows */
769+
spin_lock_bh(&msk->fallback_lock);
770+
if (!msk->allow_infinite_fallback) {
771+
spin_unlock_bh(&msk->fallback_lock);
769772
return;
773+
}
774+
msk->allow_subflows = false;
775+
spin_unlock_bh(&msk->fallback_lock);
770776

771777
if (!subflow->fail_tout) {
772778
pr_debug("send MP_FAIL response and infinite map\n");

net/mptcp/protocol.c

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -560,10 +560,9 @@ static bool mptcp_check_data_fin(struct sock *sk)
560560

561561
static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk)
562562
{
563-
if (READ_ONCE(msk->allow_infinite_fallback)) {
563+
if (mptcp_try_fallback(ssk)) {
564564
MPTCP_INC_STATS(sock_net(ssk),
565565
MPTCP_MIB_DSSCORRUPTIONFALLBACK);
566-
mptcp_do_fallback(ssk);
567566
} else {
568567
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET);
569568
mptcp_subflow_reset(ssk);
@@ -792,7 +791,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
792791
static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk)
793792
{
794793
mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq);
795-
WRITE_ONCE(msk->allow_infinite_fallback, false);
794+
msk->allow_infinite_fallback = false;
796795
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
797796
}
798797

@@ -803,6 +802,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
803802
if (sk->sk_state != TCP_ESTABLISHED)
804803
return false;
805804

805+
spin_lock_bh(&msk->fallback_lock);
806+
if (!msk->allow_subflows) {
807+
spin_unlock_bh(&msk->fallback_lock);
808+
return false;
809+
}
810+
mptcp_subflow_joined(msk, ssk);
811+
spin_unlock_bh(&msk->fallback_lock);
812+
806813
/* attach to msk socket only after we are sure we will deal with it
807814
* at close time
808815
*/
@@ -811,7 +818,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
811818

812819
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
813820
mptcp_sockopt_sync_locked(msk, ssk);
814-
mptcp_subflow_joined(msk, ssk);
815821
mptcp_stop_tout_timer(sk);
816822
__mptcp_propagate_sndbuf(sk, ssk);
817823
return true;
@@ -1136,10 +1142,14 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk,
11361142
mpext->infinite_map = 1;
11371143
mpext->data_len = 0;
11381144

1145+
if (!mptcp_try_fallback(ssk)) {
1146+
mptcp_subflow_reset(ssk);
1147+
return;
1148+
}
1149+
11391150
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
11401151
mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
11411152
pr_fallback(msk);
1142-
mptcp_do_fallback(ssk);
11431153
}
11441154

11451155
#define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1))
@@ -2543,9 +2553,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
25432553

25442554
static void __mptcp_retrans(struct sock *sk)
25452555
{
2556+
struct mptcp_sendmsg_info info = { .data_lock_held = true, };
25462557
struct mptcp_sock *msk = mptcp_sk(sk);
25472558
struct mptcp_subflow_context *subflow;
2548-
struct mptcp_sendmsg_info info = {};
25492559
struct mptcp_data_frag *dfrag;
25502560
struct sock *ssk;
25512561
int ret, err;
@@ -2590,6 +2600,18 @@ static void __mptcp_retrans(struct sock *sk)
25902600
info.sent = 0;
25912601
info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len :
25922602
dfrag->already_sent;
2603+
2604+
/*
2605+
* make the whole retrans decision, xmit, disallow
2606+
* fallback atomic
2607+
*/
2608+
spin_lock_bh(&msk->fallback_lock);
2609+
if (__mptcp_check_fallback(msk)) {
2610+
spin_unlock_bh(&msk->fallback_lock);
2611+
release_sock(ssk);
2612+
return;
2613+
}
2614+
25932615
while (info.sent < info.limit) {
25942616
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
25952617
if (ret <= 0)
@@ -2603,8 +2625,9 @@ static void __mptcp_retrans(struct sock *sk)
26032625
len = max(copied, len);
26042626
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
26052627
info.size_goal);
2606-
WRITE_ONCE(msk->allow_infinite_fallback, false);
2628+
msk->allow_infinite_fallback = false;
26072629
}
2630+
spin_unlock_bh(&msk->fallback_lock);
26082631

26092632
release_sock(ssk);
26102633
}
@@ -2730,14 +2753,16 @@ static void __mptcp_init_sock(struct sock *sk)
27302753
WRITE_ONCE(msk->first, NULL);
27312754
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
27322755
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
2733-
WRITE_ONCE(msk->allow_infinite_fallback, true);
2756+
msk->allow_infinite_fallback = true;
2757+
msk->allow_subflows = true;
27342758
msk->recovery = false;
27352759
msk->subflow_id = 1;
27362760
msk->last_data_sent = tcp_jiffies32;
27372761
msk->last_data_recv = tcp_jiffies32;
27382762
msk->last_ack_recv = tcp_jiffies32;
27392763

27402764
mptcp_pm_data_init(msk);
2765+
spin_lock_init(&msk->fallback_lock);
27412766

27422767
/* re-use the csk retrans timer for MPTCP-level retrans */
27432768
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
@@ -3117,7 +3142,16 @@ static int mptcp_disconnect(struct sock *sk, int flags)
31173142
* subflow
31183143
*/
31193144
mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE);
3145+
3146+
/* The first subflow is already in TCP_CLOSE status, the following
3147+
* can't overlap with a fallback anymore
3148+
*/
3149+
spin_lock_bh(&msk->fallback_lock);
3150+
msk->allow_subflows = true;
3151+
msk->allow_infinite_fallback = true;
31203152
WRITE_ONCE(msk->flags, 0);
3153+
spin_unlock_bh(&msk->fallback_lock);
3154+
31213155
msk->cb_flags = 0;
31223156
msk->recovery = false;
31233157
WRITE_ONCE(msk->can_ack, false);
@@ -3524,7 +3558,13 @@ bool mptcp_finish_join(struct sock *ssk)
35243558

35253559
/* active subflow, already present inside the conn_list */
35263560
if (!list_empty(&subflow->node)) {
3561+
spin_lock_bh(&msk->fallback_lock);
3562+
if (!msk->allow_subflows) {
3563+
spin_unlock_bh(&msk->fallback_lock);
3564+
return false;
3565+
}
35273566
mptcp_subflow_joined(msk, ssk);
3567+
spin_unlock_bh(&msk->fallback_lock);
35283568
mptcp_propagate_sndbuf(parent, ssk);
35293569
return true;
35303570
}

net/mptcp/protocol.h

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -346,10 +346,16 @@ struct mptcp_sock {
346346
u64 rtt_us; /* last maximum rtt of subflows */
347347
} rcvq_space;
348348
u8 scaling_ratio;
349+
bool allow_subflows;
349350

350351
u32 subflow_id;
351352
u32 setsockopt_seq;
352353
char ca_name[TCP_CA_NAME_MAX];
354+
355+
spinlock_t fallback_lock; /* protects fallback,
356+
* allow_infinite_fallback and
357+
* allow_subflows
358+
*/
353359
};
354360

355361
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
@@ -1216,15 +1222,22 @@ static inline bool mptcp_check_fallback(const struct sock *sk)
12161222
return __mptcp_check_fallback(msk);
12171223
}
12181224

1219-
static inline void __mptcp_do_fallback(struct mptcp_sock *msk)
1225+
static inline bool __mptcp_try_fallback(struct mptcp_sock *msk)
12201226
{
12211227
if (__mptcp_check_fallback(msk)) {
12221228
pr_debug("TCP fallback already done (msk=%p)\n", msk);
1223-
return;
1229+
return true;
12241230
}
1225-
if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback)))
1226-
return;
1231+
spin_lock_bh(&msk->fallback_lock);
1232+
if (!msk->allow_infinite_fallback) {
1233+
spin_unlock_bh(&msk->fallback_lock);
1234+
return false;
1235+
}
1236+
1237+
msk->allow_subflows = false;
12271238
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
1239+
spin_unlock_bh(&msk->fallback_lock);
1240+
return true;
12281241
}
12291242

12301243
static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
@@ -1236,14 +1249,15 @@ static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk)
12361249
TCPF_SYN_RECV | TCPF_LISTEN));
12371250
}
12381251

1239-
static inline void mptcp_do_fallback(struct sock *ssk)
1252+
static inline bool mptcp_try_fallback(struct sock *ssk)
12401253
{
12411254
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
12421255
struct sock *sk = subflow->conn;
12431256
struct mptcp_sock *msk;
12441257

12451258
msk = mptcp_sk(sk);
1246-
__mptcp_do_fallback(msk);
1259+
if (!__mptcp_try_fallback(msk))
1260+
return false;
12471261
if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
12481262
gfp_t saved_allocation = ssk->sk_allocation;
12491263

@@ -1255,6 +1269,7 @@ static inline void mptcp_do_fallback(struct sock *ssk)
12551269
tcp_shutdown(ssk, SEND_SHUTDOWN);
12561270
ssk->sk_allocation = saved_allocation;
12571271
}
1272+
return true;
12581273
}
12591274

12601275
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a)
@@ -1264,7 +1279,7 @@ static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk,
12641279
{
12651280
pr_fallback(msk);
12661281
subflow->request_mptcp = 0;
1267-
__mptcp_do_fallback(msk);
1282+
WARN_ON_ONCE(!__mptcp_try_fallback(msk));
12681283
}
12691284

12701285
static inline bool mptcp_check_infinite_map(struct sk_buff *skb)

net/mptcp/subflow.c

Lines changed: 19 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -544,9 +544,11 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb)
544544
mptcp_get_options(skb, &mp_opt);
545545
if (subflow->request_mptcp) {
546546
if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) {
547+
if (!mptcp_try_fallback(sk))
548+
goto do_reset;
549+
547550
MPTCP_INC_STATS(sock_net(sk),
548551
MPTCP_MIB_MPCAPABLEACTIVEFALLBACK);
549-
mptcp_do_fallback(sk);
550552
pr_fallback(msk);
551553
goto fallback;
552554
}
@@ -1300,20 +1302,29 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss
13001302
mptcp_schedule_work(sk);
13011303
}
13021304

1303-
static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
1305+
static bool mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
13041306
{
13051307
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
13061308
unsigned long fail_tout;
13071309

1310+
/* we are really failing, prevent any later subflow join */
1311+
spin_lock_bh(&msk->fallback_lock);
1312+
if (!msk->allow_infinite_fallback) {
1313+
spin_unlock_bh(&msk->fallback_lock);
1314+
return false;
1315+
}
1316+
msk->allow_subflows = false;
1317+
spin_unlock_bh(&msk->fallback_lock);
1318+
13081319
/* graceful failure can happen only on the MPC subflow */
13091320
if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
1310-
return;
1321+
return false;
13111322

13121323
/* since the close timeout take precedence on the fail one,
13131324
* no need to start the latter when the first is already set
13141325
*/
13151326
if (sock_flag((struct sock *)msk, SOCK_DEAD))
1316-
return;
1327+
return true;
13171328

13181329
/* we don't need extreme accuracy here, use a zero fail_tout as special
13191330
* value meaning no fail timeout at all;
@@ -1325,6 +1336,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
13251336
tcp_send_ack(ssk);
13261337

13271338
mptcp_reset_tout_timer(msk, subflow->fail_tout);
1339+
return true;
13281340
}
13291341

13301342
static bool subflow_check_data_avail(struct sock *ssk)
@@ -1385,17 +1397,16 @@ static bool subflow_check_data_avail(struct sock *ssk)
13851397
(subflow->mp_join || subflow->valid_csum_seen)) {
13861398
subflow->send_mp_fail = 1;
13871399

1388-
if (!READ_ONCE(msk->allow_infinite_fallback)) {
1400+
if (!mptcp_subflow_fail(msk, ssk)) {
13891401
subflow->reset_transient = 0;
13901402
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
13911403
goto reset;
13921404
}
1393-
mptcp_subflow_fail(msk, ssk);
13941405
WRITE_ONCE(subflow->data_avail, true);
13951406
return true;
13961407
}
13971408

1398-
if (!READ_ONCE(msk->allow_infinite_fallback)) {
1409+
if (!mptcp_try_fallback(ssk)) {
13991410
/* fatal protocol error, close the socket.
14001411
* subflow_error_report() will introduce the appropriate barriers
14011412
*/
@@ -1413,8 +1424,6 @@ static bool subflow_check_data_avail(struct sock *ssk)
14131424
WRITE_ONCE(subflow->data_avail, false);
14141425
return false;
14151426
}
1416-
1417-
mptcp_do_fallback(ssk);
14181427
}
14191428

14201429
skb = skb_peek(&ssk->sk_receive_queue);
@@ -1679,7 +1688,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local,
16791688
/* discard the subflow socket */
16801689
mptcp_sock_graft(ssk, sk->sk_socket);
16811690
iput(SOCK_INODE(sf));
1682-
WRITE_ONCE(msk->allow_infinite_fallback, false);
16831691
mptcp_stop_tout_timer(sk);
16841692
return 0;
16851693

@@ -1851,7 +1859,7 @@ static void subflow_state_change(struct sock *sk)
18511859

18521860
msk = mptcp_sk(parent);
18531861
if (subflow_simultaneous_connect(sk)) {
1854-
mptcp_do_fallback(sk);
1862+
WARN_ON_ONCE(!mptcp_try_fallback(sk));
18551863
pr_fallback(msk);
18561864
subflow->conn_finished = 1;
18571865
mptcp_propagate_state(parent, sk, subflow, NULL);

0 commit comments

Comments
 (0)