Skip to content

Commit 533aa0b

Browse files
committed
Merge branch 'mptcp-fixes-for-6-4'
Matthieu Baerts says: ==================== mptcp: fixes for 6.4 Patch 1 correctly handles disconnect() failures that can happen in some specific cases: now the socket state is set as unconnected as expected. That fixes an issue introduced in v6.2. Patch 2 fixes a divide by zero bug in mptcp_recvmsg() with a fix similar to a recent one from Eric Dumazet for TCP introducing sk_wait_pending flag. It should address an issue present in MPTCP from almost the beginning, from v5.9. Patch 3 fixes a possible list corruption on passive MPJ even if the race seems very unlikely, better be safe than sorry. The possible issue is present from v5.17. Patch 4 consolidates fallback and non fallback state machines to avoid leaking some MPTCP sockets. The fix is likely needed for versions from v5.11. Patch 5 drops code that is no longer used after the introduction of patch 4/6. This is not really a fix but this patch can probably land in the -net tree as well not to leave unused code. Patch 6 ensures listeners are unhashed before updating their sk status to avoid possible deadlocks when diag info are going to be retrieved with a lock. Even if it should not be visible with the way we are currently getting diag info, the issue is present from v5.17. ==================== Link: https://lore.kernel.org/r/20230620-upstream-net-20230620-misc-fixes-for-v6-4-v1-0-f36aa5eae8b9@tessares.net Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 59bb14b + 57fc0f1 commit 533aa0b

File tree

4 files changed

+76
-107
lines changed

4 files changed

+76
-107
lines changed

net/mptcp/pm_netlink.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1047,6 +1047,7 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
10471047
if (err)
10481048
return err;
10491049

1050+
inet_sk_state_store(newsk, TCP_LISTEN);
10501051
err = kernel_listen(ssock, backlog);
10511052
if (err)
10521053
return err;

net/mptcp/protocol.c

Lines changed: 64 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ enum {
4444
static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
4545

4646
static void __mptcp_destroy_sock(struct sock *sk);
47-
static void __mptcp_check_send_data_fin(struct sock *sk);
47+
static void mptcp_check_send_data_fin(struct sock *sk);
4848

4949
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
5050
static struct net_device mptcp_napi_dev;
@@ -424,8 +424,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk)
424424
{
425425
struct mptcp_sock *msk = mptcp_sk(sk);
426426

427-
return !__mptcp_check_fallback(msk) &&
428-
((1 << sk->sk_state) &
427+
return ((1 << sk->sk_state) &
429428
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) &&
430429
msk->write_seq == READ_ONCE(msk->snd_una);
431430
}
@@ -583,9 +582,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
583582
u64 rcv_data_fin_seq;
584583
bool ret = false;
585584

586-
if (__mptcp_check_fallback(msk))
587-
return ret;
588-
589585
/* Need to ack a DATA_FIN received from a peer while this side
590586
* of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
591587
* msk->rcv_data_fin was set when parsing the incoming options
@@ -623,7 +619,8 @@ static bool mptcp_check_data_fin(struct sock *sk)
623619
}
624620

625621
ret = true;
626-
mptcp_send_ack(msk);
622+
if (!__mptcp_check_fallback(msk))
623+
mptcp_send_ack(msk);
627624
mptcp_close_wake_up(sk);
628625
}
629626
return ret;
@@ -850,12 +847,12 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
850847
return true;
851848
}
852849

853-
static void __mptcp_flush_join_list(struct sock *sk)
850+
static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list)
854851
{
855852
struct mptcp_subflow_context *tmp, *subflow;
856853
struct mptcp_sock *msk = mptcp_sk(sk);
857854

858-
list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
855+
list_for_each_entry_safe(subflow, tmp, join_list, node) {
859856
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
860857
bool slow = lock_sock_fast(ssk);
861858

@@ -897,49 +894,6 @@ bool mptcp_schedule_work(struct sock *sk)
897894
return false;
898895
}
899896

900-
void mptcp_subflow_eof(struct sock *sk)
901-
{
902-
if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags))
903-
mptcp_schedule_work(sk);
904-
}
905-
906-
static void mptcp_check_for_eof(struct mptcp_sock *msk)
907-
{
908-
struct mptcp_subflow_context *subflow;
909-
struct sock *sk = (struct sock *)msk;
910-
int receivers = 0;
911-
912-
mptcp_for_each_subflow(msk, subflow)
913-
receivers += !subflow->rx_eof;
914-
if (receivers)
915-
return;
916-
917-
if (!(sk->sk_shutdown & RCV_SHUTDOWN)) {
918-
/* hopefully temporary hack: propagate shutdown status
919-
* to msk, when all subflows agree on it
920-
*/
921-
WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
922-
923-
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
924-
sk->sk_data_ready(sk);
925-
}
926-
927-
switch (sk->sk_state) {
928-
case TCP_ESTABLISHED:
929-
inet_sk_state_store(sk, TCP_CLOSE_WAIT);
930-
break;
931-
case TCP_FIN_WAIT1:
932-
inet_sk_state_store(sk, TCP_CLOSING);
933-
break;
934-
case TCP_FIN_WAIT2:
935-
inet_sk_state_store(sk, TCP_CLOSE);
936-
break;
937-
default:
938-
return;
939-
}
940-
mptcp_close_wake_up(sk);
941-
}
942-
943897
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
944898
{
945899
struct mptcp_subflow_context *subflow;
@@ -1609,7 +1563,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
16091563
if (!mptcp_timer_pending(sk))
16101564
mptcp_reset_timer(sk);
16111565
if (do_check_data_fin)
1612-
__mptcp_check_send_data_fin(sk);
1566+
mptcp_check_send_data_fin(sk);
16131567
}
16141568

16151569
static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
@@ -1727,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
17271681
if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
17281682
*copied_syn = 0;
17291683
} else if (ret && ret != -EINPROGRESS) {
1730-
mptcp_disconnect(sk, 0);
1684+
/* The disconnect() op called by tcp_sendmsg_fastopen()/
1685+
* __inet_stream_connect() can fail, due to locking check,
1686+
* see mptcp_disconnect().
1687+
* Attempt it again outside the problematic scope.
1688+
*/
1689+
if (!mptcp_disconnect(sk, 0))
1690+
sk->sk_socket->state = SS_UNCONNECTED;
17311691
}
17321692
inet_sk(sk)->defer_connect = 0;
17331693

@@ -2158,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
21582118
break;
21592119
}
21602120

2161-
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
2162-
mptcp_check_for_eof(msk);
2163-
21642121
if (sk->sk_shutdown & RCV_SHUTDOWN) {
21652122
/* race breaker: the shutdown could be after the
21662123
* previous receive queue check
@@ -2389,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
23892346

23902347
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
23912348
if (!dispose_it) {
2392-
tcp_disconnect(ssk, 0);
2349+
/* The MPTCP code never wait on the subflow sockets, TCP-level
2350+
* disconnect should never fail
2351+
*/
2352+
WARN_ON_ONCE(tcp_disconnect(ssk, 0));
23932353
msk->subflow->state = SS_UNCONNECTED;
23942354
mptcp_subflow_ctx_reset(subflow);
23952355
release_sock(ssk);
@@ -2408,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
24082368
kfree_rcu(subflow, rcu);
24092369
} else {
24102370
/* otherwise tcp will dispose of the ssk and subflow ctx */
2411-
if (ssk->sk_state == TCP_LISTEN) {
2412-
tcp_set_state(ssk, TCP_CLOSE);
2413-
mptcp_subflow_queue_clean(sk, ssk);
2414-
inet_csk_listen_stop(ssk);
2415-
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
2416-
}
2417-
24182371
__tcp_close(ssk, 0);
24192372

24202373
/* close acquired an extra ref */
@@ -2671,16 +2624,12 @@ static void mptcp_worker(struct work_struct *work)
26712624
if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
26722625
goto unlock;
26732626

2674-
mptcp_check_data_fin_ack(sk);
2675-
26762627
mptcp_check_fastclose(msk);
26772628

26782629
mptcp_pm_nl_work(msk);
26792630

2680-
if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
2681-
mptcp_check_for_eof(msk);
2682-
2683-
__mptcp_check_send_data_fin(sk);
2631+
mptcp_check_send_data_fin(sk);
2632+
mptcp_check_data_fin_ack(sk);
26842633
mptcp_check_data_fin(sk);
26852634

26862635
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
@@ -2812,13 +2761,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
28122761
break;
28132762
fallthrough;
28142763
case TCP_SYN_SENT:
2815-
tcp_disconnect(ssk, O_NONBLOCK);
2764+
WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK));
28162765
break;
28172766
default:
28182767
if (__mptcp_check_fallback(mptcp_sk(sk))) {
28192768
pr_debug("Fallback");
28202769
ssk->sk_shutdown |= how;
28212770
tcp_shutdown(ssk, how);
2771+
2772+
/* simulate the data_fin ack reception to let the state
2773+
* machine move forward
2774+
*/
2775+
WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt);
2776+
mptcp_schedule_work(sk);
28222777
} else {
28232778
pr_debug("Sending DATA_FIN on subflow %p", ssk);
28242779
tcp_send_ack(ssk);
@@ -2858,7 +2813,7 @@ static int mptcp_close_state(struct sock *sk)
28582813
return next & TCP_ACTION_FIN;
28592814
}
28602815

2861-
static void __mptcp_check_send_data_fin(struct sock *sk)
2816+
static void mptcp_check_send_data_fin(struct sock *sk)
28622817
{
28632818
struct mptcp_subflow_context *subflow;
28642819
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2876,19 +2831,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
28762831

28772832
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
28782833

2879-
/* fallback socket will not get data_fin/ack, can move to the next
2880-
* state now
2881-
*/
2882-
if (__mptcp_check_fallback(msk)) {
2883-
WRITE_ONCE(msk->snd_una, msk->write_seq);
2884-
if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
2885-
inet_sk_state_store(sk, TCP_CLOSE);
2886-
mptcp_close_wake_up(sk);
2887-
} else if (sk->sk_state == TCP_FIN_WAIT1) {
2888-
inet_sk_state_store(sk, TCP_FIN_WAIT2);
2889-
}
2890-
}
2891-
28922834
mptcp_for_each_subflow(msk, subflow) {
28932835
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
28942836

@@ -2908,7 +2850,7 @@ static void __mptcp_wr_shutdown(struct sock *sk)
29082850
WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
29092851
WRITE_ONCE(msk->snd_data_fin_enable, 1);
29102852

2911-
__mptcp_check_send_data_fin(sk);
2853+
mptcp_check_send_data_fin(sk);
29122854
}
29132855

29142856
static void __mptcp_destroy_sock(struct sock *sk)
@@ -2953,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
29532895
return EPOLLIN | EPOLLRDNORM;
29542896
}
29552897

2956-
static void mptcp_listen_inuse_dec(struct sock *sk)
2898+
static void mptcp_check_listen_stop(struct sock *sk)
29572899
{
2958-
if (inet_sk_state_load(sk) == TCP_LISTEN)
2959-
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
2900+
struct sock *ssk;
2901+
2902+
if (inet_sk_state_load(sk) != TCP_LISTEN)
2903+
return;
2904+
2905+
sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
2906+
ssk = mptcp_sk(sk)->first;
2907+
if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN))
2908+
return;
2909+
2910+
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
2911+
mptcp_subflow_queue_clean(sk, ssk);
2912+
inet_csk_listen_stop(ssk);
2913+
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
2914+
tcp_set_state(ssk, TCP_CLOSE);
2915+
release_sock(ssk);
29602916
}
29612917

29622918
bool __mptcp_close(struct sock *sk, long timeout)
@@ -2969,7 +2925,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
29692925
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
29702926

29712927
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
2972-
mptcp_listen_inuse_dec(sk);
2928+
mptcp_check_listen_stop(sk);
29732929
inet_sk_state_store(sk, TCP_CLOSE);
29742930
goto cleanup;
29752931
}
@@ -3073,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags)
30733029
{
30743030
struct mptcp_sock *msk = mptcp_sk(sk);
30753031

3032+
/* Deny disconnect if other threads are blocked in sk_wait_event()
3033+
* or inet_wait_for_connect().
3034+
*/
3035+
if (sk->sk_wait_pending)
3036+
return -EBUSY;
3037+
30763038
/* We are on the fastopen error path. We can't call straight into the
30773039
* subflows cleanup code due to lock nesting (we are already under
3078-
* msk->firstsocket lock). Do nothing and leave the cleanup to the
3079-
* caller.
3040+
* msk->firstsocket lock).
30803041
*/
30813042
if (msk->fastopening)
3082-
return 0;
3043+
return -EBUSY;
30833044

3084-
mptcp_listen_inuse_dec(sk);
3045+
mptcp_check_listen_stop(sk);
30853046
inet_sk_state_store(sk, TCP_CLOSE);
30863047

30873048
mptcp_stop_timer(sk);
@@ -3140,6 +3101,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
31403101
inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
31413102
#endif
31423103

3104+
nsk->sk_wait_pending = 0;
31433105
__mptcp_init_sock(nsk);
31443106

31453107
msk = mptcp_sk(nsk);
@@ -3327,9 +3289,14 @@ static void mptcp_release_cb(struct sock *sk)
33273289
for (;;) {
33283290
unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
33293291
msk->push_pending;
3292+
struct list_head join_list;
3293+
33303294
if (!flags)
33313295
break;
33323296

3297+
INIT_LIST_HEAD(&join_list);
3298+
list_splice_init(&msk->join_list, &join_list);
3299+
33333300
/* the following actions acquire the subflow socket lock
33343301
*
33353302
* 1) can't be invoked in atomic scope
@@ -3340,8 +3307,9 @@ static void mptcp_release_cb(struct sock *sk)
33403307
msk->push_pending = 0;
33413308
msk->cb_flags &= ~flags;
33423309
spin_unlock_bh(&sk->sk_lock.slock);
3310+
33433311
if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
3344-
__mptcp_flush_join_list(sk);
3312+
__mptcp_flush_join_list(sk, &join_list);
33453313
if (flags & BIT(MPTCP_PUSH_PENDING))
33463314
__mptcp_push_pending(sk, 0);
33473315
if (flags & BIT(MPTCP_RETRANSMIT))

net/mptcp/protocol.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@
113113
/* MPTCP socket atomic flags */
114114
#define MPTCP_NOSPACE 1
115115
#define MPTCP_WORK_RTX 2
116-
#define MPTCP_WORK_EOF 3
117116
#define MPTCP_FALLBACK_DONE 4
118117
#define MPTCP_WORK_CLOSE_SUBFLOW 5
119118

@@ -476,14 +475,13 @@ struct mptcp_subflow_context {
476475
send_mp_fail : 1,
477476
send_fastclose : 1,
478477
send_infinite_map : 1,
479-
rx_eof : 1,
480478
remote_key_valid : 1, /* received the peer key from */
481479
disposable : 1, /* ctx can be free at ulp release time */
482480
stale : 1, /* unable to snd/rcv data, do not use for xmit */
483481
local_id_valid : 1, /* local_id is correctly initialized */
484482
valid_csum_seen : 1, /* at least one csum validated */
485483
is_mptfo : 1, /* subflow is doing TFO */
486-
__unused : 8;
484+
__unused : 9;
487485
enum mptcp_data_avail data_avail;
488486
u32 remote_nonce;
489487
u64 thmac;
@@ -720,7 +718,6 @@ static inline u64 mptcp_expand_seq(u64 old_seq, u64 cur_seq, bool use_64bit)
720718
void __mptcp_check_push(struct sock *sk, struct sock *ssk);
721719
void __mptcp_data_acked(struct sock *sk);
722720
void __mptcp_error_report(struct sock *sk);
723-
void mptcp_subflow_eof(struct sock *sk);
724721
bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit);
725722
static inline bool mptcp_data_fin_enabled(const struct mptcp_sock *msk)
726723
{

0 commit comments

Comments
 (0)