@@ -44,7 +44,7 @@
 static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
 
 static void __mptcp_destroy_sock(struct sock *sk);
-static void __mptcp_check_send_data_fin(struct sock *sk);
+static void mptcp_check_send_data_fin(struct sock *sk);
 
 DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
 static struct net_device mptcp_napi_dev;
@@ -424,8 +424,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	return !__mptcp_check_fallback(msk) &&
-	       ((1 << sk->sk_state) &
+	return ((1 << sk->sk_state) &
 		(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) &&
 	       msk->write_seq == READ_ONCE(msk->snd_una);
 }
@@ -583,9 +582,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
 	u64 rcv_data_fin_seq;
 	bool ret = false;
 
-	if (__mptcp_check_fallback(msk))
-		return ret;
-
 	/* Need to ack a DATA_FIN received from a peer while this side
 	 * of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
 	 * msk->rcv_data_fin was set when parsing the incoming options
@@ -623,7 +619,8 @@ static bool mptcp_check_data_fin(struct sock *sk)
 		}
 
 		ret = true;
-		mptcp_send_ack(msk);
+		if (!__mptcp_check_fallback(msk))
+			mptcp_send_ack(msk);
 		mptcp_close_wake_up(sk);
 	}
 	return ret;
@@ -850,12 +847,12 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
 	return true;
 }
 
-static void __mptcp_flush_join_list(struct sock *sk)
+static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list)
 {
 	struct mptcp_subflow_context *tmp, *subflow;
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
-	list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
+	list_for_each_entry_safe(subflow, tmp, join_list, node) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 		bool slow = lock_sock_fast(ssk);
 
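This signature change pairs with the mptcp_release_cb() hunk at the end of the patch: the caller now splices msk->join_list onto a private list while it still holds the msk-level lock, and __mptcp_flush_join_list() walks only that private copy, so entries queued concurrently cannot corrupt the traversal. A minimal userspace sketch of the splice-then-walk pattern (pthread mutex in place of the msk data lock; all names here are illustrative, not kernel APIs):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	int id;
	struct node *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *join_list;	/* shared list head, guarded by list_lock */

/* Splice the shared list onto a private head under the lock, then walk
 * the private copy with no lock held -- concurrent producers can keep
 * appending to join_list without disturbing the traversal. */
static void flush_join_list(void)
{
	struct node *local, *n;

	pthread_mutex_lock(&list_lock);
	local = join_list;	/* take ownership of the whole chain */
	join_list = NULL;
	pthread_mutex_unlock(&list_lock);

	while ((n = local) != NULL) {
		local = n->next;
		printf("joining subflow %d\n", n->id);
		free(n);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct node *n = malloc(sizeof(*n));

		if (!n)
			break;
		n->id = i;
		pthread_mutex_lock(&list_lock);
		n->next = join_list;
		join_list = n;
		pthread_mutex_unlock(&list_lock);
	}
	flush_join_list();
	return 0;
}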
@@ -897,49 +894,6 @@ bool mptcp_schedule_work(struct sock *sk)
 	return false;
 }
 
-void mptcp_subflow_eof(struct sock *sk)
-{
-	if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags))
-		mptcp_schedule_work(sk);
-}
-
-static void mptcp_check_for_eof(struct mptcp_sock *msk)
-{
-	struct mptcp_subflow_context *subflow;
-	struct sock *sk = (struct sock *)msk;
-	int receivers = 0;
-
-	mptcp_for_each_subflow(msk, subflow)
-		receivers += !subflow->rx_eof;
-	if (receivers)
-		return;
-
-	if (!(sk->sk_shutdown & RCV_SHUTDOWN)) {
-		/* hopefully temporary hack: propagate shutdown status
-		 * to msk, when all subflows agree on it
-		 */
-		WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
-
-		smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
-		sk->sk_data_ready(sk);
-	}
-
-	switch (sk->sk_state) {
-	case TCP_ESTABLISHED:
-		inet_sk_state_store(sk, TCP_CLOSE_WAIT);
-		break;
-	case TCP_FIN_WAIT1:
-		inet_sk_state_store(sk, TCP_CLOSING);
-		break;
-	case TCP_FIN_WAIT2:
-		inet_sk_state_store(sk, TCP_CLOSE);
-		break;
-	default:
-		return;
-	}
-	mptcp_close_wake_up(sk);
-}
-
 static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;
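The two helpers removed here hand-rolled the receive-side close transitions once every subflow had hit EOF; after this patch those transitions are reached through the common DATA_FIN handling instead. For reference, a standalone sketch of the state map the deleted mptcp_check_for_eof() implemented (plain C, toy enum instead of the kernel's TCP states):

#include <stdio.h>

enum toy_state { ESTABLISHED, FIN_WAIT1, FIN_WAIT2, CLOSE_WAIT, CLOSING, CLOSED };

/* Transition taken once every subflow has reported EOF, mirroring the
 * switch in the deleted mptcp_check_for_eof(). */
static enum toy_state on_all_subflows_eof(enum toy_state s)
{
	switch (s) {
	case ESTABLISHED:	return CLOSE_WAIT;	/* peer closed first */
	case FIN_WAIT1:		return CLOSING;		/* simultaneous close */
	case FIN_WAIT2:		return CLOSED;		/* our FIN already acked */
	default:		return s;		/* no transition */
	}
}

int main(void)
{
	printf("%d %d %d\n",
	       on_all_subflows_eof(ESTABLISHED),
	       on_all_subflows_eof(FIN_WAIT1),
	       on_all_subflows_eof(FIN_WAIT2));
	return 0;
}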
@@ -1609,7 +1563,7 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 	if (!mptcp_timer_pending(sk))
 		mptcp_reset_timer(sk);
 	if (do_check_data_fin)
-		__mptcp_check_send_data_fin(sk);
+		mptcp_check_send_data_fin(sk);
 }
 
 static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
@@ -1727,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
 		if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
 			*copied_syn = 0;
 	} else if (ret && ret != -EINPROGRESS) {
-		mptcp_disconnect(sk, 0);
+		/* The disconnect() op called by tcp_sendmsg_fastopen()/
+		 * __inet_stream_connect() can fail due to a locking check,
+		 * see mptcp_disconnect().
+		 * Attempt it again outside the problematic scope.
+		 */
+		if (!mptcp_disconnect(sk, 0))
+			sk->sk_socket->state = SS_UNCONNECTED;
 	}
 	inet_sk(sk)->defer_connect = 0;
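The rewritten error path accounts for mptcp_disconnect() now being able to fail (it returns -EBUSY from inside the fastopen path, see the mptcp_disconnect() hunk below), so the caller retries the disconnect here, outside the lock nesting that doomed the first attempt, and only marks the socket unconnected if the retry succeeds. A hedged userspace sketch of the retry-outside-the-problematic-scope idea (every name below is invented for illustration):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

static bool fastopening;	/* models msk->fastopening */

/* Models mptcp_disconnect(): refuses to run from inside the fastopen
 * scope, where the subflow lock is already held. */
static int toy_disconnect(void)
{
	if (fastopening)
		return -EBUSY;
	return 0;
}

int main(void)
{
	int ret;

	fastopening = true;
	ret = toy_disconnect();	/* inner attempt fails with -EBUSY */
	fastopening = false;

	if (ret && toy_disconnect() == 0)	/* retry outside the scope */
		printf("socket marked unconnected\n");
	return 0;
}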
@@ -2158,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			break;
 		}
 
-		if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
-			mptcp_check_for_eof(msk);
-
 		if (sk->sk_shutdown & RCV_SHUTDOWN) {
 			/* race breaker: the shutdown could be after the
 			 * previous receive queue check
@@ -2389,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 
 	need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
 	if (!dispose_it) {
-		tcp_disconnect(ssk, 0);
+		/* The MPTCP code never waits on the subflow sockets, so the
+		 * TCP-level disconnect should never fail
+		 */
+		WARN_ON_ONCE(tcp_disconnect(ssk, 0));
 		msk->subflow->state = SS_UNCONNECTED;
 		mptcp_subflow_ctx_reset(subflow);
 		release_sock(ssk);
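Wrapping the call in WARN_ON_ONCE() changes the diagnostics, not the behaviour: the condition is evaluated on every pass, and only the first failure at a given call site emits a warning. A userspace approximation of that macro contract (GNU C statement expression, illustrative only):

#include <stdbool.h>
#include <stdio.h>

/* Evaluate cond every time; warn only on the first true result at this
 * call site (each macro expansion owns its own static flag). */
#define WARN_ON_ONCE(cond) ({					\
	static bool __warned;					\
	bool __c = (cond);					\
	if (__c && !__warned) {					\
		__warned = true;				\
		fprintf(stderr, "WARN: %s\n", #cond);		\
	}							\
	__c;							\
})

static int fake_disconnect(void)
{
	return -1;	/* nonzero return means the disconnect failed */
}

int main(void)
{
	for (int i = 0; i < 3; i++)
		WARN_ON_ONCE(fake_disconnect());	/* warns once, runs thrice */
	return 0;
}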
@@ -2408,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 		kfree_rcu(subflow, rcu);
 	} else {
 		/* otherwise tcp will dispose of the ssk and subflow ctx */
-		if (ssk->sk_state == TCP_LISTEN) {
-			tcp_set_state(ssk, TCP_CLOSE);
-			mptcp_subflow_queue_clean(sk, ssk);
-			inet_csk_listen_stop(ssk);
-			mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
-		}
-
 		__tcp_close(ssk, 0);
 
 		/* close acquired an extra ref */
@@ -2671,16 +2624,12 @@ static void mptcp_worker(struct work_struct *work)
 	if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
 		goto unlock;
 
-	mptcp_check_data_fin_ack(sk);
-
 	mptcp_check_fastclose(msk);
 
 	mptcp_pm_nl_work(msk);
 
-	if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
-		mptcp_check_for_eof(msk);
-
-	__mptcp_check_send_data_fin(sk);
+	mptcp_check_send_data_fin(sk);
+	mptcp_check_data_fin_ack(sk);
 	mptcp_check_data_fin(sk);
 
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
@@ -2812,13 +2761,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 			break;
 		fallthrough;
 	case TCP_SYN_SENT:
-		tcp_disconnect(ssk, O_NONBLOCK);
+		WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK));
 		break;
 	default:
 		if (__mptcp_check_fallback(mptcp_sk(sk))) {
 			pr_debug("Fallback");
 			ssk->sk_shutdown |= how;
 			tcp_shutdown(ssk, how);
+
+			/* simulate the data_fin ack reception to let the state
+			 * machine move forward
+			 */
+			WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt);
+			mptcp_schedule_work(sk);
 		} else {
 			pr_debug("Sending DATA_FIN on subflow %p", ssk);
 			tcp_send_ack(ssk);
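A fallback subflow will never receive a DATA_ACK, so the new code fakes one: advancing snd_una to snd_nxt and scheduling the worker lets mptcp_pending_data_fin_ack(), which no longer excludes fallback sockets (first hunk of this patch), observe write_seq == snd_una and move the msk state machine forward. A toy model of that predicate (standalone C, deliberately simplified fields):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Toy model: with no DATA_ACK on a fallback socket, the ack is
 * simulated by advancing snd_una to snd_nxt before re-running the
 * pending-DATA_FIN-ack check. */
struct toy_msk {
	uint64_t write_seq;
	uint64_t snd_nxt;
	uint64_t snd_una;
	bool fin_wait;	/* stands in for the FIN_WAIT1/CLOSING/LAST_ACK test */
};

static bool pending_data_fin_ack(const struct toy_msk *msk)
{
	return msk->fin_wait && msk->write_seq == msk->snd_una;
}

int main(void)
{
	struct toy_msk msk = {
		.write_seq = 101, .snd_nxt = 101, .snd_una = 100,
		.fin_wait = true,
	};

	printf("before: %d\n", pending_data_fin_ack(&msk));	/* 0 */
	msk.snd_una = msk.snd_nxt;	/* the simulated ack */
	printf("after:  %d\n", pending_data_fin_ack(&msk));	/* 1 */
	return 0;
}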
@@ -2858,7 +2813,7 @@ static int mptcp_close_state(struct sock *sk)
 	return next & TCP_ACTION_FIN;
 }
 
-static void __mptcp_check_send_data_fin(struct sock *sk)
+static void mptcp_check_send_data_fin(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow;
 	struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2876,19 +2831,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
 
 	WRITE_ONCE(msk->snd_nxt, msk->write_seq);
 
-	/* fallback socket will not get data_fin/ack, can move to the next
-	 * state now
-	 */
-	if (__mptcp_check_fallback(msk)) {
-		WRITE_ONCE(msk->snd_una, msk->write_seq);
-		if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
-			inet_sk_state_store(sk, TCP_CLOSE);
-			mptcp_close_wake_up(sk);
-		} else if (sk->sk_state == TCP_FIN_WAIT1) {
-			inet_sk_state_store(sk, TCP_FIN_WAIT2);
-		}
-	}
-
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
 
@@ -2908,7 +2850,7 @@ static void __mptcp_wr_shutdown(struct sock *sk)
 	WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
 	WRITE_ONCE(msk->snd_data_fin_enable, 1);
 
-	__mptcp_check_send_data_fin(sk);
+	mptcp_check_send_data_fin(sk);
 }
 
 static void __mptcp_destroy_sock(struct sock *sk)
@@ -2953,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
 	return EPOLLIN | EPOLLRDNORM;
 }
 
-static void mptcp_listen_inuse_dec(struct sock *sk)
+static void mptcp_check_listen_stop(struct sock *sk)
 {
-	if (inet_sk_state_load(sk) == TCP_LISTEN)
-		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	struct sock *ssk;
+
+	if (inet_sk_state_load(sk) != TCP_LISTEN)
+		return;
+
+	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+	ssk = mptcp_sk(sk)->first;
+	if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN))
+		return;
+
+	lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+	mptcp_subflow_queue_clean(sk, ssk);
+	inet_csk_listen_stop(ssk);
+	mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
+	tcp_set_state(ssk, TCP_CLOSE);
+	release_sock(ssk);
 }
 
 bool __mptcp_close(struct sock *sk, long timeout)
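The renamed helper now performs the listener teardown that __mptcp_close_ssk() stopped doing above: with the msk lock already held it takes the first subflow's lock with a nesting annotation, drains the not-yet-accepted children, stops the listener, fires the PM event, and only then marks the subflow closed. A compact userspace analogue of "inner lock always taken under the outer one, teardown in a fixed order" (invented names; SINGLE_DEPTH_NESTING has no userspace equivalent):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* The inner (first-subflow-like) lock is only ever taken while the outer
 * (msk-like) lock is held, so the ordering is fixed; the kernel tells
 * lockdep the same thing via lock_sock_nested(SINGLE_DEPTH_NESTING). */
static pthread_mutex_t outer = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t inner = PTHREAD_MUTEX_INITIALIZER;

static int listen_fd = -1;	/* -1 means "not listening" */

static void check_listen_stop(void)
{
	if (listen_fd < 0)	/* bail out unless we are actually listening */
		return;

	pthread_mutex_lock(&inner);
	/* drain pending children, stop listening, notify, mark closed --
	 * all in one fixed order under the inner lock */
	close(listen_fd);
	listen_fd = -1;
	pthread_mutex_unlock(&inner);
}

int main(void)
{
	listen_fd = dup(STDOUT_FILENO);	/* stand-in for a listening socket */

	pthread_mutex_lock(&outer);
	check_listen_stop();
	pthread_mutex_unlock(&outer);

	printf("stopped: %s\n", listen_fd < 0 ? "yes" : "no");
	return 0;
}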
@@ -2969,7 +2925,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
 	WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
 
 	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
-		mptcp_listen_inuse_dec(sk);
+		mptcp_check_listen_stop(sk);
 		inet_sk_state_store(sk, TCP_CLOSE);
 		goto cleanup;
 	}
@@ -3073,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 
+	/* Deny disconnect if other threads are blocked in sk_wait_event()
+	 * or inet_wait_for_connect().
+	 */
+	if (sk->sk_wait_pending)
+		return -EBUSY;
+
 	/* We are on the fastopen error path. We can't call straight into the
 	 * subflows cleanup code due to lock nesting (we are already under
-	 * msk->firstsocket lock). Do nothing and leave the cleanup to the
-	 * caller.
+	 * msk->firstsocket lock).
 	 */
 	if (msk->fastopening)
-		return 0;
+		return -EBUSY;
 
-	mptcp_listen_inuse_dec(sk);
+	mptcp_check_listen_stop(sk);
 	inet_sk_state_store(sk, TCP_CLOSE);
 
 	mptcp_stop_timer(sk);
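The new sk_wait_pending test makes disconnect refuse to reset the socket while another thread is still sleeping in sk_wait_event() or inet_wait_for_connect(), since that waiter would otherwise wake on a socket whose state changed under it. A sketch of the waiter-counting idea using plain C11 atomics (names invented):

#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

/* Count of threads currently blocked waiting on the socket. */
static atomic_int wait_pending;

static int toy_disconnect(void)
{
	/* Deny the reset while anyone is still sleeping on the socket,
	 * as mptcp_disconnect() now does via sk->sk_wait_pending. */
	if (atomic_load(&wait_pending))
		return -EBUSY;
	/* ... safe to reset the socket state here ... */
	return 0;
}

int main(void)
{
	atomic_fetch_add(&wait_pending, 1);	/* a waiter goes to sleep */
	printf("disconnect: %d\n", toy_disconnect());	/* -EBUSY */
	atomic_fetch_sub(&wait_pending, 1);	/* waiter woken and gone */
	printf("disconnect: %d\n", toy_disconnect());	/* 0 */
	return 0;
}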
@@ -3140,6 +3101,7 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
 	inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
 #endif
 
+	nsk->sk_wait_pending = 0;
 	__mptcp_init_sock(nsk);
 
 	msk = mptcp_sk(nsk);
@@ -3327,9 +3289,14 @@ static void mptcp_release_cb(struct sock *sk)
 	for (;;) {
 		unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
 				      msk->push_pending;
+		struct list_head join_list;
+
 		if (!flags)
 			break;
 
+		INIT_LIST_HEAD(&join_list);
+		list_splice_init(&msk->join_list, &join_list);
+
 		/* the following actions acquire the subflow socket lock
 		 *
 		 * 1) can't be invoked in atomic scope
@@ -3340,8 +3307,9 @@ static void mptcp_release_cb(struct sock *sk)
 		msk->push_pending = 0;
 		msk->cb_flags &= ~flags;
 		spin_unlock_bh(&sk->sk_lock.slock);
+
 		if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
-			__mptcp_flush_join_list(sk);
+			__mptcp_flush_join_list(sk, &join_list);
 		if (flags & BIT(MPTCP_PUSH_PENDING))
 			__mptcp_push_pending(sk, 0);
 		if (flags & BIT(MPTCP_RETRANSMIT))
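Taken together with the join_list splice added above, the release-callback loop follows a classic deferred-work shape: snapshot the pending flag bits under the socket spinlock, clear them, drop the spinlock, then run the handlers; whatever becomes pending in the meantime is caught by the next iteration. A minimal sketch of that grab-and-clear loop (pthread mutex standing in for the spinlock, invented flag names):

#include <pthread.h>
#include <stdio.h>

#define FLUSH_JOIN_LIST	(1u << 0)
#define PUSH_PENDING	(1u << 1)

static pthread_mutex_t slock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int cb_flags = FLUSH_JOIN_LIST | PUSH_PENDING;

static void release_cb(void)
{
	for (;;) {
		unsigned int flags;

		/* Snapshot and clear under the lock... */
		pthread_mutex_lock(&slock);
		flags = cb_flags;
		cb_flags = 0;
		pthread_mutex_unlock(&slock);

		if (!flags)
			break;

		/* ...then act on the snapshot with the lock dropped. */
		if (flags & FLUSH_JOIN_LIST)
			printf("flush join list\n");
		if (flags & PUSH_PENDING)
			printf("push pending data\n");
	}
}

int main(void)
{
	release_cb();
	return 0;
}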