@@ -405,7 +405,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 	return false;
 }
 
-static void mptcp_stop_timer(struct sock *sk)
+static void mptcp_stop_rtx_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -770,6 +770,46 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
 	return moved;
 }
 
+static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+{
+	int err = sock_error(ssk);
+	int ssk_state;
+
+	if (!err)
+		return false;
+
+	/* only propagate errors on fallen-back sockets or
+	 * on MPC connect
+	 */
+	if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
+		return false;
+
+	/* We need to propagate only transition to CLOSE state.
+	 * Orphaned socket will see such state change via
+	 * subflow_sched_work_if_closed() and that path will properly
+	 * destroy the msk as needed.
+	 */
+	ssk_state = inet_sk_state_load(ssk);
+	if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+		inet_sk_state_store(sk, ssk_state);
+	WRITE_ONCE(sk->sk_err, -err);
+
+	/* This barrier is coupled with smp_rmb() in mptcp_poll() */
+	smp_wmb();
+	sk_error_report(sk);
+	return true;
+}
+
+void __mptcp_error_report(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	mptcp_for_each_subflow(msk, subflow)
+		if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow)))
+			break;
+}
+
 /* In most cases we will be able to lock the mptcp socket. If its already
  * owned, we need to defer to the work queue to avoid ABBA deadlock.
  */
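
Note: the smp_wmb() in __mptcp_subflow_error_report() needs a matching read barrier on the consumer side. It is not part of this diff, but for context the paired reader in mptcp_poll() looks roughly like this sketch:

	/* sketch: paired with the smp_wmb() above, so a poller that observes
	 * the wakeup also observes the just-written sk_err
	 */
	smp_rmb();
	if (READ_ONCE(sk->sk_err))
		mask |= EPOLLERR;
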
@@ -852,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
 	mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
 	mptcp_sockopt_sync_locked(msk, ssk);
 	mptcp_subflow_joined(msk, ssk);
+	mptcp_stop_tout_timer(sk);
 	return true;
 }
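
The tout-timer helpers used across this diff (mptcp_stop_tout_timer(), mptcp_start_tout_timer(), mptcp_set_close_tout()) are not defined in these hunks; in the series they presumably live in protocol.h. A minimal sketch consistent with the call sites here, assuming the close timestamp is parked in the otherwise-unused icsk_mtup.probe_timestamp field, with 0 meaning "not armed" (as the checks in mptcp_close_tout_expired() and mptcp_reset_tout_timer() below suggest):

	/* sketch only: reconstructed from the call sites in this diff */
	static inline void mptcp_set_close_tout(struct sock *sk, unsigned long tout)
	{
		/* never store 0: it would read back as "timer not armed" */
		inet_csk(sk)->icsk_mtup.probe_timestamp = tout ? : 1;
	}

	static inline void mptcp_start_tout_timer(struct sock *sk)
	{
		mptcp_set_close_tout(sk, tcp_jiffies32);
		mptcp_reset_tout_timer(mptcp_sk(sk), 0);
	}

	static inline void mptcp_stop_tout_timer(struct sock *sk)
	{
		if (!inet_csk(sk)->icsk_mtup.probe_timestamp)
			return;

		sk_stop_timer(sk, &sk->sk_timer);
		inet_csk(sk)->icsk_mtup.probe_timestamp = 0;
	}

Under that assumption, mptcp_set_close_tout() must avoid storing a raw 0 timestamp, hence the "?: 1" guard in the sketch.
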
@@ -871,12 +912,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
 	}
 }
 
-static bool mptcp_timer_pending(struct sock *sk)
+static bool mptcp_rtx_timer_pending(struct sock *sk)
 {
 	return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
 }
 
-static void mptcp_reset_timer(struct sock *sk)
+static void mptcp_reset_rtx_timer(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	unsigned long tout;
@@ -1010,10 +1051,10 @@ static void __mptcp_clean_una(struct sock *sk)
 out:
 	if (snd_una == READ_ONCE(msk->snd_nxt) &&
 	    snd_una == READ_ONCE(msk->write_seq)) {
-		if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
-			mptcp_stop_timer(sk);
+		if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
+			mptcp_stop_rtx_timer(sk);
 	} else {
-		mptcp_reset_timer(sk);
+		mptcp_reset_rtx_timer(sk);
 	}
 }
@@ -1586,8 +1627,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 		mptcp_push_release(ssk, &info);
 
 	/* ensure the rtx timer is running */
-	if (!mptcp_timer_pending(sk))
-		mptcp_reset_timer(sk);
+	if (!mptcp_rtx_timer_pending(sk))
+		mptcp_reset_rtx_timer(sk);
 	if (do_check_data_fin)
 		mptcp_check_send_data_fin(sk);
 }
@@ -1650,8 +1691,8 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool
 	if (copied) {
 		tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
 			 info.size_goal);
-		if (!mptcp_timer_pending(sk))
-			mptcp_reset_timer(sk);
+		if (!mptcp_rtx_timer_pending(sk))
+			mptcp_reset_rtx_timer(sk);
 
 		if (msk->snd_data_fin_enable &&
 		    msk->snd_nxt + 1 == msk->write_seq)
@@ -2220,7 +2261,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
 	sock_put(sk);
 }
 
-static void mptcp_timeout_timer(struct timer_list *t)
+static void mptcp_tout_timer(struct timer_list *t)
 {
 	struct sock *sk = from_timer(sk, t, sk_timer);
@@ -2329,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	bool dispose_it, need_push = false;
 
 	/* If the first subflow moved to a close state before accept, e.g. due
-	 * to an incoming reset, mptcp either:
-	 * - if either the subflow or the msk are dead, destroy the context
-	 *   (the subflow socket is deleted by inet_child_forget) and the msk
-	 * - otherwise do nothing at the moment and take action at accept and/or
-	 *   listener shutdown - user-space must be able to accept() the closed
-	 *   socket.
+	 * to an incoming reset or listener shutdown, the subflow socket is
+	 * already deleted by inet_child_forget() and the mptcp socket can't
+	 * survive too.
 	 */
-	if (msk->in_accept_queue && msk->first == ssk) {
-		if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
-			return;
-
+	if (msk->in_accept_queue && msk->first == ssk &&
+	    (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
 		/* ensure later check in mptcp_worker() will dispose the msk */
+		mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1));
 		sock_set_flag(sk, SOCK_DEAD);
 		lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
 		mptcp_subflow_drop_ctx(ssk);
@@ -2392,6 +2429,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 	}
 
 out_release:
+	__mptcp_subflow_error_report(sk, ssk);
 	release_sock(ssk);
 
 	sock_put(ssk);
@@ -2402,6 +2440,22 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 out:
 	if (need_push)
 		__mptcp_push_pending(sk, 0);
+
+	/* Catch every 'all subflows closed' scenario, including peers silently
+	 * closing them, e.g. due to timeout.
+	 * For established sockets, allow an additional timeout before closing,
+	 * as the protocol can still create more subflows.
+	 */
+	if (list_is_singular(&msk->conn_list) && msk->first &&
+	    inet_sk_state_load(msk->first) == TCP_CLOSE) {
+		if (sk->sk_state != TCP_ESTABLISHED ||
+		    msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
+			inet_sk_state_store(sk, TCP_CLOSE);
+			mptcp_close_wake_up(sk);
+		} else {
+			mptcp_start_tout_timer(sk);
+		}
+	}
 }
 
 void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2445,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk)
 
 }
 
-static bool mptcp_should_close(const struct sock *sk)
+static bool mptcp_close_tout_expired(const struct sock *sk)
 {
-	s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
-	struct mptcp_subflow_context *subflow;
-
-	if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
-		return true;
+	if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
+	    sk->sk_state == TCP_CLOSE)
+		return false;
 
-	/* if all subflows are in closed status don't bother with additional
-	 * timeout
-	 */
-	mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
-		if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
-		    TCP_CLOSE)
-			return false;
-	}
-	return true;
+	return time_after32(tcp_jiffies32,
+			    inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
 }
 
 static void mptcp_check_fastclose(struct mptcp_sock *msk)
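
The new mptcp_close_tout_expired() leans on time_after32() for a wrap-safe comparison of 32-bit tcp_jiffies32 stamps. Roughly, the macro reduces to a signed comparison of the unsigned difference, as in this sketch of its semantics (not its exact kernel definition):

	/* sketch: "a is after b", robust to u32 wraparound as long as the
	 * two stamps are less than 2^31 ticks apart
	 */
	static inline bool after32_sketch(u32 a, u32 b)
	{
		return (s32)(b - a) < 0;
	}
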
@@ -2588,27 +2633,28 @@ static void __mptcp_retrans(struct sock *sk)
 reset_timer:
 	mptcp_check_and_set_pending(sk);
 
-	if (!mptcp_timer_pending(sk))
-		mptcp_reset_timer(sk);
+	if (!mptcp_rtx_timer_pending(sk))
+		mptcp_reset_rtx_timer(sk);
 }
 
 /* schedule the timeout timer for the relevant event: either close timeout
  * or mp_fail timeout. The close timeout takes precedence on the mp_fail one
  */
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
 {
 	struct sock *sk = (struct sock *)msk;
 	unsigned long timeout, close_timeout;
 
-	if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+	if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
 		return;
 
-	close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+	close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
+			TCP_TIMEWAIT_LEN;
 
 	/* the close timeout takes precedence on the fail one, and here at least one of
 	 * them is active
 	 */
-	timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+	timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
 
 	sk_reset_timer(sk, &sk->sk_timer, timeout);
 }
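
The close_timeout expression above is a conversion between the two clocks involved: probe_timestamp is recorded in tcp_jiffies32 units, while sk_reset_timer() expects an absolute deadline in jiffies. Spelled out (a restatement of the diff's arithmetic, nothing new):

	elapsed  = tcp_jiffies32 - probe_timestamp	/* time since the close was armed */
	deadline = jiffies + (TCP_TIMEWAIT_LEN - elapsed)
	         = probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN
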
@@ -2627,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
 	mptcp_subflow_reset(ssk);
 	WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
 	unlock_sock_fast(ssk, slow);
-
-	mptcp_reset_timeout(msk, 0);
 }
 
 static void mptcp_do_fastclose(struct sock *sk)
@@ -2665,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work)
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
 		__mptcp_close_subflow(sk);
 
-	/* There is no point in keeping around an orphaned sk timedout or
-	 * closed, but we need the msk around to reply to incoming DATA_FIN,
-	 * even if it is orphaned and in FIN_WAIT2 state
-	 */
-	if (sock_flag(sk, SOCK_DEAD)) {
-		if (mptcp_should_close(sk))
-			mptcp_do_fastclose(sk);
+	if (mptcp_close_tout_expired(sk)) {
+		mptcp_do_fastclose(sk);
+		mptcp_close_wake_up(sk);
+	}
 
-		if (sk->sk_state == TCP_CLOSE) {
-			__mptcp_destroy_sock(sk);
-			goto unlock;
-		}
+	if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
+		__mptcp_destroy_sock(sk);
+		goto unlock;
 	}
 
 	if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
@@ -2717,7 +2757,7 @@ static void __mptcp_init_sock(struct sock *sk)
 
 	/* re-use the csk retrans timer for MPTCP-level retrans */
 	timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
-	timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
+	timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);
 }
 
 static void mptcp_ca_reset(struct sock *sk)
@@ -2808,8 +2848,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
 		} else {
 			pr_debug("Sending DATA_FIN on subflow %p", ssk);
 			tcp_send_ack(ssk);
-			if (!mptcp_timer_pending(sk))
-				mptcp_reset_timer(sk);
+			if (!mptcp_rtx_timer_pending(sk))
+				mptcp_reset_rtx_timer(sk);
 		}
 		break;
 	}
@@ -2892,7 +2932,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
 
 	might_sleep();
 
-	mptcp_stop_timer(sk);
+	mptcp_stop_rtx_timer(sk);
 	sk_stop_timer(sk, &sk->sk_timer);
 	msk->pm.status = 0;
 	mptcp_release_sched(msk);
@@ -2975,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
 
 cleanup:
 	/* orphan all the subflows */
-	inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 		bool slow = lock_sock_fast_nested(ssk);
@@ -3012,7 +3051,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
 		__mptcp_destroy_sock(sk);
 		do_cancel_work = true;
 	} else {
-		mptcp_reset_timeout(msk, 0);
+		mptcp_start_tout_timer(sk);
 	}
 
 	return do_cancel_work;
@@ -3075,8 +3114,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
 	mptcp_check_listen_stop(sk);
 	inet_sk_state_store(sk, TCP_CLOSE);
 
-	mptcp_stop_timer(sk);
-	sk_stop_timer(sk, &sk->sk_timer);
+	mptcp_stop_rtx_timer(sk);
+	mptcp_stop_tout_timer(sk);
 
 	if (msk->token)
 		mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);