@@ -662,19 +662,19 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 
 		skb = skb_peek(&ssk->sk_receive_queue);
 		if (!skb) {
-			/* if no data is found, a racing workqueue/recvmsg
-			 * already processed the new data, stop here or we
-			 * can enter an infinite loop
+			/* With racing move_skbs_to_msk() and __mptcp_move_skbs(),
+			 * a different CPU can have already processed the pending
+			 * data, stop here or we can enter an infinite loop
 			 */
 			if (!moved)
 				done = true;
 			break;
 		}
 
 		if (__mptcp_check_fallback(msk)) {
-			/* if we are running under the workqueue, TCP could have
-			 * collapsed skbs between dummy map creation and now
-			 * be sure to adjust the size
+			/* Under fallback skbs have no MPTCP extension and TCP could
+			 * collapse them between the dummy map creation and the
+			 * current dequeue. Be sure to adjust the map size.
 			 */
 			map_remaining = skb->len;
 			subflow->map_data_len = skb->len;
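A note on the guard above: when several contexts can drain the same receive queue, "queue empty" alone is ambiguous. Only "empty and this caller moved nothing" proves the pending data was handled elsewhere, and retrying in that case spins forever. Below is a minimal user-space analog of the moved/done logic (illustrative names, a pthread mutex standing in for the socket locks):

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	struct node *next;
};

struct queue {
	pthread_mutex_t lock;
	struct node *head;
};

/* Drain the queue; returns true ("done") only when it is empty and
 * this caller made no progress itself -- whatever data triggered the
 * call was consumed by a racer, so retrying would spin forever.
 */
static bool drain(struct queue *q)
{
	bool moved = false;

	for (;;) {
		struct node *n;

		pthread_mutex_lock(&q->lock);
		n = q->head;
		if (n)
			q->head = n->next;
		pthread_mutex_unlock(&q->lock);

		if (!n)
			return !moved;
		moved = true;
		/* consume(n); */
	}
}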
@@ -1707,7 +1707,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 			goto out;
 		} else if (ret) {
 			release_sock(ssk);
-			goto out;
+			goto do_error;
 		}
 		release_sock(ssk);
 	}
@@ -1717,9 +1717,13 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
 		ret = sk_stream_wait_connect(sk, &timeo);
 		if (ret)
-			goto out;
+			goto do_error;
 	}
 
+	ret = -EPIPE;
+	if (unlikely(sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)))
+		goto do_error;
+
 	pfrag = sk_page_frag(sk);
 
 	while (msg_data_left(msg)) {
@@ -1728,11 +1732,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		bool dfrag_collapsed;
 		size_t psize, offset;
 
-		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
-			ret = -EPIPE;
-			goto out;
-		}
-
 		/* reuse tail pfrag, if possible, or carve a new one from the
 		 * page allocator
 		 */
@@ -1764,7 +1763,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		if (copy_page_from_iter(dfrag->page, offset, psize,
 					&msg->msg_iter) != psize) {
 			ret = -EFAULT;
-			goto out;
+			goto do_error;
 		}
 
 		/* data successfully copied into the write queue */
@@ -1796,15 +1795,22 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		__mptcp_push_pending(sk, msg->msg_flags);
 		ret = sk_stream_wait_memory(sk, &timeo);
 		if (ret)
-			goto out;
+			goto do_error;
 	}
 
 	if (copied)
 		__mptcp_push_pending(sk, msg->msg_flags);
 
 out:
 	release_sock(sk);
-	return copied ? : ret;
+	return copied;
+
+do_error:
+	if (copied)
+		goto out;
+
+	copied = sk_stream_error(sk, msg->msg_flags, ret);
+	goto out;
 }
 
 static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
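The new do_error path gives mptcp_sendmsg() the standard stream-socket error contract that tcp_sendmsg() follows: if some bytes were already queued, report the partial count and leave the error for the next call; only a send that queued nothing fails, with sk_stream_error() converting -EPIPE into a SIGPIPE unless the caller passed MSG_NOSIGNAL. From user space the contract looks like this (a minimal sketch using plain POSIX sockets):

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
	int sv[2];
	char byte = 'x';

	if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv))
		return 1;

	close(sv[1]);	/* peer disappears */

	/* without MSG_NOSIGNAL this send would deliver SIGPIPE instead */
	if (send(sv[0], &byte, 1, MSG_NOSIGNAL) < 0)
		printf("send: %s\n", strerror(errno));	/* "Broken pipe" */

	close(sv[0]);
	return 0;
}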
@@ -2307,8 +2313,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 
 	lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
 
-	if (flags & MPTCP_CF_FASTCLOSE)
+	if (flags & MPTCP_CF_FASTCLOSE) {
+		/* be sure to force the tcp_disconnect() path,
+		 * to generate the egress reset
+		 */
+		ssk->sk_lingertime = 0;
+		sock_set_flag(ssk, SOCK_LINGER);
 		subflow->send_fastclose = 1;
+	}
 
 	need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
 	if (!dispose_it) {
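Forcing SOCK_LINGER with a zero sk_lingertime is the in-kernel counterpart of the classic SO_LINGER trick: a linger time of zero makes close() take the tcp_disconnect() path and emit a RST rather than an orderly FIN exchange. For comparison, the user-space form (standard socket API; close_with_rst() is an illustrative helper name):

#include <sys/socket.h>
#include <unistd.h>

/* Close a connected TCP socket with an immediate RST instead of the
 * orderly FIN handshake: l_onoff=1, l_linger=0 discards unsent data
 * and resets the connection -- the behavior MPTCP_CF_FASTCLOSE forces
 * above from inside the kernel.
 */
static void close_with_rst(int fd)
{
	struct linger lin = { .l_onoff = 1, .l_linger = 0 };

	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lin, sizeof(lin));
	close(fd);
}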
@@ -2441,12 +2453,31 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
 		unlock_sock_fast(tcp_sk, slow);
 	}
 
+	/* Mirror the tcp_reset() error propagation */
+	switch (sk->sk_state) {
+	case TCP_SYN_SENT:
+		sk->sk_err = ECONNREFUSED;
+		break;
+	case TCP_CLOSE_WAIT:
+		sk->sk_err = EPIPE;
+		break;
+	case TCP_CLOSE:
+		return;
+	default:
+		sk->sk_err = ECONNRESET;
+	}
+
 	inet_sk_state_store(sk, TCP_CLOSE);
 	sk->sk_shutdown = SHUTDOWN_MASK;
 	smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
 	set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
 
-	mptcp_close_wake_up(sk);
+	/* the calling mptcp_worker will properly destroy the socket */
+	if (sock_flag(sk, SOCK_DEAD))
+		return;
+
+	sk->sk_state_change(sk);
+	sk_error_report(sk);
 }
 
 static void __mptcp_retrans(struct sock *sk)
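Because the switch mirrors tcp_reset(), an application on a fastclosed MPTCP socket sees the same errno values as after a plain TCP RST: ECONNREFUSED if the reset arrives while connecting, EPIPE if the peer's FIN had already been received (TCP_CLOSE_WAIT), otherwise ECONNRESET. A sketch of the peer-side view (hypothetical helper; assumes the reset has already arrived):

#include <errno.h>
#include <stdio.h>
#include <sys/socket.h>

/* Peer-side probe after a reset: the pending error surfaces once on
 * recv() as ECONNRESET; later writes on the dead socket get EPIPE.
 */
static void report_reset(int fd)
{
	char buf[64];

	if (recv(fd, buf, sizeof(buf), 0) < 0 && errno == ECONNRESET)
		fprintf(stderr, "recv: connection reset by peer\n");

	if (send(fd, "x", 1, MSG_NOSIGNAL) < 0 && errno == EPIPE)
		fprintf(stderr, "send: broken pipe\n");
}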
@@ -2552,6 +2583,16 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
 	mptcp_reset_timeout(msk, 0);
 }
 
+static void mptcp_do_fastclose(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow, *tmp;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	mptcp_for_each_subflow_safe(msk, subflow, tmp)
+		__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
+				  subflow, MPTCP_CF_FASTCLOSE);
+}
+
 static void mptcp_worker(struct work_struct *work)
 {
 	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
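mptcp_do_fastclose() walks the subflow list with the _safe iterator because __mptcp_close_ssk() can unlink and free the current entry; the iterator must cache the next pointer before the body runs. The same idiom reduced to a plain singly linked list (user-space sketch, illustrative names):

#include <stdlib.h>

struct node {
	struct node *next;
};

/* Tear down a list whose loop body frees the current entry: 'tmp'
 * must snapshot ->next before the node dies, which is exactly what
 * the _safe iterator variants encapsulate.
 */
static void destroy_all(struct node *head)
{
	struct node *n, *tmp;

	for (n = head; n; n = tmp) {
		tmp = n->next;	/* read before n is freed */
		free(n);
	}
}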
@@ -2580,11 +2621,15 @@ static void mptcp_worker(struct work_struct *work)
 	 * closed, but we need the msk around to reply to incoming DATA_FIN,
 	 * even if it is orphaned and in FIN_WAIT2 state
 	 */
-	if (sock_flag(sk, SOCK_DEAD) &&
-	    (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) {
-		inet_sk_state_store(sk, TCP_CLOSE);
-		__mptcp_destroy_sock(sk);
-		goto unlock;
+	if (sock_flag(sk, SOCK_DEAD)) {
+		if (mptcp_check_close_timeout(sk)) {
+			inet_sk_state_store(sk, TCP_CLOSE);
+			mptcp_do_fastclose(sk);
+		}
+		if (sk->sk_state == TCP_CLOSE) {
+			__mptcp_destroy_sock(sk);
+			goto unlock;
+		}
 	}
 
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
@@ -2825,6 +2870,18 @@ static void __mptcp_destroy_sock(struct sock *sk)
 	sock_put(sk);
 }
 
+static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
+{
+	/* Concurrent splices from sk_receive_queue into receive_queue will
+	 * always show at least one non-empty queue when checked in this order.
+	 */
+	if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
+	    skb_queue_empty_lockless(&msk->receive_queue))
+		return 0;
+
+	return EPOLLIN | EPOLLRDNORM;
+}
+
 bool __mptcp_close(struct sock *sk, long timeout)
 {
 	struct mptcp_subflow_context *subflow;
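The ordering argument in mptcp_check_readable() deserves spelling out: the splice publishes skbs on msk->receive_queue before it empties sk_receive_queue, so a lockless reader that tests sk_receive_queue first and receive_queue second can never see both empty while data exists; the opposite order opens a window where data moved between the two checks goes unnoticed. A simplified user-space model of that invariant, with C11 atomics standing in for the skb queues (an assumption-laden sketch: the real code relies on the splice's internal store order, modeled here as two explicit stores):

#include <stdatomic.h>
#include <stdbool.h>

static atomic_bool src_nonempty = true;		/* sk_receive_queue */
static atomic_bool dst_nonempty = false;	/* msk->receive_queue */

/* The mover (running under the msk data lock) makes the destination
 * non-empty before it empties the source.
 */
static void move_skbs(void)
{
	atomic_store(&dst_nonempty, true);
	atomic_store(&src_nonempty, false);
}

/* Lockless reader, same order as mptcp_check_readable(): source queue
 * first. If the first load sees "empty", the move already completed,
 * so the second load must see the data.
 */
static bool readable(void)
{
	return atomic_load(&src_nonempty) || atomic_load(&dst_nonempty);
}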
@@ -2838,8 +2895,13 @@ bool __mptcp_close(struct sock *sk, long timeout)
 		goto cleanup;
 	}
 
-	if (mptcp_close_state(sk))
+	if (mptcp_check_readable(msk)) {
+		/* the msk has read data, do the MPTCP equivalent of TCP reset */
+		inet_sk_state_store(sk, TCP_CLOSE);
+		mptcp_do_fastclose(sk);
+	} else if (mptcp_close_state(sk)) {
 		__mptcp_wr_shutdown(sk);
+	}
 
 	sk_stream_wait_close(sk, timeout);
 
@@ -3656,18 +3718,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 	return err;
 }
 
-static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
-{
-	/* Concurrent splices from sk_receive_queue into receive_queue will
-	 * always show at least one non-empty queue when checked in this order.
-	 */
-	if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
-	    skb_queue_empty_lockless(&msk->receive_queue))
-		return 0;
-
-	return EPOLLIN | EPOLLRDNORM;
-}
-
 static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
 {
 	struct sock *sk = (struct sock *)msk;
@@ -3718,7 +3768,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
 
-	/* This barrier is coupled with smp_wmb() in tcp_reset() */
+	/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
 	smp_rmb();
 	if (sk->sk_err)
 		mask |= EPOLLERR;
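The renamed pairing is the usual publish/observe pattern: the writer (now __mptcp_error_report()) stores sk_err and issues smp_wmb() before making visible whatever mptcp_poll() tests first; the reader tests that condition, issues smp_rmb(), then reads sk_err, so seeing the flag guarantees seeing the error. A user-space rendering with C11 fences in place of the kernel barriers (a sketch of the pattern, not the kernel primitives):

#include <stdatomic.h>

static int sk_err;			/* plain, unsynchronized field */
static atomic_int err_published;	/* what the poller tests first */

/* Writer, the __mptcp_error_report() side: make sk_err visible before
 * the flag that readers check first.
 */
static void error_report(int err)
{
	sk_err = err;
	atomic_thread_fence(memory_order_release);	/* smp_wmb() analog */
	atomic_store_explicit(&err_published, 1, memory_order_relaxed);
}

/* Reader, the mptcp_poll() side: a reader that observes the flag is
 * guaranteed by the fence pairing to also observe sk_err.
 */
static int poll_err(void)
{
	if (!atomic_load_explicit(&err_published, memory_order_relaxed))
		return 0;
	atomic_thread_fence(memory_order_acquire);	/* smp_rmb() analog */
	return sk_err;
}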
0 commit comments