
Commit 197060c

Merge branch 'mptcp-fastclose'
Mat Martineau says:

====================
mptcp: Fastclose edge cases and error handling

MPTCP has existing code to use the MP_FASTCLOSE option header, which
works like a RST for the MPTCP-level connection (regular RSTs only
affect specific subflows in MPTCP). This series has some improvements
for fastclose.

Patch 1 aligns fastclose socket error handling with TCP RST behavior
on TCP sockets.

Patch 2 adds use of MP_FASTCLOSE in some more edge cases, like file
descriptor close, FIN_WAIT timeout, and when the socket has unread
data.

Patch 3 updates the fastclose self tests.

Patch 4 does not change any code, just fixes some outdated comments.
====================

Signed-off-by: David S. Miller <[email protected]>
2 parents 7171e8a + d89e3ed commit 197060c
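
For context on patch 1: an incoming MP_FASTCLOSE is now expected to surface to applications the same way a TCP RST does. A minimal, hypothetical userspace sketch (the address, port, and error handling are placeholders, not part of this series):

#include <arpa/inet.h>
#include <errno.h>
#include <netinet/in.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262	/* value from linux/in.h */
#endif

int main(void)
{
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8080),	/* placeholder port */
	};
	char buf[128];
	int fd;

	inet_pton(AF_INET, "192.0.2.1", &addr.sin_addr);	/* placeholder */

	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP);
	if (fd < 0 || connect(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0)
		return 1;

	/* if the peer fastcloses the established connection, recv() now
	 * fails with ECONNRESET, just as a plain TCP RST would
	 */
	if (recv(fd, buf, sizeof(buf), 0) < 0 && errno == ECONNRESET)
		fprintf(stderr, "connection reset (MP_FASTCLOSE)\n");

	close(fd);
	return 0;
}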

File tree

3 files changed: +217 -62 lines changed


net/mptcp/protocol.c

Lines changed: 87 additions & 37 deletions
@@ -662,19 +662,19 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 
 		skb = skb_peek(&ssk->sk_receive_queue);
 		if (!skb) {
-			/* if no data is found, a racing workqueue/recvmsg
-			 * already processed the new data, stop here or we
-			 * can enter an infinite loop
+			/* With racing move_skbs_to_msk() and __mptcp_move_skbs(),
+			 * a different CPU can have already processed the pending
+			 * data, stop here or we can enter an infinite loop
 			 */
 			if (!moved)
 				done = true;
 			break;
 		}
 
 		if (__mptcp_check_fallback(msk)) {
-			/* if we are running under the workqueue, TCP could have
-			 * collapsed skbs between dummy map creation and now
-			 * be sure to adjust the size
+			/* Under fallback skbs have no MPTCP extension and TCP could
+			 * collapse them between the dummy map creation and the
+			 * current dequeue. Be sure to adjust the map size.
 			 */
 			map_remaining = skb->len;
 			subflow->map_data_len = skb->len;
@@ -1707,7 +1707,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 			goto out;
 		} else if (ret) {
 			release_sock(ssk);
-			goto out;
+			goto do_error;
 		}
 		release_sock(ssk);
 	}
@@ -1717,9 +1717,13 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
 		ret = sk_stream_wait_connect(sk, &timeo);
 		if (ret)
-			goto out;
+			goto do_error;
 	}
 
+	ret = -EPIPE;
+	if (unlikely(sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)))
+		goto do_error;
+
 	pfrag = sk_page_frag(sk);
 
 	while (msg_data_left(msg)) {
@@ -1728,11 +1732,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		bool dfrag_collapsed;
 		size_t psize, offset;
 
-		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) {
-			ret = -EPIPE;
-			goto out;
-		}
-
 		/* reuse tail pfrag, if possible, or carve a new one from the
 		 * page allocator
 		 */
@@ -1764,7 +1763,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		if (copy_page_from_iter(dfrag->page, offset, psize,
 					&msg->msg_iter) != psize) {
 			ret = -EFAULT;
-			goto out;
+			goto do_error;
 		}
 
 		/* data successfully copied into the write queue */
@@ -1796,15 +1795,22 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 			__mptcp_push_pending(sk, msg->msg_flags);
 			ret = sk_stream_wait_memory(sk, &timeo);
 			if (ret)
-				goto out;
+				goto do_error;
 		}
 
 	if (copied)
 		__mptcp_push_pending(sk, msg->msg_flags);
 
 out:
 	release_sock(sk);
-	return copied ? : ret;
+	return copied;
+
+do_error:
+	if (copied)
+		goto out;
+
+	copied = sk_stream_error(sk, msg->msg_flags, ret);
+	goto out;
 }
 
 static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
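
The new do_error label gives mptcp_sendmsg() the same shape as TCP's tcp_sendmsg_locked(): a partial write still returns the byte count, and the error (including the SIGPIPE side effect handled by sk_stream_error()) is reported only when nothing was queued. A freestanding sketch of that rule, with write_one() as a hypothetical transport hook:

#include <sys/types.h>

/* hypothetical transport hook: returns bytes accepted or a -errno value */
extern ssize_t write_one(const void *buf, size_t len);

ssize_t send_all(const void *buf, size_t len)
{
	ssize_t copied = 0;

	while ((size_t)copied < len) {
		ssize_t ret = write_one((const char *)buf + copied,
					len - copied);

		/* same rule as the do_error label above: a partial send
		 * beats the error; the error code is returned only when
		 * no data was queued at all
		 */
		if (ret < 0)
			return copied ? copied : ret;
		copied += ret;
	}
	return copied;
}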
@@ -2307,8 +2313,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
 
 	lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
 
-	if (flags & MPTCP_CF_FASTCLOSE)
+	if (flags & MPTCP_CF_FASTCLOSE) {
+		/* be sure to force the tcp_disconnect() path,
+		 * to generate the egress reset
+		 */
+		ssk->sk_lingertime = 0;
+		sock_set_flag(ssk, SOCK_LINGER);
 		subflow->send_fastclose = 1;
+	}
 
 	need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
 	if (!dispose_it) {
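
Setting SOCK_LINGER with a zero sk_lingertime on the subflow is the in-kernel counterpart of the classic userspace idiom for an abortive close, which makes the TCP close path emit a RST instead of a FIN:

#include <sys/socket.h>
#include <unistd.h>

static void abortive_close(int fd)
{
	struct linger lin = {
		.l_onoff  = 1,	/* linger on close... */
		.l_linger = 0,	/* ...for zero seconds: abort with a RST */
	};

	setsockopt(fd, SOL_SOCKET, SO_LINGER, &lin, sizeof(lin));
	close(fd);
}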
@@ -2441,12 +2453,31 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
 		unlock_sock_fast(tcp_sk, slow);
 	}
 
+	/* Mirror the tcp_reset() error propagation */
+	switch (sk->sk_state) {
+	case TCP_SYN_SENT:
+		sk->sk_err = ECONNREFUSED;
+		break;
+	case TCP_CLOSE_WAIT:
+		sk->sk_err = EPIPE;
+		break;
+	case TCP_CLOSE:
+		return;
+	default:
+		sk->sk_err = ECONNRESET;
+	}
+
 	inet_sk_state_store(sk, TCP_CLOSE);
 	sk->sk_shutdown = SHUTDOWN_MASK;
 	smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
 	set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
 
-	mptcp_close_wake_up(sk);
+	/* the calling mptcp_worker will properly destroy the socket */
+	if (sock_flag(sk, SOCK_DEAD))
+		return;
+
+	sk->sk_state_change(sk);
+	sk_error_report(sk);
 }
 
 static void __mptcp_retrans(struct sock *sk)
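
The switch mirrors tcp_reset(). Restated as a freestanding helper for illustration (using the TCP state constants as exposed by glibc's <netinet/tcp.h>; 0 means no error is reported because the socket is already closed):

#include <errno.h>
#include <netinet/tcp.h>	/* TCP_SYN_SENT, TCP_CLOSE_WAIT, ... */

static int reset_errno(int state)
{
	switch (state) {
	case TCP_SYN_SENT:	/* reset in reply to our SYN: refused */
		return ECONNREFUSED;
	case TCP_CLOSE_WAIT:	/* peer already sent a FIN: pipe is gone */
		return EPIPE;
	case TCP_CLOSE:		/* already closed, nothing to report */
		return 0;
	default:
		return ECONNRESET;
	}
}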
@@ -2552,6 +2583,16 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
 	mptcp_reset_timeout(msk, 0);
 }
 
+static void mptcp_do_fastclose(struct sock *sk)
+{
+	struct mptcp_subflow_context *subflow, *tmp;
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	mptcp_for_each_subflow_safe(msk, subflow, tmp)
+		__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
+				  subflow, MPTCP_CF_FASTCLOSE);
+}
+
 static void mptcp_worker(struct work_struct *work)
 {
 	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
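
mptcp_do_fastclose() iterates with the _safe variant because __mptcp_close_ssk() can unlink the subflow currently being visited. The same idiom on a plain singly linked list, for illustration:

#include <stdlib.h>

struct node {
	struct node *next;
};

static void close_all(struct node **head)
{
	struct node *cur = *head;

	while (cur) {
		struct node *next = cur->next;	/* saved before unlinking */

		free(cur);	/* stands in for __mptcp_close_ssk() */
		cur = next;	/* safe: never touches freed memory */
	}
	*head = NULL;
}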
@@ -2580,11 +2621,15 @@ static void mptcp_worker(struct work_struct *work)
 	 * closed, but we need the msk around to reply to incoming DATA_FIN,
 	 * even if it is orphaned and in FIN_WAIT2 state
 	 */
-	if (sock_flag(sk, SOCK_DEAD) &&
-	    (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) {
-		inet_sk_state_store(sk, TCP_CLOSE);
-		__mptcp_destroy_sock(sk);
-		goto unlock;
+	if (sock_flag(sk, SOCK_DEAD)) {
+		if (mptcp_check_close_timeout(sk)) {
+			inet_sk_state_store(sk, TCP_CLOSE);
+			mptcp_do_fastclose(sk);
+		}
+		if (sk->sk_state == TCP_CLOSE) {
+			__mptcp_destroy_sock(sk);
+			goto unlock;
+		}
 	}
 
 	if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
@@ -2825,6 +2870,18 @@ static void __mptcp_destroy_sock(struct sock *sk)
 	sock_put(sk);
 }
 
+static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
+{
+	/* Concurrent splices from sk_receive_queue into receive_queue will
+	 * always show at least one non-empty queue when checked in this order.
+	 */
+	if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
+	    skb_queue_empty_lockless(&msk->receive_queue))
+		return 0;
+
+	return EPOLLIN | EPOLLRDNORM;
+}
+
 bool __mptcp_close(struct sock *sk, long timeout)
 {
 	struct mptcp_subflow_context *subflow;
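
The comment in mptcp_check_readable() encodes an ordering argument: skbs only ever move from the subflow-facing queue into msk->receive_queue, and the splice (roughly) links them into the destination before reinitializing the source. A toy model of the same invariant with C11 atomics, using queue lengths as stand-ins for the skb queues:

#include <stdatomic.h>
#include <stdbool.h>

static atomic_int src_len;	/* sk_receive_queue stand-in */
static atomic_int dst_len;	/* msk->receive_queue stand-in */

/* splicer: publish to the destination before emptying the source */
static void splice_one(void)
{
	atomic_fetch_add(&dst_len, 1);
	atomic_fetch_sub(&src_len, 1);
}

/* reader: check the source first; if it reads empty, the splicer has
 * already incremented dst_len, so the second load sees the data and
 * the pair of checks can never miss queued data
 */
static bool check_readable(void)
{
	return atomic_load(&src_len) || atomic_load(&dst_len);
}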
@@ -2838,8 +2895,13 @@ bool __mptcp_close(struct sock *sk, long timeout)
 		goto cleanup;
 	}
 
-	if (mptcp_close_state(sk))
+	if (mptcp_check_readable(msk)) {
+		/* the msk has read data, do the MPTCP equivalent of TCP reset */
+		inet_sk_state_store(sk, TCP_CLOSE);
+		mptcp_do_fastclose(sk);
+	} else if (mptcp_close_state(sk)) {
 		__mptcp_wr_shutdown(sk);
+	}
 
 	sk_stream_wait_close(sk, timeout);
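This hunk covers the "unread data" edge case from the cover letter: as with TCP, closing a socket that still has queued receive data is an abortive close, now signalled at the MPTCP level with MP_FASTCLOSE. A hypothetical server-side fragment that would trigger it:

#include <sys/socket.h>
#include <unistd.h>

static void reject_client(int cfd)
{
	char req[64];

	/* read only part of the request... */
	recv(cfd, req, sizeof(req), 0);

	/* ...then close while more data is still queued: the unread
	 * bytes make the stack send a reset (MP_FASTCLOSE on an MPTCP
	 * socket) rather than a graceful FIN / DATA_FIN
	 */
	close(cfd);
}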

@@ -3656,18 +3718,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
 	return err;
 }
 
-static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
-{
-	/* Concurrent splices from sk_receive_queue into receive_queue will
-	 * always show at least one non-empty queue when checked in this order.
-	 */
-	if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) &&
-	    skb_queue_empty_lockless(&msk->receive_queue))
-		return 0;
-
-	return EPOLLIN | EPOLLRDNORM;
-}
-
 static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
 {
 	struct sock *sk = (struct sock *)msk;
@@ -3718,7 +3768,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
 
-	/* This barrier is coupled with smp_wmb() in tcp_reset() */
+	/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
 	smp_rmb();
 	if (sk->sk_err)
 		mask |= EPOLLERR;
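
From userspace, the error published via sk_err and flagged by mptcp_poll() is retrieved with SO_ERROR, exactly as for a TCP reset. A hypothetical epoll handler fragment:

#include <sys/epoll.h>
#include <sys/socket.h>

/* returns 0, or a negative errno taken from the socket */
static int consume_error(const struct epoll_event *ev)
{
	if (ev->events & EPOLLERR) {
		socklen_t len = sizeof(int);
		int err = 0;

		/* fetches and clears the pending socket error; ECONNRESET
		 * is expected after a fastclose of an established connection
		 */
		getsockopt(ev->data.fd, SOL_SOCKET, SO_ERROR, &err, &len);
		return -err;
	}
	return 0;
}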
