Skip to content

Commit 5a3afe4

Browse files
Paolo Abeni and intel-lab-lkp
authored and committed
mptcp: move the whole rx path under msk socket lock protection
After commit c2e6048 ("mptcp: fix race in release_cb") it's pretty straight forward move the whole MPTCP rx path under the socket lock leveraging the release_cb. We can drop a bunch of spin_lock pairs in the receive functions, use a single receive queue and invoke __mptcp_move_skbs only when subflows ask for it. This will allow more cleanup in the next patch Signed-off-by: Paolo Abeni <[email protected]>
1 parent ee024dd commit 5a3afe4

File tree

3 files changed

+36
-44
lines changed

3 files changed

+36
-44
lines changed

net/mptcp/fastopen.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ void mptcp_fastopen_subflow_synack_set_params(struct mptcp_subflow_context *subf
4949
MPTCP_SKB_CB(skb)->has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp;
5050

5151
mptcp_data_lock(sk);
52+
DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
5253

5354
mptcp_set_owner_r(skb, sk);
5455
__skb_queue_tail(&sk->sk_receive_queue, skb);
@@ -65,6 +66,7 @@ void __mptcp_fastopen_gen_msk_ackseq(struct mptcp_sock *msk, struct mptcp_subflo
6566
struct sock *sk = (struct sock *)msk;
6667
struct sk_buff *skb;
6768

69+
DEBUG_NET_WARN_ON_ONCE(sock_owned_by_user_nocheck(sk));
6870
skb = skb_peek_tail(&sk->sk_receive_queue);
6971
if (skb) {
7072
WARN_ON_ONCE(MPTCP_SKB_CB(skb)->end_seq);

net/mptcp/protocol.c

Lines changed: 33 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -845,7 +845,7 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
845845
return moved > 0;
846846
}
847847

848-
void mptcp_data_ready(struct sock *sk, struct sock *ssk)
848+
static void __mptcp_data_ready(struct sock *sk, struct sock *ssk)
849849
{
850850
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
851851
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -868,9 +868,17 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
868868
return;
869869

870870
/* Wake-up the reader only for in-sequence data */
871-
mptcp_data_lock(sk);
872871
if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
873872
sk->sk_data_ready(sk);
873+
}
874+
875+
void mptcp_data_ready(struct sock *sk, struct sock *ssk)
876+
{
877+
mptcp_data_lock(sk);
878+
if (!sock_owned_by_user(sk))
879+
__mptcp_data_ready(sk, ssk);
880+
else
881+
__set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags);
874882
mptcp_data_unlock(sk);
875883
}
876884

@@ -1077,9 +1085,7 @@ static void __mptcp_clean_una_wakeup(struct sock *sk)
10771085

10781086
static void mptcp_clean_una_wakeup(struct sock *sk)
10791087
{
1080-
mptcp_data_lock(sk);
10811088
__mptcp_clean_una_wakeup(sk);
1082-
mptcp_data_unlock(sk);
10831089
}
10841090

10851091
static void mptcp_enter_memory_pressure(struct sock *sk)
@@ -1939,16 +1945,22 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
19391945
goto out;
19401946
}
19411947

1942-
static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
1948+
static bool __mptcp_move_skbs(struct sock *sk);
1949+
1950+
static int __mptcp_recvmsg_mskq(struct sock *sk,
19431951
struct msghdr *msg,
19441952
size_t len, int flags,
19451953
struct scm_timestamping_internal *tss,
19461954
int *cmsg_flags)
19471955
{
1956+
struct mptcp_sock *msk = mptcp_sk(sk);
19481957
struct sk_buff *skb, *tmp;
19491958
int copied = 0;
19501959

1951-
skb_queue_walk_safe(&msk->receive_queue, skb, tmp) {
1960+
if (skb_queue_empty(&sk->sk_receive_queue) && !__mptcp_move_skbs(sk))
1961+
return 0;
1962+
1963+
skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
19521964
u32 offset = MPTCP_SKB_CB(skb)->offset;
19531965
u32 data_len = skb->len - offset;
19541966
u32 count = min_t(size_t, len - copied, data_len);
@@ -1983,7 +1995,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
19831995
/* we will bulk release the skb memory later */
19841996
skb->destructor = NULL;
19851997
WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
1986-
__skb_unlink(skb, &msk->receive_queue);
1998+
__skb_unlink(skb, &sk->sk_receive_queue);
19871999
__kfree_skb(skb);
19882000
msk->bytes_consumed += count;
19892001
}
@@ -2107,62 +2119,45 @@ static void __mptcp_update_rmem(struct sock *sk)
21072119
WRITE_ONCE(msk->rmem_released, 0);
21082120
}
21092121

2110-
static void __mptcp_splice_receive_queue(struct sock *sk)
2122+
static bool __mptcp_move_skbs(struct sock *sk)
21112123
{
21122124
struct mptcp_sock *msk = mptcp_sk(sk);
2113-
2114-
skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
2115-
}
2116-
2117-
static bool __mptcp_move_skbs(struct mptcp_sock *msk)
2118-
{
2119-
struct sock *sk = (struct sock *)msk;
21202125
unsigned int moved = 0;
21212126
bool ret, done;
21222127

21232128
do {
21242129
struct sock *ssk = mptcp_subflow_recv_lookup(msk);
21252130
bool slowpath;
21262131

2127-
/* we can have data pending in the subflows only if the msk
2128-
* receive buffer was full at subflow_data_ready() time,
2129-
* that is an unlikely slow path.
2130-
*/
2131-
if (likely(!ssk))
2132+
if (unlikely(!ssk))
21322133
break;
21332134

21342135
slowpath = lock_sock_fast(ssk);
2135-
mptcp_data_lock(sk);
21362136
__mptcp_update_rmem(sk);
21372137
done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
2138-
mptcp_data_unlock(sk);
21392138

21402139
if (unlikely(ssk->sk_err))
21412140
__mptcp_error_report(sk);
21422141
unlock_sock_fast(ssk, slowpath);
21432142
} while (!done);
21442143

2145-
/* acquire the data lock only if some input data is pending */
21462144
ret = moved > 0;
21472145
if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) ||
2148-
!skb_queue_empty_lockless(&sk->sk_receive_queue)) {
2149-
mptcp_data_lock(sk);
2146+
!skb_queue_empty(&sk->sk_receive_queue)) {
21502147
__mptcp_update_rmem(sk);
21512148
ret |= __mptcp_ofo_queue(msk);
2152-
__mptcp_splice_receive_queue(sk);
2153-
mptcp_data_unlock(sk);
21542149
}
21552150
if (ret)
21562151
mptcp_check_data_fin((struct sock *)msk);
2157-
return !skb_queue_empty(&msk->receive_queue);
2152+
return ret;
21582153
}
21592154

21602155
static unsigned int mptcp_inq_hint(const struct sock *sk)
21612156
{
21622157
const struct mptcp_sock *msk = mptcp_sk(sk);
21632158
const struct sk_buff *skb;
21642159

2165-
skb = skb_peek(&msk->receive_queue);
2160+
skb = skb_peek(&sk->sk_receive_queue);
21662161
if (skb) {
21672162
u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;
21682163

@@ -2208,7 +2203,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
22082203
while (copied < len) {
22092204
int err, bytes_read;
22102205

2211-
bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
2206+
bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
22122207
if (unlikely(bytes_read < 0)) {
22132208
if (!copied)
22142209
copied = bytes_read;
@@ -2220,8 +2215,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
22202215
/* be sure to advertise window change */
22212216
mptcp_cleanup_rbuf(msk);
22222217

2223-
if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
2224-
continue;
22252218

22262219
/* only the MPTCP socket status is relevant here. The exit
22272220
* conditions mirror closely tcp_recvmsg()
@@ -2246,7 +2239,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
22462239
/* race breaker: the shutdown could be after the
22472240
* previous receive queue check
22482241
*/
2249-
if (__mptcp_move_skbs(msk))
2242+
if (__mptcp_move_skbs(sk))
22502243
continue;
22512244
break;
22522245
}
@@ -2290,9 +2283,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
22902283
}
22912284
}
22922285

2293-
pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n",
2294-
msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
2295-
skb_queue_empty(&msk->receive_queue), copied);
2286+
pr_debug("msk=%p rx queue empty=%d copied=%d",
2287+
msk, skb_queue_empty(&sk->sk_receive_queue), copied);
22962288

22972289
release_sock(sk);
22982290
return copied;
@@ -2819,7 +2811,6 @@ static void __mptcp_init_sock(struct sock *sk)
28192811
INIT_LIST_HEAD(&msk->join_list);
28202812
INIT_LIST_HEAD(&msk->rtx_queue);
28212813
INIT_WORK(&msk->work, mptcp_worker);
2822-
__skb_queue_head_init(&msk->receive_queue);
28232814
msk->out_of_order_queue = RB_ROOT;
28242815
msk->first_pending = NULL;
28252816
WRITE_ONCE(msk->rmem_fwd_alloc, 0);
@@ -3402,12 +3393,8 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
34023393
mptcp_for_each_subflow_safe(msk, subflow, tmp)
34033394
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags);
34043395

3405-
/* move to sk_receive_queue, sk_stream_kill_queues will purge it */
3406-
mptcp_data_lock(sk);
3407-
skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
34083396
__skb_queue_purge(&sk->sk_receive_queue);
34093397
skb_rbtree_purge(&msk->out_of_order_queue);
3410-
mptcp_data_unlock(sk);
34113398

34123399
/* move all the rx fwd alloc into the sk_mem_reclaim_final in
34133400
* inet_sock_destruct() will dispose it
@@ -3450,7 +3437,8 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
34503437

34513438
#define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
34523439
BIT(MPTCP_RETRANSMIT) | \
3453-
BIT(MPTCP_FLUSH_JOIN_LIST))
3440+
BIT(MPTCP_FLUSH_JOIN_LIST) | \
3441+
BIT(MPTCP_DEQUEUE))
34543442

34553443
/* processes deferred events and flush wmem */
34563444
static void mptcp_release_cb(struct sock *sk)
@@ -3484,6 +3472,8 @@ static void mptcp_release_cb(struct sock *sk)
34843472
__mptcp_push_pending(sk, 0);
34853473
if (flags & BIT(MPTCP_RETRANSMIT))
34863474
__mptcp_retrans(sk);
3475+
if ((flags & BIT(MPTCP_DEQUEUE)) && __mptcp_move_skbs(sk))
3476+
sk->sk_data_ready(sk);
34873477

34883478
cond_resched();
34893479
spin_lock_bh(&sk->sk_lock.slock);
@@ -3721,7 +3711,7 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
37213711
return -EINVAL;
37223712

37233713
lock_sock(sk);
3724-
__mptcp_move_skbs(msk);
3714+
__mptcp_move_skbs(sk);
37253715
*karg = mptcp_inq_hint(sk);
37263716
release_sock(sk);
37273717
break;

net/mptcp/protocol.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@
124124
#define MPTCP_FLUSH_JOIN_LIST 5
125125
#define MPTCP_SYNC_STATE 6
126126
#define MPTCP_SYNC_SNDBUF 7
127+
#define MPTCP_DEQUEUE 8
127128

128129
struct mptcp_skb_cb {
129130
u64 map_seq;
@@ -322,7 +323,6 @@ struct mptcp_sock {
322323
struct work_struct work;
323324
struct sk_buff *ooo_last_skb;
324325
struct rb_root out_of_order_queue;
325-
struct sk_buff_head receive_queue;
326326
struct list_head conn_list;
327327
struct list_head rtx_queue;
328328
struct mptcp_data_frag *first_pending;

0 commit comments

Comments
 (0)