@@ -643,18 +643,6 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 	bool more_data_avail;
 	struct tcp_sock *tp;
 	bool done = false;
-	int sk_rbuf;
-
-	sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
-
-	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
-		int ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
-
-		if (unlikely(ssk_rbuf > sk_rbuf)) {
-			WRITE_ONCE(sk->sk_rcvbuf, ssk_rbuf);
-			sk_rbuf = ssk_rbuf;
-		}
-	}
 
 	pr_debug("msk=%p ssk=%p\n", msk, ssk);
 	tp = tcp_sk(ssk);
@@ -722,7 +710,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
 		WRITE_ONCE(tp->copied_seq, seq);
 		more_data_avail = mptcp_subflow_data_available(ssk);
 
-		if (atomic_read(&sk->sk_rmem_alloc) > sk_rbuf) {
+		if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) {
			done = true;
			break;
		}
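
For illustration only: a self-contained userspace sketch of the backpressure check this hunk keeps in the per-skb loop (all toy_* names are hypothetical; this models the idea, not the kernel implementation). The drain stops once accounted receive memory exceeds the current buffer limit, and the limit itself may be enlarged concurrently by another path, hence the atomic accesses:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy socket: accounted receive memory vs. its configurable limit. */
struct toy_sock {
	atomic_int rmem_alloc;	/* bytes currently queued */
	atomic_int rcvbuf;	/* limit; may grow concurrently */
};

/* The per-skb check: true means "stop moving data for now". */
static bool toy_over_limit(struct toy_sock *sk)
{
	return atomic_load(&sk->rmem_alloc) > atomic_load(&sk->rcvbuf);
}

int main(void)
{
	struct toy_sock sk;

	atomic_init(&sk.rmem_alloc, 70000);
	atomic_init(&sk.rcvbuf, 65536);
	printf("stop draining: %s\n", toy_over_limit(&sk) ? "yes" : "no");
	return 0;
}
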
@@ -846,11 +834,30 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 	return moved > 0;
 }
 
+static void __mptcp_rcvbuf_update(struct sock *sk, struct sock *ssk)
+{
+	if (unlikely(ssk->sk_rcvbuf > sk->sk_rcvbuf))
+		WRITE_ONCE(sk->sk_rcvbuf, ssk->sk_rcvbuf);
+}
+
+static void __mptcp_data_ready(struct sock *sk, struct sock *ssk)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+
+	__mptcp_rcvbuf_update(sk, ssk);
+
+	/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
+	if (__mptcp_rmem(sk) > sk->sk_rcvbuf)
+		return;
+
+	/* Wake-up the reader only for in-sequence data */
+	if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
+		sk->sk_data_ready(sk);
+}
+
 void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
-	struct mptcp_sock *msk = mptcp_sk(sk);
-	int sk_rbuf, ssk_rbuf;
 
 	/* The peer can send data while we are shutting down this
 	 * subflow at msk destruction time, but we must avoid enqueuing
@@ -859,19 +866,11 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 	if (unlikely(subflow->disposable))
 		return;
 
-	ssk_rbuf = READ_ONCE(ssk->sk_rcvbuf);
-	sk_rbuf = READ_ONCE(sk->sk_rcvbuf);
-	if (unlikely(ssk_rbuf > sk_rbuf))
-		sk_rbuf = ssk_rbuf;
-
-	/* over limit? can't append more skbs to msk, Also, no need to wake-up*/
-	if (__mptcp_rmem(sk) > sk_rbuf)
-		return;
-
-	/* Wake-up the reader only for in-sequence data */
 	mptcp_data_lock(sk);
-	if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk))
-		sk->sk_data_ready(sk);
+	if (!sock_owned_by_user(sk))
+		__mptcp_data_ready(sk, ssk);
+	else
+		__set_bit(MPTCP_DEQUEUE, &mptcp_sk(sk)->cb_flags);
 	mptcp_data_unlock(sk);
 }
 
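The new mptcp_data_ready() flow follows a classic deferral pattern: run the work inline when nobody owns the socket lock, otherwise record it as a flag bit for the lock owner to process at release time. A minimal userspace model of that pattern (hypothetical toy_* names; the real code uses sock_owned_by_user(), cb_flags and mptcp_release_cb()):

#include <stdbool.h>
#include <stdio.h>

#define TOY_DEQUEUE	(1UL << 0)

struct toy_sock {
	bool owned_by_user;	/* set while a process holds the lock */
	unsigned long cb_flags;	/* deferred-work bits */
};

static void toy_do_dequeue(struct toy_sock *sk)
{
	printf("dequeue now\n");
}

/* Softirq-side event: run inline only if nobody owns the socket. */
static void toy_data_ready(struct toy_sock *sk)
{
	if (!sk->owned_by_user)
		toy_do_dequeue(sk);
	else
		sk->cb_flags |= TOY_DEQUEUE;
}

/* Process-side unlock: flush whatever was deferred. */
static void toy_release(struct toy_sock *sk)
{
	if (sk->cb_flags & TOY_DEQUEUE) {
		sk->cb_flags &= ~TOY_DEQUEUE;
		toy_do_dequeue(sk);
	}
	sk->owned_by_user = false;
}

int main(void)
{
	struct toy_sock sk = { .owned_by_user = true };

	toy_data_ready(&sk);	/* deferred: owner holds the lock */
	toy_release(&sk);	/* runs the deferred dequeue */
	return 0;
}
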
@@ -1942,16 +1941,17 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
 
-static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
+static int __mptcp_recvmsg_mskq(struct sock *sk,
				struct msghdr *msg,
				size_t len, int flags,
				struct scm_timestamping_internal *tss,
				int *cmsg_flags)
 {
+	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct sk_buff *skb, *tmp;
 	int copied = 0;
 
-	skb_queue_walk_safe(&msk->receive_queue, skb, tmp) {
+	skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
 		u32 offset = MPTCP_SKB_CB(skb)->offset;
 		u32 data_len = skb->len - offset;
 		u32 count = min_t(size_t, len - copied, data_len);
@@ -1986,7 +1986,7 @@ static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk,
 			/* we will bulk release the skb memory later */
 			skb->destructor = NULL;
 			WRITE_ONCE(msk->rmem_released, msk->rmem_released + skb->truesize);
-			__skb_unlink(skb, &msk->receive_queue);
+			__skb_unlink(skb, &sk->sk_receive_queue);
 			__kfree_skb(skb);
 			msk->bytes_consumed += count;
 		}
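
__mptcp_recvmsg_mskq() now walks sk->sk_receive_queue directly with skb_queue_walk_safe(), which permits unlinking the current skb mid-walk because the next pointer is fetched before the body runs. A standalone sketch of that walk-and-unlink shape on a toy singly linked queue (hypothetical names, simplified accounting):

#include <stdio.h>
#include <stdlib.h>

/* Toy buffer in a singly linked receive queue. */
struct toy_skb {
	struct toy_skb *next;
	int len;
};

/* Copy up to budget bytes from the queue head, unlinking and freeing each
 * fully consumed buffer. The next pointer is cached before any unlink,
 * which is what makes the _safe walk legal; since bytes are consumed in
 * order, the node being freed is always the current head.
 */
static int toy_recvmsg(struct toy_skb **queue, int budget)
{
	struct toy_skb *skb = *queue, *tmp;
	int copied = 0;

	while (skb && copied < budget) {
		int count = skb->len < budget - copied ?
			    skb->len : budget - copied;

		tmp = skb->next;	/* cache before a possible unlink */
		copied += count;
		skb->len -= count;
		if (!skb->len) {	/* fully consumed: unlink and free */
			*queue = tmp;
			free(skb);
		}
		skb = tmp;
	}
	return copied;
}

int main(void)
{
	struct toy_skb *b = malloc(sizeof(*b));
	struct toy_skb *a = malloc(sizeof(*a));

	b->next = NULL; b->len = 300;
	a->next = b;    a->len = 100;

	struct toy_skb *q = a;

	printf("copied %d, head has %d bytes left\n",
	       toy_recvmsg(&q, 250), q->len);
	free(q);
	return 0;
}
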
@@ -2111,62 +2111,54 @@ static void __mptcp_update_rmem(struct sock *sk)
 	WRITE_ONCE(msk->rmem_released, 0);
 }
 
-static void __mptcp_splice_receive_queue(struct sock *sk)
+static bool __mptcp_move_skbs(struct sock *sk)
 {
+	struct mptcp_subflow_context *subflow;
 	struct mptcp_sock *msk = mptcp_sk(sk);
-
-	skb_queue_splice_tail_init(&sk->sk_receive_queue, &msk->receive_queue);
-}
-
-static bool __mptcp_move_skbs(struct mptcp_sock *msk)
-{
-	struct sock *sk = (struct sock *)msk;
 	unsigned int moved = 0;
 	bool ret, done;
 
+	/* verify we can move any data from the subflow, eventually updating
+	 * the msk receive buffer
+	 */
+	if (!(sk->sk_userlocks & SOCK_RCVBUF_LOCK))
+		mptcp_for_each_subflow(msk, subflow)
+			__mptcp_rcvbuf_update(sk, subflow->tcp_sock);
+
+	if (__mptcp_rmem(sk) > sk->sk_rcvbuf)
+		return false;
+
 	do {
 		struct sock *ssk = mptcp_subflow_recv_lookup(msk);
 		bool slowpath;
 
-		/* we can have data pending in the subflows only if the msk
-		 * receive buffer was full at subflow_data_ready() time,
-		 * that is an unlikely slow path.
-		 */
-		if (likely(!ssk))
+		if (unlikely(!ssk))
			break;
 
 		slowpath = lock_sock_fast(ssk);
-		mptcp_data_lock(sk);
 		__mptcp_update_rmem(sk);
 		done = __mptcp_move_skbs_from_subflow(msk, ssk, &moved);
-		mptcp_data_unlock(sk);
 
 		if (unlikely(ssk->sk_err))
			__mptcp_error_report(sk);
		unlock_sock_fast(ssk, slowpath);
	} while (!done);
 
-	/* acquire the data lock only if some input data is pending */
 	ret = moved > 0;
 	if (!RB_EMPTY_ROOT(&msk->out_of_order_queue) ||
-	    !skb_queue_empty_lockless(&sk->sk_receive_queue)) {
-		mptcp_data_lock(sk);
+	    !skb_queue_empty(&sk->sk_receive_queue)) {
 		__mptcp_update_rmem(sk);
 		ret |= __mptcp_ofo_queue(msk);
-		__mptcp_splice_receive_queue(sk);
-		mptcp_data_unlock(sk);
 	}
 	if (ret)
 		mptcp_check_data_fin((struct sock *)msk);
-	return !skb_queue_empty(&msk->receive_queue);
+	return ret;
 }
 
 static unsigned int mptcp_inq_hint(const struct sock *sk)
 {
 	const struct mptcp_sock *msk = mptcp_sk(sk);
 	const struct sk_buff *skb;
 
-	skb = skb_peek(&msk->receive_queue);
+	skb = skb_peek(&sk->sk_receive_queue);
 	if (skb) {
 		u64 hint_val = READ_ONCE(msk->ack_seq) - MPTCP_SKB_CB(skb)->map_seq;
 
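With every rx producer and consumer now serialized by the msk socket lock, __mptcp_move_skbs() can pull data straight from the subflows into the single sk_receive_queue; the old intermediate msk->receive_queue and its splice step disappear. A toy model of the resulting drain loop (hypothetical toy_* names, control flow only):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct toy_subflow {
	int avail;	/* in-sequence bytes waiting in this subflow */
};

/* Pretend lookup: any subflow that still has in-sequence data. */
static struct toy_subflow *toy_recv_lookup(struct toy_subflow *sf, int n)
{
	for (int i = 0; i < n; i++)
		if (sf[i].avail)
			return &sf[i];
	return NULL;
}

/* Mirrors the shape of __mptcp_move_skbs(): drain subflows one at a time
 * straight into the single receive queue, report whether anything moved.
 */
static bool toy_move_skbs(struct toy_subflow *sf, int n, int *rxq_bytes)
{
	int moved = 0;

	for (;;) {
		struct toy_subflow *ssk = toy_recv_lookup(sf, n);

		if (!ssk)
			break;
		*rxq_bytes += ssk->avail;	/* no intermediate queue */
		moved += ssk->avail;
		ssk->avail = 0;
	}
	return moved > 0;
}

int main(void)
{
	struct toy_subflow sf[2] = { { .avail = 1000 }, { .avail = 500 } };
	int rxq = 0;

	if (toy_move_skbs(sf, 2, &rxq))
		printf("moved %d bytes into the receive queue\n", rxq);
	return 0;
}
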
@@ -2212,7 +2204,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	while (copied < len) {
 		int err, bytes_read;
 
-		bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
+		bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
 		if (unlikely(bytes_read < 0)) {
			if (!copied)
				copied = bytes_read;
@@ -2221,7 +2213,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 		copied += bytes_read;
 
-		if (skb_queue_empty(&msk->receive_queue) && __mptcp_move_skbs(msk))
+		if (skb_queue_empty(&sk->sk_receive_queue) && __mptcp_move_skbs(sk))
			continue;
 
 		/* only the MPTCP socket status is relevant here. The exit
@@ -2247,7 +2239,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 			/* race breaker: the shutdown could be after the
			 * previous receive queue check
			 */
-			if (__mptcp_move_skbs(msk))
+			if (__mptcp_move_skbs(sk))
				continue;
			break;
		}
@@ -2291,9 +2283,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		}
	}
 
-	pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n",
-		 msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
-		 skb_queue_empty(&msk->receive_queue), copied);
+	pr_debug("msk=%p rx queue empty=%d copied=%d\n",
+		 msk, skb_queue_empty(&sk->sk_receive_queue), copied);
 
 	release_sock(sk);
 	return copied;
@@ -2820,7 +2811,6 @@ static void __mptcp_init_sock(struct sock *sk)
 	INIT_LIST_HEAD(&msk->join_list);
 	INIT_LIST_HEAD(&msk->rtx_queue);
 	INIT_WORK(&msk->work, mptcp_worker);
-	__skb_queue_head_init(&msk->receive_queue);
 	msk->out_of_order_queue = RB_ROOT;
 	msk->first_pending = NULL;
 	WRITE_ONCE(msk->rmem_fwd_alloc, 0);
@@ -3403,12 +3393,8 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags)
 	mptcp_for_each_subflow_safe(msk, subflow, tmp)
 		__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags);
 
-	/* move to sk_receive_queue, sk_stream_kill_queues will purge it */
-	mptcp_data_lock(sk);
-	skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue);
 	__skb_queue_purge(&sk->sk_receive_queue);
 	skb_rbtree_purge(&msk->out_of_order_queue);
-	mptcp_data_unlock(sk);
 
 	/* move all the rx fwd alloc into the sk_mem_reclaim_final in
 	 * inet_sock_destruct() will dispose it
@@ -3451,7 +3437,8 @@ void __mptcp_check_push(struct sock *sk, struct sock *ssk)
 
 #define MPTCP_FLAGS_PROCESS_CTX_NEED (BIT(MPTCP_PUSH_PENDING) | \
				      BIT(MPTCP_RETRANSMIT) | \
-				      BIT(MPTCP_FLUSH_JOIN_LIST))
+				      BIT(MPTCP_FLUSH_JOIN_LIST) | \
+				      BIT(MPTCP_DEQUEUE))
 
 /* processes deferred events and flush wmem */
 static void mptcp_release_cb(struct sock *sk)
@@ -3485,6 +3472,11 @@ static void mptcp_release_cb(struct sock *sk)
			__mptcp_push_pending(sk, 0);
		if (flags & BIT(MPTCP_RETRANSMIT))
			__mptcp_retrans(sk);
+		if ((flags & BIT(MPTCP_DEQUEUE)) && __mptcp_move_skbs(sk)) {
+			/* notify ack seq update */
+			mptcp_cleanup_rbuf(msk, 0);
+			sk->sk_data_ready(sk);
+		}
 
		cond_resched();
		spin_lock_bh(&sk->sk_lock.slock);
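
mptcp_release_cb() consumes deferred bits with a snapshot-and-clear loop: grab the flags under the socket spinlock, clear them, drop the lock to do the work, and repeat in case new events were queued meanwhile. A compilable userspace approximation of that loop (hypothetical toy_* names; link with -lpthread):

#include <pthread.h>
#include <stdio.h>

#define TOY_DEQUEUE	(1UL << 0)

struct toy_sock {
	pthread_spinlock_t slock;
	unsigned long cb_flags;	/* deferred-work bits, set by producers */
};

/* Snapshot-and-clear the deferred bits under the spinlock, process them
 * with the lock dropped, and loop in case new work arrived meanwhile.
 */
static void toy_release_cb(struct toy_sock *sk)
{
	pthread_spin_lock(&sk->slock);
	for (;;) {
		unsigned long flags = sk->cb_flags;

		if (!flags)
			break;
		sk->cb_flags = 0;	/* claim all pending work */
		pthread_spin_unlock(&sk->slock);

		if (flags & TOY_DEQUEUE)
			printf("deferred dequeue: drain, ack update, wake reader\n");

		pthread_spin_lock(&sk->slock);
	}
	pthread_spin_unlock(&sk->slock);
}

int main(void)
{
	struct toy_sock sk = { .cb_flags = TOY_DEQUEUE };

	pthread_spin_init(&sk.slock, PTHREAD_PROCESS_PRIVATE);
	toy_release_cb(&sk);
	pthread_spin_destroy(&sk.slock);
	return 0;
}
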
@@ -3722,7 +3714,8 @@ static int mptcp_ioctl(struct sock *sk, int cmd, int *karg)
			return -EINVAL;
 
		lock_sock(sk);
-		__mptcp_move_skbs(msk);
+		if (__mptcp_move_skbs(sk))
+			mptcp_cleanup_rbuf(msk, 0);
		*karg = mptcp_inq_hint(sk);
		release_sock(sk);
		break;
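
For SIOCINQ the ioctl path first drains the subflows, then mptcp_inq_hint() derives the readable byte count from sequence numbers rather than by walking the queue. A simplified standalone version of that arithmetic (hypothetical toy_* names and field layout):

#include <stdint.h>
#include <stdio.h>

/* Toy state: enough to compute a FIONREAD-style hint. */
struct toy_msk {
	uint64_t ack_seq;	/* next expected in-sequence byte */
	uint64_t head_seq;	/* mapping sequence of the queue head */
	uint32_t head_off;	/* head bytes already handed to userspace */
};

/* Readable bytes = everything received in sequence minus everything
 * already consumed from the head of the receive queue.
 */
static unsigned int toy_inq_hint(const struct toy_msk *msk)
{
	uint64_t hint = msk->ack_seq - msk->head_seq - msk->head_off;

	return hint > INT32_MAX ? INT32_MAX : (unsigned int)hint;
}

int main(void)
{
	struct toy_msk msk = {
		.ack_seq = 5000, .head_seq = 4000, .head_off = 100,
	};

	printf("SIOCINQ hint: %u bytes\n", toy_inq_hint(&msk));
	return 0;
}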