Skip to content

Commit a5efdbc

Browse files
Paolo Abeni authored and kuba-moo committed
mptcp: fix delegated action races
The delegated action infrastructure is prone to the following race: different CPUs can try to schedule different delegated actions on the same subflow at the same time. Each of them will check different bits via mptcp_subflow_delegate(), and will try to schedule the action on the related per-cpu napi instance. Depending on the timing, both can observe an empty delegated list node, causing the same entry to be added simultaneously on two different lists. The root cause is that the delegated actions infra does not provide a single synchronization point. Address the issue reserving an additional bit to mark the subflow as scheduled for delegation. Acquiring such bit guarantee the caller to own the delegated list node, and being able to safely schedule the subflow. Clear such bit only when the subflow scheduling is completed, ensuring proper barrier in place. Additionally swap the meaning of the delegated_action bitmask, to allow the usage of the existing helper to set multiple bit at once. Fixes: bcd9773 ("mptcp: use delegate action to schedule 3rd ack retrans") Cc: [email protected] Reviewed-by: Mat Martineau <[email protected]> Signed-off-by: Paolo Abeni <[email protected]> Signed-off-by: Mat Martineau <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
1 parent 3eef855 commit a5efdbc

File tree

3 files changed

+34
-39
lines changed

3 files changed

+34
-39
lines changed

net/mptcp/protocol.c

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3425,24 +3425,21 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
34253425
sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
34263426
}
34273427

3428-
void mptcp_subflow_process_delegated(struct sock *ssk)
3428+
void mptcp_subflow_process_delegated(struct sock *ssk, long status)
34293429
{
34303430
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
34313431
struct sock *sk = subflow->conn;
34323432

3433-
if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
3433+
if (status & BIT(MPTCP_DELEGATE_SEND)) {
34343434
mptcp_data_lock(sk);
34353435
if (!sock_owned_by_user(sk))
34363436
__mptcp_subflow_push_pending(sk, ssk, true);
34373437
else
34383438
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
34393439
mptcp_data_unlock(sk);
3440-
mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
34413440
}
3442-
if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
3441+
if (status & BIT(MPTCP_DELEGATE_ACK))
34433442
schedule_3rdack_retransmission(ssk);
3444-
mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
3445-
}
34463443
}
34473444

34483445
static int mptcp_hash(struct sock *sk)
@@ -3968,14 +3965,17 @@ static int mptcp_napi_poll(struct napi_struct *napi, int budget)
39683965
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
39693966

39703967
bh_lock_sock_nested(ssk);
3971-
if (!sock_owned_by_user(ssk) &&
3972-
mptcp_subflow_has_delegated_action(subflow))
3973-
mptcp_subflow_process_delegated(ssk);
3974-
/* ... elsewhere tcp_release_cb_override already processed
3975-
* the action or will do at next release_sock().
3976-
* In both case must dequeue the subflow here - on the same
3977-
* CPU that scheduled it.
3978-
*/
3968+
if (!sock_owned_by_user(ssk)) {
3969+
mptcp_subflow_process_delegated(ssk, xchg(&subflow->delegated_status, 0));
3970+
} else {
3971+
/* tcp_release_cb_override already processed
3972+
* the action or will do at next release_sock().
3973+
* In both case must dequeue the subflow here - on the same
3974+
* CPU that scheduled it.
3975+
*/
3976+
smp_wmb();
3977+
clear_bit(MPTCP_DELEGATE_SCHEDULED, &subflow->delegated_status);
3978+
}
39793979
bh_unlock_sock(ssk);
39803980
sock_put(ssk);
39813981

net/mptcp/protocol.h

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -444,9 +444,11 @@ struct mptcp_delegated_action {
444444

445445
DECLARE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
446446

447-
#define MPTCP_DELEGATE_SEND 0
448-
#define MPTCP_DELEGATE_ACK 1
447+
#define MPTCP_DELEGATE_SCHEDULED 0
448+
#define MPTCP_DELEGATE_SEND 1
449+
#define MPTCP_DELEGATE_ACK 2
449450

451+
#define MPTCP_DELEGATE_ACTIONS_MASK (~BIT(MPTCP_DELEGATE_SCHEDULED))
450452
/* MPTCP subflow context */
451453
struct mptcp_subflow_context {
452454
struct list_head node;/* conn_list of subflows */
@@ -564,23 +566,24 @@ mptcp_subflow_get_mapped_dsn(const struct mptcp_subflow_context *subflow)
564566
return subflow->map_seq + mptcp_subflow_get_map_offset(subflow);
565567
}
566568

567-
void mptcp_subflow_process_delegated(struct sock *ssk);
569+
void mptcp_subflow_process_delegated(struct sock *ssk, long actions);
568570

569571
static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, int action)
570572
{
573+
long old, set_bits = BIT(MPTCP_DELEGATE_SCHEDULED) | BIT(action);
571574
struct mptcp_delegated_action *delegated;
572575
bool schedule;
573576

574577
/* the caller held the subflow bh socket lock */
575578
lockdep_assert_in_softirq();
576579

577-
/* The implied barrier pairs with mptcp_subflow_delegated_done(), and
578-
* ensures the below list check sees list updates done prior to status
579-
* bit changes
580+
/* The implied barrier pairs with tcp_release_cb_override()
581+
* mptcp_napi_poll(), and ensures the below list check sees list
582+
* updates done prior to delegated status bits changes
580583
*/
581-
if (!test_and_set_bit(action, &subflow->delegated_status)) {
582-
/* still on delegated list from previous scheduling */
583-
if (!list_empty(&subflow->delegated_node))
584+
old = set_mask_bits(&subflow->delegated_status, 0, set_bits);
585+
if (!(old & BIT(MPTCP_DELEGATE_SCHEDULED))) {
586+
if (WARN_ON_ONCE(!list_empty(&subflow->delegated_node)))
584587
return;
585588

586589
delegated = this_cpu_ptr(&mptcp_delegated_actions);
@@ -605,20 +608,6 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated)
605608
return ret;
606609
}
607610

608-
static inline bool mptcp_subflow_has_delegated_action(const struct mptcp_subflow_context *subflow)
609-
{
610-
return !!READ_ONCE(subflow->delegated_status);
611-
}
612-
613-
static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *subflow, int action)
614-
{
615-
/* pairs with mptcp_subflow_delegate, ensures delegate_node is updated before
616-
* touching the status bit
617-
*/
618-
smp_wmb();
619-
clear_bit(action, &subflow->delegated_status);
620-
}
621-
622611
int mptcp_is_enabled(const struct net *net);
623612
unsigned int mptcp_get_add_addr_timeout(const struct net *net);
624613
int mptcp_is_checksum_enabled(const struct net *net);

net/mptcp/subflow.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1956,9 +1956,15 @@ static void subflow_ulp_clone(const struct request_sock *req,
19561956
static void tcp_release_cb_override(struct sock *ssk)
19571957
{
19581958
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
1959+
long status;
19591960

1960-
if (mptcp_subflow_has_delegated_action(subflow))
1961-
mptcp_subflow_process_delegated(ssk);
1961+
/* process and clear all the pending actions, but leave the subflow into
1962+
* the napi queue. To respect locking, only the same CPU that originated
1963+
* the action can touch the list. mptcp_napi_poll will take care of it.
1964+
*/
1965+
status = set_mask_bits(&subflow->delegated_status, MPTCP_DELEGATE_ACTIONS_MASK, 0);
1966+
if (status)
1967+
mptcp_subflow_process_delegated(ssk, status);
19621968

19631969
tcp_release_cb(ssk);
19641970
}

0 commit comments

Comments
 (0)