Skip to content

Commit 66dd101

Browse files
committed
Merge branch 'mptcp-fixes-for-connect-timeout-access-annotations-and-subflow-init'
Mat Martineau says: ==================== mptcp: Fixes for connect timeout, access annotations, and subflow init Patch 1 allows the SO_SNDTIMEO sockopt to correctly change the connect timeout on MPTCP sockets. Patches 2-5 add READ_ONCE()/WRITE_ONCE() annotations to fix KCSAN issues. Patch 6 correctly initializes some subflow fields on outgoing connections. ==================== Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Jakub Kicinski <[email protected]>
2 parents 3021dbf + 55b47ca commit 66dd101

File tree

3 files changed

+88
-95
lines changed

3 files changed

+88
-95
lines changed

net/mptcp/protocol.c

Lines changed: 78 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
9090
if (err)
9191
return err;
9292

93-
msk->first = ssock->sk;
94-
msk->subflow = ssock;
93+
WRITE_ONCE(msk->first, ssock->sk);
94+
WRITE_ONCE(msk->subflow, ssock);
9595
subflow = mptcp_subflow_ctx(ssock->sk);
9696
list_add(&subflow->node, &msk->conn_list);
9797
sock_hold(ssock->sk);
@@ -603,7 +603,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
603603
WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1);
604604
WRITE_ONCE(msk->rcv_data_fin, 0);
605605

606-
sk->sk_shutdown |= RCV_SHUTDOWN;
606+
WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
607607
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
608608

609609
switch (sk->sk_state) {
@@ -825,6 +825,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
825825
mptcp_data_unlock(sk);
826826
}
827827

828+
static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk)
829+
{
830+
mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq);
831+
WRITE_ONCE(msk->allow_infinite_fallback, false);
832+
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
833+
}
834+
828835
static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
829836
{
830837
struct sock *sk = (struct sock *)msk;
@@ -839,6 +846,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
839846
mptcp_sock_graft(ssk, sk->sk_socket);
840847

841848
mptcp_sockopt_sync_locked(msk, ssk);
849+
mptcp_subflow_joined(msk, ssk);
842850
return true;
843851
}
844852

@@ -910,7 +918,7 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
910918
/* hopefully temporary hack: propagate shutdown status
911919
* to msk, when all subflows agree on it
912920
*/
913-
sk->sk_shutdown |= RCV_SHUTDOWN;
921+
WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
914922

915923
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
916924
sk->sk_data_ready(sk);
@@ -1702,7 +1710,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
17021710

17031711
lock_sock(ssk);
17041712
msg->msg_flags |= MSG_DONTWAIT;
1705-
msk->connect_flags = O_NONBLOCK;
17061713
msk->fastopening = 1;
17071714
ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
17081715
msk->fastopening = 0;
@@ -2283,7 +2290,7 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
22832290
{
22842291
if (msk->subflow) {
22852292
iput(SOCK_INODE(msk->subflow));
2286-
msk->subflow = NULL;
2293+
WRITE_ONCE(msk->subflow, NULL);
22872294
}
22882295
}
22892296

@@ -2420,7 +2427,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
24202427
sock_put(ssk);
24212428

24222429
if (ssk == msk->first)
2423-
msk->first = NULL;
2430+
WRITE_ONCE(msk->first, NULL);
24242431

24252432
out:
24262433
if (ssk == msk->last_snd)
@@ -2527,7 +2534,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
25272534
}
25282535

25292536
inet_sk_state_store(sk, TCP_CLOSE);
2530-
sk->sk_shutdown = SHUTDOWN_MASK;
2537+
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
25312538
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
25322539
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
25332540

@@ -2721,7 +2728,7 @@ static int __mptcp_init_sock(struct sock *sk)
27212728
WRITE_ONCE(msk->rmem_released, 0);
27222729
msk->timer_ival = TCP_RTO_MIN;
27232730

2724-
msk->first = NULL;
2731+
WRITE_ONCE(msk->first, NULL);
27252732
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
27262733
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
27272734
WRITE_ONCE(msk->allow_infinite_fallback, true);
@@ -2959,7 +2966,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
29592966
bool do_cancel_work = false;
29602967
int subflows_alive = 0;
29612968

2962-
sk->sk_shutdown = SHUTDOWN_MASK;
2969+
WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
29632970

29642971
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
29652972
mptcp_listen_inuse_dec(sk);
@@ -3039,7 +3046,7 @@ static void mptcp_close(struct sock *sk, long timeout)
30393046
sock_put(sk);
30403047
}
30413048

3042-
void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
3049+
static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
30433050
{
30443051
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
30453052
const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
@@ -3102,7 +3109,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
31023109
mptcp_pm_data_reset(msk);
31033110
mptcp_ca_reset(sk);
31043111

3105-
sk->sk_shutdown = 0;
3112+
WRITE_ONCE(sk->sk_shutdown, 0);
31063113
sk_error_report(sk);
31073114
return 0;
31083115
}
@@ -3116,9 +3123,10 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
31163123
}
31173124
#endif
31183125

3119-
struct sock *mptcp_sk_clone(const struct sock *sk,
3120-
const struct mptcp_options_received *mp_opt,
3121-
struct request_sock *req)
3126+
struct sock *mptcp_sk_clone_init(const struct sock *sk,
3127+
const struct mptcp_options_received *mp_opt,
3128+
struct sock *ssk,
3129+
struct request_sock *req)
31223130
{
31233131
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
31243132
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
@@ -3137,7 +3145,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
31373145
msk = mptcp_sk(nsk);
31383146
msk->local_key = subflow_req->local_key;
31393147
msk->token = subflow_req->token;
3140-
msk->subflow = NULL;
3148+
WRITE_ONCE(msk->subflow, NULL);
31413149
msk->in_accept_queue = 1;
31423150
WRITE_ONCE(msk->fully_established, false);
31433151
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
@@ -3150,10 +3158,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
31503158
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
31513159

31523160
sock_reset_flag(nsk, SOCK_RCU_FREE);
3153-
/* will be fully established after successful MPC subflow creation */
3154-
inet_sk_state_store(nsk, TCP_SYN_RECV);
3155-
31563161
security_inet_csk_clone(nsk, req);
3162+
3163+
/* this can't race with mptcp_close(), as the msk is
3164+
* not yet exposted to user-space
3165+
*/
3166+
inet_sk_state_store(nsk, TCP_ESTABLISHED);
3167+
3168+
/* The msk maintain a ref to each subflow in the connections list */
3169+
WRITE_ONCE(msk->first, ssk);
3170+
list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
3171+
sock_hold(ssk);
3172+
3173+
/* new mpc subflow takes ownership of the newly
3174+
* created mptcp socket
3175+
*/
3176+
mptcp_token_accept(subflow_req, msk);
3177+
3178+
/* set msk addresses early to ensure mptcp_pm_get_local_id()
3179+
* uses the correct data
3180+
*/
3181+
mptcp_copy_inaddrs(nsk, ssk);
3182+
mptcp_propagate_sndbuf(nsk, ssk);
3183+
3184+
mptcp_rcv_space_init(msk, ssk);
31573185
bh_unlock_sock(nsk);
31583186

31593187
/* note: the newly allocated socket refcount is 2 now */
@@ -3185,7 +3213,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
31853213
struct socket *listener;
31863214
struct sock *newsk;
31873215

3188-
listener = msk->subflow;
3216+
listener = READ_ONCE(msk->subflow);
31893217
if (WARN_ON_ONCE(!listener)) {
31903218
*err = -EINVAL;
31913219
return NULL;
@@ -3465,14 +3493,16 @@ bool mptcp_finish_join(struct sock *ssk)
34653493
return false;
34663494
}
34673495

3468-
if (!list_empty(&subflow->node))
3469-
goto out;
3496+
/* active subflow, already present inside the conn_list */
3497+
if (!list_empty(&subflow->node)) {
3498+
mptcp_subflow_joined(msk, ssk);
3499+
return true;
3500+
}
34703501

34713502
if (!mptcp_pm_allow_new_subflow(msk))
34723503
goto err_prohibited;
34733504

3474-
/* active connections are already on conn_list.
3475-
* If we can't acquire msk socket lock here, let the release callback
3505+
/* If we can't acquire msk socket lock here, let the release callback
34763506
* handle it
34773507
*/
34783508
mptcp_data_lock(parent);
@@ -3495,11 +3525,6 @@ bool mptcp_finish_join(struct sock *ssk)
34953525
return false;
34963526
}
34973527

3498-
subflow->map_seq = READ_ONCE(msk->ack_seq);
3499-
WRITE_ONCE(msk->allow_infinite_fallback, false);
3500-
3501-
out:
3502-
mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
35033528
return true;
35043529
}
35053530

@@ -3617,9 +3642,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
36173642
* acquired the subflow socket lock, too.
36183643
*/
36193644
if (msk->fastopening)
3620-
err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
3645+
err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
36213646
else
3622-
err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
3647+
err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
36233648
inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
36243649

36253650
/* on successful connect, the msk state will be moved to established by
@@ -3632,12 +3657,10 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
36323657

36333658
mptcp_copy_inaddrs(sk, ssock->sk);
36343659

3635-
/* unblocking connect, mptcp-level inet_stream_connect will error out
3636-
* without changing the socket state, update it here.
3660+
/* silence EINPROGRESS and let the caller inet_stream_connect
3661+
* handle the connection in progress
36373662
*/
3638-
if (err == -EINPROGRESS)
3639-
sk->sk_socket->state = ssock->state;
3640-
return err;
3663+
return 0;
36413664
}
36423665

36433666
static struct proto mptcp_prot = {
@@ -3696,18 +3719,6 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
36963719
return err;
36973720
}
36983721

3699-
static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
3700-
int addr_len, int flags)
3701-
{
3702-
int ret;
3703-
3704-
lock_sock(sock->sk);
3705-
mptcp_sk(sock->sk)->connect_flags = flags;
3706-
ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
3707-
release_sock(sock->sk);
3708-
return ret;
3709-
}
3710-
37113722
static int mptcp_listen(struct socket *sock, int backlog)
37123723
{
37133724
struct mptcp_sock *msk = mptcp_sk(sock->sk);
@@ -3751,10 +3762,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
37513762

37523763
pr_debug("msk=%p", msk);
37533764

3754-
/* buggy applications can call accept on socket states other then LISTEN
3765+
/* Buggy applications can call accept on socket states other then LISTEN
37553766
* but no need to allocate the first subflow just to error out.
37563767
*/
3757-
ssock = msk->subflow;
3768+
ssock = READ_ONCE(msk->subflow);
37583769
if (!ssock)
37593770
return -EINVAL;
37603771

@@ -3800,9 +3811,6 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
38003811
{
38013812
struct sock *sk = (struct sock *)msk;
38023813

3803-
if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
3804-
return EPOLLOUT | EPOLLWRNORM;
3805-
38063814
if (sk_stream_is_writeable(sk))
38073815
return EPOLLOUT | EPOLLWRNORM;
38083816

@@ -3820,6 +3828,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
38203828
struct sock *sk = sock->sk;
38213829
struct mptcp_sock *msk;
38223830
__poll_t mask = 0;
3831+
u8 shutdown;
38233832
int state;
38243833

38253834
msk = mptcp_sk(sk);
@@ -3828,23 +3837,30 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
38283837
state = inet_sk_state_load(sk);
38293838
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
38303839
if (state == TCP_LISTEN) {
3831-
if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk))
3840+
struct socket *ssock = READ_ONCE(msk->subflow);
3841+
3842+
if (WARN_ON_ONCE(!ssock || !ssock->sk))
38323843
return 0;
38333844

3834-
return inet_csk_listen_poll(msk->subflow->sk);
3845+
return inet_csk_listen_poll(ssock->sk);
38353846
}
38363847

3848+
shutdown = READ_ONCE(sk->sk_shutdown);
3849+
if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
3850+
mask |= EPOLLHUP;
3851+
if (shutdown & RCV_SHUTDOWN)
3852+
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
3853+
38373854
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
38383855
mask |= mptcp_check_readable(msk);
3839-
mask |= mptcp_check_writeable(msk);
3856+
if (shutdown & SEND_SHUTDOWN)
3857+
mask |= EPOLLOUT | EPOLLWRNORM;
3858+
else
3859+
mask |= mptcp_check_writeable(msk);
38403860
} else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
38413861
/* cf tcp_poll() note about TFO */
38423862
mask |= EPOLLOUT | EPOLLWRNORM;
38433863
}
3844-
if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
3845-
mask |= EPOLLHUP;
3846-
if (sk->sk_shutdown & RCV_SHUTDOWN)
3847-
mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
38483864

38493865
/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
38503866
smp_rmb();
@@ -3859,7 +3875,7 @@ static const struct proto_ops mptcp_stream_ops = {
38593875
.owner = THIS_MODULE,
38603876
.release = inet_release,
38613877
.bind = mptcp_bind,
3862-
.connect = mptcp_stream_connect,
3878+
.connect = inet_stream_connect,
38633879
.socketpair = sock_no_socketpair,
38643880
.accept = mptcp_stream_accept,
38653881
.getname = inet_getname,
@@ -3954,7 +3970,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
39543970
.owner = THIS_MODULE,
39553971
.release = inet6_release,
39563972
.bind = mptcp_bind,
3957-
.connect = mptcp_stream_connect,
3973+
.connect = inet_stream_connect,
39583974
.socketpair = sock_no_socketpair,
39593975
.accept = mptcp_stream_accept,
39603976
.getname = inet6_getname,

net/mptcp/protocol.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,6 @@ struct mptcp_sock {
297297
nodelay:1,
298298
fastopening:1,
299299
in_accept_queue:1;
300-
int connect_flags;
301300
struct work_struct work;
302301
struct sk_buff *ooo_last_skb;
303302
struct rb_root out_of_order_queue;
@@ -306,7 +305,11 @@ struct mptcp_sock {
306305
struct list_head rtx_queue;
307306
struct mptcp_data_frag *first_pending;
308307
struct list_head join_list;
309-
struct socket *subflow; /* outgoing connect/listener/!mp_capable */
308+
struct socket *subflow; /* outgoing connect/listener/!mp_capable
309+
* The mptcp ops can safely dereference, using suitable
310+
* ONCE annotation, the subflow outside the socket
311+
* lock as such sock is freed after close().
312+
*/
310313
struct sock *first;
311314
struct mptcp_pm_data pm;
312315
struct {
@@ -613,7 +616,6 @@ int mptcp_is_checksum_enabled(const struct net *net);
613616
int mptcp_allow_join_id0(const struct net *net);
614617
unsigned int mptcp_stale_loss_cnt(const struct net *net);
615618
int mptcp_get_pm_type(const struct net *net);
616-
void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk);
617619
void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
618620
const struct mptcp_options_received *mp_opt);
619621
bool __mptcp_retransmit_pending_data(struct sock *sk);
@@ -683,9 +685,10 @@ void __init mptcp_proto_init(void);
683685
int __init mptcp_proto_v6_init(void);
684686
#endif
685687

686-
struct sock *mptcp_sk_clone(const struct sock *sk,
687-
const struct mptcp_options_received *mp_opt,
688-
struct request_sock *req);
688+
struct sock *mptcp_sk_clone_init(const struct sock *sk,
689+
const struct mptcp_options_received *mp_opt,
690+
struct sock *ssk,
691+
struct request_sock *req);
689692
void mptcp_get_options(const struct sk_buff *skb,
690693
struct mptcp_options_received *mp_opt);
691694

0 commit comments

Comments
 (0)