@@ -90,8 +90,8 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
90
90
if (err )
91
91
return err ;
92
92
93
- msk -> first = ssock -> sk ;
94
- msk -> subflow = ssock ;
93
+ WRITE_ONCE ( msk -> first , ssock -> sk ) ;
94
+ WRITE_ONCE ( msk -> subflow , ssock ) ;
95
95
subflow = mptcp_subflow_ctx (ssock -> sk );
96
96
list_add (& subflow -> node , & msk -> conn_list );
97
97
sock_hold (ssock -> sk );
@@ -603,7 +603,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
603
603
WRITE_ONCE (msk -> ack_seq , msk -> ack_seq + 1 );
604
604
WRITE_ONCE (msk -> rcv_data_fin , 0 );
605
605
606
- sk -> sk_shutdown |= RCV_SHUTDOWN ;
606
+ WRITE_ONCE ( sk -> sk_shutdown , sk -> sk_shutdown | RCV_SHUTDOWN ) ;
607
607
smp_mb__before_atomic (); /* SHUTDOWN must be visible first */
608
608
609
609
switch (sk -> sk_state ) {
@@ -825,6 +825,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
825
825
mptcp_data_unlock (sk );
826
826
}
827
827
828
+ static void mptcp_subflow_joined (struct mptcp_sock * msk , struct sock * ssk )
829
+ {
830
+ mptcp_subflow_ctx (ssk )-> map_seq = READ_ONCE (msk -> ack_seq );
831
+ WRITE_ONCE (msk -> allow_infinite_fallback , false);
832
+ mptcp_event (MPTCP_EVENT_SUB_ESTABLISHED , msk , ssk , GFP_ATOMIC );
833
+ }
834
+
828
835
static bool __mptcp_finish_join (struct mptcp_sock * msk , struct sock * ssk )
829
836
{
830
837
struct sock * sk = (struct sock * )msk ;
@@ -839,6 +846,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
839
846
mptcp_sock_graft (ssk , sk -> sk_socket );
840
847
841
848
mptcp_sockopt_sync_locked (msk , ssk );
849
+ mptcp_subflow_joined (msk , ssk );
842
850
return true;
843
851
}
844
852
@@ -910,7 +918,7 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
910
918
/* hopefully temporary hack: propagate shutdown status
911
919
* to msk, when all subflows agree on it
912
920
*/
913
- sk -> sk_shutdown |= RCV_SHUTDOWN ;
921
+ WRITE_ONCE ( sk -> sk_shutdown , sk -> sk_shutdown | RCV_SHUTDOWN ) ;
914
922
915
923
smp_mb__before_atomic (); /* SHUTDOWN must be visible first */
916
924
sk -> sk_data_ready (sk );
@@ -1702,7 +1710,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
1702
1710
1703
1711
lock_sock (ssk );
1704
1712
msg -> msg_flags |= MSG_DONTWAIT ;
1705
- msk -> connect_flags = O_NONBLOCK ;
1706
1713
msk -> fastopening = 1 ;
1707
1714
ret = tcp_sendmsg_fastopen (ssk , msg , copied_syn , len , NULL );
1708
1715
msk -> fastopening = 0 ;
@@ -2283,7 +2290,7 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
2283
2290
{
2284
2291
if (msk -> subflow ) {
2285
2292
iput (SOCK_INODE (msk -> subflow ));
2286
- msk -> subflow = NULL ;
2293
+ WRITE_ONCE ( msk -> subflow , NULL ) ;
2287
2294
}
2288
2295
}
2289
2296
@@ -2420,7 +2427,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
2420
2427
sock_put (ssk );
2421
2428
2422
2429
if (ssk == msk -> first )
2423
- msk -> first = NULL ;
2430
+ WRITE_ONCE ( msk -> first , NULL ) ;
2424
2431
2425
2432
out :
2426
2433
if (ssk == msk -> last_snd )
@@ -2527,7 +2534,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
2527
2534
}
2528
2535
2529
2536
inet_sk_state_store (sk , TCP_CLOSE );
2530
- sk -> sk_shutdown = SHUTDOWN_MASK ;
2537
+ WRITE_ONCE ( sk -> sk_shutdown , SHUTDOWN_MASK ) ;
2531
2538
smp_mb__before_atomic (); /* SHUTDOWN must be visible first */
2532
2539
set_bit (MPTCP_WORK_CLOSE_SUBFLOW , & msk -> flags );
2533
2540
@@ -2721,7 +2728,7 @@ static int __mptcp_init_sock(struct sock *sk)
2721
2728
WRITE_ONCE (msk -> rmem_released , 0 );
2722
2729
msk -> timer_ival = TCP_RTO_MIN ;
2723
2730
2724
- msk -> first = NULL ;
2731
+ WRITE_ONCE ( msk -> first , NULL ) ;
2725
2732
inet_csk (sk )-> icsk_sync_mss = mptcp_sync_mss ;
2726
2733
WRITE_ONCE (msk -> csum_enabled , mptcp_is_checksum_enabled (sock_net (sk )));
2727
2734
WRITE_ONCE (msk -> allow_infinite_fallback , true);
@@ -2959,7 +2966,7 @@ bool __mptcp_close(struct sock *sk, long timeout)
2959
2966
bool do_cancel_work = false;
2960
2967
int subflows_alive = 0 ;
2961
2968
2962
- sk -> sk_shutdown = SHUTDOWN_MASK ;
2969
+ WRITE_ONCE ( sk -> sk_shutdown , SHUTDOWN_MASK ) ;
2963
2970
2964
2971
if ((1 << sk -> sk_state ) & (TCPF_LISTEN | TCPF_CLOSE )) {
2965
2972
mptcp_listen_inuse_dec (sk );
@@ -3039,7 +3046,7 @@ static void mptcp_close(struct sock *sk, long timeout)
3039
3046
sock_put (sk );
3040
3047
}
3041
3048
3042
- void mptcp_copy_inaddrs (struct sock * msk , const struct sock * ssk )
3049
+ static void mptcp_copy_inaddrs (struct sock * msk , const struct sock * ssk )
3043
3050
{
3044
3051
#if IS_ENABLED (CONFIG_MPTCP_IPV6 )
3045
3052
const struct ipv6_pinfo * ssk6 = inet6_sk (ssk );
@@ -3102,7 +3109,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
3102
3109
mptcp_pm_data_reset (msk );
3103
3110
mptcp_ca_reset (sk );
3104
3111
3105
- sk -> sk_shutdown = 0 ;
3112
+ WRITE_ONCE ( sk -> sk_shutdown , 0 ) ;
3106
3113
sk_error_report (sk );
3107
3114
return 0 ;
3108
3115
}
@@ -3116,9 +3123,10 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
3116
3123
}
3117
3124
#endif
3118
3125
3119
- struct sock * mptcp_sk_clone (const struct sock * sk ,
3120
- const struct mptcp_options_received * mp_opt ,
3121
- struct request_sock * req )
3126
+ struct sock * mptcp_sk_clone_init (const struct sock * sk ,
3127
+ const struct mptcp_options_received * mp_opt ,
3128
+ struct sock * ssk ,
3129
+ struct request_sock * req )
3122
3130
{
3123
3131
struct mptcp_subflow_request_sock * subflow_req = mptcp_subflow_rsk (req );
3124
3132
struct sock * nsk = sk_clone_lock (sk , GFP_ATOMIC );
@@ -3137,7 +3145,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
3137
3145
msk = mptcp_sk (nsk );
3138
3146
msk -> local_key = subflow_req -> local_key ;
3139
3147
msk -> token = subflow_req -> token ;
3140
- msk -> subflow = NULL ;
3148
+ WRITE_ONCE ( msk -> subflow , NULL ) ;
3141
3149
msk -> in_accept_queue = 1 ;
3142
3150
WRITE_ONCE (msk -> fully_established , false);
3143
3151
if (mp_opt -> suboptions & OPTION_MPTCP_CSUMREQD )
@@ -3150,10 +3158,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
3150
3158
msk -> setsockopt_seq = mptcp_sk (sk )-> setsockopt_seq ;
3151
3159
3152
3160
sock_reset_flag (nsk , SOCK_RCU_FREE );
3153
- /* will be fully established after successful MPC subflow creation */
3154
- inet_sk_state_store (nsk , TCP_SYN_RECV );
3155
-
3156
3161
security_inet_csk_clone (nsk , req );
3162
+
3163
+ /* this can't race with mptcp_close(), as the msk is
3164
+ * not yet exposed to user-space
3165
+ */
3166
+ inet_sk_state_store (nsk , TCP_ESTABLISHED );
3167
+
3168
+ /* The msk maintains a ref to each subflow in the connections list */
3169
+ WRITE_ONCE (msk -> first , ssk );
3170
+ list_add (& mptcp_subflow_ctx (ssk )-> node , & msk -> conn_list );
3171
+ sock_hold (ssk );
3172
+
3173
+ /* new mpc subflow takes ownership of the newly
3174
+ * created mptcp socket
3175
+ */
3176
+ mptcp_token_accept (subflow_req , msk );
3177
+
3178
+ /* set msk addresses early to ensure mptcp_pm_get_local_id()
3179
+ * uses the correct data
3180
+ */
3181
+ mptcp_copy_inaddrs (nsk , ssk );
3182
+ mptcp_propagate_sndbuf (nsk , ssk );
3183
+
3184
+ mptcp_rcv_space_init (msk , ssk );
3157
3185
bh_unlock_sock (nsk );
3158
3186
3159
3187
/* note: the newly allocated socket refcount is 2 now */
@@ -3185,7 +3213,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
3185
3213
struct socket * listener ;
3186
3214
struct sock * newsk ;
3187
3215
3188
- listener = msk -> subflow ;
3216
+ listener = READ_ONCE ( msk -> subflow ) ;
3189
3217
if (WARN_ON_ONCE (!listener )) {
3190
3218
* err = - EINVAL ;
3191
3219
return NULL ;
@@ -3465,14 +3493,16 @@ bool mptcp_finish_join(struct sock *ssk)
3465
3493
return false;
3466
3494
}
3467
3495
3468
- if (!list_empty (& subflow -> node ))
3469
- goto out ;
3496
+ /* active subflow, already present inside the conn_list */
3497
+ if (!list_empty (& subflow -> node )) {
3498
+ mptcp_subflow_joined (msk , ssk );
3499
+ return true;
3500
+ }
3470
3501
3471
3502
if (!mptcp_pm_allow_new_subflow (msk ))
3472
3503
goto err_prohibited ;
3473
3504
3474
- /* active connections are already on conn_list.
3475
- * If we can't acquire msk socket lock here, let the release callback
3505
+ /* If we can't acquire msk socket lock here, let the release callback
3476
3506
* handle it
3477
3507
*/
3478
3508
mptcp_data_lock (parent );
@@ -3495,11 +3525,6 @@ bool mptcp_finish_join(struct sock *ssk)
3495
3525
return false;
3496
3526
}
3497
3527
3498
- subflow -> map_seq = READ_ONCE (msk -> ack_seq );
3499
- WRITE_ONCE (msk -> allow_infinite_fallback , false);
3500
-
3501
- out :
3502
- mptcp_event (MPTCP_EVENT_SUB_ESTABLISHED , msk , ssk , GFP_ATOMIC );
3503
3528
return true;
3504
3529
}
3505
3530
@@ -3617,9 +3642,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
3617
3642
* acquired the subflow socket lock, too.
3618
3643
*/
3619
3644
if (msk -> fastopening )
3620
- err = __inet_stream_connect (ssock , uaddr , addr_len , msk -> connect_flags , 1 );
3645
+ err = __inet_stream_connect (ssock , uaddr , addr_len , O_NONBLOCK , 1 );
3621
3646
else
3622
- err = inet_stream_connect (ssock , uaddr , addr_len , msk -> connect_flags );
3647
+ err = inet_stream_connect (ssock , uaddr , addr_len , O_NONBLOCK );
3623
3648
inet_sk (sk )-> defer_connect = inet_sk (ssock -> sk )-> defer_connect ;
3624
3649
3625
3650
/* on successful connect, the msk state will be moved to established by
@@ -3632,12 +3657,10 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
3632
3657
3633
3658
mptcp_copy_inaddrs (sk , ssock -> sk );
3634
3659
3635
- /* unblocking connect, mptcp-level inet_stream_connect will error out
3636
- * without changing the socket state, update it here.
3660
+ /* silence EINPROGRESS and let the caller inet_stream_connect
3661
+ * handle the connection in progress
3637
3662
*/
3638
- if (err == - EINPROGRESS )
3639
- sk -> sk_socket -> state = ssock -> state ;
3640
- return err ;
3663
+ return 0 ;
3641
3664
}
3642
3665
3643
3666
static struct proto mptcp_prot = {
@@ -3696,18 +3719,6 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
3696
3719
return err ;
3697
3720
}
3698
3721
3699
- static int mptcp_stream_connect (struct socket * sock , struct sockaddr * uaddr ,
3700
- int addr_len , int flags )
3701
- {
3702
- int ret ;
3703
-
3704
- lock_sock (sock -> sk );
3705
- mptcp_sk (sock -> sk )-> connect_flags = flags ;
3706
- ret = __inet_stream_connect (sock , uaddr , addr_len , flags , 0 );
3707
- release_sock (sock -> sk );
3708
- return ret ;
3709
- }
3710
-
3711
3722
static int mptcp_listen (struct socket * sock , int backlog )
3712
3723
{
3713
3724
struct mptcp_sock * msk = mptcp_sk (sock -> sk );
@@ -3751,10 +3762,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
3751
3762
3752
3763
pr_debug ("msk=%p" , msk );
3753
3764
3754
- /* buggy applications can call accept on socket states other then LISTEN
3765
+ /* Buggy applications can call accept on socket states other than LISTEN
3755
3766
* but no need to allocate the first subflow just to error out.
3756
3767
*/
3757
- ssock = msk -> subflow ;
3768
+ ssock = READ_ONCE ( msk -> subflow ) ;
3758
3769
if (!ssock )
3759
3770
return - EINVAL ;
3760
3771
@@ -3800,9 +3811,6 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
3800
3811
{
3801
3812
struct sock * sk = (struct sock * )msk ;
3802
3813
3803
- if (unlikely (sk -> sk_shutdown & SEND_SHUTDOWN ))
3804
- return EPOLLOUT | EPOLLWRNORM ;
3805
-
3806
3814
if (sk_stream_is_writeable (sk ))
3807
3815
return EPOLLOUT | EPOLLWRNORM ;
3808
3816
@@ -3820,6 +3828,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
3820
3828
struct sock * sk = sock -> sk ;
3821
3829
struct mptcp_sock * msk ;
3822
3830
__poll_t mask = 0 ;
3831
+ u8 shutdown ;
3823
3832
int state ;
3824
3833
3825
3834
msk = mptcp_sk (sk );
@@ -3828,23 +3837,30 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
3828
3837
state = inet_sk_state_load (sk );
3829
3838
pr_debug ("msk=%p state=%d flags=%lx" , msk , state , msk -> flags );
3830
3839
if (state == TCP_LISTEN ) {
3831
- if (WARN_ON_ONCE (!msk -> subflow || !msk -> subflow -> sk ))
3840
+ struct socket * ssock = READ_ONCE (msk -> subflow );
3841
+
3842
+ if (WARN_ON_ONCE (!ssock || !ssock -> sk ))
3832
3843
return 0 ;
3833
3844
3834
- return inet_csk_listen_poll (msk -> subflow -> sk );
3845
+ return inet_csk_listen_poll (ssock -> sk );
3835
3846
}
3836
3847
3848
+ shutdown = READ_ONCE (sk -> sk_shutdown );
3849
+ if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE )
3850
+ mask |= EPOLLHUP ;
3851
+ if (shutdown & RCV_SHUTDOWN )
3852
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP ;
3853
+
3837
3854
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV ) {
3838
3855
mask |= mptcp_check_readable (msk );
3839
- mask |= mptcp_check_writeable (msk );
3856
+ if (shutdown & SEND_SHUTDOWN )
3857
+ mask |= EPOLLOUT | EPOLLWRNORM ;
3858
+ else
3859
+ mask |= mptcp_check_writeable (msk );
3840
3860
} else if (state == TCP_SYN_SENT && inet_sk (sk )-> defer_connect ) {
3841
3861
/* cf tcp_poll() note about TFO */
3842
3862
mask |= EPOLLOUT | EPOLLWRNORM ;
3843
3863
}
3844
- if (sk -> sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE )
3845
- mask |= EPOLLHUP ;
3846
- if (sk -> sk_shutdown & RCV_SHUTDOWN )
3847
- mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP ;
3848
3864
3849
3865
/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
3850
3866
smp_rmb ();
@@ -3859,7 +3875,7 @@ static const struct proto_ops mptcp_stream_ops = {
3859
3875
.owner = THIS_MODULE ,
3860
3876
.release = inet_release ,
3861
3877
.bind = mptcp_bind ,
3862
- .connect = mptcp_stream_connect ,
3878
+ .connect = inet_stream_connect ,
3863
3879
.socketpair = sock_no_socketpair ,
3864
3880
.accept = mptcp_stream_accept ,
3865
3881
.getname = inet_getname ,
@@ -3954,7 +3970,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
3954
3970
.owner = THIS_MODULE ,
3955
3971
.release = inet6_release ,
3956
3972
.bind = mptcp_bind ,
3957
- .connect = mptcp_stream_connect ,
3973
+ .connect = inet_stream_connect ,
3958
3974
.socketpair = sock_no_socketpair ,
3959
3975
.accept = mptcp_stream_accept ,
3960
3976
.getname = inet6_getname ,
0 commit comments