@@ -921,13 +921,26 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
921
921
return 0 ;
922
922
}
923
923
924
+ /*
+  * Lock two rings at once. The rings must be different!
+  *
+  * Takes the uring_lock of both @ctx1 and @ctx2. The two pointers are
+  * compared and, if needed, swapped so that the ring at the lower address
+  * is always locked first, whichever order the caller passed them in.
+  * The second lock is taken with mutex_lock_nested() and
+  * SINGLE_DEPTH_NESTING, i.e. it is annotated as a nested acquisition
+  * rather than taken with a plain mutex_lock().
+  *
+  * Passing the same ring for both arguments would try to take the same
+  * uring_lock twice, so callers have to handle ctx1 == ctx2 themselves.
+  *
+  * Returns nothing. On return both locks are held, and it is up to the
+  * caller to release them; nothing is unlocked here.
+  *
+  * NOTE(review): the fixed address order is presumably what prevents an
+  * ABBA deadlock when two tasks lock the same pair of rings with the
+  * arguments reversed - confirm no other path takes two uring_locks in a
+  * different order.
+  */
925
+ static void lock_two_rings (struct io_ring_ctx * ctx1 , struct io_ring_ctx * ctx2 )
926
+ {
927
+ if (ctx1 > ctx2 )
928
+ swap (ctx1 , ctx2 ); /* only the local parameter copies are exchanged */
929
+ mutex_lock (& ctx1 -> uring_lock ); /* lower-address ring first */
930
+ mutex_lock_nested (& ctx2 -> uring_lock , SINGLE_DEPTH_NESTING ); /* then the higher-address ring */
931
+ }
932
+
933
+ /* Both rings are locked by the caller. */
924
934
static int io_clone_buffers (struct io_ring_ctx * ctx , struct io_ring_ctx * src_ctx ,
925
935
struct io_uring_clone_buffers * arg )
926
936
{
927
937
struct io_rsrc_data data ;
928
938
int i , ret , off , nr ;
929
939
unsigned int nbufs ;
930
940
941
+ lockdep_assert_held (& ctx -> uring_lock );
942
+ lockdep_assert_held (& src_ctx -> uring_lock );
943
+
931
944
/*
932
945
* Accounting state is shared between the two rings; that only works if
933
946
* both rings are accounted towards the same counters.
@@ -942,7 +955,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
942
955
if (ctx -> buf_table .nr && !(arg -> flags & IORING_REGISTER_DST_REPLACE ))
943
956
return - EBUSY ;
944
957
945
- nbufs = READ_ONCE ( src_ctx -> buf_table .nr ) ;
958
+ nbufs = src_ctx -> buf_table .nr ;
946
959
if (!arg -> nr )
947
960
arg -> nr = nbufs ;
948
961
else if (arg -> nr > nbufs )
@@ -966,27 +979,20 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
966
979
}
967
980
}
968
981
969
- /*
970
- * Drop our own lock here. We'll setup the data we need and reference
971
- * the source buffers, then re-grab, check, and assign at the end.
972
- */
973
- mutex_unlock (& ctx -> uring_lock );
974
-
975
- mutex_lock (& src_ctx -> uring_lock );
976
982
ret = - ENXIO ;
977
983
nbufs = src_ctx -> buf_table .nr ;
978
984
if (!nbufs )
979
- goto out_unlock ;
985
+ goto out_free ;
980
986
ret = - EINVAL ;
981
987
if (!arg -> nr )
982
988
arg -> nr = nbufs ;
983
989
else if (arg -> nr > nbufs )
984
- goto out_unlock ;
990
+ goto out_free ;
985
991
ret = - EOVERFLOW ;
986
992
if (check_add_overflow (arg -> nr , arg -> src_off , & off ))
987
- goto out_unlock ;
993
+ goto out_free ;
988
994
if (off > nbufs )
989
- goto out_unlock ;
995
+ goto out_free ;
990
996
991
997
off = arg -> dst_off ;
992
998
i = arg -> src_off ;
@@ -1001,7 +1007,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
1001
1007
dst_node = io_rsrc_node_alloc (ctx , IORING_RSRC_BUFFER );
1002
1008
if (!dst_node ) {
1003
1009
ret = - ENOMEM ;
1004
- goto out_unlock ;
1010
+ goto out_free ;
1005
1011
}
1006
1012
1007
1013
refcount_inc (& src_node -> buf -> refs );
@@ -1011,10 +1017,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
1011
1017
i ++ ;
1012
1018
}
1013
1019
1014
- /* Have a ref on the bufs now, drop src lock and re-grab our own lock */
1015
- mutex_unlock (& src_ctx -> uring_lock );
1016
- mutex_lock (& ctx -> uring_lock );
1017
-
1018
1020
/*
1019
1021
* If asked for replace, put the old table. data->nodes[] holds both
1020
1022
* old and new nodes at this point.
@@ -1023,24 +1025,17 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
1023
1025
io_rsrc_data_free (ctx , & ctx -> buf_table );
1024
1026
1025
1027
/*
1026
- * ctx->buf_table should be empty now - either the contents are being
1027
- * replaced and we just freed the table, or someone raced setting up
1028
- * a buffer table while the clone was happening. If not empty, fall
1029
- * through to failure handling .
1028
+ * ctx->buf_table must be empty now - either the contents are being
1029
+ * replaced and we just freed the table, or the contents are being
1030
+ * copied to a ring that does not have buffers yet (checked at function
1031
+ * entry) .
1030
1032
*/
1031
- if (!ctx -> buf_table .nr ) {
1032
- ctx -> buf_table = data ;
1033
- return 0 ;
1034
- }
1033
+ WARN_ON_ONCE (ctx -> buf_table .nr );
1034
+ ctx -> buf_table = data ;
1035
+ return 0 ;
1035
1036
1036
- mutex_unlock (& ctx -> uring_lock );
1037
- mutex_lock (& src_ctx -> uring_lock );
1038
- /* someone raced setting up buffers, dump ours */
1039
- ret = - EBUSY ;
1040
- out_unlock :
1037
+ out_free :
1041
1038
io_rsrc_data_free (ctx , & data );
1042
- mutex_unlock (& src_ctx -> uring_lock );
1043
- mutex_lock (& ctx -> uring_lock );
1044
1039
return ret ;
1045
1040
}
1046
1041
@@ -1054,6 +1049,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
1054
1049
int io_register_clone_buffers (struct io_ring_ctx * ctx , void __user * arg )
1055
1050
{
1056
1051
struct io_uring_clone_buffers buf ;
1052
+ struct io_ring_ctx * src_ctx ;
1057
1053
bool registered_src ;
1058
1054
struct file * file ;
1059
1055
int ret ;
@@ -1071,7 +1067,18 @@ int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg)
1071
1067
file = io_uring_register_get_file (buf .src_fd , registered_src );
1072
1068
if (IS_ERR (file ))
1073
1069
return PTR_ERR (file );
1074
- ret = io_clone_buffers (ctx , file -> private_data , & buf );
1070
+
1071
+ src_ctx = file -> private_data ;
1072
+ if (src_ctx != ctx ) {
1073
+ mutex_unlock (& ctx -> uring_lock );
1074
+ lock_two_rings (ctx , src_ctx );
1075
+ }
1076
+
1077
+ ret = io_clone_buffers (ctx , src_ctx , & buf );
1078
+
1079
+ if (src_ctx != ctx )
1080
+ mutex_unlock (& src_ctx -> uring_lock );
1081
+
1075
1082
if (!registered_src )
1076
1083
fput (file );
1077
1084
return ret ;
0 commit comments