@@ -17,8 +17,6 @@
 
 #define IO_BUFFER_LIST_BUF_PER_PAGE (PAGE_SIZE / sizeof(struct io_uring_buf))
 
-#define BGID_ARRAY	64
-
 /* BIDs are addressed by a 16-bit field in a CQE */
 #define MAX_BIDS_PER_BGID (1 << 16)
 
@@ -40,13 +38,9 @@ struct io_buf_free {
 	int				inuse;
 };
 
-static struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
-						   struct io_buffer_list *bl,
-						   unsigned int bgid)
+static inline struct io_buffer_list *__io_buffer_get_list(struct io_ring_ctx *ctx,
+							   unsigned int bgid)
 {
-	if (bl && bgid < BGID_ARRAY)
-		return &bl[bgid];
-
 	return xa_load(&ctx->io_bl_xa, bgid);
 }
 
@@ -55,7 +49,7 @@ static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
 {
 	lockdep_assert_held(&ctx->uring_lock);
 
-	return __io_buffer_get_list(ctx, ctx->io_bl, bgid);
+	return __io_buffer_get_list(ctx, bgid);
 }
 
 static int io_buffer_add_list(struct io_ring_ctx *ctx,
@@ -67,11 +61,7 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx,
 	 * always under the ->uring_lock, but the RCU lookup from mmap does.
 	 */
 	bl->bgid = bgid;
-	smp_store_release(&bl->is_ready, 1);
-
-	if (bgid < BGID_ARRAY)
-		return 0;
-
+	atomic_set(&bl->refs, 1);
 	return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
 }
 
@@ -208,24 +198,6 @@ void __user *io_buffer_select(struct io_kiocb *req, size_t *len,
 	return ret;
 }
 
-static __cold int io_init_bl_list(struct io_ring_ctx *ctx)
-{
-	struct io_buffer_list *bl;
-	int i;
-
-	bl = kcalloc(BGID_ARRAY, sizeof(struct io_buffer_list), GFP_KERNEL);
-	if (!bl)
-		return -ENOMEM;
-
-	for (i = 0; i < BGID_ARRAY; i++) {
-		INIT_LIST_HEAD(&bl[i].buf_list);
-		bl[i].bgid = i;
-	}
-
-	smp_store_release(&ctx->io_bl, bl);
-	return 0;
-}
-
 /*
  * Mark the given mapped range as free for reuse
  */
@@ -294,24 +266,24 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
 	return i;
 }
 
+void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
+{
+	if (atomic_dec_and_test(&bl->refs)) {
+		__io_remove_buffers(ctx, bl, -1U);
+		kfree_rcu(bl, rcu);
+	}
+}
+
 void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
 	struct io_buffer_list *bl;
 	struct list_head *item, *tmp;
 	struct io_buffer *buf;
 	unsigned long index;
-	int i;
-
-	for (i = 0; i < BGID_ARRAY; i++) {
-		if (!ctx->io_bl)
-			break;
-		__io_remove_buffers(ctx, &ctx->io_bl[i], -1U);
-	}
 
 	xa_for_each(&ctx->io_bl_xa, index, bl) {
 		xa_erase(&ctx->io_bl_xa, bl->bgid);
-		__io_remove_buffers(ctx, bl, -1U);
-		kfree_rcu(bl, rcu);
+		io_put_bl(ctx, bl);
 	}
 
 	/*
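
Editor's note on the refcount scheme introduced above: io_buffer_add_list() now starts each list at a refcount of 1 (the registration's own reference), the mmap lookup added later in this patch takes a transient extra reference, and io_put_bl() only tears the list down when the last reference drops. A rough user-space sketch of that dec-and-test pattern, using C11 atomics instead of the kernel's atomic_t and kfree_rcu(), purely as an illustration and not part of the patch:

    #include <stdatomic.h>
    #include <stdlib.h>

    /* Simplified stand-in for struct io_buffer_list: only the refcount. */
    struct buf_list {
    	atomic_int refs;
    };

    /* Mirrors io_buffer_add_list(): the owner starts with one reference. */
    static struct buf_list *new_bl(void)
    {
    	struct buf_list *bl = calloc(1, sizeof(*bl));

    	if (bl)
    		atomic_store(&bl->refs, 1);
    	return bl;
    }

    /* Mirrors io_put_bl(): tear down only when the last reference drops.
     * atomic_fetch_sub() returns the old value, so 1 means we were last.
     * The kernel defers the free via kfree_rcu(); here we free directly. */
    static void put_bl(struct buf_list *bl)
    {
    	if (atomic_fetch_sub(&bl->refs, 1) == 1)
    		free(bl);
    }

    int main(void)
    {
    	struct buf_list *bl = new_bl();

    	if (!bl)
    		return 1;
    	atomic_fetch_add(&bl->refs, 1);	/* e.g. an mmap lookup takes a ref */
    	put_bl(bl);			/* mmap side drops its ref */
    	put_bl(bl);			/* owner drops the last ref: freed */
    	return 0;
    }
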
@@ -489,12 +461,6 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 
 	io_ring_submit_lock(ctx, issue_flags);
 
-	if (unlikely(p->bgid < BGID_ARRAY && !ctx->io_bl)) {
-		ret = io_init_bl_list(ctx);
-		if (ret)
-			goto err;
-	}
-
 	bl = io_buffer_get_list(ctx, p->bgid);
 	if (unlikely(!bl)) {
 		bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT);
@@ -507,14 +473,9 @@ int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 		if (ret) {
 			/*
 			 * Doesn't need rcu free as it was never visible, but
-			 * let's keep it consistent throughout. Also can't
-			 * be a lower indexed array group, as adding one
-			 * where lookup failed cannot happen.
+			 * let's keep it consistent throughout.
 			 */
-			if (p->bgid >= BGID_ARRAY)
-				kfree_rcu(bl, rcu);
-			else
-				WARN_ON_ONCE(1);
+			kfree_rcu(bl, rcu);
 			goto err;
 		}
 	}
@@ -679,12 +640,6 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	if (reg.ring_entries >= 65536)
 		return -EINVAL;
 
-	if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
-		int ret = io_init_bl_list(ctx);
-		if (ret)
-			return ret;
-	}
-
 	bl = io_buffer_get_list(ctx, reg.bgid);
 	if (bl) {
 		/* if mapped buffer ring OR classic exists, don't allow */
@@ -733,11 +688,8 @@ int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg)
 	if (!bl->is_buf_ring)
 		return -EINVAL;
 
-	__io_remove_buffers(ctx, bl, -1U);
-	if (bl->bgid >= BGID_ARRAY) {
-		xa_erase(&ctx->io_bl_xa, bl->bgid);
-		kfree_rcu(bl, rcu);
-	}
+	xa_erase(&ctx->io_bl_xa, bl->bgid);
+	io_put_bl(ctx, bl);
 	return 0;
 }
 
@@ -767,23 +719,35 @@ int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg)
 	return 0;
 }
 
-void *io_pbuf_get_address(struct io_ring_ctx *ctx, unsigned long bgid)
+struct io_buffer_list *io_pbuf_get_bl(struct io_ring_ctx *ctx,
+				      unsigned long bgid)
 {
 	struct io_buffer_list *bl;
+	bool ret;
 
-	bl = __io_buffer_get_list(ctx, smp_load_acquire(&ctx->io_bl), bgid);
-
-	if (!bl || !bl->is_mmap)
-		return NULL;
 	/*
-	 * Ensure the list is fully setup. Only strictly needed for RCU lookup
-	 * via mmap, and in that case only for the array indexed groups. For
-	 * the xarray lookups, it's either visible and ready, or not at all.
+	 * We have to be a bit careful here - we're inside mmap and cannot grab
+	 * the uring_lock. This means the buffer_list could be simultaneously
+	 * going away, if someone is trying to be sneaky. Look it up under rcu
+	 * so we know it's not going away, and attempt to grab a reference to
+	 * it. If the ref is already zero, then fail the mapping. If successful,
+	 * the caller will call io_put_bl() to drop the reference at the end.
+	 * This may then safely free the buffer_list (and drop the pages); by
+	 * that point, vm_insert_pages() would've already grabbed the
+	 * necessary vma references.
 	 */
-	if (!smp_load_acquire(&bl->is_ready))
-		return NULL;
-
-	return bl->buf_ring;
+	rcu_read_lock();
+	bl = xa_load(&ctx->io_bl_xa, bgid);
+	/* must be a mmap'able buffer ring and have pages */
+	ret = false;
+	if (bl && bl->is_mmap)
+		ret = atomic_inc_not_zero(&bl->refs);
+	rcu_read_unlock();
+
+	if (ret)
+		return bl;
+
+	return ERR_PTR(-EINVAL);
 }
 
 /*
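
Editor's note: the key step in the comment above is atomic_inc_not_zero() - the mmap path may only use the list if it can bump a refcount that has not already hit zero; otherwise unregistration has won the race and the list is on its way out. A user-space approximation of that check (the function name is mine, and the RCU read-side protection that keeps the memory valid during the lookup is deliberately omitted):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Approximates the kernel's atomic_inc_not_zero(): take a reference only
     * if the object is still live (refcount > 0). Returns false if teardown
     * already dropped the last reference; the caller must then back off. */
    static bool get_ref_not_zero(atomic_int *refs)
    {
    	int old = atomic_load(refs);

    	while (old != 0) {
    		/* On failure the CAS reloads 'old' with the current value. */
    		if (atomic_compare_exchange_weak(refs, &old, old + 1))
    			return true;
    	}
    	return false;
    }

On success the caller would later drop that reference with io_put_bl(), exactly as the new comment in io_pbuf_get_bl() describes.
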