@@ -145,7 +145,7 @@ struct io_rings {
 	/*
 	 * Number of completion events lost because the queue was full;
 	 * this should be avoided by the application by making sure
-	 * there are not more requests pending thatn there is space in
+	 * there are not more requests pending than there is space in
 	 * the completion queue.
 	 *
 	 * Written by the kernel, shouldn't be modified by the
@@ -275,7 +275,8 @@ struct io_ring_ctx {
 		 * manipulate the list, hence no extra locking is needed there.
 		 */
 		struct list_head	poll_list;
-		struct rb_root		cancel_tree;
+		struct hlist_head	*cancel_hash;
+		unsigned		cancel_hash_bits;
 
 		spinlock_t		inflight_lock;
 		struct list_head	inflight_list;
@@ -355,7 +356,7 @@ struct io_kiocb {
 	struct io_ring_ctx	*ctx;
 	union {
 		struct list_head	list;
-		struct rb_node		rb_node;
+		struct hlist_node	hash_node;
 	};
 	struct list_head	link_list;
 	unsigned int		flags;
@@ -444,6 +445,7 @@ static void io_ring_ctx_ref_free(struct percpu_ref *ref)
 static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 {
 	struct io_ring_ctx *ctx;
+	int hash_bits;
 
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
@@ -457,6 +459,21 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	if (!ctx->completions)
 		goto err;
 
+	/*
+	 * Use 5 bits less than the max cq entries, that should give us around
+	 * 32 entries per hash list if totally full and uniformly spread.
+	 */
+	hash_bits = ilog2(p->cq_entries);
+	hash_bits -= 5;
+	if (hash_bits <= 0)
+		hash_bits = 1;
+	ctx->cancel_hash_bits = hash_bits;
+	ctx->cancel_hash = kmalloc((1U << hash_bits) * sizeof(struct hlist_head),
+					GFP_KERNEL);
+	if (!ctx->cancel_hash)
+		goto err;
+	__hash_init(ctx->cancel_hash, 1U << hash_bits);
+
 	if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
 		goto err;
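The sizing comment in the hunk above is worth making concrete: the bucket count is 2^(ilog2(cq_entries) - 5), clamped to at least two buckets, so a completely full completion ring hashes out to roughly 32 requests per bucket. A minimal user-space sketch of that arithmetic follows — the helper names are illustrative stand-ins, not kernel API.

#include <stdio.h>

static int ilog2_u32(unsigned int v)	/* user-space stand-in for ilog2() */
{
	int log = -1;

	while (v) {
		v >>= 1;
		log++;
	}
	return log;
}

int main(void)
{
	unsigned int cq_entries[] = { 2, 256, 4096, 32768 };

	for (unsigned int i = 0; i < sizeof(cq_entries) / sizeof(cq_entries[0]); i++) {
		int hash_bits = ilog2_u32(cq_entries[i]) - 5;

		if (hash_bits <= 0)
			hash_bits = 1;		/* same clamp as the patch */
		printf("cq_entries=%5u -> %4u buckets, ~%u entries/bucket when full\n",
		       cq_entries[i], 1U << hash_bits,
		       cq_entries[i] / (1U << hash_bits));
	}
	return 0;
}

For example, cq_entries=4096 gives 128 buckets at about 32 entries each; only tiny rings hit the clamp.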
@@ -470,7 +487,6 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	init_waitqueue_head(&ctx->wait);
 	spin_lock_init(&ctx->completion_lock);
 	INIT_LIST_HEAD(&ctx->poll_list);
-	ctx->cancel_tree = RB_ROOT;
 	INIT_LIST_HEAD(&ctx->defer_list);
 	INIT_LIST_HEAD(&ctx->timeout_list);
 	init_waitqueue_head(&ctx->inflight_wait);
@@ -481,6 +497,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
 	if (ctx->fallback_req)
 		kmem_cache_free(req_cachep, ctx->fallback_req);
 	kfree(ctx->completions);
+	kfree(ctx->cancel_hash);
 	kfree(ctx);
 	return NULL;
 }
@@ -899,7 +916,6 @@ static bool io_link_cancel_timeout(struct io_kiocb *req)
 static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_kiocb *nxt;
 	bool wake_ev = false;
 
 	/* Already got next link */
@@ -911,24 +927,21 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 	 * potentially happen if the chain is messed up, check to be on the
 	 * safe side.
 	 */
-	nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list);
-	while (nxt) {
-		list_del_init(&nxt->list);
+	while (!list_empty(&req->link_list)) {
+		struct io_kiocb *nxt = list_first_entry(&req->link_list,
+						struct io_kiocb, link_list);
 
-		if ((req->flags & REQ_F_LINK_TIMEOUT) &&
-		    (nxt->flags & REQ_F_TIMEOUT)) {
+		if (unlikely((req->flags & REQ_F_LINK_TIMEOUT) &&
+			     (nxt->flags & REQ_F_TIMEOUT))) {
+			list_del_init(&nxt->link_list);
 			wake_ev |= io_link_cancel_timeout(nxt);
-			nxt = list_first_entry_or_null(&req->link_list,
-					struct io_kiocb, list);
 			req->flags &= ~REQ_F_LINK_TIMEOUT;
 			continue;
 		}
-		if (!list_empty(&req->link_list)) {
-			INIT_LIST_HEAD(&nxt->link_list);
-			list_splice(&req->link_list, &nxt->link_list);
-			nxt->flags |= REQ_F_LINK;
-		}
 
+		list_del_init(&req->link_list);
+		if (!list_empty(&nxt->link_list))
+			nxt->flags |= REQ_F_LINK;
 		*nxtptr = nxt;
 		break;
 	}
@@ -944,15 +957,15 @@ static void io_req_link_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
 static void io_fail_links(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	struct io_kiocb *link;
 	unsigned long flags;
 
 	spin_lock_irqsave(&ctx->completion_lock, flags);
 
 	while (!list_empty(&req->link_list)) {
-		link = list_first_entry(&req->link_list, struct io_kiocb, list);
-		list_del_init(&link->list);
+		struct io_kiocb *link = list_first_entry(&req->link_list,
+						struct io_kiocb, link_list);
 
+		list_del_init(&link->link_list);
 		trace_io_uring_fail_link(req, link);
 
 		if ((req->flags & REQ_F_LINK_TIMEOUT) &&
@@ -2260,14 +2273,6 @@ static int io_connect(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 #endif
 }
 
-static inline void io_poll_remove_req(struct io_kiocb *req)
-{
-	if (!RB_EMPTY_NODE(&req->rb_node)) {
-		rb_erase(&req->rb_node, &req->ctx->cancel_tree);
-		RB_CLEAR_NODE(&req->rb_node);
-	}
-}
-
 static void io_poll_remove_one(struct io_kiocb *req)
 {
 	struct io_poll_iocb *poll = &req->poll;
@@ -2279,36 +2284,34 @@ static void io_poll_remove_one(struct io_kiocb *req)
 		io_queue_async_work(req);
 	}
 	spin_unlock(&poll->head->lock);
-	io_poll_remove_req(req);
+	hash_del(&req->hash_node);
 }
 
 static void io_poll_remove_all(struct io_ring_ctx *ctx)
 {
-	struct rb_node *node;
+	struct hlist_node *tmp;
 	struct io_kiocb *req;
+	int i;
 
 	spin_lock_irq(&ctx->completion_lock);
-	while ((node = rb_first(&ctx->cancel_tree)) != NULL) {
-		req = rb_entry(node, struct io_kiocb, rb_node);
-		io_poll_remove_one(req);
+	for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
+		struct hlist_head *list;
+
+		list = &ctx->cancel_hash[i];
+		hlist_for_each_entry_safe(req, tmp, list, hash_node)
+			io_poll_remove_one(req);
 	}
 	spin_unlock_irq(&ctx->completion_lock);
 }
 
 static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr)
 {
-	struct rb_node *p, *parent = NULL;
+	struct hlist_head *list;
 	struct io_kiocb *req;
 
-	p = ctx->cancel_tree.rb_node;
-	while (p) {
-		parent = p;
-		req = rb_entry(parent, struct io_kiocb, rb_node);
-		if (sqe_addr < req->user_data) {
-			p = p->rb_left;
-		} else if (sqe_addr > req->user_data) {
-			p = p->rb_right;
-		} else {
+	list = &ctx->cancel_hash[hash_long(sqe_addr, ctx->cancel_hash_bits)];
+	hlist_for_each_entry(req, list, hash_node) {
+		if (sqe_addr == req->user_data) {
 			io_poll_remove_one(req);
 			return 0;
 		}
@@ -2390,7 +2393,7 @@ static void io_poll_complete_work(struct io_wq_work **workptr)
 		spin_unlock_irq(&ctx->completion_lock);
 		return;
 	}
-	io_poll_remove_req(req);
+	hash_del(&req->hash_node);
 	io_poll_complete(req, mask, ret);
 	spin_unlock_irq(&ctx->completion_lock);
 
@@ -2425,7 +2428,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 	 * for finalizing the request, mark us as having grabbed that already.
 	 */
 	if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) {
-		io_poll_remove_req(req);
+		hash_del(&req->hash_node);
 		io_poll_complete(req, mask, 0);
 		req->flags |= REQ_F_COMP_LOCKED;
 		io_put_req(req);
@@ -2463,20 +2466,10 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
 static void io_poll_req_insert(struct io_kiocb *req)
 {
 	struct io_ring_ctx *ctx = req->ctx;
-	struct rb_node **p = &ctx->cancel_tree.rb_node;
-	struct rb_node *parent = NULL;
-	struct io_kiocb *tmp;
-
-	while (*p) {
-		parent = *p;
-		tmp = rb_entry(parent, struct io_kiocb, rb_node);
-		if (req->user_data < tmp->user_data)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-	rb_link_node(&req->rb_node, parent, p);
-	rb_insert_color(&req->rb_node, &ctx->cancel_tree);
+	struct hlist_head *list;
+
+	list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
+	hlist_add_head(&req->hash_node, list);
 }
 
 static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
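Insertion and cancellation now both key off the same hash of user_data: io_poll_req_insert() adds the request at the head of its bucket, and io_poll_cancel() walks only that one chain instead of rebalancing or descending a tree. A minimal user-space analogue of the scheme follows, with hand-rolled structures standing in for hlist_head/hlist_node and hash_long() — illustrative only, not the kernel implementation.

#include <stdint.h>
#include <stdio.h>

#define HASH_BITS	7
#define NBUCKETS	(1U << HASH_BITS)

struct poll_req {
	uint64_t	user_data;
	struct poll_req	*next;		/* chain within one bucket */
};

static struct poll_req *cancel_hash[NBUCKETS];

static unsigned int hash_user_data(uint64_t key)
{
	/* any reasonable mix works; the kernel uses hash_long() */
	return (unsigned int)((key * 0x9e3779b97f4a7c15ULL) >> (64 - HASH_BITS));
}

static void req_insert(struct poll_req *req)
{
	unsigned int b = hash_user_data(req->user_data);

	req->next = cancel_hash[b];	/* head insert, like hlist_add_head() */
	cancel_hash[b] = req;
}

static struct poll_req *req_cancel(uint64_t user_data)
{
	unsigned int b = hash_user_data(user_data);
	struct poll_req **pp = &cancel_hash[b];

	for (; *pp; pp = &(*pp)->next) {
		if ((*pp)->user_data == user_data) {
			struct poll_req *found = *pp;

			*pp = found->next;	/* unlink, like hash_del() */
			return found;
		}
	}
	return NULL;
}

int main(void)
{
	struct poll_req a = { .user_data = 0x1234 }, b = { .user_data = 0x5678 };

	req_insert(&a);
	req_insert(&b);
	printf("cancel 0x5678: %s\n", req_cancel(0x5678) ? "found" : "not found");
	printf("cancel 0x9999: %s\n", req_cancel(0x9999) ? "found" : "not found");
	return 0;
}

The trade-off is that lookups become average-case O(chain length) rather than the rbtree's worst-case O(log n), which is fine here because the table is sized so chains stay short even with a full ring.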
@@ -2504,7 +2497,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 	INIT_IO_WORK(&req->work, io_poll_complete_work);
 	events = READ_ONCE(sqe->poll_events);
 	poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP;
-	RB_CLEAR_NODE(&req->rb_node);
+	INIT_HLIST_NODE(&req->hash_node);
 
 	poll->head = NULL;
 	poll->done = false;
@@ -3173,10 +3166,11 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
 	 * We don't expect the list to be empty, that will only happen if we
 	 * race with the completion of the linked work.
 	 */
-	if (!list_empty(&req->list)) {
-		prev = list_entry(req->list.prev, struct io_kiocb, link_list);
+	if (!list_empty(&req->link_list)) {
+		prev = list_entry(req->link_list.prev, struct io_kiocb,
+				  link_list);
 		if (refcount_inc_not_zero(&prev->refs)) {
-			list_del_init(&req->list);
+			list_del_init(&req->link_list);
 			prev->flags &= ~REQ_F_LINK_TIMEOUT;
 		} else
 			prev = NULL;
@@ -3206,7 +3200,7 @@ static void io_queue_linked_timeout(struct io_kiocb *req)
 	 * we got a chance to setup the timer
 	 */
 	spin_lock_irq(&ctx->completion_lock);
-	if (!list_empty(&req->list)) {
+	if (!list_empty(&req->link_list)) {
 		struct io_timeout_data *data = &req->io->timeout;
 
 		data->timer.function = io_link_timeout_fn;
@@ -3226,7 +3220,8 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
 	if (!(req->flags & REQ_F_LINK))
 		return NULL;
 
-	nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb, list);
+	nxt = list_first_entry_or_null(&req->link_list, struct io_kiocb,
+					link_list);
 	if (!nxt || nxt->sqe->opcode != IORING_OP_LINK_TIMEOUT)
 		return NULL;
 
@@ -3318,7 +3313,7 @@ static inline void io_queue_link_head(struct io_kiocb *req)
 
 #define SQE_VALID_FLAGS	(IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK)
 
-static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
+static bool io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 			  struct io_kiocb **link)
 {
 	struct io_ring_ctx *ctx = req->ctx;
@@ -3337,7 +3332,7 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 err_req:
 		io_cqring_add_event(req, ret);
 		io_double_put_req(req);
-		return;
+		return false;
 	}
 
 	/*
@@ -3367,7 +3362,7 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 			goto err_req;
 		}
 		trace_io_uring_link(ctx, req, prev);
-		list_add_tail(&req->list, &prev->link_list);
+		list_add_tail(&req->link_list, &prev->link_list);
 	} else if (req->sqe->flags & IOSQE_IO_LINK) {
 		req->flags |= REQ_F_LINK;
 
@@ -3376,6 +3371,8 @@ static void io_submit_sqe(struct io_kiocb *req, struct io_submit_state *state,
 	} else {
 		io_queue_sqe(req);
 	}
+
+	return true;
 }
 
 /*
@@ -3505,6 +3502,7 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 			}
 		}
 
+		submitted++;
 		sqe_flags = req->sqe->flags;
 
 		req->ring_file = ring_file;
@@ -3514,9 +3512,8 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
 		req->needs_fixed_file = async;
 		trace_io_uring_submit_sqe(ctx, req->sqe->user_data,
 					  true, async);
-		io_submit_sqe(req, statep, &link);
-		submitted++;
-
+		if (!io_submit_sqe(req, statep, &link))
+			break;
 		/*
 		 * If previous wasn't linked and we have a linked command,
 		 * that's the end of the chain. Submit the previous link.
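With io_submit_sqe() now returning bool, the loop above counts each SQE as consumed before handing it off and stops the batch on the first hand-off that fails, instead of incrementing afterwards. A small sketch of that accounting pattern — illustrative names, not the kernel loop:

#include <stdbool.h>
#include <stdio.h>

static bool submit_one(int idx)
{
	/* stand-in for io_submit_sqe(); pretend entry 3 fails */
	return idx != 3;
}

static int submit_batch(int nr)
{
	int submitted = 0;

	for (int i = 0; i < nr; i++) {
		submitted++;		/* entry is consumed either way */
		if (!submit_one(i))
			break;		/* stop the batch on failure */
	}
	return submitted;
}

int main(void)
{
	printf("submitted %d of 8\n", submit_batch(8));	/* prints 4 */
	return 0;
}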
@@ -4644,6 +4641,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 	free_uid(ctx->user);
 	put_cred(ctx->creds);
 	kfree(ctx->completions);
+	kfree(ctx->cancel_hash);
 	kmem_cache_free(req_cachep, ctx->fallback_req);
 	kfree(ctx);
 }