@@ -2355,17 +2355,70 @@ static enum hrtimer_restart io_cqring_timer_wakeup(struct hrtimer *timer)
 	struct io_wait_queue *iowq = container_of(timer, struct io_wait_queue, t);
 
 	WRITE_ONCE(iowq->hit_timeout, 1);
+	iowq->min_timeout = 0;
 	wake_up_process(iowq->wq.private);
 	return HRTIMER_NORESTART;
 }
 
+/*
+ * Doing min_timeout portion. If we saw any timeouts, events, or have work,
+ * wake up. If not, and we have a normal timeout, switch to that and keep
+ * sleeping.
+ */
+static enum hrtimer_restart io_cqring_min_timer_wakeup(struct hrtimer *timer)
+{
+	struct io_wait_queue *iowq = container_of(timer, struct io_wait_queue, t);
+	struct io_ring_ctx *ctx = iowq->ctx;
+
+	/* no general timeout, or shorter (or equal), we are done */
+	if (iowq->timeout == KTIME_MAX ||
+	    ktime_compare(iowq->min_timeout, iowq->timeout) >= 0)
+		goto out_wake;
+	/* work we may need to run, wake function will see if we need to wake */
+	if (io_has_work(ctx))
+		goto out_wake;
+	/* got events since we started waiting, min timeout is done */
+	if (iowq->cq_min_tail != READ_ONCE(ctx->rings->cq.tail))
+		goto out_wake;
+	/* if we have any events and min timeout expired, we're done */
+	if (io_cqring_events(ctx))
+		goto out_wake;
+
+	/*
+	 * If using deferred task_work running and application is waiting on
+	 * more than one request, ensure we reset it now where we are switching
+	 * to normal sleeps. Any request completion post min_wait should wake
+	 * the task and return.
+	 */
+	if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
+		atomic_set(&ctx->cq_wait_nr, 1);
+		smp_mb();
+		if (!llist_empty(&ctx->work_llist))
+			goto out_wake;
+	}
+
+	iowq->t.function = io_cqring_timer_wakeup;
+	hrtimer_set_expires(timer, iowq->timeout);
+	return HRTIMER_RESTART;
+out_wake:
+	return io_cqring_timer_wakeup(timer);
+}
+
 static int io_cqring_schedule_timeout(struct io_wait_queue *iowq,
-				      clockid_t clock_id)
+				      clockid_t clock_id, ktime_t start_time)
 {
-	iowq->hit_timeout = 0;
+	ktime_t timeout;
+
 	hrtimer_init_on_stack(&iowq->t, clock_id, HRTIMER_MODE_ABS);
-	iowq->t.function = io_cqring_timer_wakeup;
-	hrtimer_set_expires_range_ns(&iowq->t, iowq->timeout, 0);
+	if (iowq->min_timeout) {
+		timeout = ktime_add_ns(iowq->min_timeout, start_time);
+		iowq->t.function = io_cqring_min_timer_wakeup;
+	} else {
+		timeout = iowq->timeout;
+		iowq->t.function = io_cqring_timer_wakeup;
+	}
+
+	hrtimer_set_expires_range_ns(&iowq->t, timeout, 0);
 	hrtimer_start_expires(&iowq->t, HRTIMER_MODE_ABS);
 
 	if (!READ_ONCE(iowq->hit_timeout))
@@ -2379,7 +2432,8 @@ static int io_cqring_schedule_timeout(struct io_wait_queue *iowq,
 }
 
 static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
-				     struct io_wait_queue *iowq)
+				     struct io_wait_queue *iowq,
+				     ktime_t start_time)
 {
 	int ret = 0;
 
@@ -2390,8 +2444,8 @@ static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
 	 */
 	if (current_pending_io())
 		current->in_iowait = 1;
-	if (iowq->timeout != KTIME_MAX)
-		ret = io_cqring_schedule_timeout(iowq, ctx->clockid);
+	if (iowq->timeout != KTIME_MAX || iowq->min_timeout)
+		ret = io_cqring_schedule_timeout(iowq, ctx->clockid, start_time);
 	else
 		schedule();
 	current->in_iowait = 0;
@@ -2400,7 +2454,8 @@ static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
 
 /* If this returns > 0, the caller should retry */
 static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
-					  struct io_wait_queue *iowq)
+					  struct io_wait_queue *iowq,
+					  ktime_t start_time)
 {
 	if (unlikely(READ_ONCE(ctx->check_cq)))
 		return 1;
@@ -2413,7 +2468,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
 	if (unlikely(io_should_wake(iowq)))
 		return 0;
 
-	return __io_cqring_wait_schedule(ctx, iowq);
+	return __io_cqring_wait_schedule(ctx, iowq, start_time);
 }
 
 struct ext_arg {
@@ -2431,6 +2486,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 {
 	struct io_wait_queue iowq;
 	struct io_rings *rings = ctx->rings;
+	ktime_t start_time;
 	int ret;
 
 	if (!io_allowed_run_tw(ctx))
@@ -2448,9 +2504,13 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 	iowq.wq.private = current;
 	INIT_LIST_HEAD(&iowq.wq.entry);
 	iowq.ctx = ctx;
-	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
 	iowq.cq_tail = READ_ONCE(ctx->rings->cq.head) + min_events;
+	iowq.cq_min_tail = READ_ONCE(ctx->rings->cq.tail);
+	iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
+	iowq.hit_timeout = 0;
+	iowq.min_timeout = 0;
 	iowq.timeout = KTIME_MAX;
+	start_time = io_get_time(ctx);
 
 	if (ext_arg->ts) {
 		struct timespec64 ts;
@@ -2460,7 +2520,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 
 		iowq.timeout = timespec64_to_ktime(ts);
 		if (!(flags & IORING_ENTER_ABS_TIMER))
-			iowq.timeout = ktime_add(iowq.timeout, io_get_time(ctx));
+			iowq.timeout = ktime_add(iowq.timeout, start_time);
 	}
 
 	if (ext_arg->sig) {
@@ -2480,8 +2540,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 
 	trace_io_uring_cqring_wait(ctx, min_events);
 	do {
-		int nr_wait = (int) iowq.cq_tail - READ_ONCE(ctx->rings->cq.tail);
 		unsigned long check_cq;
+		int nr_wait;
+
+		/* if min timeout has been hit, don't reset wait count */
+		if (!iowq.hit_timeout)
+			nr_wait = (int) iowq.cq_tail -
+					READ_ONCE(ctx->rings->cq.tail);
+		else
+			nr_wait = 1;
 
 		if (ctx->flags & IORING_SETUP_DEFER_TASKRUN) {
 			atomic_set(&ctx->cq_wait_nr, nr_wait);
@@ -2491,7 +2558,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
 						TASK_INTERRUPTIBLE);
 		}
 
-		ret = io_cqring_wait_schedule(ctx, &iowq);
+		ret = io_cqring_wait_schedule(ctx, &iowq, start_time);
 		__set_current_state(TASK_RUNNING);
 		atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
 
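Note on the two-phase wait, with a rough userspace analogue. The diff arms the hrtimer for the min_timeout window first; if that window lapses with no completions and no pending work, the callback is swapped to the plain io_cqring_timer_wakeup and the timer is re-armed for the full timeout, after which a single completion is enough to wake the waiter (nr_wait drops to 1 once hit_timeout is set). The sketch below mirrors that control flow with POSIX primitives only. It is not io_uring or kernel code; struct waiter, wait_min_batch() and post_completion() are invented for illustration, and the deadline handling is deliberately simplified.

/*
 * Userspace analogue of the patch's two-phase wait (illustrative only):
 * during the min window we insist on the full batch of min_events; once the
 * min window lapses with at least one completion we return, otherwise we
 * fall back to the normal timeout where any single completion suffices.
 */
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <time.h>

struct waiter {
	pthread_mutex_t lock;
	pthread_cond_t cond;
	unsigned long completions;	/* rough analogue of cq.tail progress */
};

static struct timespec deadline_from_now(long ns)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);	/* default cond-var clock */
	ts.tv_sec += ns / 1000000000L;
	ts.tv_nsec += ns % 1000000000L;
	if (ts.tv_nsec >= 1000000000L) {
		ts.tv_sec++;
		ts.tv_nsec -= 1000000000L;
	}
	return ts;
}

/* producer side: called once per completion */
static void post_completion(struct waiter *w)
{
	pthread_mutex_lock(&w->lock);
	w->completions++;
	pthread_cond_signal(&w->cond);
	pthread_mutex_unlock(&w->lock);
}

/* returns false only if the full timeout expired with nothing to report */
static bool wait_min_batch(struct waiter *w, unsigned long min_events,
			   long min_timeout_ns, long timeout_ns)
{
	struct timespec min_dl = deadline_from_now(min_timeout_ns);
	struct timespec full_dl = deadline_from_now(timeout_ns);
	/* mirrors the ktime_compare() early-out: a min window that is not
	 * shorter than the full timeout degenerates to a normal wait */
	bool min_phase = min_timeout_ns > 0 && min_timeout_ns < timeout_ns;
	unsigned long start, got;
	bool ok = true;
	int rc;

	pthread_mutex_lock(&w->lock);
	start = w->completions;
	for (;;) {
		got = w->completions - start;
		/* min phase: only the full batch ends the wait early;
		 * after it, any single completion does */
		if (got >= min_events || (!min_phase && got))
			break;

		rc = pthread_cond_timedwait(&w->cond, &w->lock,
					    min_phase ? &min_dl : &full_dl);
		got = w->completions - start;	/* refresh after waking */
		if (rc != ETIMEDOUT)
			continue;
		if (min_phase) {
			if (got)	/* min window over and we have data */
				break;
			min_phase = false;	/* switch to the normal
						 * timeout, mirroring the
						 * hrtimer callback swap */
			continue;
		}
		if (!got)
			ok = false;	/* full timeout, nothing arrived */
		break;
	}
	pthread_mutex_unlock(&w->lock);
	return ok;
}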