
Commit 01ee194

xue01-he authored and axboe committed
io_uring: add support for hybrid IOPOLL
A new hybrid poll is implemented on the io_uring layer. Once an IO is issued, it is not polled immediately; instead, the task blocks first and is rerun shortly before the IO completes, then polls to reap the completion. While this poll method could be a suboptimal solution when running on a single thread, it offers lower performance than regular polling but higher than IRQ-driven completion, and CPU utilization is also lower than with polling.

To use hybrid polling, the ring must be set up with both the IORING_SETUP_IOPOLL and IORING_SETUP_HYBRID_IOPOLL flags set. Hybrid polling has the same restrictions as IOPOLL, in that commands must explicitly support it.

Signed-off-by: hexue <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
1 parent c132953 commit 01ee194
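As the commit message notes, hybrid polling is opted into at ring creation by combining IORING_SETUP_IOPOLL with the new flag. Below is a minimal liburing sketch (not part of the commit): it assumes a kernel containing this change and a block device path passed on the command line; the fallback #define mirrors the uapi addition below, in case the installed liburing headers predate it.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <liburing.h>

#ifndef IORING_SETUP_HYBRID_IOPOLL
#define IORING_SETUP_HYBRID_IOPOLL (1U << 17) /* added by this commit */
#endif

int main(int argc, char *argv[])
{
    struct io_uring ring;
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;
    void *buf;
    int fd, ret;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <block-device>\n", argv[0]);
        return 1;
    }

    /* HYBRID_IOPOLL is only valid together with IOPOLL. */
    ret = io_uring_queue_init(8, &ring,
                              IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL);
    if (ret < 0) {
        fprintf(stderr, "queue_init: %s\n", strerror(-ret));
        return 1;
    }

    /* IOPOLL needs O_DIRECT I/O on regular block devices. */
    fd = open(argv[1], O_RDONLY | O_DIRECT);
    if (fd < 0) {
        perror("open");
        return 1;
    }
    if (posix_memalign(&buf, 4096, 4096))
        return 1;

    sqe = io_uring_get_sqe(&ring);
    io_uring_prep_read(sqe, fd, buf, 4096, 0);
    io_uring_submit(&ring);

    /* Reaping the completion drives the kernel's (hybrid) poll loop. */
    ret = io_uring_wait_cqe(&ring, &cqe);
    if (ret == 0) {
        printf("read returned %d\n", cqe->res);
        io_uring_cqe_seen(&ring, cqe);
    }

    io_uring_queue_exit(&ring);
    return 0;
}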

File tree

4 files changed: +108 -14 lines changed


include/linux/io_uring_types.h

Lines changed: 17 additions & 2 deletions
@@ -298,6 +298,11 @@ struct io_ring_ctx {
         * ->uring_cmd() by io_uring_cmd_insert_cancelable()
         */
        struct hlist_head cancelable_uring_cmd;
+       /*
+        * For Hybrid IOPOLL, runtime in hybrid polling, without
+        * scheduling time
+        */
+       u64 hybrid_poll_time;
    } ____cacheline_aligned_in_smp;

    struct {
@@ -449,6 +454,7 @@ enum {
    REQ_F_LINK_TIMEOUT_BIT,
    REQ_F_NEED_CLEANUP_BIT,
    REQ_F_POLLED_BIT,
+   REQ_F_HYBRID_IOPOLL_STATE_BIT,
    REQ_F_BUFFER_SELECTED_BIT,
    REQ_F_BUFFER_RING_BIT,
    REQ_F_REISSUE_BIT,
@@ -507,6 +513,8 @@ enum {
    REQ_F_NEED_CLEANUP = IO_REQ_FLAG(REQ_F_NEED_CLEANUP_BIT),
    /* already went through poll handler */
    REQ_F_POLLED = IO_REQ_FLAG(REQ_F_POLLED_BIT),
+   /* every req only blocks once in hybrid poll */
+   REQ_F_IOPOLL_STATE = IO_REQ_FLAG(REQ_F_HYBRID_IOPOLL_STATE_BIT),
    /* buffer already selected */
    REQ_F_BUFFER_SELECTED = IO_REQ_FLAG(REQ_F_BUFFER_SELECTED_BIT),
    /* buffer selected from ring, needs commit */
@@ -639,8 +647,15 @@ struct io_kiocb {
    atomic_t refs;
    bool cancel_seq_set;
    struct io_task_work io_task_work;
-   /* for polled requests, i.e. IORING_OP_POLL_ADD and async armed poll */
-   struct hlist_node hash_node;
+   union {
+       /*
+        * for polled requests, i.e. IORING_OP_POLL_ADD and async armed
+        * poll
+        */
+       struct hlist_node hash_node;
+       /* For IOPOLL setup queues, with hybrid polling */
+       u64 iopoll_start;
+   };
    /* internal polling, see IORING_FEAT_FAST_POLL */
    struct async_poll *apoll;
    /* opcode allocated if it needs to store data for async defer */
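One detail worth noting in the last hunk: iopoll_start joins a union with hash_node, so struct io_kiocb does not grow. The two members are never live at the same time, since a request sits either on the poll hash (IORING_OP_POLL_ADD / armed async poll) or on an IOPOLL queue. A standalone miniature of the same storage-sharing pattern (illustrative types, not kernel code):

#include <stdio.h>
#include <stdint.h>

struct list_node { struct list_node *next, *prev; };

/* Two mutually exclusive roles share one slot, as io_kiocb now does
 * with hash_node (poll hash linkage) and iopoll_start (timestamp). */
struct request {
    union {
        struct list_node hash_node; /* poll-armed requests */
        uint64_t iopoll_start;      /* hybrid-IOPOLL requests */
    };
};

int main(void)
{
    /* The union costs max(sizeof members), not the sum. */
    printf("sizeof(struct request) = %zu\n", sizeof(struct request));
    return 0;
}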

include/uapi/linux/io_uring.h

Lines changed: 3 additions & 0 deletions
@@ -200,6 +200,9 @@ enum io_uring_sqe_flags_bit {
  */
 #define IORING_SETUP_NO_SQARRAY (1U << 16)

+/* Use hybrid poll in iopoll process */
+#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
+
 enum io_uring_op {
    IORING_OP_NOP,
    IORING_OP_READV,
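Since io_uring_setup() rejects setup flags it does not recognize with -EINVAL (see the allow-mask extended in io_uring.c below), userspace can probe for the new flag and fall back to classic IOPOLL on older kernels. A sketch under the same assumptions as the earlier example:

#include <errno.h>
#include <liburing.h>

#ifndef IORING_SETUP_HYBRID_IOPOLL
#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)
#endif

/* Try hybrid IOPOLL first; kernels without this commit return -EINVAL
 * for the unknown flag, so retry with plain IOPOLL. */
static int init_iopoll_ring(struct io_uring *ring, unsigned int entries)
{
    int ret;

    ret = io_uring_queue_init(entries, ring,
                              IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL);
    if (ret == -EINVAL)
        ret = io_uring_queue_init(entries, ring, IORING_SETUP_IOPOLL);
    return ret;
}

int main(void)
{
    struct io_uring ring;
    int ret = init_iopoll_ring(&ring, 8);

    if (ret == 0)
        io_uring_queue_exit(&ring);
    return ret ? 1 : 0;
}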

io_uring/io_uring.c

Lines changed: 7 additions & 1 deletion
@@ -307,6 +307,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        goto err;

    ctx->flags = p->flags;
+   ctx->hybrid_poll_time = LLONG_MAX;
    atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
    init_waitqueue_head(&ctx->sqo_sq_wait);
    INIT_LIST_HEAD(&ctx->sqd_list);
@@ -3630,6 +3631,11 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
    if (!(ctx->flags & IORING_SETUP_NO_SQARRAY))
        static_branch_inc(&io_key_has_sqarray);

+   /* HYBRID_IOPOLL only valid with IOPOLL */
+   if ((ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_HYBRID_IOPOLL)) ==
+           IORING_SETUP_HYBRID_IOPOLL)
+       return -EINVAL;
+
    if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
        !(ctx->flags & IORING_SETUP_IOPOLL) &&
        !(ctx->flags & IORING_SETUP_SQPOLL))
@@ -3785,7 +3791,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params)
            IORING_SETUP_SQE128 | IORING_SETUP_CQE32 |
            IORING_SETUP_SINGLE_ISSUER | IORING_SETUP_DEFER_TASKRUN |
            IORING_SETUP_NO_MMAP | IORING_SETUP_REGISTERED_FD_ONLY |
-           IORING_SETUP_NO_SQARRAY))
+           IORING_SETUP_NO_SQARRAY | IORING_SETUP_HYBRID_IOPOLL))
        return -EINVAL;

    return io_uring_create(entries, &p, params);
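The validity check added above relies on a standard bitmask idiom: masking flags with (A | B) and comparing the result to B is true exactly when B is set without A. A tiny self-contained demonstration (flag values copied from the uapi header):

#include <assert.h>

#define IORING_SETUP_IOPOLL        (1U << 0)
#define IORING_SETUP_HYBRID_IOPOLL (1U << 17)

/* True when HYBRID_IOPOLL is requested without IOPOLL: the -EINVAL case. */
static int hybrid_without_iopoll(unsigned int flags)
{
    return (flags & (IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL)) ==
           IORING_SETUP_HYBRID_IOPOLL;
}

int main(void)
{
    assert(!hybrid_without_iopoll(0));
    assert(!hybrid_without_iopoll(IORING_SETUP_IOPOLL));
    assert(!hybrid_without_iopoll(IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL));
    assert(hybrid_without_iopoll(IORING_SETUP_HYBRID_IOPOLL));
    return 0;
}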

io_uring/rw.c

Lines changed: 81 additions & 11 deletions
@@ -817,6 +817,11 @@ static int io_rw_init_file(struct io_kiocb *req, fmode_t mode, int rw_type)
        kiocb->ki_flags |= IOCB_HIPRI;
        kiocb->ki_complete = io_complete_rw_iopoll;
        req->iopoll_completed = 0;
+       if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) {
+           /* make sure every req only blocks once */
+           req->flags &= ~REQ_F_IOPOLL_STATE;
+           req->iopoll_start = ktime_get_ns();
+       }
    } else {
        if (kiocb->ki_flags & IOCB_HIPRI)
            return -EINVAL;
@@ -1115,6 +1120,78 @@ void io_rw_fail(struct io_kiocb *req)
    io_req_set_res(req, res, req->cqe.flags);
 }

+static int io_uring_classic_poll(struct io_kiocb *req, struct io_comp_batch *iob,
+                unsigned int poll_flags)
+{
+   struct file *file = req->file;
+
+   if (req->opcode == IORING_OP_URING_CMD) {
+       struct io_uring_cmd *ioucmd;
+
+       ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
+       return file->f_op->uring_cmd_iopoll(ioucmd, iob, poll_flags);
+   } else {
+       struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+
+       return file->f_op->iopoll(&rw->kiocb, iob, poll_flags);
+   }
+}
+
+static u64 io_hybrid_iopoll_delay(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+   struct hrtimer_sleeper timer;
+   enum hrtimer_mode mode;
+   ktime_t kt;
+   u64 sleep_time;
+
+   if (req->flags & REQ_F_IOPOLL_STATE)
+       return 0;
+
+   if (ctx->hybrid_poll_time == LLONG_MAX)
+       return 0;
+
+   /* Using half the running time to do schedule */
+   sleep_time = ctx->hybrid_poll_time / 2;
+
+   kt = ktime_set(0, sleep_time);
+   req->flags |= REQ_F_IOPOLL_STATE;
+
+   mode = HRTIMER_MODE_REL;
+   hrtimer_init_sleeper_on_stack(&timer, CLOCK_MONOTONIC, mode);
+   hrtimer_set_expires(&timer.timer, kt);
+   set_current_state(TASK_INTERRUPTIBLE);
+   hrtimer_sleeper_start_expires(&timer, mode);
+
+   if (timer.task)
+       io_schedule();
+
+   hrtimer_cancel(&timer.timer);
+   __set_current_state(TASK_RUNNING);
+   destroy_hrtimer_on_stack(&timer.timer);
+   return sleep_time;
+}
+
+static int io_uring_hybrid_poll(struct io_kiocb *req,
+               struct io_comp_batch *iob, unsigned int poll_flags)
+{
+   struct io_ring_ctx *ctx = req->ctx;
+   u64 runtime, sleep_time;
+   int ret;
+
+   sleep_time = io_hybrid_iopoll_delay(ctx, req);
+   ret = io_uring_classic_poll(req, iob, poll_flags);
+   runtime = ktime_get_ns() - req->iopoll_start - sleep_time;
+
+   /*
+    * Use minimum sleep time if we're polling devices with different
+    * latencies. We could get more completions from the faster ones.
+    */
+   if (ctx->hybrid_poll_time > runtime)
+       ctx->hybrid_poll_time = runtime;
+
+   return ret;
+}
+
 int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
 {
    struct io_wq_work_node *pos, *start, *prev;
@@ -1131,7 +1208,6 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)

    wq_list_for_each(pos, start, &ctx->iopoll_list) {
        struct io_kiocb *req = container_of(pos, struct io_kiocb, comp_list);
-       struct file *file = req->file;
        int ret;

        /*
@@ -1142,17 +1218,11 @@ int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin)
        if (READ_ONCE(req->iopoll_completed))
            break;

-       if (req->opcode == IORING_OP_URING_CMD) {
-           struct io_uring_cmd *ioucmd;
-
-           ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
-           ret = file->f_op->uring_cmd_iopoll(ioucmd, &iob,
-                              poll_flags);
-       } else {
-           struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
+       if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL)
+           ret = io_uring_hybrid_poll(req, &iob, poll_flags);
+       else
+           ret = io_uring_classic_poll(req, &iob, poll_flags);

-           ret = file->f_op->iopoll(&rw->kiocb, &iob, poll_flags);
-       }
        if (unlikely(ret < 0))
            return ret;
        else if (ret)
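The heart of the change is io_uring_hybrid_poll(): before busy-polling, io_hybrid_iopoll_delay() sleeps the task for half of the shortest completion runtime observed so far (ctx->hybrid_poll_time, seeded with LLONG_MAX so the very first request polls classically and establishes the baseline), and REQ_F_IOPOLL_STATE guarantees each request blocks at most once. The sketch below is a loose userspace model of that heuristic, not kernel code: poll_device() simulates a device that completes about 100µs after submission, and as a simplification the baseline is only updated when a completion is actually reaped.

#include <stdint.h>
#include <stdio.h>
#include <time.h>

#define NO_BASELINE UINT64_MAX

static uint64_t now_ns(void)
{
    struct timespec ts;

    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (uint64_t)ts.tv_sec * 1000000000ull + ts.tv_nsec;
}

static void sleep_ns(uint64_t ns)
{
    struct timespec ts = { .tv_sec = (time_t)(ns / 1000000000ull),
                           .tv_nsec = (long)(ns % 1000000000ull) };

    nanosleep(&ts, NULL);
}

/* Simulated device: "completes" ~100us after submission (hypothetical). */
static int poll_device(uint64_t submitted)
{
    return now_ns() - submitted >= 100000;
}

struct hybrid_ctx { uint64_t hybrid_poll_time; };

/* One poll pass, modeled on io_uring_hybrid_poll(): sleep half the
 * shortest completion runtime seen so far, then poll; *blocked plays
 * the role of REQ_F_IOPOLL_STATE so each request sleeps at most once. */
static int hybrid_poll(struct hybrid_ctx *ctx, uint64_t start, int *blocked)
{
    uint64_t sleep_time = 0, runtime;
    int ret;

    if (!*blocked && ctx->hybrid_poll_time != NO_BASELINE) {
        sleep_time = ctx->hybrid_poll_time / 2;
        *blocked = 1;
        sleep_ns(sleep_time);
    }

    ret = poll_device(start);

    /* Runtime excludes the voluntary sleep; keep the minimum so
     * mixed-latency devices are timed against the fastest one. */
    runtime = now_ns() - start - sleep_time;
    if (ret && runtime < ctx->hybrid_poll_time)
        ctx->hybrid_poll_time = runtime;

    return ret;
}

int main(void)
{
    struct hybrid_ctx ctx = { .hybrid_poll_time = NO_BASELINE };
    int i;

    for (i = 0; i < 4; i++) {
        uint64_t start = now_ns();
        int blocked = 0;

        while (!hybrid_poll(&ctx, start, &blocked))
            ; /* spin until the simulated IO completes */
        printf("io %d done, min runtime %llu ns\n", i,
               (unsigned long long)ctx.hybrid_poll_time);
    }
    return 0;
}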
