Commit abe54c1

ps-ushankar authored and axboe committed
selftests: ublk: kublk: decouple ublk_queues from ublk server threads
selftests: ublk: kublk: decouple ublk_queues from ublk server threads

Add support in kublk for decoupled ublk_queues and ublk server threads.
kublk now has two modes of operation:

- (preexisting mode) threads and queues are paired 1:1, and each thread
  services all the I/Os of one queue
- (new mode) thread and queue counts are independently configurable.
  threads service I/Os in a way that balances load across threads even
  if load is not balanced over queues.

The default is the preexisting mode. The new mode is activated by
passing the --per_io_tasks flag.

Signed-off-by: Uday Shankar <[email protected]>
Reviewed-by: Ming Lei <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
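As an illustration of the new mode's I/O-to-thread mapping (not part of the patch; the queue count, depth, and thread count below are made-up values), the following standalone sketch mirrors the striding that ublk_submit_fetch_commands() performs when --per_io_tasks is set: each thread starts at its own index into the flattened (qid, tag) space, advances by the total thread count, and hands out dense per-thread buffer indices as it goes.

/* Standalone illustration of the per_io_tasks striping; geometry is arbitrary. */
#include <stdio.h>

int main(void)
{
	const int nr_hw_queues = 2, queue_depth = 4, nthreads = 3;
	int nr_ios = nr_hw_queues * queue_depth;

	for (int t = 0; t < nthreads; t++) {
		int j = 0;	/* per-thread buf_index counter, as in the patch */

		printf("thread %d:", t);
		for (int i = t; i < nr_ios; i += nthreads) {
			int q_id = i / queue_depth;
			int tag = i % queue_depth;

			printf(" (q%d,tag%d)->buf%d", q_id, tag, j++);
		}
		printf("\n");
	}
	return 0;
}

With these numbers, thread 0 picks up (q0,tag0), (q0,tag3) and (q1,tag2), thread 1 picks up (q0,tag1), (q1,tag0) and (q1,tag3), and thread 2 picks up (q0,tag2) and (q1,tag1), so load that is concentrated on a single queue is still spread across all three threads.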
1 parent b9848ca commit abe54c1

5 files changed: +100, -24 lines changed

tools/testing/selftests/ublk/file_backed.c

Lines changed: 2 additions & 2 deletions
@@ -54,7 +54,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de

 	ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3);

-	io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
+	io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
 	sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
 	sqe[0]->user_data = build_user_data(tag,
 			ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
@@ -66,7 +66,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de
 	sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
 	sqe[1]->user_data = build_user_data(tag, ublk_op, 0, q->q_id, 1);

-	io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
+	io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
 	sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);

 	return 2;

tools/testing/selftests/ublk/kublk.c

Lines changed: 88 additions & 17 deletions
@@ -505,8 +505,11 @@ static int ublk_thread_init(struct ublk_thread *t)
 	}

 	if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) {
+		unsigned nr_ios = dev->dev_info.queue_depth * dev->dev_info.nr_hw_queues;
+		unsigned max_nr_ios_per_thread = nr_ios / dev->nthreads;
+		max_nr_ios_per_thread += !!(nr_ios % dev->nthreads);
 		ret = io_uring_register_buffers_sparse(
-			&t->ring, dev->dev_info.queue_depth);
+			&t->ring, max_nr_ios_per_thread);
 		if (ret) {
 			ublk_err("ublk dev %d thread %d register spare buffers failed %d",
 				dev->dev_info.dev_id, t->idx, ret);
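The three added lines above compute a ceiling division so that each thread registers only as many sparse buffer slots as it can ever own. As a worked example with illustrative numbers (not taken from the patch): 2 hardware queues of depth 128 give nr_ios = 256; with 3 server threads, 256 / 3 = 85 with a nonzero remainder, so max_nr_ios_per_thread = 86 slots per thread, instead of the fixed per-queue depth of 128 registered before.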
@@ -578,7 +581,7 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q,
 	if (q->tgt_ops->buf_index)
 		buf.index = q->tgt_ops->buf_index(q, tag);
 	else
-		buf.index = tag;
+		buf.index = q->ios[tag].buf_index;

 	if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
 		buf.flags = UBLK_AUTO_BUF_REG_FALLBACK;
@@ -660,18 +663,44 @@ int ublk_queue_io_cmd(struct ublk_io *io)

 static void ublk_submit_fetch_commands(struct ublk_thread *t)
 {
-	/*
-	 * Service exclusively the queue whose q_id matches our thread
-	 * index. This may change in the future.
-	 */
-	struct ublk_queue *q = &t->dev->q[t->idx];
+	struct ublk_queue *q;
 	struct ublk_io *io;
-	int i = 0;
+	int i = 0, j = 0;

-	for (i = 0; i < q->q_depth; i++) {
-		io = &q->ios[i];
-		io->t = t;
-		ublk_queue_io_cmd(io);
+	if (t->dev->per_io_tasks) {
+		/*
+		 * Lexicographically order all the (qid,tag) pairs, with
+		 * qid taking priority (so (1,0) > (0,1)). Then make
+		 * this thread the daemon for every Nth entry in this
+		 * list (N is the number of threads), starting at this
+		 * thread's index. This ensures that each queue is
+		 * handled by as many ublk server threads as possible,
+		 * so that load that is concentrated on one or a few
+		 * queues can make use of all ublk server threads.
+		 */
+		const struct ublksrv_ctrl_dev_info *dinfo = &t->dev->dev_info;
+		int nr_ios = dinfo->nr_hw_queues * dinfo->queue_depth;
+		for (i = t->idx; i < nr_ios; i += t->dev->nthreads) {
+			int q_id = i / dinfo->queue_depth;
+			int tag = i % dinfo->queue_depth;
+			q = &t->dev->q[q_id];
+			io = &q->ios[tag];
+			io->t = t;
+			io->buf_index = j++;
+			ublk_queue_io_cmd(io);
+		}
+	} else {
+		/*
+		 * Service exclusively the queue whose q_id matches our
+		 * thread index.
+		 */
+		struct ublk_queue *q = &t->dev->q[t->idx];
+		for (i = 0; i < q->q_depth; i++) {
+			io = &q->ios[i];
+			io->t = t;
+			io->buf_index = i;
+			ublk_queue_io_cmd(io);
+		}
 	}
 }

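The hunk above is also where io->buf_index stops being synonymous with the tag: in per_io_tasks mode it is a dense, thread-local counter (0, 1, 2, ...) into that thread's sparse buffer table, while in the 1:1 mode it remains equal to the tag. Continuing the illustrative 2-queue, depth-4, 3-thread example given after the commit message, thread 1 fetches (q0,tag1), (q1,tag0) and (q1,tag3) and assigns them buf_index 0, 1 and 2. This is why the target code in file_backed.c, null.c and stripe.c switches from passing tag to passing the io's buf_index when registering and unregistering buffers.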

@@ -826,7 +855,8 @@ static void *ublk_io_handler_fn(void *data)
 		return NULL;
 	}
 	/* IO perf is sensitive with queue pthread affinity on NUMA machine*/
-	ublk_thread_set_sched_affinity(t, info->affinity);
+	if (info->affinity)
+		ublk_thread_set_sched_affinity(t, info->affinity);
 	sem_post(info->ready);

 	ublk_dbg(UBLK_DBG_THREAD, "tid %d: ublk dev %d thread %u started\n",
@@ -893,7 +923,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)

 	ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);

-	tinfo = calloc(sizeof(struct ublk_thread_info), dinfo->nr_hw_queues);
+	tinfo = calloc(sizeof(struct ublk_thread_info), dev->nthreads);
 	if (!tinfo)
 		return -ENOMEM;

@@ -919,17 +949,29 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
 					dinfo->dev_id, i);
 			goto fail;
 		}
+	}

+	for (i = 0; i < dev->nthreads; i++) {
 		tinfo[i].dev = dev;
 		tinfo[i].idx = i;
 		tinfo[i].ready = &ready;
-		tinfo[i].affinity = &affinity_buf[i];
+
+		/*
+		 * If threads are not tied 1:1 to queues, setting thread
+		 * affinity based on queue affinity makes little sense.
+		 * However, thread CPU affinity has significant impact
+		 * on performance, so to compare fairly, we'll still set
+		 * thread CPU affinity based on queue affinity where
+		 * possible.
+		 */
+		if (dev->nthreads == dinfo->nr_hw_queues)
+			tinfo[i].affinity = &affinity_buf[i];
 		pthread_create(&dev->threads[i].thread, NULL,
 				ublk_io_handler_fn,
 				&tinfo[i]);
 	}

-	for (i = 0; i < dinfo->nr_hw_queues; i++)
+	for (i = 0; i < dev->nthreads; i++)
 		sem_wait(&ready);
 	free(tinfo);
 	free(affinity_buf);
@@ -953,7 +995,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
 	ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);

 	/* wait until we are terminated */
-	for (i = 0; i < dinfo->nr_hw_queues; i++)
+	for (i = 0; i < dev->nthreads; i++)
 		pthread_join(dev->threads[i].thread, &thread_ret);
 fail:
 	for (i = 0; i < dinfo->nr_hw_queues; i++)
@@ -1063,6 +1105,7 @@ static int ublk_stop_io_daemon(const struct ublk_dev *dev)

 static int __cmd_dev_add(const struct dev_ctx *ctx)
 {
+	unsigned nthreads = ctx->nthreads;
 	unsigned nr_queues = ctx->nr_hw_queues;
 	const char *tgt_type = ctx->tgt_type;
 	unsigned depth = ctx->queue_depth;
@@ -1086,6 +1129,23 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
 		return -EINVAL;
 	}

+	/* default to 1:1 threads:queues if nthreads is unspecified */
+	if (!nthreads)
+		nthreads = nr_queues;
+
+	if (nthreads > UBLK_MAX_THREADS) {
+		ublk_err("%s: %u is too many threads (max %u)\n",
+			__func__, nthreads, UBLK_MAX_THREADS);
+		return -EINVAL;
+	}
+
+	if (nthreads != nr_queues && !ctx->per_io_tasks) {
+		ublk_err("%s: threads %u must be same as queues %u if "
+			"not using per_io_tasks\n",
+			__func__, nthreads, nr_queues);
+		return -EINVAL;
+	}
+
 	dev = ublk_ctrl_init();
 	if (!dev) {
 		ublk_err("%s: can't alloc dev id %d, type %s\n",
@@ -1109,6 +1169,8 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
 	if ((features & UBLK_F_QUIESCE) &&
 			(info->flags & UBLK_F_USER_RECOVERY))
 		info->flags |= UBLK_F_QUIESCE;
+	dev->nthreads = nthreads;
+	dev->per_io_tasks = ctx->per_io_tasks;
 	dev->tgt.ops = ops;
 	dev->tgt.sq_depth = depth;
 	dev->tgt.cq_depth = depth;
@@ -1307,6 +1369,7 @@ static int cmd_dev_get_features(void)
 		[const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE",
 		[const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG",
 		[const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE",
+		[const_ilog2(UBLK_F_PER_IO_DAEMON)] = "PER_IO_DAEMON",
 	};
 	struct ublk_dev *dev;
 	__u64 features = 0;
@@ -1401,8 +1464,10 @@ static void __cmd_create_help(char *exe, bool recovery)
 			exe, recovery ? "recover" : "add");
 	printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n");
 	printf("\t[-e 0|1 ] [-i 0|1]\n");
+	printf("\t[--nthreads threads] [--per_io_tasks]\n");
 	printf("\t[target options] [backfile1] [backfile2] ...\n");
 	printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
+	printf("\tdefault: nthreads=nr_queues");

 	for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
 		const struct ublk_tgt_ops *ops = tgt_ops_list[i];
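For reference, a hypothetical invocation of the new mode could look like the following (the binary name, the null target, and the -t option are assumptions based on the rest of the tool and are not shown in this hunk); passing an --nthreads value different from the queue count without --per_io_tasks is rejected by the validation added in __cmd_dev_add():

	./kublk add -t null --nthreads 4 --per_io_tasks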
@@ -1459,6 +1524,8 @@ int main(int argc, char *argv[])
 		{ "auto_zc", 0, NULL, 0 },
 		{ "auto_zc_fallback", 0, NULL, 0 },
 		{ "size", 1, NULL, 's'},
+		{ "nthreads", 1, NULL, 0 },
+		{ "per_io_tasks", 0, NULL, 0 },
 		{ 0, 0, 0, 0 }
 	};
 	const struct ublk_tgt_ops *ops = NULL;
@@ -1534,6 +1601,10 @@ int main(int argc, char *argv[])
 			ctx.flags |= UBLK_F_AUTO_BUF_REG;
 			if (!strcmp(longopts[option_idx].name, "auto_zc_fallback"))
 				ctx.auto_zc_fallback = 1;
+			if (!strcmp(longopts[option_idx].name, "nthreads"))
+				ctx.nthreads = strtol(optarg, NULL, 10);
+			if (!strcmp(longopts[option_idx].name, "per_io_tasks"))
+				ctx.per_io_tasks = 1;
 			break;
 		case '?':
 			/*

tools/testing/selftests/ublk/kublk.h

Lines changed: 5 additions & 0 deletions
@@ -80,6 +80,7 @@ struct dev_ctx {
 	char tgt_type[16];
 	unsigned long flags;
 	unsigned nr_hw_queues;
+	unsigned short nthreads;
 	unsigned queue_depth;
 	int dev_id;
 	int nr_files;
@@ -89,6 +90,7 @@ struct dev_ctx {
 	unsigned int fg:1;
 	unsigned int recovery:1;
 	unsigned int auto_zc_fallback:1;
+	unsigned int per_io_tasks:1;

 	int _evtfd;
 	int _shmid;
@@ -131,6 +133,7 @@ struct ublk_io {

 	int result;

+	unsigned short buf_index;
 	unsigned short tgt_ios;
 	void *private_data;
 	struct ublk_thread *t;
@@ -203,6 +206,8 @@ struct ublk_dev {
 	struct ublksrv_ctrl_dev_info dev_info;
 	struct ublk_queue q[UBLK_MAX_QUEUES];
 	struct ublk_thread threads[UBLK_MAX_THREADS];
+	unsigned nthreads;
+	unsigned per_io_tasks;

 	int fds[MAX_BACK_FILES + 1];	/* fds[0] points to /dev/ublkcN */
 	int nr_fds;

tools/testing/selftests/ublk/null.c

Lines changed: 3 additions & 3 deletions
@@ -62,15 +62,15 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag)

 	ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3);

-	io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
+	io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
 	sqe[0]->user_data = build_user_data(tag,
 			ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
 	sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;

 	__setup_nop_io(tag, iod, sqe[1], q->q_id);
 	sqe[1]->flags |= IOSQE_IO_HARDLINK;

-	io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
+	io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index);
 	sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, q->q_id, 1);

 	// buf register is marked as IOSQE_CQE_SKIP_SUCCESS
@@ -136,7 +136,7 @@ static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag)
 {
 	if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK)
 		return (unsigned short)-1;
-	return tag;
+	return q->ios[tag].buf_index;
 }

 const struct ublk_tgt_ops null_tgt_ops = {

tools/testing/selftests/ublk/stripe.c

Lines changed: 2 additions & 2 deletions
@@ -141,7 +141,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
 	ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, s->nr + extra);

 	if (zc) {
-		io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
+		io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index);
 		sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
 		sqe[0]->user_data = build_user_data(tag,
 			ublk_cmd_op_nr(sqe[0]->cmd_op), 0, q->q_id, 1);
@@ -167,7 +167,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_
 	if (zc) {
 		struct io_uring_sqe *unreg = sqe[s->nr + 1];

-		io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, tag);
+		io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, io->buf_index);
 		unreg->user_data = build_user_data(
 			tag, ublk_cmd_op_nr(unreg->cmd_op), 0, q->q_id, 1);
 	}
