
Commit 3ab6e94

Ming Lei authored and axboe committed
ublk_drv: avoid to touch io_uring cmd in blk_mq io path
io_uring cmd is meant to be used mainly in the ubq daemon context, so we should avoid touching it in the ublk io submission context; otherwise its data becomes shared between the two contexts and performance suffers. Instead, link each request into one per-queue list and keep the same batching policy as for io_uring commands, just without touching the ucmd in the blk-mq io path. (A user-space sketch of the resulting batching pattern follows the diff below.)

Signed-off-by: Ming Lei <[email protected]>
Reviewed-by: ZiyangZhang <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
1 parent d57c2c6 commit 3ab6e94

File tree

1 file changed: +53 −30 lines changed


drivers/block/ublk_drv.c

Lines changed: 53 additions & 30 deletions
@@ -57,11 +57,14 @@
 #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
 
 struct ublk_rq_data {
-        struct callback_head work;
+        union {
+                struct callback_head work;
+                struct llist_node node;
+        };
 };
 
 struct ublk_uring_cmd_pdu {
-        struct request *req;
+        struct ublk_queue *ubq;
 };
 
 /*
@@ -119,6 +122,8 @@ struct ublk_queue {
         struct task_struct *ubq_daemon;
         char *io_cmd_buf;
 
+        struct llist_head io_cmds;
+
         unsigned long io_addr;  /* mapped vm address */
         unsigned int max_io_sz;
         bool force_abort;
@@ -764,8 +769,12 @@ static inline void __ublk_rq_task_work(struct request *req)
 static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
 {
         struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+        struct ublk_queue *ubq = pdu->ubq;
+        struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+        struct ublk_rq_data *data;
 
-        __ublk_rq_task_work(pdu->req);
+        llist_for_each_entry(data, io_cmds, node)
+                __ublk_rq_task_work(blk_mq_rq_from_pdu(data));
 }
 
 static void ublk_rq_task_work_fn(struct callback_head *work)
@@ -777,17 +786,50 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
         __ublk_rq_task_work(req);
 }
 
+static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq)
+{
+        struct ublk_io *io = &ubq->ios[rq->tag];
+
+        /*
+         * If the check pass, we know that this is a re-issued request aborted
+         * previously in monitor_work because the ubq_daemon(cmd's task) is
+         * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
+         * because this ioucmd's io_uring context may be freed now if no inflight
+         * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
+         *
+         * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
+         * the tag). Then the request is re-started(allocating the tag) and we are here.
+         * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
+         * guarantees that here is a re-issued request aborted previously.
+         */
+        if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
+                struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+                struct ublk_rq_data *data;
+
+                llist_for_each_entry(data, io_cmds, node)
+                        __ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
+        } else {
+                struct io_uring_cmd *cmd = io->cmd;
+                struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+                pdu->ubq = ubq;
+                io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+        }
+}
+
 static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
                 const struct blk_mq_queue_data *bd)
 {
         struct ublk_queue *ubq = hctx->driver_data;
         struct request *rq = bd->rq;
+        struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
         blk_status_t res;
 
         /* fill iod to slot in io cmd buffer */
         res = ublk_setup_iod(ubq, rq);
         if (unlikely(res != BLK_STS_OK))
                 return BLK_STS_IOERR;
+
         /* With recovery feature enabled, force_abort is set in
          * ublk_stop_dev() before calling del_gendisk(). We have to
          * abort all requeued and new rqs here to let del_gendisk()
@@ -809,36 +851,15 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
         }
 
         if (ublk_can_use_task_work(ubq)) {
-                struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
                 enum task_work_notify_mode notify_mode = bd->last ?
                         TWA_SIGNAL_NO_IPI : TWA_NONE;
 
                 if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
                         goto fail;
         } else {
-                struct ublk_io *io = &ubq->ios[rq->tag];
-                struct io_uring_cmd *cmd = io->cmd;
-                struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
-                /*
-                 * If the check pass, we know that this is a re-issued request aborted
-                 * previously in monitor_work because the ubq_daemon(cmd's task) is
-                 * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
-                 * because this ioucmd's io_uring context may be freed now if no inflight
-                 * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
-                 *
-                 * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
-                 * the tag). Then the request is re-started(allocating the tag) and we are here.
-                 * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
-                 * guarantees that here is a re-issued request aborted previously.
-                 */
-                if ((io->flags & UBLK_IO_FLAG_ABORTED))
-                        goto fail;
-
-                pdu->req = rq;
-                io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+                if (llist_add(&data->node, &ubq->io_cmds))
+                        ublk_submit_cmd(ubq, rq);
         }
-
         return BLK_STS_OK;
 }
 
@@ -1168,17 +1189,19 @@ static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
 {
         struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
         struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
+        struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
 
         if (ublk_can_use_task_work(ubq)) {
-                struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
-
                 /* should not fail since we call it just in ubq->ubq_daemon */
                 task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
         } else {
                 struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
 
-                pdu->req = req;
-                io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+                if (llist_add(&data->node, &ubq->io_cmds)) {
+                        pdu->ubq = ubq;
+                        io_uring_cmd_complete_in_task(cmd,
+                                        ublk_rq_task_work_cb);
+                }
         }
 }