Commit 4869f57
Merge tag 'block-6.1-2022-11-05' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe:

 - Fixes for the ublk driver (Ming)

 - Fixes for error handling memory leaks (Chen Jun, Chen Zhongjin)

 - Explicitly clear the last request in a chain when the plug is
   flushed, as it may have already been issued (Al)

* tag 'block-6.1-2022-11-05' of git://git.kernel.dk/linux:
  block: blk_add_rq_to_plug(): clear stale 'last' after flush
  blk-mq: Fix kmemleak in blk_mq_init_allocated_queue
  block: Fix possible memory leak for rq_wb on add_disk failure
  ublk_drv: add ublk_queue_cmd() for cleanup
  ublk_drv: avoid to touch io_uring cmd in blk_mq io path
  ublk_drv: comment on ublk_driver entry of Kconfig
  ublk_drv: return flag of UBLK_F_URING_CMD_COMP_IN_TASK in case of module
2 parents: b208b9f + 878eb6e

4 files changed, +77 −50 lines

block/blk-mq.c

Lines changed: 2 additions & 3 deletions
@@ -1262,6 +1262,7 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
 		   (!blk_queue_nomerges(rq->q) &&
 		    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
 		blk_mq_flush_plug_list(plug, false);
+		last = NULL;
 		trace_block_plug(rq->q);
 	}
 
@@ -4193,9 +4194,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	return 0;
 
 err_hctxs:
-	xa_destroy(&q->hctx_table);
-	q->nr_hw_queues = 0;
-	blk_mq_sysfs_deinit(q);
+	blk_mq_release(q);
 err_poll:
 	blk_stat_free_callback(q->poll_cb);
 	q->poll_cb = NULL;
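For context on the first hunk above: blk_add_rq_to_plug() samples the current tail of the plug list into 'last' before deciding whether to flush. Once blk_mq_flush_plug_list() has run, the request that 'last' pointed at may already have been issued and freed, yet the pointer is still consulted further down the function, so the fix resets it to NULL. A minimal, self-contained user-space analogue of the hazard (toy names and thresholds, not the kernel code):

#include <stdio.h>
#include <stdlib.h>

struct req { int id; struct req *next; };
struct plug { struct req *head; int count; };

/* Issue and free everything currently held in the plug. */
static void flush_plug(struct plug *plug)
{
	while (plug->head) {
		struct req *rq = plug->head;

		plug->head = rq->next;
		printf("issue req %d\n", rq->id);
		free(rq);		/* any old pointer to rq now dangles */
	}
	plug->count = 0;
}

static void add_to_plug(struct plug *plug, struct req *rq)
{
	struct req *last = plug->head;	/* sampled before a possible flush */

	if (plug->count >= 4) {
		flush_plug(plug);
		last = NULL;		/* the fix: forget the freed request */
	}

	/* The kernel later compares fields of 'last' against the new request;
	 * model that with any dereference - it must not touch freed memory. */
	if (last && last->id != rq->id)
		printf("batch continues after req %d\n", last->id);

	rq->next = plug->head;
	plug->head = rq;
	plug->count++;
}

int main(void)
{
	struct plug plug = { .head = NULL, .count = 0 };

	for (int i = 0; i < 10; i++) {
		struct req *rq = calloc(1, sizeof(*rq));

		rq->id = i;
		add_to_plug(&plug, rq);
	}
	flush_plug(&plug);
	return 0;
}

Dropping the 'last = NULL;' line and building with -fsanitize=address should make the use-after-free visible on the fifth insertion, which is the same class of bug the kernel fix closes.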

block/genhd.c

Lines changed: 1 addition & 0 deletions
@@ -527,6 +527,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
 	bdi_unregister(disk->bdi);
 out_unregister_queue:
 	blk_unregister_queue(disk);
+	rq_qos_exit(disk->queue);
 out_put_slave_dir:
 	kobject_put(disk->slave_dir);
 out_put_holder_dir:

drivers/block/Kconfig

Lines changed: 6 additions & 0 deletions
@@ -408,6 +408,12 @@ config BLK_DEV_UBLK
 	  definition isn't finalized yet, and might change according to future
 	  requirement, so mark is as experimental now.
 
+	  Say Y if you want to get better performance because task_work_add()
+	  can be used in IO path for replacing io_uring cmd, which will become
+	  shared between IO tasks and ubq daemon, meantime task_work_add() can
+	  can handle batch more effectively, but task_work_add() isn't exported
+	  for module, so ublk has to be built to kernel.
+
 source "drivers/block/rnbd/Kconfig"
 
 endif # BLK_DEV
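The new help text pairs with the last ublk_drv.c hunk below: when ublk_drv is built as a module, task_work_add() is unavailable, so ublk_ctrl_add_dev() forces UBLK_F_URING_CMD_COMP_IN_TASK and completions are bounced through io_uring task work instead. A userspace ublk server can tell which mode it ended up with from the device flags reported back by the control device; a minimal sketch, assuming the flag value from the uapi header of that time and that the caller has already fetched the flags word (how it is fetched is out of scope here):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Flag value as defined in <linux/ublk_cmd.h> at the time; guarded so the
 * sketch stands alone, but the real uapi header should be preferred. */
#ifndef UBLK_F_URING_CMD_COMP_IN_TASK
#define UBLK_F_URING_CMD_COMP_IN_TASK	(1ULL << 1)
#endif

/* dev_flags is assumed to be the flags word reported back for the device
 * after ADD_DEV / GET_DEV_INFO. */
static bool ublk_completes_in_task(uint64_t dev_flags)
{
	/* Forced on by the driver when it is built as a module, because
	 * task_work_add() is not exported for modules. */
	return dev_flags & UBLK_F_URING_CMD_COMP_IN_TASK;
}

int main(void)
{
	uint64_t dev_flags = UBLK_F_URING_CMD_COMP_IN_TASK;	/* e.g. modular ublk_drv */

	printf("completions via io_uring task work: %s\n",
	       ublk_completes_in_task(dev_flags) ? "yes" : "no");
	return 0;
}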

drivers/block/ublk_drv.c

Lines changed: 68 additions & 47 deletions
@@ -57,11 +57,14 @@
 #define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
 
 struct ublk_rq_data {
-	struct callback_head work;
+	union {
+		struct callback_head work;
+		struct llist_node node;
+	};
 };
 
 struct ublk_uring_cmd_pdu {
-	struct request *req;
+	struct ublk_queue *ubq;
 };
 
 /*
@@ -119,6 +122,8 @@ struct ublk_queue {
 	struct task_struct	*ubq_daemon;
 	char *io_cmd_buf;
 
+	struct llist_head	io_cmds;
+
 	unsigned long io_addr;	/* mapped vm address */
 	unsigned int max_io_sz;
 	bool force_abort;
@@ -764,8 +769,12 @@ static inline void __ublk_rq_task_work(struct request *req)
 static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
 {
 	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+	struct ublk_queue *ubq = pdu->ubq;
+	struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+	struct ublk_rq_data *data;
 
-	__ublk_rq_task_work(pdu->req);
+	llist_for_each_entry(data, io_cmds, node)
+		__ublk_rq_task_work(blk_mq_rq_from_pdu(data));
 }
 
 static void ublk_rq_task_work_fn(struct callback_head *work)
@@ -777,6 +786,54 @@ static void ublk_rq_task_work_fn(struct callback_head *work)
 	__ublk_rq_task_work(req);
 }
 
+static void ublk_submit_cmd(struct ublk_queue *ubq, const struct request *rq)
+{
+	struct ublk_io *io = &ubq->ios[rq->tag];
+
+	/*
+	 * If the check pass, we know that this is a re-issued request aborted
+	 * previously in monitor_work because the ubq_daemon(cmd's task) is
+	 * PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
+	 * because this ioucmd's io_uring context may be freed now if no inflight
+	 * ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
+	 *
+	 * Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
+	 * the tag). Then the request is re-started(allocating the tag) and we are here.
+	 * Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
+	 * guarantees that here is a re-issued request aborted previously.
+	 */
+	if (unlikely(io->flags & UBLK_IO_FLAG_ABORTED)) {
+		struct llist_node *io_cmds = llist_del_all(&ubq->io_cmds);
+		struct ublk_rq_data *data;
+
+		llist_for_each_entry(data, io_cmds, node)
+			__ublk_abort_rq(ubq, blk_mq_rq_from_pdu(data));
+	} else {
+		struct io_uring_cmd *cmd = io->cmd;
+		struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+		pdu->ubq = ubq;
+		io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+	}
+}
+
+static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq,
+		bool last)
+{
+	struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
+
+	if (ublk_can_use_task_work(ubq)) {
+		enum task_work_notify_mode notify_mode = last ?
+			TWA_SIGNAL_NO_IPI : TWA_NONE;
+
+		if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
+			__ublk_abort_rq(ubq, rq);
+	} else {
+		if (llist_add(&data->node, &ubq->io_cmds))
+			ublk_submit_cmd(ubq, rq);
+	}
+}
+
 static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
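The pair of helpers added here encode the batching contract the driver now relies on: llist_add() returns true only when the list was empty, so only the first request of a batch schedules the io_uring task-work callback (ublk_rq_task_work_cb() above), and that single callback drains every request queued since with llist_del_all(). A self-contained user-space analogue of that pattern, illustrative only (C11 atomics stand in for the kernel's llist, and all names here are made up):

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct node {
	int tag;
	struct node *next;
};

struct queue {
	_Atomic(struct node *) head;
};

/* Mirrors llist_add(): push one node, report whether the list was empty. */
static bool lockless_add(struct queue *q, struct node *n)
{
	struct node *old = atomic_load(&q->head);

	do {
		n->next = old;
	} while (!atomic_compare_exchange_weak(&q->head, &old, n));

	return old == NULL;
}

/* Mirrors llist_del_all(): detach the whole batch in one step. */
static struct node *lockless_del_all(struct queue *q)
{
	return atomic_exchange(&q->head, NULL);
}

/* Stand-in for the task-work callback: handle every queued node. */
static void drain(struct queue *q)
{
	for (struct node *n = lockless_del_all(q); n; n = n->next)
		printf("handle tag %d\n", n->tag);
}

int main(void)
{
	struct queue q = { .head = NULL };
	struct node reqs[3] = { { .tag = 0 }, { .tag = 1 }, { .tag = 2 } };

	for (int i = 0; i < 3; i++) {
		/* Only the push that finds the list empty "schedules" the
		 * drain; later pushes ride on the drain already pending. */
		if (lockless_add(&q, &reqs[i]))
			printf("schedule drain (tag %d was first)\n", i);
	}

	drain(&q);	/* one callback handles the whole batch */
	return 0;
}

This "first producer schedules, one consumer drains all" shape is what lets ublk_queue_rq() stop touching the io_uring cmd for every request, per the "avoid to touch io_uring cmd in blk_mq io path" patch in this pull.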
@@ -788,6 +845,7 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
 	res = ublk_setup_iod(ubq, rq);
 	if (unlikely(res != BLK_STS_OK))
 		return BLK_STS_IOERR;
+
 	/* With recovery feature enabled, force_abort is set in
 	 * ublk_stop_dev() before calling del_gendisk(). We have to
 	 * abort all requeued and new rqs here to let del_gendisk()
803861
blk_mq_start_request(bd->rq);
804862

805863
if (unlikely(ubq_daemon_is_dying(ubq))) {
806-
fail:
807864
__ublk_abort_rq(ubq, rq);
808865
return BLK_STS_OK;
809866
}
810867

811-
if (ublk_can_use_task_work(ubq)) {
812-
struct ublk_rq_data *data = blk_mq_rq_to_pdu(rq);
813-
enum task_work_notify_mode notify_mode = bd->last ?
814-
TWA_SIGNAL_NO_IPI : TWA_NONE;
815-
816-
if (task_work_add(ubq->ubq_daemon, &data->work, notify_mode))
817-
goto fail;
818-
} else {
819-
struct ublk_io *io = &ubq->ios[rq->tag];
820-
struct io_uring_cmd *cmd = io->cmd;
821-
struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
822-
823-
/*
824-
* If the check pass, we know that this is a re-issued request aborted
825-
* previously in monitor_work because the ubq_daemon(cmd's task) is
826-
* PF_EXITING. We cannot call io_uring_cmd_complete_in_task() anymore
827-
* because this ioucmd's io_uring context may be freed now if no inflight
828-
* ioucmd exists. Otherwise we may cause null-deref in ctx->fallback_work.
829-
*
830-
* Note: monitor_work sets UBLK_IO_FLAG_ABORTED and ends this request(releasing
831-
* the tag). Then the request is re-started(allocating the tag) and we are here.
832-
* Since releasing/allocating a tag implies smp_mb(), finding UBLK_IO_FLAG_ABORTED
833-
* guarantees that here is a re-issued request aborted previously.
834-
*/
835-
if ((io->flags & UBLK_IO_FLAG_ABORTED))
836-
goto fail;
837-
838-
pdu->req = rq;
839-
io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
840-
}
868+
ublk_queue_cmd(ubq, rq, bd->last);
841869

842870
return BLK_STS_OK;
843871
}
@@ -1164,22 +1192,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
 }
 
 static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
-		int tag, struct io_uring_cmd *cmd)
+		int tag)
 {
 	struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
 	struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
 
-	if (ublk_can_use_task_work(ubq)) {
-		struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
-
-		/* should not fail since we call it just in ubq->ubq_daemon */
-		task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
-	} else {
-		struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
-
-		pdu->req = req;
-		io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
-	}
+	ublk_queue_cmd(ubq, req, true);
 }
 
 static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
@@ -1267,7 +1285,7 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
 		io->addr = ub_cmd->addr;
 		io->cmd = cmd;
 		io->flags |= UBLK_IO_FLAG_ACTIVE;
-		ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
+		ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
 		break;
 	default:
 		goto out;
@@ -1658,6 +1676,9 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
 	 */
 	ub->dev_info.flags &= UBLK_F_ALL;
 
+	if (!IS_BUILTIN(CONFIG_BLK_DEV_UBLK))
+		ub->dev_info.flags |= UBLK_F_URING_CMD_COMP_IN_TASK;
+
 	/* We are not ready to support zero copy */
 	ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
 