Skip to content

Commit 91928e0

Browse files
committed
Merge tag 'for-6.15/io_uring-20250322' of git://git.kernel.dk/linux
Pull io_uring updates from Jens Axboe: "This is the first of the io_uring pull requests for the 6.15 merge window, there will be others once the net tree has gone in. This contains: - Cleanup and unification of cancelation handling across various request types. - Improvement for bundles, supporting them both for incrementally consumed buffers, and for non-multishot requests. - Enable toggling of using iowait while waiting on io_uring events or not. Unfortunately this is still tied with CPU frequency boosting on short waits, as the scheduler side has not been very receptive to splitting the (useless) iowait stat from the cpufreq implied boost. - Add support for kbuf nodes, enabling zero-copy support for the ublk block driver. - Various cleanups for resource node handling. - Series greatly cleaning up the legacy provided (non-ring based) buffers. For years, we've been pushing the ring provided buffers as the way to go, and that is what people have been using. Reduce the complexity and code associated with legacy provided buffers. - Series cleaning up the compat handling. - Series improving and cleaning up the recvmsg/sendmsg iovec and msg handling. - Series of cleanups for io-wq. - Start adding a bunch of selftests. The liburing repository generally carries feature and regression tests for everything, but at least for ublk initially, we'll try and go the route of having it in selftests as well. We'll see how this goes, might decide to migrate more tests this way in the future. 
- Various little cleanups and fixes" * tag 'for-6.15/io_uring-20250322' of git://git.kernel.dk/linux: (108 commits) selftests: ublk: add stripe target selftests: ublk: simplify loop io completion selftests: ublk: enable zero copy for null target selftests: ublk: prepare for supporting stripe target selftests: ublk: move common code into common.c selftests: ublk: increase max buffer size to 1MB selftests: ublk: add single sqe allocator helper selftests: ublk: add generic_01 for verifying sequential IO order selftests: ublk: fix starting ublk device io_uring: enable toggle of iowait usage when waiting on CQEs selftests: ublk: fix write cache implementation selftests: ublk: add variable for user to not show test result selftests: ublk: don't show `modprobe` failure selftests: ublk: add one dependency header io_uring/kbuf: enable bundles for incrementally consumed buffers Revert "io_uring/rsrc: simplify the bvec iter count calculation" selftests: ublk: improve test usability selftests: ublk: add stress test for covering IO vs. killing ublk server selftests: ublk: add one stress test for covering IO vs. removing device selftests: ublk: load/unload ublk_drv when preparing & cleaning up tests ...
2 parents 1e1ba8d + 0f3ebf2 commit 91928e0

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

58 files changed

+3883
-872
lines changed

MAINTAINERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24397,6 +24397,7 @@ S: Maintained
2439724397
F: Documentation/block/ublk.rst
2439824398
F: drivers/block/ublk_drv.c
2439924399
F: include/uapi/linux/ublk_cmd.h
24400+
F: tools/testing/selftests/ublk/
2440024401

2440124402
UBSAN
2440224403
M: Kees Cook <[email protected]>

drivers/block/ublk_drv.c

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@
5151
/* private ioctl command mirror */
5252
#define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC)
5353

54+
#define UBLK_IO_REGISTER_IO_BUF _IOC_NR(UBLK_U_IO_REGISTER_IO_BUF)
55+
#define UBLK_IO_UNREGISTER_IO_BUF _IOC_NR(UBLK_U_IO_UNREGISTER_IO_BUF)
56+
5457
/* All UBLK_F_* have to be included into UBLK_F_ALL */
5558
#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
5659
| UBLK_F_URING_CMD_COMP_IN_TASK \
@@ -196,12 +199,14 @@ struct ublk_params_header {
196199

197200
static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq);
198201

202+
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
203+
struct ublk_queue *ubq, int tag, size_t offset);
199204
static inline unsigned int ublk_req_build_flags(struct request *req);
200205
static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
201206
int tag);
202207
static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub)
203208
{
204-
return ub->dev_info.flags & UBLK_F_USER_COPY;
209+
return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);
205210
}
206211

207212
static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
@@ -581,7 +586,7 @@ static void ublk_apply_params(struct ublk_device *ub)
581586

582587
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
583588
{
584-
return ubq->flags & UBLK_F_USER_COPY;
589+
return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);
585590
}
586591

587592
static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
@@ -1747,6 +1752,42 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
17471752
io_uring_cmd_mark_cancelable(cmd, issue_flags);
17481753
}
17491754

1755+
static void ublk_io_release(void *priv)
1756+
{
1757+
struct request *rq = priv;
1758+
struct ublk_queue *ubq = rq->mq_hctx->driver_data;
1759+
1760+
ublk_put_req_ref(ubq, rq);
1761+
}
1762+
1763+
static int ublk_register_io_buf(struct io_uring_cmd *cmd,
1764+
struct ublk_queue *ubq, unsigned int tag,
1765+
unsigned int index, unsigned int issue_flags)
1766+
{
1767+
struct ublk_device *ub = cmd->file->private_data;
1768+
struct request *req;
1769+
int ret;
1770+
1771+
req = __ublk_check_and_get_req(ub, ubq, tag, 0);
1772+
if (!req)
1773+
return -EINVAL;
1774+
1775+
ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index,
1776+
issue_flags);
1777+
if (ret) {
1778+
ublk_put_req_ref(ubq, req);
1779+
return ret;
1780+
}
1781+
1782+
return 0;
1783+
}
1784+
1785+
static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
1786+
unsigned int index, unsigned int issue_flags)
1787+
{
1788+
return io_buffer_unregister_bvec(cmd, index, issue_flags);
1789+
}
1790+
17501791
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
17511792
unsigned int issue_flags,
17521793
const struct ublksrv_io_cmd *ub_cmd)
@@ -1798,6 +1839,10 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
17981839

17991840
ret = -EINVAL;
18001841
switch (_IOC_NR(cmd_op)) {
1842+
case UBLK_IO_REGISTER_IO_BUF:
1843+
return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
1844+
case UBLK_IO_UNREGISTER_IO_BUF:
1845+
return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags);
18011846
case UBLK_IO_FETCH_REQ:
18021847
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
18031848
if (ublk_queue_ready(ubq)) {
@@ -2459,7 +2504,7 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
24592504
* buffer by pwrite() to ublk char device, which can't be
24602505
* used for unprivileged device
24612506
*/
2462-
if (info.flags & UBLK_F_USER_COPY)
2507+
if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
24632508
return -EINVAL;
24642509
}
24652510

@@ -2527,9 +2572,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
25272572
goto out_free_dev_number;
25282573
}
25292574

2530-
/* We are not ready to support zero copy */
2531-
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
2532-
25332575
ub->dev_info.nr_hw_queues = min_t(unsigned int,
25342576
ub->dev_info.nr_hw_queues, nr_cpu_ids);
25352577
ublk_align_max_io_size(ub);
@@ -2863,7 +2905,7 @@ static int ublk_ctrl_get_features(struct io_uring_cmd *cmd)
28632905
{
28642906
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
28652907
void __user *argp = (void __user *)(unsigned long)header->addr;
2866-
u64 features = UBLK_F_ALL & ~UBLK_F_SUPPORT_ZERO_COPY;
2908+
u64 features = UBLK_F_ALL;
28672909

28682910
if (header->len != UBLK_FEATURES_LEN || !header->addr)
28692911
return -EINVAL;

drivers/nvme/host/ioctl.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,8 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
114114

115115
static int nvme_map_user_request(struct request *req, u64 ubuffer,
116116
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
117-
struct io_uring_cmd *ioucmd, unsigned int flags)
117+
struct io_uring_cmd *ioucmd, unsigned int flags,
118+
unsigned int iou_issue_flags)
118119
{
119120
struct request_queue *q = req->q;
120121
struct nvme_ns *ns = q->queuedata;
@@ -146,7 +147,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
146147
goto out;
147148
}
148149
ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
149-
rq_data_dir(req), &iter, ioucmd);
150+
rq_data_dir(req), &iter, ioucmd,
151+
iou_issue_flags);
150152
if (ret < 0)
151153
goto out;
152154
ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
@@ -198,7 +200,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
198200
req->timeout = timeout;
199201
if (ubuffer && bufflen) {
200202
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
201-
meta_len, NULL, flags);
203+
meta_len, NULL, flags, 0);
202204
if (ret)
203205
return ret;
204206
}
@@ -514,10 +516,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
514516
return PTR_ERR(req);
515517
req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
516518

517-
if (d.addr && d.data_len) {
519+
if (d.data_len) {
518520
ret = nvme_map_user_request(req, d.addr,
519521
d.data_len, nvme_to_user_ptr(d.metadata),
520-
d.metadata_len, ioucmd, vec);
522+
d.metadata_len, ioucmd, vec, issue_flags);
521523
if (ret)
522524
return ret;
523525
}

include/linux/io_uring/cmd.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#include <uapi/linux/io_uring.h>
66
#include <linux/io_uring_types.h>
7+
#include <linux/blk-mq.h>
78

89
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
910
#define IORING_URING_CMD_CANCELABLE (1U << 30)
@@ -39,7 +40,9 @@ static inline void io_uring_cmd_private_sz_check(size_t cmd_sz)
3940

4041
#if defined(CONFIG_IO_URING)
4142
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
42-
struct iov_iter *iter, void *ioucmd);
43+
struct iov_iter *iter,
44+
struct io_uring_cmd *ioucmd,
45+
unsigned int issue_flags);
4346

4447
/*
4548
* Completes the request, i.e. posts an io_uring CQE and deallocates @ioucmd
@@ -66,8 +69,10 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
6669
void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd);
6770

6871
#else
69-
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
70-
struct iov_iter *iter, void *ioucmd)
72+
static inline int
73+
io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
74+
struct iov_iter *iter, struct io_uring_cmd *ioucmd,
75+
unsigned int issue_flags)
7176
{
7277
return -EOPNOTSUPP;
7378
}
@@ -123,4 +128,10 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur
123128
return cmd_to_io_kiocb(cmd)->async_data;
124129
}
125130

131+
int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
132+
void (*release)(void *), unsigned int index,
133+
unsigned int issue_flags);
134+
int io_buffer_unregister_bvec(struct io_uring_cmd *cmd, unsigned int index,
135+
unsigned int issue_flags);
136+
126137
#endif /* _LINUX_IO_URING_CMD_H */

include/linux/io_uring_types.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -292,6 +292,8 @@ struct io_ring_ctx {
292292

293293
struct io_file_table file_table;
294294
struct io_rsrc_data buf_table;
295+
struct io_alloc_cache node_cache;
296+
struct io_alloc_cache imu_cache;
295297

296298
struct io_submit_state submit_state;
297299

@@ -360,7 +362,6 @@ struct io_ring_ctx {
360362

361363
spinlock_t completion_lock;
362364

363-
struct list_head io_buffers_comp;
364365
struct list_head cq_overflow_list;
365366

366367
struct hlist_head waitid_list;
@@ -379,8 +380,6 @@ struct io_ring_ctx {
379380
unsigned int file_alloc_start;
380381
unsigned int file_alloc_end;
381382

382-
struct list_head io_buffers_cache;
383-
384383
/* Keep this last, we don't need it for the fast path */
385384
struct wait_queue_head poll_wq;
386385
struct io_restriction restrictions;
@@ -439,8 +438,15 @@ struct io_ring_ctx {
439438
struct io_mapped_region param_region;
440439
};
441440

441+
/*
442+
* Token indicating function is called in task work context:
443+
* ctx->uring_lock is held and any completions generated will be flushed.
444+
* ONLY core io_uring.c should instantiate this struct.
445+
*/
442446
struct io_tw_state {
443447
};
448+
/* Alias to use in code that doesn't instantiate struct io_tw_state */
449+
typedef struct io_tw_state io_tw_token_t;
444450

445451
enum {
446452
REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
@@ -566,7 +572,7 @@ enum {
566572
REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
567573
};
568574

569-
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
575+
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);
570576

571577
struct io_task_work {
572578
struct llist_node node;
@@ -601,7 +607,11 @@ static inline void io_kiocb_cmd_sz_check(size_t cmd_sz)
601607
io_kiocb_cmd_sz_check(sizeof(cmd_type)) , \
602608
((cmd_type *)&(req)->cmd) \
603609
)
604-
#define cmd_to_io_kiocb(ptr) ((struct io_kiocb *) ptr)
610+
611+
static inline struct io_kiocb *cmd_to_io_kiocb(void *ptr)
612+
{
613+
return ptr;
614+
}
605615

606616
struct io_kiocb {
607617
union {

include/uapi/linux/io_uring.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -541,6 +541,7 @@ struct io_cqring_offsets {
541541
#define IORING_ENTER_REGISTERED_RING (1U << 4)
542542
#define IORING_ENTER_ABS_TIMER (1U << 5)
543543
#define IORING_ENTER_EXT_ARG_REG (1U << 6)
544+
#define IORING_ENTER_NO_IOWAIT (1U << 7)
544545

545546
/*
546547
* Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -578,6 +579,7 @@ struct io_uring_params {
578579
#define IORING_FEAT_RECVSEND_BUNDLE (1U << 14)
579580
#define IORING_FEAT_MIN_TIMEOUT (1U << 15)
580581
#define IORING_FEAT_RW_ATTR (1U << 16)
582+
#define IORING_FEAT_NO_IOWAIT (1U << 17)
581583

582584
/*
583585
* io_uring_register(2) opcodes and arguments

include/uapi/linux/ublk_cmd.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,10 @@
9494
_IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd)
9595
#define UBLK_U_IO_NEED_GET_DATA \
9696
_IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd)
97+
#define UBLK_U_IO_REGISTER_IO_BUF \
98+
_IOWR('u', 0x23, struct ublksrv_io_cmd)
99+
#define UBLK_U_IO_UNREGISTER_IO_BUF \
100+
_IOWR('u', 0x24, struct ublksrv_io_cmd)
97101

98102
/* only ABORT means that no re-fetch */
99103
#define UBLK_IO_RES_OK 0

io_uring/alloc_cache.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,10 @@ static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp)
6868
return io_cache_alloc_new(cache, gfp);
6969
}
7070

71+
static inline void io_cache_free(struct io_alloc_cache *cache, void *obj)
72+
{
73+
if (!io_alloc_cache_put(cache, obj))
74+
kfree(obj);
75+
}
76+
7177
#endif

io_uring/cancel.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,3 +341,45 @@ int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg)
341341
fput(file);
342342
return ret;
343343
}
344+
345+
bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
346+
struct hlist_head *list, bool cancel_all,
347+
bool (*cancel)(struct io_kiocb *))
348+
{
349+
struct hlist_node *tmp;
350+
struct io_kiocb *req;
351+
bool found = false;
352+
353+
lockdep_assert_held(&ctx->uring_lock);
354+
355+
hlist_for_each_entry_safe(req, tmp, list, hash_node) {
356+
if (!io_match_task_safe(req, tctx, cancel_all))
357+
continue;
358+
hlist_del_init(&req->hash_node);
359+
if (cancel(req))
360+
found = true;
361+
}
362+
363+
return found;
364+
}
365+
366+
int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
367+
unsigned int issue_flags, struct hlist_head *list,
368+
bool (*cancel)(struct io_kiocb *))
369+
{
370+
struct hlist_node *tmp;
371+
struct io_kiocb *req;
372+
int nr = 0;
373+
374+
io_ring_submit_lock(ctx, issue_flags);
375+
hlist_for_each_entry_safe(req, tmp, list, hash_node) {
376+
if (!io_cancel_req_match(req, cd))
377+
continue;
378+
if (cancel(req))
379+
nr++;
380+
if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
381+
break;
382+
}
383+
io_ring_submit_unlock(ctx, issue_flags);
384+
return nr ?: -ENOENT;
385+
}

io_uring/cancel.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
2424
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg);
2525
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd);
2626

27+
bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
28+
struct hlist_head *list, bool cancel_all,
29+
bool (*cancel)(struct io_kiocb *));
30+
31+
int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
32+
unsigned int issue_flags, struct hlist_head *list,
33+
bool (*cancel)(struct io_kiocb *));
34+
2735
static inline bool io_cancel_match_sequence(struct io_kiocb *req, int sequence)
2836
{
2937
if (req->cancel_seq_set && sequence == req->work.cancel_seq)

0 commit comments

Comments
 (0)