
Commit 3de7361

Merge branch 'io_uring-6.15' into for-6.16/io_uring

Merge in 6.15 io_uring fixes, mostly so that the fdinfo changes can get
easily extended without causing merge conflicts.

* io_uring-6.15:
  io_uring/fdinfo: grab ctx->uring_lock around io_uring_show_fdinfo()
  io_uring/memmap: don't use page_address() on a highmem page
  io_uring/uring_cmd: fix hybrid polling initialization issue
  io_uring/sqpoll: Increase task_work submission batch size
  io_uring: ensure deferred completions are flushed for multishot
  io_uring: always arm linked timeouts prior to issue
  io_uring/fdinfo: annotate racy sq/cq head/tail reads
  io_uring: fix 'sync' handling of io_fallback_tw()
  io_uring: don't duplicate flushing in io_req_post_cqe

2 parents: 2b61bb1 + d871198, commit 3de7361

File tree: 5 files changed, +72 / -71 lines

io_uring/fdinfo.c
Lines changed: 27 additions & 25 deletions

@@ -86,13 +86,8 @@ static inline void napi_show_fdinfo(struct io_ring_ctx *ctx,
 }
 #endif
 
-/*
- * Caller holds a reference to the file already, we don't need to do
- * anything else to get an extra reference.
- */
-__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
+static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 {
-	struct io_ring_ctx *ctx = file->private_data;
 	struct io_overflow_cqe *ocqe;
 	struct io_rings *r = ctx->rings;
 	struct rusage sq_usage;
@@ -106,7 +101,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	unsigned int sq_entries, cq_entries;
 	int sq_pid = -1, sq_cpu = -1;
 	u64 sq_total_time = 0, sq_work_time = 0;
-	bool has_lock;
 	unsigned int i;
 
 	if (ctx->flags & IORING_SETUP_CQE32)
@@ -123,11 +117,11 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	seq_printf(m, "SqMask:\t0x%x\n", sq_mask);
 	seq_printf(m, "SqHead:\t%u\n", sq_head);
 	seq_printf(m, "SqTail:\t%u\n", sq_tail);
-	seq_printf(m, "CachedSqHead:\t%u\n", ctx->cached_sq_head);
+	seq_printf(m, "CachedSqHead:\t%u\n", data_race(ctx->cached_sq_head));
 	seq_printf(m, "CqMask:\t0x%x\n", cq_mask);
 	seq_printf(m, "CqHead:\t%u\n", cq_head);
 	seq_printf(m, "CqTail:\t%u\n", cq_tail);
-	seq_printf(m, "CachedCqTail:\t%u\n", ctx->cached_cq_tail);
+	seq_printf(m, "CachedCqTail:\t%u\n", data_race(ctx->cached_cq_tail));
 	seq_printf(m, "SQEs:\t%u\n", sq_tail - sq_head);
 	sq_entries = min(sq_tail - sq_head, ctx->sq_entries);
 	for (i = 0; i < sq_entries; i++) {
@@ -176,15 +170,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 		seq_printf(m, "\n");
 	}
 
-	/*
-	 * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
-	 * since fdinfo case grabs it in the opposite direction of normal use
-	 * cases. If we fail to get the lock, we just don't iterate any
-	 * structures that could be going away outside the io_uring mutex.
-	 */
-	has_lock = mutex_trylock(&ctx->uring_lock);
-
-	if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
+	if (ctx->flags & IORING_SETUP_SQPOLL) {
 		struct io_sq_data *sq = ctx->sq_data;
 
 		/*
@@ -206,7 +192,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	seq_printf(m, "SqTotalTime:\t%llu\n", sq_total_time);
 	seq_printf(m, "SqWorkTime:\t%llu\n", sq_work_time);
 	seq_printf(m, "UserFiles:\t%u\n", ctx->file_table.data.nr);
-	for (i = 0; has_lock && i < ctx->file_table.data.nr; i++) {
+	for (i = 0; i < ctx->file_table.data.nr; i++) {
 		struct file *f = NULL;
 
 		if (ctx->file_table.data.nodes[i])
@@ -218,7 +204,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 		}
 	}
 	seq_printf(m, "UserBufs:\t%u\n", ctx->buf_table.nr);
-	for (i = 0; has_lock && i < ctx->buf_table.nr; i++) {
+	for (i = 0; i < ctx->buf_table.nr; i++) {
 		struct io_mapped_ubuf *buf = NULL;
 
 		if (ctx->buf_table.nodes[i])
@@ -228,7 +214,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 		else
 			seq_printf(m, "%5u: <none>\n", i);
 	}
-	if (has_lock && !xa_empty(&ctx->personalities)) {
+	if (!xa_empty(&ctx->personalities)) {
 		unsigned long index;
 		const struct cred *cred;
 
@@ -238,7 +224,7 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	}
 
 	seq_puts(m, "PollList:\n");
-	for (i = 0; has_lock && i < (1U << ctx->cancel_table.hash_bits); i++) {
+	for (i = 0; i < (1U << ctx->cancel_table.hash_bits); i++) {
 		struct io_hash_bucket *hb = &ctx->cancel_table.hbs[i];
 		struct io_kiocb *req;
 
@@ -247,9 +233,6 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 			   task_work_pending(req->tctx->task));
 	}
 
-	if (has_lock)
-		mutex_unlock(&ctx->uring_lock);
-
 	seq_puts(m, "CqOverflowList:\n");
 	spin_lock(&ctx->completion_lock);
 	list_for_each_entry(ocqe, &ctx->cq_overflow_list, list) {
@@ -262,4 +245,23 @@ __cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
 	spin_unlock(&ctx->completion_lock);
 	napi_show_fdinfo(ctx, m);
 }
+
+/*
+ * Caller holds a reference to the file already, we don't need to do
+ * anything else to get an extra reference.
+ */
+__cold void io_uring_show_fdinfo(struct seq_file *m, struct file *file)
+{
+	struct io_ring_ctx *ctx = file->private_data;
+
+	/*
+	 * Avoid ABBA deadlock between the seq lock and the io_uring mutex,
+	 * since fdinfo case grabs it in the opposite direction of normal use
+	 * cases.
+	 */
+	if (mutex_trylock(&ctx->uring_lock)) {
+		__io_uring_show_fdinfo(ctx, m);
+		mutex_unlock(&ctx->uring_lock);
+	}
+}
 #endif
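
For context, the output this file builds is what userspace reads back through procfs for an io_uring file descriptor. A minimal sketch of dumping it, assuming liburing is available (the path format and ring_fd field are standard liburing/procfs usage, nothing specific to this commit):

/* Sketch: print the fdinfo text for an io_uring fd (assumes liburing). */
#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	char path[64], buf[4096];
	size_t n;
	FILE *f;

	if (io_uring_queue_init(8, &ring, 0) < 0)
		return 1;

	/* /proc/<pid>/fdinfo/<fd> is where io_uring_show_fdinfo() output appears */
	snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", ring.ring_fd);
	f = fopen(path, "r");
	if (f) {
		while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
			fwrite(buf, 1, n, stdout);
		fclose(f);
	}

	io_uring_queue_exit(&ring);
	return 0;
}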

io_uring/io_uring.c
Lines changed: 38 additions & 44 deletions

@@ -430,24 +430,6 @@ static struct io_kiocb *__io_prep_linked_timeout(struct io_kiocb *req)
 	return req->link;
 }
 
-static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
-{
-	if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT)))
-		return NULL;
-	return __io_prep_linked_timeout(req);
-}
-
-static noinline void __io_arm_ltimeout(struct io_kiocb *req)
-{
-	io_queue_linked_timeout(__io_prep_linked_timeout(req));
-}
-
-static inline void io_arm_ltimeout(struct io_kiocb *req)
-{
-	if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT))
-		__io_arm_ltimeout(req);
-}
-
 static void io_prep_async_work(struct io_kiocb *req)
 {
 	const struct io_issue_def *def = &io_issue_defs[req->opcode];
@@ -500,7 +482,6 @@ static void io_prep_async_link(struct io_kiocb *req)
 
 static void io_queue_iowq(struct io_kiocb *req)
 {
-	struct io_kiocb *link = io_prep_linked_timeout(req);
 	struct io_uring_task *tctx = req->tctx;
 
 	BUG_ON(!tctx);
@@ -525,8 +506,6 @@ static void io_queue_iowq(struct io_kiocb *req)
 
 	trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work));
 	io_wq_enqueue(tctx->io_wq, &req->work);
-	if (link)
-		io_queue_linked_timeout(link);
 }
 
 static void io_req_queue_iowq_tw(struct io_kiocb *req, io_tw_token_t tw)
@@ -864,13 +843,26 @@ bool io_req_post_cqe(struct io_kiocb *req, s32 res, u32 cflags)
 	struct io_ring_ctx *ctx = req->ctx;
 	bool posted;
 
+	/*
+	 * If multishot has already posted deferred completions, ensure that
+	 * those are flushed first before posting this one. If not, CQEs
+	 * could get reordered.
+	 */
+	if (!wq_list_empty(&ctx->submit_state.compl_reqs))
+		__io_submit_flush_completions(ctx);
+
 	lockdep_assert(!io_wq_current_is_worker());
 	lockdep_assert_held(&ctx->uring_lock);
 
-	__io_cq_lock(ctx);
-	posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+	if (!ctx->lockless_cq) {
+		spin_lock(&ctx->completion_lock);
+		posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+		spin_unlock(&ctx->completion_lock);
+	} else {
+		posted = io_fill_cqe_aux(ctx, req->cqe.user_data, res, cflags);
+	}
+
 	ctx->submit_state.cq_flush = true;
-	__io_cq_unlock_post(ctx);
 	return posted;
 }
 
@@ -1058,21 +1050,22 @@ static __cold void __io_fallback_tw(struct llist_node *node, bool sync)
 	while (node) {
 		req = container_of(node, struct io_kiocb, io_task_work.node);
 		node = node->next;
-		if (sync && last_ctx != req->ctx) {
+		if (last_ctx != req->ctx) {
 			if (last_ctx) {
-				flush_delayed_work(&last_ctx->fallback_work);
+				if (sync)
+					flush_delayed_work(&last_ctx->fallback_work);
 				percpu_ref_put(&last_ctx->refs);
 			}
 			last_ctx = req->ctx;
 			percpu_ref_get(&last_ctx->refs);
 		}
-		if (llist_add(&req->io_task_work.node,
-			      &req->ctx->fallback_llist))
-			schedule_delayed_work(&req->ctx->fallback_work, 1);
+		if (llist_add(&req->io_task_work.node, &last_ctx->fallback_llist))
+			schedule_delayed_work(&last_ctx->fallback_work, 1);
 	}
 
 	if (last_ctx) {
-		flush_delayed_work(&last_ctx->fallback_work);
+		if (sync)
+			flush_delayed_work(&last_ctx->fallback_work);
 		percpu_ref_put(&last_ctx->refs);
 	}
 }
@@ -1684,15 +1677,22 @@ static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
 	return !!req->file;
 }
 
+#define REQ_ISSUE_SLOW_FLAGS	(REQ_F_CREDS | REQ_F_ARM_LTIMEOUT)
+
 static inline int __io_issue_sqe(struct io_kiocb *req,
 				 unsigned int issue_flags,
 				 const struct io_issue_def *def)
 {
 	const struct cred *creds = NULL;
+	struct io_kiocb *link = NULL;
 	int ret;
 
-	if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
-		creds = override_creds(req->creds);
+	if (unlikely(req->flags & REQ_ISSUE_SLOW_FLAGS)) {
+		if ((req->flags & REQ_F_CREDS) && req->creds != current_cred())
+			creds = override_creds(req->creds);
+		if (req->flags & REQ_F_ARM_LTIMEOUT)
+			link = __io_prep_linked_timeout(req);
+	}
 
 	if (!def->audit_skip)
 		audit_uring_entry(req->opcode);
@@ -1702,8 +1702,12 @@ static inline int __io_issue_sqe(struct io_kiocb *req,
 	if (!def->audit_skip)
 		audit_uring_exit(!ret, ret);
 
-	if (creds)
-		revert_creds(creds);
+	if (unlikely(creds || link)) {
+		if (creds)
+			revert_creds(creds);
+		if (link)
+			io_queue_linked_timeout(link);
+	}
 
 	return ret;
 }
@@ -1729,7 +1733,6 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (ret == IOU_ISSUE_SKIP_COMPLETE) {
 		ret = 0;
-		io_arm_ltimeout(req);
 
 		/* If the op doesn't have a file, we're not polling for it */
 		if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue)
@@ -1784,8 +1787,6 @@ void io_wq_submit_work(struct io_wq_work *work)
 	else
 		req_ref_get(req);
 
-	io_arm_ltimeout(req);
-
 	/* either cancelled or io-wq is dying, so don't touch tctx->iowq */
 	if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) {
 fail:
@@ -1902,15 +1903,11 @@ struct file *io_file_get_normal(struct io_kiocb *req, int fd)
 static void io_queue_async(struct io_kiocb *req, int ret)
 	__must_hold(&req->ctx->uring_lock)
 {
-	struct io_kiocb *linked_timeout;
-
 	if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) {
 		io_req_defer_failed(req, ret);
 		return;
 	}
 
-	linked_timeout = io_prep_linked_timeout(req);
-
 	switch (io_arm_poll_handler(req, 0)) {
 	case IO_APOLL_READY:
 		io_kbuf_recycle(req, 0);
@@ -1923,9 +1920,6 @@ static void io_queue_async(struct io_kiocb *req, int ret)
 	case IO_APOLL_OK:
 		break;
 	}
-
-	if (linked_timeout)
-		io_queue_linked_timeout(linked_timeout);
 }
 
 static inline void io_queue_sqe(struct io_kiocb *req)
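
The io_uring.c changes above fold linked-timeout arming into __io_issue_sqe() so the timeout is always armed before the request is issued, and make io_req_post_cqe() flush any deferred multishot completions first. A hedged userspace sketch of the linked-timeout pattern these paths serve (liburing; the fd, buffer, and one-second timeout are illustrative):

/* Sketch: an I/O request chained to a linked timeout (assumes liburing). */
#include <liburing.h>

int submit_read_with_timeout(struct io_uring *ring, int fd, void *buf, unsigned len)
{
	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
	struct io_uring_sqe *sqe;

	/* the I/O request; IOSQE_IO_LINK chains the following SQE to it */
	sqe = io_uring_get_sqe(ring);
	if (!sqe)
		return -1;
	io_uring_prep_read(sqe, fd, buf, len, 0);
	sqe->flags |= IOSQE_IO_LINK;

	/* the linked timeout: cancels the read if it hasn't completed in time */
	sqe = io_uring_get_sqe(ring);
	if (!sqe)
		return -1;
	io_uring_prep_link_timeout(sqe, &ts, 0);

	return io_uring_submit(ring);
}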

io_uring/memmap.c
Lines changed: 1 addition & 1 deletion

@@ -117,7 +117,7 @@ static int io_region_init_ptr(struct io_mapped_region *mr)
 	void *ptr;
 
 	if (io_check_coalesce_buffer(mr->pages, mr->nr_pages, &ifd)) {
-		if (ifd.nr_folios == 1) {
+		if (ifd.nr_folios == 1 && !PageHighMem(mr->pages[0])) {
 			mr->ptr = page_address(mr->pages[0]);
 			return 0;
 		}
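
The memmap.c fix skips the single-folio fast path for highmem pages: page_address() only returns a usable kernel address for pages in the direct map, so a highmem page must be mapped explicitly (the existing code falls back to its vmap()-based path instead). An illustrative sketch of that general rule, not the io_uring code itself (the helper name is hypothetical):

/* Hypothetical helper, illustrative only: touching a page that may be highmem. */
#include <linux/highmem.h>
#include <linux/mm.h>
#include <linux/string.h>

static void zero_one_page(struct page *page)
{
	if (!PageHighMem(page)) {
		/* lowmem: the direct-map address from page_address() is valid */
		memset(page_address(page), 0, PAGE_SIZE);
	} else {
		/* highmem: needs a temporary local mapping before it can be touched */
		void *addr = kmap_local_page(page);

		memset(addr, 0, PAGE_SIZE);
		kunmap_local(addr);
	}
}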

io_uring/sqpoll.c
Lines changed: 1 addition & 1 deletion

@@ -20,7 +20,7 @@
 #include "sqpoll.h"
 
 #define IORING_SQPOLL_CAP_ENTRIES_VALUE	8
-#define IORING_TW_CAP_ENTRIES_VALUE	8
+#define IORING_TW_CAP_ENTRIES_VALUE	32
 
 enum {
 	IO_SQ_THREAD_SHOULD_STOP = 0,
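
IORING_TW_CAP_ENTRIES_VALUE caps how many task_work items the SQPOLL thread handles per batch, and the change raises that cap from 8 to 32. For context, a hedged sketch of creating a ring in SQPOLL mode, the mode whose kernel thread runs those batches (liburing; the idle timeout and queue depth are illustrative):

/* Sketch: SQPOLL ring setup (assumes liburing; values are illustrative). */
#include <liburing.h>
#include <string.h>

int setup_sqpoll_ring(struct io_uring *ring)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = IORING_SETUP_SQPOLL;
	p.sq_thread_idle = 2000;	/* ms before the SQ thread goes idle */

	return io_uring_queue_init_params(64, ring, &p);
}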

io_uring/uring_cmd.c
Lines changed: 5 additions & 0 deletions

@@ -251,6 +251,11 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
 			return -EOPNOTSUPP;
 		issue_flags |= IO_URING_F_IOPOLL;
 		req->iopoll_completed = 0;
+		if (ctx->flags & IORING_SETUP_HYBRID_IOPOLL) {
+			/* make sure every req only blocks once */
+			req->flags &= ~REQ_F_IOPOLL_STATE;
+			req->iopoll_start = ktime_get_ns();
+		}
 	}
 
 	ret = file->f_op->uring_cmd(ioucmd, issue_flags);
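
The uring_cmd.c hunk resets the per-request hybrid-poll state each time the command is issued, so a request only blocks once. A hedged sketch of the setup that makes this path reachable, assuming a kernel and headers new enough to define IORING_SETUP_HYBRID_IOPOLL (the entry count is illustrative):

/* Sketch: hybrid IOPOLL ring; hybrid polling sits on top of classic IOPOLL
 * (assumes IORING_SETUP_HYBRID_IOPOLL is available in the installed headers). */
#include <liburing.h>
#include <string.h>

int setup_hybrid_iopoll_ring(struct io_uring *ring)
{
	struct io_uring_params p;

	memset(&p, 0, sizeof(p));
	p.flags = IORING_SETUP_IOPOLL | IORING_SETUP_HYBRID_IOPOLL;

	return io_uring_queue_init_params(32, ring, &p);
}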
