Skip to content

Commit cfd4039

Browse files
committed
Merge tag 'io_uring-6.19-20251208' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring updates from Jens Axboe: "Followup set of fixes for io_uring for this merge window. These are either later fixes, or cleanups that don't make sense to defer. This pull request contains: - Fix for a recent regression in io-wq worker creation - Tracing cleanup - Use READ_ONCE/WRITE_ONCE consistently for ring mapped kbufs. Mostly for documentation purposes, indicating that they are shared with userspace - Fix for POLL_ADD losing a completion, if the request is updated and now is triggerable - eg, if POLLIN is set with the update, and the polled file is readable - In conjunction with the above fix, also unify how poll wait queue entries are deleted with the head update. We had 3 different spots doing both the list deletion and head write, with one of them nicely documented. Abstract that into a helper and use it consistently - Small series from Joanne fixing an issue with buffer cloning, and cleaning up the arg validation" * tag 'io_uring-6.19-20251208' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux: io_uring/poll: unify poll waitqueue entry and list removal io_uring/kbuf: use WRITE_ONCE() for userspace-shared buffer ring fields io_uring/kbuf: use READ_ONCE() for userspace-mapped memory io_uring/rsrc: fix lost entries after cloned range io_uring/rsrc: rename misleading src_node variable in io_clone_buffers() io_uring/rsrc: clean up buffer cloning arg validation io_uring/trace: rename io_uring_queue_async_work event "rw" field io_uring/io-wq: always retry worker create on ERESTART* io_uring/poll: correctly handle io_poll_add() return value on update
2 parents 4482ebb + 55d57b3 commit cfd4039

File tree

5 files changed

+67
-65
lines changed

5 files changed

+67
-65
lines changed

include/trace/events/io_uring.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,15 +133,15 @@ TRACE_EVENT(io_uring_file_get,
133133
* io_uring_queue_async_work - called before submitting a new async work
134134
*
135135
* @req: pointer to a submitted request
136-
* @rw: type of workqueue, hashed or normal
136+
* @hashed: whether async work is hashed
137137
*
138138
* Allows to trace asynchronous work submission.
139139
*/
140140
TRACE_EVENT(io_uring_queue_async_work,
141141

142-
TP_PROTO(struct io_kiocb *req, int rw),
142+
TP_PROTO(struct io_kiocb *req, bool hashed),
143143

144-
TP_ARGS(req, rw),
144+
TP_ARGS(req, hashed),
145145

146146
TP_STRUCT__entry (
147147
__field( void *, ctx )
@@ -150,7 +150,7 @@ TRACE_EVENT(io_uring_queue_async_work,
150150
__field( u8, opcode )
151151
__field( unsigned long long, flags )
152152
__field( struct io_wq_work *, work )
153-
__field( int, rw )
153+
__field( bool, hashed )
154154

155155
__string( op_str, io_uring_get_opcode(req->opcode) )
156156
),
@@ -162,15 +162,15 @@ TRACE_EVENT(io_uring_queue_async_work,
162162
__entry->flags = (__force unsigned long long) req->flags;
163163
__entry->opcode = req->opcode;
164164
__entry->work = &req->work;
165-
__entry->rw = rw;
165+
__entry->hashed = hashed;
166166

167167
__assign_str(op_str);
168168
),
169169

170170
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%llx, %s queue, work %p",
171171
__entry->ctx, __entry->req, __entry->user_data,
172172
__get_str(op_str), __entry->flags,
173-
__entry->rw ? "hashed" : "normal", __entry->work)
173+
__entry->hashed ? "hashed" : "normal", __entry->work)
174174
);
175175

176176
/**

io_uring/io-wq.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -805,11 +805,12 @@ static inline bool io_should_retry_thread(struct io_worker *worker, long err)
805805
*/
806806
if (fatal_signal_pending(current))
807807
return false;
808-
if (worker->init_retries++ >= WORKER_INIT_LIMIT)
809-
return false;
810808

809+
worker->init_retries++;
811810
switch (err) {
812811
case -EAGAIN:
812+
return worker->init_retries <= WORKER_INIT_LIMIT;
813+
/* Analogous to a fork() syscall, always retry on a restartable error */
813814
case -ERESTARTSYS:
814815
case -ERESTARTNOINTR:
815816
case -ERESTARTNOHAND:

io_uring/kbuf.c

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,11 @@ static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
4444
buf_len -= this_len;
4545
/* Stop looping for invalid buffer length of 0 */
4646
if (buf_len || !this_len) {
47-
buf->addr += this_len;
48-
buf->len = buf_len;
47+
WRITE_ONCE(buf->addr, READ_ONCE(buf->addr) + this_len);
48+
WRITE_ONCE(buf->len, buf_len);
4949
return false;
5050
}
51-
buf->len = 0;
51+
WRITE_ONCE(buf->len, 0);
5252
bl->head++;
5353
len -= this_len;
5454
}
@@ -198,9 +198,9 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
198198
if (*len == 0 || *len > buf_len)
199199
*len = buf_len;
200200
req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT;
201-
req->buf_index = buf->bid;
201+
req->buf_index = READ_ONCE(buf->bid);
202202
sel.buf_list = bl;
203-
sel.addr = u64_to_user_ptr(buf->addr);
203+
sel.addr = u64_to_user_ptr(READ_ONCE(buf->addr));
204204

205205
if (io_should_commit(req, issue_flags)) {
206206
io_kbuf_commit(req, sel.buf_list, *len, 1);
@@ -280,7 +280,7 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
280280
if (!arg->max_len)
281281
arg->max_len = INT_MAX;
282282

283-
req->buf_index = buf->bid;
283+
req->buf_index = READ_ONCE(buf->bid);
284284
do {
285285
u32 len = READ_ONCE(buf->len);
286286

@@ -291,11 +291,11 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
291291
arg->partial_map = 1;
292292
if (iov != arg->iovs)
293293
break;
294-
buf->len = len;
294+
WRITE_ONCE(buf->len, len);
295295
}
296296
}
297297

298-
iov->iov_base = u64_to_user_ptr(buf->addr);
298+
iov->iov_base = u64_to_user_ptr(READ_ONCE(buf->addr));
299299
iov->iov_len = len;
300300
iov++;
301301

io_uring/poll.c

Lines changed: 29 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -138,14 +138,32 @@ static void io_init_poll_iocb(struct io_poll *poll, __poll_t events)
138138
init_waitqueue_func_entry(&poll->wait, io_poll_wake);
139139
}
140140

141+
static void io_poll_remove_waitq(struct io_poll *poll)
142+
{
143+
/*
144+
* If the waitqueue is being freed early but someone already holds
145+
* ownership over it, we have to tear down the request as best we can.
146+
* That means immediately removing the request from its waitqueue and
147+
* preventing all further accesses to the waitqueue via the request.
148+
*/
149+
list_del_init(&poll->wait.entry);
150+
151+
/*
152+
* Careful: this *must* be the last step, since as soon as req->head is
153+
* NULL'ed out, the request can be completed and freed, since
154+
* io_poll_remove_entry() will no longer need to take the waitqueue
155+
* lock.
156+
*/
157+
smp_store_release(&poll->head, NULL);
158+
}
159+
141160
static inline void io_poll_remove_entry(struct io_poll *poll)
142161
{
143162
struct wait_queue_head *head = smp_load_acquire(&poll->head);
144163

145164
if (head) {
146165
spin_lock_irq(&head->lock);
147-
list_del_init(&poll->wait.entry);
148-
poll->head = NULL;
166+
io_poll_remove_waitq(poll);
149167
spin_unlock_irq(&head->lock);
150168
}
151169
}
@@ -368,23 +386,7 @@ static __cold int io_pollfree_wake(struct io_kiocb *req, struct io_poll *poll)
368386
io_poll_mark_cancelled(req);
369387
/* we have to kick tw in case it's not already */
370388
io_poll_execute(req, 0);
371-
372-
/*
373-
* If the waitqueue is being freed early but someone is already
374-
* holds ownership over it, we have to tear down the request as
375-
* best we can. That means immediately removing the request from
376-
* its waitqueue and preventing all further accesses to the
377-
* waitqueue via the request.
378-
*/
379-
list_del_init(&poll->wait.entry);
380-
381-
/*
382-
* Careful: this *must* be the last step, since as soon
383-
* as req->head is NULL'ed out, the request can be
384-
* completed and freed, since aio_poll_complete_work()
385-
* will no longer need to take the waitqueue lock.
386-
*/
387-
smp_store_release(&poll->head, NULL);
389+
io_poll_remove_waitq(poll);
388390
return 1;
389391
}
390392

@@ -413,8 +415,7 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
413415

414416
/* optional, saves extra locking for removal in tw handler */
415417
if (mask && poll->events & EPOLLONESHOT) {
416-
list_del_init(&poll->wait.entry);
417-
poll->head = NULL;
418+
io_poll_remove_waitq(poll);
418419
if (wqe_is_double(wait))
419420
req->flags &= ~REQ_F_DOUBLE_POLL;
420421
else
@@ -937,12 +938,17 @@ int io_poll_remove(struct io_kiocb *req, unsigned int issue_flags)
937938

938939
ret2 = io_poll_add(preq, issue_flags & ~IO_URING_F_UNLOCKED);
939940
/* successfully updated, don't complete poll request */
940-
if (!ret2 || ret2 == -EIOCBQUEUED)
941+
if (ret2 == IOU_ISSUE_SKIP_COMPLETE)
941942
goto out;
943+
/* request completed as part of the update, complete it */
944+
else if (ret2 == IOU_COMPLETE)
945+
goto complete;
942946
}
943947

944-
req_set_fail(preq);
945948
io_req_set_res(preq, -ECANCELED, 0);
949+
complete:
950+
if (preq->cqe.res < 0)
951+
req_set_fail(preq);
946952
preq->io_task_work.func = io_req_task_complete;
947953
io_req_task_work_add(preq);
948954
out:

io_uring/rsrc.c

Lines changed: 21 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,12 +1186,16 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
11861186
return -EBUSY;
11871187

11881188
nbufs = src_ctx->buf_table.nr;
1189+
if (!nbufs)
1190+
return -ENXIO;
11891191
if (!arg->nr)
11901192
arg->nr = nbufs;
11911193
else if (arg->nr > nbufs)
11921194
return -EINVAL;
11931195
else if (arg->nr > IORING_MAX_REG_BUFFERS)
11941196
return -EINVAL;
1197+
if (check_add_overflow(arg->nr, arg->src_off, &off) || off > nbufs)
1198+
return -EOVERFLOW;
11951199
if (check_add_overflow(arg->nr, arg->dst_off, &nbufs))
11961200
return -EOVERFLOW;
11971201
if (nbufs > IORING_MAX_REG_BUFFERS)
@@ -1201,31 +1205,16 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
12011205
if (ret)
12021206
return ret;
12031207

1204-
/* Fill entries in data from dst that won't overlap with src */
1208+
/* Copy original dst nodes from before the cloned range */
12051209
for (i = 0; i < min(arg->dst_off, ctx->buf_table.nr); i++) {
1206-
struct io_rsrc_node *src_node = ctx->buf_table.nodes[i];
1210+
struct io_rsrc_node *node = ctx->buf_table.nodes[i];
12071211

1208-
if (src_node) {
1209-
data.nodes[i] = src_node;
1210-
src_node->refs++;
1212+
if (node) {
1213+
data.nodes[i] = node;
1214+
node->refs++;
12111215
}
12121216
}
12131217

1214-
ret = -ENXIO;
1215-
nbufs = src_ctx->buf_table.nr;
1216-
if (!nbufs)
1217-
goto out_free;
1218-
ret = -EINVAL;
1219-
if (!arg->nr)
1220-
arg->nr = nbufs;
1221-
else if (arg->nr > nbufs)
1222-
goto out_free;
1223-
ret = -EOVERFLOW;
1224-
if (check_add_overflow(arg->nr, arg->src_off, &off))
1225-
goto out_free;
1226-
if (off > nbufs)
1227-
goto out_free;
1228-
12291218
off = arg->dst_off;
12301219
i = arg->src_off;
12311220
nr = arg->nr;
@@ -1238,8 +1227,8 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
12381227
} else {
12391228
dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
12401229
if (!dst_node) {
1241-
ret = -ENOMEM;
1242-
goto out_free;
1230+
io_rsrc_data_free(ctx, &data);
1231+
return -ENOMEM;
12431232
}
12441233

12451234
refcount_inc(&src_node->buf->refs);
@@ -1249,6 +1238,16 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
12491238
i++;
12501239
}
12511240

1241+
/* Copy original dst nodes from after the cloned range */
1242+
for (i = nbufs; i < ctx->buf_table.nr; i++) {
1243+
struct io_rsrc_node *node = ctx->buf_table.nodes[i];
1244+
1245+
if (node) {
1246+
data.nodes[i] = node;
1247+
node->refs++;
1248+
}
1249+
}
1250+
12521251
/*
12531252
* If asked for replace, put the old table. data->nodes[] holds both
12541253
* old and new nodes at this point.
@@ -1265,10 +1264,6 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
12651264
WARN_ON_ONCE(ctx->buf_table.nr);
12661265
ctx->buf_table = data;
12671266
return 0;
1268-
1269-
out_free:
1270-
io_rsrc_data_free(ctx, &data);
1271-
return ret;
12721267
}
12731268

12741269
/*

0 commit comments

Comments
 (0)