Skip to content

Commit c98c70e

Browse files
committed
Merge tag 'io_uring-6.1-2022-10-13' of git://git.kernel.dk/linux
Pull more io_uring updates from Jens Axboe:
 "A collection of fixes that ended up either being later than the
  initial pull, or dependent on multiple branches (6.0-late being one
  of them) and hence deferred purposely. This contains:

   - Cleanup fixes for the single submitter late 6.0 change, which we
     pushed to 6.1 to keep the 6.0 changes small (Dylan, Pavel)

   - Fix for IORING_OP_CONNECT not handling -EINPROGRESS correctly (me)

   - Ensure that the zc sendmsg variant gets audited correctly (me)

   - Regression fix from this merge window where kiocb_end_write()
     doesn't always get called, which can cause issues with fs
     freezing (me)

   - Registered files SCM handling fix (Pavel)

   - Regression fix for big sqe dumping in fdinfo (Pavel)

   - Registered buffers accounting fix (Pavel)

   - Remove leftover notification structures, we killed them off late
     in 6.0 (Pavel)

   - Minor optimizations (Pavel)

   - Cosmetic variable shadowing fix (Stefan)"

* tag 'io_uring-6.1-2022-10-13' of git://git.kernel.dk/linux:
  io_uring/rw: ensure kiocb_end_write() is always called
  io_uring: fix fdinfo sqe offsets calculation
  io_uring: local variable rw shadows outer variable in io_write
  io_uring/opdef: remove 'audit_skip' from SENDMSG_ZC
  io_uring: optimise locking for local tw with submit_wait
  io_uring: remove redundant memory barrier in io_req_local_work_add
  io_uring/net: handle -EINPROGRESS correct for IORING_OP_CONNECT
  io_uring: remove notif leftovers
  io_uring: correct pinned_vm accounting
  io_uring/af_unix: defer registered files gc to io_uring release
  io_uring: limit registration w/ SINGLE_ISSUER
  io_uring: remove io_register_submitter
  io_uring: simplify __io_uring_add_tctx_node
2 parents 6d84c25 + 2ec33a6 commit c98c70e

File tree

12 files changed

+138
-66
lines changed

12 files changed

+138
-66
lines changed

include/linux/io_uring_types.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,6 @@ struct io_file_table {
3434
unsigned int alloc_hint;
3535
};
3636

37-
struct io_notif;
38-
struct io_notif_slot;
39-
4037
struct io_hash_bucket {
4138
spinlock_t lock;
4239
struct hlist_head list;
@@ -242,8 +239,6 @@ struct io_ring_ctx {
242239
unsigned nr_user_files;
243240
unsigned nr_user_bufs;
244241
struct io_mapped_ubuf **user_bufs;
245-
struct io_notif_slot *notif_slots;
246-
unsigned nr_notif_slots;
247242

248243
struct io_submit_state submit_state;
249244

include/linux/skbuff.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,7 @@ typedef unsigned char *sk_buff_data_t;
803803
* @csum_level: indicates the number of consecutive checksums found in
804804
* the packet minus one that have been verified as
805805
* CHECKSUM_UNNECESSARY (max 3)
806+
* @scm_io_uring: SKB holds io_uring registered files
806807
* @dst_pending_confirm: need to confirm neighbour
807808
* @decrypted: Decrypted SKB
808809
* @slow_gro: state present at GRO time, slower prepare step required
@@ -982,6 +983,7 @@ struct sk_buff {
982983
#endif
983984
__u8 slow_gro:1;
984985
__u8 csum_not_inet:1;
986+
__u8 scm_io_uring:1;
985987

986988
#ifdef CONFIG_NET_SCHED
987989
__u16 tc_index; /* traffic control index */

io_uring/fdinfo.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ static __cold void __io_uring_show_fdinfo(struct io_ring_ctx *ctx,
9494
sq_idx = READ_ONCE(ctx->sq_array[entry & sq_mask]);
9595
if (sq_idx > sq_mask)
9696
continue;
97-
sqe = &ctx->sq_sqes[sq_idx << 1];
97+
sqe = &ctx->sq_sqes[sq_idx << sq_shift];
9898
seq_printf(m, "%5u: opcode:%s, fd:%d, flags:%x, off:%llu, "
9999
"addr:0x%llx, rw_flags:0x%x, buf_index:%d "
100100
"user_data:%llu",

io_uring/io_uring.c

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,8 @@ static void io_req_local_work_add(struct io_kiocb *req)
11061106

11071107
if (!llist_add(&req->io_task_work.node, &ctx->work_llist))
11081108
return;
1109+
/* need it for the following io_cqring_wake() */
1110+
smp_mb__after_atomic();
11091111

11101112
if (unlikely(atomic_read(&req->task->io_uring->in_idle))) {
11111113
io_move_task_work_from_local(ctx);
@@ -1117,8 +1119,7 @@ static void io_req_local_work_add(struct io_kiocb *req)
11171119

11181120
if (ctx->has_evfd)
11191121
io_eventfd_signal(ctx);
1120-
io_cqring_wake(ctx);
1121-
1122+
__io_cqring_wake(ctx);
11221123
}
11231124

11241125
static inline void __io_req_task_work_add(struct io_kiocb *req, bool allow_local)
@@ -2585,12 +2586,6 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
25852586
static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
25862587
{
25872588
io_sq_thread_finish(ctx);
2588-
2589-
if (ctx->mm_account) {
2590-
mmdrop(ctx->mm_account);
2591-
ctx->mm_account = NULL;
2592-
}
2593-
25942589
io_rsrc_refs_drop(ctx);
25952590
/* __io_rsrc_put_work() may need uring_lock to progress, wait w/o it */
25962591
io_wait_rsrc_data(ctx->buf_data);
@@ -2631,8 +2626,11 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
26312626
}
26322627
#endif
26332628
WARN_ON_ONCE(!list_empty(&ctx->ltimeout_list));
2634-
WARN_ON_ONCE(ctx->notif_slots || ctx->nr_notif_slots);
26352629

2630+
if (ctx->mm_account) {
2631+
mmdrop(ctx->mm_account);
2632+
ctx->mm_account = NULL;
2633+
}
26362634
io_mem_free(ctx->rings);
26372635
io_mem_free(ctx->sq_sqes);
26382636

@@ -3229,8 +3227,16 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
32293227
mutex_unlock(&ctx->uring_lock);
32303228
goto out;
32313229
}
3232-
if ((flags & IORING_ENTER_GETEVENTS) && ctx->syscall_iopoll)
3233-
goto iopoll_locked;
3230+
if (flags & IORING_ENTER_GETEVENTS) {
3231+
if (ctx->syscall_iopoll)
3232+
goto iopoll_locked;
3233+
/*
3234+
* Ignore errors, we'll soon call io_cqring_wait() and
3235+
* it should handle ownership problems if any.
3236+
*/
3237+
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN)
3238+
(void)io_run_local_work_locked(ctx);
3239+
}
32343240
mutex_unlock(&ctx->uring_lock);
32353241
}
32363242

@@ -3355,7 +3361,7 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
33553361
if (fd < 0)
33563362
return fd;
33573363

3358-
ret = __io_uring_add_tctx_node(ctx, false);
3364+
ret = __io_uring_add_tctx_node(ctx);
33593365
if (ret) {
33603366
put_unused_fd(fd);
33613367
return ret;
@@ -3890,6 +3896,9 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
38903896
if (WARN_ON_ONCE(percpu_ref_is_dying(&ctx->refs)))
38913897
return -ENXIO;
38923898

3899+
if (ctx->submitter_task && ctx->submitter_task != current)
3900+
return -EEXIST;
3901+
38933902
if (ctx->restricted) {
38943903
if (opcode >= IORING_REGISTER_LAST)
38953904
return -EINVAL;

io_uring/io_uring.h

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,17 +203,24 @@ static inline void io_commit_cqring(struct io_ring_ctx *ctx)
203203
smp_store_release(&ctx->rings->cq.tail, ctx->cached_cq_tail);
204204
}
205205

206-
static inline void io_cqring_wake(struct io_ring_ctx *ctx)
206+
/* requires smp_mb() prior, see wq_has_sleeper() */
207+
static inline void __io_cqring_wake(struct io_ring_ctx *ctx)
207208
{
208209
/*
209210
* wake_up_all() may seem excessive, but io_wake_function() and
210211
* io_should_wake() handle the termination of the loop and only
211212
* wake as many waiters as we need to.
212213
*/
213-
if (wq_has_sleeper(&ctx->cq_wait))
214+
if (waitqueue_active(&ctx->cq_wait))
214215
wake_up_all(&ctx->cq_wait);
215216
}
216217

218+
static inline void io_cqring_wake(struct io_ring_ctx *ctx)
219+
{
220+
smp_mb();
221+
__io_cqring_wake(ctx);
222+
}
223+
217224
static inline bool io_sqring_full(struct io_ring_ctx *ctx)
218225
{
219226
struct io_rings *r = ctx->rings;
@@ -268,6 +275,13 @@ static inline int io_run_task_work_ctx(struct io_ring_ctx *ctx)
268275
return ret;
269276
}
270277

278+
static inline int io_run_local_work_locked(struct io_ring_ctx *ctx)
279+
{
280+
if (llist_empty(&ctx->work_llist))
281+
return 0;
282+
return __io_run_local_work(ctx, true);
283+
}
284+
271285
static inline void io_tw_lock(struct io_ring_ctx *ctx, bool *locked)
272286
{
273287
if (!*locked) {

io_uring/net.c

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ struct io_connect {
4646
struct file *file;
4747
struct sockaddr __user *addr;
4848
int addr_len;
49+
bool in_progress;
4950
};
5051

5152
struct io_sr_msg {
@@ -1386,6 +1387,7 @@ int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
13861387

13871388
conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
13881389
conn->addr_len = READ_ONCE(sqe->addr2);
1390+
conn->in_progress = false;
13891391
return 0;
13901392
}
13911393

@@ -1397,6 +1399,16 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
13971399
int ret;
13981400
bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
13991401

1402+
if (connect->in_progress) {
1403+
struct socket *socket;
1404+
1405+
ret = -ENOTSOCK;
1406+
socket = sock_from_file(req->file);
1407+
if (socket)
1408+
ret = sock_error(socket->sk);
1409+
goto out;
1410+
}
1411+
14001412
if (req_has_async_data(req)) {
14011413
io = req->async_data;
14021414
} else {
@@ -1413,13 +1425,17 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
14131425
ret = __sys_connect_file(req->file, &io->address,
14141426
connect->addr_len, file_flags);
14151427
if ((ret == -EAGAIN || ret == -EINPROGRESS) && force_nonblock) {
1416-
if (req_has_async_data(req))
1417-
return -EAGAIN;
1418-
if (io_alloc_async_data(req)) {
1419-
ret = -ENOMEM;
1420-
goto out;
1428+
if (ret == -EINPROGRESS) {
1429+
connect->in_progress = true;
1430+
} else {
1431+
if (req_has_async_data(req))
1432+
return -EAGAIN;
1433+
if (io_alloc_async_data(req)) {
1434+
ret = -ENOMEM;
1435+
goto out;
1436+
}
1437+
memcpy(req->async_data, &__io, sizeof(__io));
14211438
}
1422-
memcpy(req->async_data, &__io, sizeof(__io));
14231439
return -EAGAIN;
14241440
}
14251441
if (ret == -ERESTARTSYS)

io_uring/opdef.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -510,7 +510,6 @@ const struct io_op_def io_op_defs[] = {
510510
.needs_file = 1,
511511
.unbound_nonreg_file = 1,
512512
.pollout = 1,
513-
.audit_skip = 1,
514513
.ioprio = 1,
515514
.manual_alloc = 1,
516515
#if defined(CONFIG_NET)

io_uring/rsrc.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,7 @@ int __io_scm_file_account(struct io_ring_ctx *ctx, struct file *file)
855855

856856
UNIXCB(skb).fp = fpl;
857857
skb->sk = sk;
858+
skb->scm_io_uring = 1;
858859
skb->destructor = unix_destruct_scm;
859860
refcount_add(skb->truesize, &sk->sk_wmem_alloc);
860861
}

io_uring/rw.c

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -234,11 +234,34 @@ static void kiocb_end_write(struct io_kiocb *req)
234234
}
235235
}
236236

237+
/*
238+
* Trigger the notifications after having done some IO, and finish the write
239+
* accounting, if any.
240+
*/
241+
static void io_req_io_end(struct io_kiocb *req)
242+
{
243+
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
244+
245+
WARN_ON(!in_task());
246+
247+
if (rw->kiocb.ki_flags & IOCB_WRITE) {
248+
kiocb_end_write(req);
249+
fsnotify_modify(req->file);
250+
} else {
251+
fsnotify_access(req->file);
252+
}
253+
}
254+
237255
static bool __io_complete_rw_common(struct io_kiocb *req, long res)
238256
{
239257
if (unlikely(res != req->cqe.res)) {
240258
if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
241259
io_rw_should_reissue(req)) {
260+
/*
261+
* Reissue will start accounting again, finish the
262+
* current cycle.
263+
*/
264+
io_req_io_end(req);
242265
req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
243266
return true;
244267
}
@@ -264,15 +287,7 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
264287

265288
static void io_req_rw_complete(struct io_kiocb *req, bool *locked)
266289
{
267-
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
268-
269-
if (rw->kiocb.ki_flags & IOCB_WRITE) {
270-
kiocb_end_write(req);
271-
fsnotify_modify(req->file);
272-
} else {
273-
fsnotify_access(req->file);
274-
}
275-
290+
io_req_io_end(req);
276291
io_req_task_complete(req, locked);
277292
}
278293

@@ -317,6 +332,11 @@ static int kiocb_done(struct io_kiocb *req, ssize_t ret,
317332
req->file->f_pos = rw->kiocb.ki_pos;
318333
if (ret >= 0 && (rw->kiocb.ki_complete == io_complete_rw)) {
319334
if (!__io_complete_rw_common(req, ret)) {
335+
/*
336+
* Safe to call io_end from here as we're inline
337+
* from the submission path.
338+
*/
339+
io_req_io_end(req);
320340
io_req_set_res(req, final_ret,
321341
io_put_kbuf(req, issue_flags));
322342
return IOU_OK;
@@ -916,7 +936,7 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
916936
goto copy_iov;
917937

918938
if (ret2 != req->cqe.res && ret2 >= 0 && need_complete_io(req)) {
919-
struct io_async_rw *rw;
939+
struct io_async_rw *io;
920940

921941
trace_io_uring_short_write(req->ctx, kiocb->ki_pos - ret2,
922942
req->cqe.res, ret2);
@@ -929,9 +949,9 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
929949
iov_iter_save_state(&s->iter, &s->iter_state);
930950
ret = io_setup_async_rw(req, iovec, s, true);
931951

932-
rw = req->async_data;
933-
if (rw)
934-
rw->bytes_done += ret2;
952+
io = req->async_data;
953+
if (io)
954+
io->bytes_done += ret2;
935955

936956
if (kiocb->ki_flags & IOCB_WRITE)
937957
kiocb_end_write(req);

io_uring/tctx.c

Lines changed: 18 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -91,32 +91,12 @@ __cold int io_uring_alloc_task_context(struct task_struct *task,
9191
return 0;
9292
}
9393

94-
static int io_register_submitter(struct io_ring_ctx *ctx)
95-
{
96-
int ret = 0;
97-
98-
mutex_lock(&ctx->uring_lock);
99-
if (!ctx->submitter_task)
100-
ctx->submitter_task = get_task_struct(current);
101-
else if (ctx->submitter_task != current)
102-
ret = -EEXIST;
103-
mutex_unlock(&ctx->uring_lock);
104-
105-
return ret;
106-
}
107-
108-
int __io_uring_add_tctx_node(struct io_ring_ctx *ctx, bool submitter)
94+
int __io_uring_add_tctx_node(struct io_ring_ctx *ctx)
10995
{
11096
struct io_uring_task *tctx = current->io_uring;
11197
struct io_tctx_node *node;
11298
int ret;
11399

114-
if ((ctx->flags & IORING_SETUP_SINGLE_ISSUER) && submitter) {
115-
ret = io_register_submitter(ctx);
116-
if (ret)
117-
return ret;
118-
}
119-
120100
if (unlikely(!tctx)) {
121101
ret = io_uring_alloc_task_context(current, ctx);
122102
if (unlikely(ret))
@@ -150,8 +130,22 @@ int __io_uring_add_tctx_node(struct io_ring_ctx *ctx, bool submitter)
150130
list_add(&node->ctx_node, &ctx->tctx_list);
151131
mutex_unlock(&ctx->uring_lock);
152132
}
153-
if (submitter)
154-
tctx->last = ctx;
133+
return 0;
134+
}
135+
136+
int __io_uring_add_tctx_node_from_submit(struct io_ring_ctx *ctx)
137+
{
138+
int ret;
139+
140+
if (ctx->flags & IORING_SETUP_SINGLE_ISSUER
141+
&& ctx->submitter_task != current)
142+
return -EEXIST;
143+
144+
ret = __io_uring_add_tctx_node(ctx);
145+
if (ret)
146+
return ret;
147+
148+
current->io_uring->last = ctx;
155149
return 0;
156150
}
157151

@@ -259,7 +253,7 @@ int io_ringfd_register(struct io_ring_ctx *ctx, void __user *__arg,
259253
return -EINVAL;
260254

261255
mutex_unlock(&ctx->uring_lock);
262-
ret = __io_uring_add_tctx_node(ctx, false);
256+
ret = __io_uring_add_tctx_node(ctx);
263257
mutex_lock(&ctx->uring_lock);
264258
if (ret)
265259
return ret;

0 commit comments

Comments
 (0)