
Commit 812e7eb

Merge tag 'io_uring-6.18-20251023' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux
Pull io_uring fixes from Jens Axboe:

 - Add a MAINTAINERS entry for zcrx, mostly so that netdev gets
   automatically CC'ed by default on any changes there too.

 - Fix the SQPOLL busy vs work time accounting. It was using
   getrusage(), which was both broken from a threading point of view
   (only the SQPOLL thread itself matters) and vastly overkill, as only
   the system time was used. On top of that, be a bit smarter about
   when it's queried, as it consumed excessive CPU before this change.
   Marked for stable as well.

 - Fix provided ring buffer auto-commit for uring_cmd.

 - Fix a few style issues and a sparse annotation for a lock.

* tag 'io_uring-6.18-20251023' of git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux:
  io_uring: fix buffer auto-commit for multishot uring_cmd
  io_uring: correct __must_hold annotation in io_install_fixed_file
  io_uring zcrx: add MAINTAINERS entry
  io_uring: Fix code indentation error
  io_uring/sqpoll: be smarter on when to update the stime usage
  io_uring/sqpoll: switch away from getrusage() for CPU accounting
  io_uring: fix incorrect unlikely() usage in io_waitid_prep()
2 parents 66cd8e9 + 6f1cbf6 commit 812e7eb
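For context on the accounting fix: getrusage(RUSAGE_SELF) aggregates CPU time across every thread in a process, while the SQPOLL statistics only ever cared about the polling thread's own system time. A minimal userspace sketch of that distinction (illustrative only, not from this commit; RUSAGE_THREAD is the Linux userspace analogue of the per-task query the kernel side switches to):

	#define _GNU_SOURCE		/* for RUSAGE_THREAD */
	#include <stdio.h>
	#include <sys/resource.h>

	/* System time in microseconds, the same unit fdinfo reports. */
	static long stime_usec(int who)
	{
		struct rusage ru;

		getrusage(who, &ru);
		return ru.ru_stime.tv_sec * 1000000L + ru.ru_stime.tv_usec;
	}

	int main(void)
	{
		/* RUSAGE_SELF sums all threads; RUSAGE_THREAD is just this one. */
		printf("process stime: %ld usec\n", stime_usec(RUSAGE_SELF));
		printf("thread  stime: %ld usec\n", stime_usec(RUSAGE_THREAD));
		return 0;
	}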


9 files changed: +85 additions, −39 deletions


MAINTAINERS

Lines changed: 9 additions & 0 deletions
@@ -13116,6 +13116,15 @@ F:	include/uapi/linux/io_uring.h
 F:	include/uapi/linux/io_uring/
 F:	io_uring/
 
+IO_URING ZCRX
+M:	Pavel Begunkov <[email protected]>
+L:	io-uring@vger.kernel.org
+L:	netdev@vger.kernel.org
+T:	git https://github.com/isilence/linux.git zcrx/for-next
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux.git
+S:	Maintained
+F:	io_uring/zcrx.*
+
 IPMI SUBSYSTEM
 M:	Corey Minyard <[email protected]>
 L:	[email protected] (moderated for non-subscribers)

io_uring/fdinfo.c

Lines changed: 4 additions & 4 deletions
@@ -59,7 +59,6 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 {
 	struct io_overflow_cqe *ocqe;
 	struct io_rings *r = ctx->rings;
-	struct rusage sq_usage;
 	unsigned int sq_mask = ctx->sq_entries - 1, cq_mask = ctx->cq_entries - 1;
 	unsigned int sq_head = READ_ONCE(r->sq.head);
 	unsigned int sq_tail = READ_ONCE(r->sq.tail);
@@ -152,14 +151,15 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
 	 * thread termination.
 	 */
 	if (tsk) {
+		u64 usec;
+
 		get_task_struct(tsk);
 		rcu_read_unlock();
-		getrusage(tsk, RUSAGE_SELF, &sq_usage);
+		usec = io_sq_cpu_usec(tsk);
 		put_task_struct(tsk);
 		sq_pid = sq->task_pid;
 		sq_cpu = sq->sq_cpu;
-		sq_total_time = (sq_usage.ru_stime.tv_sec * 1000000
-				 + sq_usage.ru_stime.tv_usec);
+		sq_total_time = usec;
 		sq_work_time = sq->work_time;
 	} else {
 		rcu_read_unlock();
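The units are unchanged by this conversion: the removed expression folded ru_stime into microseconds, and io_sq_cpu_usec() (added in io_uring/sqpoll.c below) reaches the same unit by dividing the nanosecond cputime from task_cputime_adjusted() by 1000. A one-function sketch of that assumption:

	#include <stdint.h>

	/*
	 * Illustrative only: mirrors the unit handling in io_sq_cpu_usec().
	 * task_cputime_adjusted() hands back nanoseconds; fdinfo's
	 * sq_total_time has always been microseconds.
	 */
	static inline uint64_t cputime_ns_to_usec(uint64_t stime_ns)
	{
		return stime_ns / 1000;
	}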

io_uring/filetable.c

Lines changed: 1 addition & 1 deletion
@@ -57,7 +57,7 @@ void io_free_file_tables(struct io_ring_ctx *ctx, struct io_file_table *table)
 
 static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
 				 u32 slot_index)
-	__must_hold(&req->ctx->uring_lock)
+	__must_hold(&ctx->uring_lock)
 {
 	struct io_rsrc_node *node;
 
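The old annotation named req, which is not a parameter of this function; ctx is. __must_hold only has an effect under sparse, where it expands to the context attribute. A simplified sketch of the definition (as in include/linux/compiler_types.h):

	/* Under sparse (__CHECKER__), assert that lock context x is held on
	 * entry and still held on exit; in normal builds it compiles away. */
	#ifdef __CHECKER__
	# define __must_hold(x)	__attribute__((context(x, 1, 1)))
	#else
	# define __must_hold(x)
	#endif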
io_uring/io_uring.c

Lines changed: 1 addition & 1 deletion
@@ -879,7 +879,7 @@ static inline struct io_cqe io_init_cqe(u64 user_data, s32 res, u32 cflags)
 }
 
 static __cold void io_cqe_overflow(struct io_ring_ctx *ctx, struct io_cqe *cqe,
-			          struct io_big_cqe *big_cqe)
+				   struct io_big_cqe *big_cqe)
 {
 	struct io_overflow_cqe *ocqe;
 
io_uring/kbuf.c

Lines changed: 22 additions & 11 deletions
@@ -155,6 +155,27 @@ static int io_provided_buffers_select(struct io_kiocb *req, size_t *len,
 	return 1;
 }
 
+static bool io_should_commit(struct io_kiocb *req, unsigned int issue_flags)
+{
+	/*
+	 * If we came in unlocked, we have no choice but to consume the
+	 * buffer here, otherwise nothing ensures that the buffer won't
+	 * get used by others. This does mean it'll be pinned until the
+	 * IO completes, coming in unlocked means we're being called from
+	 * io-wq context and there may be further retries in async hybrid
+	 * mode. For the locked case, the caller must call commit when
+	 * the transfer completes (or if we get -EAGAIN and must poll or
+	 * retry).
+	 */
+	if (issue_flags & IO_URING_F_UNLOCKED)
+		return true;
+
+	/* uring_cmd commits kbuf upfront, no need to auto-commit */
+	if (!io_file_can_poll(req) && req->opcode != IORING_OP_URING_CMD)
+		return true;
+	return false;
+}
+
 static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 					      struct io_buffer_list *bl,
 					      unsigned int issue_flags)
@@ -181,17 +202,7 @@ static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len,
 	sel.buf_list = bl;
 	sel.addr = u64_to_user_ptr(buf->addr);
 
-	if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) {
-		/*
-		 * If we came in unlocked, we have no choice but to consume the
-		 * buffer here, otherwise nothing ensures that the buffer won't
-		 * get used by others. This does mean it'll be pinned until the
-		 * IO completes, coming in unlocked means we're being called from
-		 * io-wq context and there may be further retries in async hybrid
-		 * mode. For the locked case, the caller must call commit when
-		 * the transfer completes (or if we get -EAGAIN and must poll or
-		 * retry).
-		 */
+	if (io_should_commit(req, issue_flags)) {
 		io_kbuf_commit(req, sel.buf_list, *len, 1);
 		sel.buf_list = NULL;
 	}
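Besides the cleanup, the helper changes behavior for IORING_OP_URING_CMD: the old condition force-committed for any non-pollable file, which broke multishot uring_cmd since uring_cmd commits its kbuf upfront. A standalone restatement of the new decision (the flag and opcode names below are illustrative stand-ins, not the kernel's):

	#include <stdbool.h>

	#define F_UNLOCKED	(1u << 0)	/* issued from io-wq, ring unlocked */

	enum opcode { OP_READ, OP_URING_CMD };

	/* Sketch of io_should_commit(): when must buffer selection consume
	 * (commit) the ring-provided buffer immediately? */
	static bool should_commit(unsigned int flags, bool file_can_poll, enum opcode op)
	{
		/* Unlocked: nothing else pins the buffer, so consume it now. */
		if (flags & F_UNLOCKED)
			return true;
		/* Non-pollable files can't retry via poll, so commit now too,
		 * except uring_cmd, which already committed its kbuf upfront. */
		if (!file_can_poll && op != OP_URING_CMD)
			return true;
		/* Pollable + locked: the caller commits when the transfer ends. */
		return false;
	}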

io_uring/net.c

Lines changed: 1 addition & 1 deletion
@@ -383,7 +383,7 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return 0;
 
 	if (sr->flags & IORING_SEND_VECTORIZED)
-	return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
+		return io_net_import_vec(req, kmsg, sr->buf, sr->len, ITER_SOURCE);
 
 	return import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
 }

io_uring/sqpoll.c

Lines changed: 45 additions & 20 deletions
@@ -11,6 +11,7 @@
 #include <linux/audit.h>
 #include <linux/security.h>
 #include <linux/cpuset.h>
+#include <linux/sched/cputime.h>
 #include <linux/io_uring.h>
 
 #include <uapi/linux/io_uring.h>
@@ -169,7 +170,38 @@ static inline bool io_sqd_events_pending(struct io_sq_data *sqd)
 	return READ_ONCE(sqd->state);
 }
 
-static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
+struct io_sq_time {
+	bool started;
+	u64 usec;
+};
+
+u64 io_sq_cpu_usec(struct task_struct *tsk)
+{
+	u64 utime, stime;
+
+	task_cputime_adjusted(tsk, &utime, &stime);
+	do_div(stime, 1000);
+	return stime;
+}
+
+static void io_sq_update_worktime(struct io_sq_data *sqd, struct io_sq_time *ist)
+{
+	if (!ist->started)
+		return;
+	ist->started = false;
+	sqd->work_time += io_sq_cpu_usec(current) - ist->usec;
+}
+
+static void io_sq_start_worktime(struct io_sq_time *ist)
+{
+	if (ist->started)
+		return;
+	ist->started = true;
+	ist->usec = io_sq_cpu_usec(current);
+}
+
+static int __io_sq_thread(struct io_ring_ctx *ctx, struct io_sq_data *sqd,
+			  bool cap_entries, struct io_sq_time *ist)
 {
 	unsigned int to_submit;
 	int ret = 0;
@@ -182,6 +214,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
 	if (to_submit || !wq_list_empty(&ctx->iopoll_list)) {
 		const struct cred *creds = NULL;
 
+		io_sq_start_worktime(ist);
+
 		if (ctx->sq_creds != current_cred())
 			creds = override_creds(ctx->sq_creds);
 
@@ -255,23 +289,11 @@ static bool io_sq_tw_pending(struct llist_node *retry_list)
 	return retry_list || !llist_empty(&tctx->task_list);
 }
 
-static void io_sq_update_worktime(struct io_sq_data *sqd, struct rusage *start)
-{
-	struct rusage end;
-
-	getrusage(current, RUSAGE_SELF, &end);
-	end.ru_stime.tv_sec -= start->ru_stime.tv_sec;
-	end.ru_stime.tv_usec -= start->ru_stime.tv_usec;
-
-	sqd->work_time += end.ru_stime.tv_usec + end.ru_stime.tv_sec * 1000000;
-}
-
 static int io_sq_thread(void *data)
 {
 	struct llist_node *retry_list = NULL;
 	struct io_sq_data *sqd = data;
 	struct io_ring_ctx *ctx;
-	struct rusage start;
 	unsigned long timeout = 0;
 	char buf[TASK_COMM_LEN] = {};
 	DEFINE_WAIT(wait);
@@ -309,6 +331,7 @@ static int io_sq_thread(void *data)
 	mutex_lock(&sqd->lock);
 	while (1) {
 		bool cap_entries, sqt_spin = false;
+		struct io_sq_time ist = { };
 
 		if (io_sqd_events_pending(sqd) || signal_pending(current)) {
 			if (io_sqd_handle_event(sqd))
@@ -317,25 +340,27 @@ static int io_sq_thread(void *data)
 		}
 
 		cap_entries = !list_is_singular(&sqd->ctx_list);
-		getrusage(current, RUSAGE_SELF, &start);
 		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
-			int ret = __io_sq_thread(ctx, cap_entries);
+			int ret = __io_sq_thread(ctx, sqd, cap_entries, &ist);
 
 			if (!sqt_spin && (ret > 0 || !wq_list_empty(&ctx->iopoll_list)))
 				sqt_spin = true;
 		}
 		if (io_sq_tw(&retry_list, IORING_TW_CAP_ENTRIES_VALUE))
 			sqt_spin = true;
 
-		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
-			if (io_napi(ctx))
+		list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
+			if (io_napi(ctx)) {
+				io_sq_start_worktime(&ist);
 				io_napi_sqpoll_busy_poll(ctx);
+			}
+		}
+
+		io_sq_update_worktime(sqd, &ist);
 
 		if (sqt_spin || !time_after(jiffies, timeout)) {
-			if (sqt_spin) {
-				io_sq_update_worktime(sqd, &start);
+			if (sqt_spin)
 				timeout = jiffies + sqd->sq_thread_idle;
-			}
 			if (unlikely(need_resched())) {
 				mutex_unlock(&sqd->lock);
 				cond_resched();
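Two things fall out of this rework: the clock is only sampled when work actually materializes (submission, iopoll, or NAPI busy poll) rather than on every loop pass, and the accumulated delta is folded in exactly once per iteration. A minimal userspace analogue of the lazy-start/stop-once pattern, using CLOCK_THREAD_CPUTIME_ID as a stand-in for the kernel's task_cputime_adjusted():

	#include <stdbool.h>
	#include <stdint.h>
	#include <time.h>

	struct work_timer {
		bool started;
		uint64_t start_usec;
	};

	/* Per-thread CPU time in microseconds. */
	static uint64_t thread_cpu_usec(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
		return (uint64_t)ts.tv_sec * 1000000ULL + ts.tv_nsec / 1000;
	}

	/* Idempotent arm, like io_sq_start_worktime(): only the first piece
	 * of work in an iteration samples the clock. */
	static void timer_start(struct work_timer *t)
	{
		if (t->started)
			return;
		t->started = true;
		t->start_usec = thread_cpu_usec();
	}

	/* Fold the busy delta into the running total once per iteration; a
	 * no-op on idle passes, like io_sq_update_worktime(). */
	static void timer_stop(struct work_timer *t, uint64_t *total_usec)
	{
		if (!t->started)
			return;
		t->started = false;
		*total_usec += thread_cpu_usec() - t->start_usec;
	}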

io_uring/sqpoll.h

Lines changed: 1 addition & 0 deletions
@@ -29,6 +29,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd);
 void io_put_sq_data(struct io_sq_data *sqd);
 void io_sqpoll_wait_sq(struct io_ring_ctx *ctx);
 int io_sqpoll_wq_cpu_affinity(struct io_ring_ctx *ctx, cpumask_var_t mask);
+u64 io_sq_cpu_usec(struct task_struct *tsk);
 
 static inline struct task_struct *sqpoll_task_locked(struct io_sq_data *sqd)
 {

io_uring/waitid.c

Lines changed: 1 addition & 1 deletion
@@ -250,7 +250,7 @@ int io_waitid_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -EINVAL;
 
 	iwa = io_uring_alloc_async_data(NULL, req);
-	if (!unlikely(iwa))
+	if (unlikely(!iwa))
 		return -ENOMEM;
 	iwa->req = req;
 
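Both spellings take the same branch, since unlikely(x) preserves the truth value of x, but the original hinted that a non-NULL iwa (the common case, a successful allocation) was the unlikely outcome, inverting the intended branch-prediction hint. The standard definitions make this visible (as in include/linux/compiler.h, simplified):

	/* __builtin_expect(e, c): evaluates to e, with a hint that e == c. */
	#define likely(x)	__builtin_expect(!!(x), 1)
	#define unlikely(x)	__builtin_expect(!!(x), 0)

	/*
	 * !unlikely(iwa): logically !iwa, but hints that iwa is usually NULL,
	 *                 i.e. that the allocation usually fails -- backwards.
	 * unlikely(!iwa): logically !iwa, hinting that allocation failure is
	 *                 the rare case -- the intended annotation.
	 */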
