Commit f873db9
Merge tag 'io_uring-5.9-2020-08-21' of git://git.kernel.dk/linux-block
Pull io_uring fixes from Jens Axboe:

 - Make sure the head link cancelation includes async work

 - Get rid of kiocb_wait_page_queue_init(), makes no sense to have it as
   a separate function since we moved it into io_uring itself

 - io_import_iovec cleanups (Pavel, me)

 - Use system_unbound_wq for ring exit work, to avoid spawning tons of
   these if we have tons of rings exiting at the same time (see the
   sketch below)

 - Fix req->flags overflow flag manipulation (Pavel)

* tag 'io_uring-5.9-2020-08-21' of git://git.kernel.dk/linux-block:
  io_uring: kill extra iovec=NULL in import_iovec()
  io_uring: comment on kfree(iovec) checks
  io_uring: fix racy req->flags modification
  io_uring: use system_unbound_wq for ring exit work
  io_uring: cleanup io_import_iovec() of pre-mapped request
  io_uring: get rid of kiocb_wait_page_queue_init()
  io_uring: find and cancel head link async work on files exit
2 parents 349111f + 867a23e commit f873db9
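The system_unbound_wq change in this pull is only a matter of which workqueue the ring exit work is queued on. As a rough illustration of the pattern (not io_uring's actual code; the demo_ctx structure and handlers below are hypothetical), deferring teardown to system_unbound_wq looks like this. Unbound work items are not pinned to the submitting CPU, so a burst of instances exiting at once does not fan out into piles of per-CPU event kworkers:

/* Hypothetical sketch: defer instance teardown to system_unbound_wq. */
#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/slab.h>

struct demo_ctx {
	struct work_struct exit_work;
	/* ... per-instance state torn down in the handler ... */
};

static void demo_exit_work(struct work_struct *work)
{
	struct demo_ctx *ctx = container_of(work, struct demo_ctx, exit_work);

	/* potentially slow cleanup runs in kworker context, not the caller */
	kfree(ctx);
}

static void demo_ctx_kill(struct demo_ctx *ctx)
{
	INIT_WORK(&ctx->exit_work, demo_exit_work);
	/*
	 * system_unbound_wq workers are not bound to the submitting CPU, so
	 * many concurrent teardowns share a small pool of kworkers instead
	 * of each waking per-CPU workers on system_wq.
	 */
	queue_work(system_unbound_wq, &ctx->exit_work);
}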

File tree: 1 file changed, +79 −94 lines


fs/io_uring.c

Lines changed: 79 additions & 94 deletions
@@ -540,7 +540,6 @@ enum {
 	REQ_F_ISREG_BIT,
 	REQ_F_COMP_LOCKED_BIT,
 	REQ_F_NEED_CLEANUP_BIT,
-	REQ_F_OVERFLOW_BIT,
 	REQ_F_POLLED_BIT,
 	REQ_F_BUFFER_SELECTED_BIT,
 	REQ_F_NO_FILE_TABLE_BIT,
@@ -583,8 +582,6 @@ enum {
 	REQ_F_COMP_LOCKED = BIT(REQ_F_COMP_LOCKED_BIT),
 	/* needs cleanup */
 	REQ_F_NEED_CLEANUP = BIT(REQ_F_NEED_CLEANUP_BIT),
-	/* in overflow list */
-	REQ_F_OVERFLOW = BIT(REQ_F_OVERFLOW_BIT),
 	/* already went through poll handler */
 	REQ_F_POLLED = BIT(REQ_F_POLLED_BIT),
 	/* buffer already selected */
@@ -946,7 +943,8 @@ static void io_get_req_task(struct io_kiocb *req)
 
 static inline void io_clean_op(struct io_kiocb *req)
 {
-	if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED))
+	if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED |
+			  REQ_F_INFLIGHT))
 		__io_clean_op(req);
 }
 
@@ -1366,7 +1364,6 @@ static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force)
 		req = list_first_entry(&ctx->cq_overflow_list, struct io_kiocb,
 						compl.list);
 		list_move(&req->compl.list, &list);
-		req->flags &= ~REQ_F_OVERFLOW;
 		if (cqe) {
 			WRITE_ONCE(cqe->user_data, req->user_data);
 			WRITE_ONCE(cqe->res, req->result);
@@ -1419,7 +1416,6 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
 			ctx->rings->sq_flags |= IORING_SQ_CQ_OVERFLOW;
 		}
 		io_clean_op(req);
-		req->flags |= REQ_F_OVERFLOW;
 		req->result = res;
 		req->compl.cflags = cflags;
 		refcount_inc(&req->refs);
@@ -1563,17 +1559,6 @@ static bool io_dismantle_req(struct io_kiocb *req)
 	if (req->file)
 		io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
 
-	if (req->flags & REQ_F_INFLIGHT) {
-		struct io_ring_ctx *ctx = req->ctx;
-		unsigned long flags;
-
-		spin_lock_irqsave(&ctx->inflight_lock, flags);
-		list_del(&req->inflight_entry);
-		if (waitqueue_active(&ctx->inflight_wait))
-			wake_up(&ctx->inflight_wait);
-		spin_unlock_irqrestore(&ctx->inflight_lock, flags);
-	}
-
 	return io_req_clean_work(req);
 }
 
@@ -2819,22 +2804,15 @@ static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
 	return __io_iov_buffer_select(req, iov, needs_lock);
 }
 
-static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
-			       struct iovec **iovec, struct iov_iter *iter,
-			       bool needs_lock)
+static ssize_t __io_import_iovec(int rw, struct io_kiocb *req,
+				 struct iovec **iovec, struct iov_iter *iter,
+				 bool needs_lock)
 {
 	void __user *buf = u64_to_user_ptr(req->rw.addr);
 	size_t sqe_len = req->rw.len;
 	ssize_t ret;
 	u8 opcode;
 
-	if (req->io) {
-		struct io_async_rw *iorw = &req->io->rw;
-
-		*iovec = NULL;
-		return iov_iter_count(&iorw->iter);
-	}
-
 	opcode = req->opcode;
 	if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) {
 		*iovec = NULL;
@@ -2848,10 +2826,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 	if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) {
 		if (req->flags & REQ_F_BUFFER_SELECT) {
 			buf = io_rw_buffer_select(req, &sqe_len, needs_lock);
-			if (IS_ERR(buf)) {
-				*iovec = NULL;
+			if (IS_ERR(buf))
 				return PTR_ERR(buf);
-			}
 			req->rw.len = sqe_len;
 		}
 
@@ -2879,6 +2855,16 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
 	return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter);
 }
 
+static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
+			       struct iovec **iovec, struct iov_iter *iter,
+			       bool needs_lock)
+{
+	if (!req->io)
+		return __io_import_iovec(rw, req, iovec, iter, needs_lock);
+	*iovec = NULL;
+	return iov_iter_count(&req->io->rw.iter);
+}
+
 /*
  * For files that don't have ->read_iter() and ->write_iter(), handle them
  * by looping over ->read() or ->write() manually.
@@ -3001,11 +2987,8 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw,
 	ssize_t ret;
 
 	iorw->iter.iov = iorw->fast_iov;
-	/* reset ->io around the iovec import, we don't want to use it */
-	req->io = NULL;
-	ret = io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov,
+	ret = __io_import_iovec(rw, req, (struct iovec **) &iorw->iter.iov,
 				&iorw->iter, !force_nonblock);
-	req->io = container_of(iorw, struct io_async_ctx, rw);
 	if (unlikely(ret < 0))
 		return ret;
 
@@ -3074,27 +3057,6 @@ static int io_async_buf_func(struct wait_queue_entry *wait, unsigned mode,
 	return 1;
 }
 
-static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
-					     struct wait_page_queue *wait,
-					     wait_queue_func_t func,
-					     void *data)
-{
-	/* Can't support async wakeup with polled IO */
-	if (kiocb->ki_flags & IOCB_HIPRI)
-		return -EINVAL;
-	if (kiocb->ki_filp->f_mode & FMODE_BUF_RASYNC) {
-		wait->wait.func = func;
-		wait->wait.private = data;
-		wait->wait.flags = 0;
-		INIT_LIST_HEAD(&wait->wait.entry);
-		kiocb->ki_flags |= IOCB_WAITQ;
-		kiocb->ki_waitq = wait;
-		return 0;
-	}
-
-	return -EOPNOTSUPP;
-}
-
 /*
  * This controls whether a given IO request should be armed for async page
  * based retry. If we return false here, the request is handed to the async
@@ -3109,31 +3071,33 @@ static inline int kiocb_wait_page_queue_init(struct kiocb *kiocb,
  */
 static bool io_rw_should_retry(struct io_kiocb *req)
 {
+	struct wait_page_queue *wait = &req->io->rw.wpq;
 	struct kiocb *kiocb = &req->rw.kiocb;
-	int ret;
 
 	/* never retry for NOWAIT, we just complete with -EAGAIN */
 	if (req->flags & REQ_F_NOWAIT)
 		return false;
 
 	/* Only for buffered IO */
-	if (kiocb->ki_flags & IOCB_DIRECT)
+	if (kiocb->ki_flags & (IOCB_DIRECT | IOCB_HIPRI))
 		return false;
+
 	/*
 	 * just use poll if we can, and don't attempt if the fs doesn't
 	 * support callback based unlocks
 	 */
 	if (file_can_poll(req->file) || !(req->file->f_mode & FMODE_BUF_RASYNC))
 		return false;
 
-	ret = kiocb_wait_page_queue_init(kiocb, &req->io->rw.wpq,
-					 io_async_buf_func, req);
-	if (!ret) {
-		io_get_req_task(req);
-		return true;
-	}
+	wait->wait.func = io_async_buf_func;
+	wait->wait.private = req;
+	wait->wait.flags = 0;
+	INIT_LIST_HEAD(&wait->wait.entry);
+	kiocb->ki_flags |= IOCB_WAITQ;
+	kiocb->ki_waitq = wait;
 
-	return false;
+	io_get_req_task(req);
+	return true;
 }
 
 static int io_iter_do_read(struct io_kiocb *req, struct iov_iter *iter)
@@ -3238,6 +3202,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
 		kiocb_done(kiocb, ret, cs);
 		ret = 0;
 out_free:
+	/* it's reportedly faster than delegating the null check to kfree() */
 	if (iovec)
 		kfree(iovec);
 	return ret;
@@ -3334,6 +3299,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
 		return -EAGAIN;
 	}
 out_free:
+	/* it's reportedly faster than delegating the null check to kfree() */
 	if (iovec)
 		kfree(iovec);
 	return ret;
@@ -5653,6 +5619,18 @@ static void __io_clean_op(struct io_kiocb *req)
 		}
 		req->flags &= ~REQ_F_NEED_CLEANUP;
 	}
+
+	if (req->flags & REQ_F_INFLIGHT) {
+		struct io_ring_ctx *ctx = req->ctx;
+		unsigned long flags;
+
+		spin_lock_irqsave(&ctx->inflight_lock, flags);
+		list_del(&req->inflight_entry);
+		if (waitqueue_active(&ctx->inflight_wait))
+			wake_up(&ctx->inflight_wait);
+		spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+		req->flags &= ~REQ_F_INFLIGHT;
+	}
 }
 
 static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
@@ -7979,7 +7957,13 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 					ACCT_LOCKED);
 
 	INIT_WORK(&ctx->exit_work, io_ring_exit_work);
-	queue_work(system_wq, &ctx->exit_work);
+	/*
+	 * Use system_unbound_wq to avoid spawning tons of event kworkers
+	 * if we're exiting a ton of rings at the same time. It just adds
+	 * noise and overhead, there's no discernable change in runtime
+	 * over using system_wq.
+	 */
+	queue_work(system_unbound_wq, &ctx->exit_work);
 }
 
 static int io_uring_release(struct inode *inode, struct file *file)
@@ -8063,6 +8047,33 @@ static bool io_timeout_remove_link(struct io_ring_ctx *ctx,
 	return found;
 }
 
+static bool io_cancel_link_cb(struct io_wq_work *work, void *data)
+{
+	return io_match_link(container_of(work, struct io_kiocb, work), data);
+}
+
+static void io_attempt_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
+{
+	enum io_wq_cancel cret;
+
+	/* cancel this particular work, if it's running */
+	cret = io_wq_cancel_work(ctx->io_wq, &req->work);
+	if (cret != IO_WQ_CANCEL_NOTFOUND)
+		return;
+
+	/* find links that hold this pending, cancel those */
+	cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_link_cb, req, true);
+	if (cret != IO_WQ_CANCEL_NOTFOUND)
+		return;
+
+	/* if we have a poll link holding this pending, cancel that */
+	if (io_poll_remove_link(ctx, req))
+		return;
+
+	/* final option, timeout link is holding this req pending */
+	io_timeout_remove_link(ctx, req);
+}
+
 static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 				  struct files_struct *files)
 {
@@ -8094,35 +8105,9 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 		/* We need to keep going until we don't find a matching req */
 		if (!cancel_req)
 			break;
-
-		if (cancel_req->flags & REQ_F_OVERFLOW) {
-			spin_lock_irq(&ctx->completion_lock);
-			list_del(&cancel_req->compl.list);
-			cancel_req->flags &= ~REQ_F_OVERFLOW;
-
-			io_cqring_mark_overflow(ctx);
-			WRITE_ONCE(ctx->rings->cq_overflow,
-				atomic_inc_return(&ctx->cached_cq_overflow));
-			io_commit_cqring(ctx);
-			spin_unlock_irq(&ctx->completion_lock);
-
-			/*
-			 * Put inflight ref and overflow ref. If that's
-			 * all we had, then we're done with this request.
-			 */
-			if (refcount_sub_and_test(2, &cancel_req->refs)) {
-				io_free_req(cancel_req);
-				finish_wait(&ctx->inflight_wait, &wait);
-				continue;
-			}
-		} else {
-			io_wq_cancel_work(ctx->io_wq, &cancel_req->work);
-			/* could be a link, check and remove if it is */
-			if (!io_poll_remove_link(ctx, cancel_req))
-				io_timeout_remove_link(ctx, cancel_req);
-			io_put_req(cancel_req);
-		}
-
+		/* cancel this request, or head link requests */
+		io_attempt_cancel(ctx, cancel_req);
+		io_put_req(cancel_req);
 		schedule();
 		finish_wait(&ctx->inflight_wait, &wait);
 	}
