Skip to content

Commit 016dc85

Browse files
committed
netfs: Implement unbuffered/DIO read support
Implement support for unbuffered and DIO reads in the netfs library, utilising the existing read helper code to do block splitting and individual queuing. The code also handles extraction of the destination buffer from the supplied iterator, allowing async unbuffered reads to take place. The read will be split up according to the rsize setting and, if supplied, the ->clamp_length() method. Note that the next subrequest will be issued as soon as issue_op returns, without waiting for previous ones to finish. The network filesystem needs to pause or handle queuing them if it doesn't want to fire them all at the server simultaneously. Once all the subrequests have finished, the state will be assessed and the amount of data to be indicated as having been obtained will be determined. As the subrequests may finish in any order, if an intermediate subrequest is short, any further subrequests may be copied into the buffer and then abandoned. In the future, this will also take care of doing an unbuffered read from encrypted content, with the decryption being done by the library. Signed-off-by: David Howells <[email protected]> cc: Jeff Layton <[email protected]> cc: [email protected] cc: [email protected] cc: [email protected]
1 parent e2e2e83 commit 016dc85

File tree

10 files changed

+226
-11
lines changed

10 files changed

+226
-11
lines changed

fs/netfs/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
netfs-y := \
44
buffered_read.o \
55
buffered_write.o \
6+
direct_read.o \
67
io.o \
78
iterator.o \
89
locking.o \

fs/netfs/direct_read.c

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
// SPDX-License-Identifier: GPL-2.0-or-later
2+
/* Direct I/O support.
3+
*
4+
* Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
5+
* Written by David Howells ([email protected])
6+
*/
7+
8+
#include <linux/export.h>
9+
#include <linux/fs.h>
10+
#include <linux/mm.h>
11+
#include <linux/pagemap.h>
12+
#include <linux/slab.h>
13+
#include <linux/uio.h>
14+
#include <linux/sched/mm.h>
15+
#include <linux/task_io_accounting_ops.h>
16+
#include <linux/netfs.h>
17+
#include "internal.h"
18+
19+
/**
20+
* netfs_unbuffered_read_iter_locked - Perform an unbuffered or direct I/O read
21+
* @iocb: The I/O control descriptor describing the read
22+
* @iter: The output buffer (also specifies read length)
23+
*
24+
* Perform an unbuffered I/O or direct I/O from the file in @iocb to the
25+
* output buffer. No use is made of the pagecache.
26+
*
27+
* The caller must hold any appropriate locks.
28+
*/
29+
static ssize_t netfs_unbuffered_read_iter_locked(struct kiocb *iocb, struct iov_iter *iter)
30+
{
31+
struct netfs_io_request *rreq;
32+
ssize_t ret;
33+
size_t orig_count = iov_iter_count(iter);
34+
bool async = !is_sync_kiocb(iocb);
35+
36+
_enter("");
37+
38+
if (!orig_count)
39+
return 0; /* Don't update atime */
40+
41+
ret = kiocb_write_and_wait(iocb, orig_count);
42+
if (ret < 0)
43+
return ret;
44+
file_accessed(iocb->ki_filp);
45+
46+
rreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
47+
iocb->ki_pos, orig_count,
48+
NETFS_DIO_READ);
49+
if (IS_ERR(rreq))
50+
return PTR_ERR(rreq);
51+
52+
netfs_stat(&netfs_n_rh_dio_read);
53+
trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_dio_read);
54+
55+
/* If this is an async op, we have to keep track of the destination
56+
* buffer for ourselves as the caller's iterator will be trashed when
57+
* we return.
58+
*
59+
* In such a case, extract an iterator to represent as much of the the
60+
* output buffer as we can manage. Note that the extraction might not
61+
* be able to allocate a sufficiently large bvec array and may shorten
62+
* the request.
63+
*/
64+
if (user_backed_iter(iter)) {
65+
ret = netfs_extract_user_iter(iter, rreq->len, &rreq->iter, 0);
66+
if (ret < 0)
67+
goto out;
68+
rreq->direct_bv = (struct bio_vec *)rreq->iter.bvec;
69+
rreq->direct_bv_count = ret;
70+
rreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
71+
rreq->len = iov_iter_count(&rreq->iter);
72+
} else {
73+
rreq->iter = *iter;
74+
rreq->len = orig_count;
75+
rreq->direct_bv_unpin = false;
76+
iov_iter_advance(iter, orig_count);
77+
}
78+
79+
// TODO: Set up bounce buffer if needed
80+
81+
if (async)
82+
rreq->iocb = iocb;
83+
84+
ret = netfs_begin_read(rreq, is_sync_kiocb(iocb));
85+
if (ret < 0)
86+
goto out; /* May be -EIOCBQUEUED */
87+
if (!async) {
88+
// TODO: Copy from bounce buffer
89+
iocb->ki_pos += rreq->transferred;
90+
ret = rreq->transferred;
91+
}
92+
93+
out:
94+
netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
95+
if (ret > 0)
96+
orig_count -= ret;
97+
if (ret != -EIOCBQUEUED)
98+
iov_iter_revert(iter, orig_count - iov_iter_count(iter));
99+
return ret;
100+
}
101+
102+
/**
103+
* netfs_unbuffered_read_iter - Perform an unbuffered or direct I/O read
104+
* @iocb: The I/O control descriptor describing the read
105+
* @iter: The output buffer (also specifies read length)
106+
*
107+
* Perform an unbuffered I/O or direct I/O from the file in @iocb to the
108+
* output buffer. No use is made of the pagecache.
109+
*/
110+
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter)
111+
{
112+
struct inode *inode = file_inode(iocb->ki_filp);
113+
ssize_t ret;
114+
115+
if (!iter->count)
116+
return 0; /* Don't update atime */
117+
118+
ret = netfs_start_io_direct(inode);
119+
if (ret == 0) {
120+
ret = netfs_unbuffered_read_iter_locked(iocb, iter);
121+
netfs_end_io_direct(inode);
122+
}
123+
return ret;
124+
}
125+
EXPORT_SYMBOL(netfs_unbuffered_read_iter);

fs/netfs/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
100100
* stats.c
101101
*/
102102
#ifdef CONFIG_NETFS_STATS
103+
extern atomic_t netfs_n_rh_dio_read;
103104
extern atomic_t netfs_n_rh_readahead;
104105
extern atomic_t netfs_n_rh_readpage;
105106
extern atomic_t netfs_n_rh_rreq;

fs/netfs/io.c

Lines changed: 75 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,9 @@ static void netfs_read_from_server(struct netfs_io_request *rreq,
7878
struct netfs_io_subrequest *subreq)
7979
{
8080
netfs_stat(&netfs_n_rh_download);
81-
if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
81+
82+
if (rreq->origin != NETFS_DIO_READ &&
83+
iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
8284
pr_warn("R=%08x[%u] ITER PRE-MISMATCH %zx != %zx-%zx %lx\n",
8385
rreq->debug_id, subreq->debug_index,
8486
iov_iter_count(&subreq->io_iter), subreq->len,
@@ -341,6 +343,43 @@ static void netfs_rreq_is_still_valid(struct netfs_io_request *rreq)
341343
}
342344
}
343345

346+
/*
347+
* Determine how much we can admit to having read from a DIO read.
348+
*/
349+
static void netfs_rreq_assess_dio(struct netfs_io_request *rreq)
350+
{
351+
struct netfs_io_subrequest *subreq;
352+
unsigned int i;
353+
size_t transferred = 0;
354+
355+
for (i = 0; i < rreq->direct_bv_count; i++)
356+
flush_dcache_page(rreq->direct_bv[i].bv_page);
357+
358+
list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
359+
if (subreq->error || subreq->transferred == 0)
360+
break;
361+
transferred += subreq->transferred;
362+
if (subreq->transferred < subreq->len)
363+
break;
364+
}
365+
366+
for (i = 0; i < rreq->direct_bv_count; i++)
367+
flush_dcache_page(rreq->direct_bv[i].bv_page);
368+
369+
rreq->transferred = transferred;
370+
task_io_account_read(transferred);
371+
372+
if (rreq->iocb) {
373+
rreq->iocb->ki_pos += transferred;
374+
if (rreq->iocb->ki_complete)
375+
rreq->iocb->ki_complete(
376+
rreq->iocb, rreq->error ? rreq->error : transferred);
377+
}
378+
if (rreq->netfs_ops->done)
379+
rreq->netfs_ops->done(rreq);
380+
inode_dio_end(rreq->inode);
381+
}
382+
344383
/*
345384
* Assess the state of a read request and decide what to do next.
346385
*
@@ -361,7 +400,10 @@ static void netfs_rreq_assess(struct netfs_io_request *rreq, bool was_async)
361400
return;
362401
}
363402

364-
netfs_rreq_unlock_folios(rreq);
403+
if (rreq->origin != NETFS_DIO_READ)
404+
netfs_rreq_unlock_folios(rreq);
405+
else
406+
netfs_rreq_assess_dio(rreq);
365407

366408
trace_netfs_rreq(rreq, netfs_rreq_trace_wake_ip);
367409
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
@@ -526,14 +568,16 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
526568
struct netfs_io_subrequest *subreq,
527569
struct iov_iter *io_iter)
528570
{
529-
enum netfs_io_source source;
571+
enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
530572
size_t lsize;
531573

532574
_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);
533575

534-
source = netfs_cache_prepare_read(subreq, rreq->i_size);
535-
if (source == NETFS_INVALID_READ)
536-
goto out;
576+
if (rreq->origin != NETFS_DIO_READ) {
577+
source = netfs_cache_prepare_read(subreq, rreq->i_size);
578+
if (source == NETFS_INVALID_READ)
579+
goto out;
580+
}
537581

538582
if (source == NETFS_DOWNLOAD_FROM_SERVER) {
539583
/* Call out to the netfs to let it shrink the request to fit
@@ -544,6 +588,8 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
544588
*/
545589
if (subreq->len > rreq->i_size - subreq->start)
546590
subreq->len = rreq->i_size - subreq->start;
591+
if (rreq->rsize && subreq->len > rreq->rsize)
592+
subreq->len = rreq->rsize;
547593

548594
if (rreq->netfs_ops->clamp_length &&
549595
!rreq->netfs_ops->clamp_length(subreq)) {
@@ -662,6 +708,10 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
662708
return -EIO;
663709
}
664710

711+
if (rreq->origin == NETFS_DIO_READ)
712+
inode_dio_begin(rreq->inode);
713+
714+
// TODO: Use bounce buffer if requested
665715
rreq->io_iter = rreq->iter;
666716

667717
INIT_WORK(&rreq->work, netfs_rreq_work);
@@ -673,11 +723,25 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
673723
atomic_set(&rreq->nr_outstanding, 1);
674724
io_iter = rreq->io_iter;
675725
do {
726+
_debug("submit %llx + %zx >= %llx",
727+
rreq->start, rreq->submitted, rreq->i_size);
728+
if (rreq->origin == NETFS_DIO_READ &&
729+
rreq->start + rreq->submitted >= rreq->i_size)
730+
break;
676731
if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index))
677732
break;
733+
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
734+
test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
735+
break;
678736

679737
} while (rreq->submitted < rreq->len);
680738

739+
if (!rreq->submitted) {
740+
netfs_put_request(rreq, false, netfs_rreq_trace_put_no_submit);
741+
ret = 0;
742+
goto out;
743+
}
744+
681745
if (sync) {
682746
/* Keep nr_outstanding incremented so that the ref always
683747
* belongs to us, and the service code isn't punted off to a
@@ -694,15 +758,18 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
694758
TASK_UNINTERRUPTIBLE);
695759

696760
ret = rreq->error;
697-
if (ret == 0 && rreq->submitted < rreq->len) {
761+
if (ret == 0 && rreq->submitted < rreq->len &&
762+
rreq->origin != NETFS_DIO_READ) {
698763
trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_read);
699764
ret = -EIO;
700765
}
701766
} else {
702767
/* If we decrement nr_outstanding to 0, the ref belongs to us. */
703768
if (atomic_dec_and_test(&rreq->nr_outstanding))
704769
netfs_rreq_assess(rreq, false);
705-
ret = 0;
770+
ret = -EIOCBQUEUED;
706771
}
772+
773+
out:
707774
return ret;
708775
}

fs/netfs/main.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
3030
[NETFS_READPAGE] = "RP",
3131
[NETFS_READ_FOR_WRITE] = "RW",
3232
[NETFS_WRITEBACK] = "WB",
33+
[NETFS_DIO_READ] = "DR",
3334
};
3435

3536
/*

fs/netfs/objects.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
2020
struct inode *inode = file ? file_inode(file) : mapping->host;
2121
struct netfs_inode *ctx = netfs_inode(inode);
2222
struct netfs_io_request *rreq;
23-
bool cached = netfs_is_cache_enabled(ctx);
23+
bool is_dio = (origin == NETFS_DIO_READ);
24+
bool cached = is_dio && netfs_is_cache_enabled(ctx);
2425
int ret;
2526

2627
rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request),
@@ -42,6 +43,8 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
4243
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
4344
if (cached)
4445
__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
46+
if (file && file->f_flags & O_NONBLOCK)
47+
__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
4548
if (rreq->netfs_ops->init_request) {
4649
ret = rreq->netfs_ops->init_request(rreq, file);
4750
if (ret < 0) {

fs/netfs/stats.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <linux/seq_file.h>
1010
#include "internal.h"
1111

12+
atomic_t netfs_n_rh_dio_read;
1213
atomic_t netfs_n_rh_readahead;
1314
atomic_t netfs_n_rh_readpage;
1415
atomic_t netfs_n_rh_rreq;
@@ -36,7 +37,8 @@ atomic_t netfs_n_wh_write_failed;
3637

3738
int netfs_stats_show(struct seq_file *m, void *v)
3839
{
39-
seq_printf(m, "Netfs : RA=%u RP=%u WB=%u WBZ=%u rr=%u sr=%u\n",
40+
seq_printf(m, "Netfs : DR=%u RA=%u RP=%u WB=%u WBZ=%u rr=%u sr=%u\n",
41+
atomic_read(&netfs_n_rh_dio_read),
4042
atomic_read(&netfs_n_rh_readahead),
4143
atomic_read(&netfs_n_rh_readpage),
4244
atomic_read(&netfs_n_rh_write_begin),

include/linux/netfs.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ enum netfs_io_origin {
226226
NETFS_READPAGE, /* This read is a synchronous read */
227227
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
228228
NETFS_WRITEBACK, /* This write was triggered by writepages */
229+
NETFS_DIO_READ, /* This is a direct I/O read */
229230
nr__netfs_io_origin
230231
} __mode(byte);
231232

@@ -240,6 +241,7 @@ struct netfs_io_request {
240241
};
241242
struct inode *inode; /* The file being accessed */
242243
struct address_space *mapping; /* The mapping being accessed */
244+
struct kiocb *iocb; /* AIO completion vector */
243245
struct netfs_cache_resources cache_resources;
244246
struct list_head proc_link; /* Link in netfs_iorequests */
245247
struct list_head subrequests; /* Contributory I/O operations */
@@ -249,12 +251,14 @@ struct netfs_io_request {
249251
struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */
250252
unsigned int direct_bv_count; /* Number of elements in direct_bv[] */
251253
unsigned int debug_id;
254+
unsigned int rsize; /* Maximum read size (0 for none) */
252255
unsigned int wsize; /* Maximum write size (0 for none) */
253256
unsigned int subreq_counter; /* Next subreq->debug_index */
254257
atomic_t nr_outstanding; /* Number of ops in progress */
255258
atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */
256259
size_t submitted; /* Amount submitted for I/O so far */
257260
size_t len; /* Length of the request */
261+
size_t transferred; /* Amount to be indicated as transferred */
258262
short error; /* 0 or error that occurred */
259263
enum netfs_io_origin origin; /* Origin of the request */
260264
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
@@ -271,6 +275,8 @@ struct netfs_io_request {
271275
#define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */
272276
#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */
273277
#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
278+
#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
279+
#define NETFS_RREQ_BLOCKED 10 /* We blocked */
274280
const struct netfs_request_ops *netfs_ops;
275281
void (*cleanup)(struct netfs_io_request *req);
276282
};
@@ -367,6 +373,9 @@ struct netfs_cache_ops {
367373
loff_t *_data_start, size_t *_data_len);
368374
};
369375

376+
/* High-level read API. */
377+
ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
378+
370379
/* High-level write API */
371380
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
372381
struct netfs_group *netfs_group);

0 commit comments

Comments
 (0)