Skip to content

Commit 153a996

Browse files
committed
netfs: Implement unbuffered/DIO write support
Implement support for unbuffered writes and direct I/O writes. If the write is misaligned with respect to the fscrypt block size, then RMW cycles are performed if necessary. DIO writes are a special case of unbuffered writes with extra restrictions imposed, such as block size alignment requirements. Also provide a field that can tell the code to add some extra space onto the bounce buffer for use by the filesystem in the case of a content-encrypted file. Signed-off-by: David Howells <[email protected]> Reviewed-by: Jeff Layton <[email protected]> cc: [email protected] cc: [email protected] cc: [email protected]
1 parent 016dc85 commit 153a996

File tree

11 files changed

+224
-10
lines changed

11 files changed

+224
-10
lines changed

fs/afs/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ static void afs_apply_status(struct afs_operation *op,
250250
* what's on the server.
251251
*/
252252
vnode->netfs.remote_i_size = status->size;
253-
if (change_size) {
253+
if (change_size || status->size > i_size_read(inode)) {
254254
afs_set_i_size(vnode, status->size);
255255
inode_set_ctime_to_ts(inode, t);
256256
inode_set_atime_to_ts(inode, t);

fs/netfs/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ netfs-y := \
44
buffered_read.o \
55
buffered_write.o \
66
direct_read.o \
7+
direct_write.o \
78
io.o \
89
iterator.o \
910
locking.o \

fs/netfs/direct_write.c

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
// SPDX-License-Identifier: GPL-2.0-or-later
2+
/* Unbuffered and direct write support.
3+
*
4+
* Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
5+
* Written by David Howells ([email protected])
6+
*/
7+
8+
#include <linux/export.h>
9+
#include <linux/uio.h>
10+
#include "internal.h"
11+
12+
static void netfs_cleanup_dio_write(struct netfs_io_request *wreq)
13+
{
14+
struct inode *inode = wreq->inode;
15+
unsigned long long end = wreq->start + wreq->len;
16+
17+
if (!wreq->error &&
18+
i_size_read(inode) < end) {
19+
if (wreq->netfs_ops->update_i_size)
20+
wreq->netfs_ops->update_i_size(inode, end);
21+
else
22+
i_size_write(inode, end);
23+
}
24+
}
25+
26+
/*
27+
* Perform an unbuffered write where we may have to do an RMW operation on an
28+
* encrypted file. This can also be used for direct I/O writes.
29+
*/
30+
ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter,
31+
struct netfs_group *netfs_group)
32+
{
33+
struct netfs_io_request *wreq;
34+
unsigned long long start = iocb->ki_pos;
35+
unsigned long long end = start + iov_iter_count(iter);
36+
ssize_t ret, n;
37+
bool async = !is_sync_kiocb(iocb);
38+
39+
_enter("");
40+
41+
/* We're going to need a bounce buffer if what we transmit is going to
42+
* be different in some way to the source buffer, e.g. because it gets
43+
* encrypted/compressed or because it needs expanding to a block size.
44+
*/
45+
// TODO
46+
47+
_debug("uw %llx-%llx", start, end);
48+
49+
wreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
50+
start, end - start,
51+
iocb->ki_flags & IOCB_DIRECT ?
52+
NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE);
53+
if (IS_ERR(wreq))
54+
return PTR_ERR(wreq);
55+
56+
{
57+
/* If this is an async op and we're not using a bounce buffer,
58+
* we have to save the source buffer as the iterator is only
59+
* good until we return. In such a case, extract an iterator
60+
* to represent as much of the the output buffer as we can
61+
* manage. Note that the extraction might not be able to
62+
* allocate a sufficiently large bvec array and may shorten the
63+
* request.
64+
*/
65+
if (async || user_backed_iter(iter)) {
66+
n = netfs_extract_user_iter(iter, wreq->len, &wreq->iter, 0);
67+
if (n < 0) {
68+
ret = n;
69+
goto out;
70+
}
71+
wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec;
72+
wreq->direct_bv_count = n;
73+
wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
74+
wreq->len = iov_iter_count(&wreq->iter);
75+
} else {
76+
wreq->iter = *iter;
77+
}
78+
79+
wreq->io_iter = wreq->iter;
80+
}
81+
82+
/* Copy the data into the bounce buffer and encrypt it. */
83+
// TODO
84+
85+
/* Dispatch the write. */
86+
__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
87+
if (async)
88+
wreq->iocb = iocb;
89+
wreq->cleanup = netfs_cleanup_dio_write;
90+
ret = netfs_begin_write(wreq, is_sync_kiocb(iocb),
91+
iocb->ki_flags & IOCB_DIRECT ?
92+
netfs_write_trace_dio_write :
93+
netfs_write_trace_unbuffered_write);
94+
if (ret < 0) {
95+
_debug("begin = %zd", ret);
96+
goto out;
97+
}
98+
99+
if (!async) {
100+
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
101+
wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
102+
TASK_UNINTERRUPTIBLE);
103+
104+
ret = wreq->error;
105+
_debug("waited = %zd", ret);
106+
if (ret == 0) {
107+
ret = wreq->transferred;
108+
iocb->ki_pos += ret;
109+
}
110+
} else {
111+
ret = -EIOCBQUEUED;
112+
}
113+
114+
out:
115+
netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
116+
return ret;
117+
}
118+
119+
/**
120+
* netfs_unbuffered_write_iter - Unbuffered write to a file
121+
* @iocb: IO state structure
122+
* @from: iov_iter with data to write
123+
*
124+
* Do an unbuffered write to a file, writing the data directly to the server
125+
* and not lodging the data in the pagecache.
126+
*
127+
* Return:
128+
* * Negative error code if no data has been written at all of
129+
* vfs_fsync_range() failed for a synchronous write
130+
* * Number of bytes written, even for truncated writes
131+
*/
132+
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
133+
{
134+
struct file *file = iocb->ki_filp;
135+
struct inode *inode = file->f_mapping->host;
136+
struct netfs_inode *ictx = netfs_inode(inode);
137+
ssize_t ret;
138+
139+
_enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
140+
141+
trace_netfs_write_iter(iocb, from);
142+
143+
ret = netfs_start_io_direct(inode);
144+
if (ret < 0)
145+
return ret;
146+
ret = generic_write_checks(iocb, from);
147+
if (ret < 0)
148+
goto out;
149+
ret = file_remove_privs(file);
150+
if (ret < 0)
151+
goto out;
152+
ret = file_update_time(file);
153+
if (ret < 0)
154+
goto out;
155+
ret = kiocb_invalidate_pages(iocb, iov_iter_count(from));
156+
if (ret < 0)
157+
goto out;
158+
159+
fscache_invalidate(netfs_i_cookie(ictx), NULL, i_size_read(inode),
160+
FSCACHE_INVAL_DIO_WRITE);
161+
ret = netfs_unbuffered_write_iter_locked(iocb, from, NULL);
162+
out:
163+
netfs_end_io_direct(inode);
164+
return ret;
165+
}
166+
EXPORT_SYMBOL(netfs_unbuffered_write_iter);

fs/netfs/internal.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq);
2626
int netfs_prefetch_for_write(struct file *file, struct folio *folio,
2727
size_t offset, size_t len);
2828

29+
/*
30+
* direct_write.c
31+
*/
32+
ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov_iter *iter,
33+
struct netfs_group *netfs_group);
34+
2935
/*
3036
* io.c
3137
*/

fs/netfs/io.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -645,7 +645,7 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
645645

646646
subreq->debug_index = (*_debug_index)++;
647647
subreq->start = rreq->start + rreq->submitted;
648-
subreq->len = rreq->len - rreq->submitted;
648+
subreq->len = io_iter->count;
649649

650650
_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
651651
list_add_tail(&subreq->rreq_link, &rreq->subrequests);

fs/netfs/main.c

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,13 @@ LIST_HEAD(netfs_io_requests);
2626
DEFINE_SPINLOCK(netfs_proc_lock);
2727

2828
static const char *netfs_origins[nr__netfs_io_origin] = {
29-
[NETFS_READAHEAD] = "RA",
30-
[NETFS_READPAGE] = "RP",
31-
[NETFS_READ_FOR_WRITE] = "RW",
32-
[NETFS_WRITEBACK] = "WB",
33-
[NETFS_DIO_READ] = "DR",
29+
[NETFS_READAHEAD] = "RA",
30+
[NETFS_READPAGE] = "RP",
31+
[NETFS_READ_FOR_WRITE] = "RW",
32+
[NETFS_WRITEBACK] = "WB",
33+
[NETFS_UNBUFFERED_WRITE] = "UW",
34+
[NETFS_DIO_READ] = "DR",
35+
[NETFS_DIO_WRITE] = "DW",
3436
};
3537

3638
/*

fs/netfs/objects.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
2020
struct inode *inode = file ? file_inode(file) : mapping->host;
2121
struct netfs_inode *ctx = netfs_inode(inode);
2222
struct netfs_io_request *rreq;
23-
bool is_dio = (origin == NETFS_DIO_READ);
24-
bool cached = is_dio && netfs_is_cache_enabled(ctx);
23+
bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
24+
origin == NETFS_DIO_READ ||
25+
origin == NETFS_DIO_WRITE);
26+
bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
2527
int ret;
2628

2729
rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request),

fs/netfs/output.c

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,21 @@ static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async
7474
{
7575
struct netfs_io_subrequest *subreq;
7676
struct netfs_inode *ctx = netfs_inode(wreq->inode);
77+
size_t transferred = 0;
7778

7879
_enter("R=%x[]", wreq->debug_id);
7980

8081
trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
8182

83+
list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
84+
if (subreq->error || subreq->transferred == 0)
85+
break;
86+
transferred += subreq->transferred;
87+
if (subreq->transferred < subreq->len)
88+
break;
89+
}
90+
wreq->transferred = transferred;
91+
8292
list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
8393
if (!subreq->error)
8494
continue;
@@ -110,11 +120,28 @@ static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async
110120

111121
wreq->cleanup(wreq);
112122

123+
if (wreq->origin == NETFS_DIO_WRITE &&
124+
wreq->mapping->nrpages) {
125+
pgoff_t first = wreq->start >> PAGE_SHIFT;
126+
pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
127+
invalidate_inode_pages2_range(wreq->mapping, first, last);
128+
}
129+
130+
if (wreq->origin == NETFS_DIO_WRITE)
131+
inode_dio_end(wreq->inode);
132+
113133
_debug("finished");
114134
trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
115135
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
116136
wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
117137

138+
if (wreq->iocb) {
139+
wreq->iocb->ki_pos += transferred;
140+
if (wreq->iocb->ki_complete)
141+
wreq->iocb->ki_complete(
142+
wreq->iocb, wreq->error ? wreq->error : transferred);
143+
}
144+
118145
netfs_clear_subrequests(wreq, was_async);
119146
netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
120147
}
@@ -329,6 +356,9 @@ int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
329356
return -EIO;
330357
}
331358

359+
if (wreq->origin == NETFS_DIO_WRITE)
360+
inode_dio_begin(wreq->inode);
361+
332362
wreq->io_iter = wreq->iter;
333363

334364
/* ->outstanding > 0 carries a ref */

include/linux/netfs.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ struct netfs_inode {
138138
loff_t remote_i_size; /* Size of the remote file */
139139
unsigned long flags;
140140
#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
141+
#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
141142
};
142143

143144
/*
@@ -226,7 +227,9 @@ enum netfs_io_origin {
226227
NETFS_READPAGE, /* This read is a synchronous read */
227228
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
228229
NETFS_WRITEBACK, /* This write was triggered by writepages */
230+
NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
229231
NETFS_DIO_READ, /* This is a direct I/O read */
232+
NETFS_DIO_WRITE, /* This is a direct I/O write */
230233
nr__netfs_io_origin
231234
} __mode(byte);
232235

@@ -379,6 +382,7 @@ ssize_t netfs_unbuffered_read_iter(struct kiocb *iocb, struct iov_iter *iter);
379382
/* High-level write API */
380383
ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
381384
struct netfs_group *netfs_group);
385+
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from);
382386

383387
/* Address operations API */
384388
struct readahead_control;

include/trace/events/netfs.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@
3333
EM(NETFS_READPAGE, "RP") \
3434
EM(NETFS_READ_FOR_WRITE, "RW") \
3535
EM(NETFS_WRITEBACK, "WB") \
36-
E_(NETFS_DIO_READ, "DR")
36+
EM(NETFS_UNBUFFERED_WRITE, "UW") \
37+
EM(NETFS_DIO_READ, "DR") \
38+
E_(NETFS_DIO_WRITE, "DW")
3739

3840
#define netfs_rreq_traces \
3941
EM(netfs_rreq_trace_assess, "ASSESS ") \

0 commit comments

Comments
 (0)