
Commit c38f4e9

netfs: Provide func to copy data to pagecache for buffered write

Provide a netfs write helper, netfs_perform_write(), to buffer data to be
written in the pagecache and mark the modified folios dirty.

It will perform "streaming writes" for folios that aren't currently resident,
if possible, storing data in partially modified folios that are marked dirty,
but not uptodate. It will also tag pages as belonging to fs-specific write
groups if so directed by the filesystem.

This is derived from generic_perform_write(), but doesn't use ->write_begin()
and ->write_end(), having that logic rolled in instead.

Signed-off-by: David Howells <[email protected]>
cc: Jeff Layton <[email protected]>
cc: [email protected]
cc: [email protected]
cc: [email protected]
1 parent 0e0f2df commit c38f4e9
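
For orientation, here is a minimal sketch of how a network filesystem's
->write_iter might drive the new helper. The "myfs" naming, the locking shown
and the NULL write group are illustrative assumptions, not part of this
commit; a real caller would typically also handle things like O_DIRECT and
privilege stripping.

	static ssize_t myfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
	{
		struct inode *inode = file_inode(iocb->ki_filp);
		ssize_t ret;

		/* netfs_perform_write() expects the caller to hold appropriate
		 * inode locks, so serialise against other writers here.
		 */
		inode_lock(inode);
		ret = generic_write_checks(iocb, from);
		if (ret > 0)
			/* NULL: no fs-specific dirty-page write grouping */
			ret = netfs_perform_write(iocb, from, NULL);
		inode_unlock(inode);
		return ret;
	}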

File tree: 7 files changed, +461 -0 lines changed

fs/netfs/Makefile

Lines changed: 1 addition & 0 deletions
@@ -2,6 +2,7 @@
 
 netfs-y := \
 	buffered_read.o \
+	buffered_write.o \
 	io.o \
 	iterator.o \
 	locking.o \

fs/netfs/buffered_read.c

Lines changed: 49 additions & 0 deletions
@@ -63,6 +63,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
 				break;
 			}
 			if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
+				trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
 				folio_start_fscache(folio);
 				folio_started = true;
 			}
@@ -454,3 +455,51 @@ int netfs_write_begin(struct netfs_inode *ctx,
 	return ret;
 }
 EXPORT_SYMBOL(netfs_write_begin);
+
+/*
+ * Preload the data into a page we're proposing to write into.
+ */
+int netfs_prefetch_for_write(struct file *file, struct folio *folio,
+			     size_t offset, size_t len)
+{
+	struct netfs_io_request *rreq;
+	struct address_space *mapping = folio_file_mapping(folio);
+	struct netfs_inode *ctx = netfs_inode(mapping->host);
+	unsigned long long start = folio_pos(folio);
+	size_t flen = folio_size(folio);
+	int ret;
+
+	_enter("%zx @%llx", flen, start);
+
+	ret = -ENOMEM;
+
+	rreq = netfs_alloc_request(mapping, file, start, flen,
+				   NETFS_READ_FOR_WRITE);
+	if (IS_ERR(rreq)) {
+		ret = PTR_ERR(rreq);
+		goto error;
+	}
+
+	rreq->no_unlock_folio = folio_index(folio);
+	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
+	ret = netfs_begin_cache_read(rreq, ctx);
+	if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
+		goto error_put;
+
+	netfs_stat(&netfs_n_rh_write_begin);
+	trace_netfs_read(rreq, start, flen, netfs_read_trace_prefetch_for_write);
+
+	/* Set up the output buffer */
+	iov_iter_xarray(&rreq->iter, ITER_DEST, &mapping->i_pages,
+			rreq->start, rreq->len);
+
+	ret = netfs_begin_read(rreq, true);
+	netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
+	return ret;
+
+error_put:
+	netfs_put_request(rreq, false, netfs_rreq_trace_put_discard);
+error:
+	_leave(" = %d", ret);
+	return ret;
+}
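
As a rough illustration of when this helper is needed: a partial write to a
folio that isn't uptodate has to read in the parts the write won't overwrite
first. The sketch below condenses that decision; the real logic is
netfs_how_to_modify() in fs/netfs/buffered_write.c (next file), and
netfs_maybe_prefetch_for_write() is a hypothetical name used only for this
example.

	/* Sketch: read in the untouched parts of a not-yet-uptodate folio
	 * before a partial modification. The folio is expected to be locked.
	 */
	static int netfs_maybe_prefetch_for_write(struct file *file,
						  struct folio *folio,
						  size_t offset, size_t len)
	{
		/* A write covering the whole folio needs no read-modify-write. */
		if (folio_test_uptodate(folio) ||
		    (offset == 0 && len >= folio_size(folio)))
			return 0;

		return netfs_prefetch_for_write(file, folio, offset, len);
	}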

fs/netfs/buffered_write.c

Lines changed: 330 additions & 0 deletions
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem high-level write support.
+ *
+ * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells ([email protected])
+ */
+
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/pagevec.h>
+#include "internal.h"
+
+/*
+ * Determined write method. Adjust netfs_folio_traces if this is changed.
+ */
+enum netfs_how_to_modify {
+	NETFS_FOLIO_IS_UPTODATE,	/* Folio is uptodate already */
+	NETFS_JUST_PREFETCH,		/* We have to read the folio anyway */
+	NETFS_WHOLE_FOLIO_MODIFY,	/* We're going to overwrite the whole folio */
+	NETFS_MODIFY_AND_CLEAR,		/* We can assume there is no data to be downloaded. */
+	NETFS_STREAMING_WRITE,		/* Store incomplete data in non-uptodate page. */
+	NETFS_STREAMING_WRITE_CONT,	/* Continue streaming write. */
+	NETFS_FLUSH_CONTENT,		/* Flush incompatible content. */
+};
+
+static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+	if (netfs_group && !folio_get_private(folio))
+		folio_attach_private(folio, netfs_get_group(netfs_group));
+}
+
+/*
+ * Decide how we should modify a folio. We might be attempting to do
+ * write-streaming, in which case we don't want to a local RMW cycle if we can
+ * avoid it. If we're doing local caching or content crypto, we award that
+ * priority over avoiding RMW. If the file is open readably, then we also
+ * assume that we may want to read what we wrote.
+ */
+static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
+						    struct file *file,
+						    struct folio *folio,
+						    void *netfs_group,
+						    size_t flen,
+						    size_t offset,
+						    size_t len,
+						    bool maybe_trouble)
+{
+	struct netfs_folio *finfo = netfs_folio_info(folio);
+	loff_t pos = folio_file_pos(folio);
+
+	_enter("");
+
+	if (netfs_folio_group(folio) != netfs_group)
+		return NETFS_FLUSH_CONTENT;
+
+	if (folio_test_uptodate(folio))
+		return NETFS_FOLIO_IS_UPTODATE;
+
+	if (pos >= ctx->remote_i_size)
+		return NETFS_MODIFY_AND_CLEAR;
+
+	if (!maybe_trouble && offset == 0 && len >= flen)
+		return NETFS_WHOLE_FOLIO_MODIFY;
+
+	if (file->f_mode & FMODE_READ)
+		return NETFS_JUST_PREFETCH;
+
+	if (netfs_is_cache_enabled(ctx))
+		return NETFS_JUST_PREFETCH;
+
+	if (!finfo)
+		return NETFS_STREAMING_WRITE;
+
+	/* We can continue a streaming write only if it continues on from the
+	 * previous. If it overlaps, we must flush lest we suffer a partial
+	 * copy and disjoint dirty regions.
+	 */
+	if (offset == finfo->dirty_offset + finfo->dirty_len)
+		return NETFS_STREAMING_WRITE_CONT;
+	return NETFS_FLUSH_CONTENT;
+}
+
+/*
+ * Grab a folio for writing and lock it.
+ */
+static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
+						loff_t pos, size_t part)
+{
+	pgoff_t index = pos / PAGE_SIZE;
+
+	return __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
+				   mapping_gfp_mask(mapping));
+}
+
+/**
+ * netfs_perform_write - Copy data into the pagecache.
+ * @iocb: The operation parameters
+ * @iter: The source buffer
+ * @netfs_group: Grouping for dirty pages (eg. ceph snaps).
+ *
+ * Copy data into pagecache pages attached to the inode specified by @iocb.
+ * The caller must hold appropriate inode locks.
+ *
+ * Dirty pages are tagged with a netfs_folio struct if they're not up to date
+ * to indicate the range modified. Dirty pages may also be tagged with a
+ * netfs-specific grouping such that data from an old group gets flushed before
+ * a new one is started.
+ */
+ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
+			    struct netfs_group *netfs_group)
+{
+	struct file *file = iocb->ki_filp;
+	struct inode *inode = file_inode(file);
+	struct address_space *mapping = inode->i_mapping;
+	struct netfs_inode *ctx = netfs_inode(inode);
+	struct netfs_folio *finfo;
+	struct folio *folio;
+	enum netfs_how_to_modify howto;
+	enum netfs_folio_trace trace;
+	unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
+	ssize_t written = 0, ret;
+	loff_t i_size, pos = iocb->ki_pos, from, to;
+	size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
+	bool maybe_trouble = false;
+
+	do {
+		size_t flen;
+		size_t offset;	/* Offset into pagecache folio */
+		size_t part;	/* Bytes to write to folio */
+		size_t copied;	/* Bytes copied from user */
+
+		ret = balance_dirty_pages_ratelimited_flags(mapping, bdp_flags);
+		if (unlikely(ret < 0))
+			break;
+
+		offset = pos & (max_chunk - 1);
+		part = min(max_chunk - offset, iov_iter_count(iter));
+
+		/* Bring in the user pages that we will copy from _first_ lest
+		 * we hit a nasty deadlock on copying from the same page as
+		 * we're writing to, without it being marked uptodate.
+		 *
+		 * Not only is this an optimisation, but it is also required to
+		 * check that the address is actually valid, when atomic
+		 * usercopies are used below.
+		 *
+		 * We rely on the page being held onto long enough by the LRU
+		 * that we can grab it below if this causes it to be read.
+		 */
+		ret = -EFAULT;
+		if (unlikely(fault_in_iov_iter_readable(iter, part) == part))
+			break;
+
+		ret = -ENOMEM;
+		folio = netfs_grab_folio_for_write(mapping, pos, part);
+		if (!folio)
+			break;
+
+		flen = folio_size(folio);
+		offset = pos & (flen - 1);
+		part = min_t(size_t, flen - offset, part);
+
+		if (signal_pending(current)) {
+			ret = written ? -EINTR : -ERESTARTSYS;
+			goto error_folio_unlock;
+		}
+
+		/* See if we need to prefetch the area we're going to modify.
+		 * We need to do this before we get a lock on the folio in case
+		 * there's more than one writer competing for the same cache
+		 * block.
+		 */
+		howto = netfs_how_to_modify(ctx, file, folio, netfs_group,
+					    flen, offset, part, maybe_trouble);
+		_debug("howto %u", howto);
+		switch (howto) {
+		case NETFS_JUST_PREFETCH:
+			ret = netfs_prefetch_for_write(file, folio, offset, part);
+			if (ret < 0) {
+				_debug("prefetch = %zd", ret);
+				goto error_folio_unlock;
+			}
+			break;
+		case NETFS_FOLIO_IS_UPTODATE:
+		case NETFS_WHOLE_FOLIO_MODIFY:
+		case NETFS_STREAMING_WRITE_CONT:
+			break;
+		case NETFS_MODIFY_AND_CLEAR:
+			zero_user_segment(&folio->page, 0, offset);
+			break;
+		case NETFS_STREAMING_WRITE:
+			ret = -EIO;
+			if (WARN_ON(folio_get_private(folio)))
+				goto error_folio_unlock;
+			break;
+		case NETFS_FLUSH_CONTENT:
+			trace_netfs_folio(folio, netfs_flush_content);
+			from = folio_pos(folio);
+			to = from + folio_size(folio) - 1;
+			folio_unlock(folio);
+			folio_put(folio);
+			ret = filemap_write_and_wait_range(mapping, from, to);
+			if (ret < 0)
+				goto error_folio_unlock;
+			continue;
+		}
+
+		if (mapping_writably_mapped(mapping))
+			flush_dcache_folio(folio);
+
+		copied = copy_folio_from_iter_atomic(folio, offset, part, iter);
+
+		flush_dcache_folio(folio);
+
+		/* Deal with a (partially) failed copy */
+		if (copied == 0) {
+			ret = -EFAULT;
+			goto error_folio_unlock;
+		}
+
+		trace = (enum netfs_folio_trace)howto;
+		switch (howto) {
+		case NETFS_FOLIO_IS_UPTODATE:
+		case NETFS_JUST_PREFETCH:
+			netfs_set_group(folio, netfs_group);
+			break;
+		case NETFS_MODIFY_AND_CLEAR:
+			zero_user_segment(&folio->page, offset + copied, flen);
+			netfs_set_group(folio, netfs_group);
+			folio_mark_uptodate(folio);
+			break;
+		case NETFS_WHOLE_FOLIO_MODIFY:
+			if (unlikely(copied < part)) {
+				maybe_trouble = true;
+				iov_iter_revert(iter, copied);
+				copied = 0;
+				goto retry;
+			}
+			netfs_set_group(folio, netfs_group);
+			folio_mark_uptodate(folio);
+			break;
+		case NETFS_STREAMING_WRITE:
+			if (offset == 0 && copied == flen) {
+				netfs_set_group(folio, netfs_group);
+				folio_mark_uptodate(folio);
+				trace = netfs_streaming_filled_page;
+				break;
+			}
+			finfo = kzalloc(sizeof(*finfo), GFP_KERNEL);
+			if (!finfo) {
+				iov_iter_revert(iter, copied);
+				ret = -ENOMEM;
+				goto error_folio_unlock;
+			}
+			finfo->netfs_group = netfs_get_group(netfs_group);
+			finfo->dirty_offset = offset;
+			finfo->dirty_len = copied;
+			folio_attach_private(folio, (void *)((unsigned long)finfo |
+					     NETFS_FOLIO_INFO));
+			break;
+		case NETFS_STREAMING_WRITE_CONT:
+			finfo = netfs_folio_info(folio);
+			finfo->dirty_len += copied;
+			if (finfo->dirty_offset == 0 && finfo->dirty_len == flen) {
+				if (finfo->netfs_group)
+					folio_change_private(folio, finfo->netfs_group);
+				else
+					folio_detach_private(folio);
+				folio_mark_uptodate(folio);
+				kfree(finfo);
+				trace = netfs_streaming_cont_filled_page;
+			}
+			break;
+		default:
+			WARN(true, "Unexpected modify type %u ix=%lx\n",
+			     howto, folio_index(folio));
+			ret = -EIO;
+			goto error_folio_unlock;
+		}
+
+		trace_netfs_folio(folio, trace);
+
+		/* Update the inode size if we moved the EOF marker */
+		i_size = i_size_read(inode);
+		pos += copied;
+		if (pos > i_size) {
+			if (ctx->ops->update_i_size) {
+				ctx->ops->update_i_size(inode, pos);
+			} else {
+				i_size_write(inode, pos);
+#if IS_ENABLED(CONFIG_FSCACHE)
+				fscache_update_cookie(ctx->cache, NULL, &pos);
+#endif
+			}
+		}
+		written += copied;
+
+		folio_mark_dirty(folio);
+retry:
+		folio_unlock(folio);
+		folio_put(folio);
+		folio = NULL;
+
+		cond_resched();
+	} while (iov_iter_count(iter));
+
+out:
+	if (likely(written)) {
+		/* Flush and wait for a write that requires immediate synchronisation. */
+		if (iocb->ki_flags & (IOCB_DSYNC | IOCB_SYNC)) {
+			_debug("dsync");
+			ret = filemap_fdatawait_range(mapping, iocb->ki_pos,
+						      iocb->ki_pos + written);
+		}
+
+		iocb->ki_pos += written;
+	}
+
+	_leave(" = %zd [%zd]", written, ret);
+	return written ? written : ret;
+
+error_folio_unlock:
+	folio_unlock(folio);
+	folio_put(folio);
+	goto out;
+}
+EXPORT_SYMBOL(netfs_perform_write);
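
The streaming-write cases above encode state in folio->private: either a
netfs_group pointer, or the address of a netfs_folio record with the
NETFS_FOLIO_INFO bit set (see the folio_attach_private() call in the
NETFS_STREAMING_WRITE case). The helpers that decode this, netfs_folio_info()
and netfs_folio_group(), are defined outside this diff; the sketch below shows
roughly how they would work and is an assumption for illustration, not the
commit's own definitions.

	/* Sketch: recover the netfs_folio record, if any, from folio->private. */
	static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
	{
		void *priv = folio_get_private(folio);

		if ((unsigned long)priv & NETFS_FOLIO_INFO)
			return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO);
		return NULL;
	}

	/* Sketch: the write group is either stored directly in folio->private
	 * or held inside the netfs_folio record during a streaming write.
	 */
	static inline struct netfs_group *netfs_folio_group(struct folio *folio)
	{
		struct netfs_folio *finfo = netfs_folio_info(folio);

		if (finfo)
			return finfo->netfs_group;
		return folio_get_private(folio);
	}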
