Skip to content

Commit a4c9ab1

Browse files
joannekoong authored and brauner committed
fuse: use iomap for buffered writes
Have buffered writes go through iomap. This has two advantages:

* granular large folio synchronous reads
* granular large folio dirty tracking

If for example there is a 1 MB large folio and a write issued at pos 1 to pos 1 MB - 2, only the head and tail pages will need to be read in and marked uptodate instead of the entire folio needing to be read in. Non-relevant trailing pages are also skipped (e.g. if for a 1 MB large folio a write is issued at pos 1 to 4099, only the first two pages are read in and the ones after that are skipped).

iomap also has granular dirty tracking. This is useful in that when it comes to writeback time, only the dirty portions of the large folio will be written instead of having to write out the entire folio. For example if there is a 1 MB large folio and only 2 bytes in it are dirty, only the page for those dirty bytes gets written out. Please note that granular writeback is only done once fuse also uses iomap in writeback (separate commit).

.release_folio needs to be set to iomap_release_folio so that any allocated iomap ifs structs get freed.

Signed-off-by: Joanne Koong <[email protected]>
Link: https://lore.kernel.org/[email protected]
Reviewed-by: Darrick J. Wong <[email protected]>
Signed-off-by: Christian Brauner <[email protected]>
1 parent 2f368b5 commit a4c9ab1

File tree

2 files changed

+55
-94
lines changed

2 files changed

+55
-94
lines changed

fs/fuse/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
config FUSE_FS
33
tristate "FUSE (Filesystem in Userspace) support"
44
select FS_POSIX_ACL
5+
select FS_IOMAP
56
help
67
With FUSE it is possible to implement a fully functional filesystem
78
in a userspace program.

fs/fuse/file.c

Lines changed: 54 additions & 94 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <linux/filelock.h>
2222
#include <linux/splice.h>
2323
#include <linux/task_io_accounting_ops.h>
24+
#include <linux/iomap.h>
2425

2526
static int fuse_send_open(struct fuse_mount *fm, u64 nodeid,
2627
unsigned int open_flags, int opcode,
@@ -788,12 +789,16 @@ static void fuse_short_read(struct inode *inode, u64 attr_ver, size_t num_read,
788789
}
789790
}
790791

791-
static int fuse_do_readfolio(struct file *file, struct folio *folio)
792+
static int fuse_do_readfolio(struct file *file, struct folio *folio,
793+
size_t off, size_t len)
792794
{
793795
struct inode *inode = folio->mapping->host;
794796
struct fuse_mount *fm = get_fuse_mount(inode);
795-
loff_t pos = folio_pos(folio);
796-
struct fuse_folio_desc desc = { .length = folio_size(folio) };
797+
loff_t pos = folio_pos(folio) + off;
798+
struct fuse_folio_desc desc = {
799+
.offset = off,
800+
.length = len,
801+
};
797802
struct fuse_io_args ia = {
798803
.ap.args.page_zeroing = true,
799804
.ap.args.out_pages = true,
@@ -820,8 +825,6 @@ static int fuse_do_readfolio(struct file *file, struct folio *folio)
820825
if (res < desc.length)
821826
fuse_short_read(inode, attr_ver, res, &ia.ap);
822827

823-
folio_mark_uptodate(folio);
824-
825828
return 0;
826829
}
827830

@@ -834,13 +837,26 @@ static int fuse_read_folio(struct file *file, struct folio *folio)
834837
if (fuse_is_bad(inode))
835838
goto out;
836839

837-
err = fuse_do_readfolio(file, folio);
840+
err = fuse_do_readfolio(file, folio, 0, folio_size(folio));
841+
if (!err)
842+
folio_mark_uptodate(folio);
843+
838844
fuse_invalidate_atime(inode);
839845
out:
840846
folio_unlock(folio);
841847
return err;
842848
}
843849

850+
static int fuse_iomap_read_folio_range(const struct iomap_iter *iter,
851+
struct folio *folio, loff_t pos,
852+
size_t len)
853+
{
854+
struct file *file = iter->private;
855+
size_t off = offset_in_folio(folio, pos);
856+
857+
return fuse_do_readfolio(file, folio, off, len);
858+
}
859+
844860
static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
845861
int err)
846862
{
@@ -1375,6 +1391,24 @@ static void fuse_dio_unlock(struct kiocb *iocb, bool exclusive)
13751391
}
13761392
}
13771393

1394+
static const struct iomap_write_ops fuse_iomap_write_ops = {
1395+
.read_folio_range = fuse_iomap_read_folio_range,
1396+
};
1397+
1398+
static int fuse_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
1399+
unsigned int flags, struct iomap *iomap,
1400+
struct iomap *srcmap)
1401+
{
1402+
iomap->type = IOMAP_MAPPED;
1403+
iomap->length = length;
1404+
iomap->offset = offset;
1405+
return 0;
1406+
}
1407+
1408+
static const struct iomap_ops fuse_iomap_ops = {
1409+
.iomap_begin = fuse_iomap_begin,
1410+
};
1411+
13781412
static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
13791413
{
13801414
struct file *file = iocb->ki_filp;
@@ -1384,6 +1418,7 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
13841418
struct inode *inode = mapping->host;
13851419
ssize_t err, count;
13861420
struct fuse_conn *fc = get_fuse_conn(inode);
1421+
bool writeback = false;
13871422

13881423
if (fc->writeback_cache) {
13891424
/* Update size (EOF optimization) and mode (SUID clearing) */
@@ -1392,16 +1427,11 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
13921427
if (err)
13931428
return err;
13941429

1395-
if (fc->handle_killpriv_v2 &&
1396-
setattr_should_drop_suidgid(idmap,
1397-
file_inode(file))) {
1398-
goto writethrough;
1399-
}
1400-
1401-
return generic_file_write_iter(iocb, from);
1430+
if (!fc->handle_killpriv_v2 ||
1431+
!setattr_should_drop_suidgid(idmap, file_inode(file)))
1432+
writeback = true;
14021433
}
14031434

1404-
writethrough:
14051435
inode_lock(inode);
14061436

14071437
err = count = generic_write_checks(iocb, from);
@@ -1420,6 +1450,15 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
14201450
goto out;
14211451
written = direct_write_fallback(iocb, from, written,
14221452
fuse_perform_write(iocb, from));
1453+
} else if (writeback) {
1454+
/*
1455+
* Use iomap so that we can do granular uptodate reads
1456+
* and granular dirty tracking for large folios.
1457+
*/
1458+
written = iomap_file_buffered_write(iocb, from,
1459+
&fuse_iomap_ops,
1460+
&fuse_iomap_write_ops,
1461+
file);
14231462
} else {
14241463
written = fuse_perform_write(iocb, from);
14251464
}
@@ -2209,84 +2248,6 @@ static int fuse_writepages(struct address_space *mapping,
22092248
return err;
22102249
}
22112250

2212-
/*
2213-
* It's worthy to make sure that space is reserved on disk for the write,
2214-
* but how to implement it without killing performance need more thinking.
2215-
*/
2216-
static int fuse_write_begin(struct file *file, struct address_space *mapping,
2217-
loff_t pos, unsigned len, struct folio **foliop, void **fsdata)
2218-
{
2219-
pgoff_t index = pos >> PAGE_SHIFT;
2220-
struct fuse_conn *fc = get_fuse_conn(file_inode(file));
2221-
struct folio *folio;
2222-
loff_t fsize;
2223-
int err = -ENOMEM;
2224-
2225-
WARN_ON(!fc->writeback_cache);
2226-
2227-
folio = __filemap_get_folio(mapping, index, FGP_WRITEBEGIN,
2228-
mapping_gfp_mask(mapping));
2229-
if (IS_ERR(folio))
2230-
goto error;
2231-
2232-
if (folio_test_uptodate(folio) || len >= folio_size(folio))
2233-
goto success;
2234-
/*
2235-
* Check if the start of this folio comes after the end of file,
2236-
* in which case the readpage can be optimized away.
2237-
*/
2238-
fsize = i_size_read(mapping->host);
2239-
if (fsize <= folio_pos(folio)) {
2240-
size_t off = offset_in_folio(folio, pos);
2241-
if (off)
2242-
folio_zero_segment(folio, 0, off);
2243-
goto success;
2244-
}
2245-
err = fuse_do_readfolio(file, folio);
2246-
if (err)
2247-
goto cleanup;
2248-
success:
2249-
*foliop = folio;
2250-
return 0;
2251-
2252-
cleanup:
2253-
folio_unlock(folio);
2254-
folio_put(folio);
2255-
error:
2256-
return err;
2257-
}
2258-
2259-
static int fuse_write_end(struct file *file, struct address_space *mapping,
2260-
loff_t pos, unsigned len, unsigned copied,
2261-
struct folio *folio, void *fsdata)
2262-
{
2263-
struct inode *inode = folio->mapping->host;
2264-
2265-
/* Haven't copied anything? Skip zeroing, size extending, dirtying. */
2266-
if (!copied)
2267-
goto unlock;
2268-
2269-
pos += copied;
2270-
if (!folio_test_uptodate(folio)) {
2271-
/* Zero any unwritten bytes at the end of the page */
2272-
size_t endoff = pos & ~PAGE_MASK;
2273-
if (endoff)
2274-
folio_zero_segment(folio, endoff, PAGE_SIZE);
2275-
folio_mark_uptodate(folio);
2276-
}
2277-
2278-
if (pos > inode->i_size)
2279-
i_size_write(inode, pos);
2280-
2281-
folio_mark_dirty(folio);
2282-
2283-
unlock:
2284-
folio_unlock(folio);
2285-
folio_put(folio);
2286-
2287-
return copied;
2288-
}
2289-
22902251
static int fuse_launder_folio(struct folio *folio)
22912252
{
22922253
int err = 0;
@@ -3145,11 +3106,10 @@ static const struct address_space_operations fuse_file_aops = {
31453106
.writepages = fuse_writepages,
31463107
.launder_folio = fuse_launder_folio,
31473108
.dirty_folio = filemap_dirty_folio,
3109+
.release_folio = iomap_release_folio,
31483110
.migrate_folio = filemap_migrate_folio,
31493111
.bmap = fuse_bmap,
31503112
.direct_IO = fuse_direct_IO,
3151-
.write_begin = fuse_write_begin,
3152-
.write_end = fuse_write_end,
31533113
};
31543114

31553115
void fuse_init_file_inode(struct inode *inode, unsigned int flags)

0 commit comments

Comments
 (0)