Skip to content

Commit 6945653

Browse files
committed
Merge tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi:

 - Support directly accessing host page cache from virtiofs. This can
   improve I/O performance for various workloads, as well as reducing
   the memory requirement by eliminating double caching. Thanks to Vivek
   Goyal for doing most of the work on this.

 - Allow automatic submounting inside virtiofs. This allows unique
   st_dev/st_ino values to be assigned inside the guest to files
   residing on different filesystems on the host. Thanks to Max Reitz
   for the patches.

 - Fix an old use after free bug found by Pradeep P V K.

* tag 'fuse-update-5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse: (25 commits)
  virtiofs: calculate number of scatter-gather elements accurately
  fuse: connection remove fix
  fuse: implement crossmounts
  fuse: Allow fuse_fill_super_common() for submounts
  fuse: split fuse_mount off of fuse_conn
  fuse: drop fuse_conn parameter where possible
  fuse: store fuse_conn in fuse_req
  fuse: add submount support to <uapi/linux/fuse.h>
  fuse: fix page dereference after free
  virtiofs: add logic to free up a memory range
  virtiofs: maintain a list of busy elements
  virtiofs: serialize truncate/punch_hole and dax fault path
  virtiofs: define dax address space operations
  virtiofs: add DAX mmap support
  virtiofs: implement dax read/write operations
  virtiofs: introduce setupmapping/removemapping commands
  virtiofs: implement FUSE_INIT map_alignment field
  virtiofs: keep a list of free dax memory ranges
  virtiofs: add a mount option to enable dax
  virtiofs: set up virtio_fs dax_device
  ...
2 parents 922a763 + 42d3e2d commit 6945653

File tree

20 files changed

+2689
-496
lines changed

20 files changed

+2689
-496
lines changed

Documentation/filesystems/fuse.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ filesystems. A good example is sshfs: a secure network filesystem
4747
using the sftp protocol.
4848

4949
The userspace library and utilities are available from the
50-
`FUSE homepage: <http://fuse.sourceforge.net/>`_
50+
`FUSE homepage: <https://github.com/libfuse/>`_
5151

5252
Filesystem type
5353
===============

MAINTAINERS

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7238,7 +7238,7 @@ FUSE: FILESYSTEM IN USERSPACE
72387238
M: Miklos Szeredi <[email protected]>
72397239
72407240
S: Maintained
7241-
W: http://fuse.sourceforge.net/
7241+
W: https://github.com/libfuse/
72427242
T: git git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git
72437243
F: Documentation/filesystems/fuse.rst
72447244
F: fs/fuse/

drivers/dax/super.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,8 @@ EXPORT_SYMBOL_GPL(dax_read_unlock);
4646
int bdev_dax_pgoff(struct block_device *bdev, sector_t sector, size_t size,
4747
pgoff_t *pgoff)
4848
{
49-
phys_addr_t phys_off = (get_start_sect(bdev) + sector) * 512;
49+
sector_t start_sect = bdev ? get_start_sect(bdev) : 0;
50+
phys_addr_t phys_off = (start_sect + sector) * 512;
5051

5152
if (pgoff)
5253
*pgoff = PHYS_PFN(phys_off);

fs/dax.c

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -559,8 +559,11 @@ static void *grab_mapping_entry(struct xa_state *xas,
559559
}
560560

561561
/**
562-
* dax_layout_busy_page - find first pinned page in @mapping
562+
* dax_layout_busy_page_range - find first pinned page in @mapping
563563
* @mapping: address space to scan for a page with ref count > 1
564+
* @start: Starting offset. Page containing 'start' is included.
565+
* @end: End offset. Page containing 'end' is included. If 'end' is LLONG_MAX,
566+
* pages from 'start' till the end of file are included.
564567
*
565568
* DAX requires ZONE_DEVICE mapped pages. These pages are never
566569
* 'onlined' to the page allocator so they are considered idle when
@@ -573,12 +576,15 @@ static void *grab_mapping_entry(struct xa_state *xas,
573576
* to be able to run unmap_mapping_range() and subsequently not race
574577
* mapping_mapped() becoming true.
575578
*/
576-
struct page *dax_layout_busy_page(struct address_space *mapping)
579+
struct page *dax_layout_busy_page_range(struct address_space *mapping,
580+
loff_t start, loff_t end)
577581
{
578-
XA_STATE(xas, &mapping->i_pages, 0);
579582
void *entry;
580583
unsigned int scanned = 0;
581584
struct page *page = NULL;
585+
pgoff_t start_idx = start >> PAGE_SHIFT;
586+
pgoff_t end_idx;
587+
XA_STATE(xas, &mapping->i_pages, start_idx);
582588

583589
/*
584590
* In the 'limited' case get_user_pages() for dax is disabled.
@@ -589,22 +595,27 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
589595
if (!dax_mapping(mapping) || !mapping_mapped(mapping))
590596
return NULL;
591597

598+
/* If end == LLONG_MAX, include all pages from start till the end of file */
599+
if (end == LLONG_MAX)
600+
end_idx = ULONG_MAX;
601+
else
602+
end_idx = end >> PAGE_SHIFT;
592603
/*
593604
* If we race get_user_pages_fast() here either we'll see the
594605
* elevated page count in the iteration and wait, or
595606
* get_user_pages_fast() will see that the page it took a reference
596607
* against is no longer mapped in the page tables and bail to the
597608
* get_user_pages() slow path. The slow path is protected by
598609
* pte_lock() and pmd_lock(). New references are not taken without
599-
* holding those locks, and unmap_mapping_range() will not zero the
610+
* holding those locks, and unmap_mapping_pages() will not zero the
600611
* pte or pmd without holding the respective lock, so we are
601612
* guaranteed to either see new references or prevent new
602613
* references from being established.
603614
*/
604-
unmap_mapping_range(mapping, 0, 0, 0);
615+
unmap_mapping_pages(mapping, start_idx, end_idx - start_idx + 1, 0);
605616

606617
xas_lock_irq(&xas);
607-
xas_for_each(&xas, entry, ULONG_MAX) {
618+
xas_for_each(&xas, entry, end_idx) {
608619
if (WARN_ON_ONCE(!xa_is_value(entry)))
609620
continue;
610621
if (unlikely(dax_is_locked(entry)))
@@ -625,6 +636,12 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
625636
xas_unlock_irq(&xas);
626637
return page;
627638
}
639+
EXPORT_SYMBOL_GPL(dax_layout_busy_page_range);
640+
641+
struct page *dax_layout_busy_page(struct address_space *mapping)
642+
{
643+
return dax_layout_busy_page_range(mapping, 0, LLONG_MAX);
644+
}
628645
EXPORT_SYMBOL_GPL(dax_layout_busy_page);
629646

630647
static int __dax_invalidate_entry(struct address_space *mapping,

fs/fuse/Kconfig

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ config FUSE_FS
88

99
There's also a companion library: libfuse2. This library is available
1010
from the FUSE homepage:
11-
<http://fuse.sourceforge.net/>
11+
<https://github.com/libfuse/>
1212
although chances are your distribution already has that library
1313
installed if you've installed the "fuse" package itself.
1414

@@ -38,3 +38,17 @@ config VIRTIO_FS
3838

3939
If you want to share files between guests or with the host, answer Y
4040
or M.
41+
42+
config FUSE_DAX
43+
bool "Virtio Filesystem Direct Host Memory Access support"
44+
default y
45+
select INTERVAL_TREE
46+
depends on VIRTIO_FS
47+
depends on FS_DAX
48+
depends on DAX_DRIVER
49+
help
50+
This allows bypassing guest page cache and allows mapping host page
51+
cache directly in guest address space.
52+
53+
If you want to allow mounting a Virtio Filesystem with the "dax"
54+
option, answer Y.

fs/fuse/Makefile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,7 @@ obj-$(CONFIG_FUSE_FS) += fuse.o
77
obj-$(CONFIG_CUSE) += cuse.o
88
obj-$(CONFIG_VIRTIO_FS) += virtiofs.o
99

10-
fuse-objs := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
11-
virtiofs-y += virtio_fs.o
10+
fuse-y := dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o
11+
fuse-$(CONFIG_FUSE_DAX) += dax.o
12+
13+
virtiofs-y := virtio_fs.o

fs/fuse/control.c

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,7 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
164164
{
165165
unsigned val;
166166
struct fuse_conn *fc;
167+
struct fuse_mount *fm;
167168
ssize_t ret;
168169

169170
ret = fuse_conn_limit_write(file, buf, count, ppos, &val,
@@ -174,18 +175,27 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file,
174175
if (!fc)
175176
goto out;
176177

178+
down_read(&fc->killsb);
177179
spin_lock(&fc->bg_lock);
178180
fc->congestion_threshold = val;
179-
if (fc->sb) {
181+
182+
/*
183+
* Get any fuse_mount belonging to this fuse_conn; s_bdi is
184+
* shared between all of them
185+
*/
186+
187+
if (!list_empty(&fc->mounts)) {
188+
fm = list_first_entry(&fc->mounts, struct fuse_mount, fc_entry);
180189
if (fc->num_background < fc->congestion_threshold) {
181-
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
182-
clear_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
190+
clear_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
191+
clear_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
183192
} else {
184-
set_bdi_congested(fc->sb->s_bdi, BLK_RW_SYNC);
185-
set_bdi_congested(fc->sb->s_bdi, BLK_RW_ASYNC);
193+
set_bdi_congested(fm->sb->s_bdi, BLK_RW_SYNC);
194+
set_bdi_congested(fm->sb->s_bdi, BLK_RW_ASYNC);
186195
}
187196
}
188197
spin_unlock(&fc->bg_lock);
198+
up_read(&fc->killsb);
189199
fuse_conn_put(fc);
190200
out:
191201
return ret;

fs/fuse/cuse.c

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757

5858
struct cuse_conn {
5959
struct list_head list; /* linked on cuse_conntbl */
60+
struct fuse_mount fm; /* Dummy mount referencing fc */
6061
struct fuse_conn fc; /* fuse connection */
6162
struct cdev *cdev; /* associated character device */
6263
struct device *dev; /* device representing @cdev */
@@ -134,7 +135,7 @@ static int cuse_open(struct inode *inode, struct file *file)
134135
* Generic permission check is already done against the chrdev
135136
* file, proceed to open.
136137
*/
137-
rc = fuse_do_open(&cc->fc, 0, file, 0);
138+
rc = fuse_do_open(&cc->fm, 0, file, 0);
138139
if (rc)
139140
fuse_conn_put(&cc->fc);
140141
return rc;
@@ -143,10 +144,10 @@ static int cuse_open(struct inode *inode, struct file *file)
143144
static int cuse_release(struct inode *inode, struct file *file)
144145
{
145146
struct fuse_file *ff = file->private_data;
146-
struct fuse_conn *fc = ff->fc;
147+
struct fuse_mount *fm = ff->fm;
147148

148149
fuse_sync_release(NULL, ff, file->f_flags);
149-
fuse_conn_put(fc);
150+
fuse_conn_put(fm->fc);
150151

151152
return 0;
152153
}
@@ -155,7 +156,7 @@ static long cuse_file_ioctl(struct file *file, unsigned int cmd,
155156
unsigned long arg)
156157
{
157158
struct fuse_file *ff = file->private_data;
158-
struct cuse_conn *cc = fc_to_cc(ff->fc);
159+
struct cuse_conn *cc = fc_to_cc(ff->fm->fc);
159160
unsigned int flags = 0;
160161

161162
if (cc->unrestricted_ioctl)
@@ -168,7 +169,7 @@ static long cuse_file_compat_ioctl(struct file *file, unsigned int cmd,
168169
unsigned long arg)
169170
{
170171
struct fuse_file *ff = file->private_data;
171-
struct cuse_conn *cc = fc_to_cc(ff->fc);
172+
struct cuse_conn *cc = fc_to_cc(ff->fm->fc);
172173
unsigned int flags = FUSE_IOCTL_COMPAT;
173174

174175
if (cc->unrestricted_ioctl)
@@ -313,9 +314,10 @@ struct cuse_init_args {
313314
* required data structures for it. Please read the comment at the
314315
* top of this file for high level overview.
315316
*/
316-
static void cuse_process_init_reply(struct fuse_conn *fc,
317+
static void cuse_process_init_reply(struct fuse_mount *fm,
317318
struct fuse_args *args, int error)
318319
{
320+
struct fuse_conn *fc = fm->fc;
319321
struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args);
320322
struct fuse_args_pages *ap = &ia->ap;
321323
struct cuse_conn *cc = fc_to_cc(fc), *pos;
@@ -424,7 +426,7 @@ static int cuse_send_init(struct cuse_conn *cc)
424426
{
425427
int rc;
426428
struct page *page;
427-
struct fuse_conn *fc = &cc->fc;
429+
struct fuse_mount *fm = &cc->fm;
428430
struct cuse_init_args *ia;
429431
struct fuse_args_pages *ap;
430432

@@ -460,7 +462,7 @@ static int cuse_send_init(struct cuse_conn *cc)
460462
ia->desc.length = ap->args.out_args[1].size;
461463
ap->args.end = cuse_process_init_reply;
462464

463-
rc = fuse_simple_background(fc, &ap->args, GFP_KERNEL);
465+
rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL);
464466
if (rc) {
465467
kfree(ia);
466468
err_free_page:
@@ -506,7 +508,8 @@ static int cuse_channel_open(struct inode *inode, struct file *file)
506508
* Limit the cuse channel to requests that can
507509
* be represented in file->f_cred->user_ns.
508510
*/
509-
fuse_conn_init(&cc->fc, file->f_cred->user_ns, &fuse_dev_fiq_ops, NULL);
511+
fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns,
512+
&fuse_dev_fiq_ops, NULL);
510513

511514
fud = fuse_dev_alloc_install(&cc->fc);
512515
if (!fud) {

0 commit comments

Comments
 (0)