Skip to content

Commit 5b14671

Browse files
committed
Merge tag 'fuse-update-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse
Pull fuse updates from Miklos Szeredi:

 - Fix a rare deadlock in virtiofs

 - Fix st_blocks in writeback cache mode

 - Fix wrong checks in splice move causing spurious warnings

 - Fix a race between a GETATTR request and a FUSE_NOTIFY_INVAL_INODE
   notification

 - Use rb-tree instead of linear search for pages currently under
   writeout by userspace

 - Fix copy_file_range() inconsistencies

* tag 'fuse-update-5.8' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: copy_file_range should truncate cache
  fuse: fix copy_file_range cache issues
  fuse: optimize writepages search
  fuse: update attr_version counter on fuse_notify_inval_inode()
  fuse: don't check refcount after stealing page
  fuse: fix weird page warning
  fuse: use dump_page
  virtiofs: do not use fuse_fill_super_common() for device installation
  fuse: always allow query of st_dev
  fuse: always flush dirty data on close(2)
  fuse: invalidate inode attr in writeback cache mode
  fuse: Update stale comment in queue_interrupt()
  fuse: BUG_ON correction in fuse_dev_splice_write()
  virtiofs: Add mount option and atime behavior to the doc
  virtiofs: schedule blocking async replies in separate worker
2 parents 52435c8 + 9b46418 commit 5b14671

File tree

7 files changed

+219
-85
lines changed

7 files changed

+219
-85
lines changed

Documentation/filesystems/virtiofs.rst

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,20 @@ Mount file system with tag ``myfs`` on ``/mnt``:
3939
Please see https://virtio-fs.gitlab.io/ for details on how to configure QEMU
4040
and the virtiofsd daemon.
4141

42+
Mount options
43+
-------------
44+
45+
virtiofs supports general VFS mount options, for example, remount,
46+
ro, rw, context, etc. It also supports FUSE mount options.
47+
48+
atime behavior
49+
^^^^^^^^^^^^^^
50+
51+
The atime-related mount options, for example, noatime, strictatime,
52+
are ignored. The atime behavior for virtiofs is the same as the
53+
underlying filesystem of the directory that has been exported
54+
on the host.
55+
4256
Internals
4357
=========
4458
Since the virtio-fs device uses the FUSE protocol for file system requests, the

fs/fuse/dev.c

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ static int queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
342342
list_add_tail(&req->intr_entry, &fiq->interrupts);
343343
/*
344344
* Pairs with smp_mb() implied by test_and_set_bit()
345-
* from request_end().
345+
* from fuse_request_end().
346346
*/
347347
smp_mb();
348348
if (test_bit(FR_FINISHED, &req->flags)) {
@@ -764,16 +764,15 @@ static int fuse_check_page(struct page *page)
764764
{
765765
if (page_mapcount(page) ||
766766
page->mapping != NULL ||
767-
page_count(page) != 1 ||
768767
(page->flags & PAGE_FLAGS_CHECK_AT_PREP &
769768
~(1 << PG_locked |
770769
1 << PG_referenced |
771770
1 << PG_uptodate |
772771
1 << PG_lru |
773772
1 << PG_active |
774-
1 << PG_reclaim))) {
775-
pr_warn("trying to steal weird page\n");
776-
pr_warn(" page=%p index=%li flags=%08lx, count=%i, mapcount=%i, mapping=%p\n", page, page->index, page->flags, page_count(page), page_mapcount(page), page->mapping);
773+
1 << PG_reclaim |
774+
1 << PG_waiters))) {
775+
dump_page(page, "fuse: trying to steal weird page");
777776
return 1;
778777
}
779778
return 0;
@@ -1977,8 +1976,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
19771976
struct pipe_buffer *ibuf;
19781977
struct pipe_buffer *obuf;
19791978

1980-
BUG_ON(nbuf >= pipe->ring_size);
1981-
BUG_ON(tail == head);
1979+
if (WARN_ON(nbuf >= count || tail == head))
1980+
goto out_free;
1981+
19821982
ibuf = &pipe->bufs[tail & mask];
19831983
obuf = &bufs[nbuf];
19841984

fs/fuse/dir.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1689,8 +1689,18 @@ static int fuse_getattr(const struct path *path, struct kstat *stat,
16891689
struct inode *inode = d_inode(path->dentry);
16901690
struct fuse_conn *fc = get_fuse_conn(inode);
16911691

1692-
if (!fuse_allow_current_process(fc))
1692+
if (!fuse_allow_current_process(fc)) {
1693+
if (!request_mask) {
1694+
/*
1695+
* If user explicitly requested *nothing* then don't
1696+
* error out, but return st_dev only.
1697+
*/
1698+
stat->result_mask = 0;
1699+
stat->dev = inode->i_sb->s_dev;
1700+
return 0;
1701+
}
16931702
return -EACCES;
1703+
}
16941704

16951705
return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
16961706
}

fs/fuse/file.c

Lines changed: 92 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id)
357357

358358
struct fuse_writepage_args {
359359
struct fuse_io_args ia;
360-
struct list_head writepages_entry;
360+
struct rb_node writepages_entry;
361361
struct list_head queue_entry;
362362
struct fuse_writepage_args *next;
363363
struct inode *inode;
@@ -366,17 +366,23 @@ struct fuse_writepage_args {
366366
static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
367367
pgoff_t idx_from, pgoff_t idx_to)
368368
{
369-
struct fuse_writepage_args *wpa;
369+
struct rb_node *n;
370+
371+
n = fi->writepages.rb_node;
370372

371-
list_for_each_entry(wpa, &fi->writepages, writepages_entry) {
373+
while (n) {
374+
struct fuse_writepage_args *wpa;
372375
pgoff_t curr_index;
373376

377+
wpa = rb_entry(n, struct fuse_writepage_args, writepages_entry);
374378
WARN_ON(get_fuse_inode(wpa->inode) != fi);
375379
curr_index = wpa->ia.write.in.offset >> PAGE_SHIFT;
376-
if (idx_from < curr_index + wpa->ia.ap.num_pages &&
377-
curr_index <= idx_to) {
380+
if (idx_from >= curr_index + wpa->ia.ap.num_pages)
381+
n = n->rb_right;
382+
else if (idx_to < curr_index)
383+
n = n->rb_left;
384+
else
378385
return wpa;
379-
}
380386
}
381387
return NULL;
382388
}
@@ -445,9 +451,6 @@ static int fuse_flush(struct file *file, fl_owner_t id)
445451
if (is_bad_inode(inode))
446452
return -EIO;
447453

448-
if (fc->no_flush)
449-
return 0;
450-
451454
err = write_inode_now(inode, 1);
452455
if (err)
453456
return err;
@@ -460,6 +463,10 @@ static int fuse_flush(struct file *file, fl_owner_t id)
460463
if (err)
461464
return err;
462465

466+
err = 0;
467+
if (fc->no_flush)
468+
goto inval_attr_out;
469+
463470
memset(&inarg, 0, sizeof(inarg));
464471
inarg.fh = ff->fh;
465472
inarg.lock_owner = fuse_lock_owner_id(fc, id);
@@ -475,6 +482,14 @@ static int fuse_flush(struct file *file, fl_owner_t id)
475482
fc->no_flush = 1;
476483
err = 0;
477484
}
485+
486+
inval_attr_out:
487+
/*
488+
* In-memory i_blocks is not maintained by fuse; if writeback cache is
489+
* enabled, i_blocks from cached attr may not be accurate.
490+
*/
491+
if (!err && fc->writeback_cache)
492+
fuse_invalidate_attr(inode);
478493
return err;
479494
}
480495

@@ -712,6 +727,7 @@ static ssize_t fuse_async_req_send(struct fuse_conn *fc,
712727
spin_unlock(&io->lock);
713728

714729
ia->ap.args.end = fuse_aio_complete_req;
730+
ia->ap.args.may_block = io->should_dirty;
715731
err = fuse_simple_background(fc, &ia->ap.args, GFP_KERNEL);
716732
if (err)
717733
fuse_aio_complete_req(fc, &ia->ap.args, err);
@@ -1570,7 +1586,7 @@ static void fuse_writepage_finish(struct fuse_conn *fc,
15701586
struct backing_dev_info *bdi = inode_to_bdi(inode);
15711587
int i;
15721588

1573-
list_del(&wpa->writepages_entry);
1589+
rb_erase(&wpa->writepages_entry, &fi->writepages);
15741590
for (i = 0; i < ap->num_pages; i++) {
15751591
dec_wb_stat(&bdi->wb, WB_WRITEBACK);
15761592
dec_node_page_state(ap->pages[i], NR_WRITEBACK_TEMP);
@@ -1658,6 +1674,36 @@ __acquires(fi->lock)
16581674
}
16591675
}
16601676

1677+
static void tree_insert(struct rb_root *root, struct fuse_writepage_args *wpa)
1678+
{
1679+
pgoff_t idx_from = wpa->ia.write.in.offset >> PAGE_SHIFT;
1680+
pgoff_t idx_to = idx_from + wpa->ia.ap.num_pages - 1;
1681+
struct rb_node **p = &root->rb_node;
1682+
struct rb_node *parent = NULL;
1683+
1684+
WARN_ON(!wpa->ia.ap.num_pages);
1685+
while (*p) {
1686+
struct fuse_writepage_args *curr;
1687+
pgoff_t curr_index;
1688+
1689+
parent = *p;
1690+
curr = rb_entry(parent, struct fuse_writepage_args,
1691+
writepages_entry);
1692+
WARN_ON(curr->inode != wpa->inode);
1693+
curr_index = curr->ia.write.in.offset >> PAGE_SHIFT;
1694+
1695+
if (idx_from >= curr_index + curr->ia.ap.num_pages)
1696+
p = &(*p)->rb_right;
1697+
else if (idx_to < curr_index)
1698+
p = &(*p)->rb_left;
1699+
else
1700+
return (void) WARN_ON(true);
1701+
}
1702+
1703+
rb_link_node(&wpa->writepages_entry, parent, p);
1704+
rb_insert_color(&wpa->writepages_entry, root);
1705+
}
1706+
16611707
static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
16621708
int error)
16631709
{
@@ -1676,7 +1722,7 @@ static void fuse_writepage_end(struct fuse_conn *fc, struct fuse_args *args,
16761722
wpa->next = next->next;
16771723
next->next = NULL;
16781724
next->ia.ff = fuse_file_get(wpa->ia.ff);
1679-
list_add(&next->writepages_entry, &fi->writepages);
1725+
tree_insert(&fi->writepages, next);
16801726

16811727
/*
16821728
* Skip fuse_flush_writepages() to make it easy to crop requests
@@ -1811,7 +1857,7 @@ static int fuse_writepage_locked(struct page *page)
18111857
inc_node_page_state(tmp_page, NR_WRITEBACK_TEMP);
18121858

18131859
spin_lock(&fi->lock);
1814-
list_add(&wpa->writepages_entry, &fi->writepages);
1860+
tree_insert(&fi->writepages, wpa);
18151861
list_add_tail(&wpa->queue_entry, &fi->queued_writes);
18161862
fuse_flush_writepages(inode);
18171863
spin_unlock(&fi->lock);
@@ -1923,10 +1969,10 @@ static bool fuse_writepage_in_flight(struct fuse_writepage_args *new_wpa,
19231969
WARN_ON(new_ap->num_pages != 0);
19241970

19251971
spin_lock(&fi->lock);
1926-
list_del(&new_wpa->writepages_entry);
1972+
rb_erase(&new_wpa->writepages_entry, &fi->writepages);
19271973
old_wpa = fuse_find_writeback(fi, page->index, page->index);
19281974
if (!old_wpa) {
1929-
list_add(&new_wpa->writepages_entry, &fi->writepages);
1975+
tree_insert(&fi->writepages, new_wpa);
19301976
spin_unlock(&fi->lock);
19311977
return false;
19321978
}
@@ -2041,7 +2087,7 @@ static int fuse_writepages_fill(struct page *page,
20412087
wpa->inode = inode;
20422088

20432089
spin_lock(&fi->lock);
2044-
list_add(&wpa->writepages_entry, &fi->writepages);
2090+
tree_insert(&fi->writepages, wpa);
20452091
spin_unlock(&fi->lock);
20462092

20472093
data->wpa = wpa;
@@ -3235,25 +3281,39 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
32353281
if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
32363282
return -EXDEV;
32373283

3238-
if (fc->writeback_cache) {
3239-
inode_lock(inode_in);
3240-
err = fuse_writeback_range(inode_in, pos_in, pos_in + len);
3241-
inode_unlock(inode_in);
3242-
if (err)
3243-
return err;
3244-
}
3284+
inode_lock(inode_in);
3285+
err = fuse_writeback_range(inode_in, pos_in, pos_in + len - 1);
3286+
inode_unlock(inode_in);
3287+
if (err)
3288+
return err;
32453289

32463290
inode_lock(inode_out);
32473291

32483292
err = file_modified(file_out);
32493293
if (err)
32503294
goto out;
32513295

3252-
if (fc->writeback_cache) {
3253-
err = fuse_writeback_range(inode_out, pos_out, pos_out + len);
3254-
if (err)
3255-
goto out;
3256-
}
3296+
/*
3297+
* Write out dirty pages in the destination file before sending the COPY
3298+
* request to userspace. After the request is completed, truncate off
3299+
* pages (including partial ones) from the cache that have been copied,
3300+
* since these contain stale data at that point.
3301+
*
3302+
* This should be mostly correct, but if the COPY writes to partial
3303+
* pages (at the start or end) and the parts not covered by the COPY are
3304+
* written through a memory map after calling fuse_writeback_range(),
3305+
* then these partial page modifications will be lost on truncation.
3306+
*
3307+
* It is unlikely that someone would rely on such mixed style
3308+
* modifications. Yet this does give fewer guarantees than if the
3309+
* copying was performed with write(2).
3310+
*
3311+
* To fix this, an i_mmap_sem style lock could be used to prevent new
3312+
* faults while the copy is ongoing.
3313+
*/
3314+
err = fuse_writeback_range(inode_out, pos_out, pos_out + len - 1);
3315+
if (err)
3316+
goto out;
32573317

32583318
if (is_unstable)
32593319
set_bit(FUSE_I_SIZE_UNSTABLE, &fi_out->state);
@@ -3274,6 +3334,10 @@ static ssize_t __fuse_copy_file_range(struct file *file_in, loff_t pos_in,
32743334
if (err)
32753335
goto out;
32763336

3337+
truncate_inode_pages_range(inode_out->i_mapping,
3338+
ALIGN_DOWN(pos_out, PAGE_SIZE),
3339+
ALIGN(pos_out + outarg.size, PAGE_SIZE) - 1);
3340+
32773341
if (fc->writeback_cache) {
32783342
fuse_write_update_size(inode_out, pos_out + outarg.size);
32793343
file_update_time(file_out);
@@ -3351,5 +3415,5 @@ void fuse_init_file_inode(struct inode *inode)
33513415
INIT_LIST_HEAD(&fi->queued_writes);
33523416
fi->writectr = 0;
33533417
init_waitqueue_head(&fi->page_waitq);
3354-
INIT_LIST_HEAD(&fi->writepages);
3418+
fi->writepages = RB_ROOT;
33553419
}

fs/fuse/fuse_i.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ struct fuse_inode {
111111
wait_queue_head_t page_waitq;
112112

113113
/* List of writepage requests (pending or sent) */
114-
struct list_head writepages;
114+
struct rb_root writepages;
115115
};
116116

117117
/* readdir cache (directory only) */
@@ -249,6 +249,7 @@ struct fuse_args {
249249
bool out_argvar:1;
250250
bool page_zeroing:1;
251251
bool page_replace:1;
252+
bool may_block:1;
252253
struct fuse_in_arg in_args[3];
253254
struct fuse_arg out_args[2];
254255
void (*end)(struct fuse_conn *fc, struct fuse_args *args, int error);

0 commit comments

Comments
 (0)