Skip to content

Commit 4ac0f08

Browse files
committed
Merge tag 'vfs-6.11-rc4.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs fixes from Christian Brauner: "VFS: - Fix the name of file lease slab cache. When file leases were split out of file locks the name of the file lock slab cache was used for the file leases slab cache as well. - Fix a typo in the take_fd() helper comment. - Fix infinite directory iteration for stable offsets in tmpfs. - When the icache is pruned all reclaimable inodes are marked with I_FREEING and other processes that try to look up such inodes will block. But some filesystems like ext4 can trigger lookups in their inode evict callback causing deadlocks. Ext4 does such lookups if the ea_inode feature is used whereby a separate inode may be used to store xattrs. Introduce I_LRU_ISOLATING which pins the inode while its pages are reclaimed. This avoids inode deletion during inode_lru_isolate() avoiding the deadlock and evict is made to wait until I_LRU_ISOLATING is done. netfs: - Fault in smaller chunks for non-large folio mappings for filesystems that haven't been converted to large folios yet. - Fix the CONFIG_NETFS_DEBUG config option. The config option was renamed a short while ago and that introduced two minor issues. First, it depended on CONFIG_NETFS whereas it wants to depend on CONFIG_NETFS_SUPPORT. The former doesn't exist, while the latter does. Second, the documentation for the config option wasn't fixed up. - Revert the removal of the PG_private_2 writeback flag as ceph is using it and fix how that flag is handled in netfs. - Fix DIO reads on 9p. A program watching a file on a 9p mount wouldn't see any changes in the size of the file being exported by the server if the file was changed directly in the source filesystem. Fix this by attempting to read the full size specified when a DIO read is requested. - Fix a NULL pointer dereference bug due to a data race where a cachefiles cookie was retired even though it was still in use. Check the cookie's n_accesses counter before discarding it. 
nsfs: - Fix ioctl declaration for NS_GET_MNTNS_ID from _IO() to _IOR() as the kernel is writing to userspace. pidfs: - Prevent the creation of pidfds for kthreads until we have a use-case for it and we know the semantics we want. It also confuses userspace that pidfds can be obtained for kthreads. squashfs: - Fix an uninitialized value bug reported by KMSAN caused by a corrupted symbolic link size read from disk. Check that the symbolic link size is not larger than expected" * tag 'vfs-6.11-rc4.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: Squashfs: sanity check symbolic link size 9p: Fix DIO read through netfs vfs: Don't evict inode under the inode lru traversing context netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag" file: fix typo in take_fd() comment pidfd: prevent creation of pidfds for kthreads netfs: clean up after renaming FSCACHE_DEBUG config libfs: fix infinite directory reads for offset dir nsfs: fix ioctl declaration fs/netfs/fscache_cookie: add missing "n_accesses" check filelock: fix name of file_lease slab cache netfs: Fault in smaller chunks for non-large folio mappings
2 parents 02f8ca3 + 810ee43 commit 4ac0f08

File tree

25 files changed

+412
-71
lines changed

25 files changed

+412
-71
lines changed

Documentation/filesystems/caching/fscache.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -318,10 +318,10 @@ where the columns are:
318318
Debugging
319319
=========
320320

321-
If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime
322-
debugging enabled by adjusting the value in::
321+
If CONFIG_NETFS_DEBUG is enabled, the FS-Cache facility and NETFS support can
322+
have runtime debugging enabled by adjusting the value in::
323323

324-
/sys/module/fscache/parameters/debug
324+
/sys/module/netfs/parameters/debug
325325

326326
This is a bitmask of debugging streams to enable:
327327

@@ -343,6 +343,6 @@ This is a bitmask of debugging streams to enable:
343343
The appropriate set of values should be OR'd together and the result written to
344344
the control file. For example::
345345

346-
echo $((1|8|512)) >/sys/module/fscache/parameters/debug
346+
echo $((1|8|512)) >/sys/module/netfs/parameters/debug
347347

348348
will turn on all function entry debugging.

fs/9p/vfs_addr.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
7575

7676
/* if we just extended the file size, any portion not in
7777
* cache won't be on server and is zeroes */
78-
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
78+
if (subreq->rreq->origin != NETFS_DIO_READ)
79+
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
7980

8081
netfs_subreq_terminated(subreq, err ?: total, false);
8182
}

fs/afs/file.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,8 @@ static void afs_fetch_data_notify(struct afs_operation *op)
242242

243243
req->error = error;
244244
if (subreq) {
245-
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
245+
if (subreq->rreq->origin != NETFS_DIO_READ)
246+
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
246247
netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
247248
req->subreq = NULL;
248249
} else if (req->done) {

fs/ceph/addr.c

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,8 @@ static void finish_netfs_read(struct ceph_osd_request *req)
246246
if (err >= 0) {
247247
if (sparse && err > 0)
248248
err = ceph_sparse_ext_map_end(op);
249-
if (err < subreq->len)
249+
if (err < subreq->len &&
250+
subreq->rreq->origin != NETFS_DIO_READ)
250251
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
251252
if (IS_ENCRYPTED(inode) && err > 0) {
252253
err = ceph_fscrypt_decrypt_extents(inode,
@@ -282,7 +283,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
282283
size_t len;
283284
int mode;
284285

285-
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
286+
if (rreq->origin != NETFS_DIO_READ)
287+
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
286288
__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
287289

288290
if (subreq->start >= inode->i_size)
@@ -424,6 +426,9 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
424426
struct ceph_netfs_request_data *priv;
425427
int ret = 0;
426428

429+
/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
430+
__set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
431+
427432
if (rreq->origin != NETFS_READAHEAD)
428433
return 0;
429434

@@ -498,6 +503,11 @@ const struct netfs_request_ops ceph_netfs_ops = {
498503
};
499504

500505
#ifdef CONFIG_CEPH_FSCACHE
506+
static void ceph_set_page_fscache(struct page *page)
507+
{
508+
folio_start_private_2(page_folio(page)); /* [DEPRECATED] */
509+
}
510+
501511
static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async)
502512
{
503513
struct inode *inode = priv;
@@ -515,6 +525,10 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
515525
ceph_fscache_write_terminated, inode, true, caching);
516526
}
517527
#else
528+
static inline void ceph_set_page_fscache(struct page *page)
529+
{
530+
}
531+
518532
static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
519533
{
520534
}
@@ -706,6 +720,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
706720
len = wlen;
707721

708722
set_page_writeback(page);
723+
if (caching)
724+
ceph_set_page_fscache(page);
709725
ceph_fscache_write_to_cache(inode, page_off, len, caching);
710726

711727
if (IS_ENCRYPTED(inode)) {
@@ -789,6 +805,8 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
789805
return AOP_WRITEPAGE_ACTIVATE;
790806
}
791807

808+
folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
809+
792810
err = writepage_nounlock(page, wbc);
793811
if (err == -ERESTARTSYS) {
794812
/* direct memory reclaimer was killed by SIGKILL. return 0
@@ -1062,14 +1080,16 @@ static int ceph_writepages_start(struct address_space *mapping,
10621080
unlock_page(page);
10631081
break;
10641082
}
1065-
if (PageWriteback(page)) {
1083+
if (PageWriteback(page) ||
1084+
PagePrivate2(page) /* [DEPRECATED] */) {
10661085
if (wbc->sync_mode == WB_SYNC_NONE) {
10671086
doutc(cl, "%p under writeback\n", page);
10681087
unlock_page(page);
10691088
continue;
10701089
}
10711090
doutc(cl, "waiting on writeback %p\n", page);
10721091
wait_on_page_writeback(page);
1092+
folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
10731093
}
10741094

10751095
if (!clear_page_dirty_for_io(page)) {
@@ -1254,6 +1274,8 @@ static int ceph_writepages_start(struct address_space *mapping,
12541274
}
12551275

12561276
set_page_writeback(page);
1277+
if (caching)
1278+
ceph_set_page_fscache(page);
12571279
len += thp_size(page);
12581280
}
12591281
ceph_fscache_write_to_cache(inode, offset, len, caching);

fs/ceph/inode.c

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -577,8 +577,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
577577

578578
/* Set parameters for the netfs library */
579579
netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
580-
/* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
581-
__set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags);
582580

583581
spin_lock_init(&ci->i_ceph_lock);
584582

fs/inode.c

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,39 @@ static void inode_lru_list_del(struct inode *inode)
488488
this_cpu_dec(nr_unused);
489489
}
490490

491+
static void inode_pin_lru_isolating(struct inode *inode)
492+
{
493+
lockdep_assert_held(&inode->i_lock);
494+
WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE));
495+
inode->i_state |= I_LRU_ISOLATING;
496+
}
497+
498+
static void inode_unpin_lru_isolating(struct inode *inode)
499+
{
500+
spin_lock(&inode->i_lock);
501+
WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
502+
inode->i_state &= ~I_LRU_ISOLATING;
503+
smp_mb();
504+
wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
505+
spin_unlock(&inode->i_lock);
506+
}
507+
508+
static void inode_wait_for_lru_isolating(struct inode *inode)
509+
{
510+
spin_lock(&inode->i_lock);
511+
if (inode->i_state & I_LRU_ISOLATING) {
512+
DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
513+
wait_queue_head_t *wqh;
514+
515+
wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
516+
spin_unlock(&inode->i_lock);
517+
__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
518+
spin_lock(&inode->i_lock);
519+
WARN_ON(inode->i_state & I_LRU_ISOLATING);
520+
}
521+
spin_unlock(&inode->i_lock);
522+
}
523+
491524
/**
492525
* inode_sb_list_add - add inode to the superblock list of inodes
493526
* @inode: inode to add
@@ -657,6 +690,8 @@ static void evict(struct inode *inode)
657690

658691
inode_sb_list_del(inode);
659692

693+
inode_wait_for_lru_isolating(inode);
694+
660695
/*
661696
* Wait for flusher thread to be done with the inode so that filesystem
662697
* does not start destroying it while writeback is still running. Since
@@ -855,7 +890,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
855890
* be under pressure before the cache inside the highmem zone.
856891
*/
857892
if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
858-
__iget(inode);
893+
inode_pin_lru_isolating(inode);
859894
spin_unlock(&inode->i_lock);
860895
spin_unlock(lru_lock);
861896
if (remove_inode_buffers(inode)) {
@@ -867,7 +902,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item,
867902
__count_vm_events(PGINODESTEAL, reap);
868903
mm_account_reclaimed_pages(reap);
869904
}
870-
iput(inode);
905+
inode_unpin_lru_isolating(inode);
871906
spin_lock(lru_lock);
872907
return LRU_RETRY;
873908
}

fs/libfs.c

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,14 @@ void simple_offset_destroy(struct offset_ctx *octx)
450450
mtree_destroy(&octx->mt);
451451
}
452452

453+
static int offset_dir_open(struct inode *inode, struct file *file)
454+
{
455+
struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
456+
457+
file->private_data = (void *)ctx->next_offset;
458+
return 0;
459+
}
460+
453461
/**
454462
* offset_dir_llseek - Advance the read position of a directory descriptor
455463
* @file: an open directory whose position is to be updated
@@ -463,6 +471,9 @@ void simple_offset_destroy(struct offset_ctx *octx)
463471
*/
464472
static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
465473
{
474+
struct inode *inode = file->f_inode;
475+
struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode);
476+
466477
switch (whence) {
467478
case SEEK_CUR:
468479
offset += file->f_pos;
@@ -476,7 +487,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
476487
}
477488

478489
/* In this case, ->private_data is protected by f_pos_lock */
479-
file->private_data = NULL;
490+
if (!offset)
491+
file->private_data = (void *)ctx->next_offset;
480492
return vfs_setpos(file, offset, LONG_MAX);
481493
}
482494

@@ -507,25 +519,29 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
507519
inode->i_ino, fs_umode_to_dtype(inode->i_mode));
508520
}
509521

510-
static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
522+
static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index)
511523
{
512524
struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
513525
struct dentry *dentry;
514526

515527
while (true) {
516528
dentry = offset_find_next(octx, ctx->pos);
517529
if (!dentry)
518-
return ERR_PTR(-ENOENT);
530+
return;
531+
532+
if (dentry2offset(dentry) >= last_index) {
533+
dput(dentry);
534+
return;
535+
}
519536

520537
if (!offset_dir_emit(ctx, dentry)) {
521538
dput(dentry);
522-
break;
539+
return;
523540
}
524541

525542
ctx->pos = dentry2offset(dentry) + 1;
526543
dput(dentry);
527544
}
528-
return NULL;
529545
}
530546

531547
/**
@@ -552,22 +568,19 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx)
552568
static int offset_readdir(struct file *file, struct dir_context *ctx)
553569
{
554570
struct dentry *dir = file->f_path.dentry;
571+
long last_index = (long)file->private_data;
555572

556573
lockdep_assert_held(&d_inode(dir)->i_rwsem);
557574

558575
if (!dir_emit_dots(file, ctx))
559576
return 0;
560577

561-
/* In this case, ->private_data is protected by f_pos_lock */
562-
if (ctx->pos == DIR_OFFSET_MIN)
563-
file->private_data = NULL;
564-
else if (file->private_data == ERR_PTR(-ENOENT))
565-
return 0;
566-
file->private_data = offset_iterate_dir(d_inode(dir), ctx);
578+
offset_iterate_dir(d_inode(dir), ctx, last_index);
567579
return 0;
568580
}
569581

570582
const struct file_operations simple_offset_dir_operations = {
583+
.open = offset_dir_open,
571584
.llseek = offset_dir_llseek,
572585
.iterate_shared = offset_readdir,
573586
.read = generic_read_dir,

fs/locks.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2984,7 +2984,7 @@ static int __init filelock_init(void)
29842984
filelock_cache = kmem_cache_create("file_lock_cache",
29852985
sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
29862986

2987-
filelease_cache = kmem_cache_create("file_lock_cache",
2987+
filelease_cache = kmem_cache_create("file_lease_cache",
29882988
sizeof(struct file_lease), 0, SLAB_PANIC, NULL);
29892989

29902990
for_each_possible_cpu(i) {

fs/netfs/Kconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ config NETFS_STATS
2424

2525
config NETFS_DEBUG
2626
bool "Enable dynamic debugging netfslib and FS-Cache"
27-
depends on NETFS
27+
depends on NETFS_SUPPORT
2828
help
2929
This permits debugging to be dynamically enabled in the local caching
3030
management module. If this is set, the debugging output may be

0 commit comments

Comments
 (0)