From 1527a3a68e062cd8d67d0c5b720bbdfed4e11b2e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:10 +0000 Subject: [PATCH 01/54] fuse_ctl_add_conn(): fix nlink breakage in case of early failure fuse_ctl_remove_conn() used to decrement the link count of root manually; that got subsumed by simple_recursive_removal(), but in case when subdirectory creation has failed the latter won't get called. Just move the modification of parent's link count into fuse_ctl_add_dentry() to keep the things simple. Allows to get rid of the nlink argument as well... Fixes: fcaac5b42768 "fuse_ctl: use simple_recursive_removal()" Acked-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/fuse/control.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index bb407705603c2..5247df896c5d0 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -205,8 +205,7 @@ static const struct file_operations fuse_conn_congestion_threshold_ops = { static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, struct fuse_conn *fc, - const char *name, - int mode, int nlink, + const char *name, int mode, const struct inode_operations *iop, const struct file_operations *fop) { @@ -232,7 +231,10 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, if (iop) inode->i_op = iop; inode->i_fop = fop; - set_nlink(inode, nlink); + if (S_ISDIR(mode)) { + inc_nlink(d_inode(parent)); + inc_nlink(inode); + } inode->i_private = fc; d_add(dentry, inode); @@ -252,22 +254,21 @@ int fuse_ctl_add_conn(struct fuse_conn *fc) return 0; parent = fuse_control_sb->s_root; - inc_nlink(d_inode(parent)); sprintf(name, "%u", fc->dev); - parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, 2, + parent = fuse_ctl_add_dentry(parent, fc, name, S_IFDIR | 0500, &simple_dir_inode_operations, &simple_dir_operations); if (!parent) goto err; - if (!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, 1, + if 
(!fuse_ctl_add_dentry(parent, fc, "waiting", S_IFREG | 0400, NULL, &fuse_ctl_waiting_ops) || - !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, 1, + !fuse_ctl_add_dentry(parent, fc, "abort", S_IFREG | 0200, NULL, &fuse_ctl_abort_ops) || !fuse_ctl_add_dentry(parent, fc, "max_background", S_IFREG | 0600, - 1, NULL, &fuse_conn_max_background_ops) || + NULL, &fuse_conn_max_background_ops) || !fuse_ctl_add_dentry(parent, fc, "congestion_threshold", - S_IFREG | 0600, 1, NULL, + S_IFREG | 0600, NULL, &fuse_conn_congestion_threshold_ops)) goto err; From f34237c468b178f021d91c776bb7ed4cdc1a0540 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:11 +0000 Subject: [PATCH 02/54] tracefs: fix a leak in eventfs_create_events_dir() If we have LOCKDOWN_TRACEFS, the function bails out - *after* having locked the parent directory and without bothering to undo that. Just check it before tracefs_start_creating()... Fixes: e24709454c45 "tracefs/eventfs: Add missing lockdown checks" Acked-by: Steven Rostedt (Google) Signed-off-by: Al Viro --- fs/tracefs/event_inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 8705c77a9e75a..93c231601c8e2 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -757,7 +757,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry const struct eventfs_entry *entries, int size, void *data) { - struct dentry *dentry = tracefs_start_creating(name, parent); + struct dentry *dentry; struct eventfs_root_inode *rei; struct eventfs_inode *ei; struct tracefs_inode *ti; @@ -768,6 +768,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry if (security_locked_down(LOCKDOWN_TRACEFS)) return NULL; + dentry = tracefs_start_creating(name, parent); if (IS_ERR(dentry)) return ERR_CAST(dentry); From a6a6d7e995f91ff676d404d7436ae0bf5eb87091 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 
2025 05:15:12 +0000 Subject: [PATCH 03/54] new helper: simple_remove_by_name() simple_recursive_removal(), but instead of victim dentry it takes parent + name. Used to be open-coded in fs/fuse/control.c, but there's no need to expose the guts of that thing there and there are other potential users, so let's lift it into libfs... Acked-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/fuse/control.c | 7 +------ fs/libfs.c | 13 +++++++++++++ include/linux/fs.h | 2 ++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 5247df896c5d0..3dca752127ff8 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -290,18 +290,13 @@ static void remove_one(struct dentry *dentry) */ void fuse_ctl_remove_conn(struct fuse_conn *fc) { - struct dentry *dentry; char name[32]; if (!fuse_control_sb || fc->no_control) return; sprintf(name, "%u", fc->dev); - dentry = lookup_noperm_positive_unlocked(&QSTR(name), fuse_control_sb->s_root); - if (!IS_ERR(dentry)) { - simple_recursive_removal(dentry, remove_one); - dput(dentry); // paired with lookup_noperm_positive_unlocked() - } + simple_remove_by_name(fuse_control_sb->s_root, name, remove_one); } static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc) diff --git a/fs/libfs.c b/fs/libfs.c index ce8c496a6940a..d029aff41f666 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -655,6 +655,19 @@ void simple_recursive_removal(struct dentry *dentry, } EXPORT_SYMBOL(simple_recursive_removal); +void simple_remove_by_name(struct dentry *parent, const char *name, + void (*callback)(struct dentry *)) +{ + struct dentry *dentry; + + dentry = lookup_noperm_positive_unlocked(&QSTR(name), parent); + if (!IS_ERR(dentry)) { + simple_recursive_removal(dentry, callback); + dput(dentry); // paired with lookup_noperm_positive_unlocked() + } +} +EXPORT_SYMBOL(simple_remove_by_name); + /* caller holds parent directory with I_MUTEX_PARENT */ void locked_recursive_removal(struct dentry *dentry, 
void (*callback)(struct dentry *)) diff --git a/include/linux/fs.h b/include/linux/fs.h index c895146c1444b..28bd4e8d38925 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3631,6 +3631,8 @@ extern int simple_rename(struct mnt_idmap *, struct inode *, unsigned int); extern void simple_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); +extern void simple_remove_by_name(struct dentry *, const char *, + void (*callback)(struct dentry *)); extern void locked_recursive_removal(struct dentry *, void (*callback)(struct dentry *)); extern int noop_fsync(struct file *, loff_t, loff_t, int); From b812488bc32bcee84da0e8eb1a08b66d005334f9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:13 +0000 Subject: [PATCH 04/54] new helper: simple_done_creating() should be paired with simple_start_creating() - unlocks parent and drops dentry reference. Signed-off-by: Al Viro --- fs/libfs.c | 8 ++++++++ include/linux/fs.h | 1 + 2 files changed, 9 insertions(+) diff --git a/fs/libfs.c b/fs/libfs.c index d029aff41f666..a033f35493d0c 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -2326,3 +2326,11 @@ struct dentry *simple_start_creating(struct dentry *parent, const char *name) return dentry; } EXPORT_SYMBOL(simple_start_creating); + +/* parent must have been held exclusive since simple_start_creating() */ +void simple_done_creating(struct dentry *child) +{ + inode_unlock(child->d_parent->d_inode); + dput(child); +} +EXPORT_SYMBOL(simple_done_creating); diff --git a/include/linux/fs.h b/include/linux/fs.h index 28bd4e8d38925..f5037c556f617 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3662,6 +3662,7 @@ extern int simple_fill_super(struct super_block *, unsigned long, extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); struct dentry *simple_start_creating(struct dentry *, const char *); +void simple_done_creating(struct dentry 
*); extern ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos, const void *from, size_t available); From 118d360a921064d91e199d5858432b62c535cce0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:14 +0000 Subject: [PATCH 05/54] introduce a flag for explicitly marking persistently pinned dentries Some filesystems use a kinda-sorta controlled dentry refcount leak to pin dentries of created objects in dcache (and undo it when removing those). Reference is grabbed and not released, but it's not actually _stored_ anywhere. That works, but it's hard to follow and verify; among other things, we have no way to tell _which_ of the increments is intended to be an unpaired one. Worse, on removal we need to decide whether the reference had already been dropped, which can be non-trivial if that removal is on umount and we need to figure out if this dentry is pinned due to e.g. unlink() not done. Usually that is handled by using kill_litter_super() as ->kill_sb(), but there are open-coded special cases of the same (consider e.g. /proc/self). Things get simpler if we introduce a new dentry flag (DCACHE_PERSISTENT) marking those "leaked" dentries. Having it set claims responsibility for +1 in refcount. The end result this series is aiming for: * get these unbalanced dget() and dput() replaced with new primitives that would, in addition to adjusting refcount, set and clear persistency flag. * instead of having kill_litter_super() mess with removing the remaining "leaked" references (e.g. for all tmpfs files that hadn't been removed prior to umount), have the regular shrink_dcache_for_umount() strip DCACHE_PERSISTENT of all dentries, dropping the corresponding reference if it had been set. After that kill_litter_super() becomes an equivalent of kill_anon_super(). Doing that in a single step is not feasible - it would affect too many places in too many filesystems. It has to be split into a series. 
Here we * introduce the new flag * teach shrink_dcache_for_umount() to handle it (i.e. remove and drop refcount on anything that survives to umount with that flag still set) * teach kill_litter_super() that anything with that flag does *not* need to be unpinned. Next commits will add primitives for maintaining that flag and convert the common helpers to those. After that - a long series of per-filesystem patches converting to those primitives. Signed-off-by: Al Viro --- fs/dcache.c | 27 ++++++++++++++++++++++----- include/linux/dcache.h | 1 + 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 035cccbc92765..f2c9f4fef2a2f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1511,6 +1511,15 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry) return ret; } +static enum d_walk_ret select_collect_umount(void *_data, struct dentry *dentry) +{ + if (dentry->d_flags & DCACHE_PERSISTENT) { + dentry->d_flags &= ~DCACHE_PERSISTENT; + dentry->d_lockref.count--; + } + return select_collect(_data, dentry); +} + static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry) { struct select_data *data = _data; @@ -1539,18 +1548,20 @@ static enum d_walk_ret select_collect2(void *_data, struct dentry *dentry) } /** - * shrink_dcache_parent - prune dcache + * shrink_dcache_tree - prune dcache * @parent: parent of entries to prune + * @for_umount: true if we want to unpin the persistent ones * * Prune the dcache to remove unused children of the parent dentry. */ -void shrink_dcache_parent(struct dentry *parent) +static void shrink_dcache_tree(struct dentry *parent, bool for_umount) { for (;;) { struct select_data data = {.start = parent}; INIT_LIST_HEAD(&data.dispose); - d_walk(parent, &data, select_collect); + d_walk(parent, &data, + for_umount ? 
select_collect_umount : select_collect); if (!list_empty(&data.dispose)) { shrink_dentry_list(&data.dispose); @@ -1575,6 +1586,11 @@ void shrink_dcache_parent(struct dentry *parent) shrink_dentry_list(&data.dispose); } } + +void shrink_dcache_parent(struct dentry *parent) +{ + shrink_dcache_tree(parent, false); +} EXPORT_SYMBOL(shrink_dcache_parent); static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) @@ -1601,7 +1617,7 @@ static enum d_walk_ret umount_check(void *_data, struct dentry *dentry) static void do_one_tree(struct dentry *dentry) { - shrink_dcache_parent(dentry); + shrink_dcache_tree(dentry, true); d_walk(dentry, dentry, umount_check); d_drop(dentry); dput(dentry); @@ -3111,7 +3127,8 @@ static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) { struct dentry *root = data; if (dentry != root) { - if (d_unhashed(dentry) || !dentry->d_inode) + if (d_unhashed(dentry) || !dentry->d_inode || + dentry->d_flags & DCACHE_PERSISTENT) return D_WALK_SKIP; if (!(dentry->d_flags & DCACHE_GENOCIDE)) { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c83e02b943894..94b58655322a0 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -225,6 +225,7 @@ enum dentry_flags { DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ DCACHE_DENTRY_CURSOR = BIT(25), DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ + DCACHE_PERSISTENT = BIT(27) }; #define DCACHE_MANAGED_DENTRY \ From 8b9a3762eae80e5413f8acba0a6cbf818be90a09 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:15 +0000 Subject: [PATCH 06/54] primitives for maintaining persistency * d_make_persistent(dentry, inode) - bump refcount, mark persistent and make hashed positive. Return value is a borrowed reference to dentry; it can be used until something removes persistency (at the very least, until the parent gets unlocked, but some filesystems may have stronger exclusion).
* d_make_discardable() - remove persistency mark and drop reference. d_make_persistent() is similar to combination of d_instantiate(), dget() and setting flag. The only difference is that unlike d_instantiate() it accepts hashed and unhashed negatives alike. It is always called in strong locking environment (parent held exclusive, or, in some cases, dentry coming from d_alloc_name()); if we ever start using it with parent held only shared and dentry coming from d_alloc_parallel(), we'll need to copy the in-lookup logic from __d_add(). d_make_discardable() is equivalent to combination of removing flag and dput(); since flag removal requires ->d_lock, there's no point trying to avoid taking that for refcount decrement as fast_dput() does. The slow path of dput() has been taken into a helper and reused in d_make_discardable() instead. Signed-off-by: Al Viro --- fs/dcache.c | 74 +++++++++++++++++++++++++++++++++--------- include/linux/dcache.h | 2 ++ 2 files changed, 61 insertions(+), 15 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index f2c9f4fef2a2f..3cc6c3876177b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -869,6 +869,24 @@ static inline bool fast_dput(struct dentry *dentry) return false; } +static void finish_dput(struct dentry *dentry) + __releases(dentry->d_lock) + __releases(RCU) +{ + while (lock_for_kill(dentry)) { + rcu_read_unlock(); + dentry = __dentry_kill(dentry); + if (!dentry) + return; + if (retain_dentry(dentry, true)) { + spin_unlock(&dentry->d_lock); + return; + } + rcu_read_lock(); + } + rcu_read_unlock(); + spin_unlock(&dentry->d_lock); +} /* * This is dput @@ -906,22 +924,28 @@ void dput(struct dentry *dentry) rcu_read_unlock(); return; } - while (lock_for_kill(dentry)) { - rcu_read_unlock(); - dentry = __dentry_kill(dentry); - if (!dentry) - return; - if (retain_dentry(dentry, true)) { - spin_unlock(&dentry->d_lock); - return; - } - rcu_read_lock(); - } - rcu_read_unlock(); - spin_unlock(&dentry->d_lock); + finish_dput(dentry); }
EXPORT_SYMBOL(dput); +void d_make_discardable(struct dentry *dentry) +{ + spin_lock(&dentry->d_lock); + /* + * By the end of the series we'll add + * WARN_ON(!(dentry->d_flags & DCACHE_PERSISTENT); + * here, but while object removal is done by a few common helpers, + * object creation tends to be open-coded (if nothing else, new inode + * needs to be set up), so adding a warning from the very beginning + * would make for much messier patch series. + */ + dentry->d_flags &= ~DCACHE_PERSISTENT; + dentry->d_lockref.count--; + rcu_read_lock(); + finish_dput(dentry); +} +EXPORT_SYMBOL(d_make_discardable); + static void to_shrink_list(struct dentry *dentry, struct list_head *list) __must_hold(&dentry->d_lock) { @@ -1939,7 +1963,6 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) unsigned add_flags = d_flags_for_inode(inode); WARN_ON(d_in_lookup(dentry)); - spin_lock(&dentry->d_lock); /* * The negative counter only tracks dentries on the LRU. Don't dec if * d_lru is on another list. 
@@ -1952,7 +1975,6 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) __d_set_inode_and_type(dentry, inode, add_flags); raw_write_seqcount_end(&dentry->d_seq); fsnotify_update_flags(dentry); - spin_unlock(&dentry->d_lock); } /** @@ -1976,7 +1998,9 @@ void d_instantiate(struct dentry *entry, struct inode * inode) if (inode) { security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); + spin_lock(&entry->d_lock); __d_instantiate(entry, inode); + spin_unlock(&entry->d_lock); spin_unlock(&inode->i_lock); } } @@ -1995,7 +2019,9 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode) lockdep_annotate_inode_mutex_key(inode); security_d_instantiate(entry, inode); spin_lock(&inode->i_lock); + spin_lock(&entry->d_lock); __d_instantiate(entry, inode); + spin_unlock(&entry->d_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW & ~I_CREATING; /* @@ -2754,6 +2780,24 @@ void d_add(struct dentry *entry, struct inode *inode) } EXPORT_SYMBOL(d_add); +struct dentry *d_make_persistent(struct dentry *dentry, struct inode *inode) +{ + WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias)); + WARN_ON(!inode); + security_d_instantiate(dentry, inode); + spin_lock(&inode->i_lock); + spin_lock(&dentry->d_lock); + __d_instantiate(dentry, inode); + dentry->d_flags |= DCACHE_PERSISTENT; + dget_dlock(dentry); + if (d_unhashed(dentry)) + __d_rehash(dentry); + spin_unlock(&dentry->d_lock); + spin_unlock(&inode->i_lock); + return dentry; +} +EXPORT_SYMBOL(d_make_persistent); + static void swap_names(struct dentry *dentry, struct dentry *target) { if (unlikely(dname_external(target))) { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 94b58655322a0..6ec4066825e31 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -611,5 +611,7 @@ static inline struct dentry *d_next_sibling(const struct dentry *dentry) } void set_default_d_op(struct super_block *, const struct dentry_operations *); +struct dentry 
*d_make_persistent(struct dentry *, struct inode *); +void d_make_discardable(struct dentry *dentry); #endif /* __LINUX_DCACHE_H */ From 1c42cc40fc68a1da853595fcf4d532c812632e73 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:16 +0000 Subject: [PATCH 07/54] convert simple_{link,unlink,rmdir,rename,fill_super}() to new primitives Note that simple_unlink() et.al. are used by many filesystems; for now they can not assume that persistency mark will have been set back when the object got created. Once all conversions are done we'll have them complain if called for something that had not been marked persistent. Signed-off-by: Al Viro --- fs/libfs.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/libfs.c b/fs/libfs.c index a033f35493d0c..80f288a771e30 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -630,7 +630,7 @@ static void __simple_recursive_removal(struct dentry *dentry, if (callback) callback(victim); fsnotify_delete(inode, d_inode(victim), victim); - dput(victim); // unpin it + d_make_discardable(victim); } if (victim == dentry) { inode_set_mtime_to_ts(inode, @@ -764,8 +764,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inc_nlink(inode); ihold(inode); - dget(dentry); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); return 0; } EXPORT_SYMBOL(simple_link); @@ -798,7 +797,7 @@ int simple_unlink(struct inode *dir, struct dentry *dentry) inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); drop_nlink(inode); - dput(dentry); + d_make_discardable(dentry); return 0; } EXPORT_SYMBOL(simple_unlink); @@ -1078,7 +1077,8 @@ int simple_fill_super(struct super_block *s, unsigned long magic, simple_inode_init_ts(inode); inode->i_fop = files->ops; inode->i_ino = i; - d_add(dentry, inode); + d_make_persistent(dentry, inode); + dput(dentry); } return 0; } From 
04953df9ab4c57227ef4c0fee59a4cf6dff33c47 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:17 +0000 Subject: [PATCH 08/54] convert ramfs and tmpfs Quite a bit is already done by infrastructure changes (simple_link(), simple_unlink()) - all that is left is replacing d_instantiate() + pinning dget() (in ->symlink() and ->mknod()) with d_make_persistent(), and, in case of shmem, using simple_unlink() and simple_link() in ->unlink() and ->link() resp., instead of open-coding those there. Since d_make_persistent() accepts (and hashes) unhashed ones, shmem situation gets simpler - we no longer care whether ->lookup() has hashed the sucker. With that done, we don't need kill_litter_super() for these filesystems anymore - by the umount time all remaining dentries will be marked persistent and kill_litter_super() will boil down to call of kill_anon_super(). The same goes for devtmpfs and rootfs - they are handled by ramfs or by shmem, depending upon config. NB: strictly speaking, both devtmpfs and rootfs ought to use ramfs_kill_sb() if they end up using ramfs; that's a separate story and the only impact of "just use kill_{litter,anon}_super()" is that we fail to free their sb->s_fs_info... on reboot. That's orthogonal to the changes in this series - kill_litter_super() is identical to kill_anon_super() for those at this point. 
Signed-off-by: Al Viro --- drivers/base/devtmpfs.c | 2 +- fs/ramfs/inode.c | 8 +++----- init/do_mounts.c | 2 +- mm/shmem.c | 38 ++++++++------------------------------ 4 files changed, 13 insertions(+), 37 deletions(-) diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 9d4e46ad83522..a63b0ff0c432d 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -70,7 +70,7 @@ static struct file_system_type internal_fs_type = { #else .init_fs_context = ramfs_init_fs_context, #endif - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; /* Simply take a ref on the existing mount */ diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 41f9995da7cab..505d10a0cb366 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -110,8 +110,7 @@ ramfs_mknod(struct mnt_idmap *idmap, struct inode *dir, goto out; } - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ + d_make_persistent(dentry, inode); error = 0; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } @@ -154,8 +153,7 @@ static int ramfs_symlink(struct mnt_idmap *idmap, struct inode *dir, error = page_symlink(inode, symname, l); if (!error) { - d_instantiate(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } else @@ -313,7 +311,7 @@ int ramfs_init_fs_context(struct fs_context *fc) void ramfs_kill_sb(struct super_block *sb) { kfree(sb->s_fs_info); - kill_litter_super(sb); + kill_anon_super(sb); } static struct file_system_type ramfs_fs_type = { diff --git a/init/do_mounts.c b/init/do_mounts.c index 6af29da8889eb..810878fb55b61 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -507,7 +507,7 @@ static int rootfs_init_fs_context(struct fs_context *fc) struct file_system_type rootfs_fs_type = { .name = "rootfs", .init_fs_context = rootfs_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; void __init init_rootfs(void) diff --git 
a/mm/shmem.c b/mm/shmem.c index 58701d14dd96c..6219026c34031 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -3861,12 +3861,7 @@ shmem_mknod(struct mnt_idmap *idmap, struct inode *dir, inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); inode_inc_iversion(dir); - if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) - d_add(dentry, inode); - else - d_instantiate(dentry, inode); - - dget(dentry); /* Extra count - pin the dentry in core */ + d_make_persistent(dentry, inode); return error; out_iput: @@ -3927,7 +3922,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(old_dentry); - int ret = 0; + int ret; /* * No ordinary (disk based) filesystem counts links as inodes; @@ -3939,29 +3934,19 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, if (inode->i_nlink) { ret = shmem_reserve_inode(inode->i_sb, NULL); if (ret) - goto out; + return ret; } ret = simple_offset_add(shmem_get_offset_ctx(dir), dentry); if (ret) { if (inode->i_nlink) shmem_free_inode(inode->i_sb, 0); - goto out; + return ret; } dir->i_size += BOGO_DIRENT_SIZE; - inode_set_mtime_to_ts(dir, - inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inode_inc_iversion(dir); - inc_nlink(inode); - ihold(inode); /* New dentry reference */ - dget(dentry); /* Extra pinning count for the created dentry */ - if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) - d_add(dentry, inode); - else - d_instantiate(dentry, inode); -out: - return ret; + return simple_link(old_dentry, dir, dentry); } static int shmem_unlink(struct inode *dir, struct dentry *dentry) @@ -3974,11 +3959,8 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) simple_offset_remove(shmem_get_offset_ctx(dir), dentry); dir->i_size -= BOGO_DIRENT_SIZE; - inode_set_mtime_to_ts(dir, - inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); inode_inc_iversion(dir); - drop_nlink(inode); - dput(dentry); /* Undo the count from "create" 
- does all the work */ + simple_unlink(dir, dentry); /* * For now, VFS can't deal with case-insensitive negative dentries, so @@ -4133,11 +4115,7 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir, dir->i_size += BOGO_DIRENT_SIZE; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); inode_inc_iversion(dir); - if (IS_ENABLED(CONFIG_UNICODE) && IS_CASEFOLDED(dir)) - d_add(dentry, inode); - else - d_instantiate(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); return 0; out_remove_offset: @@ -5337,7 +5315,7 @@ static struct file_system_type shmem_fs_type = { #ifdef CONFIG_TMPFS .parameters = shmem_fs_parameters, #endif - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP | FS_MGTIME, }; From b1a7de2afcc9c6f3fc1abc58f9d420889327e14a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:18 +0000 Subject: [PATCH 09/54] procfs: make /self and /thread_self dentries persistent ... and there's no need to remember those pointers anywhere - ->kill_sb() no longer needs to bother since kill_anon_super() will take care of them anyway and proc_pid_readdir() only wants the inumbers, which we had in a couple of static variables all along. 
Signed-off-by: Al Viro --- fs/proc/base.c | 6 ++---- fs/proc/internal.h | 1 + fs/proc/root.c | 14 ++++---------- fs/proc/self.c | 10 +++------- fs/proc/thread_self.c | 11 +++-------- include/linux/proc_fs.h | 2 -- 6 files changed, 13 insertions(+), 31 deletions(-) diff --git a/fs/proc/base.c b/fs/proc/base.c index 6299878e3d97e..869677a26332d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3585,14 +3585,12 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) return 0; if (pos == TGID_OFFSET - 2) { - struct inode *inode = d_inode(fs_info->proc_self); - if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) + if (!dir_emit(ctx, "self", 4, self_inum, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } if (pos == TGID_OFFSET - 1) { - struct inode *inode = d_inode(fs_info->proc_thread_self); - if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) + if (!dir_emit(ctx, "thread-self", 11, thread_self_inum, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } diff --git a/fs/proc/internal.h b/fs/proc/internal.h index d1598576506c1..c1e8eb984da81 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -373,6 +373,7 @@ static inline void proc_tty_init(void) {} extern struct proc_dir_entry proc_root; extern void proc_self_init(void); +extern unsigned self_inum, thread_self_inum; /* * task_[no]mmu.c diff --git a/fs/proc/root.c b/fs/proc/root.c index 1e24e085c7d5a..d8ca41d823e44 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -347,17 +347,11 @@ static void proc_kill_sb(struct super_block *sb) { struct proc_fs_info *fs_info = proc_sb_info(sb); - if (!fs_info) { - kill_anon_super(sb); - return; - } - - dput(fs_info->proc_self); - dput(fs_info->proc_thread_self); - kill_anon_super(sb); - put_pid_ns(fs_info->pid_ns); - kfree_rcu(fs_info, rcu); + if (fs_info) { + put_pid_ns(fs_info->pid_ns); + kfree_rcu(fs_info, rcu); + } } static struct file_system_type proc_fs_type = { diff --git a/fs/proc/self.c b/fs/proc/self.c index b46fbfd226811..62d2c0cfe35c5 100644 --- 
a/fs/proc/self.c +++ b/fs/proc/self.c @@ -31,12 +31,11 @@ static const struct inode_operations proc_self_inode_operations = { .get_link = proc_self_get_link, }; -static unsigned self_inum __ro_after_init; +unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct proc_fs_info *fs_info = proc_sb_info(s); struct dentry *self; int ret = -ENOMEM; @@ -51,18 +50,15 @@ int proc_setup_self(struct super_block *s) inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_op = &proc_self_inode_operations; - d_add(self, inode); + d_make_persistent(self, inode); ret = 0; - } else { - dput(self); } + dput(self); } inode_unlock(root_inode); if (ret) pr_err("proc_fill_super: can't allocate /proc/self\n"); - else - fs_info->proc_self = self; return ret; } diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index 0e5050d6ab647..d6113dbe58e05 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -31,12 +31,11 @@ static const struct inode_operations proc_thread_self_inode_operations = { .get_link = proc_thread_self_get_link, }; -static unsigned thread_self_inum __ro_after_init; +unsigned thread_self_inum __ro_after_init; int proc_setup_thread_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); - struct proc_fs_info *fs_info = proc_sb_info(s); struct dentry *thread_self; int ret = -ENOMEM; @@ -51,19 +50,15 @@ int proc_setup_thread_self(struct super_block *s) inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_op = &proc_thread_self_inode_operations; - d_add(thread_self, inode); + d_make_persistent(thread_self, inode); ret = 0; - } else { - dput(thread_self); } + dput(thread_self); } inode_unlock(root_inode); if (ret) pr_err("proc_fill_super: can't allocate /proc/thread-self\n"); - else - fs_info->proc_thread_self = thread_self; - return ret; } diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 
f139377f4b319..19d1c5e5f3350 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -66,8 +66,6 @@ enum proc_pidonly { struct proc_fs_info { struct pid_namespace *pid_ns; - struct dentry *proc_self; /* For /proc/self */ - struct dentry *proc_thread_self; /* For /proc/thread-self */ kgid_t pid_gid; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; From 8cd6e13e44b4185128f044b4279361930916949a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:19 +0000 Subject: [PATCH 10/54] configfs, securityfs: kill_litter_super() not needed These are guaranteed to be empty by the time they are shut down; both are single-instance and there is an internal mount maintained for as long as there is any contents. Both have that internal mount pinned by every object in root. In other words, kill_litter_super() boils down to kill_anon_super() for those. Reviewed-by: Joel Becker Acked-by: Paul Moore (LSM) Acked-by: Andreas Hindborg (configfs) Signed-off-by: Al Viro --- fs/configfs/mount.c | 2 +- security/inode.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index 456c4a2efb532..4929f34311894 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -116,7 +116,7 @@ static struct file_system_type configfs_fs_type = { .owner = THIS_MODULE, .name = "configfs", .init_fs_context = configfs_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("configfs"); diff --git a/security/inode.c b/security/inode.c index 43382ef8896e1..bf7b5e2e69556 100644 --- a/security/inode.c +++ b/security/inode.c @@ -70,7 +70,7 @@ static struct file_system_type fs_type = { .owner = THIS_MODULE, .name = "securityfs", .init_fs_context = securityfs_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; /** From f5b26969bc8418cd126cf1cafa22288394463f3a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:20 +0000 Subject: [PATCH 
11/54] convert xenfs entirely static tree, populated by simple_fill_super(). Can switch to kill_anon_super() without any other changes. Signed-off-by: Al Viro --- drivers/xen/xenfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c index d7d64235010d4..37ea7c5c03469 100644 --- a/drivers/xen/xenfs/super.c +++ b/drivers/xen/xenfs/super.c @@ -88,7 +88,7 @@ static struct file_system_type xenfs_type = { .owner = THIS_MODULE, .name = "xenfs", .init_fs_context = xenfs_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("xenfs"); From 301c543ec3cbf78194aea4daf077c13e2da40a39 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:21 +0000 Subject: [PATCH 12/54] convert smackfs Entirely static tree populated by simple_fill_super(). Can use kill_anon_super() as-is. Acked-by: Casey Schaufler Signed-off-by: Al Viro --- security/smack/smackfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index b1e5e62f5cbd1..e989ae3890c75 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -2960,7 +2960,7 @@ static int smk_init_fs_context(struct fs_context *fc) static struct file_system_type smk_fs_type = { .name = "smackfs", .init_fs_context = smk_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; static struct vfsmount *smackfs_mount; From 9b304b2795c680b0e60818b7735a104cd72ccb73 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:22 +0000 Subject: [PATCH 13/54] convert hugetlbfs Very much ramfs-like; dget()+d_instantiate() -> d_make_persistent() (in two places) is all it takes. NB: might make sense to turn its ->put_super() into ->kill_sb(). 
Signed-off-by: Al Viro --- fs/hugetlbfs/inode.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index f42548ee9083c..83273677183d4 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -975,8 +975,7 @@ static int hugetlbfs_mknod(struct mnt_idmap *idmap, struct inode *dir, if (!inode) return -ENOSPC; inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); - d_instantiate(dentry, inode); - dget(dentry);/* Extra count - pin the dentry in core */ + d_make_persistent(dentry, inode); return 0; } @@ -1023,10 +1022,9 @@ static int hugetlbfs_symlink(struct mnt_idmap *idmap, if (inode) { int l = strlen(symname)+1; error = page_symlink(inode, symname, l); - if (!error) { - d_instantiate(dentry, inode); - dget(dentry); - } else + if (!error) + d_make_persistent(dentry, inode); + else iput(inode); } inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); @@ -1483,7 +1481,7 @@ static struct file_system_type hugetlbfs_fs_type = { .name = "hugetlbfs", .init_fs_context = hugetlbfs_init_fs_context, .parameters = hugetlb_fs_parameters, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, .fs_flags = FS_ALLOW_IDMAP, }; From c2bed6fd0f6647d2abea760b810c306c97ba7cb8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:23 +0000 Subject: [PATCH 14/54] convert mqueue All modifications via normal VFS codepaths; just take care of making persistent in mqueue_create_attr() and discardable in mqueue_unlink() and it doesn't need kill_litter_super() at all. mqueue_unlink() side is best handled by having it call simple_unlink() rather than duplicating its guts...
Signed-off-by: Al Viro --- ipc/mqueue.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 093551fe66a7e..5737130137bf4 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -599,8 +599,7 @@ static int mqueue_create_attr(struct dentry *dentry, umode_t mode, void *arg) dir->i_size += DIRENT_SIZE; simple_inode_init_ts(dir); - d_instantiate(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); return 0; out_unlock: spin_unlock(&mq_lock); @@ -617,13 +616,8 @@ static int mqueue_create(struct mnt_idmap *idmap, struct inode *dir, static int mqueue_unlink(struct inode *dir, struct dentry *dentry) { - struct inode *inode = d_inode(dentry); - - simple_inode_init_ts(dir); dir->i_size -= DIRENT_SIZE; - drop_nlink(inode); - dput(dentry); - return 0; + return simple_unlink(dir, dentry); } /* @@ -1638,7 +1632,7 @@ static const struct fs_context_operations mqueue_fs_context_ops = { static struct file_system_type mqueue_fs_type = { .name = "mqueue", .init_fs_context = mqueue_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, .fs_flags = FS_USERNS_MOUNT, }; From 619f29bdb59f71fb5022e3d3fdfcf38c4a2d88a4 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:24 +0000 Subject: [PATCH 15/54] convert bpf object creation goes through the normal VFS paths or approximation thereof (user_path_create()/done_path_create() in case of bpf_obj_do_pin(), open-coded simple_{start,done}_creating() in bpf_iter_link_pin_kernel() at mount time), removals go entirely through the normal VFS paths (and ->unlink() is simple_unlink() there). Enough to have bpf_dentry_finalize() use d_make_persistent() instead of dget() and we are done. Convert bpf_iter_link_pin_kernel() to simple_{start,done}_creating(), while we are at it. 
Signed-off-by: Al Viro --- kernel/bpf/inode.c | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 81780bcf8d254..9f866a010dada 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -144,8 +144,7 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, struct inode *dir) { - d_instantiate(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); } @@ -420,16 +419,12 @@ static int bpf_iter_link_pin_kernel(struct dentry *parent, struct dentry *dentry; int ret; - inode_lock(parent->d_inode); - dentry = lookup_noperm(&QSTR(name), parent); - if (IS_ERR(dentry)) { - inode_unlock(parent->d_inode); + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) return PTR_ERR(dentry); - } ret = bpf_mkobj_ops(dentry, mode, link, &bpf_link_iops, &bpf_iter_fops); - dput(dentry); - inode_unlock(parent->d_inode); + simple_done_creating(dentry); return ret; } @@ -1080,7 +1075,7 @@ static void bpf_kill_super(struct super_block *sb) { struct bpf_mount_opts *opts = sb->s_fs_info; - kill_litter_super(sb); + kill_anon_super(sb); kfree(opts); } From 05d33b750752fbaa217f86dfa494622fcede9e6a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:25 +0000 Subject: [PATCH 16/54] convert dlmfs All modifications via normal VFS codepaths; just take care of making persistent in ->create() and ->mkdir() and that's it (removal side doesn't need any changes, since it uses simple_rmdir() for ->rmdir() and calls simple_unlink() from ->unlink()). 
Signed-off-by: Al Viro --- fs/ocfs2/dlmfs/dlmfs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index cccaa1d6fbbac..339f0b11cdc81 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -441,8 +441,7 @@ static struct dentry *dlmfs_mkdir(struct mnt_idmap * idmap, ip->ip_conn = conn; inc_nlink(dir); - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ + d_make_persistent(dentry, inode); status = 0; bail: @@ -480,8 +479,7 @@ static int dlmfs_create(struct mnt_idmap *idmap, goto bail; } - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ + d_make_persistent(dentry, inode); bail: return status; } @@ -574,7 +572,7 @@ static int dlmfs_init_fs_context(struct fs_context *fc) static struct file_system_type dlmfs_fs_type = { .owner = THIS_MODULE, .name = "ocfs2_dlmfs", - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, .init_fs_context = dlmfs_init_fs_context, }; MODULE_ALIAS_FS("ocfs2_dlmfs"); From 3a227a5c0d6f89ff15136daee5db78bfa5b90c05 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:26 +0000 Subject: [PATCH 17/54] convert fuse_ctl objects are created in fuse_ctl_add_dentry() by d_alloc_name()+d_add(), removed by simple_remove_by_name(). What we return is a borrowed reference - it is valid until the call of fuse_ctl_remove_conn() and we depend upon the exclusion (on fuse_mutex) for safety. Return value is used only within the caller (fuse_ctl_add_conn()). Replace d_add() with d_make_persistent() + dput(). dput() is paired with d_alloc_name() and return value is the result of d_make_persistent(). 
Acked-by: Miklos Szeredi Signed-off-by: Al Viro --- fs/fuse/control.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 3dca752127ff8..140bd5730d998 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -236,8 +236,14 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, inc_nlink(inode); } inode->i_private = fc; - d_add(dentry, inode); - + d_make_persistent(dentry, inode); + dput(dentry); + + /* + * We are returning a borrowed reference here - it's only good while + * fuse_mutex is held. Actually it's d_make_persistent() return + * value... + */ return dentry; } @@ -346,7 +352,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb) fuse_control_sb = NULL; mutex_unlock(&fuse_mutex); - kill_litter_super(sb); + kill_anon_super(sb); } static struct file_system_type fuse_ctl_fs_type = { From a5f7c7c64d19f09b197ee5b6d7b7bd5ff7265a33 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:27 +0000 Subject: [PATCH 18/54] convert pstore object creation by d_alloc_name()+d_add() in pstore_mkfile(), removal - via normal VFS codepaths (with ->unlink() using simple_unlink()) or in pstore_put_backend_records() via locked_recursive_removal() Replace d_add() with d_make_persistent()+dput() - that's what really happens there. The reference that goes into record->dentry is valid only until the unlink (and explicitly cleared by pstore_unlink()). 
Reviewed-by: Kees Cook Signed-off-by: Al Viro --- fs/pstore/inode.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index b4e55c90f8dc2..71deffcc33567 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -373,7 +373,7 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) if (!dentry) return -ENOMEM; - private->dentry = dentry; + private->dentry = dentry; // borrowed private->record = record; inode->i_size = private->total_size = size; inode->i_private = private; @@ -382,7 +382,8 @@ int pstore_mkfile(struct dentry *root, struct pstore_record *record) inode_set_mtime_to_ts(inode, inode_set_ctime_to_ts(inode, record->time)); - d_add(dentry, no_free_ptr(inode)); + d_make_persistent(dentry, no_free_ptr(inode)); + dput(dentry); list_add(&(no_free_ptr(private))->list, &records_list); @@ -465,7 +466,7 @@ static void pstore_kill_sb(struct super_block *sb) guard(mutex)(&pstore_sb_lock); WARN_ON(pstore_sb && pstore_sb != sb); - kill_litter_super(sb); + kill_anon_super(sb); pstore_sb = NULL; guard(mutex)(&records_list_lock); From 02fcd29cdceebe24d00aacd12dff0fd81e7a0a2e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:28 +0000 Subject: [PATCH 19/54] convert tracefs A mix of persistent and non-persistent dentries in there. Strictly speaking, no need for kill_litter_super() anyway - it pins an internal mount whenever a persistent dentry is created, so at fs shutdown time there won't be any to deal with. However, let's make it explicit - replace d_instantiate() with d_make_persistent() + dput() (the latter in tracefs_end_creating(), where it folds with inode_unlock() into simple_done_creating()) for dentries we want persistent and have d_make_discardable() done either by simple_recursive_removal() (used by tracefs_remove()) or explicitly in eventfs_remove_events_dir(). 
Acked-by: Steven Rostedt (Google) Signed-off-by: Al Viro --- fs/tracefs/event_inode.c | 4 ++-- fs/tracefs/inode.c | 13 ++++++------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 93c231601c8e2..61cbdafa2411a 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -823,7 +823,7 @@ struct eventfs_inode *eventfs_create_events_dir(const char *name, struct dentry * something not worth much. Keeping directory links at 1 * tells userspace not to trust the link number. */ - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); /* The dentry of the "events" parent does keep track though */ inc_nlink(dentry->d_parent->d_inode); fsnotify_mkdir(dentry->d_parent->d_inode, dentry); @@ -910,5 +910,5 @@ void eventfs_remove_events_dir(struct eventfs_inode *ei) * and destroyed dynamically. */ d_invalidate(dentry); - dput(dentry); + d_make_discardable(dentry); } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 0c023941a316f..d9d8932a7b9c9 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -538,7 +538,7 @@ static struct file_system_type trace_fs_type = { .name = "tracefs", .init_fs_context = tracefs_init_fs_context, .parameters = tracefs_param_specs, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("tracefs"); @@ -571,16 +571,15 @@ struct dentry *tracefs_start_creating(const char *name, struct dentry *parent) struct dentry *tracefs_failed_creating(struct dentry *dentry) { - inode_unlock(d_inode(dentry->d_parent)); - dput(dentry); + simple_done_creating(dentry); simple_release_fs(&tracefs_mount, &tracefs_mount_count); return NULL; } struct dentry *tracefs_end_creating(struct dentry *dentry) { - inode_unlock(d_inode(dentry->d_parent)); - return dentry; + simple_done_creating(dentry); + return dentry; // borrowed } /* Find the inode that this will use for default */ @@ -661,7 +660,7 @@ struct dentry *tracefs_create_file(const char *name, 
umode_t mode, inode->i_private = data; inode->i_uid = d_inode(dentry->d_parent)->i_uid; inode->i_gid = d_inode(dentry->d_parent)->i_gid; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); return tracefs_end_creating(dentry); } @@ -692,7 +691,7 @@ static struct dentry *__create_dir(const char *name, struct dentry *parent, /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return tracefs_end_creating(dentry); From eefe7be3838fc019e8b09896f939c39d3e5e26e5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:29 +0000 Subject: [PATCH 20/54] convert debugfs similar to tracefs - simulation of normal codepath for creation, simple_recursive_removal() for removal. Acked-by: Greg Kroah-Hartman Signed-off-by: Al Viro --- fs/debugfs/inode.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 661a99a7dfbe2..682120fdbb17b 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -329,7 +329,7 @@ static struct file_system_type debug_fs_type = { .name = "debugfs", .init_fs_context = debugfs_init_fs_context, .parameters = debugfs_param_specs, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("debugfs"); @@ -405,16 +405,15 @@ static struct dentry *debugfs_start_creating(const char *name, static struct dentry *failed_creating(struct dentry *dentry) { - inode_unlock(d_inode(dentry->d_parent)); - dput(dentry); + simple_done_creating(dentry); simple_release_fs(&debugfs_mount, &debugfs_mount_count); return ERR_PTR(-ENOMEM); } static struct dentry *end_creating(struct dentry *dentry) { - inode_unlock(d_inode(dentry->d_parent)); - return dentry; + simple_done_creating(dentry); + return dentry; // borrowed } 
static struct dentry *__debugfs_create_file(const char *name, umode_t mode, @@ -456,7 +455,7 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, DEBUGFS_I(inode)->raw = real_fops; DEBUGFS_I(inode)->aux = (void *)aux; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(d_inode(dentry->d_parent), dentry); return end_creating(dentry); } @@ -602,7 +601,7 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); @@ -649,7 +648,7 @@ struct dentry *debugfs_create_automount(const char *name, DEBUGFS_I(inode)->automount = f; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); inc_nlink(d_inode(dentry->d_parent)); fsnotify_mkdir(d_inode(dentry->d_parent), dentry); return end_creating(dentry); @@ -704,7 +703,7 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_op = &debugfs_symlink_inode_operations; inode->i_link = link; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); return end_creating(dentry); } EXPORT_SYMBOL_GPL(debugfs_create_symlink); From af7978bb3a82d97c84d6d165f95b795d7008e303 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:30 +0000 Subject: [PATCH 21/54] debugfs: remove duplicate checks in callers of start_creating() we'd already verified that DEBUGFS_ALLOW_API was there in start_creating() - it would've failed otherwise Acked-by: Greg Kroah-Hartman Signed-off-by: Al Viro --- fs/debugfs/inode.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 
682120fdbb17b..25a554331ac4f 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -433,11 +433,6 @@ static struct dentry *__debugfs_create_file(const char *name, umode_t mode, if (IS_ERR(dentry)) return dentry; - if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { - failed_creating(dentry); - return ERR_PTR(-EPERM); - } - inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create file '%s'\n", @@ -583,11 +578,6 @@ struct dentry *debugfs_create_dir(const char *name, struct dentry *parent) if (IS_ERR(dentry)) return dentry; - if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { - failed_creating(dentry); - return ERR_PTR(-EPERM); - } - inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create directory '%s'\n", @@ -630,11 +620,6 @@ struct dentry *debugfs_create_automount(const char *name, if (IS_ERR(dentry)) return dentry; - if (!(debugfs_allow & DEBUGFS_ALLOW_API)) { - failed_creating(dentry); - return ERR_PTR(-EPERM); - } - inode = debugfs_get_inode(dentry->d_sb); if (unlikely(!inode)) { pr_err("out of free dentries, can not create automount '%s'\n", From 9815dd3e6d8a23e924a53838a1911b6c82614868 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:31 +0000 Subject: [PATCH 22/54] convert efivarfs Initially filesystem is populated with d_alloc_name() + d_add(). That becomes d_alloc_name() + d_make_persistent() + dput(). 
Dynamic creation is switched to d_make_persistent(); removal - to simple_unlink() (no point open-coding it in efivarfs_unlink(), better call it there) Signed-off-by: Al Viro --- fs/efivarfs/inode.c | 7 ++----- fs/efivarfs/super.c | 5 +++-- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/efivarfs/inode.c b/fs/efivarfs/inode.c index 2891614abf8d5..95dcad83da11b 100644 --- a/fs/efivarfs/inode.c +++ b/fs/efivarfs/inode.c @@ -113,8 +113,7 @@ static int efivarfs_create(struct mnt_idmap *idmap, struct inode *dir, inode->i_private = var; - d_instantiate(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); return 0; } @@ -126,9 +125,7 @@ static int efivarfs_unlink(struct inode *dir, struct dentry *dentry) if (efivar_entry_delete(var)) return -EINVAL; - drop_nlink(d_inode(dentry)); - dput(dentry); - return 0; + return simple_unlink(dir, dentry); }; const struct inode_operations efivarfs_dir_inode_operations = { diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 1f4d8ce566670..298ab3c929eb3 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -278,7 +278,8 @@ static int efivarfs_create_dentry(struct super_block *sb, efi_char16_t *name16, inode->i_private = entry; i_size_write(inode, size + sizeof(__u32)); /* attributes + data */ inode_unlock(inode); - d_add(dentry, inode); + d_make_persistent(dentry, inode); + dput(dentry); return 0; @@ -522,7 +523,7 @@ static void efivarfs_kill_sb(struct super_block *sb) struct efivarfs_fs_info *sfi = sb->s_fs_info; blocking_notifier_chain_unregister(&efivar_ops_nh, &sfi->nb); - kill_litter_super(sb); + kill_anon_super(sb); kfree(sfi); } From ff55b765845bb2373d168c01aa03c103f723cc10 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:32 +0000 Subject: [PATCH 23/54] convert spufs have spufs_new_file() use d_make_persistent() instead of d_add() and do an unconditional dput() in the caller; the rest is completely straightforward.
[a braino in spufs_mkgang() fixed] Signed-off-by: Al Viro --- arch/powerpc/platforms/cell/spufs/inode.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 7ec60290abe64..fc8ccf4dc159b 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -127,7 +127,7 @@ spufs_new_file(struct super_block *sb, struct dentry *dentry, inode->i_fop = fops; inode->i_size = size; inode->i_private = SPUFS_I(inode)->i_ctx = get_spu_context(ctx); - d_add(dentry, inode); + d_make_persistent(dentry, inode); out: return ret; } @@ -163,10 +163,9 @@ static int spufs_fill_dir(struct dentry *dir, return -ENOMEM; ret = spufs_new_file(dir->d_sb, dentry, files->ops, files->mode & mode, files->size, ctx); - if (ret) { - dput(dentry); + dput(dentry); + if (ret) return ret; - } files++; } return 0; @@ -241,11 +240,10 @@ spufs_mkdir(struct inode *dir, struct dentry *dentry, unsigned int flags, inode_lock(inode); - dget(dentry); inc_nlink(dir); inc_nlink(inode); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); if (flags & SPU_CREATE_NOSCHED) ret = spufs_fill_dir(dentry, spufs_dir_nosched_contents, @@ -479,10 +477,9 @@ spufs_mkgang(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; - d_instantiate(dentry, inode); - dget(dentry); inc_nlink(dir); - inc_nlink(d_inode(dentry)); + inc_nlink(inode); + d_make_persistent(dentry, inode); return ret; out_iput: @@ -780,7 +777,7 @@ static struct file_system_type spufs_type = { .name = "spufs", .init_fs_context = spufs_init_fs_context, .parameters = spufs_fs_parameters, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("spufs"); From 5f7b19805d21778e30aeb856dba85400c7af0707 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:33 +0000 Subject: 
[PATCH 24/54] convert ibmasmfs static contents for each "service processor", whatever the fuck it is. Congruent subdirectories of root, created at mount time, taken out by kill_litter_super(). All dentries created with d_alloc_name() and are left pinned. The odd part is that the list of service providers is assumed to be unchanging - no locking, nothing to handle removals or extra elements added later on. ... and it's a PCI device. If you ever tell it to remove an instance, you are fucked - it doesn't bother with removing its directory from filesystem, it has a strange check that presumably wanted to be a check for removed devices, but it had never been fleshed out. Anyway, d_add() -> d_make_persistent()+dput() in ibmasmfs_create_dir() and ibmasmfs_create_file(), and make the latter return int - no need to even borrow that dentry, callers completely ignore it. Signed-off-by: Al Viro --- drivers/misc/ibmasm/ibmasmfs.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index b26c930e3edb4..a6cde74efb68c 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -103,7 +103,7 @@ static struct file_system_type ibmasmfs_type = { .owner = THIS_MODULE, .name = "ibmasmfs", .init_fs_context = ibmasmfs_init_fs_context, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("ibmasmfs"); @@ -144,7 +144,7 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode) return ret; } -static struct dentry *ibmasmfs_create_file(struct dentry *parent, +static int ibmasmfs_create_file(struct dentry *parent, const char *name, const struct file_operations *fops, void *data, @@ -155,19 +155,20 @@ static struct dentry *ibmasmfs_create_file(struct dentry *parent, dentry = d_alloc_name(parent, name); if (!dentry) - return NULL; + return -ENOMEM; inode = ibmasmfs_make_inode(parent->d_sb, S_IFREG | mode); if (!inode) { 
dput(dentry); - return NULL; + return -ENOMEM; } inode->i_fop = fops; inode->i_private = data; - d_add(dentry, inode); - return dentry; + d_make_persistent(dentry, inode); + dput(dentry); + return 0; } static struct dentry *ibmasmfs_create_dir(struct dentry *parent, @@ -189,8 +190,9 @@ static struct dentry *ibmasmfs_create_dir(struct dentry *parent, inode->i_op = &simple_dir_inode_operations; inode->i_fop = ibmasmfs_dir_ops; - d_add(dentry, inode); - return dentry; + d_make_persistent(dentry, inode); + dput(dentry); + return dentry; // borrowed } int ibmasmfs_register(void) From b93fb2c4b24b63a5860c654ab11cc80e1c18a611 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:34 +0000 Subject: [PATCH 25/54] ibmasmfs: get rid of ibmasmfs_dir_ops it is always equal (and always had been equal) to &simple_dir_operations Signed-off-by: Al Viro --- drivers/misc/ibmasm/ibmasmfs.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index a6cde74efb68c..824c5b664985b 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -97,8 +97,6 @@ static const struct super_operations ibmasmfs_s_ops = { .drop_inode = inode_just_drop, }; -static const struct file_operations *ibmasmfs_dir_ops = &simple_dir_operations; - static struct file_system_type ibmasmfs_type = { .owner = THIS_MODULE, .name = "ibmasmfs", @@ -122,7 +120,7 @@ static int ibmasmfs_fill_super(struct super_block *sb, struct fs_context *fc) return -ENOMEM; root->i_op = &simple_dir_inode_operations; - root->i_fop = ibmasmfs_dir_ops; + root->i_fop = &simple_dir_operations; sb->s_root = d_make_root(root); if (!sb->s_root) @@ -188,7 +186,7 @@ static struct dentry *ibmasmfs_create_dir(struct dentry *parent, } inode->i_op = &simple_dir_inode_operations; - inode->i_fop = ibmasmfs_dir_ops; + inode->i_fop = &simple_dir_operations; d_make_persistent(dentry, inode); dput(dentry); From 
58f5293804b66d1eada3af9b97b6ada7f5bdcff6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:35 +0000 Subject: [PATCH 26/54] convert devpts Two kinds of objects there - ptmx and everything else (pty). The former is created on mount and kept until the fs shutdown; the latter get created and removed by tty layer (the references are borrowed into tty->driver_data). The reference to ptmx dentry is also kept, but we only ever use it to find ptmx inode on remount. * turn d_add() into d_make_persistent() + dput() both in mknod_ptmx() and in devpts_pty_new(). * turn dput() to d_make_discardable() in devpts_pty_kill(). * switch mknod_ptmx() to simple_{start,done}_creating(). * instead of storing in pts_fs_info a reference to ptmx dentry, store a reference to its inode, seeing that this is what we use it for. Signed-off-by: Al Viro --- fs/devpts/inode.c | 57 +++++++++++++++++------------------------------ 1 file changed, 21 insertions(+), 36 deletions(-) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index fdf22264a8e98..9f3de528c3586 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -102,7 +102,7 @@ struct pts_fs_info { struct ida allocated_ptys; struct pts_mount_opts mount_opts; struct super_block *sb; - struct dentry *ptmx_dentry; + struct inode *ptmx_inode; // borrowed }; static inline struct pts_fs_info *DEVPTS_SB(struct super_block *sb) @@ -259,7 +259,6 @@ static int devpts_parse_param(struct fs_context *fc, struct fs_parameter *param) static int mknod_ptmx(struct super_block *sb, struct fs_context *fc) { int mode; - int rc = -ENOMEM; struct dentry *dentry; struct inode *inode; struct dentry *root = sb->s_root; @@ -268,18 +267,10 @@ static int mknod_ptmx(struct super_block *sb, struct fs_context *fc) kuid_t ptmx_uid = current_fsuid(); kgid_t ptmx_gid = current_fsgid(); - inode_lock(d_inode(root)); - - /* If we have already created ptmx node, return */ - if (fsi->ptmx_dentry) { - rc = 0; - goto out; - } - - dentry = d_alloc_name(root, 
"ptmx"); - if (!dentry) { + dentry = simple_start_creating(root, "ptmx"); + if (IS_ERR(dentry)) { pr_err("Unable to alloc dentry for ptmx node\n"); - goto out; + return PTR_ERR(dentry); } /* @@ -287,9 +278,9 @@ static int mknod_ptmx(struct super_block *sb, struct fs_context *fc) */ inode = new_inode(sb); if (!inode) { + simple_done_creating(dentry); pr_err("Unable to alloc inode for ptmx node\n"); - dput(dentry); - goto out; + return -ENOMEM; } inode->i_ino = 2; @@ -299,23 +290,18 @@ static int mknod_ptmx(struct super_block *sb, struct fs_context *fc) init_special_inode(inode, mode, MKDEV(TTYAUX_MAJOR, 2)); inode->i_uid = ptmx_uid; inode->i_gid = ptmx_gid; + fsi->ptmx_inode = inode; - d_add(dentry, inode); + d_make_persistent(dentry, inode); - fsi->ptmx_dentry = dentry; - rc = 0; -out: - inode_unlock(d_inode(root)); - return rc; + simple_done_creating(dentry); + + return 0; } static void update_ptmx_mode(struct pts_fs_info *fsi) { - struct inode *inode; - if (fsi->ptmx_dentry) { - inode = d_inode(fsi->ptmx_dentry); - inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; - } + fsi->ptmx_inode->i_mode = S_IFCHR|fsi->mount_opts.ptmxmode; } static int devpts_reconfigure(struct fs_context *fc) @@ -461,7 +447,7 @@ static void devpts_kill_sb(struct super_block *sb) if (fsi) ida_destroy(&fsi->allocated_ptys); kfree(fsi); - kill_litter_super(sb); + kill_anon_super(sb); } static struct file_system_type devpts_fs_type = { @@ -534,16 +520,15 @@ struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv) sprintf(s, "%d", index); dentry = d_alloc_name(root, s); - if (dentry) { - dentry->d_fsdata = priv; - d_add(dentry, inode); - fsnotify_create(d_inode(root), dentry); - } else { + if (!dentry) { iput(inode); - dentry = ERR_PTR(-ENOMEM); + return ERR_PTR(-ENOMEM); } - - return dentry; + dentry->d_fsdata = priv; + d_make_persistent(dentry, inode); + fsnotify_create(d_inode(root), dentry); + dput(dentry); + return dentry; // borrowed } /** @@ -573,7 +558,7 @@ void 
devpts_pty_kill(struct dentry *dentry) drop_nlink(dentry->d_inode); d_drop(dentry); fsnotify_unlink(d_inode(dentry->d_parent), dentry); - dput(dentry); /* d_alloc_name() in devpts_pty_new() */ + d_make_discardable(dentry); } static int __init init_devpts_fs(void) From 5bace84a899f8f220aa7757b612c8698465af941 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:36 +0000 Subject: [PATCH 27/54] binderfs: use simple_start_creating() binderfs_binder_device_create() gets simpler, binderfs_create_dentry() simply goes away... Signed-off-by: Al Viro --- drivers/android/binderfs.c | 43 +++++--------------------------------- 1 file changed, 5 insertions(+), 38 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index be8e64eb39ec5..a7b0a773d47f5 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -183,24 +183,11 @@ static int binderfs_binder_device_create(struct inode *ref_inode, } root = sb->s_root; - inode_lock(d_inode(root)); - - /* look it up */ - dentry = lookup_noperm(&QSTR(name), root); + dentry = simple_start_creating(root, name); if (IS_ERR(dentry)) { - inode_unlock(d_inode(root)); ret = PTR_ERR(dentry); goto err; } - - if (d_really_is_positive(dentry)) { - /* already exists */ - dput(dentry); - inode_unlock(d_inode(root)); - ret = -EEXIST; - goto err; - } - inode->i_private = device; d_instantiate(dentry, inode); fsnotify_create(root->d_inode, dentry); @@ -481,24 +468,6 @@ static struct inode *binderfs_make_inode(struct super_block *sb, int mode) return ret; } -static struct dentry *binderfs_create_dentry(struct dentry *parent, - const char *name) -{ - struct dentry *dentry; - - dentry = lookup_noperm(&QSTR(name), parent); - if (IS_ERR(dentry)) - return dentry; - - /* Return error if the file/dir already exists. 
*/ - if (d_really_is_positive(dentry)) { - dput(dentry); - return ERR_PTR(-EEXIST); - } - - return dentry; -} - struct dentry *binderfs_create_file(struct dentry *parent, const char *name, const struct file_operations *fops, void *data) @@ -508,11 +477,10 @@ struct dentry *binderfs_create_file(struct dentry *parent, const char *name, struct super_block *sb; parent_inode = d_inode(parent); - inode_lock(parent_inode); - dentry = binderfs_create_dentry(parent, name); + dentry = simple_start_creating(parent, name); if (IS_ERR(dentry)) - goto out; + return dentry; sb = parent_inode->i_sb; new_inode = binderfs_make_inode(sb, S_IFREG | 0444); @@ -540,11 +508,10 @@ static struct dentry *binderfs_create_dir(struct dentry *parent, struct super_block *sb; parent_inode = d_inode(parent); - inode_lock(parent_inode); - dentry = binderfs_create_dentry(parent, name); + dentry = simple_start_creating(parent, name); if (IS_ERR(dentry)) - goto out; + return dentry; sb = parent_inode->i_sb; new_inode = binderfs_make_inode(sb, S_IFDIR | 0755); From c6d954a42b687910eea4941d96052de7f410a859 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:37 +0000 Subject: [PATCH 28/54] binderfs_binder_ctl_create(): kill a bogus check It's called once, during binderfs mount, right after allocating root dentry. Checking that it hadn't been already called is only obfuscating things. Looks like that bogosity had been copied from devpts... Signed-off-by: Al Viro --- drivers/android/binderfs.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index a7b0a773d47f5..8253e517ab6c6 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -397,12 +397,6 @@ static int binderfs_binder_ctl_create(struct super_block *sb) if (!device) return -ENOMEM; - /* If we have already created a binder-control node, return. 
*/ - if (info->control_dentry) { - ret = 0; - goto out; - } - ret = -ENOMEM; inode = new_inode(sb); if (!inode) From 78fb8a63eb4757523c24139453e25fc74761dd36 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:38 +0000 Subject: [PATCH 29/54] convert binderfs Objects are created either by d_alloc_name()+d_add() (in binderfs_binder_ctl_create()) or by simple_start_creating()+d_instantiate(). Removals are by simple_recursive_removal(). Switch d_add()/d_instantiate() to d_make_persistent() + dput(). Voila - kill_litter_super() is not needed anymore. Fold dput()+unlocking the parent into simple_done_creating(), while we are at it. NOTE: return value of binderfs_create_file() is borrowed; it may get stored in proc->binderfs_entry. See binder_release()... Signed-off-by: Al Viro --- drivers/android/binderfs.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 8253e517ab6c6..a28d0511960e2 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -189,9 +189,9 @@ static int binderfs_binder_device_create(struct inode *ref_inode, goto err; } inode->i_private = device; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(root->d_inode, dentry); - inode_unlock(d_inode(root)); + simple_done_creating(dentry); binder_add_device(device); @@ -432,7 +432,8 @@ static int binderfs_binder_ctl_create(struct super_block *sb) inode->i_private = device; info->control_dentry = dentry; - d_add(dentry, inode); + d_make_persistent(dentry, inode); + dput(dentry); return 0; @@ -479,19 +480,16 @@ struct dentry *binderfs_create_file(struct dentry *parent, const char *name, sb = parent_inode->i_sb; new_inode = binderfs_make_inode(sb, S_IFREG | 0444); if (!new_inode) { - dput(dentry); - dentry = ERR_PTR(-ENOMEM); - goto out; + simple_done_creating(dentry); + return ERR_PTR(-ENOMEM); } new_inode->i_fop = fops; new_inode->i_private =
data; - d_instantiate(dentry, new_inode); + d_make_persistent(dentry, new_inode); fsnotify_create(parent_inode, dentry); - -out: - inode_unlock(parent_inode); - return dentry; + simple_done_creating(dentry); + return dentry; // borrowed } static struct dentry *binderfs_create_dir(struct dentry *parent, @@ -510,21 +508,18 @@ static struct dentry *binderfs_create_dir(struct dentry *parent, sb = parent_inode->i_sb; new_inode = binderfs_make_inode(sb, S_IFDIR | 0755); if (!new_inode) { - dput(dentry); - dentry = ERR_PTR(-ENOMEM); - goto out; + simple_done_creating(dentry); + return ERR_PTR(-ENOMEM); } new_inode->i_fop = &simple_dir_operations; new_inode->i_op = &simple_dir_inode_operations; set_nlink(new_inode, 2); - d_instantiate(dentry, new_inode); + d_make_persistent(dentry, new_inode); inc_nlink(parent_inode); fsnotify_mkdir(parent_inode, dentry); - -out: - inode_unlock(parent_inode); + simple_done_creating(dentry); return dentry; } @@ -740,7 +735,7 @@ static void binderfs_kill_super(struct super_block *sb) * During inode eviction struct binderfs_info is needed. * So first wipe the super_block then free struct binderfs_info. 
*/ - kill_litter_super(sb); + kill_anon_super(sb); if (info && info->ipc_ns) put_ipc_ns(info->ipc_ns); From 74e76f1d165ed041c08249e945d22da14f9e3c62 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:39 +0000 Subject: [PATCH 30/54] autofs_{rmdir,unlink}: dentry->d_fsdata->dentry == dentry there Signed-off-by: Al Viro --- fs/autofs/root.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 174c7205fee44..39794633d4849 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -623,12 +623,11 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap, static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry) { struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb); - struct autofs_info *ino = autofs_dentry_ino(dentry); struct autofs_info *p_ino; p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count--; - dput(ino->dentry); + dput(dentry); d_inode(dentry)->i_size = 0; clear_nlink(d_inode(dentry)); @@ -710,7 +709,7 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry) p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count--; - dput(ino->dentry); + dput(dentry); d_inode(dentry)->i_size = 0; clear_nlink(d_inode(dentry)); From 5c51625251702253ea37a72add4e0d98fd3583b8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:40 +0000 Subject: [PATCH 31/54] convert autofs creation/removal is via normal VFS paths; make ->mkdir() and ->symlink() use d_make_persistent(); ->rmdir() and ->unlink() - d_make_discardable() instead of dput() and that's it. d_make_persistent() works for unhashed just fine... Note that only persistent dentries are ever hashed there; unusual absence of ->d_delete() in dentry_operations is due to that - anything that has refcount reach 0 will be unhashed there, so it won't get to checking ->d_delete anyway.
Signed-off-by: Al Viro --- fs/autofs/inode.c | 2 +- fs/autofs/root.c | 10 ++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index f5c16ffba0134..eb86f893efbb8 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -55,7 +55,7 @@ void autofs_kill_sb(struct super_block *sb) } pr_debug("shutting down\n"); - kill_litter_super(sb); + kill_anon_super(sb); if (sbi) kfree_rcu(sbi, rcu); } diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 39794633d4849..fb6c8215456c2 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -594,9 +594,8 @@ static int autofs_dir_symlink(struct mnt_idmap *idmap, } inode->i_private = cp; inode->i_size = size; - d_add(dentry, inode); - dget(dentry); + d_make_persistent(dentry, inode); p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count++; @@ -627,7 +626,7 @@ static int autofs_dir_unlink(struct inode *dir, struct dentry *dentry) p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count--; - dput(dentry); + d_make_discardable(dentry); d_inode(dentry)->i_size = 0; clear_nlink(d_inode(dentry)); @@ -709,7 +708,7 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry) p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count--; - dput(dentry); + d_make_discardable(dentry); d_inode(dentry)->i_size = 0; clear_nlink(d_inode(dentry)); @@ -739,12 +738,11 @@ static struct dentry *autofs_dir_mkdir(struct mnt_idmap *idmap, inode = autofs_get_inode(dir->i_sb, S_IFDIR | mode); if (!inode) return ERR_PTR(-ENOMEM); - d_add(dentry, inode); if (sbi->version < 5) autofs_set_leaf_automount_flags(dentry); - dget(dentry); + d_make_persistent(dentry, inode); p_ino = autofs_dentry_ino(dentry->d_parent); p_ino->count++; inc_nlink(dir); From 8b0eee188bec9b35894063abd23581c39c8f620c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:41 +0000 Subject: [PATCH 32/54] convert binfmt_misc removals are done with locked_recursive_removal(); switch creations to 
simple_start_creating()/d_make_persistent()/simple_done_creating() and take them to a helper (add_entry()), while we are at it - simpler control flow that way. Signed-off-by: Al Viro --- fs/binfmt_misc.c | 69 ++++++++++++++++++++++-------------------------- 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index a839f960cd4a0..2093f9dcd3210 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -765,14 +765,41 @@ static const struct file_operations bm_entry_operations = { /* /register */ +/* add to filesystem */ +static int add_entry(Node *e, struct super_block *sb) +{ + struct dentry *dentry = simple_start_creating(sb->s_root, e->name); + struct inode *inode; + struct binfmt_misc *misc; + + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + + inode = bm_get_inode(sb, S_IFREG | 0644); + if (unlikely(!inode)) { + simple_done_creating(dentry); + return -ENOMEM; + } + + refcount_set(&e->users, 1); + e->dentry = dentry; + inode->i_private = e; + inode->i_fop = &bm_entry_operations; + + d_make_persistent(dentry, inode); + misc = i_binfmt_misc(inode); + write_lock(&misc->entries_lock); + list_add(&e->list, &misc->entries); + write_unlock(&misc->entries_lock); + simple_done_creating(dentry); + return 0; +} + static ssize_t bm_register_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { Node *e; - struct inode *inode; struct super_block *sb = file_inode(file)->i_sb; - struct dentry *root = sb->s_root, *dentry; - struct binfmt_misc *misc; int err = 0; struct file *f = NULL; @@ -803,39 +830,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, e->interp_file = f; } - inode_lock(d_inode(root)); - dentry = lookup_noperm(&QSTR(e->name), root); - err = PTR_ERR(dentry); - if (IS_ERR(dentry)) - goto out; - - err = -EEXIST; - if (d_really_is_positive(dentry)) - goto out2; - - inode = bm_get_inode(sb, S_IFREG | 0644); - - err = -ENOMEM; - if (!inode) - goto out2; - - 
refcount_set(&e->users, 1); - e->dentry = dget(dentry); - inode->i_private = e; - inode->i_fop = &bm_entry_operations; - - d_instantiate(dentry, inode); - misc = i_binfmt_misc(inode); - write_lock(&misc->entries_lock); - list_add(&e->list, &misc->entries); - write_unlock(&misc->entries_lock); - - err = 0; -out2: - dput(dentry); -out: - inode_unlock(d_inode(root)); - + err = add_entry(e, sb); if (err) { if (f) filp_close(f, NULL); @@ -1028,7 +1023,7 @@ static struct file_system_type bm_fs_type = { .name = "binfmt_misc", .init_fs_context = bm_init_fs_context, .fs_flags = FS_USERNS_MOUNT, - .kill_sb = kill_litter_super, + .kill_sb = kill_anon_super, }; MODULE_ALIAS_FS("binfmt_misc"); From 1acee377cc69c37746abecf16563642a37317ae5 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:42 +0000 Subject: [PATCH 33/54] selinuxfs: don't stash the dentry of /policy_capabilities Don't bother to store the dentry of /policy_capabilities - it belongs to invariant part of tree and we only use it to populate that directory, so there's no reason to keep it around afterwards. Same situation as with /avc, /ss, etc. There are two directories that get replaced on policy load - /class and /booleans. These we need to stash (and update the pointers on policy reload); /policy_capabilities is not in the same boat. 
Acked-by: Paul Moore Reviewed-by: Stephen Smalley Tested-by: Stephen Smalley Signed-off-by: Al Viro --- security/selinux/selinuxfs.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 232e087bce3ee..b39e919c27b1c 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -75,7 +75,6 @@ struct selinux_fs_info { struct dentry *class_dir; unsigned long last_class_ino; bool policy_opened; - struct dentry *policycap_dir; unsigned long last_ino; struct super_block *sb; }; @@ -117,7 +116,6 @@ static void selinux_fs_info_free(struct super_block *sb) #define BOOL_DIR_NAME "booleans" #define CLASS_DIR_NAME "class" -#define POLICYCAP_DIR_NAME "policy_capabilities" #define TMPBUFLEN 12 static ssize_t sel_read_enforce(struct file *filp, char __user *buf, @@ -1871,23 +1869,24 @@ static int sel_make_classes(struct selinux_policy *newpolicy, return rc; } -static int sel_make_policycap(struct selinux_fs_info *fsi) +static int sel_make_policycap(struct dentry *dir) { + struct super_block *sb = dir->d_sb; unsigned int iter; struct dentry *dentry = NULL; struct inode *inode = NULL; for (iter = 0; iter <= POLICYDB_CAP_MAX; iter++) { if (iter < ARRAY_SIZE(selinux_policycap_names)) - dentry = d_alloc_name(fsi->policycap_dir, + dentry = d_alloc_name(dir, selinux_policycap_names[iter]); else - dentry = d_alloc_name(fsi->policycap_dir, "unknown"); + dentry = d_alloc_name(dir, "unknown"); if (dentry == NULL) return -ENOMEM; - inode = sel_make_inode(fsi->sb, S_IFREG | 0444); + inode = sel_make_inode(sb, S_IFREG | 0444); if (inode == NULL) { dput(dentry); return -ENOMEM; @@ -2071,15 +2070,13 @@ static int sel_fill_super(struct super_block *sb, struct fs_context *fc) goto err; } - fsi->policycap_dir = sel_make_dir(sb->s_root, POLICYCAP_DIR_NAME, - &fsi->last_ino); - if (IS_ERR(fsi->policycap_dir)) { - ret = PTR_ERR(fsi->policycap_dir); - fsi->policycap_dir = NULL; + 
dentry = sel_make_dir(sb->s_root, "policy_capabilities", &fsi->last_ino); + if (IS_ERR(dentry)) { + ret = PTR_ERR(dentry); goto err; } - ret = sel_make_policycap(fsi); + ret = sel_make_policycap(dentry); if (ret) { pr_err("SELinux: failed to load policy capabilities\n"); goto err; From 19df9e11191a634c19e2b4435539c7b8468b1b08 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:43 +0000 Subject: [PATCH 34/54] selinuxfs: new helper for attaching files to tree allocating dentry after the inode has been set up reduces the amount of boilerplate - "attach this inode under that name and this parent or drop inode in case of failure" simplifies quite a few places. Acked-by: Paul Moore Reviewed-by: Stephen Smalley Tested-by: Stephen Smalley Signed-off-by: Al Viro --- security/selinux/selinuxfs.c | 160 +++++++++++++++-------------------- 1 file changed, 66 insertions(+), 94 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index b39e919c27b1c..f088776dbbd36 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1197,6 +1197,25 @@ static struct inode *sel_make_inode(struct super_block *sb, umode_t mode) return ret; } +static struct dentry *sel_attach(struct dentry *parent, const char *name, + struct inode *inode) +{ + struct dentry *dentry = d_alloc_name(parent, name); + if (unlikely(!dentry)) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + d_add(dentry, inode); + return dentry; +} + +static int sel_attach_file(struct dentry *parent, const char *name, + struct inode *inode) +{ + struct dentry *dentry = sel_attach(parent, name, inode); + return PTR_ERR_OR_ZERO(dentry); +} + static ssize_t sel_read_bool(struct file *filep, char __user *buf, size_t count, loff_t *ppos) { @@ -1356,8 +1375,7 @@ static int sel_make_bools(struct selinux_policy *newpolicy, struct dentry *bool_ *bool_num = num; *bool_pending_names = names; - for (i = 0; i < num; i++) { - struct dentry *dentry; + for (i = 0; !ret && i < num; 
i++) { struct inode *inode; struct inode_security_struct *isec; ssize_t len; @@ -1368,15 +1386,9 @@ static int sel_make_bools(struct selinux_policy *newpolicy, struct dentry *bool_ ret = -ENAMETOOLONG; break; } - dentry = d_alloc_name(bool_dir, names[i]); - if (!dentry) { - ret = -ENOMEM; - break; - } inode = sel_make_inode(bool_dir->d_sb, S_IFREG | S_IRUGO | S_IWUSR); if (!inode) { - dput(dentry); ret = -ENOMEM; break; } @@ -1394,7 +1406,8 @@ static int sel_make_bools(struct selinux_policy *newpolicy, struct dentry *bool_ isec->initialized = LABEL_INITIALIZED; inode->i_fop = &sel_bool_ops; inode->i_ino = i|SEL_BOOL_INO_OFFSET; - d_add(dentry, inode); + + ret = sel_attach_file(bool_dir, names[i], inode); } out: free_page((unsigned long)page); @@ -1579,6 +1592,7 @@ static int sel_make_avc_files(struct dentry *dir) struct super_block *sb = dir->d_sb; struct selinux_fs_info *fsi = sb->s_fs_info; unsigned int i; + int err = 0; static const struct tree_descr files[] = { { "cache_threshold", &sel_avc_cache_threshold_ops, S_IRUGO|S_IWUSR }, @@ -1588,26 +1602,20 @@ static int sel_make_avc_files(struct dentry *dir) #endif }; - for (i = 0; i < ARRAY_SIZE(files); i++) { + for (i = 0; !err && i < ARRAY_SIZE(files); i++) { struct inode *inode; - struct dentry *dentry; - - dentry = d_alloc_name(dir, files[i].name); - if (!dentry) - return -ENOMEM; inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode); - if (!inode) { - dput(dentry); + if (!inode) return -ENOMEM; - } inode->i_fop = files[i].ops; inode->i_ino = ++fsi->last_ino; - d_add(dentry, inode); + + err = sel_attach_file(dir, files[i].name, inode); } - return 0; + return err; } static int sel_make_ss_files(struct dentry *dir) @@ -1615,30 +1623,25 @@ static int sel_make_ss_files(struct dentry *dir) struct super_block *sb = dir->d_sb; struct selinux_fs_info *fsi = sb->s_fs_info; unsigned int i; + int err = 0; static const struct tree_descr files[] = { { "sidtab_hash_stats", &sel_sidtab_hash_stats_ops, S_IRUGO }, }; - for (i 
= 0; i < ARRAY_SIZE(files); i++) { + for (i = 0; !err && i < ARRAY_SIZE(files); i++) { struct inode *inode; - struct dentry *dentry; - - dentry = d_alloc_name(dir, files[i].name); - if (!dentry) - return -ENOMEM; inode = sel_make_inode(dir->d_sb, S_IFREG|files[i].mode); - if (!inode) { - dput(dentry); + if (!inode) return -ENOMEM; - } inode->i_fop = files[i].ops; inode->i_ino = ++fsi->last_ino; - d_add(dentry, inode); + + err = sel_attach_file(dir, files[i].name, inode); } - return 0; + return err; } static ssize_t sel_read_initcon(struct file *file, char __user *buf, @@ -1666,30 +1669,25 @@ static const struct file_operations sel_initcon_ops = { static int sel_make_initcon_files(struct dentry *dir) { unsigned int i; + int err = 0; - for (i = 1; i <= SECINITSID_NUM; i++) { - struct inode *inode; - struct dentry *dentry; + for (i = 1; !err && i <= SECINITSID_NUM; i++) { const char *s = security_get_initial_sid_context(i); + struct inode *inode; if (!s) continue; - dentry = d_alloc_name(dir, s); - if (!dentry) - return -ENOMEM; inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO); - if (!inode) { - dput(dentry); + if (!inode) return -ENOMEM; - } inode->i_fop = &sel_initcon_ops; inode->i_ino = i|SEL_INITCON_INO_OFFSET; - d_add(dentry, inode); + err = sel_attach_file(dir, s, inode); } - return 0; + return err; } static inline unsigned long sel_class_to_ino(u16 class) @@ -1771,29 +1769,21 @@ static int sel_make_perm_files(struct selinux_policy *newpolicy, if (rc) return rc; - for (i = 0; i < nperms; i++) { + for (i = 0; !rc && i < nperms; i++) { struct inode *inode; - struct dentry *dentry; - rc = -ENOMEM; - dentry = d_alloc_name(dir, perms[i]); - if (!dentry) - goto out; - - rc = -ENOMEM; inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO); if (!inode) { - dput(dentry); - goto out; + rc = -ENOMEM; + break; } inode->i_fop = &sel_perm_ops; /* i+1 since perm values are 1-indexed */ inode->i_ino = sel_perm_to_ino(classvalue, i + 1); - d_add(dentry, inode); + + rc = 
sel_attach_file(dir, perms[i], inode); } - rc = 0; -out: for (i = 0; i < nperms; i++) kfree(perms[i]); kfree(perms); @@ -1808,20 +1798,18 @@ static int sel_make_class_dir_entries(struct selinux_policy *newpolicy, struct selinux_fs_info *fsi = sb->s_fs_info; struct dentry *dentry = NULL; struct inode *inode = NULL; - - dentry = d_alloc_name(dir, "index"); - if (!dentry) - return -ENOMEM; + int err; inode = sel_make_inode(dir->d_sb, S_IFREG|S_IRUGO); - if (!inode) { - dput(dentry); + if (!inode) return -ENOMEM; - } inode->i_fop = &sel_class_ops; inode->i_ino = sel_class_to_ino(index); - d_add(dentry, inode); + + err = sel_attach_file(dir, "index", inode); + if (err) + return err; dentry = sel_make_dir(dir, "perms", &fsi->last_class_ino); if (IS_ERR(dentry)) @@ -1873,58 +1861,47 @@ static int sel_make_policycap(struct dentry *dir) { struct super_block *sb = dir->d_sb; unsigned int iter; - struct dentry *dentry = NULL; struct inode *inode = NULL; + int err = 0; + + for (iter = 0; !err && iter <= POLICYDB_CAP_MAX; iter++) { + const char *name; - for (iter = 0; iter <= POLICYDB_CAP_MAX; iter++) { if (iter < ARRAY_SIZE(selinux_policycap_names)) - dentry = d_alloc_name(dir, - selinux_policycap_names[iter]); + name = selinux_policycap_names[iter]; else - dentry = d_alloc_name(dir, "unknown"); - - if (dentry == NULL) - return -ENOMEM; + name = "unknown"; inode = sel_make_inode(sb, S_IFREG | 0444); - if (inode == NULL) { - dput(dentry); + if (!inode) return -ENOMEM; - } inode->i_fop = &sel_policycap_ops; inode->i_ino = iter | SEL_POLICYCAP_INO_OFFSET; - d_add(dentry, inode); + err = sel_attach_file(dir, name, inode); } - return 0; + return err; } static struct dentry *sel_make_dir(struct dentry *dir, const char *name, unsigned long *ino) { - struct dentry *dentry = d_alloc_name(dir, name); struct inode *inode; - if (!dentry) - return ERR_PTR(-ENOMEM); - inode = sel_make_inode(dir->d_sb, S_IFDIR | S_IRUGO | S_IXUGO); - if (!inode) { - dput(dentry); + if (!inode) return 
ERR_PTR(-ENOMEM); - } inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; inode->i_ino = ++(*ino); /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); - d_add(dentry, inode); /* bump link count on parent directory, too */ inc_nlink(d_inode(dir)); - return dentry; + return sel_attach(dir, name, inode); } static int reject_all(struct mnt_idmap *idmap, struct inode *inode, int mask) @@ -2012,17 +1989,10 @@ static int sel_fill_super(struct super_block *sb, struct fs_context *fc) goto err; } - ret = -ENOMEM; - dentry = d_alloc_name(sb->s_root, NULL_FILE_NAME); - if (!dentry) - goto err; - ret = -ENOMEM; inode = sel_make_inode(sb, S_IFCHR | S_IRUGO | S_IWUGO); - if (!inode) { - dput(dentry); + if (!inode) goto err; - } inode->i_ino = ++fsi->last_ino; isec = selinux_inode(inode); @@ -2031,7 +2001,9 @@ static int sel_fill_super(struct super_block *sb, struct fs_context *fc) isec->initialized = LABEL_INITIALIZED; init_special_inode(inode, S_IFCHR | S_IRUGO | S_IWUGO, MKDEV(MEM_MAJOR, 3)); - d_add(dentry, inode); + ret = sel_attach_file(sb->s_root, NULL_FILE_NAME, inode); + if (ret) + goto err; dentry = sel_make_dir(sb->s_root, "avc", &fsi->last_ino); if (IS_ERR(dentry)) { From 0e2b93bf627e4f459bdb1e1b4ee7d2ef2f0c2af9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:44 +0000 Subject: [PATCH 35/54] convert selinuxfs Tree has invariant part + two subtrees that get replaced upon each policy load. Invariant parts stay for the lifetime of filesystem, these two subdirs - from policy load to policy load (serialized on lock_rename(root, ...)). All object creations are via d_alloc_name()+d_add() inside selinuxfs, all removals are via simple_recursive_removal(). Turn those d_add() into d_make_persistent()+dput() and that's mostly it. 
Acked-by: Paul Moore Reviewed-by: Stephen Smalley Tested-by: Stephen Smalley Signed-off-by: Al Viro --- security/selinux/selinuxfs.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index f088776dbbd36..eae565358db41 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1205,7 +1205,8 @@ static struct dentry *sel_attach(struct dentry *parent, const char *name, iput(inode); return ERR_PTR(-ENOMEM); } - d_add(dentry, inode); + d_make_persistent(dentry, inode); + dput(dentry); return dentry; } @@ -1934,10 +1935,11 @@ static struct dentry *sel_make_swapover_dir(struct super_block *sb, /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); inode_lock(sb->s_root->d_inode); - d_add(dentry, inode); + d_make_persistent(dentry, inode); inc_nlink(sb->s_root->d_inode); inode_unlock(sb->s_root->d_inode); - return dentry; + dput(dentry); + return dentry; // borrowed } #define NULL_FILE_NAME "null" @@ -2080,7 +2082,7 @@ static int sel_init_fs_context(struct fs_context *fc) static void sel_kill_sb(struct super_block *sb) { selinux_fs_info_free(sb); - kill_litter_super(sb); + kill_anon_super(sb); } static struct file_system_type sel_fs_type = { From 6390795bbd743799dcf47271d93ffee888670742 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:45 +0000 Subject: [PATCH 36/54] functionfs: don't abuse ffs_data_closed() on fs shutdown ffs_data_closed() has seriously confusing logic in it: in addition to the normal "decrement a counter and do some work if it hits zero" there's "... and if it has somehow become negative, do that" bit. It's not a race, despite smelling rather fishy. What really happens is that in addition to "call that on close of files there, to match the increments of counter on opens" there's one call in ->kill_sb().
Counter starts at 0 and never goes negative over the lifetime of filesystem (or we have much worse problems everywhere - ->release() call of some file somehow unpaired with successful ->open() of the same). At the filesystem shutdown it will be 0 or, again, we have much worse problems - filesystem instance destroyed with files on it still open. In other words, at that call and at that call alone the decrement would go from 0 to -1, hitting that chunk (and not hitting the "if it hits 0" part). So that check is a weirdly spelled "called from ffs_kill_sb()". Just expand the call in the latter and kill the misplaced chunk in ffs_data_closed(). Reviewed-by: Greg Kroah-Hartman Signed-off-by: Al Viro --- drivers/usb/gadget/function/f_fs.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 47cfbe41fdff8..43926aca8a40c 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2071,12 +2071,18 @@ static int ffs_fs_init_fs_context(struct fs_context *fc) return 0; } +static void ffs_data_reset(struct ffs_data *ffs); + static void ffs_fs_kill_sb(struct super_block *sb) { kill_litter_super(sb); - if (sb->s_fs_info) - ffs_data_closed(sb->s_fs_info); + if (sb->s_fs_info) { + struct ffs_data *ffs = sb->s_fs_info; + ffs->state = FFS_CLOSING; + ffs_data_reset(ffs); + ffs_data_put(ffs); + } } static struct file_system_type ffs_fs_type = { @@ -2114,7 +2120,6 @@ static void functionfs_cleanup(void) /* ffs_data and ffs_function construction and destruction code **************/ static void ffs_data_clear(struct ffs_data *ffs); -static void ffs_data_reset(struct ffs_data *ffs); static void ffs_data_get(struct ffs_data *ffs) { @@ -2171,11 +2176,6 @@ static void ffs_data_closed(struct ffs_data *ffs) ffs_data_reset(ffs); } } - if (atomic_read(&ffs->opened) < 0) { - ffs->state = FFS_CLOSING; - ffs_data_reset(ffs); - } - ffs_data_put(ffs); } From 
52186ba2b888a779d225b5ec7c29fe1f249ccb94 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:46 +0000 Subject: [PATCH 37/54] functionfs: don't bother with ffs->ref in ffs_data_{opened,closed}() A reference is held by the superblock (it's dropped in ffs_kill_sb()) and filesystem will not get to ->kill_sb() while there are any opened files, TYVM... Reviewed-by: Greg Kroah-Hartman Signed-off-by: Al Viro --- drivers/usb/gadget/function/f_fs.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 43926aca8a40c..0bcff49e1f11e 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2128,7 +2128,6 @@ static void ffs_data_get(struct ffs_data *ffs) static void ffs_data_opened(struct ffs_data *ffs) { - refcount_inc(&ffs->ref); if (atomic_add_return(1, &ffs->opened) == 1 && ffs->state == FFS_DEACTIVATED) { ffs->state = FFS_CLOSING; @@ -2153,11 +2152,11 @@ static void ffs_data_put(struct ffs_data *ffs) static void ffs_data_closed(struct ffs_data *ffs) { - struct ffs_epfile *epfiles; - unsigned long flags; - if (atomic_dec_and_test(&ffs->opened)) { if (ffs->no_disconnect) { + struct ffs_epfile *epfiles; + unsigned long flags; + ffs->state = FFS_DEACTIVATED; spin_lock_irqsave(&ffs->eps_lock, flags); epfiles = ffs->epfiles; @@ -2176,7 +2175,6 @@ static void ffs_data_closed(struct ffs_data *ffs) ffs_data_reset(ffs); } } - ffs_data_put(ffs); } static struct ffs_data *ffs_data_new(const char *dev_name) From e4e5e1d0f79c3a6326233a2b37d7f912591032de Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:47 +0000 Subject: [PATCH 38/54] functionfs: need to cancel ->reset_work in ->kill_sb() ... otherwise we just might free ffs with ffs->reset_work still on queue. 
That needs to be done after ffs_data_reset() - that's the cutoff point for configfs accesses (serialized on gadget_info->lock), which is where the schedule_work() would come from. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Al Viro --- drivers/usb/gadget/function/f_fs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 0bcff49e1f11e..27860fc0fd7d1 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -2081,6 +2081,9 @@ ffs_fs_kill_sb(struct super_block *sb) struct ffs_data *ffs = sb->s_fs_info; ffs->state = FFS_CLOSING; ffs_data_reset(ffs); + // no configfs accesses from that point on, + // so no further schedule_work() is possible + cancel_work_sync(&ffs->reset_work); ffs_data_put(ffs); } } From 3dde9e2b7ca6335ff446caa0a1d9794bad9d2877 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:48 +0000 Subject: [PATCH 39/54] functionfs: fix the open/removal races ffs_epfile_open() can race with removal, ending up with file->private_data pointing to freed object. There is a total count of opened files on functionfs (both ep0 and dynamic ones) and when it hits zero, dynamic files get removed. Unfortunately, that removal can happen while another thread is in ffs_epfile_open(), but has not incremented the count yet. In that case open will succeed, leaving us with UAF on any subsequent read() or write(). The root cause is that ffs->opened is misused; atomic_dec_and_test() vs. atomic_add_return() is not a good idea, when object remains visible all along. To untangle that * serialize openers on ffs->mutex (both for ep0 and for dynamic files) * have dynamic ones use atomic_inc_not_zero() and fail if we had zero ->opened; in that case the file we are opening is doomed. * have the inodes of dynamic files marked on removal (from the callback of simple_recursive_removal()) - clear ->i_private there. 
* have open of dynamic ones verify they hadn't been already removed, along with checking that state is FFS_ACTIVE. Reviewed-by: Greg Kroah-Hartman Signed-off-by: Al Viro --- drivers/usb/gadget/function/f_fs.c | 53 ++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 27860fc0fd7d1..c7cb23a15fd08 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -640,13 +640,22 @@ static ssize_t ffs_ep0_read(struct file *file, char __user *buf, static int ffs_ep0_open(struct inode *inode, struct file *file) { - struct ffs_data *ffs = inode->i_private; + struct ffs_data *ffs = inode->i_sb->s_fs_info; + int ret; - if (ffs->state == FFS_CLOSING) - return -EBUSY; + /* Acquire mutex */ + ret = ffs_mutex_lock(&ffs->mutex, file->f_flags & O_NONBLOCK); + if (ret < 0) + return ret; - file->private_data = ffs; ffs_data_opened(ffs); + if (ffs->state == FFS_CLOSING) { + ffs_data_closed(ffs); + mutex_unlock(&ffs->mutex); + return -EBUSY; + } + mutex_unlock(&ffs->mutex); + file->private_data = ffs; return stream_open(inode, file); } @@ -1193,14 +1202,33 @@ static ssize_t ffs_epfile_io(struct file *file, struct ffs_io_data *io_data) static int ffs_epfile_open(struct inode *inode, struct file *file) { - struct ffs_epfile *epfile = inode->i_private; + struct ffs_data *ffs = inode->i_sb->s_fs_info; + struct ffs_epfile *epfile; + int ret; - if (WARN_ON(epfile->ffs->state != FFS_ACTIVE)) + /* Acquire mutex */ + ret = ffs_mutex_lock(&ffs->mutex, file->f_flags & O_NONBLOCK); + if (ret < 0) + return ret; + + if (!atomic_inc_not_zero(&ffs->opened)) { + mutex_unlock(&ffs->mutex); + return -ENODEV; + } + /* + * we want the state to be FFS_ACTIVE; FFS_ACTIVE alone is + * not enough, though - we might have been through FFS_CLOSING + * and back to FFS_ACTIVE, with our file already removed. 
+ */ + epfile = smp_load_acquire(&inode->i_private); + if (unlikely(ffs->state != FFS_ACTIVE || !epfile)) { + mutex_unlock(&ffs->mutex); + ffs_data_closed(ffs); return -ENODEV; + } + mutex_unlock(&ffs->mutex); file->private_data = epfile; - ffs_data_opened(epfile->ffs); - return stream_open(inode, file); } @@ -1332,7 +1360,7 @@ static void ffs_dmabuf_put(struct dma_buf_attachment *attach) static int ffs_epfile_release(struct inode *inode, struct file *file) { - struct ffs_epfile *epfile = inode->i_private; + struct ffs_epfile *epfile = file->private_data; struct ffs_dmabuf_priv *priv, *tmp; struct ffs_data *ffs = epfile->ffs; @@ -2353,6 +2381,11 @@ static int ffs_epfiles_create(struct ffs_data *ffs) return 0; } +static void clear_one(struct dentry *dentry) +{ + smp_store_release(&dentry->d_inode->i_private, NULL); +} + static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count) { struct ffs_epfile *epfile = epfiles; @@ -2360,7 +2393,7 @@ static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count) for (; count; --count, ++epfile) { BUG_ON(mutex_is_locked(&epfile->mutex)); if (epfile->dentry) { - simple_recursive_removal(epfile->dentry, NULL); + simple_recursive_removal(epfile->dentry, clear_one); epfile->dentry = NULL; } } From b1b850058bce6f9a50d63c14cf529739b3553077 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:49 +0000 Subject: [PATCH 40/54] functionfs: switch to simple_remove_by_name() No need to return dentry from ffs_sb_create_file() or keep it around afterwards. To avoid subtle issues with getting to ffs from epfiles in ffs_epfiles_destroy(), pass the superblock as explicit argument. Callers have it anyway. 
Reviewed-by: Greg Kroah-Hartman Signed-off-by: Al Viro --- drivers/usb/gadget/function/f_fs.c | 51 +++++++++++++----------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index c7cb23a15fd08..40868ceb765c7 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -160,8 +160,6 @@ struct ffs_epfile { struct ffs_data *ffs; struct ffs_ep *ep; /* P: ffs->eps_lock */ - struct dentry *dentry; - /* * Buffer for holding data from partial reads which may happen since * we’re rounding user read requests to a multiple of a max packet size. @@ -271,11 +269,11 @@ struct ffs_desc_helper { }; static int __must_check ffs_epfiles_create(struct ffs_data *ffs); -static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count); +static void ffs_epfiles_destroy(struct super_block *sb, + struct ffs_epfile *epfiles, unsigned count); -static struct dentry * -ffs_sb_create_file(struct super_block *sb, const char *name, void *data, - const struct file_operations *fops); +static int ffs_sb_create_file(struct super_block *sb, const char *name, + void *data, const struct file_operations *fops); /* Devices management *******************************************************/ @@ -1894,9 +1892,8 @@ ffs_sb_make_inode(struct super_block *sb, void *data, } /* Create "regular" file */ -static struct dentry *ffs_sb_create_file(struct super_block *sb, - const char *name, void *data, - const struct file_operations *fops) +static int ffs_sb_create_file(struct super_block *sb, const char *name, + void *data, const struct file_operations *fops) { struct ffs_data *ffs = sb->s_fs_info; struct dentry *dentry; @@ -1904,16 +1901,16 @@ static struct dentry *ffs_sb_create_file(struct super_block *sb, dentry = d_alloc_name(sb->s_root, name); if (!dentry) - return NULL; + return -ENOMEM; inode = ffs_sb_make_inode(sb, data, fops, NULL, &ffs->file_perms); if (!inode) { dput(dentry); 
- return NULL; + return -ENOMEM; } d_add(dentry, inode); - return dentry; + return 0; } /* Super block */ @@ -1956,10 +1953,7 @@ static int ffs_sb_fill(struct super_block *sb, struct fs_context *fc) return -ENOMEM; /* EP0 file */ - if (!ffs_sb_create_file(sb, "ep0", ffs, &ffs_ep0_operations)) - return -ENOMEM; - - return 0; + return ffs_sb_create_file(sb, "ep0", ffs, &ffs_ep0_operations); } enum { @@ -2196,7 +2190,7 @@ static void ffs_data_closed(struct ffs_data *ffs) flags); if (epfiles) - ffs_epfiles_destroy(epfiles, + ffs_epfiles_destroy(ffs->sb, epfiles, ffs->eps_count); if (ffs->setup_state == FFS_SETUP_PENDING) @@ -2255,7 +2249,7 @@ static void ffs_data_clear(struct ffs_data *ffs) * copy of epfile will save us from use-after-free. */ if (epfiles) { - ffs_epfiles_destroy(epfiles, ffs->eps_count); + ffs_epfiles_destroy(ffs->sb, epfiles, ffs->eps_count); ffs->epfiles = NULL; } @@ -2352,6 +2346,7 @@ static int ffs_epfiles_create(struct ffs_data *ffs) { struct ffs_epfile *epfile, *epfiles; unsigned i, count; + int err; count = ffs->eps_count; epfiles = kcalloc(count, sizeof(*epfiles), GFP_KERNEL); @@ -2368,12 +2363,11 @@ static int ffs_epfiles_create(struct ffs_data *ffs) sprintf(epfile->name, "ep%02x", ffs->eps_addrmap[i]); else sprintf(epfile->name, "ep%u", i); - epfile->dentry = ffs_sb_create_file(ffs->sb, epfile->name, - epfile, - &ffs_epfile_operations); - if (!epfile->dentry) { - ffs_epfiles_destroy(epfiles, i - 1); - return -ENOMEM; + err = ffs_sb_create_file(ffs->sb, epfile->name, + epfile, &ffs_epfile_operations); + if (err) { + ffs_epfiles_destroy(ffs->sb, epfiles, i - 1); + return err; } } @@ -2386,16 +2380,15 @@ static void clear_one(struct dentry *dentry) smp_store_release(&dentry->d_inode->i_private, NULL); } -static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count) +static void ffs_epfiles_destroy(struct super_block *sb, + struct ffs_epfile *epfiles, unsigned count) { struct ffs_epfile *epfile = epfiles; + struct dentry *root = 
sb->s_root; for (; count; --count, ++epfile) { BUG_ON(mutex_is_locked(&epfile->mutex)); - if (epfile->dentry) { - simple_recursive_removal(epfile->dentry, clear_one); - epfile->dentry = NULL; - } + simple_remove_by_name(root, epfile->name, clear_one); } kfree(epfiles); From 626cc516850b2dc9d01a3c56c654b6edb8feb947 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:50 +0000 Subject: [PATCH 41/54] convert functionfs All files are regular; ep0 is there all along, other ep* may appear and go away during the filesystem lifetime; all of those are guaranteed to be gone by the time we umount it. Object creation is in ffs_sb_create_file(), removals - at ->kill_sb() time (for ep0) or by simple_remove_by_name() from ffs_epfiles_destroy() (for the rest of them). Switch ffs_sb_create_file() to simple_start_creating()/d_make_persistent()/ simple_done_creating() and that's it. Signed-off-by: Al Viro --- drivers/usb/gadget/function/f_fs.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 40868ceb765c7..4bf61017b42d7 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -1899,17 +1899,18 @@ static int ffs_sb_create_file(struct super_block *sb, const char *name, struct dentry *dentry; struct inode *inode; - dentry = d_alloc_name(sb->s_root, name); - if (!dentry) - return -ENOMEM; - inode = ffs_sb_make_inode(sb, data, fops, NULL, &ffs->file_perms); - if (!inode) { - dput(dentry); + if (!inode) return -ENOMEM; + dentry = simple_start_creating(sb->s_root, name); + if (IS_ERR(dentry)) { + iput(inode); + return PTR_ERR(dentry); } - d_add(dentry, inode); + d_make_persistent(dentry, inode); + + simple_done_creating(dentry); return 0; } @@ -2098,7 +2099,7 @@ static void ffs_data_reset(struct ffs_data *ffs); static void ffs_fs_kill_sb(struct super_block *sb) { - kill_litter_super(sb); + kill_anon_super(sb); if (sb->s_fs_info) { 
struct ffs_data *ffs = sb->s_fs_info; ffs->state = FFS_CLOSING; From 450e24747f05d0906c1a37bdf8ea310db05eab36 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:51 +0000 Subject: [PATCH 42/54] gadgetfs: switch to simple_remove_by_name() No need to return dentry from gadgetfs_create_file() or keep it around afterwards. Signed-off-by: Al Viro --- drivers/usb/gadget/legacy/inode.c | 32 +++++++++++++------------------ 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index 13c3da49348c5..bcc25f13483f4 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -150,7 +150,6 @@ struct dev_data { void *buf; wait_queue_head_t wait; struct super_block *sb; - struct dentry *dentry; /* except this scratch i/o buffer for ep0 */ u8 rbuf[RBUF_SIZE]; @@ -208,7 +207,6 @@ struct ep_data { struct usb_endpoint_descriptor desc, hs_desc; struct list_head epfiles; wait_queue_head_t wait; - struct dentry *dentry; }; static inline void get_ep (struct ep_data *data) @@ -1561,16 +1559,12 @@ static void destroy_ep_files (struct dev_data *dev) spin_lock_irq (&dev->lock); while (!list_empty(&dev->epfiles)) { struct ep_data *ep; - struct dentry *dentry; /* break link to FS */ ep = list_first_entry (&dev->epfiles, struct ep_data, epfiles); list_del_init (&ep->epfiles); spin_unlock_irq (&dev->lock); - dentry = ep->dentry; - ep->dentry = NULL; - /* break link to controller */ mutex_lock(&ep->lock); if (ep->state == STATE_EP_ENABLED) @@ -1581,10 +1575,11 @@ static void destroy_ep_files (struct dev_data *dev) mutex_unlock(&ep->lock); wake_up (&ep->wait); - put_ep (ep); /* break link to dcache */ - simple_recursive_removal(dentry, NULL); + simple_remove_by_name(dev->sb->s_root, ep->name, NULL); + + put_ep (ep); spin_lock_irq (&dev->lock); } @@ -1592,14 +1587,14 @@ static void destroy_ep_files (struct dev_data *dev) } -static struct dentry * -gadgetfs_create_file (struct 
super_block *sb, char const *name, +static int gadgetfs_create_file (struct super_block *sb, char const *name, void *data, const struct file_operations *fops); static int activate_ep_files (struct dev_data *dev) { struct usb_ep *ep; struct ep_data *data; + int err; gadget_for_each_ep (ep, dev->gadget) { @@ -1622,9 +1617,9 @@ static int activate_ep_files (struct dev_data *dev) if (!data->req) goto enomem1; - data->dentry = gadgetfs_create_file (dev->sb, data->name, + err = gadgetfs_create_file (dev->sb, data->name, data, &ep_io_operations); - if (!data->dentry) + if (err) goto enomem2; list_add_tail (&data->epfiles, &dev->epfiles); } @@ -1988,8 +1983,7 @@ gadgetfs_make_inode (struct super_block *sb, /* creates in fs root directory, so non-renamable and non-linkable. * so inode and dentry are paired, until device reconfig. */ -static struct dentry * -gadgetfs_create_file (struct super_block *sb, char const *name, +static int gadgetfs_create_file (struct super_block *sb, char const *name, void *data, const struct file_operations *fops) { struct dentry *dentry; @@ -1997,16 +1991,16 @@ gadgetfs_create_file (struct super_block *sb, char const *name, dentry = d_alloc_name(sb->s_root, name); if (!dentry) - return NULL; + return -ENOMEM; inode = gadgetfs_make_inode (sb, data, fops, S_IFREG | (default_perm & S_IRWXUGO)); if (!inode) { dput(dentry); - return NULL; + return -ENOMEM; } d_add (dentry, inode); - return dentry; + return 0; } static const struct super_operations gadget_fs_operations = { @@ -2059,8 +2053,8 @@ gadgetfs_fill_super (struct super_block *sb, struct fs_context *fc) goto Enomem; dev->sb = sb; - dev->dentry = gadgetfs_create_file(sb, CHIP, dev, &ep0_operations); - if (!dev->dentry) { + rc = gadgetfs_create_file(sb, CHIP, dev, &ep0_operations); + if (rc) { put_dev(dev); goto Enomem; } From 793632284a6a1829005a7d344d895784da743bb7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:52 +0000 Subject: [PATCH 43/54] convert gadgetfs same as 
functionfs Signed-off-by: Al Viro --- drivers/usb/gadget/legacy/inode.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index bcc25f13483f4..62566a8e74515 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -1989,17 +1989,20 @@ static int gadgetfs_create_file (struct super_block *sb, char const *name, struct dentry *dentry; struct inode *inode; - dentry = d_alloc_name(sb->s_root, name); - if (!dentry) - return -ENOMEM; - inode = gadgetfs_make_inode (sb, data, fops, S_IFREG | (default_perm & S_IRWXUGO)); - if (!inode) { - dput(dentry); + if (!inode) return -ENOMEM; + + dentry = simple_start_creating(sb->s_root, name); + if (IS_ERR(dentry)) { + iput(inode); + return PTR_ERR(dentry); } - d_add (dentry, inode); + + d_make_persistent(dentry, inode); + + simple_done_creating(dentry); return 0; } @@ -2096,7 +2099,7 @@ static void gadgetfs_kill_sb (struct super_block *sb) { mutex_lock(&sb_mutex); - kill_litter_super (sb); + kill_anon_super (sb); if (the_device) { put_dev (the_device); the_device = NULL; From 1657c559f09e0754b59c4653c9d26ea24a362005 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:53 +0000 Subject: [PATCH 44/54] hypfs: don't pin dentries twice hypfs dentries end up with refcount 2 when they are not busy. Refcount 1 is enough to keep them pinned, and going that way allows to simplify things nicely: * don't need to drop an extra reference before the call of kill_litter_super() in ->kill_sb(); all we need there is to reset the cleanup list - everything on it will be taken out automatically. 
* we can make use of simple_recursive_removal() on tree rebuilds; just make sure that only children of root end up in the cleanup list and hypfs_delete_tree() becomes much simpler Signed-off-by: Al Viro --- arch/s390/hypfs/inode.c | 41 ++++++++++------------------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 96409573c75dd..a4dc8e13d9997 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -61,33 +61,17 @@ static void hypfs_update_update(struct super_block *sb) static void hypfs_add_dentry(struct dentry *dentry) { - dentry->d_fsdata = hypfs_last_dentry; - hypfs_last_dentry = dentry; -} - -static void hypfs_remove(struct dentry *dentry) -{ - struct dentry *parent; - - parent = dentry->d_parent; - inode_lock(d_inode(parent)); - if (simple_positive(dentry)) { - if (d_is_dir(dentry)) - simple_rmdir(d_inode(parent), dentry); - else - simple_unlink(d_inode(parent), dentry); + if (IS_ROOT(dentry->d_parent)) { + dentry->d_fsdata = hypfs_last_dentry; + hypfs_last_dentry = dentry; } - d_drop(dentry); - dput(dentry); - inode_unlock(d_inode(parent)); } -static void hypfs_delete_tree(struct dentry *root) +static void hypfs_delete_tree(void) { while (hypfs_last_dentry) { - struct dentry *next_dentry; - next_dentry = hypfs_last_dentry->d_fsdata; - hypfs_remove(hypfs_last_dentry); + struct dentry *next_dentry = hypfs_last_dentry->d_fsdata; + simple_recursive_removal(hypfs_last_dentry, NULL); hypfs_last_dentry = next_dentry; } } @@ -184,14 +168,14 @@ static ssize_t hypfs_write_iter(struct kiocb *iocb, struct iov_iter *from) rc = -EBUSY; goto out; } - hypfs_delete_tree(sb->s_root); + hypfs_delete_tree(); if (machine_is_vm()) rc = hypfs_vm_create_files(sb->s_root); else rc = hypfs_diag_create_files(sb->s_root); if (rc) { pr_err("Updating the hypfs tree failed\n"); - hypfs_delete_tree(sb->s_root); + hypfs_delete_tree(); goto out; } hypfs_update_update(sb); @@ -326,13 +310,9 @@ static 
void hypfs_kill_super(struct super_block *sb) { struct hypfs_sb_info *sb_info = sb->s_fs_info; - if (sb->s_root) - hypfs_delete_tree(sb->s_root); - if (sb_info && sb_info->update_file) - hypfs_remove(sb_info->update_file); - kfree(sb->s_fs_info); - sb->s_fs_info = NULL; + hypfs_last_dentry = NULL; kill_litter_super(sb); + kfree(sb_info); } static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, @@ -367,7 +347,6 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, BUG(); inode->i_private = data; d_instantiate(dentry, inode); - dget(dentry); fail: inode_unlock(d_inode(parent)); return dentry; From a4f0093ca0f377cf83e9a77453363d70640a0831 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:54 +0000 Subject: [PATCH 45/54] hypfs: switch hypfs_create_str() to returning int Every single caller only cares about PTR_ERR_OR_ZERO() of return value... Signed-off-by: Al Viro --- arch/s390/hypfs/hypfs.h | 3 +-- arch/s390/hypfs/hypfs_diag_fs.c | 40 +++++++++------------------------ arch/s390/hypfs/hypfs_vm_fs.c | 6 ++--- arch/s390/hypfs/inode.c | 9 ++++---- 4 files changed, 18 insertions(+), 40 deletions(-) diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index 4dc2e068e0ff5..0d109d956015f 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -25,8 +25,7 @@ extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name); extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name, __u64 value); -extern struct dentry *hypfs_create_str(struct dentry *dir, const char *name, - char *string); +extern int hypfs_create_str(struct dentry *dir, const char *name, char *string); /* LPAR Hypervisor */ extern int hypfs_diag_init(void); diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c index ede951dc00858..2178e6060a5da 100644 --- a/arch/s390/hypfs/hypfs_diag_fs.c +++ b/arch/s390/hypfs/hypfs_diag_fs.c @@ -228,8 +228,7 @@ static int 
hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) return PTR_ERR(rc); } diag224_idx2name(cpu_info__ctidx(diag204_get_info_type(), cpu_info), buffer); - rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_ERR_OR_ZERO(rc); + return hypfs_create_str(cpu_dir, "type", buffer); } static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) @@ -276,8 +275,7 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) if (IS_ERR(rc)) return PTR_ERR(rc); diag224_idx2name(phys_cpu__ctidx(diag204_get_info_type(), cpu_info), buffer); - rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_ERR_OR_ZERO(rc); + return hypfs_create_str(cpu_dir, "type", buffer); } static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) @@ -316,41 +314,25 @@ int hypfs_diag_create_files(struct dentry *root) return rc; systems_dir = hypfs_mkdir(root, "systems"); - if (IS_ERR(systems_dir)) { - rc = PTR_ERR(systems_dir); - goto err_out; - } + if (IS_ERR(systems_dir)) + return PTR_ERR(systems_dir); time_hdr = (struct x_info_blk_hdr *)buffer; part_hdr = time_hdr + info_blk_hdr__size(diag204_get_info_type()); for (i = 0; i < info_blk_hdr__npar(diag204_get_info_type(), time_hdr); i++) { part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr); - if (IS_ERR(part_hdr)) { - rc = PTR_ERR(part_hdr); - goto err_out; - } + if (IS_ERR(part_hdr)) + return PTR_ERR(part_hdr); } if (info_blk_hdr__flags(diag204_get_info_type(), time_hdr) & DIAG204_LPAR_PHYS_FLG) { ptr = hypfs_create_phys_files(root, part_hdr); - if (IS_ERR(ptr)) { - rc = PTR_ERR(ptr); - goto err_out; - } + if (IS_ERR(ptr)) + return PTR_ERR(ptr); } hyp_dir = hypfs_mkdir(root, "hyp"); - if (IS_ERR(hyp_dir)) { - rc = PTR_ERR(hyp_dir); - goto err_out; - } - ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor"); - if (IS_ERR(ptr)) { - rc = PTR_ERR(ptr); - goto err_out; - } - rc = 0; - -err_out: - return rc; + if (IS_ERR(hyp_dir)) + return PTR_ERR(hyp_dir); + 
return hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor"); } /* Diagnose 224 functions */ diff --git a/arch/s390/hypfs/hypfs_vm_fs.c b/arch/s390/hypfs/hypfs_vm_fs.c index 6011289afa8c8..e8a32d66062bf 100644 --- a/arch/s390/hypfs/hypfs_vm_fs.c +++ b/arch/s390/hypfs/hypfs_vm_fs.c @@ -100,11 +100,9 @@ int hypfs_vm_create_files(struct dentry *root) rc = PTR_ERR(dir); goto failed; } - file = hypfs_create_str(dir, "type", "z/VM Hypervisor"); - if (IS_ERR(file)) { - rc = PTR_ERR(file); + rc = hypfs_create_str(dir, "type", "z/VM Hypervisor"); + if (rc) goto failed; - } /* physical cpus */ dir = hypfs_mkdir(root, "cpus"); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index a4dc8e13d9997..c5e2d8932b882 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -398,24 +398,23 @@ struct dentry *hypfs_create_u64(struct dentry *dir, return dentry; } -struct dentry *hypfs_create_str(struct dentry *dir, - const char *name, char *string) +int hypfs_create_str(struct dentry *dir, const char *name, char *string) { char *buffer; struct dentry *dentry; buffer = kmalloc(strlen(string) + 2, GFP_KERNEL); if (!buffer) - return ERR_PTR(-ENOMEM); + return -ENOMEM; sprintf(buffer, "%s\n", string); dentry = hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); if (IS_ERR(dentry)) { kfree(buffer); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } hypfs_add_dentry(dentry); - return dentry; + return 0; } static const struct file_operations hypfs_file_ops = { From f70ceafaef925e461c20f16d2b2ae3d13d62b0f9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:55 +0000 Subject: [PATCH 46/54] hypfs: switch hypfs_create_u64() to returning int same story as for hypfs_create_str() Signed-off-by: Al Viro --- arch/s390/hypfs/hypfs.h | 3 +-- arch/s390/hypfs/hypfs_diag_fs.c | 20 ++++++++++---------- arch/s390/hypfs/hypfs_vm_fs.c | 15 ++++++--------- arch/s390/hypfs/inode.c | 9 ++++----- 4 files changed, 21 insertions(+), 26 deletions(-) diff --git
a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index 0d109d956015f..2bb7104124cac 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -22,8 +22,7 @@ extern struct dentry *hypfs_mkdir(struct dentry *parent, const char *name); -extern struct dentry *hypfs_create_u64(struct dentry *dir, const char *name, - __u64 value); +extern int hypfs_create_u64(struct dentry *dir, const char *name, __u64 value); extern int hypfs_create_str(struct dentry *dir, const char *name, char *string); diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c index 2178e6060a5da..83c9426df08e2 100644 --- a/arch/s390/hypfs/hypfs_diag_fs.c +++ b/arch/s390/hypfs/hypfs_diag_fs.c @@ -204,7 +204,7 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) { struct dentry *cpu_dir; char buffer[TMP_SIZE]; - void *rc; + int rc; snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(), cpu_info)); @@ -214,18 +214,18 @@ static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) rc = hypfs_create_u64(cpu_dir, "mgmtime", cpu_info__acc_time(diag204_get_info_type(), cpu_info) - cpu_info__lp_time(diag204_get_info_type(), cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); + if (rc) + return rc; rc = hypfs_create_u64(cpu_dir, "cputime", cpu_info__lp_time(diag204_get_info_type(), cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); + if (rc) + return rc; if (diag204_get_info_type() == DIAG204_INFO_EXT) { rc = hypfs_create_u64(cpu_dir, "onlinetime", cpu_info__online_time(diag204_get_info_type(), cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); + if (rc) + return rc; } diag224_idx2name(cpu_info__ctidx(diag204_get_info_type(), cpu_info), buffer); return hypfs_create_str(cpu_dir, "type", buffer); @@ -263,7 +263,7 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) { struct dentry *cpu_dir; char buffer[TMP_SIZE]; - void *rc; + int rc; snprintf(buffer, TMP_SIZE, "%i", 
phys_cpu__cpu_addr(diag204_get_info_type(), cpu_info)); @@ -272,8 +272,8 @@ static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) return PTR_ERR(cpu_dir); rc = hypfs_create_u64(cpu_dir, "mgmtime", phys_cpu__mgm_time(diag204_get_info_type(), cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); + if (rc) + return rc; diag224_idx2name(phys_cpu__ctidx(diag204_get_info_type(), cpu_info), buffer); return hypfs_create_str(cpu_dir, "type", buffer); } diff --git a/arch/s390/hypfs/hypfs_vm_fs.c b/arch/s390/hypfs/hypfs_vm_fs.c index e8a32d66062bf..a149a9f92e40a 100644 --- a/arch/s390/hypfs/hypfs_vm_fs.c +++ b/arch/s390/hypfs/hypfs_vm_fs.c @@ -19,10 +19,9 @@ #define ATTRIBUTE(dir, name, member) \ do { \ - void *rc; \ - rc = hypfs_create_u64(dir, name, member); \ - if (IS_ERR(rc)) \ - return PTR_ERR(rc); \ + int rc = hypfs_create_u64(dir, name, member); \ + if (rc) \ + return rc; \ } while (0) static int hypfs_vm_create_guest(struct dentry *systems_dir, @@ -85,7 +84,7 @@ static int hypfs_vm_create_guest(struct dentry *systems_dir, int hypfs_vm_create_files(struct dentry *root) { - struct dentry *dir, *file; + struct dentry *dir; struct diag2fc_data *data; unsigned int count = 0; int rc, i; @@ -110,11 +109,9 @@ int hypfs_vm_create_files(struct dentry *root) rc = PTR_ERR(dir); goto failed; } - file = hypfs_create_u64(dir, "count", data->lcpus); - if (IS_ERR(file)) { - rc = PTR_ERR(file); + rc = hypfs_create_u64(dir, "count", data->lcpus); + if (rc) goto failed; - } /* guests */ dir = hypfs_mkdir(root, "systems"); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index c5e2d8932b882..6a80ab2692bed 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -377,8 +377,7 @@ static struct dentry *hypfs_create_update_file(struct dentry *dir) return dentry; } -struct dentry *hypfs_create_u64(struct dentry *dir, - const char *name, __u64 value) +int hypfs_create_u64(struct dentry *dir, const char *name, __u64 value) { char *buffer; char 
tmp[TMP_SIZE]; @@ -387,15 +386,15 @@ struct dentry *hypfs_create_u64(struct dentry *dir, snprintf(tmp, TMP_SIZE, "%llu\n", (unsigned long long int)value); buffer = kstrdup(tmp, GFP_KERNEL); if (!buffer) - return ERR_PTR(-ENOMEM); + return -ENOMEM; dentry = hypfs_create_file(dir, name, buffer, S_IFREG | REG_FILE_MODE); if (IS_ERR(dentry)) { kfree(buffer); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } hypfs_add_dentry(dentry); - return dentry; + return 0; } int hypfs_create_str(struct dentry *dir, const char *name, char *string) From 1c0d8b4381261ec9cb559a32825014eaf0dbaf63 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:56 +0000 Subject: [PATCH 47/54] convert hypfs just have hypfs_create_file() do the usual simple_start_creating()/ d_make_persistent()/simple_done_creating() and that's it Signed-off-by: Al Viro --- arch/s390/hypfs/inode.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 6a80ab2692bed..98952543d5935 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -311,7 +311,7 @@ static void hypfs_kill_super(struct super_block *sb) struct hypfs_sb_info *sb_info = sb->s_fs_info; hypfs_last_dentry = NULL; - kill_litter_super(sb); + kill_anon_super(sb); kfree(sb_info); } @@ -321,17 +321,13 @@ static struct dentry *hypfs_create_file(struct dentry *parent, const char *name, struct dentry *dentry; struct inode *inode; - inode_lock(d_inode(parent)); - dentry = lookup_noperm(&QSTR(name), parent); - if (IS_ERR(dentry)) { - dentry = ERR_PTR(-ENOMEM); - goto fail; - } + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) + return ERR_PTR(-ENOMEM); inode = hypfs_make_inode(parent->d_sb, mode); if (!inode) { - dput(dentry); - dentry = ERR_PTR(-ENOMEM); - goto fail; + simple_done_creating(dentry); + return ERR_PTR(-ENOMEM); } if (S_ISREG(mode)) { inode->i_fop = &hypfs_file_ops; @@ -346,10 +342,9 @@ static struct dentry 
*hypfs_create_file(struct dentry *parent, const char *name, } else BUG(); inode->i_private = data; - d_instantiate(dentry, inode); -fail: - inode_unlock(d_inode(parent)); - return dentry; + d_make_persistent(dentry, inode); + simple_done_creating(dentry); + return dentry; // borrowed } struct dentry *hypfs_mkdir(struct dentry *parent, const char *name) From 837b346a949268e8db9f74442b0be169c8b36baa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:57 +0000 Subject: [PATCH 48/54] convert rpc_pipefs Just use d_make_persistent() + dput() (and fold the latter into simple_done_creating()) and that's it... NOTE: pipe->dentry is a borrowed reference - it does not contribute to dentry refcount. Signed-off-by: Al Viro --- net/sunrpc/rpc_pipe.c | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 0bd1df2ebb479..379daefc48471 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -536,17 +536,16 @@ static int rpc_new_file(struct dentry *parent, inode = rpc_get_inode(dir->i_sb, S_IFREG | mode); if (unlikely(!inode)) { - dput(dentry); - inode_unlock(dir); + simple_done_creating(dentry); return -ENOMEM; } inode->i_ino = iunique(dir->i_sb, 100); if (i_fop) inode->i_fop = i_fop; rpc_inode_setowner(inode, private); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(dir, dentry); - inode_unlock(dir); + simple_done_creating(dentry); return 0; } @@ -563,18 +562,17 @@ static struct dentry *rpc_new_dir(struct dentry *parent, inode = rpc_get_inode(dir->i_sb, S_IFDIR | mode); if (unlikely(!inode)) { - dput(dentry); - inode_unlock(dir); + simple_done_creating(dentry); return ERR_PTR(-ENOMEM); } inode->i_ino = iunique(dir->i_sb, 100); inc_nlink(dir); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_mkdir(dir, dentry); - inode_unlock(dir); + simple_done_creating(dentry); - return dentry; + return dentry; //
borrowed } static int rpc_populate(struct dentry *parent, @@ -657,8 +655,7 @@ int rpc_mkpipe_dentry(struct dentry *parent, const char *name, inode = rpc_get_inode(dir->i_sb, umode); if (unlikely(!inode)) { - dput(dentry); - inode_unlock(dir); + simple_done_creating(dentry); err = -ENOMEM; goto failed; } @@ -668,10 +665,10 @@ int rpc_mkpipe_dentry(struct dentry *parent, const char *name, rpci->private = private; rpci->pipe = pipe; rpc_inode_setowner(inode, private); - d_instantiate(dentry, inode); - pipe->dentry = dentry; + pipe->dentry = dentry; // borrowed + d_make_persistent(dentry, inode); fsnotify_create(dir, dentry); - inode_unlock(dir); + simple_done_creating(dentry); return 0; failed: @@ -1206,7 +1203,7 @@ static void rpc_kill_sb(struct super_block *sb) sb); mutex_unlock(&sn->pipefs_sb_lock); out: - kill_litter_super(sb); + kill_anon_super(sb); put_net(net); } From 1e85286d16df80d4a077da2b0cca7211c5d70584 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:58 +0000 Subject: [PATCH 49/54] convert nfsctl One instance per net-ns. There's a fixed subset (several files in root, an optional symlink in root + initially empty /clients/) + per-client subdirectory in /clients/. Clients can appear only after the filesystem is there and they are all gone before it gets through ->kill_sb(). Fixed subset created in fill_super(), regular files by simple_fill_super(), then a subdirectory and a symlink - manually. It is removed by kill_litter_super(). Per-client subdirectories are created by nfsd_client_mkdir() (populated with client-supplied list of files in them). Removed by nfsd_client_rmdir(), which is simple_recursive_removal(). All dentries except for the ones from simple_fill_super() come from * nfsd_mkdir() (subdirectory, dentry from simple_start_creating()). Called from fill_super() (creates initially empty /clients) and from nfsd_client_mkdir (creates a per-client subdirectory in /clients). 
* _nfsd_symlink() (symlink, dentry from simple_start_creating()), called from fill_super(). * nfsdfs_create_files() (regulars, dentry from simple_start_creating()), called only from nfsd_client_mkdir(). Turn d_instantiate() + inode_unlock() into d_make_persistent() + simple_done_creating() in nfsd_mkdir(), _nfsd_symlink() and nfsdfs_create_files() and we are done. Signed-off-by: Al Viro --- fs/nfsd/nfsctl.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2b79129703d54..5ce9a49e76ba4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1137,11 +1137,11 @@ static struct dentry *nfsd_mkdir(struct dentry *parent, struct nfsdfs_client *nc inode->i_private = ncl; kref_get(&ncl->cl_ref); } - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); inc_nlink(dir); fsnotify_mkdir(dir, dentry); - inode_unlock(dir); - return dentry; + simple_done_creating(dentry); + return dentry; // borrowed } #if IS_ENABLED(CONFIG_SUNRPC_GSS) @@ -1170,9 +1170,9 @@ static void _nfsd_symlink(struct dentry *parent, const char *name, inode->i_link = (char *)content; inode->i_size = strlen(content); - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(dir, dentry); - inode_unlock(dir); + simple_done_creating(dentry); } #else static inline void _nfsd_symlink(struct dentry *parent, const char *name, @@ -1228,11 +1228,11 @@ static int nfsdfs_create_files(struct dentry *root, kref_get(&ncl->cl_ref); inode->i_fop = files->ops; inode->i_private = ncl; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); fsnotify_create(dir, dentry); if (fdentries) - fdentries[i] = dentry; - inode_unlock(dir); + fdentries[i] = dentry; // borrowed + simple_done_creating(dentry); } return 0; } @@ -1346,7 +1346,7 @@ static void nfsd_umount(struct super_block *sb) nfsd_shutdown_threads(net); - kill_litter_super(sb); + kill_anon_super(sb); put_net(net); } From
ab2c497776b7c1f4cd4ad811196465fd80d8d7e6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:15:59 +0000 Subject: [PATCH 50/54] convert rust_binderfs Parallel to binderfs stuff: * use simple_start_creating()/simple_done_creating()/d_make_persistent() instead of manual inode_lock()/lookup_noperm()/d_instantiate()/inode_unlock(). * allocate inode first - simpler cleanup that way. * use simple_recursive_removal() instead of open-coding it. * switch to kill_anon_super() Signed-off-by: Al Viro --- drivers/android/binder/rust_binderfs.c | 121 +++++++------------------ 1 file changed, 33 insertions(+), 88 deletions(-) diff --git a/drivers/android/binder/rust_binderfs.c b/drivers/android/binder/rust_binderfs.c index 6b497146b698b..c69026df775c2 100644 --- a/drivers/android/binder/rust_binderfs.c +++ b/drivers/android/binder/rust_binderfs.c @@ -178,28 +178,17 @@ static int binderfs_binder_device_create(struct inode *ref_inode, } root = sb->s_root; - inode_lock(d_inode(root)); - - /* look it up */ - dentry = lookup_noperm(&QSTR(req->name), root); + dentry = simple_start_creating(root, req->name); if (IS_ERR(dentry)) { - inode_unlock(d_inode(root)); ret = PTR_ERR(dentry); goto err; } - if (d_really_is_positive(dentry)) { - /* already exists */ - dput(dentry); - inode_unlock(d_inode(root)); - ret = -EEXIST; - goto err; - } - inode->i_private = device; - d_instantiate(dentry, inode); + d_make_persistent(dentry, inode); + fsnotify_create(root->d_inode, dentry); - inode_unlock(d_inode(root)); + simple_done_creating(dentry); return 0; @@ -472,37 +461,9 @@ static struct inode *binderfs_make_inode(struct super_block *sb, int mode) return ret; } -static struct dentry *binderfs_create_dentry(struct dentry *parent, - const char *name) -{ - struct dentry *dentry; - - dentry = lookup_noperm(&QSTR(name), parent); - if (IS_ERR(dentry)) - return dentry; - - /* Return error if the file/dir already exists.
*/ - if (d_really_is_positive(dentry)) { - dput(dentry); - return ERR_PTR(-EEXIST); - } - - return dentry; -} - void rust_binderfs_remove_file(struct dentry *dentry) { - struct inode *parent_inode; - - parent_inode = d_inode(dentry->d_parent); - inode_lock(parent_inode); - if (simple_positive(dentry)) { - dget(dentry); - simple_unlink(parent_inode, dentry); - d_delete(dentry); - dput(dentry); - } - inode_unlock(parent_inode); + simple_recursive_removal(dentry, NULL); } static struct dentry *rust_binderfs_create_file(struct dentry *parent, const char *name, @@ -510,31 +471,23 @@ static struct dentry *rust_binderfs_create_file(struct dentry *parent, const cha void *data) { struct dentry *dentry; - struct inode *new_inode, *parent_inode; - struct super_block *sb; - - parent_inode = d_inode(parent); - inode_lock(parent_inode); - - dentry = binderfs_create_dentry(parent, name); - if (IS_ERR(dentry)) - goto out; - - sb = parent_inode->i_sb; - new_inode = binderfs_make_inode(sb, S_IFREG | 0444); - if (!new_inode) { - dput(dentry); - dentry = ERR_PTR(-ENOMEM); - goto out; - } + struct inode *new_inode; + new_inode = binderfs_make_inode(parent->d_sb, S_IFREG | 0444); + if (!new_inode) + return ERR_PTR(-ENOMEM); new_inode->i_fop = fops; new_inode->i_private = data; - d_instantiate(dentry, new_inode); - fsnotify_create(parent_inode, dentry); -out: - inode_unlock(parent_inode); + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) { + iput(new_inode); + return dentry; + } + + d_make_persistent(dentry, new_inode); + fsnotify_create(parent->d_inode, dentry); + simple_done_creating(dentry); return dentry; } @@ -556,34 +509,26 @@ static struct dentry *binderfs_create_dir(struct dentry *parent, const char *name) { struct dentry *dentry; - struct inode *new_inode, *parent_inode; - struct super_block *sb; - - parent_inode = d_inode(parent); - inode_lock(parent_inode); - - dentry = binderfs_create_dentry(parent, name); - if (IS_ERR(dentry)) - goto out; + struct inode 
*new_inode; - sb = parent_inode->i_sb; - new_inode = binderfs_make_inode(sb, S_IFDIR | 0755); - if (!new_inode) { - dput(dentry); - dentry = ERR_PTR(-ENOMEM); - goto out; - } + new_inode = binderfs_make_inode(parent->d_sb, S_IFDIR | 0755); + if (!new_inode) + return ERR_PTR(-ENOMEM); new_inode->i_fop = &simple_dir_operations; new_inode->i_op = &simple_dir_inode_operations; - set_nlink(new_inode, 2); - d_instantiate(dentry, new_inode); - inc_nlink(parent_inode); - fsnotify_mkdir(parent_inode, dentry); + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) { + iput(new_inode); + return dentry; + } -out: - inode_unlock(parent_inode); + inc_nlink(parent->d_inode); + set_nlink(new_inode, 2); + d_make_persistent(dentry, new_inode); + fsnotify_mkdir(parent->d_inode, dentry); + simple_done_creating(dentry); return dentry; } @@ -802,7 +747,7 @@ static void binderfs_kill_super(struct super_block *sb) * During inode eviction struct binderfs_info is needed. * So first wipe the super_block then free struct binderfs_info. */ - kill_litter_super(sb); + kill_anon_super(sb); if (info && info->ipc_ns) put_ipc_ns(info->ipc_ns); From 88bf2b9c6aca899bb59bb010a6bba28f2d88e9c8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:16:00 +0000 Subject: [PATCH 51/54] get rid of kill_litter_super() Not used anymore. Signed-off-by: Al Viro --- Documentation/filesystems/porting.rst | 7 +++++++ fs/dcache.c | 21 --------------------- fs/internal.h | 1 - fs/super.c | 8 -------- include/linux/dcache.h | 1 - include/linux/fs.h | 1 - 6 files changed, 7 insertions(+), 32 deletions(-) diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 7233b04668fcc..4921b3b0662a7 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -1309,3 +1309,10 @@ a different length, use vfs_parse_fs_qstr(fc, key, &QSTR_LEN(value, len)) instead. 
+ +--- + +**mandatory** + +kill_litter_super() is gone; convert to DCACHE_PERSISTENT use (as all +in-tree filesystems have done). diff --git a/fs/dcache.c b/fs/dcache.c index 3cc6c3876177b..5ee2e78a91b39 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -3167,27 +3167,6 @@ bool is_subdir(struct dentry *new_dentry, struct dentry *old_dentry) } EXPORT_SYMBOL(is_subdir); -static enum d_walk_ret d_genocide_kill(void *data, struct dentry *dentry) -{ - struct dentry *root = data; - if (dentry != root) { - if (d_unhashed(dentry) || !dentry->d_inode || - dentry->d_flags & DCACHE_PERSISTENT) - return D_WALK_SKIP; - - if (!(dentry->d_flags & DCACHE_GENOCIDE)) { - dentry->d_flags |= DCACHE_GENOCIDE; - dentry->d_lockref.count--; - } - } - return D_WALK_CONTINUE; -} - -void d_genocide(struct dentry *parent) -{ - d_walk(parent, parent, d_genocide_kill); -} - void d_mark_tmpfile(struct file *file, struct inode *inode) { struct dentry *dentry = file->f_path.dentry; diff --git a/fs/internal.h b/fs/internal.h index 9b2b4d1168802..144686af6c368 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -227,7 +227,6 @@ extern void shrink_dcache_for_umount(struct super_block *); extern struct dentry *__d_lookup(const struct dentry *, const struct qstr *); extern struct dentry *__d_lookup_rcu(const struct dentry *parent, const struct qstr *name, unsigned *seq); -extern void d_genocide(struct dentry *); /* * pipe.c diff --git a/fs/super.c b/fs/super.c index 5bab94fb7e035..ee001f684d2a6 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1284,14 +1284,6 @@ void kill_anon_super(struct super_block *sb) } EXPORT_SYMBOL(kill_anon_super); -void kill_litter_super(struct super_block *sb) -{ - if (sb->s_root) - d_genocide(sb->s_root); - kill_anon_super(sb); -} -EXPORT_SYMBOL(kill_litter_super); - int set_anon_super_fc(struct super_block *sb, struct fs_context *fc) { return set_anon_super(sb, NULL); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 6ec4066825e31..20a85144a00e5 100644 --- 
a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -198,7 +198,6 @@ enum dentry_flags { DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. */ DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ DCACHE_CANT_MOUNT = BIT(8), - DCACHE_GENOCIDE = BIT(9), DCACHE_SHRINK_LIST = BIT(10), DCACHE_OP_WEAK_REVALIDATE = BIT(11), /* diff --git a/include/linux/fs.h b/include/linux/fs.h index f5037c556f617..95933ceaae51d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2728,7 +2728,6 @@ void retire_super(struct super_block *sb); void generic_shutdown_super(struct super_block *sb); void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); -void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); From ac2a1a368abe0cd894b4cb2f1d50569f3ee3de37 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:16:01 +0000 Subject: [PATCH 52/54] convert securityfs securityfs uses simple_recursive_removal(), but does not bother to mark dentries persistent. This is the only place where it still happens; get rid of that irregularity. * use simple_{start,done}_creating() and d_make_persistent(); kill_litter_super() use was already gone, since we empty the filesystem instance before it gets shut down.
Acked-by: Paul Moore Signed-off-by: Al Viro --- security/inode.c | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/security/inode.c b/security/inode.c index bf7b5e2e69556..73df5db7f8313 100644 --- a/security/inode.c +++ b/security/inode.c @@ -127,24 +127,19 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, parent = mount->mnt_root; } - dir = d_inode(parent); - - inode_lock(dir); - dentry = lookup_noperm(&QSTR(name), parent); - if (IS_ERR(dentry)) + inode = new_inode(parent->d_sb); + if (unlikely(!inode)) { + dentry = ERR_PTR(-ENOMEM); goto out; - - if (d_really_is_positive(dentry)) { - error = -EEXIST; - goto out1; } - inode = new_inode(dir->i_sb); - if (!inode) { - error = -ENOMEM; - goto out1; - } + dir = d_inode(parent); + dentry = simple_start_creating(parent, name); + if (IS_ERR(dentry)) { + iput(inode); + goto out; + } inode->i_ino = get_next_ino(); inode->i_mode = mode; simple_inode_init_ts(inode); @@ -160,15 +155,11 @@ static struct dentry *securityfs_create_dentry(const char *name, umode_t mode, } else { inode->i_fop = fops; } - d_instantiate(dentry, inode); - inode_unlock(dir); - return dentry; + d_make_persistent(dentry, inode); + simple_done_creating(dentry); + return dentry; // borrowed -out1: - dput(dentry); - dentry = ERR_PTR(error); out: - inode_unlock(dir); if (pinned) simple_release_fs(&mount, &mount_count); return dentry; From cebff17e51765aa62cee69c635a04d9309930116 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:16:02 +0000 Subject: [PATCH 53/54] kill securityfs_recursive_remove() it's an unused alias for securityfs_remove() Acked-by: Paul Moore Signed-off-by: Al Viro --- include/linux/security.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/security.h b/include/linux/security.h index 92ac3f27b9733..9e710cfee7445 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2258,8 +2258,6 @@ static inline void 
securityfs_remove(struct dentry *dentry) #endif -#define securityfs_recursive_remove securityfs_remove - #ifdef CONFIG_BPF_SYSCALL union bpf_attr; struct bpf_map; From 4c1aadeaf3757d0443e16320d41c556c61b91fa6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Nov 2025 05:16:03 +0000 Subject: [PATCH 54/54] d_make_discardable(): warn if given a non-persistent dentry At this point there are very few call chains that might lead to d_make_discardable() on a dentry that hadn't been made persistent: calls of simple_unlink() and simple_rmdir() in configfs and apparmorfs. Both filesystems do pin (part of) their contents in dcache, but they are currently playing very unusual games with that. Converting them to more usual patterns might be possible, but it's definitely going to be a long series of changes in both cases. For now the easiest solution is to have both stop using simple_unlink() and simple_rmdir() - that allows to make d_make_discardable() warn when given a non-persistent dentry. Rather than giving them full-blown private copies (with calls of d_make_discardable() replaced with dput()), let's pull the parts of simple_unlink() and simple_rmdir() that deal with timestamps and link counts into separate helpers (__simple_unlink() and __simple_rmdir() resp.) and have those used by configfs and apparmorfs. 
Signed-off-by: Al Viro --- fs/configfs/dir.c | 10 ++++++++-- fs/configfs/inode.c | 3 ++- fs/dcache.c | 9 +-------- fs/libfs.c | 21 +++++++++++++++++---- include/linux/fs.h | 2 ++ security/apparmor/apparmorfs.c | 13 +++++++++---- 6 files changed, 39 insertions(+), 19 deletions(-) diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 81f4f06bc87e7..e8f2f44012e9a 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -400,8 +400,14 @@ static void remove_dir(struct dentry * d) configfs_remove_dirent(d); - if (d_really_is_positive(d)) - simple_rmdir(d_inode(parent),d); + if (d_really_is_positive(d)) { + if (likely(simple_empty(d))) { + __simple_rmdir(d_inode(parent),d); + dput(d); + } else { + pr_warn("remove_dir (%pd): attributes remain", d); + } + } pr_debug(" o %pd removing done (%d)\n", d, d_count(d)); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index 1d2e3a5738d10..bcda3372e141a 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -211,7 +211,8 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) dget_dlock(dentry); __d_drop(dentry); spin_unlock(&dentry->d_lock); - simple_unlink(d_inode(parent), dentry); + __simple_unlink(d_inode(parent), dentry); + dput(dentry); } else spin_unlock(&dentry->d_lock); } diff --git a/fs/dcache.c b/fs/dcache.c index 5ee2e78a91b39..824d620bb563b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -931,14 +931,7 @@ EXPORT_SYMBOL(dput); void d_make_discardable(struct dentry *dentry) { spin_lock(&dentry->d_lock); - /* - * By the end of the series we'll add - * WARN_ON(!(dentry->d_flags & DCACHE_PERSISTENT); - * here, but while object removal is done by a few common helpers, - * object creation tends to be open-coded (if nothing else, new inode - * needs to be set up), so adding a warning from the very beginning - * would make for much messier patch series. 
- */ + WARN_ON(!(dentry->d_flags & DCACHE_PERSISTENT)); dentry->d_flags &= ~DCACHE_PERSISTENT; dentry->d_lockref.count--; rcu_read_lock(); diff --git a/fs/libfs.c b/fs/libfs.c index 80f288a771e30..0aa630e7eb00f 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -790,13 +790,27 @@ int simple_empty(struct dentry *dentry) } EXPORT_SYMBOL(simple_empty); -int simple_unlink(struct inode *dir, struct dentry *dentry) +void __simple_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); inode_set_mtime_to_ts(dir, inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode))); drop_nlink(inode); +} +EXPORT_SYMBOL(__simple_unlink); + +void __simple_rmdir(struct inode *dir, struct dentry *dentry) +{ + drop_nlink(d_inode(dentry)); + __simple_unlink(dir, dentry); + drop_nlink(dir); +} +EXPORT_SYMBOL(__simple_rmdir); + +int simple_unlink(struct inode *dir, struct dentry *dentry) +{ + __simple_unlink(dir, dentry); d_make_discardable(dentry); return 0; } @@ -807,9 +821,8 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry) if (!simple_empty(dentry)) return -ENOTEMPTY; - drop_nlink(d_inode(dentry)); - simple_unlink(dir, dentry); - drop_nlink(dir); + __simple_rmdir(dir, dentry); + d_make_discardable(dentry); return 0; } EXPORT_SYMBOL(simple_rmdir); diff --git a/include/linux/fs.h b/include/linux/fs.h index 95933ceaae51d..ef842adbd418b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3621,6 +3621,8 @@ extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); +extern void __simple_unlink(struct inode *, struct dentry *); +extern void __simple_rmdir(struct inode *, struct dentry *); void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); extern int 
simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry, diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c index 391a586d0557f..9b9090d38ea22 100644 --- a/security/apparmor/apparmorfs.c +++ b/security/apparmor/apparmorfs.c @@ -358,10 +358,15 @@ static void aafs_remove(struct dentry *dentry) dir = d_inode(dentry->d_parent); inode_lock(dir); if (simple_positive(dentry)) { - if (d_is_dir(dentry)) - simple_rmdir(dir, dentry); - else - simple_unlink(dir, dentry); + if (d_is_dir(dentry)) { + if (!WARN_ON(!simple_empty(dentry))) { + __simple_rmdir(dir, dentry); + dput(dentry); + } + } else { + __simple_unlink(dir, dentry); + dput(dentry); + } d_delete(dentry); dput(dentry); }