Skip to content

Commit df820f8

Browse files
author
Miklos Szeredi
committed
ovl: make private mounts longterm
Overlayfs is using clone_private_mount() to create internal mounts for underlying layers. These are used for operations requiring a path, such as dentry_open(). Since these private mounts are not in any namespace they are treated as short term, "detached" mounts and mntput() involves taking the global mount_lock, which can result in serious cacheline pingpong. Make these private mounts longterm instead, which trades the penalty on mntput() for a slightly longer shutdown time due to an added RCU grace period when putting these mounts. Introduce a new helper kern_unmount_array() that can take care of multiple longterm mounts with a single RCU grace period. Cc: Al Viro <[email protected]> Signed-off-by: Miklos Szeredi <[email protected]>
1 parent b8e42a6 commit df820f8

File tree

4 files changed

+31
-1
lines changed

4 files changed

+31
-1
lines changed

Documentation/filesystems/porting.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -858,3 +858,10 @@ be misspelled d_alloc_anon().
858858
[should've been added in 2016] stale comment in finish_open() notwithstanding,
859859
failure exits in ->atomic_open() instances should *NOT* fput() the file,
860860
no matter what. Everything is handled by the caller.
861+
862+
---
863+
864+
**mandatory**
865+
866+
clone_private_mount() returns a longterm mount now, so the proper destructor of
867+
its result is kern_unmount() or kern_unmount_array().

fs/namespace.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1879,6 +1879,9 @@ struct vfsmount *clone_private_mount(const struct path *path)
18791879
if (IS_ERR(new_mnt))
18801880
return ERR_CAST(new_mnt);
18811881

1882+
/* Longterm mount to be removed by kern_unmount*() */
1883+
new_mnt->mnt_ns = MNT_NS_INTERNAL;
1884+
18821885
return &new_mnt->mnt;
18831886
}
18841887
EXPORT_SYMBOL_GPL(clone_private_mount);
@@ -3804,6 +3807,19 @@ void kern_unmount(struct vfsmount *mnt)
38043807
}
38053808
EXPORT_SYMBOL(kern_unmount);
38063809

3810+
/*
 * kern_unmount_array - release an array of longterm mounts in one pass.
 * @mnt: array of mount pointers; NULL entries are skipped when clearing mnt_ns
 * @num: number of entries in @mnt
 *
 * Works in three phases: clear ->mnt_ns on every non-NULL entry, call
 * synchronize_rcu_expedited() once, then mntput() every entry.  Batching
 * the mounts this way means the whole array shares a single wait instead
 * of paying for one per mount.
 */
void kern_unmount_array(struct vfsmount *mnt[], unsigned int num)
3811+
{
3812+
unsigned int i;
3813+
3814+
/* Phase 1: clear the namespace pointer on each mount that is present. */
for (i = 0; i < num; i++)
3815+
if (mnt[i])
3816+
real_mount(mnt[i])->mnt_ns = NULL;
3817+
/* Phase 2: one wait covering every mount touched above. */
synchronize_rcu_expedited();
3818+
/*
 * Phase 3: drop the references.
 * NOTE(review): mntput() is called for NULL entries as well; presumably
 * it tolerates NULL -- confirm against its definition.
 */
for (i = 0; i < num; i++)
3819+
mntput(mnt[i]);
3820+
}
3821+
EXPORT_SYMBOL(kern_unmount_array);
3822+
38073823
bool our_mnt(struct vfsmount *mnt)
38083824
{
38093825
return check_mnt(real_mount(mnt));

fs/overlayfs/super.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ static void ovl_destroy_inode(struct inode *inode)
211211

212212
static void ovl_free_fs(struct ovl_fs *ofs)
213213
{
214+
struct vfsmount **mounts;
214215
unsigned i;
215216

216217
iput(ofs->workbasedir_trap);
@@ -224,10 +225,14 @@ static void ovl_free_fs(struct ovl_fs *ofs)
224225
dput(ofs->workbasedir);
225226
if (ofs->upperdir_locked)
226227
ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root);
228+
229+
/* Hack! Reuse ofs->layers as a vfsmount array before freeing it */
230+
mounts = (struct vfsmount **) ofs->layers;
227231
for (i = 0; i < ofs->numlayer; i++) {
228232
iput(ofs->layers[i].trap);
229-
mntput(ofs->layers[i].mnt);
233+
mounts[i] = ofs->layers[i].mnt;
230234
}
235+
kern_unmount_array(mounts, ofs->numlayer);
231236
kfree(ofs->layers);
232237
for (i = 0; i < ofs->numfs; i++)
233238
free_anon_bdev(ofs->fs[i].pseudo_dev);

include/linux/mount.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,4 +109,6 @@ extern unsigned int sysctl_mount_max;
109109

110110
extern bool path_is_mountpoint(const struct path *path);
111111

112+
extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num);
113+
112114
#endif /* _LINUX_MOUNT_H */

0 commit comments

Comments
 (0)