Skip to content

Commit 1f2300a

Browse files
committed
Merge tag 'v6.5/vfs.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs file handling updates from Christian Brauner: "This contains Amir's work to fix a long-standing problem where an unprivileged overlayfs mount can be used to avoid fanotify permission events that were requested for an inode or superblock on the underlying filesystem. Some background about files opened in overlayfs. If a file is opened in overlayfs @file->f_path will refer to a "fake" path. What this means is that while @file->f_inode will refer to inode of the underlying layer, @file->f_path refers to an overlayfs {dentry,vfsmount} pair. The reasons for doing this are out of scope here but it is the reason why the vfs has been providing the open_with_fake_path() helper for overlayfs for very long time now. So nothing new here. This is for sure not very elegant and everyone including the overlayfs maintainers agree. Improving this significantly would involve more fragile and potentially rather invasive changes. In various codepaths access to the path of the underlying filesystem is needed for such hybrid file. The best example is fsnotify where this becomes security relevant. Passing the overlayfs @file->f_path->dentry will cause fsnotify to skip generating fsnotify events registered on the underlying inode or superblock. To fix this we extend the vfs provided open_with_fake_path() concept for overlayfs to create a backing file container that holds the real path and to expose a helper that can be used by relevant callers to get access to the path of the underlying filesystem through the new file_real_path() helper. This pattern is similar to what we do in d_real() and d_real_inode(). The first beneficiary is fsnotify and fixes the security sensitive problem mentioned above. There's a couple of nice cleanups included as well. Over time, the old open_with_fake_path() helper added specifically for overlayfs a long time ago started to get used in other places such as cachefiles. Even though cachefiles have nothing to do with hybrid files. The only reason cachefiles used that concept was that files opened with open_with_fake_path() aren't charged against the caller's open file limit by raising FMODE_NOACCOUNT. It's just mere coincidence that both overlayfs and cachefiles need to ensure to not overcharge the caller for their internal open calls. So this work disentangles FMODE_NOACCOUNT use cases and backing file use-cases by adding the FMODE_BACKING flag which indicates that the file can be used to retrieve the backing file of another filesystem. (Fyi, Jens will be sending you a really nice cleanup from Christoph that gets rid of 3 FMODE_* flags otherwise this would be the last fmode_t bit we'd be using.) So now overlayfs becomes the sole user of the renamed open_with_fake_path() helper which is now named backing_file_open(). For internal kernel users such as cachefiles that are only interested in FMODE_NOACCOUNT but not in FMODE_BACKING we add a new kernel_file_open() helper which opens a file without being charged against the caller's open file limit. All new helpers are properly documented and clearly annotated to mention their special uses. We also rename vfs_tmpfile_open() to kernel_tmpfile_open() to clearly distinguish it from vfs_tmpfile() and align it the other kernel_*() internal helpers" * tag 'v6.5/vfs.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: ovl: enable fsnotify events on underlying real files fs: use backing_file container for internal files with "fake" f_path fs: move kmem_cache_zalloc() into alloc_empty_file*() helpers fs: use a helper for opening kernel internal files fs: rename {vfs,kernel}_tmpfile_open()
2 parents 2eedfa9 + bc2473c commit 1f2300a

File tree

9 files changed

+205
-62
lines changed

9 files changed

+205
-62
lines changed

fs/cachefiles/namei.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -451,10 +451,10 @@ struct file *cachefiles_create_tmpfile(struct cachefiles_object *object)
451451

452452
ret = cachefiles_inject_write_error();
453453
if (ret == 0) {
454-
file = vfs_tmpfile_open(&nop_mnt_idmap, &parentpath,
455-
S_IFREG | 0600,
456-
O_RDWR | O_LARGEFILE | O_DIRECT,
457-
cache->cache_cred);
454+
file = kernel_tmpfile_open(&nop_mnt_idmap, &parentpath,
455+
S_IFREG | 0600,
456+
O_RDWR | O_LARGEFILE | O_DIRECT,
457+
cache->cache_cred);
458458
ret = PTR_ERR_OR_ZERO(file);
459459
}
460460
if (ret) {
@@ -561,8 +561,8 @@ static bool cachefiles_open_file(struct cachefiles_object *object,
561561
*/
562562
path.mnt = cache->mnt;
563563
path.dentry = dentry;
564-
file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
565-
d_backing_inode(dentry), cache->cache_cred);
564+
file = kernel_file_open(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
565+
d_backing_inode(dentry), cache->cache_cred);
566566
if (IS_ERR(file)) {
567567
trace_cachefiles_vfs_error(object, d_backing_inode(dentry),
568568
PTR_ERR(file),

fs/file_table.c

Lines changed: 74 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -44,18 +44,40 @@ static struct kmem_cache *filp_cachep __read_mostly;
4444

4545
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
4646

47+
/* Container for backing file with optional real path */
48+
struct backing_file {
49+
struct file file;
50+
struct path real_path;
51+
};
52+
53+
static inline struct backing_file *backing_file(struct file *f)
54+
{
55+
return container_of(f, struct backing_file, file);
56+
}
57+
58+
struct path *backing_file_real_path(struct file *f)
59+
{
60+
return &backing_file(f)->real_path;
61+
}
62+
EXPORT_SYMBOL_GPL(backing_file_real_path);
63+
4764
static void file_free_rcu(struct rcu_head *head)
4865
{
4966
struct file *f = container_of(head, struct file, f_rcuhead);
5067

5168
put_cred(f->f_cred);
52-
kmem_cache_free(filp_cachep, f);
69+
if (unlikely(f->f_mode & FMODE_BACKING))
70+
kfree(backing_file(f));
71+
else
72+
kmem_cache_free(filp_cachep, f);
5373
}
5474

5575
static inline void file_free(struct file *f)
5676
{
5777
security_file_free(f);
58-
if (!(f->f_mode & FMODE_NOACCOUNT))
78+
if (unlikely(f->f_mode & FMODE_BACKING))
79+
path_put(backing_file_real_path(f));
80+
if (likely(!(f->f_mode & FMODE_NOACCOUNT)))
5981
percpu_counter_dec(&nr_files);
6082
call_rcu(&f->f_rcuhead, file_free_rcu);
6183
}
@@ -131,20 +153,15 @@ static int __init init_fs_stat_sysctls(void)
131153
fs_initcall(init_fs_stat_sysctls);
132154
#endif
133155

134-
static struct file *__alloc_file(int flags, const struct cred *cred)
156+
static int init_file(struct file *f, int flags, const struct cred *cred)
135157
{
136-
struct file *f;
137158
int error;
138159

139-
f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
140-
if (unlikely(!f))
141-
return ERR_PTR(-ENOMEM);
142-
143160
f->f_cred = get_cred(cred);
144161
error = security_file_alloc(f);
145162
if (unlikely(error)) {
146163
file_free_rcu(&f->f_rcuhead);
147-
return ERR_PTR(error);
164+
return error;
148165
}
149166

150167
atomic_long_set(&f->f_count, 1);
@@ -155,7 +172,7 @@ static struct file *__alloc_file(int flags, const struct cred *cred)
155172
f->f_mode = OPEN_FMODE(flags);
156173
/* f->f_version: 0 */
157174

158-
return f;
175+
return 0;
159176
}
160177

161178
/* Find an unused file structure and return a pointer to it.
@@ -172,6 +189,7 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
172189
{
173190
static long old_max;
174191
struct file *f;
192+
int error;
175193

176194
/*
177195
* Privileged users can go above max_files
@@ -185,9 +203,15 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
185203
goto over;
186204
}
187205

188-
f = __alloc_file(flags, cred);
189-
if (!IS_ERR(f))
190-
percpu_counter_inc(&nr_files);
206+
f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
207+
if (unlikely(!f))
208+
return ERR_PTR(-ENOMEM);
209+
210+
error = init_file(f, flags, cred);
211+
if (unlikely(error))
212+
return ERR_PTR(error);
213+
214+
percpu_counter_inc(&nr_files);
191215

192216
return f;
193217

@@ -203,18 +227,51 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
203227
/*
204228
* Variant of alloc_empty_file() that doesn't check and modify nr_files.
205229
*
206-
* Should not be used unless there's a very good reason to do so.
230+
* This is only for kernel internal use, and the allocate file must not be
231+
* installed into file tables or such.
207232
*/
208233
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred)
209234
{
210-
struct file *f = __alloc_file(flags, cred);
235+
struct file *f;
236+
int error;
211237

212-
if (!IS_ERR(f))
213-
f->f_mode |= FMODE_NOACCOUNT;
238+
f = kmem_cache_zalloc(filp_cachep, GFP_KERNEL);
239+
if (unlikely(!f))
240+
return ERR_PTR(-ENOMEM);
241+
242+
error = init_file(f, flags, cred);
243+
if (unlikely(error))
244+
return ERR_PTR(error);
245+
246+
f->f_mode |= FMODE_NOACCOUNT;
214247

215248
return f;
216249
}
217250

251+
/*
252+
* Variant of alloc_empty_file() that allocates a backing_file container
253+
* and doesn't check and modify nr_files.
254+
*
255+
* This is only for kernel internal use, and the allocate file must not be
256+
* installed into file tables or such.
257+
*/
258+
struct file *alloc_empty_backing_file(int flags, const struct cred *cred)
259+
{
260+
struct backing_file *ff;
261+
int error;
262+
263+
ff = kzalloc(sizeof(struct backing_file), GFP_KERNEL);
264+
if (unlikely(!ff))
265+
return ERR_PTR(-ENOMEM);
266+
267+
error = init_file(&ff->file, flags, cred);
268+
if (unlikely(error))
269+
return ERR_PTR(error);
270+
271+
ff->file.f_mode |= FMODE_BACKING | FMODE_NOACCOUNT;
272+
return &ff->file;
273+
}
274+
218275
/**
219276
* alloc_file - allocate and initialize a 'struct file'
220277
*

fs/internal.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,9 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
9797
/*
9898
* file_table.c
9999
*/
100-
extern struct file *alloc_empty_file(int, const struct cred *);
101-
extern struct file *alloc_empty_file_noaccount(int, const struct cred *);
100+
struct file *alloc_empty_file(int flags, const struct cred *cred);
101+
struct file *alloc_empty_file_noaccount(int flags, const struct cred *cred);
102+
struct file *alloc_empty_backing_file(int flags, const struct cred *cred);
102103

103104
static inline void put_file_access(struct file *file)
104105
{

fs/namei.c

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3703,7 +3703,7 @@ static int vfs_tmpfile(struct mnt_idmap *idmap,
37033703
}
37043704

37053705
/**
3706-
* vfs_tmpfile_open - open a tmpfile for kernel internal use
3706+
* kernel_tmpfile_open - open a tmpfile for kernel internal use
37073707
* @idmap: idmap of the mount the inode was found from
37083708
* @parentpath: path of the base directory
37093709
* @mode: mode of the new tmpfile
@@ -3714,24 +3714,26 @@ static int vfs_tmpfile(struct mnt_idmap *idmap,
37143714
* hence this is only for kernel internal use, and must not be installed into
37153715
* file tables or such.
37163716
*/
3717-
struct file *vfs_tmpfile_open(struct mnt_idmap *idmap,
3718-
const struct path *parentpath,
3719-
umode_t mode, int open_flag, const struct cred *cred)
3717+
struct file *kernel_tmpfile_open(struct mnt_idmap *idmap,
3718+
const struct path *parentpath,
3719+
umode_t mode, int open_flag,
3720+
const struct cred *cred)
37203721
{
37213722
struct file *file;
37223723
int error;
37233724

37243725
file = alloc_empty_file_noaccount(open_flag, cred);
3725-
if (!IS_ERR(file)) {
3726-
error = vfs_tmpfile(idmap, parentpath, file, mode);
3727-
if (error) {
3728-
fput(file);
3729-
file = ERR_PTR(error);
3730-
}
3726+
if (IS_ERR(file))
3727+
return file;
3728+
3729+
error = vfs_tmpfile(idmap, parentpath, file, mode);
3730+
if (error) {
3731+
fput(file);
3732+
file = ERR_PTR(error);
37313733
}
37323734
return file;
37333735
}
3734-
EXPORT_SYMBOL(vfs_tmpfile_open);
3736+
EXPORT_SYMBOL(kernel_tmpfile_open);
37353737

37363738
static int do_tmpfile(struct nameidata *nd, unsigned flags,
37373739
const struct open_flags *op,

fs/open.c

Lines changed: 65 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,23 +1108,77 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode,
11081108
}
11091109
EXPORT_SYMBOL(dentry_create);
11101110

1111-
struct file *open_with_fake_path(const struct path *path, int flags,
1111+
/**
1112+
* kernel_file_open - open a file for kernel internal use
1113+
* @path: path of the file to open
1114+
* @flags: open flags
1115+
* @inode: the inode
1116+
* @cred: credentials for open
1117+
*
1118+
* Open a file for use by in-kernel consumers. The file is not accounted
1119+
* against nr_files and must not be installed into the file descriptor
1120+
* table.
1121+
*
1122+
* Return: Opened file on success, an error pointer on failure.
1123+
*/
1124+
struct file *kernel_file_open(const struct path *path, int flags,
11121125
struct inode *inode, const struct cred *cred)
11131126
{
1114-
struct file *f = alloc_empty_file_noaccount(flags, cred);
1115-
if (!IS_ERR(f)) {
1116-
int error;
1127+
struct file *f;
1128+
int error;
11171129

1118-
f->f_path = *path;
1119-
error = do_dentry_open(f, inode, NULL);
1120-
if (error) {
1121-
fput(f);
1122-
f = ERR_PTR(error);
1123-
}
1130+
f = alloc_empty_file_noaccount(flags, cred);
1131+
if (IS_ERR(f))
1132+
return f;
1133+
1134+
f->f_path = *path;
1135+
error = do_dentry_open(f, inode, NULL);
1136+
if (error) {
1137+
fput(f);
1138+
f = ERR_PTR(error);
11241139
}
11251140
return f;
11261141
}
1127-
EXPORT_SYMBOL(open_with_fake_path);
1142+
EXPORT_SYMBOL_GPL(kernel_file_open);
1143+
1144+
/**
1145+
* backing_file_open - open a backing file for kernel internal use
1146+
* @path: path of the file to open
1147+
* @flags: open flags
1148+
* @path: path of the backing file
1149+
* @cred: credentials for open
1150+
*
1151+
* Open a backing file for a stackable filesystem (e.g., overlayfs).
1152+
* @path may be on the stackable filesystem and backing inode on the
1153+
* underlying filesystem. In this case, we want to be able to return
1154+
* the @real_path of the backing inode. This is done by embedding the
1155+
* returned file into a container structure that also stores the path of
1156+
* the backing inode on the underlying filesystem, which can be
1157+
* retrieved using backing_file_real_path().
1158+
*/
1159+
struct file *backing_file_open(const struct path *path, int flags,
1160+
const struct path *real_path,
1161+
const struct cred *cred)
1162+
{
1163+
struct file *f;
1164+
int error;
1165+
1166+
f = alloc_empty_backing_file(flags, cred);
1167+
if (IS_ERR(f))
1168+
return f;
1169+
1170+
f->f_path = *path;
1171+
path_get(real_path);
1172+
*backing_file_real_path(f) = *real_path;
1173+
error = do_dentry_open(f, d_inode(real_path->dentry), NULL);
1174+
if (error) {
1175+
fput(f);
1176+
f = ERR_PTR(error);
1177+
}
1178+
1179+
return f;
1180+
}
1181+
EXPORT_SYMBOL_GPL(backing_file_open);
11281182

11291183
#define WILL_CREATE(flags) (flags & (O_CREAT | __O_TMPFILE))
11301184
#define O_PATH_FLAGS (O_DIRECTORY | O_NOFOLLOW | O_PATH | O_CLOEXEC)

fs/overlayfs/file.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ static char ovl_whatisit(struct inode *inode, struct inode *realinode)
3434
return 'm';
3535
}
3636

37-
/* No atime modification nor notify on underlying */
38-
#define OVL_OPEN_FLAGS (O_NOATIME | __FMODE_NONOTIFY)
37+
/* No atime modification on underlying */
38+
#define OVL_OPEN_FLAGS (O_NOATIME)
3939

4040
static struct file *ovl_open_realfile(const struct file *file,
4141
const struct path *realpath)
@@ -61,8 +61,8 @@ static struct file *ovl_open_realfile(const struct file *file,
6161
if (!inode_owner_or_capable(real_idmap, realinode))
6262
flags &= ~O_NOATIME;
6363

64-
realfile = open_with_fake_path(&file->f_path, flags, realinode,
65-
current_cred());
64+
realfile = backing_file_open(&file->f_path, flags, realpath,
65+
current_cred());
6666
}
6767
revert_creds(old_cred);
6868

fs/overlayfs/overlayfs.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -329,8 +329,9 @@ static inline struct file *ovl_do_tmpfile(struct ovl_fs *ofs,
329329
struct dentry *dentry, umode_t mode)
330330
{
331331
struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = dentry };
332-
struct file *file = vfs_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path, mode,
333-
O_LARGEFILE | O_WRONLY, current_cred());
332+
struct file *file = kernel_tmpfile_open(ovl_upper_mnt_idmap(ofs), &path,
333+
mode, O_LARGEFILE | O_WRONLY,
334+
current_cred());
334335
int err = PTR_ERR_OR_ZERO(file);
335336

336337
pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);

0 commit comments

Comments
 (0)