Skip to content

Commit 7031769

Browse files
committed
Merge tag 'vfs-6.17-rc1.mmap_prepare' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull mmap_prepare updates from Christian Brauner: "Last cycle we introduced f_op->mmap_prepare() in c84bf6d ("mm: introduce new .mmap_prepare() file callback"). This is preferred to the existing f_op->mmap() hook as it does not require a VMA to be established yet, thus allowing the mmap logic to invoke this hook far, far earlier, prior to inserting a VMA into the virtual address space, or performing any other heavy-handed operations. This allows for much simpler unwinding on error, and for there to be a single attempt at merging a VMA rather than having to possibly reattempt a merge based on potentially altered VMA state. Far more importantly, it prevents inappropriate manipulation of incompletely initialised VMA state, which is something that has been the cause of bugs and complexity in the past. The intent is to gradually deprecate f_op->mmap, and in that vein this series converts the majority of file systems to using f_op->mmap_prepare. Prerequisite steps are taken - firstly ensuring all checks for mmap capabilities use the file_has_valid_mmap_hooks() helper rather than directly checking for f_op->mmap (which is now not a valid check) and secondly updating daxdev_mapping_supported() to not require a VMA parameter to allow ext4 and xfs to be converted. Commit bb666b7 ("mm: add mmap_prepare() compatibility layer for nested file systems") handles the nasty edge-case of nested file systems like overlayfs, which introduces a compatibility shim to allow f_op->mmap_prepare() to be invoked from an f_op->mmap() callback. This allows for nested filesystems to continue to function correctly with all file systems regardless of which callback is used. Once we finally convert all file systems, this shim can be removed. As a result, ecryptfs, fuse, and overlayfs remain unaltered so they can nest all other file systems. 
We additionally do not update resctrl - as this requires an update to remap_pfn_range() (or an alternative to it) which we defer to a later series, equally we do not update cramfs which needs a mixed mapping insertion with the same issue, nor do we update procfs, hugetlbfs, sysfs or kernfs all of which require VMAs for internal state and hooks. We shall return to all of these later" * tag 'vfs-6.17-rc1.mmap_prepare' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: doc: update porting, vfs documentation to describe mmap_prepare() fs: replace mmap hook with .mmap_prepare for simple mappings fs: convert most other generic_file_*mmap() users to .mmap_prepare() fs: convert simple use of generic_file_*_mmap() to .mmap_prepare() mm/filemap: introduce generic_file_*_mmap_prepare() helpers fs/xfs: transition from deprecated .mmap hook to .mmap_prepare fs/ext4: transition from deprecated .mmap hook to .mmap_prepare fs/dax: make it possible to check dev dax support without a VMA fs: consistently use can_mmap_file() helper mm/nommu: use file_has_valid_mmap_hooks() helper mm: rename call_mmap/mmap_prepare to vfs_mmap/mmap_prepare
2 parents 278c7d9 + 425c8bb commit 7031769

File tree

64 files changed

+281
-187
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+281
-187
lines changed

Documentation/filesystems/porting.rst

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1273,3 +1273,15 @@ to have them set. Better yet, think hard whether you need different
12731273
->d_op for different dentries - if not, just use set_default_d_op()
12741274
at mount time and be done with that. Currently procfs is the only
12751275
thing that really needs ->d_op varying between dentries.
1276+
1277+
---
1278+
1279+
**highly recommended**
1280+
1281+
The file operations mmap() callback is deprecated in favour of
1282+
mmap_prepare(). This passes a pointer to a vm_area_desc to the callback
1283+
rather than a VMA, as the VMA at this stage is not yet valid.
1284+
1285+
The vm_area_desc provides the minimum required information for a filesystem
1286+
to initialise state upon memory mapping of a file-backed region, and output
1287+
parameters for the file system to set this state.

Documentation/filesystems/vfs.rst

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1072,12 +1072,14 @@ This describes how the VFS can manipulate an open file. As of kernel
10721072
10731073
struct file_operations {
10741074
struct module *owner;
1075+
fop_flags_t fop_flags;
10751076
loff_t (*llseek) (struct file *, loff_t, int);
10761077
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
10771078
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
10781079
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
10791080
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
1080-
int (*iopoll)(struct kiocb *kiocb, bool spin);
1081+
int (*iopoll)(struct kiocb *kiocb, struct io_comp_batch *,
1082+
unsigned int flags);
10811083
int (*iterate_shared) (struct file *, struct dir_context *);
10821084
__poll_t (*poll) (struct file *, struct poll_table_struct *);
10831085
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
@@ -1094,18 +1096,24 @@ This describes how the VFS can manipulate an open file. As of kernel
10941096
int (*flock) (struct file *, int, struct file_lock *);
10951097
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
10961098
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
1097-
int (*setlease)(struct file *, long, struct file_lock **, void **);
1099+
void (*splice_eof)(struct file *file);
1100+
int (*setlease)(struct file *, int, struct file_lease **, void **);
10981101
long (*fallocate)(struct file *file, int mode, loff_t offset,
10991102
loff_t len);
11001103
void (*show_fdinfo)(struct seq_file *m, struct file *f);
11011104
#ifndef CONFIG_MMU
11021105
unsigned (*mmap_capabilities)(struct file *);
11031106
#endif
1104-
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
1107+
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
1108+
loff_t, size_t, unsigned int);
11051109
loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
11061110
struct file *file_out, loff_t pos_out,
11071111
loff_t len, unsigned int remap_flags);
11081112
int (*fadvise)(struct file *, loff_t, loff_t, int);
1113+
int (*uring_cmd)(struct io_uring_cmd *ioucmd, unsigned int issue_flags);
1114+
int (*uring_cmd_iopoll)(struct io_uring_cmd *, struct io_comp_batch *,
1115+
unsigned int poll_flags);
1116+
int (*mmap_prepare)(struct vm_area_desc *);
11091117
};
11101118
11111119
Again, all methods are called without any locks being held, unless
@@ -1145,7 +1153,8 @@ otherwise noted.
11451153
used on 64 bit kernels.
11461154

11471155
``mmap``
1148-
called by the mmap(2) system call
1156+
called by the mmap(2) system call. Deprecated in favour of
1157+
``mmap_prepare``.
11491158

11501159
``open``
11511160
called by the VFS when an inode should be opened. When the VFS
@@ -1222,6 +1231,11 @@ otherwise noted.
12221231
``fadvise``
12231232
possibly called by the fadvise64() system call.
12241233

1234+
``mmap_prepare``
1235+
Called by the mmap(2) system call. Allows a VFS to set up a
1236+
file-backed memory mapping, most notably establishing relevant
1237+
private state and VMA callbacks.
1238+
12251239
Note that the file operations are implemented by the specific
12261240
filesystem in which the inode resides. When opening a device node
12271241
(character or block special) most filesystems will call special

block/fops.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -920,14 +920,14 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
920920
return error;
921921
}
922922

923-
static int blkdev_mmap(struct file *file, struct vm_area_struct *vma)
923+
static int blkdev_mmap_prepare(struct vm_area_desc *desc)
924924
{
925-
struct inode *bd_inode = bdev_file_inode(file);
925+
struct file *file = desc->file;
926926

927-
if (bdev_read_only(I_BDEV(bd_inode)))
928-
return generic_file_readonly_mmap(file, vma);
927+
if (bdev_read_only(I_BDEV(bdev_file_inode(file))))
928+
return generic_file_readonly_mmap_prepare(desc);
929929

930-
return generic_file_mmap(file, vma);
930+
return generic_file_mmap_prepare(desc);
931931
}
932932

933933
const struct file_operations def_blk_fops = {
@@ -937,7 +937,7 @@ const struct file_operations def_blk_fops = {
937937
.read_iter = blkdev_read_iter,
938938
.write_iter = blkdev_write_iter,
939939
.iopoll = iocb_bio_iopoll,
940-
.mmap = blkdev_mmap,
940+
.mmap_prepare = blkdev_mmap_prepare,
941941
.fsync = blkdev_fsync,
942942
.unlocked_ioctl = blkdev_ioctl,
943943
#ifdef CONFIG_COMPAT

drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
105105
if (!obj->base.filp)
106106
return -ENODEV;
107107

108-
ret = call_mmap(obj->base.filp, vma);
108+
ret = vfs_mmap(obj->base.filp, vma);
109109
if (ret)
110110
return ret;
111111

fs/9p/vfs_file.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -454,22 +454,23 @@ int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
454454
}
455455

456456
static int
457-
v9fs_file_mmap(struct file *filp, struct vm_area_struct *vma)
457+
v9fs_file_mmap_prepare(struct vm_area_desc *desc)
458458
{
459459
int retval;
460+
struct file *filp = desc->file;
460461
struct inode *inode = file_inode(filp);
461462
struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
462463

463464
p9_debug(P9_DEBUG_MMAP, "filp :%p\n", filp);
464465

465466
if (!(v9ses->cache & CACHE_WRITEBACK)) {
466467
p9_debug(P9_DEBUG_CACHE, "(read-only mmap mode)");
467-
return generic_file_readonly_mmap(filp, vma);
468+
return generic_file_readonly_mmap_prepare(desc);
468469
}
469470

470-
retval = generic_file_mmap(filp, vma);
471+
retval = generic_file_mmap_prepare(desc);
471472
if (!retval)
472-
vma->vm_ops = &v9fs_mmap_file_vm_ops;
473+
desc->vm_ops = &v9fs_mmap_file_vm_ops;
473474

474475
return retval;
475476
}
@@ -516,7 +517,7 @@ const struct file_operations v9fs_file_operations = {
516517
.open = v9fs_file_open,
517518
.release = v9fs_dir_release,
518519
.lock = v9fs_file_lock,
519-
.mmap = generic_file_readonly_mmap,
520+
.mmap_prepare = generic_file_readonly_mmap_prepare,
520521
.splice_read = v9fs_file_splice_read,
521522
.splice_write = iter_file_splice_write,
522523
.fsync = v9fs_file_fsync,
@@ -531,7 +532,7 @@ const struct file_operations v9fs_file_operations_dotl = {
531532
.release = v9fs_dir_release,
532533
.lock = v9fs_file_lock_dotl,
533534
.flock = v9fs_file_flock_dotl,
534-
.mmap = v9fs_file_mmap,
535+
.mmap_prepare = v9fs_file_mmap_prepare,
535536
.splice_read = v9fs_file_splice_read,
536537
.splice_write = iter_file_splice_write,
537538
.fsync = v9fs_file_fsync_dotl,

fs/adfs/file.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
const struct file_operations adfs_file_operations = {
2626
.llseek = generic_file_llseek,
2727
.read_iter = generic_file_read_iter,
28-
.mmap = generic_file_mmap,
28+
.mmap_prepare = generic_file_mmap_prepare,
2929
.fsync = generic_file_fsync,
3030
.write_iter = generic_file_write_iter,
3131
.splice_read = filemap_splice_read,

fs/affs/file.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1003,7 +1003,7 @@ const struct file_operations affs_file_operations = {
10031003
.llseek = generic_file_llseek,
10041004
.read_iter = generic_file_read_iter,
10051005
.write_iter = generic_file_write_iter,
1006-
.mmap = generic_file_mmap,
1006+
.mmap_prepare = generic_file_mmap_prepare,
10071007
.open = affs_file_open,
10081008
.release = affs_file_release,
10091009
.fsync = affs_file_fsync,

fs/afs/file.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
#include <trace/events/netfs.h>
2020
#include "internal.h"
2121

22-
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
22+
static int afs_file_mmap_prepare(struct vm_area_desc *desc);
2323

2424
static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter);
2525
static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos,
@@ -35,7 +35,7 @@ const struct file_operations afs_file_operations = {
3535
.llseek = generic_file_llseek,
3636
.read_iter = afs_file_read_iter,
3737
.write_iter = netfs_file_write_iter,
38-
.mmap = afs_file_mmap,
38+
.mmap_prepare = afs_file_mmap_prepare,
3939
.splice_read = afs_file_splice_read,
4040
.splice_write = iter_file_splice_write,
4141
.fsync = afs_fsync,
@@ -492,16 +492,16 @@ static void afs_drop_open_mmap(struct afs_vnode *vnode)
492492
/*
493493
* Handle setting up a memory mapping on an AFS file.
494494
*/
495-
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma)
495+
static int afs_file_mmap_prepare(struct vm_area_desc *desc)
496496
{
497-
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
497+
struct afs_vnode *vnode = AFS_FS_I(file_inode(desc->file));
498498
int ret;
499499

500500
afs_add_open_mmap(vnode);
501501

502-
ret = generic_file_mmap(file, vma);
502+
ret = generic_file_mmap_prepare(desc);
503503
if (ret == 0)
504-
vma->vm_ops = &afs_vm_ops;
504+
desc->vm_ops = &afs_vm_ops;
505505
else
506506
afs_drop_open_mmap(vnode);
507507
return ret;

fs/aio.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -392,15 +392,15 @@ static const struct vm_operations_struct aio_ring_vm_ops = {
392392
#endif
393393
};
394394

395-
static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
395+
static int aio_ring_mmap_prepare(struct vm_area_desc *desc)
396396
{
397-
vm_flags_set(vma, VM_DONTEXPAND);
398-
vma->vm_ops = &aio_ring_vm_ops;
397+
desc->vm_flags |= VM_DONTEXPAND;
398+
desc->vm_ops = &aio_ring_vm_ops;
399399
return 0;
400400
}
401401

402402
static const struct file_operations aio_ring_fops = {
403-
.mmap = aio_ring_mmap,
403+
.mmap_prepare = aio_ring_mmap_prepare,
404404
};
405405

406406
#if IS_ENABLED(CONFIG_MIGRATION)

fs/backing-file.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -333,13 +333,13 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
333333
if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
334334
return -EIO;
335335

336-
if (!file->f_op->mmap)
336+
if (!can_mmap_file(file))
337337
return -ENODEV;
338338

339339
vma_set_file(vma, file);
340340

341341
old_cred = override_creds(ctx->cred);
342-
ret = call_mmap(vma->vm_file, vma);
342+
ret = vfs_mmap(vma->vm_file, vma);
343343
revert_creds(old_cred);
344344

345345
if (ctx->accessed)

0 commit comments

Comments
 (0)