Skip to content

Commit 52c81fd

Browse files
lorenzo-stoakes authored and gregkh committed
mm: resolve faulty mmap_region() error path behaviour
[ Upstream commit 5de195060b2e251a835f622759550e6202167641 ]

The mmap_region() function is somewhat terrifying, with spaghetti-like control flow and numerous means by which issues can arise and incomplete state, memory leaks and other unpleasantness can occur.

A large amount of the complexity arises from trying to handle errors late in the process of mapping a VMA, which forms the basis of recently observed issues with resource leaks and observable inconsistent state.

Taking advantage of previous patches in this series we move a number of checks earlier in the code, simplifying things by moving the core of the logic into a static internal function __mmap_region().

Doing this allows us to perform a number of checks up front before we do any real work, and allows us to unwind the writable unmap check unconditionally as required and to perform a CONFIG_DEBUG_VM_MAPLE_TREE validation unconditionally also.

We move a number of things here:

1. We preallocate memory for the iterator before we call the file-backed memory hook, allowing us to exit early and avoid having to perform complicated and error-prone close/free logic. We carefully free iterator state on both success and error paths.

2. The enclosing mmap_region() function handles the mapping_map_writable() logic early. Previously the logic had the mapping_map_writable() at the point of mapping a newly allocated file-backed VMA, and a matching mapping_unmap_writable() on success and error paths.

   We now do this unconditionally if this is a file-backed, shared writable mapping. If a driver changes the flags to eliminate VM_MAYWRITE, doing so does not invalidate the seal check we just performed, and in any case we always decrement the counter in the wrapper.

   We perform a debug assert to ensure a driver does not attempt to do the opposite.

3. We also move arch_validate_flags() up into the mmap_region() function. This is only relevant on arm64 and sparc64, and the check is only meaningful for SPARC with ADI enabled. We explicitly add a warning for this arch if a driver invalidates this check, though the code ought eventually to be fixed to eliminate the need for this.

With all of these measures in place, we no longer need to explicitly close the VMA on error paths, as we place all checks which might fail prior to a call to any driver mmap hook.

This eliminates an entire class of errors, makes the code easier to reason about and more robust.

Link: https://lkml.kernel.org/r/6e0becb36d2f5472053ac5d544c0edfe9b899e25.1730224667.git.lorenzo.stoakes@oracle.com
Fixes: deb0f65 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
Signed-off-by: Lorenzo Stoakes <[email protected]>
Reported-by: Jann Horn <[email protected]>
Reviewed-by: Liam R. Howlett <[email protected]>
Reviewed-by: Vlastimil Babka <[email protected]>
Tested-by: Mark Brown <[email protected]>
Cc: Andreas Larsson <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: David S. Miller <[email protected]>
Cc: Helge Deller <[email protected]>
Cc: James E.J. Bottomley <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Lorenzo Stoakes <[email protected]>
Signed-off-by: Greg Kroah-Hartman <[email protected]>
1 parent 8fad7b0 commit 52c81fd

File tree

1 file changed

+57
-47
lines changed

1 file changed

+57
-47
lines changed

mm/mmap.c

Lines changed: 57 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -2652,7 +2652,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
26522652
return do_mas_munmap(&mas, mm, start, len, uf, false);
26532653
}
26542654

2655-
unsigned long mmap_region(struct file *file, unsigned long addr,
2655+
static unsigned long __mmap_region(struct file *file, unsigned long addr,
26562656
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
26572657
struct list_head *uf)
26582658
{
@@ -2750,26 +2750,28 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
27502750
vma->vm_page_prot = vm_get_page_prot(vm_flags);
27512751
vma->vm_pgoff = pgoff;
27522752

2753-
if (file) {
2754-
if (vm_flags & VM_SHARED) {
2755-
error = mapping_map_writable(file->f_mapping);
2756-
if (error)
2757-
goto free_vma;
2758-
}
2753+
if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
2754+
error = -ENOMEM;
2755+
goto free_vma;
2756+
}
27592757

2758+
if (file) {
27602759
vma->vm_file = get_file(file);
27612760
error = mmap_file(file, vma);
27622761
if (error)
2763-
goto unmap_and_free_vma;
2762+
goto unmap_and_free_file_vma;
2763+
2764+
/* Drivers cannot alter the address of the VMA. */
2765+
WARN_ON_ONCE(addr != vma->vm_start);
27642766

27652767
/*
2766-
* Expansion is handled above, merging is handled below.
2767-
* Drivers should not alter the address of the VMA.
2768+
* Drivers should not permit writability when previously it was
2769+
* disallowed.
27682770
*/
2769-
if (WARN_ON((addr != vma->vm_start))) {
2770-
error = -EINVAL;
2771-
goto close_and_free_vma;
2772-
}
2771+
VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
2772+
!(vm_flags & VM_MAYWRITE) &&
2773+
(vma->vm_flags & VM_MAYWRITE));
2774+
27732775
mas_reset(&mas);
27742776

27752777
/*
@@ -2792,39 +2794,24 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
27922794
vma = merge;
27932795
/* Update vm_flags to pick up the change. */
27942796
vm_flags = vma->vm_flags;
2795-
goto unmap_writable;
2797+
mas_destroy(&mas);
2798+
goto file_expanded;
27962799
}
27972800
}
27982801

27992802
vm_flags = vma->vm_flags;
28002803
} else if (vm_flags & VM_SHARED) {
28012804
error = shmem_zero_setup(vma);
28022805
if (error)
2803-
goto free_vma;
2806+
goto free_iter_vma;
28042807
} else {
28052808
vma_set_anonymous(vma);
28062809
}
28072810

2808-
/* Allow architectures to sanity-check the vm_flags */
2809-
if (!arch_validate_flags(vma->vm_flags)) {
2810-
error = -EINVAL;
2811-
if (file)
2812-
goto close_and_free_vma;
2813-
else if (vma->vm_file)
2814-
goto unmap_and_free_vma;
2815-
else
2816-
goto free_vma;
2817-
}
2818-
2819-
if (mas_preallocate(&mas, vma, GFP_KERNEL)) {
2820-
error = -ENOMEM;
2821-
if (file)
2822-
goto close_and_free_vma;
2823-
else if (vma->vm_file)
2824-
goto unmap_and_free_vma;
2825-
else
2826-
goto free_vma;
2827-
}
2811+
#ifdef CONFIG_SPARC64
2812+
/* TODO: Fix SPARC ADI! */
2813+
WARN_ON_ONCE(!arch_validate_flags(vm_flags));
2814+
#endif
28282815

28292816
if (vma->vm_file)
28302817
i_mmap_lock_write(vma->vm_file->f_mapping);
@@ -2847,10 +2834,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
28472834
*/
28482835
khugepaged_enter_vma(vma, vma->vm_flags);
28492836

2850-
/* Once vma denies write, undo our temporary denial count */
2851-
unmap_writable:
2852-
if (file && vm_flags & VM_SHARED)
2853-
mapping_unmap_writable(file->f_mapping);
2837+
file_expanded:
28542838
file = vma->vm_file;
28552839
expanded:
28562840
perf_event_mmap(vma);
@@ -2879,28 +2863,54 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
28792863

28802864
vma_set_page_prot(vma);
28812865

2882-
validate_mm(mm);
28832866
return addr;
28842867

2885-
close_and_free_vma:
2886-
vma_close(vma);
2887-
unmap_and_free_vma:
2868+
unmap_and_free_file_vma:
28882869
fput(vma->vm_file);
28892870
vma->vm_file = NULL;
28902871

28912872
/* Undo any partial mapping done by a device driver. */
28922873
unmap_region(mm, mas.tree, vma, prev, next, vma->vm_start, vma->vm_end);
2893-
if (file && (vm_flags & VM_SHARED))
2894-
mapping_unmap_writable(file->f_mapping);
2874+
free_iter_vma:
2875+
mas_destroy(&mas);
28952876
free_vma:
28962877
vm_area_free(vma);
28972878
unacct_error:
28982879
if (charged)
28992880
vm_unacct_memory(charged);
2900-
validate_mm(mm);
29012881
return error;
29022882
}
29032883

2884+
unsigned long mmap_region(struct file *file, unsigned long addr,
2885+
unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
2886+
struct list_head *uf)
2887+
{
2888+
unsigned long ret;
2889+
bool writable_file_mapping = false;
2890+
2891+
/* Allow architectures to sanity-check the vm_flags. */
2892+
if (!arch_validate_flags(vm_flags))
2893+
return -EINVAL;
2894+
2895+
/* Map writable and ensure this isn't a sealed memfd. */
2896+
if (file && (vm_flags & VM_SHARED)) {
2897+
int error = mapping_map_writable(file->f_mapping);
2898+
2899+
if (error)
2900+
return error;
2901+
writable_file_mapping = true;
2902+
}
2903+
2904+
ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
2905+
2906+
/* Clear our write mapping regardless of error. */
2907+
if (writable_file_mapping)
2908+
mapping_unmap_writable(file->f_mapping);
2909+
2910+
validate_mm(current->mm);
2911+
return ret;
2912+
}
2913+
29042914
static int __vm_munmap(unsigned long start, size_t len, bool downgrade)
29052915
{
29062916
int ret;

0 commit comments

Comments
 (0)