Skip to content

Commit 9457056

Browse files
hnaz authored and torvalds committed
mm: madvise: MADV_DONTNEED_LOCKED
MADV_DONTNEED historically rejects mlocked ranges, but with MLOCK_ONFAULT and MCL_ONFAULT allowing to mlock without populating, there are valid use cases for depopulating locked ranges as well.

Users mlock memory to protect secrets. There are allocators for secure buffers that want in-use memory generally mlocked, but cleared and invalidated memory to give up the physical pages. This could be done with explicit munlock -> mlock calls on free -> alloc of course, but that adds two unnecessary syscalls, heavy mmap_sem write locks, vma splits and re-merges - only to get rid of the backing pages.

Users also mlockall(MCL_ONFAULT) to suppress sustained paging, but are okay with on-demand initial population. It seems valid to selectively free some memory during the lifetime of such a process, without having to mess with its overall policy.

Why add a separate flag? Isn't this a pretty niche use case?

- MADV_DONTNEED has been bailing on locked vmas forever. It's at least conceivable that someone, somewhere is relying on mlock to protect data from perhaps broader invalidation calls. Changing this behavior now could lead to quiet data corruption.

- It also clarifies expectations around MADV_FREE and maybe MADV_REMOVE. It avoids the situation where one quietly behaves differently than the others. MADV_FREE_LOCKED can be added later.

- The combination of mlock() and madvise() in the first place is probably niche. But where it happens, I'd say that dropping pages from a locked region once they don't contain secrets or won't page anymore is much saner than relying on mlock to protect memory from speculative or errant invalidation calls. It's just that we can't change the default behavior because of the two previous points.

Given that, an explicit new flag seems to make the most sense.
[[email protected]: fix mips build]
Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Johannes Weiner <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Reviewed-by: Mike Kravetz <[email protected]>
Reviewed-by: Shakeel Butt <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Cc: Nadav Amit <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Dr. David Alan Gilbert <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
1 parent 6c8e2a2 commit 9457056

File tree

6 files changed

+24
-10
lines changed

6 files changed

+24
-10
lines changed

arch/alpha/include/uapi/asm/mman.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,8 @@
7474
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
7575
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
7676

77+
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
78+
7779
/* compatibility flags */
7880
#define MAP_FILE 0
7981

arch/mips/include/uapi/asm/mman.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,8 @@
101101
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
102102
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
103103

104+
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
105+
104106
/* compatibility flags */
105107
#define MAP_FILE 0
106108

arch/parisc/include/uapi/asm/mman.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@
5555
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
5656
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
5757

58+
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
59+
5860
#define MADV_MERGEABLE 65 /* KSM may merge identical pages */
5961
#define MADV_UNMERGEABLE 66 /* KSM may not merge identical pages */
6062

arch/xtensa/include/uapi/asm/mman.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,8 @@
109109
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
110110
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
111111

112+
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
113+
112114
/* compatibility flags */
113115
#define MAP_FILE 0
114116

include/uapi/asm-generic/mman-common.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@
7575
#define MADV_POPULATE_READ 22 /* populate (prefault) page tables readable */
7676
#define MADV_POPULATE_WRITE 23 /* populate (prefault) page tables writable */
7777

78+
#define MADV_DONTNEED_LOCKED 24 /* like DONTNEED, but drop locked pages too */
79+
7880
/* compatibility flags */
7981
#define MAP_FILE 0
8082

mm/madvise.c

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ static int madvise_need_mmap_write(int behavior)
5252
case MADV_REMOVE:
5353
case MADV_WILLNEED:
5454
case MADV_DONTNEED:
55+
case MADV_DONTNEED_LOCKED:
5556
case MADV_COLD:
5657
case MADV_PAGEOUT:
5758
case MADV_FREE:
@@ -502,14 +503,9 @@ static void madvise_cold_page_range(struct mmu_gather *tlb,
502503
tlb_end_vma(tlb, vma);
503504
}
504505

505-
static inline bool can_madv_lru_non_huge_vma(struct vm_area_struct *vma)
506-
{
507-
return !(vma->vm_flags & (VM_LOCKED|VM_PFNMAP));
508-
}
509-
510506
static inline bool can_madv_lru_vma(struct vm_area_struct *vma)
511507
{
512-
return can_madv_lru_non_huge_vma(vma) && !is_vm_hugetlb_page(vma);
508+
return !(vma->vm_flags & (VM_LOCKED|VM_PFNMAP|VM_HUGETLB));
513509
}
514510

515511
static long madvise_cold(struct vm_area_struct *vma,
@@ -787,10 +783,16 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
787783
unsigned long *end,
788784
int behavior)
789785
{
790-
if (!is_vm_hugetlb_page(vma))
791-
return can_madv_lru_non_huge_vma(vma);
786+
if (!is_vm_hugetlb_page(vma)) {
787+
unsigned int forbidden = VM_PFNMAP;
788+
789+
if (behavior != MADV_DONTNEED_LOCKED)
790+
forbidden |= VM_LOCKED;
791+
792+
return !(vma->vm_flags & forbidden);
793+
}
792794

793-
if (behavior != MADV_DONTNEED)
795+
if (behavior != MADV_DONTNEED && behavior != MADV_DONTNEED_LOCKED)
794796
return false;
795797
if (start & ~huge_page_mask(hstate_vma(vma)))
796798
return false;
@@ -854,7 +856,7 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
854856
VM_WARN_ON(start >= end);
855857
}
856858

857-
if (behavior == MADV_DONTNEED)
859+
if (behavior == MADV_DONTNEED || behavior == MADV_DONTNEED_LOCKED)
858860
return madvise_dontneed_single_vma(vma, start, end);
859861
else if (behavior == MADV_FREE)
860862
return madvise_free_single_vma(vma, start, end);
@@ -993,6 +995,7 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
993995
return madvise_pageout(vma, prev, start, end);
994996
case MADV_FREE:
995997
case MADV_DONTNEED:
998+
case MADV_DONTNEED_LOCKED:
996999
return madvise_dontneed_free(vma, prev, start, end, behavior);
9971000
case MADV_POPULATE_READ:
9981001
case MADV_POPULATE_WRITE:
@@ -1123,6 +1126,7 @@ madvise_behavior_valid(int behavior)
11231126
case MADV_REMOVE:
11241127
case MADV_WILLNEED:
11251128
case MADV_DONTNEED:
1129+
case MADV_DONTNEED_LOCKED:
11261130
case MADV_FREE:
11271131
case MADV_COLD:
11281132
case MADV_PAGEOUT:

0 commit comments

Comments
 (0)