Commit 0cef0bb

Ryan Roberts authored and akpm00 committed
mm: clear uffd-wp PTE/PMD state on mremap()
When mremap()ing a memory region previously registered with userfaultfd as write-protected but without UFFD_FEATURE_EVENT_REMAP, an inconsistency in flag clearing leads to a mismatch between the vma flags (which have uffd-wp cleared) and the pte/pmd flags (which do not have uffd-wp cleared). This mismatch causes a subsequent mprotect(PROT_WRITE) to trigger a warning in page_table_check_pte_flags() due to setting the pte to writable while uffd-wp is still set.

Fix this by always explicitly clearing the uffd-wp pte/pmd flags on any such mremap() so that the values are consistent with the existing clearing of VM_UFFD_WP. Be careful to clear the logical flag regardless of its physical form: a PTE bit, a swap PTE bit, or a PTE marker. Cover PTE, huge PMD and hugetlb paths.

Link: https://lkml.kernel.org/r/[email protected]
Co-developed-by: Mikołaj Lenczewski <[email protected]>
Signed-off-by: Mikołaj Lenczewski <[email protected]>
Signed-off-by: Ryan Roberts <[email protected]>
Closes: https://lore.kernel.org/linux-mm/[email protected]/
Fixes: 63b2d41 ("userfaultfd: wp: add the writeprotect API to userfaultfd ioctl")
Cc: David Hildenbrand <[email protected]>
Cc: Jann Horn <[email protected]>
Cc: Liam R. Howlett <[email protected]>
Cc: Lorenzo Stoakes <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Muchun Song <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Shuah Khan <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 4bcf297 commit 0cef0bb
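
For context, a minimal userspace sketch of the failing sequence described in the commit message above: register a range for userfaultfd write-protect without requesting UFFD_FEATURE_EVENT_REMAP, move it with mremap(), then make it writable with mprotect(). This is not the reporter's reproducer; the sizes, layout, and error handling are illustrative only, and the warning is only expected on unfixed kernels built with CONFIG_PAGE_TABLE_CHECK.

#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/userfaultfd.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	size_t len = 16 * 4096;

	int uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
	if (uffd < 0) { perror("userfaultfd"); return 1; }

	/* Deliberately do NOT request UFFD_FEATURE_EVENT_REMAP. */
	struct uffdio_api api = { .api = UFFD_API, .features = 0 };
	if (ioctl(uffd, UFFDIO_API, &api)) { perror("UFFDIO_API"); return 1; }

	char *mem = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (mem == MAP_FAILED) { perror("mmap"); return 1; }
	memset(mem, 1, len);	/* populate the PTEs before write-protecting */

	struct uffdio_register reg = {
		.range = { .start = (unsigned long)mem, .len = len },
		.mode = UFFDIO_REGISTER_MODE_WP,
	};
	if (ioctl(uffd, UFFDIO_REGISTER, &reg)) { perror("UFFDIO_REGISTER"); return 1; }

	struct uffdio_writeprotect wp = {
		.range = { .start = (unsigned long)mem, .len = len },
		.mode = UFFDIO_WRITEPROTECT_MODE_WP,
	};
	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp)) { perror("UFFDIO_WRITEPROTECT"); return 1; }

	/* Reserve a destination and force the mapping to move. Without
	 * UFFD_FEATURE_EVENT_REMAP, VM_UFFD_WP is dropped from the new vma,
	 * but (before this fix) the uffd-wp bit survives in the moved PTEs. */
	char *dst = mmap(NULL, len, PROT_NONE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (dst == MAP_FAILED) { perror("mmap dst"); return 1; }
	char *moved = mremap(mem, len, len, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
	if (moved == MAP_FAILED) { perror("mremap"); return 1; }

	/* Toggle protections so the protection-change path runs. Making the
	 * PTEs writable while they still carry uffd-wp is what trips the
	 * page_table_check warning on unfixed kernels. */
	if (mprotect(moved, len, PROT_READ)) { perror("mprotect ro"); return 1; }
	if (mprotect(moved, len, PROT_READ | PROT_WRITE)) { perror("mprotect rw"); return 1; }

	return 0;
}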

4 files changed: 68 additions & 2 deletions

include/linux/userfaultfd_k.h

Lines changed: 12 additions & 0 deletions
@@ -247,6 +247,13 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
 		vma_is_shmem(vma);
 }
 
+static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+{
+	struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx;
+
+	return uffd_ctx && (uffd_ctx->features & UFFD_FEATURE_EVENT_REMAP) == 0;
+}
+
 extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
 extern void dup_userfaultfd_complete(struct list_head *);
 void dup_userfaultfd_fail(struct list_head *);
@@ -402,6 +409,11 @@ static inline bool userfaultfd_wp_async(struct vm_area_struct *vma)
 	return false;
 }
 
+static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
+{
+	return false;
+}
+
 #endif /* CONFIG_USERFAULTFD */
 
 static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)

mm/huge_memory.c

Lines changed: 12 additions & 0 deletions
@@ -2206,6 +2206,16 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd)
 	return pmd;
 }
 
+static pmd_t clear_uffd_wp_pmd(pmd_t pmd)
+{
+	if (pmd_present(pmd))
+		pmd = pmd_clear_uffd_wp(pmd);
+	else if (is_swap_pmd(pmd))
+		pmd = pmd_swp_clear_uffd_wp(pmd);
+
+	return pmd;
+}
+
 bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 		  unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
 {
@@ -2244,6 +2254,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 			pgtable_trans_huge_deposit(mm, new_pmd, pgtable);
 		}
 		pmd = move_soft_dirty_pmd(pmd);
+		if (vma_has_uffd_without_event_remap(vma))
+			pmd = clear_uffd_wp_pmd(pmd);
 		set_pmd_at(mm, new_addr, new_pmd, pmd);
 		if (force_flush)
 			flush_pmd_tlb_range(vma, old_addr, old_addr + PMD_SIZE);

mm/hugetlb.c

Lines changed: 13 additions & 1 deletion
@@ -5402,6 +5402,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
 			  unsigned long new_addr, pte_t *src_pte, pte_t *dst_pte,
 			  unsigned long sz)
 {
+	bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
 	struct hstate *h = hstate_vma(vma);
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *src_ptl, *dst_ptl;
@@ -5418,7 +5419,18 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
 		spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
 
 	pte = huge_ptep_get_and_clear(mm, old_addr, src_pte);
-	set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+
+	if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+		huge_pte_clear(mm, new_addr, dst_pte, sz);
+	else {
+		if (need_clear_uffd_wp) {
+			if (pte_present(pte))
+				pte = huge_pte_clear_uffd_wp(pte);
+			else if (is_swap_pte(pte))
+				pte = pte_swp_clear_uffd_wp(pte);
+		}
+		set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
+	}
 
 	if (src_ptl != dst_ptl)
 		spin_unlock(src_ptl);

mm/mremap.c

Lines changed: 31 additions & 1 deletion
@@ -138,6 +138,7 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		struct vm_area_struct *new_vma, pmd_t *new_pmd,
 		unsigned long new_addr, bool need_rmap_locks)
 {
+	bool need_clear_uffd_wp = vma_has_uffd_without_event_remap(vma);
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
 	pmd_t dummy_pmdval;
@@ -216,7 +217,18 @@ static int move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 			force_flush = true;
 		pte = move_pte(pte, old_addr, new_addr);
 		pte = move_soft_dirty_pte(pte);
-		set_pte_at(mm, new_addr, new_pte, pte);
+
+		if (need_clear_uffd_wp && pte_marker_uffd_wp(pte))
+			pte_clear(mm, new_addr, new_pte);
+		else {
+			if (need_clear_uffd_wp) {
+				if (pte_present(pte))
+					pte = pte_clear_uffd_wp(pte);
+				else if (is_swap_pte(pte))
+					pte = pte_swp_clear_uffd_wp(pte);
+			}
+			set_pte_at(mm, new_addr, new_pte, pte);
+		}
 	}
 
 	arch_leave_lazy_mmu_mode();
@@ -278,6 +290,15 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
 	if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
 		return false;
 
+	/* If this pmd belongs to a uffd vma with remap events disabled, we need
+	 * to ensure that the uffd-wp state is cleared from all pgtables. This
+	 * means recursing into lower page tables in move_page_tables(), and we
+	 * can reuse the existing code if we simply treat the entry as "not
+	 * moved".
+	 */
+	if (vma_has_uffd_without_event_remap(vma))
+		return false;
+
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.
@@ -333,6 +354,15 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
 	if (WARN_ON_ONCE(!pud_none(*new_pud)))
 		return false;
 
+	/* If this pud belongs to a uffd vma with remap events disabled, we need
+	 * to ensure that the uffd-wp state is cleared from all pgtables. This
+	 * means recursing into lower page tables in move_page_tables(), and we
+	 * can reuse the existing code if we simply treat the entry as "not
+	 * moved".
+	 */
+	if (vma_has_uffd_without_event_remap(vma))
+		return false;
+
 	/*
 	 * We don't have to worry about the ordering of src and dst
 	 * ptlocks because exclusive mmap_lock prevents deadlock.