
Commit 49b0638

surenbaghdasaryan authored and akpm00 committed
mm: enable page walking API to lock vmas during the walk
walk_page_range() and friends often operate under write-locked mmap_lock.  With
introduction of vma locks, the vmas have to be locked as well during such
walks to prevent concurrent page faults in these areas.  Add an additional
member to mm_walk_ops to indicate locking requirements for the walk.

The change ensures that page walks which prevent concurrent page faults by
write-locking mmap_lock, operate correctly after introduction of per-vma
locks.  With per-vma locks page faults can be handled under vma lock without
taking mmap_lock at all, so write locking mmap_lock would not stop them.  The
change ensures vmas are properly locked during such walks.

A sample issue this solves is do_mbind() performing queue_pages_range() to
queue pages for migration.  Without this change a concurrent page can be
faulted into the area and be left out of migration.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Suren Baghdasaryan <[email protected]>
Suggested-by: Linus Torvalds <[email protected]>
Suggested-by: Jann Horn <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Davidlohr Bueso <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Laurent Dufour <[email protected]>
Cc: Liam Howlett <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Michel Lespinasse <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Vlastimil Babka <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 8b9c1cc commit 49b0638
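
The patch itself is small: a new walk_lock field in struct mm_walk_ops (see
include/linux/pagewalk.h below) plus one annotation per call site.  The
enforcement side in mm/pagewalk.c is not among the hunks shown on this page;
as a rough sketch only, assuming a helper named process_vma_walk_lock and
the per-vma lock primitives vma_start_write()/vma_assert_write_locked(), the
walk core would apply the requested locking before visiting each vma:

/*
 * Sketch only: how the walk core can honor ops->walk_lock before
 * walking a vma.  The helper name is an assumption of this note;
 * vma_start_write() and vma_assert_write_locked() are the per-vma
 * lock primitives available under CONFIG_PER_VMA_LOCK.
 */
static void process_vma_walk_lock(struct vm_area_struct *vma,
				  enum page_walk_lock walk_lock)
{
#ifdef CONFIG_PER_VMA_LOCK
	switch (walk_lock) {
	case PGWALK_WRLOCK:
		/* write-lock the vma so per-vma-lock faults are excluded */
		vma_start_write(vma);
		break;
	case PGWALK_WRLOCK_VERIFY:
		/* caller claims it already write-locked the vma; verify */
		vma_assert_write_locked(vma);
		break;
	case PGWALK_RDLOCK:
		/* read walks rely on mmap_lock held for read */
		break;
	}
#endif
}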

File tree: 18 files changed, +100 −20 lines


arch/powerpc/mm/book3s64/subpage_prot.c (1 addition, 0 deletions)

@@ -145,6 +145,7 @@ static int subpage_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops subpage_walk_ops = {
 	.pmd_entry = subpage_walk_pmd_entry,
+	.walk_lock = PGWALK_WRLOCK_VERIFY,
 };
 
 static void subpage_mark_vma_nohuge(struct mm_struct *mm, unsigned long addr,

arch/riscv/mm/pageattr.c (1 addition, 0 deletions)

@@ -102,6 +102,7 @@ static const struct mm_walk_ops pageattr_ops = {
 	.pmd_entry = pageattr_pmd_entry,
 	.pte_entry = pageattr_pte_entry,
 	.pte_hole = pageattr_pte_hole,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,

arch/s390/mm/gmap.c (5 additions, 0 deletions)

@@ -2514,6 +2514,7 @@ static int thp_split_walk_pmd_entry(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops thp_split_walk_ops = {
 	.pmd_entry = thp_split_walk_pmd_entry,
+	.walk_lock = PGWALK_WRLOCK_VERIFY,
 };
 
 static inline void thp_split_mm(struct mm_struct *mm)
@@ -2565,6 +2566,7 @@ static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops zap_zero_walk_ops = {
 	.pmd_entry = __zap_zero_pages,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 /*
@@ -2655,6 +2657,7 @@ static const struct mm_walk_ops enable_skey_walk_ops = {
 	.hugetlb_entry = __s390_enable_skey_hugetlb,
 	.pte_entry = __s390_enable_skey_pte,
 	.pmd_entry = __s390_enable_skey_pmd,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 int s390_enable_skey(void)
@@ -2692,6 +2695,7 @@ static int __s390_reset_cmma(pte_t *pte, unsigned long addr,
 
 static const struct mm_walk_ops reset_cmma_walk_ops = {
 	.pte_entry = __s390_reset_cmma,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 void s390_reset_cmma(struct mm_struct *mm)
@@ -2728,6 +2732,7 @@ static int s390_gather_pages(pte_t *ptep, unsigned long addr,
 
 static const struct mm_walk_ops gather_pages_ops = {
 	.pte_entry = s390_gather_pages,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*

fs/proc/task_mmu.c (5 additions, 0 deletions)

@@ -757,12 +757,14 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops smaps_walk_ops = {
 	.pmd_entry = smaps_pte_range,
 	.hugetlb_entry = smaps_hugetlb_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static const struct mm_walk_ops smaps_shmem_walk_ops = {
 	.pmd_entry = smaps_pte_range,
 	.hugetlb_entry = smaps_hugetlb_range,
 	.pte_hole = smaps_pte_hole,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -1244,6 +1246,7 @@ static int clear_refs_test_walk(unsigned long start, unsigned long end,
 static const struct mm_walk_ops clear_refs_walk_ops = {
 	.pmd_entry = clear_refs_pte_range,
 	.test_walk = clear_refs_test_walk,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 static ssize_t clear_refs_write(struct file *file, const char __user *buf,
@@ -1621,6 +1624,7 @@ static const struct mm_walk_ops pagemap_ops = {
 	.pmd_entry = pagemap_pmd_range,
 	.pte_hole = pagemap_pte_hole,
 	.hugetlb_entry = pagemap_hugetlb_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*
@@ -1934,6 +1938,7 @@ static int gather_hugetlb_stats(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops show_numa_ops = {
 	.hugetlb_entry = gather_hugetlb_stats,
 	.pmd_entry = gather_pte_stats,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /*

include/linux/pagewalk.h (11 additions, 0 deletions)

@@ -6,6 +6,16 @@
 
 struct mm_walk;
 
+/* Locking requirement during a page walk. */
+enum page_walk_lock {
+	/* mmap_lock should be locked for read to stabilize the vma tree */
+	PGWALK_RDLOCK = 0,
+	/* vma will be write-locked during the walk */
+	PGWALK_WRLOCK = 1,
+	/* vma is expected to be already write-locked during the walk */
+	PGWALK_WRLOCK_VERIFY = 2,
+};
+
 /**
  * struct mm_walk_ops - callbacks for walk_page_range
  * @pgd_entry: if set, called for each non-empty PGD (top-level) entry
@@ -66,6 +76,7 @@ struct mm_walk_ops {
 	int (*pre_vma)(unsigned long start, unsigned long end,
 		       struct mm_walk *walk);
 	void (*post_vma)(struct mm_walk *walk);
+	enum page_walk_lock walk_lock;
 };
 
 /*
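
With the new field, a walker declares its locking requirement right next to
its callbacks.  For illustration, a minimal read-side user could look like
the following (my_pmd_entry and my_walk_ops are made-up names, not part of
this patch; the pattern matches the call sites in the other hunks):

/* Illustrative only: a read-side walker declaring PGWALK_RDLOCK. */
static int my_pmd_entry(pmd_t *pmd, unsigned long addr,
			unsigned long next, struct mm_walk *walk)
{
	/* examine *pmd for the range [addr, next) ... */
	return 0;
}

static const struct mm_walk_ops my_walk_ops = {
	.pmd_entry = my_pmd_entry,
	.walk_lock = PGWALK_RDLOCK,	/* mmap_lock held for read suffices */
};

/*
 * Caller side:
 *	mmap_read_lock(mm);
 *	walk_page_range(mm, start, end, &my_walk_ops, NULL);
 *	mmap_read_unlock(mm);
 */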

mm/damon/vaddr.c (2 additions, 0 deletions)

@@ -386,6 +386,7 @@ static int damon_mkold_hugetlb_entry(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops damon_mkold_ops = {
 	.pmd_entry = damon_mkold_pmd_entry,
 	.hugetlb_entry = damon_mkold_hugetlb_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void damon_va_mkold(struct mm_struct *mm, unsigned long addr)
@@ -525,6 +526,7 @@ static int damon_young_hugetlb_entry(pte_t *pte, unsigned long hmask,
 static const struct mm_walk_ops damon_young_ops = {
 	.pmd_entry = damon_young_pmd_entry,
 	.hugetlb_entry = damon_young_hugetlb_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static bool damon_va_young(struct mm_struct *mm, unsigned long addr,

mm/hmm.c (1 addition, 0 deletions)

@@ -562,6 +562,7 @@ static const struct mm_walk_ops hmm_walk_ops = {
 	.pte_hole = hmm_vma_walk_hole,
 	.hugetlb_entry = hmm_vma_walk_hugetlb_entry,
 	.test_walk = hmm_vma_walk_test,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 /**

mm/ksm.c (16 additions, 9 deletions)

@@ -455,6 +455,12 @@ static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long nex
 
 static const struct mm_walk_ops break_ksm_ops = {
 	.pmd_entry = break_ksm_pmd_entry,
+	.walk_lock = PGWALK_RDLOCK,
+};
+
+static const struct mm_walk_ops break_ksm_lock_vma_ops = {
+	.pmd_entry = break_ksm_pmd_entry,
+	.walk_lock = PGWALK_WRLOCK,
 };
 
 /*
@@ -470,16 +476,17 @@ static const struct mm_walk_ops break_ksm_ops = {
  * of the process that owns 'vma'.  We also do not want to enforce
  * protection keys here anyway.
  */
-static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
+static int break_ksm(struct vm_area_struct *vma, unsigned long addr, bool lock_vma)
 {
 	vm_fault_t ret = 0;
+	const struct mm_walk_ops *ops = lock_vma ?
+				&break_ksm_lock_vma_ops : &break_ksm_ops;
 
 	do {
 		int ksm_page;
 
 		cond_resched();
-		ksm_page = walk_page_range_vma(vma, addr, addr + 1,
-					       &break_ksm_ops, NULL);
+		ksm_page = walk_page_range_vma(vma, addr, addr + 1, ops, NULL);
 		if (WARN_ON_ONCE(ksm_page < 0))
 			return ksm_page;
 		if (!ksm_page)
@@ -565,7 +572,7 @@ static void break_cow(struct ksm_rmap_item *rmap_item)
 	mmap_read_lock(mm);
 	vma = find_mergeable_vma(mm, addr);
 	if (vma)
-		break_ksm(vma, addr);
+		break_ksm(vma, addr, false);
 	mmap_read_unlock(mm);
 }
 
@@ -871,7 +878,7 @@ static void remove_trailing_rmap_items(struct ksm_rmap_item **rmap_list)
  * in cmp_and_merge_page on one of the rmap_items we would be removing.
 */
 static int unmerge_ksm_pages(struct vm_area_struct *vma,
-			unsigned long start, unsigned long end)
+			unsigned long start, unsigned long end, bool lock_vma)
 {
 	unsigned long addr;
 	int err = 0;
@@ -882,7 +889,7 @@ static int unmerge_ksm_pages(struct vm_area_struct *vma,
 		if (signal_pending(current))
 			err = -ERESTARTSYS;
 		else
-			err = break_ksm(vma, addr);
+			err = break_ksm(vma, addr, lock_vma);
 	}
 	return err;
 }
@@ -1029,7 +1036,7 @@ static int unmerge_and_remove_all_rmap_items(void)
 			if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
 				continue;
 			err = unmerge_ksm_pages(vma,
-						vma->vm_start, vma->vm_end);
+						vma->vm_start, vma->vm_end, false);
 			if (err)
 				goto error;
 		}
@@ -2530,7 +2537,7 @@ static int __ksm_del_vma(struct vm_area_struct *vma)
 		return 0;
 
 	if (vma->anon_vma) {
-		err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end);
+		err = unmerge_ksm_pages(vma, vma->vm_start, vma->vm_end, true);
 		if (err)
 			return err;
 	}
@@ -2668,7 +2675,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		return 0;	/* just ignore the advice */
 
 	if (vma->anon_vma) {
-		err = unmerge_ksm_pages(vma, start, end);
+		err = unmerge_ksm_pages(vma, start, end, true);
 		if (err)
 			return err;
 	}
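
Note the pattern here: break_ksm() gains a lock_vma flag selecting between
two otherwise identical ops tables.  break_cow() passes false because, as the
hunk shows, it holds mmap_lock only for read, where write-locking the vma is
not permitted; the unmerge paths in __ksm_del_vma() and ksm_madvise() pass
true, as they run with mmap_lock held for write and so request PGWALK_WRLOCK.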

mm/madvise.c (3 additions, 0 deletions)

@@ -233,6 +233,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 
 static const struct mm_walk_ops swapin_walk_ops = {
 	.pmd_entry = swapin_walk_pmd_entry,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void shmem_swapin_range(struct vm_area_struct *vma,
@@ -534,6 +535,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops cold_walk_ops = {
 	.pmd_entry = madvise_cold_or_pageout_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void madvise_cold_page_range(struct mmu_gather *tlb,
@@ -757,6 +759,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
 
 static const struct mm_walk_ops madvise_free_walk_ops = {
 	.pmd_entry = madvise_free_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static int madvise_free_single_vma(struct vm_area_struct *vma,

mm/memcontrol.c (2 additions, 0 deletions)

@@ -6024,6 +6024,7 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops precharge_walk_ops = {
 	.pmd_entry = mem_cgroup_count_precharge_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
@@ -6303,6 +6304,7 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
 
 static const struct mm_walk_ops charge_walk_ops = {
 	.pmd_entry = mem_cgroup_move_charge_pte_range,
+	.walk_lock = PGWALK_RDLOCK,
 };
 
 static void mem_cgroup_move_charge(void)
