Skip to content

Commit 01d89b9

Browse files
xzpeter authored and akpm00 committed
mm/gup: fix hugepd handling in hugetlb rework
Commit a12083d added hugepd handling for gup-slow, reusing gup-fast functions. follow_hugepd() correctly took the vma pointer in, however didn't pass it over into the lower functions, which was overlooked. The issue is gup_fast_hugepte() uses the vma pointer to make the correct decision on whether an unshare is needed for a FOLL_PIN|FOLL_LONGTERM. Now without the vma pointer it will constantly return "true" (needs an unshare) for a page cache, even though in the SHARED case it will be wrong to unshare. The other problem is, even if an unshare is needed, it now returns 0 rather than -EMLINK, which will not trigger a follow up FAULT_FLAG_UNSHARE fault. That will need to be fixed too when the unshare is wanted. gup_longterm test didn't expose this issue in the past because it didn't yet test R/O unshare in this case, another separate patch will enable that in future tests. Fix it by passing vma correctly to the bottom, rename gup_fast_hugepte() back to gup_hugepte() as it is shared between the fast/slow paths, and also allow -EMLINK to be returned properly by gup_hugepte() even though gup-fast will take it the same as zero. Link: https://lkml.kernel.org/r/[email protected] Fixes: a12083d ("mm/gup: handle hugepd for follow_page()") Signed-off-by: Peter Xu <[email protected]> Reported-by: David Hildenbrand <[email protected]> Reviewed-by: David Hildenbrand <[email protected]> Cc: Aneesh Kumar K.V <[email protected]> Cc: Christophe Leroy <[email protected]> Cc: Jason Gunthorpe <[email protected]> Cc: John Hubbard <[email protected]> Cc: Lorenzo Stoakes <[email protected]> Cc: Muchun Song <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent 67f4c91 commit 01d89b9

File tree

1 file changed

+39
-25
lines changed

1 file changed

+39
-25
lines changed

mm/gup.c

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -525,9 +525,17 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
525525
return (__boundary - 1 < end - 1) ? __boundary : end;
526526
}
527527

528-
static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
529-
unsigned long end, unsigned int flags, struct page **pages,
530-
int *nr)
528+
/*
529+
* Returns 1 if succeeded, 0 if failed, -EMLINK if unshare needed.
530+
*
531+
* NOTE: for the same entry, gup-fast and gup-slow can return different
532+
* results (0 v.s. -EMLINK) depending on whether vma is available. This is
533+
* the expected behavior, where we simply want gup-fast to fallback to
534+
* gup-slow to take the vma reference first.
535+
*/
536+
static int gup_hugepte(struct vm_area_struct *vma, pte_t *ptep, unsigned long sz,
537+
unsigned long addr, unsigned long end, unsigned int flags,
538+
struct page **pages, int *nr)
531539
{
532540
unsigned long pte_end;
533541
struct page *page;
@@ -559,9 +567,9 @@ static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
559567
return 0;
560568
}
561569

562-
if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) {
570+
if (!pte_write(pte) && gup_must_unshare(vma, flags, &folio->page)) {
563571
gup_put_folio(folio, refs, flags);
564-
return 0;
572+
return -EMLINK;
565573
}
566574

567575
*nr += refs;
@@ -577,19 +585,22 @@ static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
577585
* of the other folios. See writable_file_mapping_allowed() and
578586
* gup_fast_folio_allowed() for more information.
579587
*/
580-
static int gup_fast_hugepd(hugepd_t hugepd, unsigned long addr,
581-
unsigned int pdshift, unsigned long end, unsigned int flags,
582-
struct page **pages, int *nr)
588+
static int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
589+
unsigned long addr, unsigned int pdshift,
590+
unsigned long end, unsigned int flags,
591+
struct page **pages, int *nr)
583592
{
584593
pte_t *ptep;
585594
unsigned long sz = 1UL << hugepd_shift(hugepd);
586595
unsigned long next;
596+
int ret;
587597

588598
ptep = hugepte_offset(hugepd, addr, pdshift);
589599
do {
590600
next = hugepte_addr_end(addr, end, sz);
591-
if (!gup_fast_hugepte(ptep, sz, addr, end, flags, pages, nr))
592-
return 0;
601+
ret = gup_hugepte(vma, ptep, sz, addr, end, flags, pages, nr);
602+
if (ret != 1)
603+
return ret;
593604
} while (ptep++, addr = next, addr != end);
594605

595606
return 1;
@@ -613,22 +624,25 @@ static struct page *follow_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
613624
h = hstate_vma(vma);
614625
ptep = hugepte_offset(hugepd, addr, pdshift);
615626
ptl = huge_pte_lock(h, vma->vm_mm, ptep);
616-
ret = gup_fast_hugepd(hugepd, addr, pdshift, addr + PAGE_SIZE,
617-
flags, &page, &nr);
627+
ret = gup_hugepd(vma, hugepd, addr, pdshift, addr + PAGE_SIZE,
628+
flags, &page, &nr);
618629
spin_unlock(ptl);
619630

620-
if (ret) {
631+
if (ret == 1) {
632+
/* GUP succeeded */
621633
WARN_ON_ONCE(nr != 1);
622634
ctx->page_mask = (1U << huge_page_order(h)) - 1;
623635
return page;
624636
}
625637

626-
return NULL;
638+
/* ret can be either 0 (translates to NULL) or negative */
639+
return ERR_PTR(ret);
627640
}
628641
#else /* CONFIG_ARCH_HAS_HUGEPD */
629-
static inline int gup_fast_hugepd(hugepd_t hugepd, unsigned long addr,
630-
unsigned int pdshift, unsigned long end, unsigned int flags,
631-
struct page **pages, int *nr)
642+
static inline int gup_hugepd(struct vm_area_struct *vma, hugepd_t hugepd,
643+
unsigned long addr, unsigned int pdshift,
644+
unsigned long end, unsigned int flags,
645+
struct page **pages, int *nr)
632646
{
633647
return 0;
634648
}
@@ -3261,8 +3275,8 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
32613275
* architecture have different format for hugetlbfs
32623276
* pmd format and THP pmd format
32633277
*/
3264-
if (!gup_fast_hugepd(__hugepd(pmd_val(pmd)), addr,
3265-
PMD_SHIFT, next, flags, pages, nr))
3278+
if (gup_hugepd(NULL, __hugepd(pmd_val(pmd)), addr,
3279+
PMD_SHIFT, next, flags, pages, nr) != 1)
32663280
return 0;
32673281
} else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags,
32683282
pages, nr))
@@ -3291,8 +3305,8 @@ static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,
32913305
pages, nr))
32923306
return 0;
32933307
} else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
3294-
if (!gup_fast_hugepd(__hugepd(pud_val(pud)), addr,
3295-
PUD_SHIFT, next, flags, pages, nr))
3308+
if (gup_hugepd(NULL, __hugepd(pud_val(pud)), addr,
3309+
PUD_SHIFT, next, flags, pages, nr) != 1)
32963310
return 0;
32973311
} else if (!gup_fast_pmd_range(pudp, pud, addr, next, flags,
32983312
pages, nr))
@@ -3318,8 +3332,8 @@ static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr,
33183332
return 0;
33193333
BUILD_BUG_ON(p4d_leaf(p4d));
33203334
if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
3321-
if (!gup_fast_hugepd(__hugepd(p4d_val(p4d)), addr,
3322-
P4D_SHIFT, next, flags, pages, nr))
3335+
if (gup_hugepd(NULL, __hugepd(p4d_val(p4d)), addr,
3336+
P4D_SHIFT, next, flags, pages, nr) != 1)
33233337
return 0;
33243338
} else if (!gup_fast_pud_range(p4dp, p4d, addr, next, flags,
33253339
pages, nr))
@@ -3347,8 +3361,8 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end,
33473361
pages, nr))
33483362
return;
33493363
} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
3350-
if (!gup_fast_hugepd(__hugepd(pgd_val(pgd)), addr,
3351-
PGDIR_SHIFT, next, flags, pages, nr))
3364+
if (gup_hugepd(NULL, __hugepd(pgd_val(pgd)), addr,
3365+
PGDIR_SHIFT, next, flags, pages, nr) != 1)
33523366
return;
33533367
} else if (!gup_fast_p4d_range(pgdp, pgd, addr, next, flags,
33543368
pages, nr))

0 commit comments

Comments (0)