Skip to content

Commit 119a5fc

Browse files
Hugh Dickins authored and torvalds committed
khugepaged: collapse_pte_mapped_thp() protect the pmd lock
When retract_page_tables() removes a page table to make way for a huge pmd, it holds huge page lock, i_mmap_lock_write, mmap_write_trylock and pmd lock; but when collapse_pte_mapped_thp() does the same (to handle the case when the original mmap_write_trylock had failed), only mmap_write_trylock and pmd lock are held. That's not enough. One machine has twice crashed under load, with "BUG: spinlock bad magic" and GPF on 6b6b6b6b6b6b6b6b. Examining the second crash, page_vma_mapped_walk_done()'s spin_unlock of pvmw->ptl (serving page_referenced() on a file THP, that had found a page table at *pmd) discovers that the page table page and its lock have already been freed by the time it comes to unlock. Follow the example of retract_page_tables(), but we only need one of huge page lock or i_mmap_lock_write to secure against this: because it's the narrower lock, and because it simplifies collapse_pte_mapped_thp() to know the hpage earlier, choose to rely on huge page lock here. Fixes: 27e1f82 ("khugepaged: enable collapse pmd for pte-mapped THP") Signed-off-by: Hugh Dickins <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Acked-by: Kirill A. Shutemov <[email protected]> Cc: Andrea Arcangeli <[email protected]> Cc: Mike Kravetz <[email protected]> Cc: Song Liu <[email protected]> Cc: <[email protected]> [5.4+] Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Linus Torvalds <[email protected]>
1 parent 723a80d commit 119a5fc

File tree

1 file changed

+19
-25
lines changed

1 file changed

+19
-25
lines changed

mm/khugepaged.c

Lines changed: 19 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1412,7 +1412,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
14121412
{
14131413
unsigned long haddr = addr & HPAGE_PMD_MASK;
14141414
struct vm_area_struct *vma = find_vma(mm, haddr);
1415-
struct page *hpage = NULL;
1415+
struct page *hpage;
14161416
pte_t *start_pte, *pte;
14171417
pmd_t *pmd, _pmd;
14181418
spinlock_t *ptl;
@@ -1432,9 +1432,17 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
14321432
if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE))
14331433
return;
14341434

1435+
hpage = find_lock_page(vma->vm_file->f_mapping,
1436+
linear_page_index(vma, haddr));
1437+
if (!hpage)
1438+
return;
1439+
1440+
if (!PageHead(hpage))
1441+
goto drop_hpage;
1442+
14351443
pmd = mm_find_pmd(mm, haddr);
14361444
if (!pmd)
1437-
return;
1445+
goto drop_hpage;
14381446

14391447
start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl);
14401448

@@ -1453,30 +1461,11 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
14531461

14541462
page = vm_normal_page(vma, addr, *pte);
14551463

1456-
if (!page || !PageCompound(page))
1457-
goto abort;
1458-
1459-
if (!hpage) {
1460-
hpage = compound_head(page);
1461-
/*
1462-
* The mapping of the THP should not change.
1463-
*
1464-
* Note that uprobe, debugger, or MAP_PRIVATE may
1465-
* change the page table, but the new page will
1466-
* not pass PageCompound() check.
1467-
*/
1468-
if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping))
1469-
goto abort;
1470-
}
1471-
14721464
/*
1473-
* Confirm the page maps to the correct subpage.
1474-
*
1475-
* Note that uprobe, debugger, or MAP_PRIVATE may change
1476-
* the page table, but the new page will not pass
1477-
* PageCompound() check.
1465+
* Note that uprobe, debugger, or MAP_PRIVATE may change the
1466+
* page table, but the new page will not be a subpage of hpage.
14781467
*/
1479-
if (WARN_ON(hpage + i != page))
1468+
if (hpage + i != page)
14801469
goto abort;
14811470
count++;
14821471
}
@@ -1495,7 +1484,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
14951484
pte_unmap_unlock(start_pte, ptl);
14961485

14971486
/* step 3: set proper refcount and mm_counters. */
1498-
if (hpage) {
1487+
if (count) {
14991488
page_ref_sub(hpage, count);
15001489
add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count);
15011490
}
@@ -1506,10 +1495,15 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr)
15061495
spin_unlock(ptl);
15071496
mm_dec_nr_ptes(mm);
15081497
pte_free(mm, pmd_pgtable(_pmd));
1498+
1499+
drop_hpage:
1500+
unlock_page(hpage);
1501+
put_page(hpage);
15091502
return;
15101503

15111504
abort:
15121505
pte_unmap_unlock(start_pte, ptl);
1506+
goto drop_hpage;
15131507
}
15141508

15151509
static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)

0 commit comments

Comments (0)