Skip to content

Commit 6da8e96

Browse files
xzpeter authored and akpm00 committed
mm: new follow_pfnmap API
Introduce a pair of APIs to follow pfn mappings to get entry information. It's very similar to what follow_pte() does before, but different in that it recognizes huge pfn mappings. Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Peter Xu <[email protected]> Cc: Alexander Gordeev <[email protected]> Cc: Alex Williamson <[email protected]> Cc: Aneesh Kumar K.V <[email protected]> Cc: Borislav Petkov <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: Christian Borntraeger <[email protected]> Cc: Dave Hansen <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Gavin Shan <[email protected]> Cc: Gerald Schaefer <[email protected]> Cc: Heiko Carstens <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Jason Gunthorpe <[email protected]> Cc: Matthew Wilcox <[email protected]> Cc: Niklas Schnelle <[email protected]> Cc: Paolo Bonzini <[email protected]> Cc: Ryan Roberts <[email protected]> Cc: Sean Christopherson <[email protected]> Cc: Sven Schnelle <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: Vasily Gorbik <[email protected]> Cc: Will Deacon <[email protected]> Cc: Zi Yan <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent 0515e02 commit 6da8e96

File tree

2 files changed

+181
-0
lines changed

2 files changed

+181
-0
lines changed

include/linux/mm.h

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2373,6 +2373,37 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address,
23732373
int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
23742374
void *buf, int len, int write);
23752375

2376+
struct follow_pfnmap_args {
2377+
/**
2378+
* Inputs:
2379+
* @vma: Pointer to @vm_area_struct struct
2380+
* @address: the virtual address to walk
2381+
*/
2382+
struct vm_area_struct *vma;
2383+
unsigned long address;
2384+
/**
2385+
* Internals:
2386+
*
2387+
* The caller shouldn't touch any of these.
2388+
*/
2389+
spinlock_t *lock;
2390+
pte_t *ptep;
2391+
/**
2392+
* Outputs:
2393+
*
2394+
* @pfn: the PFN of the address
2395+
* @pgprot: the pgprot_t of the mapping
2396+
* @writable: whether the mapping is writable
2397+
* @special: whether the mapping is a special mapping (real PFN maps)
2398+
*/
2399+
unsigned long pfn;
2400+
pgprot_t pgprot;
2401+
bool writable;
2402+
bool special;
2403+
};
2404+
int follow_pfnmap_start(struct follow_pfnmap_args *args);
2405+
void follow_pfnmap_end(struct follow_pfnmap_args *args);
2406+
23762407
extern void truncate_pagecache(struct inode *inode, loff_t new);
23772408
extern void truncate_setsize(struct inode *inode, loff_t newsize);
23782409
void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);

mm/memory.c

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6172,6 +6172,156 @@ int follow_pte(struct vm_area_struct *vma, unsigned long address,
61726172
}
61736173
EXPORT_SYMBOL_GPL(follow_pte);
61746174

6175+
static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
6176+
spinlock_t *lock, pte_t *ptep,
6177+
pgprot_t pgprot, unsigned long pfn_base,
6178+
unsigned long addr_mask, bool writable,
6179+
bool special)
6180+
{
6181+
args->lock = lock;
6182+
args->ptep = ptep;
6183+
args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT);
6184+
args->pgprot = pgprot;
6185+
args->writable = writable;
6186+
args->special = special;
6187+
}
6188+
6189+
/*
 * With CONFIG_LOCKDEP enabled, assert that the caller holds the mm's
 * mmap_lock or, when the VMA has a file with an address_space, that
 * mapping's i_mmap_rwsem.  Compiles to nothing without lockdep.
 */
static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
{
#ifdef CONFIG_LOCKDEP
	struct file *file = vma->vm_file;
	/*
	 * vm_file is NULL for VMAs without a backing file, and this helper
	 * runs before the VM_IO/VM_PFNMAP check, so it must not dereference
	 * the file pointer unconditionally.
	 */
	struct address_space *mapping = file ? file->f_mapping : NULL;

	if (mapping)
		lockdep_assert(lockdep_is_held(&mapping->i_mmap_rwsem) ||
			       lockdep_is_held(&vma->vm_mm->mmap_lock));
	else
		lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock));
#endif
}
6201+
6202+
/**
6203+
* follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
6204+
* @args: Pointer to struct @follow_pfnmap_args
6205+
*
6206+
* The caller needs to setup args->vma and args->address to point to the
6207+
* virtual address as the target of such lookup. On a successful return,
6208+
* the results will be put into other output fields.
6209+
*
6210+
* After the caller finished using the fields, the caller must invoke
6211+
* another follow_pfnmap_end() to proper releases the locks and resources
6212+
* of such look up request.
6213+
*
6214+
* During the start() and end() calls, the results in @args will be valid
6215+
* as proper locks will be held. After the end() is called, all the fields
6216+
* in @follow_pfnmap_args will be invalid to be further accessed. Further
6217+
* use of such information after end() may require proper synchronizations
6218+
* by the caller with page table updates, otherwise it can create a
6219+
* security bug.
6220+
*
6221+
* If the PTE maps a refcounted page, callers are responsible to protect
6222+
* against invalidation with MMU notifiers; otherwise access to the PFN at
6223+
* a later point in time can trigger use-after-free.
6224+
*
6225+
* Only IO mappings and raw PFN mappings are allowed. The mmap semaphore
6226+
* should be taken for read, and the mmap semaphore cannot be released
6227+
* before the end() is invoked.
6228+
*
6229+
* This function must not be used to modify PTE content.
6230+
*
6231+
* Return: zero on success, negative otherwise.
6232+
*/
6233+
int follow_pfnmap_start(struct follow_pfnmap_args *args)
6234+
{
6235+
struct vm_area_struct *vma = args->vma;
6236+
unsigned long address = args->address;
6237+
struct mm_struct *mm = vma->vm_mm;
6238+
spinlock_t *lock;
6239+
pgd_t *pgdp;
6240+
p4d_t *p4dp, p4d;
6241+
pud_t *pudp, pud;
6242+
pmd_t *pmdp, pmd;
6243+
pte_t *ptep, pte;
6244+
6245+
pfnmap_lockdep_assert(vma);
6246+
6247+
if (unlikely(address < vma->vm_start || address >= vma->vm_end))
6248+
goto out;
6249+
6250+
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
6251+
goto out;
6252+
retry:
6253+
pgdp = pgd_offset(mm, address);
6254+
if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
6255+
goto out;
6256+
6257+
p4dp = p4d_offset(pgdp, address);
6258+
p4d = READ_ONCE(*p4dp);
6259+
if (p4d_none(p4d) || unlikely(p4d_bad(p4d)))
6260+
goto out;
6261+
6262+
pudp = pud_offset(p4dp, address);
6263+
pud = READ_ONCE(*pudp);
6264+
if (pud_none(pud))
6265+
goto out;
6266+
if (pud_leaf(pud)) {
6267+
lock = pud_lock(mm, pudp);
6268+
if (!unlikely(pud_leaf(pud))) {
6269+
spin_unlock(lock);
6270+
goto retry;
6271+
}
6272+
pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud),
6273+
pud_pfn(pud), PUD_MASK, pud_write(pud),
6274+
pud_special(pud));
6275+
return 0;
6276+
}
6277+
6278+
pmdp = pmd_offset(pudp, address);
6279+
pmd = pmdp_get_lockless(pmdp);
6280+
if (pmd_leaf(pmd)) {
6281+
lock = pmd_lock(mm, pmdp);
6282+
if (!unlikely(pmd_leaf(pmd))) {
6283+
spin_unlock(lock);
6284+
goto retry;
6285+
}
6286+
pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd),
6287+
pmd_pfn(pmd), PMD_MASK, pmd_write(pmd),
6288+
pmd_special(pmd));
6289+
return 0;
6290+
}
6291+
6292+
ptep = pte_offset_map_lock(mm, pmdp, address, &lock);
6293+
if (!ptep)
6294+
goto out;
6295+
pte = ptep_get(ptep);
6296+
if (!pte_present(pte))
6297+
goto unlock;
6298+
pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte),
6299+
pte_pfn(pte), PAGE_MASK, pte_write(pte),
6300+
pte_special(pte));
6301+
return 0;
6302+
unlock:
6303+
pte_unmap_unlock(ptep, lock);
6304+
out:
6305+
return -EINVAL;
6306+
}
6307+
EXPORT_SYMBOL_GPL(follow_pfnmap_start);
6308+
6309+
/**
6310+
* follow_pfnmap_end(): End a follow_pfnmap_start() process
6311+
* @args: Pointer to struct @follow_pfnmap_args
6312+
*
6313+
* Must be used in pair of follow_pfnmap_start(). See the start() function
6314+
* above for more information.
6315+
*/
6316+
void follow_pfnmap_end(struct follow_pfnmap_args *args)
6317+
{
6318+
if (args->lock)
6319+
spin_unlock(args->lock);
6320+
if (args->ptep)
6321+
pte_unmap(args->ptep);
6322+
}
6323+
EXPORT_SYMBOL_GPL(follow_pfnmap_end);
6324+
61756325
#ifdef CONFIG_HAVE_IOREMAP_PROT
61766326
/**
61776327
* generic_access_phys - generic implementation for iomem mmap access

0 commit comments

Comments
 (0)