mm: new follow_pfnmap API

jira LE-3557
Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6
commit-author Peter Xu < [email protected]>
commit 6da8e9634bb7e3fdad9ae0e4db873a05036c4343
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6da8e963.failed

Introduce a pair of APIs to follow pfn mappings to get entry information.
It's very similar to what follow_pte() does before, but different in that
it recognizes huge pfn mappings.

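A minimal sketch of the intended calling pattern, assuming only what the
hunks below add (the my_lookup_pfn() helper is illustrative, not part of
this patch, and error handling is trimmed):

  #include <linux/mm.h>

  /* Look up the PFN backing @addr in a VM_IO/VM_PFNMAP vma. */
  static int my_lookup_pfn(struct vm_area_struct *vma, unsigned long addr,
                           unsigned long *pfn, bool *writable)
  {
          struct follow_pfnmap_args args = {
                  .vma = vma,        /* input: the mapping to walk */
                  .address = addr,   /* input: user virtual address */
          };
          int ret;

          /* The mmap lock must be held (read) across start()..end(). */
          ret = follow_pfnmap_start(&args);
          if (ret)
                  return ret;

          /* Output fields are only valid until follow_pfnmap_end(). */
          *pfn = args.pfn;
          *writable = args.writable;

          follow_pfnmap_end(&args);
          return 0;
  }
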
Link: https://lkml.kernel.org/r/ [email protected]
 Signed-off-by: Peter Xu < [email protected]>
 Cc: Alexander Gordeev < [email protected]>
 Cc: Alex Williamson < [email protected]>
 Cc: Aneesh Kumar K.V < [email protected]>
 Cc: Borislav Petkov < [email protected]>
 Cc: Catalin Marinas < [email protected]>
 Cc: Christian Borntraeger < [email protected]>
 Cc: Dave Hansen < [email protected]>
 Cc: David Hildenbrand < [email protected]>
 Cc: Gavin Shan < [email protected]>
 Cc: Gerald Schaefer < [email protected]>
 Cc: Heiko Carstens < [email protected]>
 Cc: Ingo Molnar < [email protected]>
 Cc: Jason Gunthorpe < [email protected]>
 Cc: Matthew Wilcox < [email protected]>
 Cc: Niklas Schnelle < [email protected]>
 Cc: Paolo Bonzini < [email protected]>
 Cc: Ryan Roberts < [email protected]>
 Cc: Sean Christopherson < [email protected]>
 Cc: Sven Schnelle < [email protected]>
 Cc: Thomas Gleixner < [email protected]>
 Cc: Vasily Gorbik < [email protected]>
 Cc: Will Deacon < [email protected]>

 Signed-off-by: Andrew Morton < [email protected]>
(cherry picked from commit 6da8e9634bb7e3fdad9ae0e4db873a05036c4343)
 Signed-off-by: Jonathan Maple < [email protected]>

# Conflicts:
# mm/memory.c
diff --cc mm/memory.c
index e2794e3b8919,3878bf69bc14..000000000000
--- a/mm/memory.c
+++ b/mm/memory.c
@@@ -5666,71 -6172,157 +5666,223 @@@ out
 }
 EXPORT_SYMBOL_GPL(follow_pte);

++<<<<<<< HEAD
 +/**
 + * follow_pfn - look up PFN at a user virtual address
 + * @vma: memory mapping
 + * @address: user virtual address
 + * @pfn: location to store found PFN
 + *
 + * Only IO mappings and raw PFN mappings are allowed.
 + *
 + * This function does not allow the caller to read the permissions
 + * of the PTE. Do not use it.
 + *
 + * Return: zero and the pfn at @pfn on success, -ve otherwise.
 + */
 +int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 + unsigned long *pfn)
 +{
 + int ret = -EINVAL;
 + spinlock_t *ptl;
 + pte_t *ptep;
 +
 + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 + return ret;
 +
 + ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
 + if (ret)
 + return ret;
 + *pfn = pte_pfn(ptep_get(ptep));
 + pte_unmap_unlock(ptep, ptl);
 + return 0;
 +}
 +EXPORT_SYMBOL(follow_pfn);
++=======
+ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
+ spinlock_t *lock, pte_t *ptep,
+ pgprot_t pgprot, unsigned long pfn_base,
+ unsigned long addr_mask, bool writable,
+ bool special)
+ {
+ args->lock = lock;
+ args->ptep = ptep;
+ args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT);
+ args->pgprot = pgprot;
+ args->writable = writable;
+ args->special = special;
+ }
+
+ static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
+ {
+ #ifdef CONFIG_LOCKDEP
+ struct address_space *mapping = vma->vm_file->f_mapping;
+
+ if (mapping)
+ lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) ||
+ lockdep_is_held(&vma->vm_mm->mmap_lock));
+ else
+ lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock));
+ #endif
+ }
+
+ /**
+ * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
+ * @args: Pointer to struct @follow_pfnmap_args
+ *
+ * The caller needs to setup args->vma and args->address to point to the
+ * virtual address as the target of such lookup. On a successful return,
+ * the results will be put into other output fields.
+ *
+ * After the caller finished using the fields, the caller must invoke
+ * another follow_pfnmap_end() to proper releases the locks and resources
+ * of such look up request.
+ *
+ * During the start() and end() calls, the results in @args will be valid
+ * as proper locks will be held. After the end() is called, all the fields
+ * in @follow_pfnmap_args will be invalid to be further accessed. Further
+ * use of such information after end() may require proper synchronizations
+ * by the caller with page table updates, otherwise it can create a
+ * security bug.
+ *
+ * If the PTE maps a refcounted page, callers are responsible to protect
+ * against invalidation with MMU notifiers; otherwise access to the PFN at
+ * a later point in time can trigger use-after-free.
+ *
+ * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore
+ * should be taken for read, and the mmap semaphore cannot be released
+ * before the end() is invoked.
+ *
+ * This function must not be used to modify PTE content.
+ *
+ * Return: zero on success, negative otherwise.
+ */
+ int follow_pfnmap_start(struct follow_pfnmap_args *args)
+ {
+ struct vm_area_struct *vma = args->vma;
+ unsigned long address = args->address;
+ struct mm_struct *mm = vma->vm_mm;
+ spinlock_t *lock;
+ pgd_t *pgdp;
+ p4d_t *p4dp, p4d;
+ pud_t *pudp, pud;
+ pmd_t *pmdp, pmd;
+ pte_t *ptep, pte;
+
+ pfnmap_lockdep_assert(vma);
+
+ if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+ goto out;
+
+ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+ goto out;
+ retry:
+ pgdp = pgd_offset(mm, address);
+ if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
+ goto out;
+
+ p4dp = p4d_offset(pgdp, address);
+ p4d = READ_ONCE(*p4dp);
+ if (p4d_none(p4d) || unlikely(p4d_bad(p4d)))
+ goto out;
+
+ pudp = pud_offset(p4dp, address);
+ pud = READ_ONCE(*pudp);
+ if (pud_none(pud))
+ goto out;
+ if (pud_leaf(pud)) {
+ lock = pud_lock(mm, pudp);
+ if (!unlikely(pud_leaf(pud))) {
+ spin_unlock(lock);
+ goto retry;
+ }
+ pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud),
+ pud_pfn(pud), PUD_MASK, pud_write(pud),
+ pud_special(pud));
+ return 0;
+ }
+
+ pmdp = pmd_offset(pudp, address);
+ pmd = pmdp_get_lockless(pmdp);
+ if (pmd_leaf(pmd)) {
+ lock = pmd_lock(mm, pmdp);
+ if (!unlikely(pmd_leaf(pmd))) {
+ spin_unlock(lock);
+ goto retry;
+ }
+ pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd),
+ pmd_pfn(pmd), PMD_MASK, pmd_write(pmd),
+ pmd_special(pmd));
+ return 0;
+ }
+
+ ptep = pte_offset_map_lock(mm, pmdp, address, &lock);
+ if (!ptep)
+ goto out;
+ pte = ptep_get(ptep);
+ if (!pte_present(pte))
+ goto unlock;
+ pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte),
+ pte_pfn(pte), PAGE_MASK, pte_write(pte),
+ pte_special(pte));
+ return 0;
+ unlock:
+ pte_unmap_unlock(ptep, lock);
+ out:
+ return -EINVAL;
+ }
+ EXPORT_SYMBOL_GPL(follow_pfnmap_start);
+
+ /**
+ * follow_pfnmap_end(): End a follow_pfnmap_start() process
+ * @args: Pointer to struct @follow_pfnmap_args
+ *
+ * Must be used in pair of follow_pfnmap_start(). See the start() function
+ * above for more information.
+ */
+ void follow_pfnmap_end(struct follow_pfnmap_args *args)
+ {
+ if (args->lock)
+ spin_unlock(args->lock);
+ if (args->ptep)
+ pte_unmap(args->ptep);
+ }
+ EXPORT_SYMBOL_GPL(follow_pfnmap_end);
++>>>>>>> 6da8e9634bb7 (mm: new follow_pfnmap API)

 #ifdef CONFIG_HAVE_IOREMAP_PROT
 +int follow_phys(struct vm_area_struct *vma,
 + unsigned long address, unsigned int flags,
 + unsigned long *prot, resource_size_t *phys)
 +{
 + int ret = -EINVAL;
 + pte_t *ptep, pte;
 + spinlock_t *ptl;
 +
 + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 + goto out;
 +
 + if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
 + goto out;
 + pte = ptep_get(ptep);
 +
 + /* Never return PFNs of anon folios in COW mappings. */
 + if (vm_normal_folio(vma, address, pte))
 + goto unlock;
 +
 + if ((flags & FOLL_WRITE) && !pte_write(pte))
 + goto unlock;
 +
 + *prot = pgprot_val(pte_pgprot(pte));
 + *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
 +
 + ret = 0;
 +unlock:
 + pte_unmap_unlock(ptep, ptl);
 +out:
 + return ret;
 +}
 +
 /**
 * generic_access_phys - generic implementation for iomem mmap access
 * @vma: the vma to access
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 196c481ec160..51f28b4e78fc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2436,6 +2436,37 @@ int follow_phys(struct vm_area_struct *vma, unsigned long address,
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 void *buf, int len, int write);

+struct follow_pfnmap_args {
+ /**
+ * Inputs:
+ * @vma: Pointer to @vm_area_struct struct
+ * @address: the virtual address to walk
+ */
+ struct vm_area_struct *vma;
+ unsigned long address;
+ /**
+ * Internals:
+ *
+ * The caller shouldn't touch any of these.
+ */
+ spinlock_t *lock;
+ pte_t *ptep;
+ /**
+ * Outputs:
+ *
+ * @pfn: the PFN of the address
+ * @pgprot: the pgprot_t of the mapping
+ * @writable: whether the mapping is writable
+ * @special: whether the mapping is a special mapping (real PFN maps)
+ */
+ unsigned long pfn;
+ pgprot_t pgprot;
+ bool writable;
+ bool special;
+};
+int follow_pfnmap_start(struct follow_pfnmap_args *args);
+void follow_pfnmap_end(struct follow_pfnmap_args *args);
+
 extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
* Unmerged path mm/memory.c
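
For huge pfn mappings, pfnmap_args_setup() offsets the returned pfn into the
leaf entry: pfn = pfn_base + ((address & ~addr_mask) >> PAGE_SHIFT). A worked
example of that arithmetic, as a stand-alone userspace mirror (the constants
assume 4 KiB base pages and a 2 MiB PMD leaf and are illustrative only, not
kernel code):

  #include <stdio.h>

  int main(void)
  {
          unsigned long page_shift = 12;                /* PAGE_SHIFT */
          unsigned long pmd_mask = ~((1UL << 21) - 1);  /* PMD_MASK for a 2 MiB leaf */
          unsigned long pfn_base = 0x100000UL;          /* pfn of the leaf's first page */
          unsigned long address = 0x7f0000012345UL;     /* user VA being looked up */

          /* Same arithmetic as pfnmap_args_setup(): add the page-sized offset
           * of the address within the huge mapping to the base pfn. */
          unsigned long pfn = pfn_base + ((address & ~pmd_mask) >> page_shift);

          printf("pfn = 0x%lx\n", pfn);                 /* prints 0x100012 */
          return 0;
  }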