|
| 1 | +mm: pass VMA instead of MM to follow_pte() |
| 2 | + |
| 3 | +jira LE-3557 |
| 4 | +Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6 |
| 5 | +commit-author David Hildenbrand < [email protected]> |
| 6 | +commit 29ae7d96d166fa08c7232daf8a314ef5ba1efd20 |
| 7 | +Empty-Commit: Cherry-Pick Conflicts during history rebuild. |
| 8 | +Will be included in final tarball splat. Ref for failed cherry-pick at: |
| 9 | +ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/29ae7d96.failed |
| 10 | + |
| 11 | +... and centralize the VM_IO/VM_PFNMAP sanity check in there. We'll |
| 12 | +now also perform these sanity checks for direct follow_pte() |
| 13 | +invocations. |
| 14 | + |
| 15 | +For generic_access_phys(), we might now check multiple times: nothing to |
| 16 | +worry about, really. |
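
[Editor's note, not part of the upstream commit message: the hunks below show the new calling convention — callers pass the VMA rather than its mm, and follow_pte() itself now rejects mappings that are neither VM_IO nor VM_PFNMAP. A minimal caller sketch of that pattern follows, assuming the post-29ae7d96d166 signature and standard helpers (vma_lookup(), ptep_get(), pte_unmap_unlock()) as they appear in the diff; the function name lookup_pfnmap_pfn() is hypothetical and purely illustrative.]

    #include <linux/mm.h>

    /*
     * Illustrative sketch: resolve the PFN backing an address in a
     * VM_PFNMAP/VM_IO mapping using the new follow_pte() signature.
     * No locks are held on entry; the mmap read lock is taken here
     * because follow_pte() requires it.
     */
    static int lookup_pfnmap_pfn(struct mm_struct *mm, unsigned long addr,
                                 unsigned long *pfn)
    {
        struct vm_area_struct *vma;
        spinlock_t *ptl;
        pte_t *ptep;
        int ret = -EINVAL;

        mmap_read_lock(mm);
        vma = vma_lookup(mm, addr);
        if (!vma)
            goto out;

        /* The VM_IO/VM_PFNMAP sanity check now lives inside follow_pte(). */
        ret = follow_pte(vma, addr, &ptep, &ptl);
        if (ret)
            goto out;

        *pfn = pte_pfn(ptep_get(ptep));
        pte_unmap_unlock(ptep, ptl);
    out:
        mmap_read_unlock(mm);
        return ret;
    }
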
| 17 | + |
| 18 | +Link: https://lkml.kernel.org/r/ [email protected] |
| 19 | + Signed-off-by: David Hildenbrand < [email protected]> |
| 20 | + Acked-by: Sean Christopherson < [email protected]> [KVM] |
| 21 | + Cc: Alex Williamson < [email protected]> |
| 22 | + Cc: Christoph Hellwig < [email protected]> |
| 23 | + |
| 24 | + Cc: Gerald Schaefer < [email protected]> |
| 25 | + Cc: Heiko Carstens < [email protected]> |
| 26 | + Cc: Ingo Molnar < [email protected]> |
| 27 | + Cc: Paolo Bonzini < [email protected]> |
| 28 | + Cc: Yonghua Huang < [email protected]> |
| 29 | + Signed-off-by: Andrew Morton < [email protected]> |
| 30 | +(cherry picked from commit 29ae7d96d166fa08c7232daf8a314ef5ba1efd20) |
| 31 | + Signed-off-by: Jonathan Maple < [email protected]> |
| 32 | + |
| 33 | +# Conflicts: |
| 34 | +# arch/x86/mm/pat/memtype.c |
| 35 | +# drivers/virt/acrn/mm.c |
| 36 | +diff --cc arch/x86/mm/pat/memtype.c |
| 37 | +index 36b603d0cdde,bdc2a240c2aa..000000000000 |
| 38 | +--- a/arch/x86/mm/pat/memtype.c |
| 39 | ++++ b/arch/x86/mm/pat/memtype.c |
| 40 | +@@@ -947,6 -948,29 +947,32 @@@ static void free_pfn_range(u64 paddr, u |
| 41 | + memtype_free(paddr, paddr + size); |
| 42 | + } |
| 43 | + |
| 44 | +++<<<<<<< HEAD |
| 45 | +++======= |
| 46 | ++ static int follow_phys(struct vm_area_struct *vma, unsigned long *prot, |
| 47 | ++ resource_size_t *phys) |
| 48 | ++ { |
| 49 | ++ pte_t *ptep, pte; |
| 50 | ++ spinlock_t *ptl; |
| 51 | ++ |
| 52 | ++ if (follow_pte(vma, vma->vm_start, &ptep, &ptl)) |
| 53 | ++ return -EINVAL; |
| 54 | ++ |
| 55 | ++ pte = ptep_get(ptep); |
| 56 | ++ |
| 57 | ++ /* Never return PFNs of anon folios in COW mappings. */ |
| 58 | ++ if (vm_normal_folio(vma, vma->vm_start, pte)) { |
| 59 | ++ pte_unmap_unlock(ptep, ptl); |
| 60 | ++ return -EINVAL; |
| 61 | ++ } |
| 62 | ++ |
| 63 | ++ *prot = pgprot_val(pte_pgprot(pte)); |
| 64 | ++ *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT; |
| 65 | ++ pte_unmap_unlock(ptep, ptl); |
| 66 | ++ return 0; |
| 67 | ++ } |
| 68 | ++ |
| 69 | +++>>>>>>> 29ae7d96d166 (mm: pass VMA instead of MM to follow_pte()) |
| 70 | + static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, |
| 71 | + pgprot_t *pgprot) |
| 72 | + { |
| 73 | +diff --cc drivers/virt/acrn/mm.c |
| 74 | +index c4f2e15c8a2b,db8ff1d0ac23..000000000000 |
| 75 | +--- a/drivers/virt/acrn/mm.c |
| 76 | ++++ b/drivers/virt/acrn/mm.c |
| 77 | +@@@ -168,7 -170,69 +168,73 @@@ int acrn_vm_ram_map(struct acrn_vm *vm |
| 78 | + |
| 79 | + /* Get the page number of the map region */ |
| 80 | + nr_pages = memmap->len >> PAGE_SHIFT; |
| 81 | +++<<<<<<< HEAD |
| 82 | + + pages = vzalloc(nr_pages * sizeof(struct page *)); |
| 83 | +++======= |
| 84 | ++ if (!nr_pages) |
| 85 | ++ return -EINVAL; |
| 86 | ++ |
| 87 | ++ mmap_read_lock(current->mm); |
| 88 | ++ vma = vma_lookup(current->mm, memmap->vma_base); |
| 89 | ++ if (vma && ((vma->vm_flags & VM_PFNMAP) != 0)) { |
| 90 | ++ unsigned long start_pfn, cur_pfn; |
| 91 | ++ spinlock_t *ptl; |
| 92 | ++ bool writable; |
| 93 | ++ pte_t *ptep; |
| 94 | ++ |
| 95 | ++ if ((memmap->vma_base + memmap->len) > vma->vm_end) { |
| 96 | ++ mmap_read_unlock(current->mm); |
| 97 | ++ return -EINVAL; |
| 98 | ++ } |
| 99 | ++ |
| 100 | ++ for (i = 0; i < nr_pages; i++) { |
| 101 | ++ ret = follow_pte(vma, memmap->vma_base + i * PAGE_SIZE, |
| 102 | ++ &ptep, &ptl); |
| 103 | ++ if (ret) |
| 104 | ++ break; |
| 105 | ++ |
| 106 | ++ cur_pfn = pte_pfn(ptep_get(ptep)); |
| 107 | ++ if (i == 0) |
| 108 | ++ start_pfn = cur_pfn; |
| 109 | ++ writable = !!pte_write(ptep_get(ptep)); |
| 110 | ++ pte_unmap_unlock(ptep, ptl); |
| 111 | ++ |
| 112 | ++ /* Disallow write access if the PTE is not writable. */ |
| 113 | ++ if (!writable && |
| 114 | ++ (memmap->attr & ACRN_MEM_ACCESS_WRITE)) { |
| 115 | ++ ret = -EFAULT; |
| 116 | ++ break; |
| 117 | ++ } |
| 118 | ++ |
| 119 | ++ /* Disallow refcounted pages. */ |
| 120 | ++ if (pfn_valid(cur_pfn) && |
| 121 | ++ !PageReserved(pfn_to_page(cur_pfn))) { |
| 122 | ++ ret = -EFAULT; |
| 123 | ++ break; |
| 124 | ++ } |
| 125 | ++ |
| 126 | ++ /* Disallow non-contiguous ranges. */ |
| 127 | ++ if (cur_pfn != start_pfn + i) { |
| 128 | ++ ret = -EINVAL; |
| 129 | ++ break; |
| 130 | ++ } |
| 131 | ++ } |
| 132 | ++ mmap_read_unlock(current->mm); |
| 133 | ++ |
| 134 | ++ if (ret) { |
| 135 | ++ dev_dbg(acrn_dev.this_device, |
| 136 | ++ "Failed to lookup PFN at VMA:%pK.\n", (void *)memmap->vma_base); |
| 137 | ++ return ret; |
| 138 | ++ } |
| 139 | ++ |
| 140 | ++ return acrn_mm_region_add(vm, memmap->user_vm_pa, |
| 141 | ++ PFN_PHYS(start_pfn), memmap->len, |
| 142 | ++ ACRN_MEM_TYPE_WB, memmap->attr); |
| 143 | ++ } |
| 144 | ++ mmap_read_unlock(current->mm); |
| 145 | ++ |
| 146 | ++ pages = vzalloc(array_size(nr_pages, sizeof(*pages))); |
| 147 | +++>>>>>>> 29ae7d96d166 (mm: pass VMA instead of MM to follow_pte()) |
| 148 | + if (!pages) |
| 149 | + return -ENOMEM; |
| 150 | + |
| 151 | +diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c |
| 152 | +index 588089332931..bca6af2ee723 100644 |
| 153 | +--- a/arch/s390/pci/pci_mmio.c |
| 154 | ++++ b/arch/s390/pci/pci_mmio.c |
| 155 | +@@ -169,7 +169,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, |
| 156 | + if (!(vma->vm_flags & VM_WRITE)) |
| 157 | + goto out_unlock_mmap; |
| 158 | + |
| 159 | +- ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); |
| 160 | ++ ret = follow_pte(vma, mmio_addr, &ptep, &ptl); |
| 161 | + if (ret) |
| 162 | + goto out_unlock_mmap; |
| 163 | + |
| 164 | +@@ -308,7 +308,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, |
| 165 | + if (!(vma->vm_flags & VM_WRITE)) |
| 166 | + goto out_unlock_mmap; |
| 167 | + |
| 168 | +- ret = follow_pte(vma->vm_mm, mmio_addr, &ptep, &ptl); |
| 169 | ++ ret = follow_pte(vma, mmio_addr, &ptep, &ptl); |
| 170 | + if (ret) |
| 171 | + goto out_unlock_mmap; |
| 172 | + |
| 173 | +* Unmerged path arch/x86/mm/pat/memtype.c |
| 174 | +diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c |
| 175 | +index 6c6586af7953..ec4d0003ba2f 100644 |
| 176 | +--- a/drivers/vfio/vfio_iommu_type1.c |
| 177 | ++++ b/drivers/vfio/vfio_iommu_type1.c |
| 178 | +@@ -520,7 +520,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, |
| 179 | + spinlock_t *ptl; |
| 180 | + int ret; |
| 181 | + |
| 182 | +- ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); |
| 183 | ++ ret = follow_pte(vma, vaddr, &ptep, &ptl); |
| 184 | + if (ret) { |
| 185 | + bool unlocked = false; |
| 186 | + |
| 187 | +@@ -534,7 +534,7 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, |
| 188 | + if (ret) |
| 189 | + return ret; |
| 190 | + |
| 191 | +- ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); |
| 192 | ++ ret = follow_pte(vma, vaddr, &ptep, &ptl); |
| 193 | + if (ret) |
| 194 | + return ret; |
| 195 | + } |
| 196 | +* Unmerged path drivers/virt/acrn/mm.c |
| 197 | +diff --git a/include/linux/mm.h b/include/linux/mm.h |
| 198 | +index 196c481ec160..b85fd05660e5 100644 |
| 199 | +--- a/include/linux/mm.h |
| 200 | ++++ b/include/linux/mm.h |
| 201 | +@@ -2427,7 +2427,7 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, |
| 202 | + unsigned long end, unsigned long floor, unsigned long ceiling); |
| 203 | + int |
| 204 | + copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); |
| 205 | +-int follow_pte(struct mm_struct *mm, unsigned long address, |
| 206 | ++int follow_pte(struct vm_area_struct *vma, unsigned long address, |
| 207 | + pte_t **ptepp, spinlock_t **ptlp); |
| 208 | + int follow_pfn(struct vm_area_struct *vma, unsigned long address, |
| 209 | + unsigned long *pfn); |
| 210 | +diff --git a/mm/memory.c b/mm/memory.c |
| 211 | +index e2794e3b8919..6706b9830402 100644 |
| 212 | +--- a/mm/memory.c |
| 213 | ++++ b/mm/memory.c |
| 214 | +@@ -5609,7 +5609,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) |
| 215 | + |
| 216 | + /** |
| 217 | + * follow_pte - look up PTE at a user virtual address |
| 218 | +- * @mm: the mm_struct of the target address space |
| 219 | ++ * @vma: the memory mapping |
| 220 | + * @address: user virtual address |
| 221 | + * @ptepp: location to store found PTE |
| 222 | + * @ptlp: location to store the lock for the PTE |
| 223 | +@@ -5628,15 +5628,19 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) |
| 224 | + * |
| 225 | + * Return: zero on success, -ve otherwise. |
| 226 | + */ |
| 227 | +-int follow_pte(struct mm_struct *mm, unsigned long address, |
| 228 | ++int follow_pte(struct vm_area_struct *vma, unsigned long address, |
| 229 | + pte_t **ptepp, spinlock_t **ptlp) |
| 230 | + { |
| 231 | ++ struct mm_struct *mm = vma->vm_mm; |
| 232 | + pgd_t *pgd; |
| 233 | + p4d_t *p4d; |
| 234 | + pud_t *pud; |
| 235 | + pmd_t *pmd; |
| 236 | + pte_t *ptep; |
| 237 | + |
| 238 | ++ if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) |
| 239 | ++ goto out; |
| 240 | ++ |
| 241 | + pgd = pgd_offset(mm, address); |
| 242 | + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) |
| 243 | + goto out; |
| 244 | +@@ -5754,11 +5758,8 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, |
| 245 | + int offset = offset_in_page(addr); |
| 246 | + int ret = -EINVAL; |
| 247 | + |
| 248 | +- if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) |
| 249 | +- return -EINVAL; |
| 250 | +- |
| 251 | + retry: |
| 252 | +- if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) |
| 253 | ++ if (follow_pte(vma, addr, &ptep, &ptl)) |
| 254 | + return -EINVAL; |
| 255 | + pte = ptep_get(ptep); |
| 256 | + pte_unmap_unlock(ptep, ptl); |
| 257 | +@@ -5773,7 +5774,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, |
| 258 | + if (!maddr) |
| 259 | + return -ENOMEM; |
| 260 | + |
| 261 | +- if (follow_pte(vma->vm_mm, addr, &ptep, &ptl)) |
| 262 | ++ if (follow_pte(vma, addr, &ptep, &ptl)) |
| 263 | + goto out_unmap; |
| 264 | + |
| 265 | + if (!pte_same(pte, ptep_get(ptep))) { |
| 266 | +diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c |
| 267 | +index b163a079fe65..acd8c5aee080 100644 |
| 268 | +--- a/virt/kvm/kvm_main.c |
| 269 | ++++ b/virt/kvm/kvm_main.c |
| 270 | +@@ -2884,7 +2884,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, |
| 271 | + spinlock_t *ptl; |
| 272 | + int r; |
| 273 | + |
| 274 | +- r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); |
| 275 | ++ r = follow_pte(vma, addr, &ptep, &ptl); |
| 276 | + if (r) { |
| 277 | + /* |
| 278 | + * get_user_pages fails for VM_IO and VM_PFNMAP vmas and does |
| 279 | +@@ -2899,7 +2899,7 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma, |
| 280 | + if (r) |
| 281 | + return r; |
| 282 | + |
| 283 | +- r = follow_pte(vma->vm_mm, addr, &ptep, &ptl); |
| 284 | ++ r = follow_pte(vma, addr, &ptep, &ptl); |
| 285 | + if (r) |
| 286 | + return r; |
| 287 | + } |