Commit 1117214
mm: new follow_pfnmap API
jira LE-3557
Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6
commit-author Peter Xu <[email protected]>
commit 6da8e96

Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6da8e963.failed

Introduce a pair of APIs to follow pfn mappings to get entry information.
It's very similar to what follow_pte() does before, but different in that
it recognizes huge pfn mappings.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Peter Xu <[email protected]>
Cc: Alexander Gordeev <[email protected]>
Cc: Alex Williamson <[email protected]>
Cc: Aneesh Kumar K.V <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Gavin Shan <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Niklas Schnelle <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: Ryan Roberts <[email protected]>
Cc: Sean Christopherson <[email protected]>
Cc: Sven Schnelle <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Zi Yan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
(cherry picked from commit 6da8e96)
Signed-off-by: Jonathan Maple <[email protected]>

# Conflicts:
#	mm/memory.c
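For orientation, here is a minimal sketch of how a caller would drive the new
pair of APIs, following the rules spelled out in the follow_pfnmap_start()
doc-comment in the patch below: hold the mmap read lock across start()/end(),
and consume the output fields only in between. The dump_pfnmap() helper and
its pr_info() output are illustrative, not part of this commit:

    #include <linux/mm.h>
    #include <linux/mmap_lock.h>
    #include <linux/printk.h>

    /*
     * Illustrative caller of the new API (not part of this commit): look up
     * the PFN backing a user virtual address in a VM_IO/VM_PFNMAP vma.
     */
    static int dump_pfnmap(struct vm_area_struct *vma, unsigned long addr)
    {
    	struct follow_pfnmap_args args = {
    		.vma = vma,		/* input: the pfnmap vma */
    		.address = addr,	/* input: user virtual address */
    	};
    	int ret;

    	mmap_assert_locked(vma->vm_mm);	/* mmap lock held across start/end */

    	ret = follow_pfnmap_start(&args);
    	if (ret)
    		return ret;		/* unmapped, or not an IO/PFN mapping */

    	/* Output fields are only valid until follow_pfnmap_end(). */
    	pr_info("pfn=%#lx writable=%d special=%d\n",
    		args.pfn, args.writable, args.special);

    	follow_pfnmap_end(&args);
    	return 0;
    }

Unlike follow_pte(), the same call sequence transparently handles PTE-, PMD-
and PUD-level pfn mappings, which is the point of the new API.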
1 parent 50d5012 commit 1117214

1 file changed: +316 -0 lines changed

@@ -0,0 +1,316 @@
mm: new follow_pfnmap API

jira LE-3557
Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6
commit-author Peter Xu <[email protected]>
commit 6da8e9634bb7e3fdad9ae0e4db873a05036c4343
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/6da8e963.failed

Introduce a pair of APIs to follow pfn mappings to get entry information.
It's very similar to what follow_pte() does before, but different in that
it recognizes huge pfn mappings.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Peter Xu <[email protected]>
Cc: Alexander Gordeev <[email protected]>
Cc: Alex Williamson <[email protected]>
Cc: Aneesh Kumar K.V <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Gavin Shan <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Niklas Schnelle <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: Ryan Roberts <[email protected]>
Cc: Sean Christopherson <[email protected]>
Cc: Sven Schnelle <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Zi Yan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
(cherry picked from commit 6da8e9634bb7e3fdad9ae0e4db873a05036c4343)
Signed-off-by: Jonathan Maple <[email protected]>

# Conflicts:
#	mm/memory.c
diff --cc mm/memory.c
index e2794e3b8919,3878bf69bc14..000000000000
--- a/mm/memory.c
+++ b/mm/memory.c
@@@ -5666,71 -6172,157 +5666,223 @@@ out
  }
  EXPORT_SYMBOL_GPL(follow_pte);

++<<<<<<< HEAD
 +/**
 + * follow_pfn - look up PFN at a user virtual address
 + * @vma: memory mapping
 + * @address: user virtual address
 + * @pfn: location to store found PFN
 + *
 + * Only IO mappings and raw PFN mappings are allowed.
 + *
 + * This function does not allow the caller to read the permissions
 + * of the PTE. Do not use it.
 + *
 + * Return: zero and the pfn at @pfn on success, -ve otherwise.
 + */
 +int follow_pfn(struct vm_area_struct *vma, unsigned long address,
 +	unsigned long *pfn)
 +{
 +	int ret = -EINVAL;
 +	spinlock_t *ptl;
 +	pte_t *ptep;
 +
 +	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 +		return ret;
 +
 +	ret = follow_pte(vma->vm_mm, address, &ptep, &ptl);
 +	if (ret)
 +		return ret;
 +	*pfn = pte_pfn(ptep_get(ptep));
 +	pte_unmap_unlock(ptep, ptl);
 +	return 0;
 +}
 +EXPORT_SYMBOL(follow_pfn);
++=======
+ static inline void pfnmap_args_setup(struct follow_pfnmap_args *args,
+ 				     spinlock_t *lock, pte_t *ptep,
+ 				     pgprot_t pgprot, unsigned long pfn_base,
+ 				     unsigned long addr_mask, bool writable,
+ 				     bool special)
+ {
+ 	args->lock = lock;
+ 	args->ptep = ptep;
+ 	args->pfn = pfn_base + ((args->address & ~addr_mask) >> PAGE_SHIFT);
+ 	args->pgprot = pgprot;
+ 	args->writable = writable;
+ 	args->special = special;
+ }
+
+ static inline void pfnmap_lockdep_assert(struct vm_area_struct *vma)
+ {
+ #ifdef CONFIG_LOCKDEP
+ 	struct address_space *mapping = vma->vm_file->f_mapping;
+
+ 	if (mapping)
+ 		lockdep_assert(lockdep_is_held(&vma->vm_file->f_mapping->i_mmap_rwsem) ||
+ 			       lockdep_is_held(&vma->vm_mm->mmap_lock));
+ 	else
+ 		lockdep_assert(lockdep_is_held(&vma->vm_mm->mmap_lock));
+ #endif
+ }
+
+ /**
+  * follow_pfnmap_start() - Look up a pfn mapping at a user virtual address
+  * @args: Pointer to struct @follow_pfnmap_args
+  *
+  * The caller needs to setup args->vma and args->address to point to the
+  * virtual address as the target of such lookup. On a successful return,
+  * the results will be put into other output fields.
+  *
+  * After the caller finished using the fields, the caller must invoke
+  * another follow_pfnmap_end() to proper releases the locks and resources
+  * of such look up request.
+  *
+  * During the start() and end() calls, the results in @args will be valid
+  * as proper locks will be held. After the end() is called, all the fields
+  * in @follow_pfnmap_args will be invalid to be further accessed. Further
+  * use of such information after end() may require proper synchronizations
+  * by the caller with page table updates, otherwise it can create a
+  * security bug.
+  *
+  * If the PTE maps a refcounted page, callers are responsible to protect
+  * against invalidation with MMU notifiers; otherwise access to the PFN at
+  * a later point in time can trigger use-after-free.
+  *
+  * Only IO mappings and raw PFN mappings are allowed. The mmap semaphore
+  * should be taken for read, and the mmap semaphore cannot be released
+  * before the end() is invoked.
+  *
+  * This function must not be used to modify PTE content.
+  *
+  * Return: zero on success, negative otherwise.
+  */
+ int follow_pfnmap_start(struct follow_pfnmap_args *args)
+ {
+ 	struct vm_area_struct *vma = args->vma;
+ 	unsigned long address = args->address;
+ 	struct mm_struct *mm = vma->vm_mm;
+ 	spinlock_t *lock;
+ 	pgd_t *pgdp;
+ 	p4d_t *p4dp, p4d;
+ 	pud_t *pudp, pud;
+ 	pmd_t *pmdp, pmd;
+ 	pte_t *ptep, pte;
+
+ 	pfnmap_lockdep_assert(vma);
+
+ 	if (unlikely(address < vma->vm_start || address >= vma->vm_end))
+ 		goto out;
+
+ 	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
+ 		goto out;
+ retry:
+ 	pgdp = pgd_offset(mm, address);
+ 	if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp)))
+ 		goto out;
+
+ 	p4dp = p4d_offset(pgdp, address);
+ 	p4d = READ_ONCE(*p4dp);
+ 	if (p4d_none(p4d) || unlikely(p4d_bad(p4d)))
+ 		goto out;
+
+ 	pudp = pud_offset(p4dp, address);
+ 	pud = READ_ONCE(*pudp);
+ 	if (pud_none(pud))
+ 		goto out;
+ 	if (pud_leaf(pud)) {
+ 		lock = pud_lock(mm, pudp);
+ 		if (!unlikely(pud_leaf(pud))) {
+ 			spin_unlock(lock);
+ 			goto retry;
+ 		}
+ 		pfnmap_args_setup(args, lock, NULL, pud_pgprot(pud),
+ 				  pud_pfn(pud), PUD_MASK, pud_write(pud),
+ 				  pud_special(pud));
+ 		return 0;
+ 	}
+
+ 	pmdp = pmd_offset(pudp, address);
+ 	pmd = pmdp_get_lockless(pmdp);
+ 	if (pmd_leaf(pmd)) {
+ 		lock = pmd_lock(mm, pmdp);
+ 		if (!unlikely(pmd_leaf(pmd))) {
+ 			spin_unlock(lock);
+ 			goto retry;
+ 		}
+ 		pfnmap_args_setup(args, lock, NULL, pmd_pgprot(pmd),
+ 				  pmd_pfn(pmd), PMD_MASK, pmd_write(pmd),
+ 				  pmd_special(pmd));
+ 		return 0;
+ 	}
+
+ 	ptep = pte_offset_map_lock(mm, pmdp, address, &lock);
+ 	if (!ptep)
+ 		goto out;
+ 	pte = ptep_get(ptep);
+ 	if (!pte_present(pte))
+ 		goto unlock;
+ 	pfnmap_args_setup(args, lock, ptep, pte_pgprot(pte),
+ 			  pte_pfn(pte), PAGE_MASK, pte_write(pte),
+ 			  pte_special(pte));
+ 	return 0;
+ unlock:
+ 	pte_unmap_unlock(ptep, lock);
+ out:
+ 	return -EINVAL;
+ }
+ EXPORT_SYMBOL_GPL(follow_pfnmap_start);
+
+ /**
+  * follow_pfnmap_end(): End a follow_pfnmap_start() process
+  * @args: Pointer to struct @follow_pfnmap_args
+  *
+  * Must be used in pair of follow_pfnmap_start(). See the start() function
+  * above for more information.
+  */
+ void follow_pfnmap_end(struct follow_pfnmap_args *args)
+ {
+ 	if (args->lock)
+ 		spin_unlock(args->lock);
+ 	if (args->ptep)
+ 		pte_unmap(args->ptep);
+ }
+ EXPORT_SYMBOL_GPL(follow_pfnmap_end);
++>>>>>>> 6da8e9634bb7 (mm: new follow_pfnmap API)

  #ifdef CONFIG_HAVE_IOREMAP_PROT
 +int follow_phys(struct vm_area_struct *vma,
 +		unsigned long address, unsigned int flags,
 +		unsigned long *prot, resource_size_t *phys)
 +{
 +	int ret = -EINVAL;
 +	pte_t *ptep, pte;
 +	spinlock_t *ptl;
 +
 +	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
 +		goto out;
 +
 +	if (follow_pte(vma->vm_mm, address, &ptep, &ptl))
 +		goto out;
 +	pte = ptep_get(ptep);
 +
 +	/* Never return PFNs of anon folios in COW mappings. */
 +	if (vm_normal_folio(vma, address, pte))
 +		goto unlock;
 +
 +	if ((flags & FOLL_WRITE) && !pte_write(pte))
 +		goto unlock;
 +
 +	*prot = pgprot_val(pte_pgprot(pte));
 +	*phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
 +
 +	ret = 0;
 +unlock:
 +	pte_unmap_unlock(ptep, ptl);
 +out:
 +	return ret;
 +}
 +
  /**
   * generic_access_phys - generic implementation for iomem mmap access
   * @vma: the vma to access
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 196c481ec160..51f28b4e78fc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2436,6 +2436,37 @@ int follow_phys(struct vm_area_struct *vma, unsigned long address,
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 			void *buf, int len, int write);

+struct follow_pfnmap_args {
+	/**
+	 * Inputs:
+	 * @vma: Pointer to @vm_area_struct struct
+	 * @address: the virtual address to walk
+	 */
+	struct vm_area_struct *vma;
+	unsigned long address;
+	/**
+	 * Internals:
+	 *
+	 * The caller shouldn't touch any of these.
+	 */
+	spinlock_t *lock;
+	pte_t *ptep;
+	/**
+	 * Outputs:
+	 *
+	 * @pfn: the PFN of the address
+	 * @pgprot: the pgprot_t of the mapping
+	 * @writable: whether the mapping is writable
+	 * @special: whether the mapping is a special mapping (real PFN maps)
+	 */
+	unsigned long pfn;
+	pgprot_t pgprot;
+	bool writable;
+	bool special;
+};
+int follow_pfnmap_start(struct follow_pfnmap_args *args);
+void follow_pfnmap_end(struct follow_pfnmap_args *args);
+
 extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 void pagecache_isize_extended(struct inode *inode, loff_t from, loff_t to);
* Unmerged path mm/memory.c
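As a side note on the pfn math in pfnmap_args_setup() above: the output PFN
is the leaf's base PFN plus the page index of the address within the leaf
mapping. A small userspace sketch of the same computation; the constants
assume x86-64 with 4K base pages and a 2M PMD leaf, and pfn_base/address are
made-up illustrative values:

    #include <stdio.h>

    #define PAGE_SHIFT	12
    #define PMD_SHIFT	21				/* 2M huge mapping */
    #define PMD_MASK	(~((1UL << PMD_SHIFT) - 1))

    int main(void)
    {
    	unsigned long pfn_base = 0x100000;	/* base PFN of the PMD leaf */
    	unsigned long address = 0x7f0000201234;	/* address inside the leaf */

    	/* Same expression as pfnmap_args_setup() with addr_mask = PMD_MASK */
    	unsigned long pfn = pfn_base + ((address & ~PMD_MASK) >> PAGE_SHIFT);

    	printf("pfn = %#lx\n", pfn);		/* prints pfn = 0x100001 */
    	return 0;
    }

The same expression with PAGE_MASK or PUD_MASK covers the 4K and 1G cases,
which is why one helper serves all three levels of the walk.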
