Commit 43c17b5

vfio/pci: implement huge_fault support
jira LE-3557
Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6
commit-author Alex Williamson <[email protected]>
commit f9e54c3

Empty-Commit: Cherry-Pick Conflicts during history rebuild. Will be included
in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/f9e54c3a.failed

With the addition of pfnmap support in vmf_insert_pfn_{pmd,pud}() we can
take advantage of PMD and PUD faults to PCI BAR mmaps and create more
efficient mappings. PCI BARs are always a power of two and will typically
get at least PMD alignment without userspace even trying. Userspace
alignment for PUD mappings is also not too difficult.

Consolidate faults through a single handler with a new wrapper for standard
single page faults. The pre-faulting behavior of commit d71a989 ("vfio/pci:
Insert full vma on mmap'd MMIO fault") is removed in this refactoring since
huge_fault will cover the bulk of the faults and results in more efficient
page table usage. We also want to avoid that pre-faulted single page
mappings preempt huge page mappings.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Alex Williamson <[email protected]>
Signed-off-by: Peter Xu <[email protected]>
Cc: Alexander Gordeev <[email protected]>
Cc: Aneesh Kumar K.V <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Gavin Shan <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Niklas Schnelle <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: Ryan Roberts <[email protected]>
Cc: Sean Christopherson <[email protected]>
Cc: Sven Schnelle <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Zi Yan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
(cherry picked from commit f9e54c3)
Signed-off-by: Jonathan Maple <[email protected]>

# Conflicts:
#	drivers/vfio/pci/vfio_pci_core.c
1 parent 42ad41e commit 43c17b5
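
The commit message above notes that PCI BARs usually get PMD alignment for free, while PUD-sized mappings need a little help from userspace. As a rough illustration only (not part of this commit), the sketch below shows one common way a VFIO user could place a BAR mapping at a 1 GiB aligned virtual address so the new huge_fault handler has a chance to install PUD-level entries. The helper name map_bar_pud_aligned() is hypothetical, and device_fd, region_offset and region_size are assumed to have been obtained beforehand via the usual VFIO ioctls (e.g. VFIO_DEVICE_GET_REGION_INFO).

/*
 * Hypothetical userspace sketch: reserve an oversized window, carve out a
 * 1 GiB aligned sub-range, and map the BAR there with MAP_FIXED.  Error
 * handling is mostly elided for brevity.
 */
#include <stdint.h>
#include <sys/mman.h>
#include <sys/types.h>

#define PUD_ALIGN	(1UL << 30)	/* 1 GiB */

static void *map_bar_pud_aligned(int device_fd, off_t region_offset,
				 size_t region_size)
{
	size_t span = region_size + PUD_ALIGN;
	uint8_t *res;
	uintptr_t aligned;

	/* Over-reserve address space so an aligned sub-range must exist. */
	res = mmap(NULL, span, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (res == MAP_FAILED)
		return MAP_FAILED;

	aligned = ((uintptr_t)res + PUD_ALIGN - 1) & ~(PUD_ALIGN - 1);

	/* Drop the unaligned head and tail of the reservation. */
	munmap(res, aligned - (uintptr_t)res);
	munmap((void *)(aligned + region_size),
	       (uintptr_t)res + span - (aligned + region_size));

	/* Map the BAR over the remaining, PUD-aligned window. */
	return mmap((void *)aligned, region_size, PROT_READ | PROT_WRITE,
		    MAP_SHARED | MAP_FIXED, device_fd, region_offset);
}

Whether PUD entries are actually installed still depends on the kernel side: the BAR must be at least 1 GiB (BARs are naturally aligned to their power-of-two size) and the architecture must select CONFIG_ARCH_SUPPORTS_PUD_PFNMAP; otherwise the handler in the diff below returns VM_FAULT_FALLBACK and the fault is retried at a smaller order.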

1 file changed: 229 additions, 0 deletions
@@ -0,0 +1,229 @@
vfio/pci: implement huge_fault support

jira LE-3557
Rebuild_History Non-Buildable kernel-5.14.0-570.26.1.el9_6
commit-author Alex Williamson <[email protected]>
commit f9e54c3a2f5b79ecc57c7bc7d0d3521e461a2101
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-5.14.0-570.26.1.el9_6/f9e54c3a.failed

With the addition of pfnmap support in vmf_insert_pfn_{pmd,pud}() we can
take advantage of PMD and PUD faults to PCI BAR mmaps and create more
efficient mappings. PCI BARs are always a power of two and will typically
get at least PMD alignment without userspace even trying. Userspace
alignment for PUD mappings is also not too difficult.

Consolidate faults through a single handler with a new wrapper for
standard single page faults. The pre-faulting behavior of commit
d71a989cf5d9 ("vfio/pci: Insert full vma on mmap'd MMIO fault") is removed
in this refactoring since huge_fault will cover the bulk of the faults and
results in more efficient page table usage. We also want to avoid that
pre-faulted single page mappings preempt huge page mappings.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Alex Williamson <[email protected]>
Signed-off-by: Peter Xu <[email protected]>
Cc: Alexander Gordeev <[email protected]>
Cc: Aneesh Kumar K.V <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Christian Borntraeger <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: David Hildenbrand <[email protected]>
Cc: Gavin Shan <[email protected]>
Cc: Gerald Schaefer <[email protected]>
Cc: Heiko Carstens <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jason Gunthorpe <[email protected]>
Cc: Matthew Wilcox <[email protected]>
Cc: Niklas Schnelle <[email protected]>
Cc: Paolo Bonzini <[email protected]>
Cc: Ryan Roberts <[email protected]>
Cc: Sean Christopherson <[email protected]>
Cc: Sven Schnelle <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Vasily Gorbik <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Zi Yan <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
(cherry picked from commit f9e54c3a2f5b79ecc57c7bc7d0d3521e461a2101)
Signed-off-by: Jonathan Maple <[email protected]>

# Conflicts:
#	drivers/vfio/pci/vfio_pci_core.c
diff --cc drivers/vfio/pci/vfio_pci_core.c
index ffda816e0119,2d7478e9a62d..000000000000
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@@ -1725,100 -1646,82 +1726,161 @@@ void vfio_pci_memory_unlock_and_restore
up_write(&vdev->memory_lock);
}

-static unsigned long vma_to_pfn(struct vm_area_struct *vma)
+/* Caller holds vma_lock */
+static int __vfio_pci_add_vma(struct vfio_pci_core_device *vdev,
+ struct vm_area_struct *vma)
{
- struct vfio_pci_core_device *vdev = vma->vm_private_data;
- int index = vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
- u64 pgoff;
+ struct vfio_pci_mmap_vma *mmap_vma;

- pgoff = vma->vm_pgoff &
- ((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+ mmap_vma = kmalloc(sizeof(*mmap_vma), GFP_KERNEL_ACCOUNT);
+ if (!mmap_vma)
+ return -ENOMEM;
+
+ mmap_vma->vma = vma;
+ list_add(&mmap_vma->vma_next, &vdev->vma_list);
+
+ return 0;
+}
+
+/*
+ * Zap mmaps on open so that we can fault them in on access and therefore
+ * our vma_list only tracks mappings accessed since last zap.
+ */
+static void vfio_pci_mmap_open(struct vm_area_struct *vma)
+{
+ zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+}
+
+static void vfio_pci_mmap_close(struct vm_area_struct *vma)
+{
+ struct vfio_pci_core_device *vdev = vma->vm_private_data;
+ struct vfio_pci_mmap_vma *mmap_vma;

- return (pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT) + pgoff;
+ mutex_lock(&vdev->vma_lock);
+ list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
+ if (mmap_vma->vma == vma) {
+ list_del(&mmap_vma->vma_next);
+ kfree(mmap_vma);
+ break;
+ }
+ }
+ mutex_unlock(&vdev->vma_lock);
}

- static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
+ static vm_fault_t vfio_pci_mmap_huge_fault(struct vm_fault *vmf,
+ unsigned int order)
{
struct vm_area_struct *vma = vmf->vma;
struct vfio_pci_core_device *vdev = vma->vm_private_data;
++<<<<<<< HEAD
+ struct vfio_pci_mmap_vma *mmap_vma;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
++=======
+ unsigned long pfn, pgoff = vmf->pgoff - vma->vm_pgoff;
+ vm_fault_t ret = VM_FAULT_SIGBUS;
+
+ if (order && (vmf->address & ((PAGE_SIZE << order) - 1) ||
+ vmf->address + (PAGE_SIZE << order) > vma->vm_end)) {
+ ret = VM_FAULT_FALLBACK;
+ goto out;
+ }
+
+ pfn = vma_to_pfn(vma);
++>>>>>>> f9e54c3a2f5b (vfio/pci: implement huge_fault support)

+ mutex_lock(&vdev->vma_lock);
down_read(&vdev->memory_lock);

++<<<<<<< HEAD
+ /*
+ * Memory region cannot be accessed if the low power feature is engaged
+ * or memory access is disabled.
+ */
+ if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) {
+ ret = VM_FAULT_SIGBUS;
+ goto up_out;
+ }
+
+ /*
+ * We populate the whole vma on fault, so we need to test whether
+ * the vma has already been mapped, such as for concurrent faults
+ * to the same vma. io_remap_pfn_range() will trigger a BUG_ON if
+ * we ask it to fill the same range again.
+ */
+ list_for_each_entry(mmap_vma, &vdev->vma_list, vma_next) {
+ if (mmap_vma->vma == vma)
+ goto up_out;
+ }
++=======
+ if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev))
+ goto out_unlock;
+
+ switch (order) {
+ case 0:
+ ret = vmf_insert_pfn(vma, vmf->address, pfn + pgoff);
+ break;
+ #ifdef CONFIG_ARCH_SUPPORTS_PMD_PFNMAP
+ case PMD_ORDER:
+ ret = vmf_insert_pfn_pmd(vmf, __pfn_to_pfn_t(pfn + pgoff,
+ PFN_DEV), false);
+ break;
+ #endif
+ #ifdef CONFIG_ARCH_SUPPORTS_PUD_PFNMAP
+ case PUD_ORDER:
+ ret = vmf_insert_pfn_pud(vmf, __pfn_to_pfn_t(pfn + pgoff,
+ PFN_DEV), false);
+ break;
+ #endif
+ default:
+ ret = VM_FAULT_FALLBACK;
+ }
+
+ out_unlock:
+ up_read(&vdev->memory_lock);
+ out:
+ dev_dbg_ratelimited(&vdev->pdev->dev,
+ "%s(,order = %d) BAR %ld page offset 0x%lx: 0x%x\n",
+ __func__, order,
+ vma->vm_pgoff >>
+ (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT),
+ pgoff, (unsigned int)ret);
++>>>>>>> f9e54c3a2f5b (vfio/pci: implement huge_fault support)
+
+ if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot)) {
+ ret = VM_FAULT_SIGBUS;
+ zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+ goto up_out;
+ }
+
+ if (__vfio_pci_add_vma(vdev, vma)) {
+ ret = VM_FAULT_OOM;
+ zap_vma_ptes(vma, vma->vm_start, vma->vm_end - vma->vm_start);
+ }

+up_out:
+ up_read(&vdev->memory_lock);
+ mutex_unlock(&vdev->vma_lock);
return ret;
}

+ static vm_fault_t vfio_pci_mmap_page_fault(struct vm_fault *vmf)
+ {
+ return vfio_pci_mmap_huge_fault(vmf, 0);
+ }
+
static const struct vm_operations_struct vfio_pci_mmap_ops = {
++<<<<<<< HEAD
+ .open = vfio_pci_mmap_open,
+ .close = vfio_pci_mmap_close,
+ .fault = vfio_pci_mmap_fault,
++=======
+ .fault = vfio_pci_mmap_page_fault,
+ #ifdef CONFIG_ARCH_SUPPORTS_HUGE_PFNMAP
+ .huge_fault = vfio_pci_mmap_huge_fault,
+ #endif
++>>>>>>> f9e54c3a2f5b (vfio/pci: implement huge_fault support)
};

int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
* Unmerged path drivers/vfio/pci/vfio_pci_core.c
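
A note for readers following the hunk above: the expression vma->vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT), which appears both in the removed vma_to_pfn() lines and in the new dev_dbg_ratelimited() message, recovers the BAR index from the way vfio-pci encodes regions in the mmap offset. Below is a small self-contained illustration; the constants are paraphrased from the kernel's vfio-pci headers, and PAGE_SHIFT of 12 (4 KiB pages) is an assumption made only for the example.

/*
 * Illustration only: vfio-pci places the region index in the high bits of
 * the mmap file offset.  Offsets 0, 1 << 40, 2 << 40, ... select BAR 0,
 * BAR 1, BAR 2, ... and the low 40 bits are the byte offset into that BAR.
 */
#include <stdint.h>

#define PAGE_SHIFT		12	/* assumed 4 KiB pages */
#define VFIO_PCI_OFFSET_SHIFT	40
#define VFIO_PCI_INDEX_TO_OFFSET(index)	((uint64_t)(index) << VFIO_PCI_OFFSET_SHIFT)

/* vm_pgoff is the same mmap offset expressed in pages. */
static inline unsigned int bar_index(uint64_t vm_pgoff)
{
	return vm_pgoff >> (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT);
}

static inline uint64_t bar_page_offset(uint64_t vm_pgoff)
{
	return vm_pgoff & ((1ULL << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
}

This is the same index/offset split that vma_to_pfn() performs before adding pci_resource_start(vdev->pdev, index) >> PAGE_SHIFT to obtain the physical PFN handed to vmf_insert_pfn(), vmf_insert_pfn_pmd() or vmf_insert_pfn_pud().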
