Skip to content

Commit 0e7e619

Browse files
committed
Merge branch 'ttm-transhuge' of git://people.freedesktop.org/~thomash/linux into drm-next
Huge page-table entries for TTM. In order to reduce CPU usage [1] and, in theory, TLB misses, this patchset enables huge and giant page-table entries for TTM and TTM-enabled graphics drivers. Signed-off-by: Dave Airlie <[email protected]> From: Thomas Hellstrom (VMware) <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
2 parents 59e7a8c + 9431042 commit 0e7e619

File tree

16 files changed

+692
-28
lines changed

16 files changed

+692
-28
lines changed

drivers/gpu/drm/drm_file.c

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@
4848
#include "drm_internal.h"
4949
#include "drm_legacy.h"
5050

51+
#if defined(CONFIG_MMU) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
52+
#include <uapi/asm/mman.h>
53+
#include <drm/drm_vma_manager.h>
54+
#endif
55+
5156
/* from BKL pushdown */
5257
DEFINE_MUTEX(drm_global_mutex);
5358

@@ -872,3 +877,139 @@ struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags)
872877
return file;
873878
}
874879
EXPORT_SYMBOL_FOR_TESTS_ONLY(mock_drm_getfile);
880+
881+
#ifdef CONFIG_MMU
882+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
883+
/*
884+
* drm_addr_inflate() attempts to construct an aligned area by inflating
885+
* the area size and skipping the unaligned start of the area.
886+
* adapted from shmem_get_unmapped_area()
887+
*/
888+
static unsigned long drm_addr_inflate(unsigned long addr,
889+
unsigned long len,
890+
unsigned long pgoff,
891+
unsigned long flags,
892+
unsigned long huge_size)
893+
{
894+
unsigned long offset, inflated_len;
895+
unsigned long inflated_addr;
896+
unsigned long inflated_offset;
897+
898+
offset = (pgoff << PAGE_SHIFT) & (huge_size - 1);
899+
if (offset && offset + len < 2 * huge_size)
900+
return addr;
901+
if ((addr & (huge_size - 1)) == offset)
902+
return addr;
903+
904+
inflated_len = len + huge_size - PAGE_SIZE;
905+
if (inflated_len > TASK_SIZE)
906+
return addr;
907+
if (inflated_len < len)
908+
return addr;
909+
910+
inflated_addr = current->mm->get_unmapped_area(NULL, 0, inflated_len,
911+
0, flags);
912+
if (IS_ERR_VALUE(inflated_addr))
913+
return addr;
914+
if (inflated_addr & ~PAGE_MASK)
915+
return addr;
916+
917+
inflated_offset = inflated_addr & (huge_size - 1);
918+
inflated_addr += offset - inflated_offset;
919+
if (inflated_offset > offset)
920+
inflated_addr += huge_size;
921+
922+
if (inflated_addr > TASK_SIZE - len)
923+
return addr;
924+
925+
return inflated_addr;
926+
}
927+
928+
/**
929+
* drm_get_unmapped_area() - Get an unused user-space virtual memory area
930+
* suitable for huge page table entries.
931+
* @file: The struct file representing the address space being mmap()'d.
932+
* @uaddr: Start address suggested by user-space.
933+
* @len: Length of the area.
934+
* @pgoff: The page offset into the address space.
935+
* @flags: mmap flags
936+
* @mgr: The address space manager used by the drm driver. This argument can
937+
* probably be removed at some point when all drivers use the same
938+
* address space manager.
939+
*
940+
* This function attempts to find an unused user-space virtual memory area
941+
* that can accommodate the size we want to map, and that is properly
942+
* aligned to facilitate huge page table entries matching actual
943+
* huge pages or huge page aligned memory in buffer objects. Buffer objects
944+
* are assumed to start at huge page boundary pfns (io memory) or be
945+
* populated by huge pages aligned to the start of the buffer object
946+
* (system- or coherent memory). Adapted from shmem_get_unmapped_area.
947+
*
948+
* Return: aligned user-space address.
949+
*/
950+
unsigned long drm_get_unmapped_area(struct file *file,
951+
unsigned long uaddr, unsigned long len,
952+
unsigned long pgoff, unsigned long flags,
953+
struct drm_vma_offset_manager *mgr)
954+
{
955+
unsigned long addr;
956+
unsigned long inflated_addr;
957+
struct drm_vma_offset_node *node;
958+
959+
if (len > TASK_SIZE)
960+
return -ENOMEM;
961+
962+
/*
963+
* @pgoff is the file page-offset the huge page boundaries of
964+
* which typically aligns to physical address huge page boundaries.
965+
* That's not true for DRM, however, where physical address huge
966+
* page boundaries instead are aligned with the offset from
967+
* buffer object start. So adjust @pgoff to be the offset from
968+
* buffer object start.
969+
*/
970+
drm_vma_offset_lock_lookup(mgr);
971+
node = drm_vma_offset_lookup_locked(mgr, pgoff, 1);
972+
if (node)
973+
pgoff -= node->vm_node.start;
974+
drm_vma_offset_unlock_lookup(mgr);
975+
976+
addr = current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
977+
if (IS_ERR_VALUE(addr))
978+
return addr;
979+
if (addr & ~PAGE_MASK)
980+
return addr;
981+
if (addr > TASK_SIZE - len)
982+
return addr;
983+
984+
if (len < HPAGE_PMD_SIZE)
985+
return addr;
986+
if (flags & MAP_FIXED)
987+
return addr;
988+
/*
989+
* Our priority is to support MAP_SHARED mapped hugely;
990+
* and support MAP_PRIVATE mapped hugely too, until it is COWed.
991+
* But if caller specified an address hint, respect that as before.
992+
*/
993+
if (uaddr)
994+
return addr;
995+
996+
inflated_addr = drm_addr_inflate(addr, len, pgoff, flags,
997+
HPAGE_PMD_SIZE);
998+
999+
if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) &&
1000+
len >= HPAGE_PUD_SIZE)
1001+
inflated_addr = drm_addr_inflate(inflated_addr, len, pgoff,
1002+
flags, HPAGE_PUD_SIZE);
1003+
return inflated_addr;
1004+
}
1005+
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
1006+
unsigned long drm_get_unmapped_area(struct file *file,
1007+
unsigned long uaddr, unsigned long len,
1008+
unsigned long pgoff, unsigned long flags,
1009+
struct drm_vma_offset_manager *mgr)
1010+
{
1011+
return current->mm->get_unmapped_area(file, uaddr, len, pgoff, flags);
1012+
}
1013+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
1014+
EXPORT_SYMBOL_GPL(drm_get_unmapped_area);
1015+
#endif /* CONFIG_MMU */

drivers/gpu/drm/ttm/ttm_bo_vm.c

Lines changed: 158 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -156,13 +156,97 @@ vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
156156
}
157157
EXPORT_SYMBOL(ttm_bo_vm_reserve);
158158

159+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
160+
/**
161+
* ttm_bo_vm_insert_huge - Insert a pfn for PUD or PMD faults
162+
* @vmf: Fault data
163+
* @bo: The buffer object
164+
* @page_offset: Page offset from bo start
165+
* @fault_page_size: The size of the fault in pages.
166+
* @pgprot: The page protections.
167+
* Does additional checking whether it's possible to insert a PUD or PMD
168+
* pfn and performs the insertion.
169+
*
170+
* Return: VM_FAULT_NOPAGE on successful insertion, VM_FAULT_FALLBACK if
171+
* a huge fault was not possible, or on insertion error.
172+
*/
173+
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
174+
struct ttm_buffer_object *bo,
175+
pgoff_t page_offset,
176+
pgoff_t fault_page_size,
177+
pgprot_t pgprot)
178+
{
179+
pgoff_t i;
180+
vm_fault_t ret;
181+
unsigned long pfn;
182+
pfn_t pfnt;
183+
struct ttm_tt *ttm = bo->ttm;
184+
bool write = vmf->flags & FAULT_FLAG_WRITE;
185+
186+
/* Fault should not cross bo boundary. */
187+
page_offset &= ~(fault_page_size - 1);
188+
if (page_offset + fault_page_size > bo->num_pages)
189+
goto out_fallback;
190+
191+
if (bo->mem.bus.is_iomem)
192+
pfn = ttm_bo_io_mem_pfn(bo, page_offset);
193+
else
194+
pfn = page_to_pfn(ttm->pages[page_offset]);
195+
196+
/* pfn must be fault_page_size aligned. */
197+
if ((pfn & (fault_page_size - 1)) != 0)
198+
goto out_fallback;
199+
200+
/* Check that memory is contiguous. */
201+
if (!bo->mem.bus.is_iomem) {
202+
for (i = 1; i < fault_page_size; ++i) {
203+
if (page_to_pfn(ttm->pages[page_offset + i]) != pfn + i)
204+
goto out_fallback;
205+
}
206+
} else if (bo->bdev->driver->io_mem_pfn) {
207+
for (i = 1; i < fault_page_size; ++i) {
208+
if (ttm_bo_io_mem_pfn(bo, page_offset + i) != pfn + i)
209+
goto out_fallback;
210+
}
211+
}
212+
213+
pfnt = __pfn_to_pfn_t(pfn, PFN_DEV);
214+
if (fault_page_size == (HPAGE_PMD_SIZE >> PAGE_SHIFT))
215+
ret = vmf_insert_pfn_pmd_prot(vmf, pfnt, pgprot, write);
216+
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
217+
else if (fault_page_size == (HPAGE_PUD_SIZE >> PAGE_SHIFT))
218+
ret = vmf_insert_pfn_pud_prot(vmf, pfnt, pgprot, write);
219+
#endif
220+
else
221+
WARN_ON_ONCE(ret = VM_FAULT_FALLBACK);
222+
223+
if (ret != VM_FAULT_NOPAGE)
224+
goto out_fallback;
225+
226+
return VM_FAULT_NOPAGE;
227+
out_fallback:
228+
count_vm_event(THP_FAULT_FALLBACK);
229+
return VM_FAULT_FALLBACK;
230+
}
231+
#else
232+
static vm_fault_t ttm_bo_vm_insert_huge(struct vm_fault *vmf,
233+
struct ttm_buffer_object *bo,
234+
pgoff_t page_offset,
235+
pgoff_t fault_page_size,
236+
pgprot_t pgprot)
237+
{
238+
return VM_FAULT_FALLBACK;
239+
}
240+
#endif
241+
159242
/**
160243
* ttm_bo_vm_fault_reserved - TTM fault helper
161244
* @vmf: The struct vm_fault given as argument to the fault callback
162245
* @prot: The page protection to be used for this memory area.
163246
* @num_prefault: Maximum number of prefault pages. The caller may want to
164247
* specify this based on madvice settings and the size of the GPU object
165248
* backed by the memory.
249+
* @fault_page_size: The size of the fault in pages.
166250
*
167251
* This function inserts one or more page table entries pointing to the
168252
* memory backing the buffer object, and then returns a return code
@@ -176,7 +260,8 @@ EXPORT_SYMBOL(ttm_bo_vm_reserve);
176260
*/
177261
vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
178262
pgprot_t prot,
179-
pgoff_t num_prefault)
263+
pgoff_t num_prefault,
264+
pgoff_t fault_page_size)
180265
{
181266
struct vm_area_struct *vma = vmf->vma;
182267
struct ttm_buffer_object *bo = vma->vm_private_data;
@@ -268,6 +353,13 @@ vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
268353
prot = pgprot_decrypted(prot);
269354
}
270355

356+
/* We don't prefault on huge faults. Yet. */
357+
if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && fault_page_size != 1) {
358+
ret = ttm_bo_vm_insert_huge(vmf, bo, page_offset,
359+
fault_page_size, prot);
360+
goto out_io_unlock;
361+
}
362+
271363
/*
272364
* Speculatively prefault a number of pages. Only error on
273365
* first page.
@@ -334,7 +426,7 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
334426
return ret;
335427

336428
prot = vma->vm_page_prot;
337-
ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
429+
ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT, 1);
338430
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
339431
return ret;
340432

@@ -344,6 +436,66 @@ vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
344436
}
345437
EXPORT_SYMBOL(ttm_bo_vm_fault);
346438

439+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
440+
/**
441+
* ttm_pgprot_is_wrprotecting - Is a page protection value write-protecting?
442+
* @prot: The page protection value
443+
*
444+
* Return: true if @prot is write-protecting. false otherwise.
445+
*/
446+
static bool ttm_pgprot_is_wrprotecting(pgprot_t prot)
447+
{
448+
/*
449+
* This is meant to say "pgprot_wrprotect(prot) == prot" in a generic
450+
* way. Unfortunately there is no generic pgprot_wrprotect.
451+
*/
452+
return pte_val(pte_wrprotect(__pte(pgprot_val(prot)))) ==
453+
pgprot_val(prot);
454+
}
455+
456+
static vm_fault_t ttm_bo_vm_huge_fault(struct vm_fault *vmf,
457+
enum page_entry_size pe_size)
458+
{
459+
struct vm_area_struct *vma = vmf->vma;
460+
pgprot_t prot;
461+
struct ttm_buffer_object *bo = vma->vm_private_data;
462+
vm_fault_t ret;
463+
pgoff_t fault_page_size = 0;
464+
bool write = vmf->flags & FAULT_FLAG_WRITE;
465+
466+
switch (pe_size) {
467+
case PE_SIZE_PMD:
468+
fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
469+
break;
470+
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
471+
case PE_SIZE_PUD:
472+
fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
473+
break;
474+
#endif
475+
default:
476+
WARN_ON_ONCE(1);
477+
return VM_FAULT_FALLBACK;
478+
}
479+
480+
/* Fallback on write dirty-tracking or COW */
481+
if (write && ttm_pgprot_is_wrprotecting(vma->vm_page_prot))
482+
return VM_FAULT_FALLBACK;
483+
484+
ret = ttm_bo_vm_reserve(bo, vmf);
485+
if (ret)
486+
return ret;
487+
488+
prot = vm_get_page_prot(vma->vm_flags);
489+
ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);
490+
if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
491+
return ret;
492+
493+
dma_resv_unlock(bo->base.resv);
494+
495+
return ret;
496+
}
497+
#endif
498+
347499
void ttm_bo_vm_open(struct vm_area_struct *vma)
348500
{
349501
struct ttm_buffer_object *bo = vma->vm_private_data;
@@ -445,7 +597,10 @@ static const struct vm_operations_struct ttm_bo_vm_ops = {
445597
.fault = ttm_bo_vm_fault,
446598
.open = ttm_bo_vm_open,
447599
.close = ttm_bo_vm_close,
448-
.access = ttm_bo_vm_access
600+
.access = ttm_bo_vm_access,
601+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
602+
.huge_fault = ttm_bo_vm_huge_fault,
603+
#endif
449604
};
450605

451606
static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,

drivers/gpu/drm/vmwgfx/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
1111
vmwgfx_validation.o vmwgfx_page_dirty.o vmwgfx_streamoutput.o \
1212
ttm_object.o ttm_lock.o
1313

14+
vmwgfx-$(CONFIG_TRANSPARENT_HUGEPAGE) += vmwgfx_thp.o
1415
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o

drivers/gpu/drm/vmwgfx/vmwgfx_drv.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,6 +1247,18 @@ static void vmw_remove(struct pci_dev *pdev)
12471247
pci_disable_device(pdev);
12481248
}
12491249

1250+
static unsigned long
1251+
vmw_get_unmapped_area(struct file *file, unsigned long uaddr,
1252+
unsigned long len, unsigned long pgoff,
1253+
unsigned long flags)
1254+
{
1255+
struct drm_file *file_priv = file->private_data;
1256+
struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
1257+
1258+
return drm_get_unmapped_area(file, uaddr, len, pgoff, flags,
1259+
&dev_priv->vma_manager);
1260+
}
1261+
12501262
static int vmwgfx_pm_notifier(struct notifier_block *nb, unsigned long val,
12511263
void *ptr)
12521264
{
@@ -1418,6 +1430,7 @@ static const struct file_operations vmwgfx_driver_fops = {
14181430
.compat_ioctl = vmw_compat_ioctl,
14191431
#endif
14201432
.llseek = noop_llseek,
1433+
.get_unmapped_area = vmw_get_unmapped_area,
14211434
};
14221435

14231436
static struct drm_driver driver = {

0 commit comments

Comments
 (0)