Skip to content

Commit 7d3332b

Browse files
bjorn-rivospalmer-dabbelt
authored and committed
riscv: mm: Pre-allocate PGD entries for vmalloc/modules area
The RISC-V port requires that kernel PGD entries are to be synchronized between MMs. This is done via the vmalloc_fault() function, that simply copies the PGD entries from init_mm to the faulting one. Historically, faulting in PGD entries have been a source for both bugs [1], and poor performance. One way to get rid of vmalloc faults is by pre-allocating the PGD entries. Pre-allocating the entries potientially wastes 64 * 4K (65 on SV39). The pre-allocation function is pulled from Jörg Rödel's x86 work, with the addition of 3-level page tables (PMD allocations). The pmd_alloc() function needs the ptlock cache to be initialized (when split page locks is enabled), so the pre-allocation is done in a RISC-V specific pgtable_cache_init() implementation. Pre-allocate the kernel PGD entries for the vmalloc/modules area, but only for 64b platforms. Link: https://lore.kernel.org/lkml/[email protected]/ # [1] Signed-off-by: Björn Töpel <[email protected]> Reviewed-by: Alexandre Ghiti <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent 16252e0 commit 7d3332b

File tree

2 files changed

+60
-14
lines changed

2 files changed

+60
-14
lines changed

arch/riscv/mm/fault.c

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -238,24 +238,12 @@ void handle_page_fault(struct pt_regs *regs)
238238
* only copy the information from the master page table,
239239
* nothing more.
240240
*/
241-
if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) {
241+
if ((!IS_ENABLED(CONFIG_MMU) || !IS_ENABLED(CONFIG_64BIT)) &&
242+
unlikely(addr >= VMALLOC_START && addr < VMALLOC_END)) {
242243
vmalloc_fault(regs, code, addr);
243244
return;
244245
}
245246

246-
#ifdef CONFIG_64BIT
247-
/*
248-
* Modules in 64bit kernels lie in their own virtual region which is not
249-
* in the vmalloc region, but dealing with page faults in this region
250-
* or the vmalloc region amounts to doing the same thing: checking that
251-
* the mapping exists in init_mm.pgd and updating user page table, so
252-
* just use vmalloc_fault.
253-
*/
254-
if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
255-
vmalloc_fault(regs, code, addr);
256-
return;
257-
}
258-
#endif
259247
/* Enable interrupts if they were enabled in the parent context. */
260248
if (!regs_irqs_disabled(regs))
261249
local_irq_enable();

arch/riscv/mm/init.c

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,3 +1363,61 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
13631363
return vmemmap_populate_basepages(start, end, node, NULL);
13641364
}
13651365
#endif
1366+
1367+
#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
1368+
/*
1369+
* Pre-allocates page-table pages for a specific area in the kernel
1370+
* page-table. Only the level which needs to be synchronized between
1371+
* all page-tables is allocated because the synchronization can be
1372+
* expensive.
1373+
*/
1374+
/*
 * preallocate_pgd_pages_range() - pre-allocate intermediate page-table
 * pages for the kernel virtual range [start, end).
 *
 * Only the level which needs to be synchronized between all page-tables
 * is allocated, because the synchronization can be expensive.  The walk
 * advances one PGD entry at a time and populates the next level down
 * (p4d, pud, or pmd depending on how many paging levels are enabled).
 *
 * @start: first virtual address of the area (inclusive)
 * @end:   end of the area (exclusive)
 * @area:  human-readable area name, used only in the panic message
 */
static void __init preallocate_pgd_pages_range(unsigned long start, unsigned long end,
					       const char *area)
{
	unsigned long addr;
	const char *lvl;

	/*
	 * The "addr >= start" clause stops the loop if ALIGN() of the
	 * last PGD-sized step wraps past the top of the address space.
	 */
	for (addr = start; addr < end && addr >= start; addr = ALIGN(addr + 1, PGDIR_SIZE)) {
		pgd_t *pgd = pgd_offset_k(addr);
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;

		/* lvl tracks the deepest level attempted, for the panic message. */
		lvl = "p4d";
		p4d = p4d_alloc(&init_mm, pgd, addr);
		if (!p4d)
			goto failed;

		/*
		 * With 5-level paging the p4d is the level shared across
		 * page-tables; nothing deeper needs pre-allocation.
		 */
		if (pgtable_l5_enabled)
			continue;

		lvl = "pud";
		pud = pud_alloc(&init_mm, p4d, addr);
		if (!pud)
			goto failed;

		/* Likewise, with 4-level paging the pud is sufficient. */
		if (pgtable_l4_enabled)
			continue;

		lvl = "pmd";
		pmd = pmd_alloc(&init_mm, pud, addr);
		if (!pmd)
			goto failed;
	}
	return;

failed:

	/*
	 * The pages have to be there now or they will be missing in
	 * process page-tables later.
	 */
	panic("Failed to pre-allocate %s pages for %s area\n", lvl, area);
}
1416+
1417+
/*
 * Pre-allocate the kernel PGD entries for the vmalloc and, when modules
 * are enabled, the bpf/modules regions.  Doing it here (rather than at
 * early boot) is required because pmd_alloc() needs the ptlock cache to
 * be initialized when split page-table locks are enabled.
 */
void __init pgtable_cache_init(void)
{
	preallocate_pgd_pages_range(VMALLOC_START, VMALLOC_END, "vmalloc");

	if (IS_ENABLED(CONFIG_MODULES))
		preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END,
					    "bpf/modules");
}
1423+
#endif

0 commit comments

Comments
 (0)