Skip to content

Commit e8a62cc

Browse files
AlexGhiti authored and palmer-dabbelt committed
riscv: Implement sv48 support
By adding a new 4th level of page table, give 64-bit kernels the ability to address 2^48 bytes of virtual address space: in practice, that offers 128TB of virtual address space to userspace and allows up to 64TB of physical memory. If the underlying hardware does not support sv48, we automatically fall back to a standard 3-level page table by folding the new PUD level into the PGDIR level. In order to detect HW capabilities at runtime, we use the SATP feature that ignores writes with an unsupported mode. Signed-off-by: Alexandre Ghiti <[email protected]> Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent 60639f7 commit e8a62cc

File tree

13 files changed

+514
-44
lines changed

13 files changed

+514
-44
lines changed

arch/riscv/Kconfig

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ config PAGE_OFFSET
150150
hex
151151
default 0xC0000000 if 32BIT
152152
default 0x80000000 if 64BIT && !MMU
153-
default 0xffffffd800000000 if 64BIT
153+
default 0xffffaf8000000000 if 64BIT
154154

155155
config KASAN_SHADOW_OFFSET
156156
hex
@@ -201,7 +201,7 @@ config FIX_EARLYCON_MEM
201201

202202
config PGTABLE_LEVELS
203203
int
204-
default 3 if 64BIT
204+
default 4 if 64BIT
205205
default 2
206206

207207
config LOCKDEP_SUPPORT

arch/riscv/include/asm/csr.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,13 @@
4040
#ifndef CONFIG_64BIT
4141
#define SATP_PPN _AC(0x003FFFFF, UL)
4242
#define SATP_MODE_32 _AC(0x80000000, UL)
43-
#define SATP_MODE SATP_MODE_32
4443
#define SATP_ASID_BITS 9
4544
#define SATP_ASID_SHIFT 22
4645
#define SATP_ASID_MASK _AC(0x1FF, UL)
4746
#else
4847
#define SATP_PPN _AC(0x00000FFFFFFFFFFF, UL)
4948
#define SATP_MODE_39 _AC(0x8000000000000000, UL)
50-
#define SATP_MODE SATP_MODE_39
49+
#define SATP_MODE_48 _AC(0x9000000000000000, UL)
5150
#define SATP_ASID_BITS 16
5251
#define SATP_ASID_SHIFT 44
5352
#define SATP_ASID_MASK _AC(0xFFFF, UL)

arch/riscv/include/asm/fixmap.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ enum fixed_addresses {
2424
FIX_HOLE,
2525
FIX_PTE,
2626
FIX_PMD,
27+
FIX_PUD,
2728
FIX_TEXT_POKE1,
2829
FIX_TEXT_POKE0,
2930
FIX_EARLYCON_MEM_BASE,

arch/riscv/include/asm/kasan.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,11 @@
2828
#define KASAN_SHADOW_SCALE_SHIFT 3
2929

3030
#define KASAN_SHADOW_SIZE (UL(1) << ((VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
31-
#define KASAN_SHADOW_START (KASAN_SHADOW_END - KASAN_SHADOW_SIZE)
31+
/*
32+
* Depending on the size of the virtual address space, the region may not be
33+
* aligned on PGDIR_SIZE, so force its alignment to ease its population.
34+
*/
35+
#define KASAN_SHADOW_START ((KASAN_SHADOW_END - KASAN_SHADOW_SIZE) & PGDIR_MASK)
3236
#define KASAN_SHADOW_END MODULES_LOWEST_VADDR
3337
#define KASAN_SHADOW_OFFSET _AC(CONFIG_KASAN_SHADOW_OFFSET, UL)
3438

arch/riscv/include/asm/page.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,20 @@
3131
* When not using MMU this corresponds to the first free page in
3232
* physical memory (aligned on a page boundary).
3333
*/
34+
#ifdef CONFIG_64BIT
35+
#ifdef CONFIG_MMU
36+
#define PAGE_OFFSET kernel_map.page_offset
37+
#else
38+
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
39+
#endif
40+
/*
41+
* By default, CONFIG_PAGE_OFFSET value corresponds to SV48 address space so
42+
* define the PAGE_OFFSET value for SV39.
43+
*/
44+
#define PAGE_OFFSET_L3 _AC(0xffffffd800000000, UL)
45+
#else
3446
#define PAGE_OFFSET _AC(CONFIG_PAGE_OFFSET, UL)
47+
#endif /* CONFIG_64BIT */
3548

3649
#ifndef __ASSEMBLY__
3750

@@ -84,6 +97,7 @@ extern unsigned long riscv_pfn_base;
8497
#endif /* CONFIG_MMU */
8598

8699
struct kernel_mapping {
100+
unsigned long page_offset;
87101
unsigned long virt_addr;
88102
uintptr_t phys_addr;
89103
uintptr_t size;

arch/riscv/include/asm/pgalloc.h

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include <asm/tlb.h>
1212

1313
#ifdef CONFIG_MMU
14+
#define __HAVE_ARCH_PUD_ALLOC_ONE
15+
#define __HAVE_ARCH_PUD_FREE
1416
#include <asm-generic/pgalloc.h>
1517

1618
static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -36,6 +38,44 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
3638

3739
set_pud(pud, __pud((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
3840
}
41+
42+
/*
 * Make the p4d entry at @p4d reference the PUD table @pud: the table's PFN
 * is shifted into the PFN field and combined with _PAGE_TABLE.
 * Nothing is written when pgtable_l4_enabled is false. @mm is not used.
 */
static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
43+
{
44+
if (pgtable_l4_enabled) {
45+
unsigned long pfn = virt_to_pfn(pud);
46+
47+
set_p4d(p4d, __p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
48+
}
49+
}
50+
51+
/*
 * Same entry construction as p4d_populate(), but the store goes through
 * set_p4d_safe() instead of set_p4d().
 * Nothing is written when pgtable_l4_enabled is false. @mm is not used.
 */
static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d,
52+
pud_t *pud)
53+
{
54+
if (pgtable_l4_enabled) {
55+
unsigned long pfn = virt_to_pfn(pud);
56+
57+
set_p4d_safe(p4d,
58+
__p4d((pfn << _PAGE_PFN_SHIFT) | _PAGE_TABLE));
59+
}
60+
}
61+
62+
/*
 * Arch override of pud_alloc_one(): returns whatever __pud_alloc_one()
 * returns when pgtable_l4_enabled is set, and NULL otherwise (no PUD table
 * is allocated when the PUD level is folded).
 */
#define pud_alloc_one pud_alloc_one
63+
static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
64+
{
65+
if (pgtable_l4_enabled)
66+
return __pud_alloc_one(mm, addr);
67+
68+
return NULL;
69+
}
70+
71+
/*
 * Arch override of pud_free(): hands @pud to __pud_free() only when
 * pgtable_l4_enabled is set; otherwise it is a no-op.
 */
#define pud_free pud_free
72+
static inline void pud_free(struct mm_struct *mm, pud_t *pud)
73+
{
74+
if (pgtable_l4_enabled)
75+
__pud_free(mm, pud);
76+
}
77+
/* Expands to a plain pud_free() on the gather's mm; @addr is ignored. */
#define __pud_free_tlb(tlb, pud, addr) pud_free((tlb)->mm, pud)
3979
#endif /* __PAGETABLE_PMD_FOLDED */
4080

4181
static inline pgd_t *pgd_alloc(struct mm_struct *mm)

arch/riscv/include/asm/pgtable-64.h

Lines changed: 107 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,16 +8,36 @@
88

99
#include <linux/const.h>
1010

11-
#define PGDIR_SHIFT 30
11+
/*
 * Runtime flag (defined elsewhere) that selects which of the two PGDIR
 * shifts below is in effect.
 */
extern bool pgtable_l4_enabled;
12+
13+
/* PGDIR shift used when pgtable_l4_enabled is false (each entry spans 1 << 30 bytes) */
#define PGDIR_SHIFT_L3 30
14+
/* PGDIR shift used when pgtable_l4_enabled is true (each entry spans 1 << 39 bytes) */
#define PGDIR_SHIFT_L4 39
15+
/* PGDIR entry size for the L3 shift, independent of the runtime flag */
#define PGDIR_SIZE_L3 (_AC(1, UL) << PGDIR_SHIFT_L3)
16+
17+
/* No longer a compile-time constant: evaluated against pgtable_l4_enabled at each use */
#define PGDIR_SHIFT (pgtable_l4_enabled ? PGDIR_SHIFT_L4 : PGDIR_SHIFT_L3)
1218
/* Size of region mapped by a page global directory */
1319
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
1420
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
1521

22+
/* pud is folded into pgd in case of 3-level page table */
23+
#define PUD_SHIFT 30
24+
#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
25+
#define PUD_MASK (~(PUD_SIZE - 1))
26+
1627
#define PMD_SHIFT 21
1728
/* Size of region mapped by a page middle directory */
1829
#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
1930
#define PMD_MASK (~(PMD_SIZE - 1))
2031

32+
/* Page Upper Directory entry: a single unsigned long wrapped in a struct */
33+
typedef struct {
34+
unsigned long pud;
35+
} pud_t;
36+
37+
/* Extract the raw unsigned long from a pud_t */
#define pud_val(x) ((x).pud)
38+
/* Build a pud_t from a raw value */
#define __pud(x) ((pud_t) { (x) })
39+
/* Number of pud_t entries that fit in one page */
#define PTRS_PER_PUD (PAGE_SIZE / sizeof(pud_t))
40+
2141
/* Page Middle Directory entry */
2242
typedef struct {
2343
unsigned long pmd;
@@ -59,6 +79,16 @@ static inline void pud_clear(pud_t *pudp)
5979
set_pud(pudp, __pud(0));
6080
}
6181

82+
/*
 * Build a PUD entry from page frame number @pfn and protection bits @prot:
 * the PFN is shifted by _PAGE_PFN_SHIFT and OR-ed with pgprot_val(@prot).
 */
static inline pud_t pfn_pud(unsigned long pfn, pgprot_t prot)
83+
{
84+
return __pud((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
85+
}
86+
87+
/*
 * Return the raw value of @pud shifted right by _PAGE_PFN_SHIFT, i.e. the
 * inverse of the shift applied by pfn_pud().
 */
static inline unsigned long _pud_pfn(pud_t pud)
88+
{
89+
return pud_val(pud) >> _PAGE_PFN_SHIFT;
90+
}
91+
6292
static inline pmd_t *pud_pgtable(pud_t pud)
6393
{
6494
return (pmd_t *)pfn_to_virt(pud_val(pud) >> _PAGE_PFN_SHIFT);
@@ -69,6 +99,17 @@ static inline struct page *pud_page(pud_t pud)
6999
return pfn_to_page(pud_val(pud) >> _PAGE_PFN_SHIFT);
70100
}
71101

102+
/*
 * Arch override of mm_pud_folded(): reports the PUD level as folded exactly
 * when pgtable_l4_enabled is false. The answer is global; @mm is not used.
 */
#define mm_pud_folded mm_pud_folded
103+
static inline bool mm_pud_folded(struct mm_struct *mm)
104+
{
105+
if (pgtable_l4_enabled)
106+
return false;
107+
108+
return true;
109+
}
110+
111+
/* Index of @addr within a PMD table: bits above PMD_SHIFT, masked to PTRS_PER_PMD entries */
#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
112+
72113
static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot)
73114
{
74115
return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot));
@@ -84,4 +125,69 @@ static inline unsigned long _pmd_pfn(pmd_t pmd)
84125
#define pmd_ERROR(e) \
85126
pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e))
86127

128+
/* Log a "bad pud" message with the caller's file, line and the raw value of @e */
#define pud_ERROR(e) \
129+
pr_err("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e))
130+
131+
/*
 * Store @p4d into the slot at @p4dp.
 * With pgtable_l4_enabled the value is assigned directly; otherwise the
 * slot is treated as a PUD slot and the same raw value is written through
 * set_pud().
 */
static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
132+
{
133+
if (pgtable_l4_enabled)
134+
*p4dp = p4d;
135+
else
136+
set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
137+
}
138+
139+
/*
 * Non-zero when @p4d is an all-zero entry and pgtable_l4_enabled is set.
 * Always 0 when pgtable_l4_enabled is false.
 */
static inline int p4d_none(p4d_t p4d)
140+
{
141+
if (pgtable_l4_enabled)
142+
return (p4d_val(p4d) == 0);
143+
144+
return 0;
145+
}
146+
147+
/*
 * With pgtable_l4_enabled, returns the _PAGE_PRESENT bit of @p4d (non-zero
 * if set, not normalized to 1). Always 1 when pgtable_l4_enabled is false.
 */
static inline int p4d_present(p4d_t p4d)
148+
{
149+
if (pgtable_l4_enabled)
150+
return (p4d_val(p4d) & _PAGE_PRESENT);
151+
152+
return 1;
153+
}
154+
155+
/*
 * With pgtable_l4_enabled, an entry is considered bad exactly when
 * p4d_present() is false for it. Always 0 when pgtable_l4_enabled is false.
 */
static inline int p4d_bad(p4d_t p4d)
156+
{
157+
if (pgtable_l4_enabled)
158+
return !p4d_present(p4d);
159+
160+
return 0;
161+
}
162+
163+
/*
 * Write a zero entry to @p4d via set_p4d() when pgtable_l4_enabled is set;
 * does nothing otherwise.
 */
static inline void p4d_clear(p4d_t *p4d)
164+
{
165+
if (pgtable_l4_enabled)
166+
set_p4d(p4d, __p4d(0));
167+
}
168+
169+
/*
 * Return the table that @p4d points to, as a pud_t pointer.
 * With pgtable_l4_enabled the PFN field of @p4d is converted with
 * pfn_to_virt().
 */
static inline pud_t *p4d_pgtable(p4d_t p4d)
170+
{
171+
if (pgtable_l4_enabled)
172+
return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
173+
174+
/* Otherwise reinterpret the raw value as a PUD entry and cast pud_pgtable()'s result */
return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
175+
}
176+
177+
/*
 * Return the struct page for the PFN stored in @p4d. Unlike the other p4d
 * helpers here, this one does not consult pgtable_l4_enabled.
 */
static inline struct page *p4d_page(p4d_t p4d)
178+
{
179+
return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
180+
}
181+
182+
/* Index of @addr within a PUD table: bits above PUD_SHIFT, masked to PTRS_PER_PUD entries */
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
183+
184+
/*
 * Arch override of pud_offset(): locate the PUD entry for @address.
 * With pgtable_l4_enabled this is the pud_index(@address)-th entry of the
 * table referenced by *@p4d; otherwise the p4d slot itself is returned,
 * cast to a pud_t pointer.
 */
#define pud_offset pud_offset
185+
static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
186+
{
187+
if (pgtable_l4_enabled)
188+
return p4d_pgtable(*p4d) + pud_index(address);
189+
190+
return (pud_t *)p4d;
191+
}
192+
87193
#endif /* _ASM_RISCV_PGTABLE_64_H */

arch/riscv/include/asm/pgtable.h

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
* position vmemmap directly below the VMALLOC region.
6363
*/
6464
#ifdef CONFIG_64BIT
65-
#define VA_BITS 39
65+
#define VA_BITS (pgtable_l4_enabled ? 48 : 39)
6666
#else
6767
#define VA_BITS 32
6868
#endif
@@ -102,8 +102,7 @@
102102

103103
#ifndef __ASSEMBLY__
104104

105-
/* Page Upper Directory not used in RISC-V */
106-
#include <asm-generic/pgtable-nopud.h>
105+
#include <asm-generic/pgtable-nop4d.h>
107106
#include <asm/page.h>
108107
#include <asm/tlbflush.h>
109108
#include <linux/mm_types.h>
@@ -126,6 +125,17 @@
126125
#define XIP_FIXUP(addr) (addr)
127126
#endif /* CONFIG_XIP_KERNEL */
128127

128+
/*
 * Table of page-table allocation callbacks, one get_*_virt/alloc_* pair per
 * level. The PMD and PUD pairs exist only when __PAGETABLE_PMD_FOLDED is
 * not defined.
 * NOTE(review): presumably get_*_virt turns a table's physical address into
 * a usable pointer and alloc_* returns the physical address of a new table
 * for @va -- confirm against the implementations, which are not shown here.
 */
struct pt_alloc_ops {
129+
pte_t *(*get_pte_virt)(phys_addr_t pa);
130+
phys_addr_t (*alloc_pte)(uintptr_t va);
131+
#ifndef __PAGETABLE_PMD_FOLDED
132+
pmd_t *(*get_pmd_virt)(phys_addr_t pa);
133+
phys_addr_t (*alloc_pmd)(uintptr_t va);
134+
pud_t *(*get_pud_virt)(phys_addr_t pa);
135+
phys_addr_t (*alloc_pud)(uintptr_t va);
136+
#endif
137+
};
138+
129139
#ifdef CONFIG_MMU
130140
/* Number of PGD entries that a user-mode program can use */
131141
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
@@ -677,9 +687,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
677687
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.
678688
*/
679689
#ifdef CONFIG_64BIT
680-
#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
690+
#define TASK_SIZE (PGDIR_SIZE * PTRS_PER_PGD / 2)
691+
#define TASK_SIZE_MIN (PGDIR_SIZE_L3 * PTRS_PER_PGD / 2)
681692
#else
682-
#define TASK_SIZE FIXADDR_START
693+
#define TASK_SIZE FIXADDR_START
694+
#define TASK_SIZE_MIN TASK_SIZE
683695
#endif
684696

685697
#else /* CONFIG_MMU */
@@ -705,6 +717,8 @@ extern uintptr_t _dtb_early_pa;
705717
#define dtb_early_va _dtb_early_va
706718
#define dtb_early_pa _dtb_early_pa
707719
#endif /* CONFIG_XIP_KERNEL */
720+
extern u64 satp_mode;
721+
extern bool pgtable_l4_enabled;
708722

709723
void paging_init(void);
710724
void misc_mem_init(void);

arch/riscv/kernel/head.S

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ relocate:
105105

106106
/* Compute satp for kernel page tables, but don't load it yet */
107107
srl a2, a0, PAGE_SHIFT
108-
li a1, SATP_MODE
108+
la a1, satp_mode
109+
REG_L a1, 0(a1)
109110
or a2, a2, a1
110111

111112
/*

arch/riscv/mm/context.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
192192
switch_mm_fast:
193193
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
194194
((cntx & asid_mask) << SATP_ASID_SHIFT) |
195-
SATP_MODE);
195+
satp_mode);
196196

197197
if (need_flush_tlb)
198198
local_flush_tlb_all();
@@ -201,7 +201,7 @@ static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
201201
static void set_mm_noasid(struct mm_struct *mm)
202202
{
203203
/* Switch the page table and blindly nuke entire local TLB */
204-
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE);
204+
csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | satp_mode);
205205
local_flush_tlb_all();
206206
}
207207

0 commit comments

Comments
 (0)