Skip to content

Commit 59c4da8

Browse files
zongbox authored and palmer-dabbelt committed
riscv: Add support to dump the kernel page tables
In a similar manner to arm64, x86, powerpc, etc., it can traverse all page tables, and dump the page table layout with the memory types and permissions. Add a debugfs file at /sys/kernel/debug/kernel_page_tables to export the page table layout to userspace. Signed-off-by: Zong Li <[email protected]> Tested-by: Alexandre Ghiti <[email protected]> Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent 8fdddb2 commit 59c4da8

File tree

5 files changed

+340
-0
lines changed

5 files changed

+340
-0
lines changed

arch/riscv/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ config RISCV
2929
select GENERIC_SMP_IDLE_THREAD
3030
select GENERIC_ATOMIC64 if !64BIT
3131
select GENERIC_IOREMAP
32+
select GENERIC_PTDUMP if MMU
3233
select HAVE_ARCH_AUDITSYSCALL
3334
select HAVE_ARCH_SECCOMP_FILTER
3435
select HAVE_ASM_MODVERSIONS

arch/riscv/include/asm/pgtable.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,16 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
448448
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
449449
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
450450

451+
/*
 * KERN_VIRT_START: lowest kernel virtual address.
 *
 * RV64: the user gets one half of the virtual-address space and the kernel
 * the other (upper) half, so the kernel part begins TASK_SIZE above
 * -(BIT(CONFIG_VA_BITS)).
 * RV32: the kernel part is taken to begin at FIXADDR_START.
 *
 * TASK_SIZE is defined further down in this header; that is fine because
 * the macro body is only expanded where KERN_VIRT_START is used.
 */
#ifdef CONFIG_64BIT
#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE)
#else
#define KERN_VIRT_START FIXADDR_START
#endif
460+
451461
/*
452462
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
453463
* Note that PGDIR_SIZE must evenly divide TASK_SIZE.

arch/riscv/include/asm/ptdump.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (C) 2019 SiFive
4+
*/
5+
6+
#ifndef _ASM_RISCV_PTDUMP_H
#define _ASM_RISCV_PTDUMP_H

/*
 * Walk the kernel page tables and log whether any mapping is both
 * writable and executable. Implemented in arch/riscv/mm/ptdump.c.
 */
void ptdump_check_wx(void);

#endif /* _ASM_RISCV_PTDUMP_H */

arch/riscv/mm/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ ifeq ($(CONFIG_MMU),y)
1515
obj-$(CONFIG_SMP) += tlbflush.o
1616
endif
1717
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
18+
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
1819
obj-$(CONFIG_KASAN) += kasan_init.o
1920

2021
ifdef CONFIG_KASAN

arch/riscv/mm/ptdump.c

Lines changed: 317 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,317 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Copyright (C) 2019 SiFive
4+
*/
5+
6+
#include <linux/init.h>
7+
#include <linux/debugfs.h>
8+
#include <linux/seq_file.h>
9+
#include <linux/ptdump.h>
10+
11+
#include <asm/ptdump.h>
12+
#include <asm/pgtable.h>
13+
#include <asm/kasan.h>
14+
15+
/*
 * seq_printf() wrapper that silently does nothing when @m is NULL.
 * ptdump_check_wx() walks the page tables with a NULL seq file, so every
 * print on the walk path has to tolerate that.
 */
#define pt_dump_seq_printf(m, fmt, args...)	\
do {						\
	if (m)					\
		seq_printf(m, fmt, ##args);	\
} while (0)
20+
21+
/*
 * Write the plain string @str to seq file @m; no-op when @m is NULL.
 *
 * Uses seq_puts() rather than seq_printf(): the argument is a string, not
 * a format, so it must not be interpreted for '%' conversions.
 */
#define pt_dump_seq_puts(m, str)	\
do {					\
	if (m)				\
		seq_puts(m, str);	\
} while (0)
26+
27+
/*
28+
* The page dumper groups page table entries of the same type into a single
29+
* description. It uses pg_state to track the range information while
30+
* iterating over the pte entries. When the continuity is broken it then
31+
* dumps out a description of the range.
32+
*/
33+
struct pg_state {
34+
struct ptdump_state ptdump;
35+
struct seq_file *seq;
36+
const struct addr_marker *marker;
37+
unsigned long start_address;
38+
unsigned long start_pa;
39+
unsigned long last_pa;
40+
int level;
41+
u64 current_prot;
42+
bool check_wx;
43+
unsigned long wx_pages;
44+
};
45+
46+
/* Address marker */
47+
struct addr_marker {
48+
unsigned long start_address;
49+
const char *name;
50+
};
51+
52+
static struct addr_marker address_markers[] = {
53+
#ifdef CONFIG_KASAN
54+
{KASAN_SHADOW_START, "Kasan shadow start"},
55+
{KASAN_SHADOW_END, "Kasan shadow end"},
56+
#endif
57+
{FIXADDR_START, "Fixmap start"},
58+
{FIXADDR_TOP, "Fixmap end"},
59+
{PCI_IO_START, "PCI I/O start"},
60+
{PCI_IO_END, "PCI I/O end"},
61+
#ifdef CONFIG_SPARSEMEM_VMEMMAP
62+
{VMEMMAP_START, "vmemmap start"},
63+
{VMEMMAP_END, "vmemmap end"},
64+
#endif
65+
{VMALLOC_START, "vmalloc() area"},
66+
{VMALLOC_END, "vmalloc() end"},
67+
{PAGE_OFFSET, "Linear mapping"},
68+
{-1, NULL},
69+
};
70+
71+
/* Page Table Entry */
72+
struct prot_bits {
73+
u64 mask;
74+
u64 val;
75+
const char *set;
76+
const char *clear;
77+
};
78+
79+
static const struct prot_bits pte_bits[] = {
80+
{
81+
.mask = _PAGE_SOFT,
82+
.val = _PAGE_SOFT,
83+
.set = "RSW",
84+
.clear = " ",
85+
}, {
86+
.mask = _PAGE_DIRTY,
87+
.val = _PAGE_DIRTY,
88+
.set = "D",
89+
.clear = ".",
90+
}, {
91+
.mask = _PAGE_ACCESSED,
92+
.val = _PAGE_ACCESSED,
93+
.set = "A",
94+
.clear = ".",
95+
}, {
96+
.mask = _PAGE_GLOBAL,
97+
.val = _PAGE_GLOBAL,
98+
.set = "G",
99+
.clear = ".",
100+
}, {
101+
.mask = _PAGE_USER,
102+
.val = _PAGE_USER,
103+
.set = "U",
104+
.clear = ".",
105+
}, {
106+
.mask = _PAGE_EXEC,
107+
.val = _PAGE_EXEC,
108+
.set = "X",
109+
.clear = ".",
110+
}, {
111+
.mask = _PAGE_WRITE,
112+
.val = _PAGE_WRITE,
113+
.set = "W",
114+
.clear = ".",
115+
}, {
116+
.mask = _PAGE_READ,
117+
.val = _PAGE_READ,
118+
.set = "R",
119+
.clear = ".",
120+
}, {
121+
.mask = _PAGE_PRESENT,
122+
.val = _PAGE_PRESENT,
123+
.set = "V",
124+
.clear = ".",
125+
}
126+
};
127+
128+
/* Page Level */
129+
struct pg_level {
130+
const char *name;
131+
u64 mask;
132+
};
133+
134+
static struct pg_level pg_level[] = {
135+
{ /* pgd */
136+
.name = "PGD",
137+
}, { /* p4d */
138+
.name = (CONFIG_PGTABLE_LEVELS > 4) ? "P4D" : "PGD",
139+
}, { /* pud */
140+
.name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD",
141+
}, { /* pmd */
142+
.name = (CONFIG_PGTABLE_LEVELS > 2) ? "PMD" : "PGD",
143+
}, { /* pte */
144+
.name = "PTE",
145+
},
146+
};
147+
148+
static void dump_prot(struct pg_state *st)
149+
{
150+
unsigned int i;
151+
152+
for (i = 0; i < ARRAY_SIZE(pte_bits); i++) {
153+
const char *s;
154+
155+
if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val)
156+
s = pte_bits[i].set;
157+
else
158+
s = pte_bits[i].clear;
159+
160+
if (s)
161+
pt_dump_seq_printf(st->seq, " %s", s);
162+
}
163+
}
164+
165+
#ifdef CONFIG_64BIT
166+
#define ADDR_FORMAT "0x%016lx"
167+
#else
168+
#define ADDR_FORMAT "0x%08lx"
169+
#endif
170+
static void dump_addr(struct pg_state *st, unsigned long addr)
171+
{
172+
static const char units[] = "KMGTPE";
173+
const char *unit = units;
174+
unsigned long delta;
175+
176+
pt_dump_seq_printf(st->seq, ADDR_FORMAT "-" ADDR_FORMAT " ",
177+
st->start_address, addr);
178+
179+
pt_dump_seq_printf(st->seq, " " ADDR_FORMAT " ", st->start_pa);
180+
delta = (addr - st->start_address) >> 10;
181+
182+
while (!(delta & 1023) && unit[1]) {
183+
delta >>= 10;
184+
unit++;
185+
}
186+
187+
pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit,
188+
pg_level[st->level].name);
189+
}
190+
191+
static void note_prot_wx(struct pg_state *st, unsigned long addr)
192+
{
193+
if (!st->check_wx)
194+
return;
195+
196+
if ((st->current_prot & (_PAGE_WRITE | _PAGE_EXEC)) !=
197+
(_PAGE_WRITE | _PAGE_EXEC))
198+
return;
199+
200+
WARN_ONCE(1, "riscv/mm: Found insecure W+X mapping at address %p/%pS\n",
201+
(void *)st->start_address, (void *)st->start_address);
202+
203+
st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
204+
}
205+
206+
static void note_page(struct ptdump_state *pt_st, unsigned long addr,
207+
int level, unsigned long val)
208+
{
209+
struct pg_state *st = container_of(pt_st, struct pg_state, ptdump);
210+
u64 pa = PFN_PHYS(pte_pfn(__pte(val)));
211+
u64 prot = 0;
212+
213+
if (level >= 0)
214+
prot = val & pg_level[level].mask;
215+
216+
if (st->level == -1) {
217+
st->level = level;
218+
st->current_prot = prot;
219+
st->start_address = addr;
220+
st->start_pa = pa;
221+
st->last_pa = pa;
222+
pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
223+
} else if (prot != st->current_prot ||
224+
level != st->level || addr >= st->marker[1].start_address) {
225+
if (st->current_prot) {
226+
note_prot_wx(st, addr);
227+
dump_addr(st, addr);
228+
dump_prot(st);
229+
pt_dump_seq_puts(st->seq, "\n");
230+
}
231+
232+
while (addr >= st->marker[1].start_address) {
233+
st->marker++;
234+
pt_dump_seq_printf(st->seq, "---[ %s ]---\n",
235+
st->marker->name);
236+
}
237+
238+
st->start_address = addr;
239+
st->start_pa = pa;
240+
st->last_pa = pa;
241+
st->current_prot = prot;
242+
st->level = level;
243+
} else {
244+
st->last_pa = pa;
245+
}
246+
}
247+
248+
static void ptdump_walk(struct seq_file *s)
249+
{
250+
struct pg_state st = {
251+
.seq = s,
252+
.marker = address_markers,
253+
.level = -1,
254+
.ptdump = {
255+
.note_page = note_page,
256+
.range = (struct ptdump_range[]) {
257+
{KERN_VIRT_START, ULONG_MAX},
258+
{0, 0}
259+
}
260+
}
261+
};
262+
263+
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
264+
}
265+
266+
void ptdump_check_wx(void)
267+
{
268+
struct pg_state st = {
269+
.seq = NULL,
270+
.marker = (struct addr_marker[]) {
271+
{0, NULL},
272+
{-1, NULL},
273+
},
274+
.level = -1,
275+
.check_wx = true,
276+
.ptdump = {
277+
.note_page = note_page,
278+
.range = (struct ptdump_range[]) {
279+
{KERN_VIRT_START, ULONG_MAX},
280+
{0, 0}
281+
}
282+
}
283+
};
284+
285+
ptdump_walk_pgd(&st.ptdump, &init_mm, NULL);
286+
287+
if (st.wx_pages)
288+
pr_warn("Checked W+X mappings: failed, %lu W+X pages found\n",
289+
st.wx_pages);
290+
else
291+
pr_info("Checked W+X mappings: passed, no W+X pages found\n");
292+
}
293+
294+
/*
 * seq_file show callback: write the kernel page table dump into @m.
 * @v is unused. Always returns 0.
 */
static int ptdump_show(struct seq_file *m, void *v)
{
	ptdump_walk(m);
	return 0;
}

/* Provides the ptdump_fops that ptdump_init() registers with debugfs. */
DEFINE_SHOW_ATTRIBUTE(ptdump);
302+
303+
static int ptdump_init(void)
304+
{
305+
unsigned int i, j;
306+
307+
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
308+
for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
309+
pg_level[i].mask |= pte_bits[j].mask;
310+
311+
debugfs_create_file("kernel_page_tables", 0400, NULL, NULL,
312+
&ptdump_fops);
313+
314+
return 0;
315+
}
316+
317+
device_initcall(ptdump_init);

0 commit comments

Comments
 (0)