Skip to content

Commit 05942f7

Browse files
Merge patch series "riscv: Fix set_memory_XX() and set_direct_map_XX()"
Alexandre Ghiti <[email protected]> says:

Those 2 patches fix the set_memory_XX() and set_direct_map_XX() APIs, which in turn fixes STRICT_KERNEL_RWX and memfd_secret(). Those were broken because the permission changes were not applied to the linear mapping: the linear mapping is mapped using hugepages, and walk_page_range_novma() does not split such mappings.

To fix that, patch 1 disables PGD mappings in the linear mapping, as it is hard to propagate changes at this level in *all* the page tables. This has the downside of disabling PMD mappings for sv32 and PUD (1GB) mappings for sv39 in the linear mapping (for specific kernels, we could add a Kconfig to enable ARCH_HAS_SET_DIRECT_MAP and STRICT_KERNEL_RWX if needed; I'm pretty sure we'll discuss that).

Patch 2 implements the split of the huge linear mappings so that walk_page_range_novma() can properly apply the permissions. The whole split is protected with mmap_sem in write mode, but I'm wondering if that's enough; any opinion on that is appreciated.

* b4-shazam-merge:
  riscv: Fix set_memory_XX() and set_direct_map_XX() by splitting huge linear mappings
  riscv: Don't use PGD entries for the linear mapping

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
2 parents 55e0bf4 + 311cd2f commit 05942f7

File tree

2 files changed

+236
-46
lines changed

2 files changed

+236
-46
lines changed

arch/riscv/mm/init.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -666,16 +666,16 @@ void __init create_pgd_mapping(pgd_t *pgdp,
666666
static uintptr_t __init best_map_size(phys_addr_t pa, uintptr_t va,
667667
phys_addr_t size)
668668
{
669-
if (!(pa & (PGDIR_SIZE - 1)) && !(va & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
670-
return PGDIR_SIZE;
671-
672-
if (!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
669+
if (pgtable_l5_enabled &&
670+
!(pa & (P4D_SIZE - 1)) && !(va & (P4D_SIZE - 1)) && size >= P4D_SIZE)
673671
return P4D_SIZE;
674672

675-
if (!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
673+
if (pgtable_l4_enabled &&
674+
!(pa & (PUD_SIZE - 1)) && !(va & (PUD_SIZE - 1)) && size >= PUD_SIZE)
676675
return PUD_SIZE;
677676

678-
if (!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
677+
if (IS_ENABLED(CONFIG_64BIT) &&
678+
!(pa & (PMD_SIZE - 1)) && !(va & (PMD_SIZE - 1)) && size >= PMD_SIZE)
679679
return PMD_SIZE;
680680

681681
return PAGE_SIZE;

arch/riscv/mm/pageattr.c

Lines changed: 230 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
#include <linux/pagewalk.h>
77
#include <linux/pgtable.h>
8+
#include <linux/vmalloc.h>
89
#include <asm/tlbflush.h>
910
#include <asm/bitops.h>
1011
#include <asm/set_memory.h>
@@ -25,19 +26,6 @@ static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk)
2526
return new_val;
2627
}
2728

28-
static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr,
29-
unsigned long next, struct mm_walk *walk)
30-
{
31-
pgd_t val = READ_ONCE(*pgd);
32-
33-
if (pgd_leaf(val)) {
34-
val = __pgd(set_pageattr_masks(pgd_val(val), walk));
35-
set_pgd(pgd, val);
36-
}
37-
38-
return 0;
39-
}
40-
4129
static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr,
4230
unsigned long next, struct mm_walk *walk)
4331
{
@@ -96,7 +84,6 @@ static int pageattr_pte_hole(unsigned long addr, unsigned long next,
9684
}
9785

9886
static const struct mm_walk_ops pageattr_ops = {
99-
.pgd_entry = pageattr_pgd_entry,
10087
.p4d_entry = pageattr_p4d_entry,
10188
.pud_entry = pageattr_pud_entry,
10289
.pmd_entry = pageattr_pmd_entry,
@@ -105,12 +92,181 @@ static const struct mm_walk_ops pageattr_ops = {
10592
.walk_lock = PGWALK_RDLOCK,
10693
};
10794

95+
#ifdef CONFIG_64BIT
96+
static int __split_linear_mapping_pmd(pud_t *pudp,
97+
unsigned long vaddr, unsigned long end)
98+
{
99+
pmd_t *pmdp;
100+
unsigned long next;
101+
102+
pmdp = pmd_offset(pudp, vaddr);
103+
104+
do {
105+
next = pmd_addr_end(vaddr, end);
106+
107+
if (next - vaddr >= PMD_SIZE &&
108+
vaddr <= (vaddr & PMD_MASK) && end >= next)
109+
continue;
110+
111+
if (pmd_leaf(*pmdp)) {
112+
struct page *pte_page;
113+
unsigned long pfn = _pmd_pfn(*pmdp);
114+
pgprot_t prot = __pgprot(pmd_val(*pmdp) & ~_PAGE_PFN_MASK);
115+
pte_t *ptep_new;
116+
int i;
117+
118+
pte_page = alloc_page(GFP_KERNEL);
119+
if (!pte_page)
120+
return -ENOMEM;
121+
122+
ptep_new = (pte_t *)page_address(pte_page);
123+
for (i = 0; i < PTRS_PER_PTE; ++i, ++ptep_new)
124+
set_pte(ptep_new, pfn_pte(pfn + i, prot));
125+
126+
smp_wmb();
127+
128+
set_pmd(pmdp, pfn_pmd(page_to_pfn(pte_page), PAGE_TABLE));
129+
}
130+
} while (pmdp++, vaddr = next, vaddr != end);
131+
132+
return 0;
133+
}
134+
135+
static int __split_linear_mapping_pud(p4d_t *p4dp,
136+
unsigned long vaddr, unsigned long end)
137+
{
138+
pud_t *pudp;
139+
unsigned long next;
140+
int ret;
141+
142+
pudp = pud_offset(p4dp, vaddr);
143+
144+
do {
145+
next = pud_addr_end(vaddr, end);
146+
147+
if (next - vaddr >= PUD_SIZE &&
148+
vaddr <= (vaddr & PUD_MASK) && end >= next)
149+
continue;
150+
151+
if (pud_leaf(*pudp)) {
152+
struct page *pmd_page;
153+
unsigned long pfn = _pud_pfn(*pudp);
154+
pgprot_t prot = __pgprot(pud_val(*pudp) & ~_PAGE_PFN_MASK);
155+
pmd_t *pmdp_new;
156+
int i;
157+
158+
pmd_page = alloc_page(GFP_KERNEL);
159+
if (!pmd_page)
160+
return -ENOMEM;
161+
162+
pmdp_new = (pmd_t *)page_address(pmd_page);
163+
for (i = 0; i < PTRS_PER_PMD; ++i, ++pmdp_new)
164+
set_pmd(pmdp_new,
165+
pfn_pmd(pfn + ((i * PMD_SIZE) >> PAGE_SHIFT), prot));
166+
167+
smp_wmb();
168+
169+
set_pud(pudp, pfn_pud(page_to_pfn(pmd_page), PAGE_TABLE));
170+
}
171+
172+
ret = __split_linear_mapping_pmd(pudp, vaddr, next);
173+
if (ret)
174+
return ret;
175+
} while (pudp++, vaddr = next, vaddr != end);
176+
177+
return 0;
178+
}
179+
180+
static int __split_linear_mapping_p4d(pgd_t *pgdp,
181+
unsigned long vaddr, unsigned long end)
182+
{
183+
p4d_t *p4dp;
184+
unsigned long next;
185+
int ret;
186+
187+
p4dp = p4d_offset(pgdp, vaddr);
188+
189+
do {
190+
next = p4d_addr_end(vaddr, end);
191+
192+
/*
193+
* If [vaddr; end] contains [vaddr & P4D_MASK; next], we don't
194+
* need to split, we'll change the protections on the whole P4D.
195+
*/
196+
if (next - vaddr >= P4D_SIZE &&
197+
vaddr <= (vaddr & P4D_MASK) && end >= next)
198+
continue;
199+
200+
if (p4d_leaf(*p4dp)) {
201+
struct page *pud_page;
202+
unsigned long pfn = _p4d_pfn(*p4dp);
203+
pgprot_t prot = __pgprot(p4d_val(*p4dp) & ~_PAGE_PFN_MASK);
204+
pud_t *pudp_new;
205+
int i;
206+
207+
pud_page = alloc_page(GFP_KERNEL);
208+
if (!pud_page)
209+
return -ENOMEM;
210+
211+
/*
212+
* Fill the pud level with leaf puds that have the same
213+
* protections as the leaf p4d.
214+
*/
215+
pudp_new = (pud_t *)page_address(pud_page);
216+
for (i = 0; i < PTRS_PER_PUD; ++i, ++pudp_new)
217+
set_pud(pudp_new,
218+
pfn_pud(pfn + ((i * PUD_SIZE) >> PAGE_SHIFT), prot));
219+
220+
/*
221+
* Make sure the pud filling is not reordered with the
222+
* p4d store which could result in seeing a partially
223+
* filled pud level.
224+
*/
225+
smp_wmb();
226+
227+
set_p4d(p4dp, pfn_p4d(page_to_pfn(pud_page), PAGE_TABLE));
228+
}
229+
230+
ret = __split_linear_mapping_pud(p4dp, vaddr, next);
231+
if (ret)
232+
return ret;
233+
} while (p4dp++, vaddr = next, vaddr != end);
234+
235+
return 0;
236+
}
237+
238+
static int __split_linear_mapping_pgd(pgd_t *pgdp,
239+
unsigned long vaddr,
240+
unsigned long end)
241+
{
242+
unsigned long next;
243+
int ret;
244+
245+
do {
246+
next = pgd_addr_end(vaddr, end);
247+
/* We never use PGD mappings for the linear mapping */
248+
ret = __split_linear_mapping_p4d(pgdp, vaddr, next);
249+
if (ret)
250+
return ret;
251+
} while (pgdp++, vaddr = next, vaddr != end);
252+
253+
return 0;
254+
}
255+
256+
static int split_linear_mapping(unsigned long start, unsigned long end)
257+
{
258+
return __split_linear_mapping_pgd(pgd_offset_k(start), start, end);
259+
}
260+
#endif /* CONFIG_64BIT */
261+
108262
static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
109263
pgprot_t clear_mask)
110264
{
111265
int ret;
112266
unsigned long start = addr;
113267
unsigned long end = start + PAGE_SIZE * numpages;
268+
unsigned long __maybe_unused lm_start;
269+
unsigned long __maybe_unused lm_end;
114270
struct pageattr_masks masks = {
115271
.set_mask = set_mask,
116272
.clear_mask = clear_mask
@@ -120,11 +276,67 @@ static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask,
120276
return 0;
121277

122278
mmap_write_lock(&init_mm);
279+
280+
#ifdef CONFIG_64BIT
281+
/*
282+
* We are about to change the permissions of a kernel mapping, we must
283+
* apply the same changes to its linear mapping alias, which may imply
284+
* splitting a huge mapping.
285+
*/
286+
287+
if (is_vmalloc_or_module_addr((void *)start)) {
288+
struct vm_struct *area = NULL;
289+
int i, page_start;
290+
291+
area = find_vm_area((void *)start);
292+
page_start = (start - (unsigned long)area->addr) >> PAGE_SHIFT;
293+
294+
for (i = page_start; i < page_start + numpages; ++i) {
295+
lm_start = (unsigned long)page_address(area->pages[i]);
296+
lm_end = lm_start + PAGE_SIZE;
297+
298+
ret = split_linear_mapping(lm_start, lm_end);
299+
if (ret)
300+
goto unlock;
301+
302+
ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
303+
&pageattr_ops, NULL, &masks);
304+
if (ret)
305+
goto unlock;
306+
}
307+
} else if (is_kernel_mapping(start) || is_linear_mapping(start)) {
308+
lm_start = (unsigned long)lm_alias(start);
309+
lm_end = (unsigned long)lm_alias(end);
310+
311+
ret = split_linear_mapping(lm_start, lm_end);
312+
if (ret)
313+
goto unlock;
314+
315+
ret = walk_page_range_novma(&init_mm, lm_start, lm_end,
316+
&pageattr_ops, NULL, &masks);
317+
if (ret)
318+
goto unlock;
319+
}
320+
123321
ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
124322
&masks);
323+
324+
unlock:
325+
mmap_write_unlock(&init_mm);
326+
327+
/*
328+
* We can't use flush_tlb_kernel_range() here as we may have split a
329+
* hugepage that is larger than that, so let's flush everything.
330+
*/
331+
flush_tlb_all();
332+
#else
333+
ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL,
334+
&masks);
335+
125336
mmap_write_unlock(&init_mm);
126337

127338
flush_tlb_kernel_range(start, end);
339+
#endif
128340

129341
return ret;
130342
}
@@ -159,36 +371,14 @@ int set_memory_nx(unsigned long addr, int numpages)
159371

160372
int set_direct_map_invalid_noflush(struct page *page)
161373
{
162-
int ret;
163-
unsigned long start = (unsigned long)page_address(page);
164-
unsigned long end = start + PAGE_SIZE;
165-
struct pageattr_masks masks = {
166-
.set_mask = __pgprot(0),
167-
.clear_mask = __pgprot(_PAGE_PRESENT)
168-
};
169-
170-
mmap_read_lock(&init_mm);
171-
ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
172-
mmap_read_unlock(&init_mm);
173-
174-
return ret;
374+
return __set_memory((unsigned long)page_address(page), 1,
375+
__pgprot(0), __pgprot(_PAGE_PRESENT));
175376
}
176377

177378
int set_direct_map_default_noflush(struct page *page)
178379
{
179-
int ret;
180-
unsigned long start = (unsigned long)page_address(page);
181-
unsigned long end = start + PAGE_SIZE;
182-
struct pageattr_masks masks = {
183-
.set_mask = PAGE_KERNEL,
184-
.clear_mask = __pgprot(0)
185-
};
186-
187-
mmap_read_lock(&init_mm);
188-
ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks);
189-
mmap_read_unlock(&init_mm);
190-
191-
return ret;
380+
return __set_memory((unsigned long)page_address(page), 1,
381+
PAGE_KERNEL, __pgprot(0));
192382
}
193383

194384
#ifdef CONFIG_DEBUG_PAGEALLOC

0 commit comments

Comments
 (0)