Skip to content

Commit 414c109

Browse files
mrutland-arm authored and willdeacon committed
arm64: mm: always map fixmap at page granularity
Today the fixmap code largely maps elements at PAGE_SIZE granularity, but we special-case the FDT mapping such that it can be mapped with 2M block mappings when 4K pages are in use. The original rationale for this was simplicity, but it has some unfortunate side-effects, and complicates portions of the fixmap code (i.e. is not so simple after all). The FDT can be up to 2M in size but is only required to have 8-byte alignment, and so it may straddle a 2M boundary. Thus when using 2M block mappings we may map up to 4M of memory surrounding the FDT. This is unfortunate as most of that memory will be unrelated to the FDT, and any pages which happen to share a 2M block with the FDT will by mapped with Normal Write-Back Cacheable attributes, which might not be what we want elsewhere (e.g. for carve-outs using Non-Cacheable attributes). The logic to handle mapping the FDT with 2M blocks requires some special cases in the fixmap code, and ties it to the early page table configuration by virtue of the SWAPPER_TABLE_SHIFT and SWAPPER_BLOCK_SIZE constants used to determine the granularity used to map the FDT. This patch simplifies the FDT logic and removes the unnecessary mappings of surrounding pages by always mapping the FDT at page granularity as with all other fixmap mappings. To do so we statically reserve multiple PTE tables to cover the fixmap VA range. Since the FDT can be at most 2M, for 4K pages we only need to allocate a single additional PTE table, and for 16K and 64K pages the existing single PTE table is sufficient. The PTE table allocation scales with the number of slots reserved in the fixmap, and so this also makes it easier to add more fixmap entries if we require those in future. Our VA layout means that the fixmap will always fall within a single PMD table (and consequently, within a single PUD/P4D/PGD entry), which we can verify at compile time with a static_assert(). With that assert a number of runtime warnings become impossible, and are removed. 
I've boot-tested this patch with both 4K and 64K pages. Signed-off-by: Mark Rutland <[email protected]> Cc: Anshuman Khandual <[email protected]> Cc: Ard Biesheuvel <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: Ryan Roberts <[email protected]> Cc: Will Deacon <[email protected]> Reviewed-by: Ryan Roberts <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Will Deacon <[email protected]>
1 parent b975477 commit 414c109

File tree

3 files changed

+78
-90
lines changed

3 files changed

+78
-90
lines changed

arch/arm64/include/asm/fixmap.h

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
#ifndef __ASSEMBLY__
1919
#include <linux/kernel.h>
20+
#include <linux/math.h>
2021
#include <linux/sizes.h>
2122
#include <asm/boot.h>
2223
#include <asm/page.h>
@@ -36,17 +37,13 @@ enum fixed_addresses {
3637
FIX_HOLE,
3738

3839
/*
39-
* Reserve a virtual window for the FDT that is 2 MB larger than the
40-
* maximum supported size, and put it at the top of the fixmap region.
41-
* The additional space ensures that any FDT that does not exceed
42-
* MAX_FDT_SIZE can be mapped regardless of whether it crosses any
43-
* 2 MB alignment boundaries.
44-
*
45-
* Keep this at the top so it remains 2 MB aligned.
40+
* Reserve a virtual window for the FDT that is a page bigger than the
41+
* maximum supported size. The additional space ensures that any FDT
42+
* that does not exceed MAX_FDT_SIZE can be mapped regardless of
43+
* whether it crosses any page boundary.
4644
*/
47-
#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M)
4845
FIX_FDT_END,
49-
FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
46+
FIX_FDT = FIX_FDT_END + DIV_ROUND_UP(MAX_FDT_SIZE, PAGE_SIZE) + 1,
5047

5148
FIX_EARLYCON_MEM_BASE,
5249
FIX_TEXT_POKE0,

arch/arm64/include/asm/kernel-pgtable.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,11 @@
5959
#define EARLY_KASLR (0)
6060
#endif
6161

62+
#define SPAN_NR_ENTRIES(vstart, vend, shift) \
63+
((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1)
64+
6265
#define EARLY_ENTRIES(vstart, vend, shift, add) \
63-
((((vend) - 1) >> (shift)) - ((vstart) >> (shift)) + 1 + add)
66+
(SPAN_NR_ENTRIES(vstart, vend, shift) + (add))
6467

6568
#define EARLY_PGDS(vstart, vend, add) (EARLY_ENTRIES(vstart, vend, PGDIR_SHIFT, add))
6669

arch/arm64/mm/fixmap.c

Lines changed: 68 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -16,34 +16,77 @@
1616
#include <asm/pgalloc.h>
1717
#include <asm/tlbflush.h>
1818

19-
static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss;
19+
#define NR_BM_PTE_TABLES \
20+
SPAN_NR_ENTRIES(FIXADDR_TOT_START, FIXADDR_TOP, PMD_SHIFT)
21+
#define NR_BM_PMD_TABLES \
22+
SPAN_NR_ENTRIES(FIXADDR_TOT_START, FIXADDR_TOP, PUD_SHIFT)
23+
24+
static_assert(NR_BM_PMD_TABLES == 1);
25+
26+
#define __BM_TABLE_IDX(addr, shift) \
27+
(((addr) >> (shift)) - (FIXADDR_TOT_START >> (shift)))
28+
29+
#define BM_PTE_TABLE_IDX(addr) __BM_TABLE_IDX(addr, PMD_SHIFT)
30+
31+
static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss;
2032
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
2133
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
2234

23-
static inline pud_t *fixmap_pud(unsigned long addr)
35+
static inline pte_t *fixmap_pte(unsigned long addr)
2436
{
25-
pgd_t *pgdp = pgd_offset_k(addr);
26-
p4d_t *p4dp = p4d_offset(pgdp, addr);
27-
p4d_t p4d = READ_ONCE(*p4dp);
37+
return &bm_pte[BM_PTE_TABLE_IDX(addr)][pte_index(addr)];
38+
}
2839

29-
BUG_ON(p4d_none(p4d) || p4d_bad(p4d));
40+
static void __init early_fixmap_init_pte(pmd_t *pmdp, unsigned long addr)
41+
{
42+
pmd_t pmd = READ_ONCE(*pmdp);
43+
pte_t *ptep;
3044

31-
return pud_offset_kimg(p4dp, addr);
45+
if (pmd_none(pmd)) {
46+
ptep = bm_pte[BM_PTE_TABLE_IDX(addr)];
47+
__pmd_populate(pmdp, __pa_symbol(ptep), PMD_TYPE_TABLE);
48+
}
3249
}
3350

34-
static inline pmd_t *fixmap_pmd(unsigned long addr)
51+
static void __init early_fixmap_init_pmd(pud_t *pudp, unsigned long addr,
52+
unsigned long end)
3553
{
36-
pud_t *pudp = fixmap_pud(addr);
54+
unsigned long next;
3755
pud_t pud = READ_ONCE(*pudp);
56+
pmd_t *pmdp;
3857

39-
BUG_ON(pud_none(pud) || pud_bad(pud));
58+
if (pud_none(pud))
59+
__pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE);
4060

41-
return pmd_offset_kimg(pudp, addr);
61+
pmdp = pmd_offset_kimg(pudp, addr);
62+
do {
63+
next = pmd_addr_end(addr, end);
64+
early_fixmap_init_pte(pmdp, addr);
65+
} while (pmdp++, addr = next, addr != end);
4266
}
4367

44-
static inline pte_t *fixmap_pte(unsigned long addr)
68+
69+
static void __init early_fixmap_init_pud(p4d_t *p4dp, unsigned long addr,
70+
unsigned long end)
4571
{
46-
return &bm_pte[pte_index(addr)];
72+
p4d_t p4d = READ_ONCE(*p4dp);
73+
pud_t *pudp;
74+
75+
if (CONFIG_PGTABLE_LEVELS > 3 && !p4d_none(p4d) &&
76+
p4d_page_paddr(p4d) != __pa_symbol(bm_pud)) {
77+
/*
78+
* We only end up here if the kernel mapping and the fixmap
79+
* share the top level pgd entry, which should only happen on
80+
* 16k/4 levels configurations.
81+
*/
82+
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
83+
}
84+
85+
if (p4d_none(p4d))
86+
__p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE);
87+
88+
pudp = pud_offset_kimg(p4dp, addr);
89+
early_fixmap_init_pmd(pudp, addr, end);
4790
}
4891

4992
/*
@@ -54,55 +97,13 @@ static inline pte_t *fixmap_pte(unsigned long addr)
5497
*/
5598
void __init early_fixmap_init(void)
5699
{
57-
pgd_t *pgdp;
58-
p4d_t *p4dp, p4d;
59-
pud_t *pudp;
60-
pmd_t *pmdp;
61100
unsigned long addr = FIXADDR_TOT_START;
101+
unsigned long end = FIXADDR_TOP;
62102

63-
pgdp = pgd_offset_k(addr);
64-
p4dp = p4d_offset(pgdp, addr);
65-
p4d = READ_ONCE(*p4dp);
66-
if (CONFIG_PGTABLE_LEVELS > 3 &&
67-
!(p4d_none(p4d) || p4d_page_paddr(p4d) == __pa_symbol(bm_pud))) {
68-
/*
69-
* We only end up here if the kernel mapping and the fixmap
70-
* share the top level pgd entry, which should only happen on
71-
* 16k/4 levels configurations.
72-
*/
73-
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
74-
pudp = pud_offset_kimg(p4dp, addr);
75-
} else {
76-
if (p4d_none(p4d))
77-
__p4d_populate(p4dp, __pa_symbol(bm_pud), P4D_TYPE_TABLE);
78-
pudp = fixmap_pud(addr);
79-
}
80-
if (pud_none(READ_ONCE(*pudp)))
81-
__pud_populate(pudp, __pa_symbol(bm_pmd), PUD_TYPE_TABLE);
82-
pmdp = fixmap_pmd(addr);
83-
__pmd_populate(pmdp, __pa_symbol(bm_pte), PMD_TYPE_TABLE);
103+
pgd_t *pgdp = pgd_offset_k(addr);
104+
p4d_t *p4dp = p4d_offset(pgdp, addr);
84105

85-
/*
86-
* The boot-ioremap range spans multiple pmds, for which
87-
* we are not prepared:
88-
*/
89-
BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
90-
!= (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
91-
92-
if ((pmdp != fixmap_pmd(__fix_to_virt(FIX_BTMAP_BEGIN)))
93-
|| pmdp != fixmap_pmd(__fix_to_virt(FIX_BTMAP_END))) {
94-
WARN_ON(1);
95-
pr_warn("pmdp %p != %p, %p\n",
96-
pmdp, fixmap_pmd(__fix_to_virt(FIX_BTMAP_BEGIN)),
97-
fixmap_pmd(__fix_to_virt(FIX_BTMAP_END)));
98-
pr_warn("fix_to_virt(FIX_BTMAP_BEGIN): %08lx\n",
99-
__fix_to_virt(FIX_BTMAP_BEGIN));
100-
pr_warn("fix_to_virt(FIX_BTMAP_END): %08lx\n",
101-
__fix_to_virt(FIX_BTMAP_END));
102-
103-
pr_warn("FIX_BTMAP_END: %d\n", FIX_BTMAP_END);
104-
pr_warn("FIX_BTMAP_BEGIN: %d\n", FIX_BTMAP_BEGIN);
105-
}
106+
early_fixmap_init_pud(p4dp, addr, end);
106107
}
107108

108109
/*
@@ -130,6 +131,7 @@ void __set_fixmap(enum fixed_addresses idx,
130131
void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
131132
{
132133
const u64 dt_virt_base = __fix_to_virt(FIX_FDT);
134+
phys_addr_t dt_phys_base;
133135
int offset;
134136
void *dt_virt;
135137

@@ -144,27 +146,12 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
144146
if (!dt_phys || dt_phys % MIN_FDT_ALIGN)
145147
return NULL;
146148

147-
/*
148-
* Make sure that the FDT region can be mapped without the need to
149-
* allocate additional translation table pages, so that it is safe
150-
* to call create_mapping_noalloc() this early.
151-
*
152-
* On 64k pages, the FDT will be mapped using PTEs, so we need to
153-
* be in the same PMD as the rest of the fixmap.
154-
* On 4k pages, we'll use section mappings for the FDT so we only
155-
* have to be in the same PUD.
156-
*/
157-
BUILD_BUG_ON(dt_virt_base % SZ_2M);
158-
159-
BUILD_BUG_ON(__fix_to_virt(FIX_FDT_END) >> SWAPPER_TABLE_SHIFT !=
160-
__fix_to_virt(FIX_BTMAP_BEGIN) >> SWAPPER_TABLE_SHIFT);
161-
162-
offset = dt_phys % SWAPPER_BLOCK_SIZE;
149+
dt_phys_base = round_down(dt_phys, PAGE_SIZE);
150+
offset = dt_phys % PAGE_SIZE;
163151
dt_virt = (void *)dt_virt_base + offset;
164152

165153
/* map the first chunk so we can read the size from the header */
166-
create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE),
167-
dt_virt_base, SWAPPER_BLOCK_SIZE, prot);
154+
create_mapping_noalloc(dt_phys_base, dt_virt_base, PAGE_SIZE, prot);
168155

169156
if (fdt_magic(dt_virt) != FDT_MAGIC)
170157
return NULL;
@@ -173,9 +160,10 @@ void *__init fixmap_remap_fdt(phys_addr_t dt_phys, int *size, pgprot_t prot)
173160
if (*size > MAX_FDT_SIZE)
174161
return NULL;
175162

176-
if (offset + *size > SWAPPER_BLOCK_SIZE)
177-
create_mapping_noalloc(round_down(dt_phys, SWAPPER_BLOCK_SIZE), dt_virt_base,
178-
round_up(offset + *size, SWAPPER_BLOCK_SIZE), prot);
163+
if (offset + *size > PAGE_SIZE) {
164+
create_mapping_noalloc(dt_phys_base, dt_virt_base,
165+
offset + *size, prot);
166+
}
179167

180168
return dt_virt;
181169
}

0 commit comments

Comments
 (0)