
Commit 5fdd05e

Ryan Roberts authored and willdeacon committed
arm64/mm: Batch barriers when updating kernel mappings
Because the kernel can't tolerate page faults for kernel mappings, when setting a valid, kernel space pte (or pmd/pud/p4d/pgd), it emits a dsb(ishst) to ensure that the store to the pgtable is observed by the table walker immediately. Additionally it emits an isb() to ensure that any already speculatively determined invalid mapping fault gets canceled.

We can improve the performance of vmalloc operations by batching these barriers until the end of a set of entry updates. arch_enter_lazy_mmu_mode() and arch_leave_lazy_mmu_mode() provide the required hooks. vmalloc improves by up to 30% as a result.

Two new TIF_ flags are created; TIF_LAZY_MMU tells us if the task is in the lazy mode and can therefore defer any barriers until exit from the lazy mode. TIF_LAZY_MMU_PENDING is used to remember if any pte operation was performed while in the lazy mode that required barriers. Then when leaving lazy mode, if that flag is set, we emit the barriers. Since arch_enter_lazy_mmu_mode() and arch_leave_lazy_mmu_mode() are used for both user and kernel mappings, we need the second flag to avoid emitting barriers unnecessarily if only user mappings were updated.

Reviewed-by: Catalin Marinas <[email protected]>
Signed-off-by: Ryan Roberts <[email protected]>
Reviewed-by: Anshuman Khandual <[email protected]>
Tested-by: Luiz Capitulino <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Will Deacon <[email protected]>
1 parent 44562c7 commit 5fdd05e
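
The batching these hooks enable looks roughly as follows. This is a minimal sketch rather than code from the commit: map_pages_sketch() and its parameters are hypothetical, and using __set_pte()/mk_pte() as the per-entry update is an assumption about the caller.

#include <linux/mm.h>

/* Sketch only: a hypothetical caller batching kernel pte updates. */
static void map_pages_sketch(pte_t *ptep, struct page **pages,
			     unsigned int nr, pgprot_t prot)
{
	unsigned int i;

	arch_enter_lazy_mmu_mode();		/* sets TIF_LAZY_MMU */

	for (i = 0; i < nr; i++)
		/* each valid kernel pte store now just sets
		 * TIF_LAZY_MMU_PENDING instead of dsb(ishst) + isb() */
		__set_pte(ptep + i, mk_pte(pages[i], prot));

	/* a single dsb(ishst) + isb() pair is emitted here, not nr pairs */
	arch_leave_lazy_mmu_mode();
}

In the commit itself the beneficiary is vmalloc, whose pte-update loops already run inside the generic lazy MMU hooks; that is where the quoted "up to 30%" improvement comes from.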

File tree: 3 files changed (+72, -20 lines)

  arch/arm64/include/asm/pgtable.h
  arch/arm64/include/asm/thread_info.h
  arch/arm64/kernel/process.c

arch/arm64/include/asm/pgtable.h

Lines changed: 65 additions & 16 deletions
@@ -40,6 +40,63 @@
 #include <linux/sched.h>
 #include <linux/page_table_check.h>
 
+static inline void emit_pte_barriers(void)
+{
+	/*
+	 * These barriers are emitted under certain conditions after a pte entry
+	 * was modified (see e.g. __set_pte_complete()). The dsb makes the store
+	 * visible to the table walker. The isb ensures that any previous
+	 * speculative "invalid translation" marker that is in the CPU's
+	 * pipeline gets cleared, so that any access to that address after
+	 * setting the pte to valid won't cause a spurious fault. If the thread
+	 * gets preempted after storing to the pgtable but before emitting these
+	 * barriers, __switch_to() emits a dsb which ensures the walker gets to
+	 * see the store. There is no guarantee of an isb being issued though.
+	 * This is safe because it will still get issued (albeit on a
+	 * potentially different CPU) when the thread starts running again,
+	 * before any access to the address.
+	 */
+	dsb(ishst);
+	isb();
+}
+
+static inline void queue_pte_barriers(void)
+{
+	unsigned long flags;
+
+	VM_WARN_ON(in_interrupt());
+	flags = read_thread_flags();
+
+	if (flags & BIT(TIF_LAZY_MMU)) {
+		/* Avoid the atomic op if already set. */
+		if (!(flags & BIT(TIF_LAZY_MMU_PENDING)))
+			set_thread_flag(TIF_LAZY_MMU_PENDING);
+	} else {
+		emit_pte_barriers();
+	}
+}
+
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
+static inline void arch_enter_lazy_mmu_mode(void)
+{
+	VM_WARN_ON(in_interrupt());
+	VM_WARN_ON(test_thread_flag(TIF_LAZY_MMU));
+
+	set_thread_flag(TIF_LAZY_MMU);
+}
+
+static inline void arch_flush_lazy_mmu_mode(void)
+{
+	if (test_and_clear_thread_flag(TIF_LAZY_MMU_PENDING))
+		emit_pte_barriers();
+}
+
+static inline void arch_leave_lazy_mmu_mode(void)
+{
+	arch_flush_lazy_mmu_mode();
+	clear_thread_flag(TIF_LAZY_MMU);
+}
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
@@ -326,10 +383,8 @@ static inline void __set_pte_complete(pte_t pte)
 	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
 	 * has the necessary barriers.
 	 */
-	if (pte_valid_not_user(pte)) {
-		dsb(ishst);
-		isb();
-	}
+	if (pte_valid_not_user(pte))
+		queue_pte_barriers();
 }
 
 static inline void __set_pte(pte_t *ptep, pte_t pte)
@@ -801,10 +856,8 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 
 	WRITE_ONCE(*pmdp, pmd);
 
-	if (pmd_valid(pmd)) {
-		dsb(ishst);
-		isb();
-	}
+	if (pmd_valid(pmd))
+		queue_pte_barriers();
 }
 
 static inline void pmd_clear(pmd_t *pmdp)
@@ -869,10 +922,8 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
 
 	WRITE_ONCE(*pudp, pud);
 
-	if (pud_valid(pud)) {
-		dsb(ishst);
-		isb();
-	}
+	if (pud_valid(pud))
+		queue_pte_barriers();
 }
 
 static inline void pud_clear(pud_t *pudp)
@@ -951,8 +1002,7 @@ static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
 	}
 
 	WRITE_ONCE(*p4dp, p4d);
-	dsb(ishst);
-	isb();
+	queue_pte_barriers();
 }
 
 static inline void p4d_clear(p4d_t *p4dp)
@@ -1080,8 +1130,7 @@ static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
 	}
 
 	WRITE_ONCE(*pgdp, pgd);
-	dsb(ishst);
-	isb();
+	queue_pte_barriers();
 }
 
 static inline void pgd_clear(pgd_t *pgdp)
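
Note the split between arch_flush_lazy_mmu_mode() and arch_leave_lazy_mmu_mode() above: a flush emits any pending barriers without leaving lazy mode. A minimal sketch of why that can matter, assuming a caller must access memory through a just-written mapping mid-batch; probe_through_mapping() and its arguments are hypothetical:

#include <linux/mm.h>

/* Sketch only: force out pending barriers mid-batch, stay in lazy mode. */
static void probe_through_mapping(pte_t *ptep, pte_t pte, u8 *vaddr)
{
	arch_enter_lazy_mmu_mode();

	__set_pte(ptep, pte);		/* barriers deferred: only the
					 * TIF_LAZY_MMU_PENDING flag is set */

	arch_flush_lazy_mmu_mode();	/* dsb(ishst) + isb() emitted now */
	*vaddr = 0;			/* safe: the walker sees the new pte */

	arch_leave_lazy_mmu_mode();	/* nothing pending, no extra barriers */
}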

arch/arm64/include/asm/thread_info.h

Lines changed: 2 additions & 0 deletions
@@ -82,6 +82,8 @@ void arch_setup_new_exec(void);
 #define TIF_SME_VL_INHERIT	28	/* Inherit SME vl_onexec across exec */
 #define TIF_KERNEL_FPSTATE	29	/* Task is in a kernel mode FPSIMD section */
 #define TIF_TSC_SIGSEGV		30	/* SIGSEGV on counter-timer access */
+#define TIF_LAZY_MMU		31	/* Task in lazy mmu mode */
+#define TIF_LAZY_MMU_PENDING	32	/* Ops pending for lazy mmu mode exit */
 
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
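
A detail this hunk implies but the commit message does not spell out: TIF_LAZY_MMU_PENDING is the first TIF bit beyond 31, so a mask for it must be built with BIT() (which shifts 1UL) rather than the 1 << n int pattern used by the legacy _TIF_* masks; queue_pte_barriers() above does exactly that. A standalone sketch:

#include <linux/bits.h>

#define LAZY_MMU_PENDING_BIT	32	/* mirrors TIF_LAZY_MMU_PENDING */

/* BIT() shifts 1UL, so bit 32 of the 64-bit flags word is well defined. */
static unsigned long lazy_mmu_pending_mask = BIT(LAZY_MMU_PENDING_BIT);
/* By contrast, (1 << LAZY_MMU_PENDING_BIT) would overflow a 32-bit int. */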

arch/arm64/kernel/process.c

Lines changed: 5 additions & 4 deletions
@@ -680,10 +680,11 @@ struct task_struct *__switch_to(struct task_struct *prev,
 	gcs_thread_switch(next);
 
 	/*
-	 * Complete any pending TLB or cache maintenance on this CPU in case
-	 * the thread migrates to a different CPU.
-	 * This full barrier is also required by the membarrier system
-	 * call.
+	 * Complete any pending TLB or cache maintenance on this CPU in case the
+	 * thread migrates to a different CPU. This full barrier is also
+	 * required by the membarrier system call. Additionally it makes any
+	 * in-progress pgtable writes visible to the table walker; see
+	 * emit_pte_barriers().
 	 */
 	dsb(ish);