Commit 6db2526

Rik van Riel authored and Ingo Molnar committed
x86/mm/tlb: Only trim the mm_cpumask once a second
Setting and clearing CPU bits in the mm_cpumask is only ever done by the CPU itself, from the context switch code or the TLB flush code.

Synchronization is handled by switch_mm_irqs_off() blocking interrupts.

Sending TLB flush IPIs to CPUs that are in the mm_cpumask, but no longer running the program, causes a regression in the will-it-scale tlbflush2 test. This test is contrived, but a large regression here might cause a small regression in some real world workload.

Instead of always sending IPIs to CPUs that are in the mm_cpumask, but no longer running the program, send these IPIs only once a second. The rest of the time we can skip over CPUs where the loaded_mm is different from the target mm.

Reported-by: kernel test robot <[email protected]>
Signed-off-by: Rik van Riel <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Mathieu Desnoyers <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: https://lore.kernel.org/r/20241204210316.612ee573@fangorn
Closes: https://lore.kernel.org/oe-lkp/[email protected]/
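For illustration, below is a minimal userspace sketch of the two decisions this patch combines: skip cpumask bits for CPUs that no longer have the target mm loaded, and stop skipping (i.e. trim) roughly once a second. This is not kernel code and makes several simplifying assumptions: struct cpu, should_flush() and should_trim() are hypothetical stand-ins for cpu_tlbstate.loaded_mm, should_flush_tlb() and should_trim_cpumask(), a wall-clock second replaces the jiffies + HZ window, and the lazy-TLB and kernel-address-flush cases are left out.

/*
 * Userspace simulation of the cpumask-trimming rate limit.  "CPUs" are just
 * array entries; the names below are illustrative, not kernel API.
 */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define NR_CPUS 4

struct cpu {
        int  loaded_mm;         /* which mm this CPU currently has loaded */
        bool in_cpumask;        /* bit still set in the target mm's cpumask */
};

static time_t next_trim;        /* analogue of mm->context.next_trim_cpumask */

/* Once a second, report that the cpumask should be trimmed (jiffies + HZ). */
static bool should_trim(time_t now)
{
        if (now >= next_trim) {
                next_trim = now + 1;
                return true;
        }
        return false;
}

/* Analogue of should_flush_tlb(): skip stale CPUs unless trimming. */
static bool should_flush(const struct cpu *c, int target_mm, bool trim)
{
        if (c->loaded_mm == target_mm)
                return true;    /* target mm is loaded: must flush */
        return trim;            /* stale mask bit: flush only when trimming */
}

int main(void)
{
        struct cpu cpus[NR_CPUS] = {
                { .loaded_mm = 1, .in_cpumask = true  },        /* running the mm */
                { .loaded_mm = 2, .in_cpumask = true  },        /* stale mask bit */
                { .loaded_mm = 1, .in_cpumask = true  },        /* running the mm */
                { .loaded_mm = 3, .in_cpumask = false },        /* not in the mask */
        };
        int target_mm = 1;
        time_t now = time(NULL);

        next_trim = now + 1;

        /* Two rounds: an ordinary flush, then one inside the trim window. */
        for (int round = 0; round < 2; round++) {
                bool trim = should_trim(now + round * 2);

                printf("round %d (trim=%d):\n", round, trim);
                for (int i = 0; i < NR_CPUS; i++) {
                        if (!cpus[i].in_cpumask)
                                continue;       /* never considered for the IPI */
                        printf("  cpu %d: %s\n", i,
                               should_flush(&cpus[i], target_mm, trim) ?
                               "send flush IPI" : "skip (stale)");
                }
        }
        return 0;
}

Run as an ordinary C program, the first round skips the CPU with the stale mask bit and the second round (inside the trim window) sends it a flush, mirroring how should_trim_cpumask() re-arms next_trim_cpumask to jiffies + HZ.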
1 parent 953753d · commit 6db2526

4 files changed (+36, -3 lines)


arch/x86/include/asm/mmu.h

Lines changed: 2 additions & 0 deletions

@@ -37,6 +37,8 @@ typedef struct {
          */
         atomic64_t tlb_gen;
 
+        unsigned long next_trim_cpumask;
+
 #ifdef CONFIG_MODIFY_LDT_SYSCALL
         struct rw_semaphore ldt_usr_sem;
         struct ldt_struct *ldt;

arch/x86/include/asm/mmu_context.h

Lines changed: 1 addition & 0 deletions

@@ -151,6 +151,7 @@ static inline int init_new_context(struct task_struct *tsk,
 
         mm->context.ctx_id = atomic64_inc_return(&last_mm_ctx_id);
         atomic64_set(&mm->context.tlb_gen, 0);
+        mm->context.next_trim_cpumask = jiffies + HZ;
 
 #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
         if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {

arch/x86/include/asm/tlbflush.h

Lines changed: 1 addition & 0 deletions

@@ -222,6 +222,7 @@ struct flush_tlb_info {
         unsigned int initiating_cpu;
         u8 stride_shift;
         u8 freed_tables;
+        u8 trim_cpumask;
 };
 
 void flush_tlb_local(void);

arch/x86/mm/tlb.c

Lines changed: 32 additions & 3 deletions

@@ -892,9 +892,36 @@ static void flush_tlb_func(void *info)
                         nr_invalidate);
 }
 
-static bool tlb_is_not_lazy(int cpu, void *data)
+static bool should_flush_tlb(int cpu, void *data)
 {
-        return !per_cpu(cpu_tlbstate_shared.is_lazy, cpu);
+        struct flush_tlb_info *info = data;
+
+        /* Lazy TLB will get flushed at the next context switch. */
+        if (per_cpu(cpu_tlbstate_shared.is_lazy, cpu))
+                return false;
+
+        /* No mm means kernel memory flush. */
+        if (!info->mm)
+                return true;
+
+        /* The target mm is loaded, and the CPU is not lazy. */
+        if (per_cpu(cpu_tlbstate.loaded_mm, cpu) == info->mm)
+                return true;
+
+        /* In cpumask, but not the loaded mm? Periodically remove by flushing. */
+        if (info->trim_cpumask)
+                return true;
+
+        return false;
+}
+
+static bool should_trim_cpumask(struct mm_struct *mm)
+{
+        if (time_after(jiffies, READ_ONCE(mm->context.next_trim_cpumask))) {
+                WRITE_ONCE(mm->context.next_trim_cpumask, jiffies + HZ);
+                return true;
+        }
+        return false;
 }
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state_shared, cpu_tlbstate_shared);
@@ -928,7 +955,7 @@ STATIC_NOPV void native_flush_tlb_multi(const struct cpumask *cpumask,
         if (info->freed_tables)
                 on_each_cpu_mask(cpumask, flush_tlb_func, (void *)info, true);
         else
-                on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
+                on_each_cpu_cond_mask(should_flush_tlb, flush_tlb_func,
                                 (void *)info, 1, cpumask);
 }
@@ -979,6 +1006,7 @@ static struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
         info->freed_tables = freed_tables;
         info->new_tlb_gen = new_tlb_gen;
         info->initiating_cpu = smp_processor_id();
+        info->trim_cpumask = 0;
 
         return info;
 }
@@ -1021,6 +1049,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
          * flush_tlb_func_local() directly in this case.
          */
         if (cpumask_any_but(mm_cpumask(mm), cpu) < nr_cpu_ids) {
+                info->trim_cpumask = should_trim_cpumask(mm);
                 flush_tlb_multi(mm_cpumask(mm), info);
         } else if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
                 lockdep_assert_irqs_enabled();
