
Commit 4c1ba39

Authored by anadav, committed by Ingo Molnar
x86/mm/tlb: Unify flush_tlb_func_local() and flush_tlb_func_remote()
Unifying these two functions allows them to be used in the updated SMP infrastructure.

To do so, remove the reason argument from flush_tlb_func_local(), add a member to struct flush_tlb_info that records which CPU initiated the flush, and act accordingly. Optimize the size of flush_tlb_info while we are at it.

Unfortunately, this prevents us from using a constant flush_tlb_info for arch_tlbbatch_flush(), but at a later stage we may be able to inline flush_tlb_info into the IPI data, so it should have no impact eventually.

Signed-off-by: Nadav Amit <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
Reviewed-by: Dave Hansen <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
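For context, a minimal userspace sketch of the pattern this patch adopts: one callback serves both the local call and the remote IPI, and it tells the two cases apart by comparing the current CPU with the recorded initiator. This is illustrative only; struct flush_info, flush_func() and the current_cpu argument below are hypothetical stand-ins for the kernel's flush_tlb_info, flush_tlb_func() and smp_processor_id().

/*
 * Illustrative sketch only, not kernel code: a single callback handles
 * both the local and the remote (IPI) flush path, distinguished by the
 * recorded initiating CPU.
 */
#include <stdbool.h>
#include <stdio.h>

struct flush_info {
        unsigned int initiating_cpu;    /* CPU that requested the flush */
        unsigned long start, end;       /* address range to invalidate */
};

static void flush_func(const struct flush_info *f, unsigned int current_cpu)
{
        bool local = (current_cpu == f->initiating_cpu);

        if (!local) {
                /* Remote path: account the received request, may bail out early. */
                printf("CPU%u: remote flush request received\n", current_cpu);
        }

        /* The flush work itself is shared by both paths. */
        printf("CPU%u: flushing [%#lx, %#lx) (%s)\n",
               current_cpu, f->start, f->end, local ? "local" : "remote");
}

int main(void)
{
        struct flush_info info = {
                .initiating_cpu = 0,
                .start = 0x1000,
                .end = 0x3000,
        };

        flush_func(&info, 0);   /* the initiator calls the function directly */
        flush_func(&info, 1);   /* other CPUs would reach it via an IPI */
        return 0;
}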
1 parent: a32a4d8

2 files changed: 39 additions & 47 deletions

arch/x86/include/asm/tlbflush.h

Lines changed: 3 additions & 2 deletions
@@ -201,8 +201,9 @@ struct flush_tlb_info {
         unsigned long           start;
         unsigned long           end;
         u64                     new_tlb_gen;
-        unsigned int            stride_shift;
-        bool                    freed_tables;
+        unsigned int            initiating_cpu;
+        u8                      stride_shift;
+        u8                      freed_tables;
 };
 
 void flush_tlb_local(void);

arch/x86/mm/tlb.c

Lines changed: 36 additions & 45 deletions
@@ -439,7 +439,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
          * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
          */
 
-        /* We don't want flush_tlb_func_* to run concurrently with us. */
+        /* We don't want flush_tlb_func() to run concurrently with us. */
         if (IS_ENABLED(CONFIG_PROVE_LOCKING))
                 WARN_ON_ONCE(!irqs_disabled());
 
@@ -647,14 +647,13 @@ void initialize_tlbstate_and_flush(void)
 }
 
 /*
- * flush_tlb_func_common()'s memory ordering requirement is that any
+ * flush_tlb_func()'s memory ordering requirement is that any
  * TLB fills that happen after we flush the TLB are ordered after we
  * read active_mm's tlb_gen. We don't need any explicit barriers
  * because all x86 flush operations are serializing and the
  * atomic64_read operation won't be reordered by the compiler.
 */
-static void flush_tlb_func_common(const struct flush_tlb_info *f,
-                                  bool local, enum tlb_flush_reason reason)
+static void flush_tlb_func(void *info)
 {
         /*
          * We have three different tlb_gen values in here. They are:
@@ -665,14 +664,26 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
          * - f->new_tlb_gen: the generation that the requester of the flush
          *                   wants us to catch up to.
          */
+        const struct flush_tlb_info *f = info;
         struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
         u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
         u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
         u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+        bool local = smp_processor_id() == f->initiating_cpu;
+        unsigned long nr_invalidate = 0;
 
         /* This code cannot presently handle being reentered. */
         VM_WARN_ON(!irqs_disabled());
 
+        if (!local) {
+                inc_irq_stat(irq_tlb_count);
+                count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+
+                /* Can only happen on remote CPUs */
+                if (f->mm && f->mm != loaded_mm)
+                        return;
+        }
+
         if (unlikely(loaded_mm == &init_mm))
                 return;
 
@@ -700,8 +711,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
                  * be handled can catch us all the way up, leaving no work for
                  * the second flush.
                  */
-                trace_tlb_flush(reason, 0);
-                return;
+                goto done;
         }
 
         WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
@@ -748,46 +758,34 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
                    f->new_tlb_gen == local_tlb_gen + 1 &&
                    f->new_tlb_gen == mm_tlb_gen) {
                 /* Partial flush */
-                unsigned long nr_invalidate = (f->end - f->start) >> f->stride_shift;
                 unsigned long addr = f->start;
 
+                nr_invalidate = (f->end - f->start) >> f->stride_shift;
+
                 while (addr < f->end) {
                         flush_tlb_one_user(addr);
                         addr += 1UL << f->stride_shift;
                 }
                 if (local)
                         count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
-                trace_tlb_flush(reason, nr_invalidate);
         } else {
                 /* Full flush. */
+                nr_invalidate = TLB_FLUSH_ALL;
+
                 flush_tlb_local();
                 if (local)
                         count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-                trace_tlb_flush(reason, TLB_FLUSH_ALL);
         }
 
         /* Both paths above update our state to mm_tlb_gen. */
         this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
-}
-
-static void flush_tlb_func_local(const void *info, enum tlb_flush_reason reason)
-{
-        const struct flush_tlb_info *f = info;
-
-        flush_tlb_func_common(f, true, reason);
-}
 
-static void flush_tlb_func_remote(void *info)
-{
-        const struct flush_tlb_info *f = info;
-
-        inc_irq_stat(irq_tlb_count);
-
-        if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
-                return;
-
-        count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-        flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
+        /* Tracing is done in a unified manner to reduce the code size */
+done:
+        trace_tlb_flush(!local ? TLB_REMOTE_SHOOTDOWN :
+                                (f->mm == NULL) ? TLB_LOCAL_SHOOTDOWN :
+                                                  TLB_LOCAL_MM_SHOOTDOWN,
+                        nr_invalidate);
 }
 
 static bool tlb_is_not_lazy(int cpu, void *data)
@@ -816,10 +814,10 @@ STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask,
          * doing a speculative memory access.
          */
         if (info->freed_tables)
-                smp_call_function_many(cpumask, flush_tlb_func_remote,
+                smp_call_function_many(cpumask, flush_tlb_func,
                                (void *)info, 1);
         else
-                on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
+                on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
                                 (void *)info, 1, cpumask);
 }
 
@@ -869,6 +867,7 @@ static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
         info->stride_shift      = stride_shift;
         info->freed_tables      = freed_tables;
         info->new_tlb_gen       = new_tlb_gen;
+        info->initiating_cpu    = smp_processor_id();
 
         return info;
 }
@@ -908,7 +907,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
         if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
                 lockdep_assert_irqs_enabled();
                 local_irq_disable();
-                flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
+                flush_tlb_func(info);
                 local_irq_enable();
         }
 
@@ -1119,34 +1118,26 @@ void __flush_tlb_all(void)
 }
 EXPORT_SYMBOL_GPL(__flush_tlb_all);
 
-/*
- * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm.
- * This means that the 'struct flush_tlb_info' that describes which mappings to
- * flush is actually fixed. We therefore set a single fixed struct and use it in
- * arch_tlbbatch_flush().
- */
-static const struct flush_tlb_info full_flush_tlb_info = {
-        .mm = NULL,
-        .start = 0,
-        .end = TLB_FLUSH_ALL,
-};
-
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
+        struct flush_tlb_info *info;
+
         int cpu = get_cpu();
 
+        info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
         if (cpumask_test_cpu(cpu, &batch->cpumask)) {
                 lockdep_assert_irqs_enabled();
                 local_irq_disable();
-                flush_tlb_func_local(&full_flush_tlb_info, TLB_LOCAL_SHOOTDOWN);
+                flush_tlb_func(info);
                 local_irq_enable();
         }
 
         if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
-                flush_tlb_others(&batch->cpumask, &full_flush_tlb_info);
+                flush_tlb_others(&batch->cpumask, info);
 
         cpumask_clear(&batch->cpumask);
 
+        put_flush_tlb_info();
         put_cpu();
 }
 