@@ -439,7 +439,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 	 * NB: leave_mm() calls us with prev == NULL and tsk == NULL.
 	 */
 
-	/* We don't want flush_tlb_func_* to run concurrently with us. */
+	/* We don't want flush_tlb_func() to run concurrently with us. */
 	if (IS_ENABLED(CONFIG_PROVE_LOCKING))
 		WARN_ON_ONCE(!irqs_disabled());
 
@@ -647,14 +647,13 @@ void initialize_tlbstate_and_flush(void)
 }
 
 /*
- * flush_tlb_func_common()'s memory ordering requirement is that any
+ * flush_tlb_func()'s memory ordering requirement is that any
  * TLB fills that happen after we flush the TLB are ordered after we
  * read active_mm's tlb_gen.  We don't need any explicit barriers
  * because all x86 flush operations are serializing and the
  * atomic64_read operation won't be reordered by the compiler.
  */
-static void flush_tlb_func_common(const struct flush_tlb_info *f,
-				  bool local, enum tlb_flush_reason reason)
+static void flush_tlb_func(void *info)
 {
 	/*
 	 * We have three different tlb_gen values in here.  They are:
@@ -665,14 +664,26 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 	 *   - f->new_tlb_gen: the generation that the requester of the flush
 	 *                     wants us to catch up to.
 	 */
+	const struct flush_tlb_info *f = info;
 	struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
 	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
 	u64 mm_tlb_gen = atomic64_read(&loaded_mm->context.tlb_gen);
 	u64 local_tlb_gen = this_cpu_read(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen);
+	bool local = smp_processor_id() == f->initiating_cpu;
+	unsigned long nr_invalidate = 0;
 
 	/* This code cannot presently handle being reentered. */
 	VM_WARN_ON(!irqs_disabled());
 
+	if (!local) {
+		inc_irq_stat(irq_tlb_count);
+		count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+
+		/* Can only happen on remote CPUs */
+		if (f->mm && f->mm != loaded_mm)
+			return;
+	}
+
 	if (unlikely(loaded_mm == &init_mm))
 		return;
 
@@ -700,8 +711,7 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 		 * be handled can catch us all the way up, leaving no work for
 		 * the second flush.
 		 */
-		trace_tlb_flush(reason, 0);
-		return;
+		goto done;
 	}
 
 	WARN_ON_ONCE(local_tlb_gen > mm_tlb_gen);
@@ -748,46 +758,34 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
 	    f->new_tlb_gen == local_tlb_gen + 1 &&
 	    f->new_tlb_gen == mm_tlb_gen) {
 		/* Partial flush */
-		unsigned long nr_invalidate = (f->end - f->start) >> f->stride_shift;
 		unsigned long addr = f->start;
 
+		nr_invalidate = (f->end - f->start) >> f->stride_shift;
+
 		while (addr < f->end) {
 			flush_tlb_one_user(addr);
 			addr += 1UL << f->stride_shift;
 		}
 		if (local)
 			count_vm_tlb_events(NR_TLB_LOCAL_FLUSH_ONE, nr_invalidate);
-		trace_tlb_flush(reason, nr_invalidate);
 	} else {
 		/* Full flush. */
+		nr_invalidate = TLB_FLUSH_ALL;
+
 		flush_tlb_local();
 		if (local)
 			count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
-		trace_tlb_flush(reason, TLB_FLUSH_ALL);
 	}
 
 	/* Both paths above update our state to mm_tlb_gen. */
 	this_cpu_write(cpu_tlbstate.ctxs[loaded_mm_asid].tlb_gen, mm_tlb_gen);
-}
-
-static void flush_tlb_func_local(const void *info, enum tlb_flush_reason reason)
-{
-	const struct flush_tlb_info *f = info;
-
-	flush_tlb_func_common(f, true, reason);
-}
 
-static void flush_tlb_func_remote(void *info)
-{
-	const struct flush_tlb_info *f = info;
-
-	inc_irq_stat(irq_tlb_count);
-
-	if (f->mm && f->mm != this_cpu_read(cpu_tlbstate.loaded_mm))
-		return;
-
-	count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
-	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
+	/* Tracing is done in a unified manner to reduce the code size */
+done:
+	trace_tlb_flush(!local ? TLB_REMOTE_SHOOTDOWN :
+				(f->mm == NULL) ? TLB_LOCAL_SHOOTDOWN :
+						  TLB_LOCAL_MM_SHOOTDOWN,
+			nr_invalidate);
 }
 
 static bool tlb_is_not_lazy(int cpu, void *data)
@@ -816,10 +814,10 @@ STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask,
 	 * doing a speculative memory access.
 	 */
 	if (info->freed_tables)
-		smp_call_function_many(cpumask, flush_tlb_func_remote,
+		smp_call_function_many(cpumask, flush_tlb_func,
 				       (void *)info, 1);
 	else
-		on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
+		on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func,
 				      (void *)info, 1, cpumask);
 }
 
@@ -869,6 +867,7 @@ static inline struct flush_tlb_info *get_flush_tlb_info(struct mm_struct *mm,
 	info->stride_shift	= stride_shift;
 	info->freed_tables	= freed_tables;
 	info->new_tlb_gen	= new_tlb_gen;
+	info->initiating_cpu	= smp_processor_id();
 
 	return info;
 }
@@ -908,7 +907,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
 	if (mm == this_cpu_read(cpu_tlbstate.loaded_mm)) {
 		lockdep_assert_irqs_enabled();
 		local_irq_disable();
-		flush_tlb_func_local(info, TLB_LOCAL_MM_SHOOTDOWN);
+		flush_tlb_func(info);
 		local_irq_enable();
 	}
 
@@ -1119,34 +1118,26 @@ void __flush_tlb_all(void)
 }
 EXPORT_SYMBOL_GPL(__flush_tlb_all);
 
-/*
- * arch_tlbbatch_flush() performs a full TLB flush regardless of the active mm.
- * This means that the 'struct flush_tlb_info' that describes which mappings to
- * flush is actually fixed. We therefore set a single fixed struct and use it in
- * arch_tlbbatch_flush().
- */
-static const struct flush_tlb_info full_flush_tlb_info = {
-	.mm = NULL,
-	.start = 0,
-	.end = TLB_FLUSH_ALL,
-};
-
 void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch)
 {
+	struct flush_tlb_info *info;
+
 	int cpu = get_cpu();
 
+	info = get_flush_tlb_info(NULL, 0, TLB_FLUSH_ALL, 0, false, 0);
 	if (cpumask_test_cpu(cpu, &batch->cpumask)) {
 		lockdep_assert_irqs_enabled();
 		local_irq_disable();
-		flush_tlb_func_local(&full_flush_tlb_info, TLB_LOCAL_SHOOTDOWN);
+		flush_tlb_func(info);
 		local_irq_enable();
 	}
 
 	if (cpumask_any_but(&batch->cpumask, cpu) < nr_cpu_ids)
-		flush_tlb_others(&batch->cpumask, &full_flush_tlb_info);
+		flush_tlb_others(&batch->cpumask, info);
 
 	cpumask_clear(&batch->cpumask);
 
+	put_flush_tlb_info();
 	put_cpu();
 }
 
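The hunks above assume that struct flush_tlb_info gained an initiating_cpu member (set via smp_processor_id() in get_flush_tlb_info() and read by flush_tlb_func()); that field lives in a header outside this diff. The core pattern, recording which CPU built the request so one callback can tell whether it runs locally or as a remote handler, can be sketched outside the kernel. The following standalone C program is only an illustration under that assumption; struct request, fake_smp_processor_id(), current_cpu, and handle_request() are invented for the demo and are not kernel APIs.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the kernel's flush_tlb_info: the request records who built it. */
struct request {
	int initiating_cpu;	/* analogous to info->initiating_cpu */
	unsigned long start;
	unsigned long end;
};

/* Hypothetical replacement for smp_processor_id() in this demo. */
static int current_cpu;

static int fake_smp_processor_id(void)
{
	return current_cpu;
}

/*
 * One handler for both the local and the remote case, mirroring how the
 * unified flush_tlb_func() derives "local" instead of taking it as an
 * argument. Remote-only bookkeeping stays behind the !local check.
 */
static void handle_request(void *info)
{
	const struct request *r = info;
	bool local = fake_smp_processor_id() == r->initiating_cpu;

	if (!local) {
		/* corresponds to inc_irq_stat()/count_vm_tlb_event() above */
		printf("cpu%d: received remote request from cpu%d\n",
		       fake_smp_processor_id(), r->initiating_cpu);
	}

	printf("cpu%d: flushing [%#lx, %#lx) (%s)\n",
	       fake_smp_processor_id(), r->start, r->end,
	       local ? "local" : "remote");
}

int main(void)
{
	struct request r = { .initiating_cpu = 0, .start = 0x1000, .end = 0x3000 };

	current_cpu = 0;	/* the initiator calls the handler directly */
	handle_request(&r);

	current_cpu = 2;	/* a "remote" CPU runs the same handler */
	handle_request(&r);
	return 0;
}

In the same spirit as the patch, the caller passes the identical callback both to the direct local invocation (with interrupts disabled) and to the cross-CPU helpers, and the callback itself decides which statistics and trace reason apply.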