Commit ba6f359

Alexandre Ghiti authored and palmer-dabbelt committed
riscv: Make __flush_tlb_range() loop over pte instead of flushing the whole tlb
Currently, when the range to flush covers more than one page (a 4K page or
a hugepage), __flush_tlb_range() flushes the whole tlb. Flushing the whole
tlb comes with a greater cost than flushing a single entry, so we should
flush single entries up to a certain threshold so that:
threshold * cost of flushing a single entry < cost of flushing the whole tlb.

Co-developed-by: Mayuresh Chitale <[email protected]>
Signed-off-by: Mayuresh Chitale <[email protected]>
Signed-off-by: Alexandre Ghiti <[email protected]>
Reviewed-by: Andrew Jones <[email protected]>
Tested-by: Lad Prabhakar <[email protected]> # On RZ/Five SMARC
Reviewed-by: Samuel Holland <[email protected]>
Tested-by: Samuel Holland <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
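In brief, the heuristic this patch introduces, as a minimal C sketch (it mirrors local_flush_tlb_range_threshold_asid() from the arch/riscv/mm/tlbflush.c hunk below; DIV_ROUND_UP and the local_flush_tlb_*_asid() helpers are the kernel's own):

	/* Flush the entire TLB once the range covers more entries than the threshold. */
	static unsigned long tlb_flush_all_threshold __read_mostly = 64;

	static void local_flush_tlb_range_threshold_asid(unsigned long start,
							 unsigned long size,
							 unsigned long stride,
							 unsigned long asid)
	{
		unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride);
		int i;

		/* Past the threshold, one full flush beats N single-entry flushes. */
		if (nr_ptes_in_range > tlb_flush_all_threshold) {
			local_flush_tlb_all_asid(asid);
			return;
		}

		/* Otherwise, one sfence.vma per page (or per hugepage stride). */
		for (i = 0; i < nr_ptes_in_range; ++i) {
			local_flush_tlb_page_asid(start, asid);
			start += stride;
		}
	}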
1 parent 9e11306 commit ba6f359

4 files changed, 72 insertions(+), 81 deletions(-)

arch/riscv/include/asm/sbi.h

Lines changed: 0 additions & 3 deletions
@@ -273,9 +273,6 @@ void sbi_set_timer(uint64_t stime_value);
 void sbi_shutdown(void);
 void sbi_send_ipi(unsigned int cpu);
 int sbi_remote_fence_i(const struct cpumask *cpu_mask);
-int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
-			  unsigned long start,
-			  unsigned long size);
 
 int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
 			       unsigned long start,

arch/riscv/include/asm/tlbflush.h

Lines changed: 3 additions & 0 deletions
@@ -11,6 +11,9 @@
 #include <asm/smp.h>
 #include <asm/errata_list.h>
 
+#define FLUSH_TLB_MAX_SIZE	((unsigned long)-1)
+#define FLUSH_TLB_NO_ASID	((unsigned long)-1)
+
 #ifdef CONFIG_MMU
 extern unsigned long asid_mask;
 
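Both sentinels are ((unsigned long)-1), i.e. ULONG_MAX: no real flush size or ASID can take that value, so it safely doubles as "flush the whole address space" and "no specific ASID". The callers in the hunks below use them like this (lifted from flush_tlb_mm() and flush_tlb_all() in this patch):

	__flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);	/* whole mm */
	sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE,
				   FLUSH_TLB_NO_ASID);			/* all harts, all ASIDs */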

arch/riscv/kernel/sbi.c

Lines changed: 10 additions & 22 deletions
@@ -11,6 +11,7 @@
 #include <linux/reboot.h>
 #include <asm/sbi.h>
 #include <asm/smp.h>
+#include <asm/tlbflush.h>
 
 /* default SBI version is 0.1 */
 unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
@@ -376,32 +377,15 @@ int sbi_remote_fence_i(const struct cpumask *cpu_mask)
 }
 EXPORT_SYMBOL(sbi_remote_fence_i);
 
-/**
- * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote
- *			     harts for the specified virtual address range.
- * @cpu_mask: A cpu mask containing all the target harts.
- * @start: Start of the virtual address
- * @size: Total size of the virtual address range.
- *
- * Return: 0 on success, appropriate linux error code otherwise.
- */
-int sbi_remote_sfence_vma(const struct cpumask *cpu_mask,
-			  unsigned long start,
-			  unsigned long size)
-{
-	return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
-			    cpu_mask, start, size, 0, 0);
-}
-EXPORT_SYMBOL(sbi_remote_sfence_vma);
-
 /**
  * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given
- * remote harts for a virtual address range belonging to a specific ASID.
+ * remote harts for a virtual address range belonging to a specific ASID or not.
  *
  * @cpu_mask: A cpu mask containing all the target harts.
  * @start: Start of the virtual address
  * @size: Total size of the virtual address range.
- * @asid: The value of address space identifier (ASID).
+ * @asid: The value of address space identifier (ASID), or FLUSH_TLB_NO_ASID
+ * for flushing all address spaces.
  *
  * Return: 0 on success, appropriate linux error code otherwise.
  */
@@ -410,8 +394,12 @@ int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask,
 			   unsigned long size,
 			   unsigned long asid)
 {
-	return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
-			    cpu_mask, start, size, asid, 0);
+	if (asid == FLUSH_TLB_NO_ASID)
+		return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
+				    cpu_mask, start, size, 0, 0);
+	else
+		return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
+				    cpu_mask, start, size, asid, 0);
 }
 EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
 
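With sbi_remote_sfence_vma() gone, sbi_remote_sfence_vma_asid() is the single entry point and dispatches on the ASID value itself; a caller that previously used the non-ASID variant now simply passes FLUSH_TLB_NO_ASID (exactly what flush_tlb_all() does in the next file):

	/* Global fence across the harts in cmask, not tied to any address space. */
	sbi_remote_sfence_vma_asid(cmask, start, size, FLUSH_TLB_NO_ASID);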

arch/riscv/mm/tlbflush.c

Lines changed: 59 additions & 56 deletions
@@ -9,37 +9,61 @@
 
 static inline void local_flush_tlb_all_asid(unsigned long asid)
 {
-	__asm__ __volatile__ ("sfence.vma x0, %0"
-			:
-			: "r" (asid)
-			: "memory");
+	if (asid != FLUSH_TLB_NO_ASID)
+		__asm__ __volatile__ ("sfence.vma x0, %0"
+				:
+				: "r" (asid)
+				: "memory");
+	else
+		local_flush_tlb_all();
 }
 
 static inline void local_flush_tlb_page_asid(unsigned long addr,
 		unsigned long asid)
 {
-	__asm__ __volatile__ ("sfence.vma %0, %1"
-			:
-			: "r" (addr), "r" (asid)
-			: "memory");
+	if (asid != FLUSH_TLB_NO_ASID)
+		__asm__ __volatile__ ("sfence.vma %0, %1"
+				:
+				: "r" (addr), "r" (asid)
+				: "memory");
+	else
+		local_flush_tlb_page(addr);
 }
 
-static inline void local_flush_tlb_range(unsigned long start,
-		unsigned long size, unsigned long stride)
+/*
+ * Flush entire TLB if number of entries to be flushed is greater
+ * than the threshold below.
+ */
+static unsigned long tlb_flush_all_threshold __read_mostly = 64;
+
+static void local_flush_tlb_range_threshold_asid(unsigned long start,
+						 unsigned long size,
+						 unsigned long stride,
+						 unsigned long asid)
 {
-	if (size <= stride)
-		local_flush_tlb_page(start);
-	else
-		local_flush_tlb_all();
+	unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride);
+	int i;
+
+	if (nr_ptes_in_range > tlb_flush_all_threshold) {
+		local_flush_tlb_all_asid(asid);
+		return;
+	}
+
+	for (i = 0; i < nr_ptes_in_range; ++i) {
+		local_flush_tlb_page_asid(start, asid);
+		start += stride;
+	}
 }
 
 static inline void local_flush_tlb_range_asid(unsigned long start,
 		unsigned long size, unsigned long stride, unsigned long asid)
 {
 	if (size <= stride)
 		local_flush_tlb_page_asid(start, asid);
-	else
+	else if (size == FLUSH_TLB_MAX_SIZE)
 		local_flush_tlb_all_asid(asid);
+	else
+		local_flush_tlb_range_threshold_asid(start, size, stride, asid);
 }
 
 static void __ipi_flush_tlb_all(void *info)
@@ -52,7 +76,7 @@ void flush_tlb_all(void)
 	if (riscv_use_ipi_for_rfence())
 		on_each_cpu(__ipi_flush_tlb_all, NULL, 1);
 	else
-		sbi_remote_sfence_vma(NULL, 0, -1);
+		sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID);
 }
 
 struct flush_tlb_range_data {
@@ -69,18 +93,12 @@ static void __ipi_flush_tlb_range_asid(void *info)
 	local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid);
 }
 
-static void __ipi_flush_tlb_range(void *info)
-{
-	struct flush_tlb_range_data *d = info;
-
-	local_flush_tlb_range(d->start, d->size, d->stride);
-}
-
 static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
 			      unsigned long size, unsigned long stride)
 {
 	struct flush_tlb_range_data ftd;
 	struct cpumask *cmask = mm_cpumask(mm);
+	unsigned long asid = FLUSH_TLB_NO_ASID;
 	unsigned int cpuid;
 	bool broadcast;
 
@@ -90,47 +108,32 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
 	cpuid = get_cpu();
 	/* check if the tlbflush needs to be sent to other CPUs */
 	broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
-	if (static_branch_unlikely(&use_asid_allocator)) {
-		unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
-
-		if (broadcast) {
-			if (riscv_use_ipi_for_rfence()) {
-				ftd.asid = asid;
-				ftd.start = start;
-				ftd.size = size;
-				ftd.stride = stride;
-				on_each_cpu_mask(cmask,
-						 __ipi_flush_tlb_range_asid,
-						 &ftd, 1);
-			} else
-				sbi_remote_sfence_vma_asid(cmask,
-							   start, size, asid);
-		} else {
-			local_flush_tlb_range_asid(start, size, stride, asid);
-		}
+
+	if (static_branch_unlikely(&use_asid_allocator))
+		asid = atomic_long_read(&mm->context.id) & asid_mask;
+
+	if (broadcast) {
+		if (riscv_use_ipi_for_rfence()) {
+			ftd.asid = asid;
+			ftd.start = start;
+			ftd.size = size;
+			ftd.stride = stride;
+			on_each_cpu_mask(cmask,
+					 __ipi_flush_tlb_range_asid,
+					 &ftd, 1);
+		} else
+			sbi_remote_sfence_vma_asid(cmask,
+						   start, size, asid);
 	} else {
-		if (broadcast) {
-			if (riscv_use_ipi_for_rfence()) {
-				ftd.asid = 0;
-				ftd.start = start;
-				ftd.size = size;
-				ftd.stride = stride;
-				on_each_cpu_mask(cmask,
-						 __ipi_flush_tlb_range,
-						 &ftd, 1);
-			} else
-				sbi_remote_sfence_vma(cmask, start, size);
-		} else {
-			local_flush_tlb_range(start, size, stride);
-		}
+		local_flush_tlb_range_asid(start, size, stride, asid);
 	}
 
 	put_cpu();
 }
 
 void flush_tlb_mm(struct mm_struct *mm)
 {
-	__flush_tlb_range(mm, 0, -1, PAGE_SIZE);
+	__flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
 }
 
 void flush_tlb_mm_range(struct mm_struct *mm,
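To make the threshold concrete (default tlb_flush_all_threshold of 64, assuming 4 KiB base pages; the byte counts are illustrative, not from the patch):

	DIV_ROUND_UP(256 << 10, 4 << 10) == 64	/* 64 <= 64: loop, one sfence.vma per page */
	DIV_ROUND_UP(260 << 10, 4 << 10) == 65	/* 65 >  64: local_flush_tlb_all_asid()    */

Note that a size of FLUSH_TLB_MAX_SIZE short-circuits in local_flush_tlb_range_asid() before any division, so flush_tlb_mm() still takes the full-flush path directly.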
