
Commit d1d3aa9

Zhenyu Ye authored and ctmarinas committed

arm64: tlb: Use the TLBI RANGE feature in arm64
Add the __TLBI_VADDR_RANGE macro and rewrite __flush_tlb_range(). When the CPU supports the TLBI RANGE feature, the minimum range granularity is decided by 'scale', so we cannot always flush all pages with one instruction. For example, when pages = 0xe81a, start 'scale' from the maximum and find the right 'num' for each 'scale':

1. scale = 3: we can flush no pages because the minimum range is 2^(5*3 + 1) = 0x10000.
2. scale = 2: the minimum range is 2^(5*2 + 1) = 0x800, so we can flush 0xe800 pages this time, with num = 0xe800/0x800 - 1 = 0x1c. The remaining pages are 0x1a.
3. scale = 1: the minimum range is 2^(5*1 + 1) = 0x40, so no page can be flushed.
4. scale = 0: we flush the remaining 0x1a pages, with num = 0x1a/0x2 - 1 = 0xc.

However, in most scenarios pages = 1 when flush_tlb_range() is called. Starting from scale = 3 or another value (such as scale = ilog2(pages)) would incur extra overhead, so instead 'scale' is increased from 0 to the maximum; the flush order is exactly opposite to the example above.

Signed-off-by: Zhenyu Ye <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
[[email protected]: removed unnecessary masks in __TLBI_VADDR_RANGE]
[[email protected]: __TLB_RANGE_NUM subtracts 1]
[[email protected]: minor adjustments to the comments]
[[email protected]: introduce system_supports_tlb_range()]
Signed-off-by: Catalin Marinas <[email protected]>
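The decomposition above is easy to verify mechanically. Below is a minimal standalone userspace sketch (not part of the commit; the two macros are redefined locally to mirror the patch's __TLBI_RANGE_NUM and __TLBI_RANGE_PAGES) that walks 'scale' from 3 down to 0 for pages = 0xe81a, reproducing the four steps of the example:

#include <stdio.h>

#define TLBI_RANGE_MASK			0x1fUL
#define __TLBI_RANGE_NUM(pages, scale)	\
	((int)(((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
#define __TLBI_RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))

int main(void)
{
	unsigned long pages = 0xe81a;
	int scale, num;

	for (scale = 3; scale >= 0; scale--) {
		num = __TLBI_RANGE_NUM(pages, scale);
		if (num < 0) {
			/* minimum range 2^(5*scale+1) exceeds what is left */
			printf("scale = %d: no pages flushed\n", scale);
			continue;
		}
		printf("scale = %d: num = 0x%x, flushes 0x%lx pages\n",
		       scale, num, __TLBI_RANGE_PAGES(num, scale));
		pages -= __TLBI_RANGE_PAGES(num, scale);
	}
	printf("remaining pages: 0x%lx\n", pages);	/* prints 0 */
	return 0;
}

Running it prints num = 0x1c at scale = 2 and num = 0xc at scale = 0, with no pages left over. The kernel loop in the diff below walks scale upwards from 0 instead, which produces the same chunks in the opposite order.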
1 parent 7c78f67 commit d1d3aa9

File tree

2 files changed: +131 -29 lines changed

arch/arm64/include/asm/cpufeature.h

Lines changed: 6 additions & 0 deletions

@@ -692,6 +692,12 @@ static inline bool system_supports_bti(void)
 	return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI);
 }
 
+static inline bool system_supports_tlb_range(void)
+{
+	return IS_ENABLED(CONFIG_ARM64_TLB_RANGE) &&
+			cpus_have_const_cap(ARM64_HAS_TLB_RANGE);
+}
+
 #define ARM64_BP_HARDEN_UNKNOWN		-1
 #define ARM64_BP_HARDEN_WA_NEEDED	0
 #define ARM64_BP_HARDEN_NOT_REQUIRED	1

arch/arm64/include/asm/tlbflush.h

Lines changed: 125 additions & 29 deletions
@@ -60,6 +60,31 @@
 		__ta;						\
 	})
 
+/*
+ * Get translation granule of the system, which is decided by
+ * PAGE_SIZE. Used by TTL.
+ *  - 4KB  : 1
+ *  - 16KB : 2
+ *  - 64KB : 3
+ */
+#define TLBI_TTL_TG_4K		1
+#define TLBI_TTL_TG_16K		2
+#define TLBI_TTL_TG_64K		3
+
+static inline unsigned long get_trans_granule(void)
+{
+	switch (PAGE_SIZE) {
+	case SZ_4K:
+		return TLBI_TTL_TG_4K;
+	case SZ_16K:
+		return TLBI_TTL_TG_16K;
+	case SZ_64K:
+		return TLBI_TTL_TG_64K;
+	default:
+		return 0;
+	}
+}
+
 /*
  * Level-based TLBI operations.
  *
@@ -73,29 +98,14 @@
  * in asm/stage2_pgtable.h.
  */
 #define TLBI_TTL_MASK		GENMASK_ULL(47, 44)
-#define TLBI_TTL_TG_4K		1
-#define TLBI_TTL_TG_16K		2
-#define TLBI_TTL_TG_64K		3
 
 #define __tlbi_level(op, addr, level) do {			\
 	u64 arg = addr;						\
 								\
 	if (cpus_have_const_cap(ARM64_HAS_ARMv8_4_TTL) &&	\
 	    level) {						\
 		u64 ttl = level & 3;				\
-								\
-		switch (PAGE_SIZE) {				\
-		case SZ_4K:					\
-			ttl |= TLBI_TTL_TG_4K << 2;		\
-			break;					\
-		case SZ_16K:					\
-			ttl |= TLBI_TTL_TG_16K << 2;		\
-			break;					\
-		case SZ_64K:					\
-			ttl |= TLBI_TTL_TG_64K << 2;		\
-			break;					\
-		}						\
-								\
+		ttl |= get_trans_granule() << 2;		\
 		arg &= ~TLBI_TTL_MASK;				\
 		arg |= FIELD_PREP(TLBI_TTL_MASK, ttl);		\
 	}							\
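
For illustration, here is a small standalone sketch of the TTL value that __tlbi_level() now composes. Assumptions: 4KB pages, so get_trans_granule() would return TLBI_TTL_TG_4K = 1, the address operand is a made-up stand-in, and FIELD_PREP is replaced by an explicit shift into bits 47:44:

#include <stdio.h>

#define TLBI_TTL_TG_4K	1UL
#define TLBI_TTL_SHIFT	44		/* low bit of GENMASK_ULL(47, 44) */

int main(void)
{
	unsigned long level = 3;	/* invalidating last-level (PTE) entries */
	unsigned long ttl = (level & 3) | (TLBI_TTL_TG_4K << 2);
	unsigned long arg = 0xffff000012345000UL >> 12;	/* stand-in VA operand */

	arg &= ~(0xfUL << TLBI_TTL_SHIFT);	/* clear TLBI_TTL_MASK */
	arg |= ttl << TLBI_TTL_SHIFT;		/* FIELD_PREP(TLBI_TTL_MASK, ttl) */

	printf("ttl = 0x%lx, operand = 0x%016lx\n", ttl, arg);	/* ttl = 0x7 */
	return 0;
}
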
@@ -108,6 +118,44 @@
 		__tlbi_level(op, (arg | USER_ASID_FLAG), level);	\
 } while (0)
 
+/*
+ * This macro creates a properly formatted VA operand for the TLB RANGE.
+ * The value bit assignments are:
+ *
+ * +----------+------+-------+-------+-------+----------------------+
+ * |   ASID   |  TG  | SCALE |  NUM  |  TTL  |        BADDR         |
+ * +-----------------+-------+-------+-------+----------------------+
+ * |63      48|47  46|45   44|43   39|38   37|36                   0|
+ *
+ * The address range is determined by the formula below:
+ * [BADDR, BADDR + (NUM + 1) * 2^(5*SCALE + 1) * PAGESIZE)
+ *
+ */
+#define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl)		\
+	({							\
+		unsigned long __ta = (addr) >> PAGE_SHIFT;	\
+		__ta &= GENMASK_ULL(36, 0);			\
+		__ta |= (unsigned long)(ttl) << 37;		\
+		__ta |= (unsigned long)(num) << 39;		\
+		__ta |= (unsigned long)(scale) << 44;		\
+		__ta |= get_trans_granule() << 46;		\
+		__ta |= (unsigned long)(asid) << 48;		\
+		__ta;						\
+	})
+
+/* These macros are used by the TLBI RANGE feature. */
+#define __TLBI_RANGE_PAGES(num, scale)	\
+	((unsigned long)((num) + 1) << (5 * (scale) + 1))
+#define MAX_TLBI_RANGE_PAGES		__TLBI_RANGE_PAGES(31, 3)
+
+/*
+ * Generate 'num' values from -1 to 30 with -1 rejected by the
+ * __flush_tlb_range() loop below.
+ */
+#define TLBI_RANGE_MASK			GENMASK_ULL(4, 0)
+#define __TLBI_RANGE_NUM(pages, scale)	\
+	((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
+
 /*
  * TLB Invalidation
  * ================
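
As a concrete check of the layout and formula above, the following standalone sketch packs one range operand and prints how many pages it covers. Assumptions: PAGE_SHIFT = 12 (4KB pages), the TG field is hard-coded to the 4KB encoding, and tlbi_vaddr_range() is a local stand-in for the __TLBI_VADDR_RANGE() macro:

#include <stdio.h>

#define PAGE_SHIFT	12
#define BADDR_MASK	((1UL << 37) - 1)	/* GENMASK_ULL(36, 0) */

/* Mirrors __TLBI_VADDR_RANGE() with get_trans_granule() fixed to 1 (4KB). */
static unsigned long tlbi_vaddr_range(unsigned long addr, unsigned long asid,
				      unsigned long scale, unsigned long num,
				      unsigned long ttl)
{
	unsigned long ta = (addr >> PAGE_SHIFT) & BADDR_MASK;

	ta |= ttl << 37;
	ta |= num << 39;
	ta |= scale << 44;
	ta |= 1UL << 46;	/* TG: 4KB granule */
	ta |= asid << 48;
	return ta;
}

int main(void)
{
	unsigned long num = 0x1c, scale = 2;
	unsigned long pages = (num + 1) << (5 * scale + 1);	/* __TLBI_RANGE_PAGES */
	unsigned long op = tlbi_vaddr_range(0x400000, 42, scale, num, 3);

	/* covers 0xe800 pages starting at BADDR, per the formula above */
	printf("operand = 0x%016lx covers 0x%lx pages\n", op, pages);
	return 0;
}

For reference, MAX_TLBI_RANGE_PAGES works out to __TLBI_RANGE_PAGES(31, 3) = 32 << 16 = 0x200000 pages, i.e. 8GB with 4KB pages.
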
@@ -231,32 +279,80 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma,
 				     unsigned long stride, bool last_level,
 				     int tlb_level)
 {
+	int num = 0;
+	int scale = 0;
 	unsigned long asid = ASID(vma->vm_mm);
 	unsigned long addr;
+	unsigned long pages;
 
 	start = round_down(start, stride);
 	end = round_up(end, stride);
+	pages = (end - start) >> PAGE_SHIFT;
 
-	if ((end - start) >= (MAX_TLBI_OPS * stride)) {
+	/*
+	 * When not using TLB range ops, we can handle up to
+	 * (MAX_TLBI_OPS - 1) pages;
+	 * when using TLB range ops, we can handle up to
+	 * (MAX_TLBI_RANGE_PAGES - 1) pages.
+	 */
+	if ((!system_supports_tlb_range() &&
+	     (end - start) >= (MAX_TLBI_OPS * stride)) ||
+	    pages >= MAX_TLBI_RANGE_PAGES) {
 		flush_tlb_mm(vma->vm_mm);
 		return;
 	}
 
-	/* Convert the stride into units of 4k */
-	stride >>= 12;
+	dsb(ishst);
 
-	start = __TLBI_VADDR(start, asid);
-	end = __TLBI_VADDR(end, asid);
+	/*
+	 * When the CPU does not support TLB range operations, flush the TLB
+	 * entries one by one at the granularity of 'stride'. If the TLB
+	 * range ops are supported, then:
+	 *
+	 * 1. If 'pages' is odd, flush the first page through non-range
+	 *    operations;
+	 *
+	 * 2. For remaining pages: the minimum range granularity is decided
+	 *    by 'scale', so multiple range TLBI operations may be required.
+	 *    Start from scale = 0, flush the corresponding number of pages
+	 *    ((num+1)*2^(5*scale+1) starting from 'addr'), then increase it
+	 *    until no pages left.
+	 *
+	 * Note that certain ranges can be represented by either num = 31 and
+	 * scale or num = 0 and scale + 1. The loop below favours the latter
+	 * since num is limited to 30 by the __TLBI_RANGE_NUM() macro.
+	 */
+	while (pages > 0) {
+		if (!system_supports_tlb_range() ||
+		    pages % 2 == 1) {
+			addr = __TLBI_VADDR(start, asid);
+			if (last_level) {
+				__tlbi_level(vale1is, addr, tlb_level);
+				__tlbi_user_level(vale1is, addr, tlb_level);
+			} else {
+				__tlbi_level(vae1is, addr, tlb_level);
+				__tlbi_user_level(vae1is, addr, tlb_level);
+			}
+			start += stride;
+			pages -= stride >> PAGE_SHIFT;
+			continue;
+		}
 
-	dsb(ishst);
-	for (addr = start; addr < end; addr += stride) {
-		if (last_level) {
-			__tlbi_level(vale1is, addr, tlb_level);
-			__tlbi_user_level(vale1is, addr, tlb_level);
-		} else {
-			__tlbi_level(vae1is, addr, tlb_level);
-			__tlbi_user_level(vae1is, addr, tlb_level);
+		num = __TLBI_RANGE_NUM(pages, scale);
+		if (num >= 0) {
+			addr = __TLBI_VADDR_RANGE(start, asid, scale,
+						  num, tlb_level);
+			if (last_level) {
+				__tlbi(rvale1is, addr);
+				__tlbi_user(rvale1is, addr);
+			} else {
+				__tlbi(rvae1is, addr);
+				__tlbi_user(rvae1is, addr);
+			}
+			start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT;
+			pages -= __TLBI_RANGE_PAGES(num, scale);
 		}
+		scale++;
 	}
 	dsb(ish);
 }
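
To see how the new loop behaves end to end, here is a standalone userspace simulation (macros redefined locally; range ops assumed supported and stride assumed equal to PAGE_SIZE, so the non-range branch retires one page per TLBI) that counts the instructions issued for a given page count:

#include <stdio.h>

#define TLBI_RANGE_MASK			0x1fUL
#define __TLBI_RANGE_NUM(pages, scale)	\
	((int)(((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1)
#define __TLBI_RANGE_PAGES(num, scale)	\
	((unsigned long)((num) + 1) << (5 * (scale) + 1))

int main(void)
{
	unsigned long pages = 0xe81b;	/* odd, to exercise the non-range branch */
	int scale = 0, num, ops = 0;

	while (pages > 0) {
		if (pages % 2 == 1) {
			/* one non-range TLBI retires a single page here */
			pages -= 1;
			ops++;
			continue;
		}
		num = __TLBI_RANGE_NUM(pages, scale);
		if (num >= 0) {
			/* one range TLBI covers (num+1) * 2^(5*scale+1) pages */
			pages -= __TLBI_RANGE_PAGES(num, scale);
			ops++;
		}
		scale++;
	}
	printf("TLBI instructions issued: %d\n", ops);	/* prints 3 */
	return 0;
}

For pages = 0xe81b this prints 3: one non-range TLBI for the odd page, then range TLBIs at scale = 0 (0x1a pages) and scale = 2 (0xe800 pages), versus 0xe81b single-page operations with the old loop.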
