
Commit 847689d

Alexandre Ghiti authored and palmer-dabbelt committed

Merge patch series "riscv: Add Zicbop & prefetchw support"

Alexandre Ghiti <[email protected]> says:

I found this lost series developed by Guo, so here is a respin with the comments on v2 applied. This patch series adds Zicbop support and then enables the Linux prefetch features.

* patches from https://lore.kernel.org/r/[email protected]:
  riscv: xchg: Prefetch the destination word for sc.w
  riscv: Add ARCH_HAS_PREFETCH[W] support with Zicbop
  riscv: Add support for Zicbop
  riscv: Introduce Zicbop instructions

Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Alexandre Ghiti <[email protected]>
Signed-off-by: Palmer Dabbelt <[email protected]>
2 parents 415a8c8 + eb87e56 commit 847689d

File tree

9 files changed (+142 / -10 lines)


arch/riscv/Kconfig

Lines changed: 15 additions & 0 deletions
@@ -847,6 +847,21 @@ config RISCV_ISA_ZICBOZ
 
 	  If you don't know what to do here, say Y.
 
+config RISCV_ISA_ZICBOP
+	bool "Zicbop extension support for cache block prefetch"
+	depends on MMU
+	depends on RISCV_ALTERNATIVE
+	default y
+	help
+	  Adds support to dynamically detect the presence of the ZICBOP
+	  extension (Cache Block Prefetch Operations) and enable its
+	  usage.
+
+	  The Zicbop extension can be used to prefetch cache blocks for
+	  read/write fetch.
+
+	  If you don't know what to do here, say Y.
+
 config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI
 	def_bool y
 	# https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc
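For context, this option only controls whether the kernel is built with Zicbop support; whether the prefetch instructions are actually emitted is decided at boot by the alternatives framework from the ISA string. A minimal sketch of an equivalent runtime check (zicbop_usable() is a hypothetical helper, not part of this series; RISCV_ISA_EXT_ZICBOP and riscv_has_extension_unlikely() are the identifiers used elsewhere in these patches):

/* Hypothetical helper, for illustration only. */
#include <linux/kconfig.h>
#include <asm/cpufeature.h>
#include <asm/hwcap.h>

static inline bool zicbop_usable(void)
{
	/* Build-time gate plus the per-boot ISA-string detection
	 * performed by cpufeature.c. */
	return IS_ENABLED(CONFIG_RISCV_ISA_ZICBOP) &&
	       riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP);
}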

arch/riscv/include/asm/barrier.h

Lines changed: 0 additions & 5 deletions
@@ -14,11 +14,6 @@
 #include <asm/cmpxchg.h>
 #include <asm/fence.h>
 
-#define nop()		__asm__ __volatile__ ("nop")
-#define __nops(n)	".rept	" #n "\nnop\n.endr\n"
-#define nops(n)	__asm__ __volatile__ (__nops(n))
-
-
 /* These barriers need to enforce ordering on both devices or memory. */
 #define __mb()		RISCV_FENCE(iorw, iorw)
 #define __rmb()	RISCV_FENCE(ir, ir)

arch/riscv/include/asm/cacheflush.h

Lines changed: 1 addition & 0 deletions
@@ -80,6 +80,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local);
 
 extern unsigned int riscv_cbom_block_size;
 extern unsigned int riscv_cboz_block_size;
+extern unsigned int riscv_cbop_block_size;
 void riscv_init_cbo_blocksizes(void);
 
 #ifdef CONFIG_RISCV_DMA_NONCOHERENT

arch/riscv/include/asm/cmpxchg.h

Lines changed: 3 additions & 1 deletion
@@ -13,6 +13,7 @@
 #include <asm/hwcap.h>
 #include <asm/insn-def.h>
 #include <asm/cpufeature-macros.h>
+#include <asm/processor.h>
 
 #define __arch_xchg_masked(sc_sfx, swap_sfx, prepend, sc_append,	\
 			   swap_append, r, p, n)			\
@@ -37,14 +38,15 @@
 									\
 	__asm__ __volatile__ (						\
 	       prepend							\
+	       PREFETCHW_ASM(%5)					\
 	       "0:	lr.w %0, %2\n"					\
 	       "	and  %1, %0, %z4\n"				\
 	       "	or   %1, %1, %z3\n"				\
 	       "	sc.w" sc_sfx " %1, %1, %2\n"			\
 	       "	bnez %1, 0b\n"					\
 	       sc_append						\
 	       : "=&r" (__retx), "=&r" (__rc), "+A" (*(__ptr32b))	\
-	       : "rJ" (__newx), "rJ" (~__mask)				\
+	       : "rJ" (__newx), "rJ" (~__mask), "rJ" (__ptr32b)	\
 	       : "memory");						\
 									\
 	r = (__typeof__(*(p)))((__retx & __mask) >> __s;		\
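The PREFETCH_W issued before the lr.w/sc.w loop requests the destination cache line in a writable state up front, so the store-conditional is less likely to fail because the line still had to be upgraded from shared to exclusive mid-sequence; operand %5 is the newly added "rJ" (__ptr32b) input. A minimal caller-side sketch (claim_seat() is hypothetical, shown only because a sub-word xchg() is routed through this masked path):

/* Illustration only: a 16-bit xchg() goes through the masked lr.w/sc.w
 * loop above, so the prefetch.w warms the containing 32-bit word before
 * the reservation is taken. */
#include <linux/atomic.h>
#include <linux/types.h>

static u16 claim_seat(u16 *seat)
{
	return xchg(seat, 1);	/* returns the previous value */
}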

arch/riscv/include/asm/hwcap.h

Lines changed: 1 addition & 0 deletions
@@ -105,6 +105,7 @@
 #define RISCV_ISA_EXT_ZVFBFWMA		96
 #define RISCV_ISA_EXT_ZAAMO		97
 #define RISCV_ISA_EXT_ZALRSC		98
+#define RISCV_ISA_EXT_ZICBOP		99
 
 #define RISCV_ISA_EXT_XLINUXENVCFG	127

arch/riscv/include/asm/insn-def.h

Lines changed: 66 additions & 0 deletions
@@ -18,6 +18,13 @@
 #define INSN_I_RD_SHIFT			 7
 #define INSN_I_OPCODE_SHIFT		 0
 
+#define INSN_S_SIMM7_SHIFT		25
+#define INSN_S_RS2_SHIFT		20
+#define INSN_S_RS1_SHIFT		15
+#define INSN_S_FUNC3_SHIFT		12
+#define INSN_S_SIMM5_SHIFT		 7
+#define INSN_S_OPCODE_SHIFT		 0
+
 #ifdef __ASSEMBLY__
 
 #ifdef CONFIG_AS_HAS_INSN
@@ -30,6 +37,10 @@
 	.insn	i \opcode, \func3, \rd, \rs1, \simm12
 	.endm
 
+	.macro	insn_s, opcode, func3, rs2, simm12, rs1
+	.insn	s \opcode, \func3, \rs2, \simm12(\rs1)
+	.endm
+
 #else
 
 #include <asm/gpr-num.h>
@@ -51,10 +62,20 @@
 		  (\simm12 << INSN_I_SIMM12_SHIFT))
 	.endm
 
+	.macro insn_s, opcode, func3, rs2, simm12, rs1
+	.4byte	((\opcode << INSN_S_OPCODE_SHIFT) |		\
+		 (\func3 << INSN_S_FUNC3_SHIFT) |		\
+		 (.L__gpr_num_\rs2 << INSN_S_RS2_SHIFT) |	\
+		 (.L__gpr_num_\rs1 << INSN_S_RS1_SHIFT) |	\
+		 ((\simm12 & 0x1f) << INSN_S_SIMM5_SHIFT) |	\
+		 (((\simm12 >> 5) & 0x7f) << INSN_S_SIMM7_SHIFT))
+	.endm
+
 #endif
 
 #define __INSN_R(...)	insn_r __VA_ARGS__
 #define __INSN_I(...)	insn_i __VA_ARGS__
+#define __INSN_S(...)	insn_s __VA_ARGS__
 
 #else /* ! __ASSEMBLY__ */
 
@@ -66,6 +87,9 @@
 #define __INSN_I(opcode, func3, rd, rs1, simm12)	\
 	".insn	i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n"
 
+#define __INSN_S(opcode, func3, rs2, simm12, rs1)	\
+	".insn	s " opcode ", " func3 ", " rs2 ", " simm12 "(" rs1 ")\n"
+
 #else
 
 #include <linux/stringify.h>
@@ -92,12 +116,26 @@
 	"	 (\\simm12 << " __stringify(INSN_I_SIMM12_SHIFT) "))\n"	\
 	"	.endm\n"
 
+#define DEFINE_INSN_S							\
+	__DEFINE_ASM_GPR_NUMS						\
+"	.macro insn_s, opcode, func3, rs2, simm12, rs1\n"		\
+"	.4byte	((\\opcode << " __stringify(INSN_S_OPCODE_SHIFT) ") |"	\
+"		 (\\func3 << " __stringify(INSN_S_FUNC3_SHIFT) ") |"	\
+"		 (.L__gpr_num_\\rs2 << " __stringify(INSN_S_RS2_SHIFT) ") |"	\
+"		 (.L__gpr_num_\\rs1 << " __stringify(INSN_S_RS1_SHIFT) ") |"	\
+"		 ((\\simm12 & 0x1f) << " __stringify(INSN_S_SIMM5_SHIFT) ") |"	\
+"		 (((\\simm12 >> 5) & 0x7f) << " __stringify(INSN_S_SIMM7_SHIFT) "))\n"	\
+"	.endm\n"
+
 #define UNDEFINE_INSN_R							\
 	"	.purgem insn_r\n"
 
 #define UNDEFINE_INSN_I							\
 	"	.purgem insn_i\n"
 
+#define UNDEFINE_INSN_S							\
+	"	.purgem insn_s\n"
+
 #define __INSN_R(opcode, func3, func7, rd, rs1, rs2)	\
 	DEFINE_INSN_R						\
 	"insn_r " opcode ", " func3 ", " func7 ", " rd ", " rs1 ", " rs2 "\n" \
@@ -108,6 +146,11 @@
 	"insn_i " opcode ", " func3 ", " rd ", " rs1 ", " simm12 "\n" \
 	UNDEFINE_INSN_I
 
+#define __INSN_S(opcode, func3, rs2, simm12, rs1)	\
+	DEFINE_INSN_S					\
+	"insn_s " opcode ", " func3 ", " rs2 ", " simm12 ", " rs1 "\n" \
+	UNDEFINE_INSN_S
+
 #endif
 
 #endif /* ! __ASSEMBLY__ */
@@ -120,6 +163,10 @@
 	__INSN_I(RV_##opcode, RV_##func3, RV_##rd,	\
 		 RV_##rs1, RV_##simm12)
 
+#define INSN_S(opcode, func3, rs2, simm12, rs1)		\
+	__INSN_S(RV_##opcode, RV_##func3, RV_##rs2,	\
+		 RV_##simm12, RV_##rs1)
+
 #define RV_OPCODE(v)		__ASM_STR(v)
 #define RV_FUNC3(v)		__ASM_STR(v)
 #define RV_FUNC7(v)		__ASM_STR(v)
@@ -133,6 +180,7 @@
 #define RV___RS2(v)		__RV_REG(v)
 
 #define RV_OPCODE_MISC_MEM	RV_OPCODE(15)
+#define RV_OPCODE_OP_IMM	RV_OPCODE(19)
 #define RV_OPCODE_SYSTEM	RV_OPCODE(115)
 
 #define HFENCE_VVMA(vaddr, asid)				\
@@ -196,11 +244,29 @@
 	INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0),	\
 	       RS1(base), SIMM12(4))
 
+#define PREFETCH_I(base, offset)			\
+	INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(0),	\
+	       SIMM12((offset) & 0xfe0), RS1(base))
+
+#define PREFETCH_R(base, offset)			\
+	INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(1),	\
+	       SIMM12((offset) & 0xfe0), RS1(base))
+
+#define PREFETCH_W(base, offset)			\
+	INSN_S(OPCODE_OP_IMM, FUNC3(6), __RS2(3),	\
+	       SIMM12((offset) & 0xfe0), RS1(base))
+
 #define RISCV_PAUSE	".4byte 0x100000f"
 #define ZAWRS_WRS_NTO	".4byte 0x00d00073"
 #define ZAWRS_WRS_STO	".4byte 0x01d00073"
 #define RISCV_NOP4	".4byte 0x00000013"
 
 #define RISCV_INSN_NOP4	_AC(0x00000013, U)
 
+#ifndef __ASSEMBLY__
+#define nop()		__asm__ __volatile__ ("nop")
+#define __nops(n)	".rept	" #n "\nnop\n.endr\n"
+#define nops(n)	__asm__ __volatile__ (__nops(n))
+#endif
+
 #endif /* __ASM_INSN_DEF_H */
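The Zicbop prefetch instructions reuse the OP-IMM opcode (19) with funct3 6 (the ORI slot); the rs2 field selects the variant (0 = prefetch.i, 1 = prefetch.r, 3 = prefetch.w) and only imm[11:5] carries the offset, which is why the macros mask the offset with 0xfe0 (offsets must be multiples of 32 bytes). A stand-alone sketch of the resulting encoding for base register a0 (x10) and offset 0, written as plain userspace C rather than kernel code:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t opcode = 19;	/* OP-IMM, see RV_OPCODE_OP_IMM above */
	uint32_t func3  = 6;	/* ORI encoding slot reused by Zicbop */
	uint32_t rs2    = 3;	/* 3 selects prefetch.w */
	uint32_t rs1    = 10;	/* base register a0 */
	uint32_t imm    = 0;	/* offset; imm[4:0] must stay zero */

	/* Compose the S-type fields exactly as the INSN_S_*_SHIFT macros do. */
	uint32_t insn = (opcode << 0) | (func3 << 12) |
			(rs2 << 20) | (rs1 << 15) |
			((imm & 0x1f) << 7) |
			(((imm >> 5) & 0x7f) << 25);

	printf("prefetch.w 0(a0) = 0x%08x\n", insn);	/* 0x00356013 */
	return 0;
}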

arch/riscv/include/asm/processor.h

Lines changed: 24 additions & 1 deletion
@@ -13,6 +13,9 @@
 #include <vdso/processor.h>
 
 #include <asm/ptrace.h>
+#include <asm/insn-def.h>
+#include <asm/alternative-macros.h>
+#include <asm/hwcap.h>
 
 #define arch_get_mmap_end(addr, len, flags)			\
 ({								\
@@ -52,7 +55,6 @@
 #endif
 
 #ifndef __ASSEMBLY__
-#include <linux/cpumask.h>
 
 struct task_struct;
 struct pt_regs;
@@ -141,6 +143,27 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
 #define KSTK_EIP(tsk)		(task_pt_regs(tsk)->epc)
 #define KSTK_ESP(tsk)		(task_pt_regs(tsk)->sp)
 
+#define PREFETCH_ASM(x)						\
+	ALTERNATIVE(__nops(1), PREFETCH_R(x, 0), 0,		\
+		    RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
+
+#define PREFETCHW_ASM(x)					\
+	ALTERNATIVE(__nops(1), PREFETCH_W(x, 0), 0,		\
+		    RISCV_ISA_EXT_ZICBOP, CONFIG_RISCV_ISA_ZICBOP)
+
+#ifdef CONFIG_RISCV_ISA_ZICBOP
+#define ARCH_HAS_PREFETCH
+static inline void prefetch(const void *x)
+{
+	__asm__ __volatile__(PREFETCH_ASM(%0) : : "r" (x) : "memory");
+}
+
+#define ARCH_HAS_PREFETCHW
+static inline void prefetchw(const void *x)
+{
+	__asm__ __volatile__(PREFETCHW_ASM(%0) : : "r" (x) : "memory");
+}
+#endif /* CONFIG_RISCV_ISA_ZICBOP */
 
 /* Do necessary setup to start up a newly executed thread. */
 extern void start_thread(struct pt_regs *regs,
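With ARCH_HAS_PREFETCH and ARCH_HAS_PREFETCHW defined, the generic <linux/prefetch.h> wrappers pick up these inlines; on harts without Zicbop the ALTERNATIVE leaves the nop in place, so callers may use them unconditionally. A hypothetical caller, shown only to illustrate the intended use:

/* Illustration only: struct item and age_items() are made up. */
#include <linux/list.h>
#include <linux/prefetch.h>

struct item {
	struct list_head node;
	unsigned long payload;
};

static void age_items(struct list_head *head)
{
	struct item *it;

	list_for_each_entry(it, head, node) {
		prefetchw(it->node.next);	/* pull the next node in writable */
		it->payload++;			/* the current node is written anyway */
	}
}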

arch/riscv/kernel/cpufeature.c

Lines changed: 21 additions & 0 deletions
@@ -32,6 +32,7 @@
 #define NUM_ALPHA_EXTS ('z' - 'a' + 1)
 
 static bool any_cpu_has_zicboz;
+static bool any_cpu_has_zicbop;
 static bool any_cpu_has_zicbom;
 
 unsigned long elf_hwcap __read_mostly;
@@ -119,6 +120,21 @@ static int riscv_ext_zicboz_validate(const struct riscv_isa_ext_data *data,
 	return 0;
 }
 
+static int riscv_ext_zicbop_validate(const struct riscv_isa_ext_data *data,
+				     const unsigned long *isa_bitmap)
+{
+	if (!riscv_cbop_block_size) {
+		pr_err("Zicbop detected in ISA string, disabling as no cbop-block-size found\n");
+		return -EINVAL;
+	}
+	if (!is_power_of_2(riscv_cbop_block_size)) {
+		pr_err("Zicbop disabled as cbop-block-size present, but is not a power-of-2\n");
+		return -EINVAL;
+	}
+	any_cpu_has_zicbop = true;
+	return 0;
+}
+
 static int riscv_ext_f_validate(const struct riscv_isa_ext_data *data,
 				const unsigned long *isa_bitmap)
 {
@@ -442,6 +458,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = {
 	__RISCV_ISA_EXT_SUPERSET_VALIDATE(v, RISCV_ISA_EXT_v, riscv_v_exts, riscv_ext_vector_float_validate),
 	__RISCV_ISA_EXT_DATA(h, RISCV_ISA_EXT_h),
 	__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicbom, RISCV_ISA_EXT_ZICBOM, riscv_xlinuxenvcfg_exts, riscv_ext_zicbom_validate),
+	__RISCV_ISA_EXT_DATA_VALIDATE(zicbop, RISCV_ISA_EXT_ZICBOP, riscv_ext_zicbop_validate),
 	__RISCV_ISA_EXT_SUPERSET_VALIDATE(zicboz, RISCV_ISA_EXT_ZICBOZ, riscv_xlinuxenvcfg_exts, riscv_ext_zicboz_validate),
 	__RISCV_ISA_EXT_DATA(ziccrse, RISCV_ISA_EXT_ZICCRSE),
 	__RISCV_ISA_EXT_DATA(zicntr, RISCV_ISA_EXT_ZICNTR),
@@ -1112,6 +1129,10 @@ void __init riscv_user_isa_enable(void)
 		current->thread.envcfg |= ENVCFG_CBCFE;
 	else if (any_cpu_has_zicbom)
 		pr_warn("Zicbom disabled as it is unavailable on some harts\n");
+
+	if (!riscv_has_extension_unlikely(RISCV_ISA_EXT_ZICBOP) &&
+	    any_cpu_has_zicbop)
+		pr_warn("Zicbop disabled as it is unavailable on some harts\n");
 }
 
 #ifdef CONFIG_RISCV_ALTERNATIVE

arch/riscv/mm/cacheflush.c

Lines changed: 11 additions & 3 deletions
@@ -114,6 +114,9 @@ EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
 unsigned int riscv_cboz_block_size;
 EXPORT_SYMBOL_GPL(riscv_cboz_block_size);
 
+unsigned int riscv_cbop_block_size;
+EXPORT_SYMBOL_GPL(riscv_cbop_block_size);
+
 static void __init cbo_get_block_size(struct device_node *node,
 				      const char *name, u32 *block_size,
 				      unsigned long *first_hartid)
@@ -138,8 +141,8 @@ static void __init cbo_get_block_size(struct device_node *node,
 
 void __init riscv_init_cbo_blocksizes(void)
 {
-	unsigned long cbom_hartid, cboz_hartid;
-	u32 cbom_block_size = 0, cboz_block_size = 0;
+	unsigned long cbom_hartid, cboz_hartid, cbop_hartid;
+	u32 cbom_block_size = 0, cboz_block_size = 0, cbop_block_size = 0;
 	struct device_node *node;
 	struct acpi_table_header *rhct;
 	acpi_status status;
@@ -151,13 +154,15 @@ void __init riscv_init_cbo_blocksizes(void)
 					   &cbom_block_size, &cbom_hartid);
 			cbo_get_block_size(node, "riscv,cboz-block-size",
 					   &cboz_block_size, &cboz_hartid);
+			cbo_get_block_size(node, "riscv,cbop-block-size",
+					   &cbop_block_size, &cbop_hartid);
 		}
 	} else {
 		status = acpi_get_table(ACPI_SIG_RHCT, 0, &rhct);
 		if (ACPI_FAILURE(status))
 			return;
 
-		acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, NULL);
+		acpi_get_cbo_block_size(rhct, &cbom_block_size, &cboz_block_size, &cbop_block_size);
 		acpi_put_table((struct acpi_table_header *)rhct);
 	}
 
@@ -166,6 +171,9 @@ void __init riscv_init_cbo_blocksizes(void)
 
 	if (cboz_block_size)
 		riscv_cboz_block_size = cboz_block_size;
+
+	if (cbop_block_size)
+		riscv_cbop_block_size = cbop_block_size;
 }
 
 #ifdef CONFIG_SMP
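riscv_cbop_block_size is exported so other kernel code can stride prefetches by the hart's cache-block prefetch size. A speculative sketch of such a consumer (warm_buffer() is hypothetical, and the 64-byte fallback is an arbitrary assumption for when no cbop-block-size was discovered):

/* Illustration only, not part of the commit. */
#include <linux/prefetch.h>
#include <linux/types.h>
#include <asm/cacheflush.h>

static void warm_buffer(const void *buf, size_t len)
{
	size_t step = riscv_cbop_block_size ?: 64;
	size_t off;

	for (off = 0; off < len; off += step)
		prefetch(buf + off);
}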
