Skip to content

Commit d863910

Browse files
charlie-rivospalmer-dabbelt
authored andcommitted
riscv: vector: Support xtheadvector save/restore
Use alternatives to add support for xtheadvector vector save/restore routines. Signed-off-by: Charlie Jenkins <[email protected]> Reviewed-by: Conor Dooley <[email protected]> Tested-by: Yangyu Chen <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent 01e3313 commit d863910

File tree

8 files changed

+198
-68
lines changed

8 files changed

+198
-68
lines changed

arch/riscv/include/asm/csr.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@
3030
#define SR_VS_CLEAN _AC(0x00000400, UL)
3131
#define SR_VS_DIRTY _AC(0x00000600, UL)
3232

33+
#define SR_VS_THEAD _AC(0x01800000, UL) /* xtheadvector Status */
34+
#define SR_VS_OFF_THEAD _AC(0x00000000, UL)
35+
#define SR_VS_INITIAL_THEAD _AC(0x00800000, UL)
36+
#define SR_VS_CLEAN_THEAD _AC(0x01000000, UL)
37+
#define SR_VS_DIRTY_THEAD _AC(0x01800000, UL)
38+
3339
#define SR_XS _AC(0x00018000, UL) /* Extension Status */
3440
#define SR_XS_OFF _AC(0x00000000, UL)
3541
#define SR_XS_INITIAL _AC(0x00008000, UL)

arch/riscv/include/asm/switch_to.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ do { \
117117
__set_prev_cpu(__prev->thread); \
118118
if (has_fpu()) \
119119
__switch_to_fpu(__prev, __next); \
120-
if (has_vector()) \
120+
if (has_vector() || has_xtheadvector()) \
121121
__switch_to_vector(__prev, __next); \
122122
if (switch_to_should_flush_icache(__next)) \
123123
local_flush_icache_all(); \

arch/riscv/include/asm/vector.h

Lines changed: 173 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,27 @@
1818
#include <asm/cpufeature.h>
1919
#include <asm/csr.h>
2020
#include <asm/asm.h>
21+
#include <asm/vendorid_list.h>
22+
#include <asm/vendor_extensions.h>
23+
#include <asm/vendor_extensions/thead.h>
24+
25+
#define __riscv_v_vstate_or(_val, TYPE) ({ \
26+
typeof(_val) _res = _val; \
27+
if (has_xtheadvector()) \
28+
_res = (_res & ~SR_VS_THEAD) | SR_VS_##TYPE##_THEAD; \
29+
else \
30+
_res = (_res & ~SR_VS) | SR_VS_##TYPE; \
31+
_res; \
32+
})
33+
34+
#define __riscv_v_vstate_check(_val, TYPE) ({ \
35+
bool _res; \
36+
if (has_xtheadvector()) \
37+
_res = ((_val) & SR_VS_THEAD) == SR_VS_##TYPE##_THEAD; \
38+
else \
39+
_res = ((_val) & SR_VS) == SR_VS_##TYPE; \
40+
_res; \
41+
})
2142

2243
extern unsigned long riscv_v_vsize;
2344
int riscv_v_setup_vsize(void);
@@ -41,39 +62,62 @@ static __always_inline bool has_vector(void)
4162
return riscv_has_extension_unlikely(RISCV_ISA_EXT_ZVE32X);
4263
}
4364

65+
static __always_inline bool has_xtheadvector_no_alternatives(void)
66+
{
67+
if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR))
68+
return riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR);
69+
else
70+
return false;
71+
}
72+
73+
static __always_inline bool has_xtheadvector(void)
74+
{
75+
if (IS_ENABLED(CONFIG_RISCV_ISA_XTHEADVECTOR))
76+
return riscv_has_vendor_extension_unlikely(THEAD_VENDOR_ID,
77+
RISCV_ISA_VENDOR_EXT_XTHEADVECTOR);
78+
else
79+
return false;
80+
}
81+
4482
static inline void __riscv_v_vstate_clean(struct pt_regs *regs)
4583
{
46-
regs->status = (regs->status & ~SR_VS) | SR_VS_CLEAN;
84+
regs->status = __riscv_v_vstate_or(regs->status, CLEAN);
4785
}
4886

4987
static inline void __riscv_v_vstate_dirty(struct pt_regs *regs)
5088
{
51-
regs->status = (regs->status & ~SR_VS) | SR_VS_DIRTY;
89+
regs->status = __riscv_v_vstate_or(regs->status, DIRTY);
5290
}
5391

5492
static inline void riscv_v_vstate_off(struct pt_regs *regs)
5593
{
56-
regs->status = (regs->status & ~SR_VS) | SR_VS_OFF;
94+
regs->status = __riscv_v_vstate_or(regs->status, OFF);
5795
}
5896

5997
static inline void riscv_v_vstate_on(struct pt_regs *regs)
6098
{
61-
regs->status = (regs->status & ~SR_VS) | SR_VS_INITIAL;
99+
regs->status = __riscv_v_vstate_or(regs->status, INITIAL);
62100
}
63101

64102
static inline bool riscv_v_vstate_query(struct pt_regs *regs)
65103
{
66-
return (regs->status & SR_VS) != 0;
104+
return !__riscv_v_vstate_check(regs->status, OFF);
67105
}
68106

69107
static __always_inline void riscv_v_enable(void)
70108
{
71-
csr_set(CSR_SSTATUS, SR_VS);
109+
if (has_xtheadvector())
110+
csr_set(CSR_SSTATUS, SR_VS_THEAD);
111+
else
112+
csr_set(CSR_SSTATUS, SR_VS);
72113
}
73114

74115
static __always_inline void riscv_v_disable(void)
75116
{
76-
csr_clear(CSR_SSTATUS, SR_VS);
117+
if (has_xtheadvector())
118+
csr_clear(CSR_SSTATUS, SR_VS_THEAD);
119+
else
120+
csr_clear(CSR_SSTATUS, SR_VS);
77121
}
78122

79123
static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
@@ -82,10 +126,36 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest)
82126
"csrr %0, " __stringify(CSR_VSTART) "\n\t"
83127
"csrr %1, " __stringify(CSR_VTYPE) "\n\t"
84128
"csrr %2, " __stringify(CSR_VL) "\n\t"
85-
"csrr %3, " __stringify(CSR_VCSR) "\n\t"
86-
"csrr %4, " __stringify(CSR_VLENB) "\n\t"
87129
: "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl),
88-
"=r" (dest->vcsr), "=r" (dest->vlenb) : :);
130+
"=r" (dest->vcsr) : :);
131+
132+
if (has_xtheadvector()) {
133+
unsigned long status;
134+
135+
/*
136+
* CSR_VCSR is defined as
137+
* [2:1] - vxrm[1:0]
138+
* [0] - vxsat
139+
* The earlier vector spec implemented by T-Head uses separate
140+
* registers for the same bit-elements, so just combine those
141+
* into the existing output field.
142+
*
143+
* Additionally T-Head cores need FS to be enabled when accessing
144+
* the VXRM and VXSAT CSRs, otherwise ending in illegal instructions.
145+
* Though the cores do not implement the VXRM and VXSAT fields in the
146+
* FCSR CSR that vector-0.7.1 specifies.
147+
*/
148+
status = csr_read_set(CSR_STATUS, SR_FS_DIRTY);
149+
dest->vcsr = csr_read(CSR_VXSAT) | csr_read(CSR_VXRM) << CSR_VXRM_SHIFT;
150+
151+
dest->vlenb = riscv_v_vsize / 32;
152+
153+
if ((status & SR_FS) != SR_FS_DIRTY)
154+
csr_write(CSR_STATUS, status);
155+
} else {
156+
dest->vcsr = csr_read(CSR_VCSR);
157+
dest->vlenb = csr_read(CSR_VLENB);
158+
}
89159
}
90160

91161
static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src)
@@ -96,9 +166,25 @@ static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src
96166
"vsetvl x0, %2, %1\n\t"
97167
".option pop\n\t"
98168
"csrw " __stringify(CSR_VSTART) ", %0\n\t"
99-
"csrw " __stringify(CSR_VCSR) ", %3\n\t"
100-
: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl),
101-
"r" (src->vcsr) :);
169+
: : "r" (src->vstart), "r" (src->vtype), "r" (src->vl));
170+
171+
if (has_xtheadvector()) {
172+
unsigned long status = csr_read(CSR_SSTATUS);
173+
174+
/*
175+
* Similar to __vstate_csr_save above, restore values for the
176+
* separate VXRM and VXSAT CSRs from the vcsr variable.
177+
*/
178+
status = csr_read_set(CSR_STATUS, SR_FS_DIRTY);
179+
180+
csr_write(CSR_VXRM, (src->vcsr >> CSR_VXRM_SHIFT) & CSR_VXRM_MASK);
181+
csr_write(CSR_VXSAT, src->vcsr & CSR_VXSAT_MASK);
182+
183+
if ((status & SR_FS) != SR_FS_DIRTY)
184+
csr_write(CSR_STATUS, status);
185+
} else {
186+
csr_write(CSR_VCSR, src->vcsr);
187+
}
102188
}
103189

104190
static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
@@ -108,19 +194,33 @@ static inline void __riscv_v_vstate_save(struct __riscv_v_ext_state *save_to,
108194

109195
riscv_v_enable();
110196
__vstate_csr_save(save_to);
111-
asm volatile (
112-
".option push\n\t"
113-
".option arch, +zve32x\n\t"
114-
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
115-
"vse8.v v0, (%1)\n\t"
116-
"add %1, %1, %0\n\t"
117-
"vse8.v v8, (%1)\n\t"
118-
"add %1, %1, %0\n\t"
119-
"vse8.v v16, (%1)\n\t"
120-
"add %1, %1, %0\n\t"
121-
"vse8.v v24, (%1)\n\t"
122-
".option pop\n\t"
123-
: "=&r" (vl) : "r" (datap) : "memory");
197+
if (has_xtheadvector()) {
198+
asm volatile (
199+
"mv t0, %0\n\t"
200+
THEAD_VSETVLI_T4X0E8M8D1
201+
THEAD_VSB_V_V0T0
202+
"add t0, t0, t4\n\t"
203+
THEAD_VSB_V_V0T0
204+
"add t0, t0, t4\n\t"
205+
THEAD_VSB_V_V0T0
206+
"add t0, t0, t4\n\t"
207+
THEAD_VSB_V_V0T0
208+
: : "r" (datap) : "memory", "t0", "t4");
209+
} else {
210+
asm volatile (
211+
".option push\n\t"
212+
".option arch, +zve32x\n\t"
213+
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
214+
"vse8.v v0, (%1)\n\t"
215+
"add %1, %1, %0\n\t"
216+
"vse8.v v8, (%1)\n\t"
217+
"add %1, %1, %0\n\t"
218+
"vse8.v v16, (%1)\n\t"
219+
"add %1, %1, %0\n\t"
220+
"vse8.v v24, (%1)\n\t"
221+
".option pop\n\t"
222+
: "=&r" (vl) : "r" (datap) : "memory");
223+
}
124224
riscv_v_disable();
125225
}
126226

@@ -130,19 +230,33 @@ static inline void __riscv_v_vstate_restore(struct __riscv_v_ext_state *restore_
130230
unsigned long vl;
131231

132232
riscv_v_enable();
133-
asm volatile (
134-
".option push\n\t"
135-
".option arch, +zve32x\n\t"
136-
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
137-
"vle8.v v0, (%1)\n\t"
138-
"add %1, %1, %0\n\t"
139-
"vle8.v v8, (%1)\n\t"
140-
"add %1, %1, %0\n\t"
141-
"vle8.v v16, (%1)\n\t"
142-
"add %1, %1, %0\n\t"
143-
"vle8.v v24, (%1)\n\t"
144-
".option pop\n\t"
145-
: "=&r" (vl) : "r" (datap) : "memory");
233+
if (has_xtheadvector()) {
234+
asm volatile (
235+
"mv t0, %0\n\t"
236+
THEAD_VSETVLI_T4X0E8M8D1
237+
THEAD_VLB_V_V0T0
238+
"add t0, t0, t4\n\t"
239+
THEAD_VLB_V_V0T0
240+
"add t0, t0, t4\n\t"
241+
THEAD_VLB_V_V0T0
242+
"add t0, t0, t4\n\t"
243+
THEAD_VLB_V_V0T0
244+
: : "r" (datap) : "memory", "t0", "t4");
245+
} else {
246+
asm volatile (
247+
".option push\n\t"
248+
".option arch, +zve32x\n\t"
249+
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
250+
"vle8.v v0, (%1)\n\t"
251+
"add %1, %1, %0\n\t"
252+
"vle8.v v8, (%1)\n\t"
253+
"add %1, %1, %0\n\t"
254+
"vle8.v v16, (%1)\n\t"
255+
"add %1, %1, %0\n\t"
256+
"vle8.v v24, (%1)\n\t"
257+
".option pop\n\t"
258+
: "=&r" (vl) : "r" (datap) : "memory");
259+
}
146260
__vstate_csr_restore(restore_from);
147261
riscv_v_disable();
148262
}
@@ -152,33 +266,41 @@ static inline void __riscv_v_vstate_discard(void)
152266
unsigned long vl, vtype_inval = 1UL << (BITS_PER_LONG - 1);
153267

154268
riscv_v_enable();
269+
if (has_xtheadvector())
270+
asm volatile (THEAD_VSETVLI_T4X0E8M8D1 : : : "t4");
271+
else
272+
asm volatile (
273+
".option push\n\t"
274+
".option arch, +zve32x\n\t"
275+
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
276+
".option pop\n\t": "=&r" (vl));
277+
155278
asm volatile (
156279
".option push\n\t"
157280
".option arch, +zve32x\n\t"
158-
"vsetvli %0, x0, e8, m8, ta, ma\n\t"
159281
"vmv.v.i v0, -1\n\t"
160282
"vmv.v.i v8, -1\n\t"
161283
"vmv.v.i v16, -1\n\t"
162284
"vmv.v.i v24, -1\n\t"
163285
"vsetvl %0, x0, %1\n\t"
164286
".option pop\n\t"
165-
: "=&r" (vl) : "r" (vtype_inval) : "memory");
287+
: "=&r" (vl) : "r" (vtype_inval));
288+
166289
riscv_v_disable();
167290
}
168291

169292
static inline void riscv_v_vstate_discard(struct pt_regs *regs)
170293
{
171-
if ((regs->status & SR_VS) == SR_VS_OFF)
172-
return;
173-
174-
__riscv_v_vstate_discard();
175-
__riscv_v_vstate_dirty(regs);
294+
if (riscv_v_vstate_query(regs)) {
295+
__riscv_v_vstate_discard();
296+
__riscv_v_vstate_dirty(regs);
297+
}
176298
}
177299

178300
static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
179301
struct pt_regs *regs)
180302
{
181-
if ((regs->status & SR_VS) == SR_VS_DIRTY) {
303+
if (__riscv_v_vstate_check(regs->status, DIRTY)) {
182304
__riscv_v_vstate_save(vstate, vstate->datap);
183305
__riscv_v_vstate_clean(regs);
184306
}
@@ -187,7 +309,7 @@ static inline void riscv_v_vstate_save(struct __riscv_v_ext_state *vstate,
187309
static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
188310
struct pt_regs *regs)
189311
{
190-
if ((regs->status & SR_VS) != SR_VS_OFF) {
312+
if (riscv_v_vstate_query(regs)) {
191313
__riscv_v_vstate_restore(vstate, vstate->datap);
192314
__riscv_v_vstate_clean(regs);
193315
}
@@ -196,7 +318,7 @@ static inline void riscv_v_vstate_restore(struct __riscv_v_ext_state *vstate,
196318
static inline void riscv_v_vstate_set_restore(struct task_struct *task,
197319
struct pt_regs *regs)
198320
{
199-
if ((regs->status & SR_VS) != SR_VS_OFF) {
321+
if (riscv_v_vstate_query(regs)) {
200322
set_tsk_thread_flag(task, TIF_RISCV_V_DEFER_RESTORE);
201323
riscv_v_vstate_on(regs);
202324
}
@@ -270,6 +392,8 @@ struct pt_regs;
270392
static inline int riscv_v_setup_vsize(void) { return -EOPNOTSUPP; }
271393
static __always_inline bool has_vector(void) { return false; }
272394
static __always_inline bool insn_is_vector(u32 insn_buf) { return false; }
395+
static __always_inline bool has_xtheadvector_no_alternatives(void) { return false; }
396+
static __always_inline bool has_xtheadvector(void) { return false; }
273397
static inline bool riscv_v_first_use_handler(struct pt_regs *regs) { return false; }
274398
static inline bool riscv_v_vstate_query(struct pt_regs *regs) { return false; }
275399
static inline bool riscv_v_vstate_ctrl_user_allowed(void) { return false; }

arch/riscv/kernel/cpufeature.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -874,8 +874,7 @@ static int __init riscv_fill_hwcap_from_ext_list(unsigned long *isa2hwcap)
874874
riscv_fill_vendor_ext_list(cpu);
875875
}
876876

877-
if (riscv_isa_vendor_extension_available(THEAD_VENDOR_ID, XTHEADVECTOR) &&
878-
has_thead_homogeneous_vlenb() < 0) {
877+
if (has_xtheadvector_no_alternatives() && has_thead_homogeneous_vlenb() < 0) {
879878
pr_warn("Unsupported heterogeneous vlenb detected, vector extension disabled.\n");
880879
disable_xtheadvector();
881880
}
@@ -932,7 +931,8 @@ void __init riscv_fill_hwcap(void)
932931
elf_hwcap &= ~COMPAT_HWCAP_ISA_F;
933932
}
934933

935-
if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X)) {
934+
if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_ZVE32X) ||
935+
has_xtheadvector_no_alternatives()) {
936936
/*
937937
* This cannot fail when called on the boot hart
938938
*/

0 commit comments

Comments
 (0)