Skip to content

Commit af7167d

Browse files
broonie authored and ctmarinas committed
arm64/sme: Implement streaming SVE context switching
When in streaming mode we need to save and restore the streaming mode SVE register state rather than the regular SVE register state. This uses the streaming mode vector length and omits FFR but is otherwise identical, if TIF_SVE is enabled when we are in streaming mode then streaming mode takes precedence. This does not handle use of streaming SVE state with KVM, ptrace or signals. This will be updated in further patches. Signed-off-by: Mark Brown <[email protected]> Reviewed-by: Catalin Marinas <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Catalin Marinas <[email protected]>
1 parent b40c559 commit af7167d

File tree

6 files changed

+136
-23
lines changed

6 files changed

+136
-23
lines changed

arch/arm64/include/asm/fpsimd.h

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,21 @@ extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
4747

4848
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
4949
void *sve_state, unsigned int sve_vl,
50-
u64 *svcr);
50+
unsigned int sme_vl, u64 *svcr);
5151

5252
extern void fpsimd_flush_task_state(struct task_struct *target);
5353
extern void fpsimd_save_and_flush_cpu_state(void);
5454

55+
static inline bool thread_sm_enabled(struct thread_struct *thread)
56+
{
57+
return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_SM_MASK);
58+
}
59+
60+
static inline bool thread_za_enabled(struct thread_struct *thread)
61+
{
62+
return system_supports_sme() && (thread->svcr & SYS_SVCR_EL0_ZA_MASK);
63+
}
64+
5565
/* Maximum VL that SVE/SME VL-agnostic software can transparently support */
5666
#define VL_ARCH_MAX 0x100
5767

@@ -63,7 +73,14 @@ static inline size_t sve_ffr_offset(int vl)
6373

6474
static inline void *sve_pffr(struct thread_struct *thread)
6575
{
66-
return (char *)thread->sve_state + sve_ffr_offset(thread_get_sve_vl(thread));
76+
unsigned int vl;
77+
78+
if (system_supports_sme() && thread_sm_enabled(thread))
79+
vl = thread_get_sme_vl(thread);
80+
else
81+
vl = thread_get_sve_vl(thread);
82+
83+
return (char *)thread->sve_state + sve_ffr_offset(vl);
6784
}
6885

6986
extern void sve_save_state(void *state, u32 *pfpsr, int save_ffr);
@@ -72,6 +89,7 @@ extern void sve_load_state(void const *state, u32 const *pfpsr,
7289
extern void sve_flush_live(bool flush_ffr, unsigned long vq_minus_1);
7390
extern unsigned int sve_get_vl(void);
7491
extern void sve_set_vq(unsigned long vq_minus_1);
92+
extern void sme_set_vq(unsigned long vq_minus_1);
7593

7694
struct arm64_cpu_capabilities;
7795
extern void sve_kernel_enable(const struct arm64_cpu_capabilities *__unused);

arch/arm64/include/asm/fpsimdmacros.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,17 @@
262262
921:
263263
.endm
264264

265+
/* Update SMCR_EL1.LEN with the new VQ */
266+
.macro sme_load_vq xvqminus1, xtmp, xtmp2
267+
mrs_s \xtmp, SYS_SMCR_EL1
268+
bic \xtmp2, \xtmp, SMCR_ELx_LEN_MASK
269+
orr \xtmp2, \xtmp2, \xvqminus1
270+
cmp \xtmp2, \xtmp
271+
b.eq 921f
272+
msr_s SYS_SMCR_EL1, \xtmp2 //self-synchronising
273+
921:
274+
.endm
275+
265276
/* Preserve the first 128-bits of Znz and zero the rest. */
266277
.macro _sve_flush_z nz
267278
_sve_check_zreg \nz

arch/arm64/include/asm/processor.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,11 @@ static inline unsigned int thread_get_sve_vl(struct thread_struct *thread)
184184
return thread_get_vl(thread, ARM64_VEC_SVE);
185185
}
186186

187+
static inline unsigned int thread_get_sme_vl(struct thread_struct *thread)
188+
{
189+
return thread_get_vl(thread, ARM64_VEC_SME);
190+
}
191+
187192
unsigned int task_get_vl(const struct task_struct *task, enum vec_type type);
188193
void task_set_vl(struct task_struct *task, enum vec_type type,
189194
unsigned long vl);
@@ -197,6 +202,11 @@ static inline unsigned int task_get_sve_vl(const struct task_struct *task)
197202
return task_get_vl(task, ARM64_VEC_SVE);
198203
}
199204

205+
static inline unsigned int task_get_sme_vl(const struct task_struct *task)
206+
{
207+
return task_get_vl(task, ARM64_VEC_SME);
208+
}
209+
200210
static inline void task_set_sve_vl(struct task_struct *task, unsigned long vl)
201211
{
202212
task_set_vl(task, ARM64_VEC_SVE, vl);

arch/arm64/kernel/entry-fpsimd.S

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,4 +94,9 @@ SYM_FUNC_START(sme_get_vl)
9494
ret
9595
SYM_FUNC_END(sme_get_vl)
9696

97+
SYM_FUNC_START(sme_set_vq)
98+
sme_load_vq x0, x1, x2
99+
ret
100+
SYM_FUNC_END(sme_set_vq)
101+
97102
#endif /* CONFIG_ARM64_SME */

arch/arm64/kernel/fpsimd.c

Lines changed: 89 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ struct fpsimd_last_state_struct {
123123
void *sve_state;
124124
u64 *svcr;
125125
unsigned int sve_vl;
126+
unsigned int sme_vl;
126127
};
127128

128129
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
@@ -301,25 +302,37 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
301302
task->thread.vl_onexec[type] = vl;
302303
}
303304

305+
/*
306+
* TIF_SME controls whether a task can use SME without trapping while
307+
* in userspace, when TIF_SME is set then we must have storage
308+
* alocated in sve_state and za_state to store the contents of both ZA
309+
* and the SVE registers for both streaming and non-streaming modes.
310+
*
311+
* If both SVCR.ZA and SVCR.SM are disabled then at any point we
312+
* may disable TIF_SME and reenable traps.
313+
*/
314+
315+
304316
/*
305317
* TIF_SVE controls whether a task can use SVE without trapping while
306-
* in userspace, and also the way a task's FPSIMD/SVE state is stored
307-
* in thread_struct.
318+
* in userspace, and also (together with TIF_SME) the way a task's
319+
* FPSIMD/SVE state is stored in thread_struct.
308320
*
309321
* The kernel uses this flag to track whether a user task is actively
310322
* using SVE, and therefore whether full SVE register state needs to
311323
* be tracked. If not, the cheaper FPSIMD context handling code can
312324
* be used instead of the more costly SVE equivalents.
313325
*
314-
* * TIF_SVE set:
326+
* * TIF_SVE or SVCR.SM set:
315327
*
316328
* The task can execute SVE instructions while in userspace without
317329
* trapping to the kernel.
318330
*
319331
* When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
320332
* corresponding Zn), P0-P15 and FFR are encoded in in
321333
* task->thread.sve_state, formatted appropriately for vector
322-
* length task->thread.sve_vl.
334+
* length task->thread.sve_vl or, if SVCR.SM is set,
335+
* task->thread.sme_vl.
323336
*
324337
* task->thread.sve_state must point to a valid buffer at least
325338
* sve_state_size(task) bytes in size.
@@ -357,19 +370,40 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
357370
*/
358371
static void task_fpsimd_load(void)
359372
{
373+
bool restore_sve_regs = false;
374+
bool restore_ffr;
375+
360376
WARN_ON(!system_supports_fpsimd());
361377
WARN_ON(!have_cpu_fpsimd_context());
362378

363-
if (IS_ENABLED(CONFIG_ARM64_SME) && test_thread_flag(TIF_SME))
364-
write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0);
365-
379+
/* Check if we should restore SVE first */
366380
if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
367381
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
382+
restore_sve_regs = true;
383+
restore_ffr = true;
384+
}
385+
386+
/* Restore SME, override SVE register configuration if needed */
387+
if (system_supports_sme()) {
388+
unsigned long sme_vl = task_get_sme_vl(current);
389+
390+
if (test_thread_flag(TIF_SME))
391+
sme_set_vq(sve_vq_from_vl(sme_vl) - 1);
392+
393+
write_sysreg_s(current->thread.svcr, SYS_SVCR_EL0);
394+
395+
if (thread_sm_enabled(&current->thread)) {
396+
restore_sve_regs = true;
397+
restore_ffr = system_supports_fa64();
398+
}
399+
}
400+
401+
if (restore_sve_regs)
368402
sve_load_state(sve_pffr(&current->thread),
369-
&current->thread.uw.fpsimd_state.fpsr, true);
370-
} else {
403+
&current->thread.uw.fpsimd_state.fpsr,
404+
restore_ffr);
405+
else
371406
fpsimd_load_state(&current->thread.uw.fpsimd_state);
372-
}
373407
}
374408

375409
/*
@@ -387,22 +421,43 @@ static void fpsimd_save(void)
387421
struct fpsimd_last_state_struct const *last =
388422
this_cpu_ptr(&fpsimd_last_state);
389423
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
424+
bool save_sve_regs = false;
425+
bool save_ffr;
426+
unsigned int vl;
390427

391428
WARN_ON(!system_supports_fpsimd());
392429
WARN_ON(!have_cpu_fpsimd_context());
393430

394431
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
395432
return;
396433

397-
if (IS_ENABLED(CONFIG_ARM64_SME) &&
398-
test_thread_flag(TIF_SME)) {
434+
if (test_thread_flag(TIF_SVE)) {
435+
save_sve_regs = true;
436+
save_ffr = true;
437+
vl = last->sve_vl;
438+
}
439+
440+
if (system_supports_sme()) {
399441
u64 *svcr = last->svcr;
400442
*svcr = read_sysreg_s(SYS_SVCR_EL0);
443+
444+
if (thread_za_enabled(&current->thread)) {
445+
/* ZA state managment is not implemented yet */
446+
force_signal_inject(SIGKILL, SI_KERNEL, 0, 0);
447+
return;
448+
}
449+
450+
/* If we are in streaming mode override regular SVE. */
451+
if (*svcr & SYS_SVCR_EL0_SM_MASK) {
452+
save_sve_regs = true;
453+
save_ffr = system_supports_fa64();
454+
vl = last->sme_vl;
455+
}
401456
}
402457

403-
if (IS_ENABLED(CONFIG_ARM64_SVE) &&
404-
test_thread_flag(TIF_SVE)) {
405-
if (WARN_ON(sve_get_vl() != last->sve_vl)) {
458+
if (IS_ENABLED(CONFIG_ARM64_SVE) && save_sve_regs) {
459+
/* Get the configured VL from RDVL, will account for SM */
460+
if (WARN_ON(sve_get_vl() != vl)) {
406461
/*
407462
* Can't save the user regs, so current would
408463
* re-enter user with corrupt state.
@@ -413,8 +468,8 @@ static void fpsimd_save(void)
413468
}
414469

415470
sve_save_state((char *)last->sve_state +
416-
sve_ffr_offset(last->sve_vl),
417-
&last->st->fpsr, true);
471+
sve_ffr_offset(vl),
472+
&last->st->fpsr, save_ffr);
418473
} else {
419474
fpsimd_save_state(last->st);
420475
}
@@ -619,7 +674,14 @@ static void sve_to_fpsimd(struct task_struct *task)
619674
*/
620675
static size_t sve_state_size(struct task_struct const *task)
621676
{
622-
return SVE_SIG_REGS_SIZE(sve_vq_from_vl(task_get_sve_vl(task)));
677+
unsigned int vl = 0;
678+
679+
if (system_supports_sve())
680+
vl = task_get_sve_vl(task);
681+
if (system_supports_sme())
682+
vl = max(vl, task_get_sme_vl(task));
683+
684+
return SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl));
623685
}
624686

625687
/*
@@ -748,7 +810,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
748810
}
749811

750812
fpsimd_flush_task_state(task);
751-
if (test_and_clear_tsk_thread_flag(task, TIF_SVE))
813+
if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
814+
thread_sm_enabled(&task->thread))
752815
sve_to_fpsimd(task);
753816

754817
if (system_supports_sme() && type == ARM64_VEC_SME)
@@ -1375,6 +1438,9 @@ void fpsimd_flush_thread(void)
13751438
fpsimd_flush_thread_vl(ARM64_VEC_SVE);
13761439
}
13771440

1441+
if (system_supports_sme())
1442+
fpsimd_flush_thread_vl(ARM64_VEC_SME);
1443+
13781444
put_cpu_fpsimd_context();
13791445
}
13801446

@@ -1418,6 +1484,7 @@ static void fpsimd_bind_task_to_cpu(void)
14181484
last->st = &current->thread.uw.fpsimd_state;
14191485
last->sve_state = current->thread.sve_state;
14201486
last->sve_vl = task_get_sve_vl(current);
1487+
last->sme_vl = task_get_sme_vl(current);
14211488
last->svcr = &current->thread.svcr;
14221489
current->thread.fpsimd_cpu = smp_processor_id();
14231490

@@ -1433,7 +1500,8 @@ static void fpsimd_bind_task_to_cpu(void)
14331500
}
14341501

14351502
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
1436-
unsigned int sve_vl, u64 *svcr)
1503+
unsigned int sve_vl, unsigned int sme_vl,
1504+
u64 *svcr)
14371505
{
14381506
struct fpsimd_last_state_struct *last =
14391507
this_cpu_ptr(&fpsimd_last_state);
@@ -1445,6 +1513,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
14451513
last->svcr = svcr;
14461514
last->sve_state = sve_state;
14471515
last->sve_vl = sve_vl;
1516+
last->sme_vl = sme_vl;
14481517
}
14491518

14501519
/*

arch/arm64/kvm/fpsimd.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
116116
fpsimd_bind_state_to_cpu(&vcpu->arch.ctxt.fp_regs,
117117
vcpu->arch.sve_state,
118118
vcpu->arch.sve_max_vl,
119-
NULL);
119+
0, NULL);
120120

121121
clear_thread_flag(TIF_FOREIGN_FPSTATE);
122122
update_thread_flag(TIF_SVE, vcpu_has_sve(vcpu));

0 commit comments

Comments (0)