Skip to content

Commit 75bc81d

Browse files
committed
Merge branch 'for-next/sve-state' into for-next/core
* for-next/sve-state:
  - arm64/fp: Use a struct to pass data to fpsimd_bind_state_to_cpu()
  - arm64/sve: Leave SVE enabled on syscall if we don't context switch
  - arm64/fpsimd: SME no longer requires SVE register state
  - arm64/fpsimd: Load FP state based on recorded data type
  - arm64/fpsimd: Stop using TIF_SVE to manage register saving in KVM
  - arm64/fpsimd: Have KVM explicitly say which FP registers to save
  - arm64/fpsimd: Track the saved FPSIMD state type separately to TIF_SVE
  - KVM: arm64: Discard any SVE state when entering KVM guests
2 parents 595a121 + 1192b93 commit 75bc81d

File tree

9 files changed

+180
-80
lines changed

9 files changed

+180
-80
lines changed

arch/arm64/include/asm/fpsimd.h

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,20 @@ extern void fpsimd_signal_preserve_current_state(void);
5656
extern void fpsimd_preserve_current_state(void);
5757
extern void fpsimd_restore_current_state(void);
5858
extern void fpsimd_update_current_state(struct user_fpsimd_state const *state);
59+
extern void fpsimd_kvm_prepare(void);
60+
61+
struct cpu_fp_state {
62+
struct user_fpsimd_state *st;
63+
void *sve_state;
64+
void *za_state;
65+
u64 *svcr;
66+
unsigned int sve_vl;
67+
unsigned int sme_vl;
68+
enum fp_type *fp_type;
69+
enum fp_type to_save;
70+
};
5971

60-
extern void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *state,
61-
void *sve_state, unsigned int sve_vl,
62-
void *za_state, unsigned int sme_vl,
63-
u64 *svcr);
72+
extern void fpsimd_bind_state_to_cpu(struct cpu_fp_state *fp_state);
6473

6574
extern void fpsimd_flush_task_state(struct task_struct *target);
6675
extern void fpsimd_save_and_flush_cpu_state(void);

arch/arm64/include/asm/kvm_host.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,18 @@ struct vcpu_reset_state {
306306
struct kvm_vcpu_arch {
307307
struct kvm_cpu_context ctxt;
308308

309-
/* Guest floating point state */
309+
/*
310+
* Guest floating point state
311+
*
312+
* The architecture has two main floating point extensions,
313+
* the original FPSIMD and SVE. These have overlapping
314+
* register views, with the FPSIMD V registers occupying the
315+
* low 128 bits of the SVE Z registers. When the core
316+
* floating point code saves the register state of a task it
317+
* records which view it saved in fp_type.
318+
*/
310319
void *sve_state;
320+
enum fp_type fp_type;
311321
unsigned int sve_max_vl;
312322
u64 svcr;
313323

arch/arm64/include/asm/processor.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,12 @@ enum vec_type {
122122
ARM64_VEC_MAX,
123123
};
124124

125+
enum fp_type {
126+
FP_STATE_CURRENT, /* Save based on current task state. */
127+
FP_STATE_FPSIMD,
128+
FP_STATE_SVE,
129+
};
130+
125131
struct cpu_context {
126132
unsigned long x19;
127133
unsigned long x20;
@@ -152,6 +158,7 @@ struct thread_struct {
152158
struct user_fpsimd_state fpsimd_state;
153159
} uw;
154160

161+
enum fp_type fp_type; /* registers FPSIMD or SVE? */
155162
unsigned int fpsimd_cpu;
156163
void *sve_state; /* SVE registers, if any */
157164
void *za_state; /* ZA register, if any */

arch/arm64/kernel/fpsimd.c

Lines changed: 115 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -118,16 +118,8 @@
118118
* returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so
119119
* whatever is in the FPSIMD registers is not saved to memory, but discarded.
120120
*/
121-
struct fpsimd_last_state_struct {
122-
struct user_fpsimd_state *st;
123-
void *sve_state;
124-
void *za_state;
125-
u64 *svcr;
126-
unsigned int sve_vl;
127-
unsigned int sme_vl;
128-
};
129121

130-
static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
122+
static DEFINE_PER_CPU(struct cpu_fp_state, fpsimd_last_state);
131123

132124
__ro_after_init struct vl_info vl_info[ARM64_VEC_MAX] = {
133125
#ifdef CONFIG_ARM64_SVE
@@ -330,15 +322,6 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
330322
* The task can execute SVE instructions while in userspace without
331323
* trapping to the kernel.
332324
*
333-
* When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
334-
* corresponding Zn), P0-P15 and FFR are encoded in
335-
* task->thread.sve_state, formatted appropriately for vector
336-
* length task->thread.sve_vl or, if SVCR.SM is set,
337-
* task->thread.sme_vl.
338-
*
339-
* task->thread.sve_state must point to a valid buffer at least
340-
* sve_state_size(task) bytes in size.
341-
*
342325
* During any syscall, the kernel may optionally clear TIF_SVE and
343326
* discard the vector state except for the FPSIMD subset.
344327
*
@@ -348,15 +331,39 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
348331
* do_sve_acc() to be called, which does some preparation and then
349332
* sets TIF_SVE.
350333
*
351-
* When stored, FPSIMD registers V0-V31 are encoded in
334+
* During any syscall, the kernel may optionally clear TIF_SVE and
335+
* discard the vector state except for the FPSIMD subset.
336+
*
337+
* The data will be stored in one of two formats:
338+
*
339+
* * FPSIMD only - FP_STATE_FPSIMD:
340+
*
341+
* When only the FPSIMD state is stored, task->thread.fp_type is set to
342+
* FP_STATE_FPSIMD, the FPSIMD registers V0-V31 are encoded in
352343
* task->thread.uw.fpsimd_state; bits [max : 128] for each of Z0-Z31 are
353344
* logically zero but not stored anywhere; P0-P15 and FFR are not
354345
* stored and have unspecified values from userspace's point of
355346
* view. For hygiene purposes, the kernel zeroes them on next use,
356347
* but userspace is discouraged from relying on this.
357348
*
358349
* task->thread.sve_state does not need to be non-NULL, valid or any
359-
* particular size: it must not be dereferenced.
350+
* particular size: it must not be dereferenced and any data stored
351+
* there should be considered stale and not referenced.
352+
*
353+
* * SVE state - FP_STATE_SVE:
354+
*
355+
* When the full SVE state is stored, task->thread.fp_type is set to
356+
* FP_STATE_SVE and Z0-Z31 (incorporating Vn in bits[127:0] or the
357+
* corresponding Zn), P0-P15 and FFR are encoded in
358+
* task->thread.sve_state, formatted appropriately for vector
359+
* length task->thread.sve_vl or, if SVCR.SM is set,
360+
* task->thread.sme_vl. The storage for the vector registers in
361+
* task->thread.uw.fpsimd_state should be ignored.
362+
*
363+
* task->thread.sve_state must point to a valid buffer at least
364+
* sve_state_size(task) bytes in size. The data stored in
365+
* task->thread.uw.fpsimd_state.vregs should be considered stale
366+
* and not referenced.
360367
*
361368
* * FPSR and FPCR are always stored in task->thread.uw.fpsimd_state
362369
* irrespective of whether TIF_SVE is clear or set, since these are
@@ -378,11 +385,37 @@ static void task_fpsimd_load(void)
378385
WARN_ON(!system_supports_fpsimd());
379386
WARN_ON(!have_cpu_fpsimd_context());
380387

381-
/* Check if we should restore SVE first */
382-
if (IS_ENABLED(CONFIG_ARM64_SVE) && test_thread_flag(TIF_SVE)) {
383-
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
384-
restore_sve_regs = true;
385-
restore_ffr = true;
388+
if (system_supports_sve()) {
389+
switch (current->thread.fp_type) {
390+
case FP_STATE_FPSIMD:
391+
/* Stop tracking SVE for this task until next use. */
392+
if (test_and_clear_thread_flag(TIF_SVE))
393+
sve_user_disable();
394+
break;
395+
case FP_STATE_SVE:
396+
if (!thread_sm_enabled(&current->thread) &&
397+
!WARN_ON_ONCE(!test_and_set_thread_flag(TIF_SVE)))
398+
sve_user_enable();
399+
400+
if (test_thread_flag(TIF_SVE))
401+
sve_set_vq(sve_vq_from_vl(task_get_sve_vl(current)) - 1);
402+
403+
restore_sve_regs = true;
404+
restore_ffr = true;
405+
break;
406+
default:
407+
/*
408+
* This indicates either a bug in
409+
* fpsimd_save() or memory corruption, we
410+
* should always record an explicit format
411+
* when we save. We always at least have the
412+
* memory allocated for FPSIMD registers so
413+
* try that and hope for the best.
414+
*/
415+
WARN_ON_ONCE(1);
416+
clear_thread_flag(TIF_SVE);
417+
break;
418+
}
386419
}
387420

388421
/* Restore SME, override SVE register configuration if needed */
@@ -398,18 +431,19 @@ static void task_fpsimd_load(void)
398431
if (thread_za_enabled(&current->thread))
399432
za_load_state(current->thread.za_state);
400433

401-
if (thread_sm_enabled(&current->thread)) {
402-
restore_sve_regs = true;
434+
if (thread_sm_enabled(&current->thread))
403435
restore_ffr = system_supports_fa64();
404-
}
405436
}
406437

407-
if (restore_sve_regs)
438+
if (restore_sve_regs) {
439+
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_SVE);
408440
sve_load_state(sve_pffr(&current->thread),
409441
&current->thread.uw.fpsimd_state.fpsr,
410442
restore_ffr);
411-
else
443+
} else {
444+
WARN_ON_ONCE(current->thread.fp_type != FP_STATE_FPSIMD);
412445
fpsimd_load_state(&current->thread.uw.fpsimd_state);
446+
}
413447
}
414448

415449
/*
@@ -419,12 +453,12 @@ static void task_fpsimd_load(void)
419453
* last, if KVM is involved this may be the guest VM context rather
420454
* than the host thread for the VM pointed to by current. This means
421455
* that we must always reference the state storage via last rather
422-
* than via current, other than the TIF_ flags which KVM will
423-
* carefully maintain for us.
456+
* than via current. If we are saving KVM state then KVM will have
457+
* ensured that the type of registers to save is set in last->to_save.
424458
*/
425459
static void fpsimd_save(void)
426460
{
427-
struct fpsimd_last_state_struct const *last =
461+
struct cpu_fp_state const *last =
428462
this_cpu_ptr(&fpsimd_last_state);
429463
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
430464
bool save_sve_regs = false;
@@ -437,7 +471,14 @@ static void fpsimd_save(void)
437471
if (test_thread_flag(TIF_FOREIGN_FPSTATE))
438472
return;
439473

440-
if (test_thread_flag(TIF_SVE)) {
474+
/*
475+
* If a task is in a syscall the ABI allows us to only
476+
* preserve the state shared with FPSIMD so don't bother
477+
* saving the full SVE state in that case.
478+
*/
479+
if ((last->to_save == FP_STATE_CURRENT && test_thread_flag(TIF_SVE) &&
480+
!in_syscall(current_pt_regs())) ||
481+
last->to_save == FP_STATE_SVE) {
441482
save_sve_regs = true;
442483
save_ffr = true;
443484
vl = last->sve_vl;
@@ -474,8 +515,10 @@ static void fpsimd_save(void)
474515
sve_save_state((char *)last->sve_state +
475516
sve_ffr_offset(vl),
476517
&last->st->fpsr, save_ffr);
518+
*last->fp_type = FP_STATE_SVE;
477519
} else {
478520
fpsimd_save_state(last->st);
521+
*last->fp_type = FP_STATE_FPSIMD;
479522
}
480523
}
481524

@@ -768,8 +811,7 @@ void fpsimd_sync_to_sve(struct task_struct *task)
768811
*/
769812
void sve_sync_to_fpsimd(struct task_struct *task)
770813
{
771-
if (test_tsk_thread_flag(task, TIF_SVE) ||
772-
thread_sm_enabled(&task->thread))
814+
if (task->thread.fp_type == FP_STATE_SVE)
773815
sve_to_fpsimd(task);
774816
}
775817

@@ -848,8 +890,10 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
848890

849891
fpsimd_flush_task_state(task);
850892
if (test_and_clear_tsk_thread_flag(task, TIF_SVE) ||
851-
thread_sm_enabled(&task->thread))
893+
thread_sm_enabled(&task->thread)) {
852894
sve_to_fpsimd(task);
895+
task->thread.fp_type = FP_STATE_FPSIMD;
896+
}
853897

854898
if (system_supports_sme() && type == ARM64_VEC_SME) {
855899
task->thread.svcr &= ~(SVCR_SM_MASK |
@@ -1368,6 +1412,7 @@ static void sve_init_regs(void)
13681412
fpsimd_bind_task_to_cpu();
13691413
} else {
13701414
fpsimd_to_sve(current);
1415+
current->thread.fp_type = FP_STATE_SVE;
13711416
}
13721417
}
13731418

@@ -1596,6 +1641,8 @@ void fpsimd_flush_thread(void)
15961641
current->thread.svcr = 0;
15971642
}
15981643

1644+
current->thread.fp_type = FP_STATE_FPSIMD;
1645+
15991646
put_cpu_fpsimd_context();
16001647
kfree(sve_state);
16011648
kfree(za_state);
@@ -1627,15 +1674,39 @@ void fpsimd_signal_preserve_current_state(void)
16271674
sve_to_fpsimd(current);
16281675
}
16291676

1677+
/*
1678+
* Called by KVM when entering the guest.
1679+
*/
1680+
void fpsimd_kvm_prepare(void)
1681+
{
1682+
if (!system_supports_sve())
1683+
return;
1684+
1685+
/*
1686+
* KVM does not save host SVE state since we can only enter
1687+
* the guest from a syscall so the ABI means that only the
1688+
* FPSIMD subset of the state needs to be saved. If we have left
1689+
* SVE enabled for performance reasons then update the task
1690+
* state to be FPSIMD only.
1691+
*/
1692+
get_cpu_fpsimd_context();
1693+
1694+
if (test_and_clear_thread_flag(TIF_SVE)) {
1695+
sve_to_fpsimd(current);
1696+
current->thread.fp_type = FP_STATE_FPSIMD;
1697+
}
1698+
1699+
put_cpu_fpsimd_context();
1700+
}
1701+
16301702
/*
16311703
* Associate current's FPSIMD context with this cpu
16321704
* The caller must have ownership of the cpu FPSIMD context before calling
16331705
* this function.
16341706
*/
16351707
static void fpsimd_bind_task_to_cpu(void)
16361708
{
1637-
struct fpsimd_last_state_struct *last =
1638-
this_cpu_ptr(&fpsimd_last_state);
1709+
struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
16391710

16401711
WARN_ON(!system_supports_fpsimd());
16411712
last->st = &current->thread.uw.fpsimd_state;
@@ -1644,6 +1715,8 @@ static void fpsimd_bind_task_to_cpu(void)
16441715
last->sve_vl = task_get_sve_vl(current);
16451716
last->sme_vl = task_get_sme_vl(current);
16461717
last->svcr = &current->thread.svcr;
1718+
last->fp_type = &current->thread.fp_type;
1719+
last->to_save = FP_STATE_CURRENT;
16471720
current->thread.fpsimd_cpu = smp_processor_id();
16481721

16491722
/*
@@ -1665,22 +1738,14 @@ static void fpsimd_bind_task_to_cpu(void)
16651738
}
16661739
}
16671740

1668-
void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
1669-
unsigned int sve_vl, void *za_state,
1670-
unsigned int sme_vl, u64 *svcr)
1741+
void fpsimd_bind_state_to_cpu(struct cpu_fp_state *state)
16711742
{
1672-
struct fpsimd_last_state_struct *last =
1673-
this_cpu_ptr(&fpsimd_last_state);
1743+
struct cpu_fp_state *last = this_cpu_ptr(&fpsimd_last_state);
16741744

16751745
WARN_ON(!system_supports_fpsimd());
16761746
WARN_ON(!in_softirq() && !irqs_disabled());
16771747

1678-
last->st = st;
1679-
last->svcr = svcr;
1680-
last->sve_state = sve_state;
1681-
last->za_state = za_state;
1682-
last->sve_vl = sve_vl;
1683-
last->sme_vl = sme_vl;
1748+
*last = *state;
16841749
}
16851750

16861751
/*

arch/arm64/kernel/process.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,8 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
331331
clear_tsk_thread_flag(dst, TIF_SME);
332332
}
333333

334+
dst->thread.fp_type = FP_STATE_FPSIMD;
335+
334336
/* clear any pending asynchronous tag fault raised by the parent */
335337
clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);
336338

0 commit comments

Comments
 (0)