Skip to content

Commit 8bd7f91

Browse files
broonie authored and ctmarinas committed
arm64/sme: Implement traps and syscall handling for SME
By default all SME operations in userspace will trap. When this happens we allocate storage space for the SME register state, set up the SVE registers and disable traps. We do not need to initialize ZA since the architecture guarantees that it will be zeroed when enabled and when we trap ZA is disabled. On syscall we exit streaming mode if we were previously in it and ensure that all but the lower 128 bits of the registers are zeroed while preserving the state of ZA. This follows the aarch64 PCS for SME, ZA state is preserved over a function call and streaming mode is exited. Since the traps for SME do not distinguish between streaming mode SVE and ZA usage if ZA is in use rather than reenabling traps we instead zero the parts of the SVE registers not shared with FPSIMD and leave SME enabled, this simplifies handling SME traps. If ZA is not in use then we reenable SME traps and fall through to normal handling of SVE. Signed-off-by: Mark Brown <[email protected]> Reviewed-by: Catalin Marinas <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Catalin Marinas <[email protected]>
1 parent 0033cd9 commit 8bd7f91

File tree

7 files changed

+255
-23
lines changed

7 files changed

+255
-23
lines changed

arch/arm64/include/asm/esr.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@
7676
#define ESR_ELx_IL_SHIFT (25)
7777
#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT)
7878
#define ESR_ELx_ISS_MASK (ESR_ELx_IL - 1)
79+
#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK)
7980

8081
/* ISS field definitions shared by different classes */
8182
#define ESR_ELx_WNR_SHIFT (6)

arch/arm64/include/asm/exception.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
6464
struct pt_regs *regs);
6565
void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs);
6666
void do_sve_acc(unsigned int esr, struct pt_regs *regs);
67+
void do_sme_acc(unsigned int esr, struct pt_regs *regs);
6768
void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs);
6869
void do_sysinstr(unsigned int esr, struct pt_regs *regs);
6970
void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs);

arch/arm64/include/asm/fpsimd.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,8 @@ static inline bool sve_vq_available(unsigned int vq)
239239
return vq_available(ARM64_VEC_SVE, vq);
240240
}
241241

242+
size_t sve_state_size(struct task_struct const *task);
243+
242244
#else /* ! CONFIG_ARM64_SVE */
243245

244246
static inline void sve_alloc(struct task_struct *task) { }
@@ -278,10 +280,25 @@ static inline void vec_update_vq_map(enum vec_type t) { }
278280
static inline int vec_verify_vq_map(enum vec_type t) { return 0; }
279281
static inline void sve_setup(void) { }
280282

283+
static inline size_t sve_state_size(struct task_struct const *task)
284+
{
285+
return 0;
286+
}
287+
281288
#endif /* ! CONFIG_ARM64_SVE */
282289

283290
#ifdef CONFIG_ARM64_SME
284291

292+
static inline void sme_user_disable(void)
293+
{
294+
sysreg_clear_set(cpacr_el1, CPACR_EL1_SMEN_EL0EN, 0);
295+
}
296+
297+
static inline void sme_user_enable(void)
298+
{
299+
sysreg_clear_set(cpacr_el1, 0, CPACR_EL1_SMEN_EL0EN);
300+
}
301+
285302
static inline void sme_smstart_sm(void)
286303
{
287304
asm volatile(__msr_s(SYS_SVCR_SMSTART_SM_EL0, "xzr"));
@@ -309,23 +326,45 @@ static inline int sme_max_virtualisable_vl(void)
309326
return vec_max_virtualisable_vl(ARM64_VEC_SME);
310327
}
311328

329+
extern void sme_alloc(struct task_struct *task);
312330
extern unsigned int sme_get_vl(void);
313331
extern int sme_set_current_vl(unsigned long arg);
314332
extern int sme_get_current_vl(void);
315333

334+
/*
335+
* Return how many bytes of memory are required to store the full SME
336+
* specific state (currently just ZA) for task, given task's currently
337+
* configured vector length.
338+
*/
339+
static inline size_t za_state_size(struct task_struct const *task)
340+
{
341+
unsigned int vl = task_get_sme_vl(task);
342+
343+
return ZA_SIG_REGS_SIZE(sve_vq_from_vl(vl));
344+
}
345+
316346
#else
317347

348+
static inline void sme_user_disable(void) { BUILD_BUG(); }
349+
static inline void sme_user_enable(void) { BUILD_BUG(); }
350+
318351
static inline void sme_smstart_sm(void) { }
319352
static inline void sme_smstop_sm(void) { }
320353
static inline void sme_smstop(void) { }
321354

355+
static inline void sme_alloc(struct task_struct *task) { }
322356
static inline void sme_setup(void) { }
323357
static inline unsigned int sme_get_vl(void) { return 0; }
324358
static inline int sme_max_vl(void) { return 0; }
325359
static inline int sme_max_virtualisable_vl(void) { return 0; }
326360
static inline int sme_set_current_vl(unsigned long arg) { return -EINVAL; }
327361
static inline int sme_get_current_vl(void) { return -EINVAL; }
328362

363+
static inline size_t za_state_size(struct task_struct const *task)
364+
{
365+
return 0;
366+
}
367+
329368
#endif /* ! CONFIG_ARM64_SME */
330369

331370
/* For use by EFI runtime services calls only */

arch/arm64/kernel/entry-common.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,6 +537,14 @@ static void noinstr el0_sve_acc(struct pt_regs *regs, unsigned long esr)
537537
exit_to_user_mode(regs);
538538
}
539539

540+
static void noinstr el0_sme_acc(struct pt_regs *regs, unsigned long esr)
541+
{
542+
enter_from_user_mode(regs);
543+
local_daif_restore(DAIF_PROCCTX);
544+
do_sme_acc(esr, regs);
545+
exit_to_user_mode(regs);
546+
}
547+
540548
static void noinstr el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr)
541549
{
542550
enter_from_user_mode(regs);
@@ -645,6 +653,9 @@ asmlinkage void noinstr el0t_64_sync_handler(struct pt_regs *regs)
645653
case ESR_ELx_EC_SVE:
646654
el0_sve_acc(regs, esr);
647655
break;
656+
case ESR_ELx_EC_SME:
657+
el0_sme_acc(regs, esr);
658+
break;
648659
case ESR_ELx_EC_FP_EXC64:
649660
el0_fpsimd_exc(regs, esr);
650661
break;

arch/arm64/kernel/fpsimd.c

Lines changed: 149 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,12 @@ static void set_sme_default_vl(int val)
209209
set_default_vl(ARM64_VEC_SME, val);
210210
}
211211

212+
static void sme_free(struct task_struct *);
213+
214+
#else
215+
216+
static inline void sme_free(struct task_struct *t) { }
217+
212218
#endif
213219

214220
DEFINE_PER_CPU(bool, fpsimd_context_busy);
@@ -676,7 +682,7 @@ static void sve_to_fpsimd(struct task_struct *task)
676682
* Return how many bytes of memory are required to store the full SVE
677683
* state for task, given task's currently configured vector length.
678684
*/
679-
static size_t sve_state_size(struct task_struct const *task)
685+
size_t sve_state_size(struct task_struct const *task)
680686
{
681687
unsigned int vl = 0;
682688

@@ -818,18 +824,22 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
818824
thread_sm_enabled(&task->thread))
819825
sve_to_fpsimd(task);
820826

821-
if (system_supports_sme() && type == ARM64_VEC_SME)
827+
if (system_supports_sme() && type == ARM64_VEC_SME) {
822828
task->thread.svcr &= ~(SYS_SVCR_EL0_SM_MASK |
823829
SYS_SVCR_EL0_ZA_MASK);
830+
clear_thread_flag(TIF_SME);
831+
}
824832

825833
if (task == current)
826834
put_cpu_fpsimd_context();
827835

828836
/*
829-
* Force reallocation of task SVE state to the correct size
830-
* on next use:
837+
* Force reallocation of task SVE and SME state to the correct
838+
* size on next use:
831839
*/
832840
sve_free(task);
841+
if (system_supports_sme() && type == ARM64_VEC_SME)
842+
sme_free(task);
833843

834844
task_set_vl(task, type, vl);
835845

@@ -1164,12 +1174,43 @@ void __init sve_setup(void)
11641174
void fpsimd_release_task(struct task_struct *dead_task)
11651175
{
11661176
__sve_free(dead_task);
1177+
sme_free(dead_task);
11671178
}
11681179

11691180
#endif /* CONFIG_ARM64_SVE */
11701181

11711182
#ifdef CONFIG_ARM64_SME
11721183

1184+
/* This will move to uapi/asm/sigcontext.h when signals are implemented */
1185+
#define ZA_SIG_REGS_SIZE(vq) ((vq * __SVE_VQ_BYTES) * (vq * __SVE_VQ_BYTES))
1186+
1187+
/*
1188+
* Ensure that task->thread.za_state is allocated and sufficiently large.
1189+
*
1190+
* This function should be used only in preparation for replacing
1191+
* task->thread.za_state with new data. The memory is always zeroed
1192+
* here to prevent stale data from showing through: this is done in
1193+
* the interest of testability and predictability, the architecture
1194+
* guarantees that when ZA is enabled it will be zeroed.
1195+
*/
1196+
void sme_alloc(struct task_struct *task)
1197+
{
1198+
if (task->thread.za_state) {
1199+
memset(task->thread.za_state, 0, za_state_size(task));
1200+
return;
1201+
}
1202+
1203+
/* This could potentially be up to 64K. */
1204+
task->thread.za_state =
1205+
kzalloc(za_state_size(task), GFP_KERNEL);
1206+
}
1207+
1208+
static void sme_free(struct task_struct *task)
1209+
{
1210+
kfree(task->thread.za_state);
1211+
task->thread.za_state = NULL;
1212+
}
1213+
11731214
void sme_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p)
11741215
{
11751216
/* Set priority for all PEs to architecturally defined minimum */
@@ -1279,6 +1320,29 @@ void __init sme_setup(void)
12791320

12801321
#endif /* CONFIG_ARM64_SME */
12811322

1323+
static void sve_init_regs(void)
1324+
{
1325+
/*
1326+
* Convert the FPSIMD state to SVE, zeroing all the state that
1327+
* is not shared with FPSIMD. If (as is likely) the current
1328+
* state is live in the registers then do this there and
1329+
* update our metadata for the current task including
1330+
* disabling the trap, otherwise update our in-memory copy.
1331+
* We are guaranteed to not be in streaming mode, we can only
1332+
* take a SVE trap when not in streaming mode and we can't be
1333+
* in streaming mode when taking a SME trap.
1334+
*/
1335+
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
1336+
unsigned long vq_minus_one =
1337+
sve_vq_from_vl(task_get_sve_vl(current)) - 1;
1338+
sve_set_vq(vq_minus_one);
1339+
sve_flush_live(true, vq_minus_one);
1340+
fpsimd_bind_task_to_cpu();
1341+
} else {
1342+
fpsimd_to_sve(current);
1343+
}
1344+
}
1345+
12821346
/*
12831347
* Trapped SVE access
12841348
*
@@ -1310,22 +1374,77 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
13101374
WARN_ON(1); /* SVE access shouldn't have trapped */
13111375

13121376
/*
1313-
* Convert the FPSIMD state to SVE, zeroing all the state that
1314-
* is not shared with FPSIMD. If (as is likely) the current
1315-
* state is live in the registers then do this there and
1316-
* update our metadata for the current task including
1317-
* disabling the trap, otherwise update our in-memory copy.
1377+
* Even if the task can have used streaming mode we can only
1378+
* generate SVE access traps in normal SVE mode and
1379+
* transitioning out of streaming mode may discard any
1380+
* streaming mode state. Always clear the high bits to avoid
1381+
* any potential errors tracking what is properly initialised.
1382+
*/
1383+
sve_init_regs();
1384+
1385+
put_cpu_fpsimd_context();
1386+
}
1387+
1388+
/*
1389+
* Trapped SME access
1390+
*
1391+
* Storage is allocated for the full SVE and SME state, the current
1392+
* FPSIMD register contents are migrated to SVE if SVE is not already
1393+
* active, and the access trap is disabled.
1394+
*
1395+
* TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
1396+
* would have disabled the SME access trap for userspace during
1397+
* ret_to_user, making an SVE access trap impossible in that case.
1398+
*/
1399+
void do_sme_acc(unsigned int esr, struct pt_regs *regs)
1400+
{
1401+
/* Even if we chose not to use SME, the hardware could still trap: */
1402+
if (unlikely(!system_supports_sme()) || WARN_ON(is_compat_task())) {
1403+
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
1404+
return;
1405+
}
1406+
1407+
/*
1408+
* If this not a trap due to SME being disabled then something
1409+
* is being used in the wrong mode, report as SIGILL.
13181410
*/
1411+
if (ESR_ELx_ISS(esr) != ESR_ELx_SME_ISS_SME_DISABLED) {
1412+
force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc, 0);
1413+
return;
1414+
}
1415+
1416+
sve_alloc(current);
1417+
sme_alloc(current);
1418+
if (!current->thread.sve_state || !current->thread.za_state) {
1419+
force_sig(SIGKILL);
1420+
return;
1421+
}
1422+
1423+
get_cpu_fpsimd_context();
1424+
1425+
/* With TIF_SME userspace shouldn't generate any traps */
1426+
if (test_and_set_thread_flag(TIF_SME))
1427+
WARN_ON(1);
1428+
13191429
if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
13201430
unsigned long vq_minus_one =
1321-
sve_vq_from_vl(task_get_sve_vl(current)) - 1;
1322-
sve_set_vq(vq_minus_one);
1323-
sve_flush_live(true, vq_minus_one);
1431+
sve_vq_from_vl(task_get_sme_vl(current)) - 1;
1432+
sme_set_vq(vq_minus_one);
1433+
13241434
fpsimd_bind_task_to_cpu();
1325-
} else {
1326-
fpsimd_to_sve(current);
13271435
}
13281436

1437+
/*
1438+
* If SVE was not already active initialise the SVE registers,
1439+
* any non-shared state between the streaming and regular SVE
1440+
* registers is architecturally guaranteed to be zeroed when
1441+
* we enter streaming mode. We do not need to initialize ZA
1442+
* since ZA must be disabled at this point and enabling ZA is
1443+
* architecturally defined to zero ZA.
1444+
*/
1445+
if (system_supports_sve() && !test_thread_flag(TIF_SVE))
1446+
sve_init_regs();
1447+
13291448
put_cpu_fpsimd_context();
13301449
}
13311450

@@ -1442,8 +1561,12 @@ void fpsimd_flush_thread(void)
14421561
fpsimd_flush_thread_vl(ARM64_VEC_SVE);
14431562
}
14441563

1445-
if (system_supports_sme())
1564+
if (system_supports_sme()) {
1565+
clear_thread_flag(TIF_SME);
1566+
sme_free(current);
14461567
fpsimd_flush_thread_vl(ARM64_VEC_SME);
1568+
current->thread.svcr = 0;
1569+
}
14471570

14481571
put_cpu_fpsimd_context();
14491572
}
@@ -1493,14 +1616,22 @@ static void fpsimd_bind_task_to_cpu(void)
14931616
last->svcr = &current->thread.svcr;
14941617
current->thread.fpsimd_cpu = smp_processor_id();
14951618

1619+
/*
1620+
* Toggle SVE and SME trapping for userspace if needed, these
1621+
* are serialsied by ret_to_user().
1622+
*/
1623+
if (system_supports_sme()) {
1624+
if (test_thread_flag(TIF_SME))
1625+
sme_user_enable();
1626+
else
1627+
sme_user_disable();
1628+
}
1629+
14961630
if (system_supports_sve()) {
1497-
/* Toggle SVE trapping for userspace if needed */
14981631
if (test_thread_flag(TIF_SVE))
14991632
sve_user_enable();
15001633
else
15011634
sve_user_disable();
1502-
1503-
/* Serialised by exception return to user */
15041635
}
15051636
}
15061637

0 commit comments

Comments (0)