@@ -209,6 +209,12 @@ static void set_sme_default_vl(int val)
209
209
set_default_vl (ARM64_VEC_SME , val );
210
210
}
211
211
212
+ static void sme_free (struct task_struct * );
213
+
214
+ #else
215
+
216
+ static inline void sme_free (struct task_struct * t ) { }
217
+
212
218
#endif
213
219
214
220
DEFINE_PER_CPU (bool , fpsimd_context_busy );
@@ -676,7 +682,7 @@ static void sve_to_fpsimd(struct task_struct *task)
676
682
* Return how many bytes of memory are required to store the full SVE
677
683
* state for task, given task's currently configured vector length.
678
684
*/
679
- static size_t sve_state_size (struct task_struct const * task )
685
+ size_t sve_state_size (struct task_struct const * task )
680
686
{
681
687
unsigned int vl = 0 ;
682
688
@@ -818,18 +824,22 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
818
824
thread_sm_enabled (& task -> thread ))
819
825
sve_to_fpsimd (task );
820
826
821
- if (system_supports_sme () && type == ARM64_VEC_SME )
827
+ if (system_supports_sme () && type == ARM64_VEC_SME ) {
822
828
task -> thread .svcr &= ~(SYS_SVCR_EL0_SM_MASK |
823
829
SYS_SVCR_EL0_ZA_MASK );
830
+ clear_thread_flag (TIF_SME );
831
+ }
824
832
825
833
if (task == current )
826
834
put_cpu_fpsimd_context ();
827
835
828
836
/*
829
- * Force reallocation of task SVE state to the correct size
830
- * on next use:
837
+ * Force reallocation of task SVE and SME state to the correct
838
+ * size on next use:
831
839
*/
832
840
sve_free (task );
841
+ if (system_supports_sme () && type == ARM64_VEC_SME )
842
+ sme_free (task );
833
843
834
844
task_set_vl (task , type , vl );
835
845
@@ -1164,12 +1174,43 @@ void __init sve_setup(void)
1164
1174
/*
 * Release all per-task vector state when a task dies: both the SVE
 * state buffer and (via sme_free(), a no-op when SME is not built in)
 * the ZA state buffer.
 */
void fpsimd_release_task(struct task_struct *dead_task)
{
	__sve_free(dead_task);
	sme_free(dead_task);
}
1168
1179
1169
1180
#endif /* CONFIG_ARM64_SVE */
1170
1181
1171
1182
#ifdef CONFIG_ARM64_SME
1172
1183
1184
/* This will move to uapi/asm/sigcontext.h when signals are implemented */
/*
 * Size in bytes of the ZA matrix for a given vector quantum count:
 * ZA is a (vq * __SVE_VQ_BYTES) x (vq * __SVE_VQ_BYTES) byte array.
 * The parameter is parenthesized so expression arguments such as
 * ZA_SIG_REGS_SIZE(a + b) expand correctly (CERT PRE01-C).
 */
#define ZA_SIG_REGS_SIZE(vq) (((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES))
1186
+
1187
+ /*
1188
+ * Ensure that task->thread.za_state is allocated and sufficiently large.
1189
+ *
1190
+ * This function should be used only in preparation for replacing
1191
+ * task->thread.za_state with new data. The memory is always zeroed
1192
+ * here to prevent stale data from showing through: this is done in
1193
+ * the interest of testability and predictability, the architecture
1194
+ * guarantees that when ZA is enabled it will be zeroed.
1195
+ */
1196
+ void sme_alloc (struct task_struct * task )
1197
+ {
1198
+ if (task -> thread .za_state ) {
1199
+ memset (task -> thread .za_state , 0 , za_state_size (task ));
1200
+ return ;
1201
+ }
1202
+
1203
+ /* This could potentially be up to 64K. */
1204
+ task -> thread .za_state =
1205
+ kzalloc (za_state_size (task ), GFP_KERNEL );
1206
+ }
1207
+
1208
+ static void sme_free (struct task_struct * task )
1209
+ {
1210
+ kfree (task -> thread .za_state );
1211
+ task -> thread .za_state = NULL ;
1212
+ }
1213
+
1173
1214
void sme_kernel_enable (const struct arm64_cpu_capabilities * __always_unused p )
1174
1215
{
1175
1216
/* Set priority for all PEs to architecturally defined minimum */
@@ -1279,6 +1320,29 @@ void __init sme_setup(void)
1279
1320
1280
1321
#endif /* CONFIG_ARM64_SME */
1281
1322
1323
+ static void sve_init_regs (void )
1324
+ {
1325
+ /*
1326
+ * Convert the FPSIMD state to SVE, zeroing all the state that
1327
+ * is not shared with FPSIMD. If (as is likely) the current
1328
+ * state is live in the registers then do this there and
1329
+ * update our metadata for the current task including
1330
+ * disabling the trap, otherwise update our in-memory copy.
1331
+ * We are guaranteed to not be in streaming mode, we can only
1332
+ * take a SVE trap when not in streaming mode and we can't be
1333
+ * in streaming mode when taking a SME trap.
1334
+ */
1335
+ if (!test_thread_flag (TIF_FOREIGN_FPSTATE )) {
1336
+ unsigned long vq_minus_one =
1337
+ sve_vq_from_vl (task_get_sve_vl (current )) - 1 ;
1338
+ sve_set_vq (vq_minus_one );
1339
+ sve_flush_live (true, vq_minus_one );
1340
+ fpsimd_bind_task_to_cpu ();
1341
+ } else {
1342
+ fpsimd_to_sve (current );
1343
+ }
1344
+ }
1345
+
1282
1346
/*
1283
1347
* Trapped SVE access
1284
1348
*
@@ -1310,22 +1374,77 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
1310
1374
WARN_ON (1 ); /* SVE access shouldn't have trapped */
1311
1375
1312
1376
/*
1313
- * Convert the FPSIMD state to SVE, zeroing all the state that
1314
- * is not shared with FPSIMD. If (as is likely) the current
1315
- * state is live in the registers then do this there and
1316
- * update our metadata for the current task including
1317
- * disabling the trap, otherwise update our in-memory copy.
1377
+ * Even if the task can have used streaming mode we can only
1378
+ * generate SVE access traps in normal SVE mode and
1379
+ * transitioning out of streaming mode may discard any
1380
+ * streaming mode state. Always clear the high bits to avoid
1381
+ * any potential errors tracking what is properly initialised.
1382
+ */
1383
+ sve_init_regs ();
1384
+
1385
+ put_cpu_fpsimd_context ();
1386
+ }
1387
+
1388
+ /*
1389
+ * Trapped SME access
1390
+ *
1391
+ * Storage is allocated for the full SVE and SME state, the current
1392
+ * FPSIMD register contents are migrated to SVE if SVE is not already
1393
+ * active, and the access trap is disabled.
1394
+ *
1395
+ * TIF_SME should be clear on entry: otherwise, fpsimd_restore_current_state()
1396
+ * would have disabled the SME access trap for userspace during
1397
+ * ret_to_user, making an SVE access trap impossible in that case.
1398
+ */
1399
+ void do_sme_acc (unsigned int esr , struct pt_regs * regs )
1400
+ {
1401
+ /* Even if we chose not to use SME, the hardware could still trap: */
1402
+ if (unlikely (!system_supports_sme ()) || WARN_ON (is_compat_task ())) {
1403
+ force_signal_inject (SIGILL , ILL_ILLOPC , regs -> pc , 0 );
1404
+ return ;
1405
+ }
1406
+
1407
+ /*
1408
+ * If this not a trap due to SME being disabled then something
1409
+ * is being used in the wrong mode, report as SIGILL.
1318
1410
*/
1411
+ if (ESR_ELx_ISS (esr ) != ESR_ELx_SME_ISS_SME_DISABLED ) {
1412
+ force_signal_inject (SIGILL , ILL_ILLOPC , regs -> pc , 0 );
1413
+ return ;
1414
+ }
1415
+
1416
+ sve_alloc (current );
1417
+ sme_alloc (current );
1418
+ if (!current -> thread .sve_state || !current -> thread .za_state ) {
1419
+ force_sig (SIGKILL );
1420
+ return ;
1421
+ }
1422
+
1423
+ get_cpu_fpsimd_context ();
1424
+
1425
+ /* With TIF_SME userspace shouldn't generate any traps */
1426
+ if (test_and_set_thread_flag (TIF_SME ))
1427
+ WARN_ON (1 );
1428
+
1319
1429
if (!test_thread_flag (TIF_FOREIGN_FPSTATE )) {
1320
1430
unsigned long vq_minus_one =
1321
- sve_vq_from_vl (task_get_sve_vl (current )) - 1 ;
1322
- sve_set_vq (vq_minus_one );
1323
- sve_flush_live (true, vq_minus_one );
1431
+ sve_vq_from_vl (task_get_sme_vl (current )) - 1 ;
1432
+ sme_set_vq (vq_minus_one );
1433
+
1324
1434
fpsimd_bind_task_to_cpu ();
1325
- } else {
1326
- fpsimd_to_sve (current );
1327
1435
}
1328
1436
1437
+ /*
1438
+ * If SVE was not already active initialise the SVE registers,
1439
+ * any non-shared state between the streaming and regular SVE
1440
+ * registers is architecturally guaranteed to be zeroed when
1441
+ * we enter streaming mode. We do not need to initialize ZA
1442
+ * since ZA must be disabled at this point and enabling ZA is
1443
+ * architecturally defined to zero ZA.
1444
+ */
1445
+ if (system_supports_sve () && !test_thread_flag (TIF_SVE ))
1446
+ sve_init_regs ();
1447
+
1329
1448
put_cpu_fpsimd_context ();
1330
1449
}
1331
1450
@@ -1442,8 +1561,12 @@ void fpsimd_flush_thread(void)
1442
1561
fpsimd_flush_thread_vl (ARM64_VEC_SVE );
1443
1562
}
1444
1563
1445
- if (system_supports_sme ())
1564
+ if (system_supports_sme ()) {
1565
+ clear_thread_flag (TIF_SME );
1566
+ sme_free (current );
1446
1567
fpsimd_flush_thread_vl (ARM64_VEC_SME );
1568
+ current -> thread .svcr = 0 ;
1569
+ }
1447
1570
1448
1571
put_cpu_fpsimd_context ();
1449
1572
}
@@ -1493,14 +1616,22 @@ static void fpsimd_bind_task_to_cpu(void)
1493
1616
last -> svcr = & current -> thread .svcr ;
1494
1617
current -> thread .fpsimd_cpu = smp_processor_id ();
1495
1618
1619
+ /*
1620
+ * Toggle SVE and SME trapping for userspace if needed, these
1621
+ * are serialised by ret_to_user().
1622
+ */
1623
+ if (system_supports_sme ()) {
1624
+ if (test_thread_flag (TIF_SME ))
1625
+ sme_user_enable ();
1626
+ else
1627
+ sme_user_disable ();
1628
+ }
1629
+
1496
1630
if (system_supports_sve ()) {
1497
- /* Toggle SVE trapping for userspace if needed */
1498
1631
if (test_thread_flag (TIF_SVE ))
1499
1632
sve_user_enable ();
1500
1633
else
1501
1634
sve_user_disable ();
1502
-
1503
- /* Serialised by exception return to user */
1504
1635
}
1505
1636
}
1506
1637
0 commit comments