@@ -123,6 +123,7 @@ struct fpsimd_last_state_struct {
123
123
void * sve_state ;
124
124
u64 * svcr ;
125
125
unsigned int sve_vl ;
126
+ unsigned int sme_vl ;
126
127
};
127
128
128
129
static DEFINE_PER_CPU (struct fpsimd_last_state_struct , fpsimd_last_state ) ;
@@ -301,25 +302,37 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
301
302
task -> thread .vl_onexec [type ] = vl ;
302
303
}
303
304
305
+ /*
306
+ * TIF_SME controls whether a task can use SME without trapping while
307
+ * in userspace; when TIF_SME is set we must have storage
308
+ * allocated in sve_state and za_state to store the contents of both ZA
309
+ * and the SVE registers for both streaming and non-streaming modes.
310
+ *
311
+ * If both SVCR.ZA and SVCR.SM are disabled then at any point we
312
+ * may disable TIF_SME and reenable traps.
313
+ */
314
+
315
+
304
316
/*
305
317
* TIF_SVE controls whether a task can use SVE without trapping while
306
- * in userspace, and also the way a task's FPSIMD/SVE state is stored
307
- * in thread_struct.
318
+ * in userspace, and also (together with TIF_SME) the way a task's
319
+ * FPSIMD/SVE state is stored in thread_struct.
308
320
*
309
321
* The kernel uses this flag to track whether a user task is actively
310
322
* using SVE, and therefore whether full SVE register state needs to
311
323
* be tracked. If not, the cheaper FPSIMD context handling code can
312
324
* be used instead of the more costly SVE equivalents.
313
325
*
314
- * * TIF_SVE set:
326
+ * * TIF_SVE or SVCR.SM set:
315
327
*
316
328
* The task can execute SVE instructions while in userspace without
317
329
* trapping to the kernel.
318
330
*
319
331
* When stored, Z0-Z31 (incorporating Vn in bits[127:0] of the
320
332
* corresponding Zn), P0-P15 and FFR are encoded in
321
333
* task->thread.sve_state, formatted appropriately for vector
322
- * length task->thread.sve_vl.
334
+ * length task->thread.sve_vl or, if SVCR.SM is set,
335
+ * task->thread.sme_vl.
323
336
*
324
337
* task->thread.sve_state must point to a valid buffer at least
325
338
* sve_state_size(task) bytes in size.
@@ -357,19 +370,40 @@ void task_set_vl_onexec(struct task_struct *task, enum vec_type type,
357
370
*/
358
371
static void task_fpsimd_load (void )
359
372
{
373
+ bool restore_sve_regs = false;
374
+ bool restore_ffr ;
375
+
360
376
WARN_ON (!system_supports_fpsimd ());
361
377
WARN_ON (!have_cpu_fpsimd_context ());
362
378
363
- if (IS_ENABLED (CONFIG_ARM64_SME ) && test_thread_flag (TIF_SME ))
364
- write_sysreg_s (current -> thread .svcr , SYS_SVCR_EL0 );
365
-
379
+ /* Check if we should restore SVE first */
366
380
if (IS_ENABLED (CONFIG_ARM64_SVE ) && test_thread_flag (TIF_SVE )) {
367
381
sve_set_vq (sve_vq_from_vl (task_get_sve_vl (current )) - 1 );
382
+ restore_sve_regs = true;
383
+ restore_ffr = true;
384
+ }
385
+
386
+ /* Restore SME, override SVE register configuration if needed */
387
+ if (system_supports_sme ()) {
388
+ unsigned long sme_vl = task_get_sme_vl (current );
389
+
390
+ if (test_thread_flag (TIF_SME ))
391
+ sme_set_vq (sve_vq_from_vl (sme_vl ) - 1 );
392
+
393
+ write_sysreg_s (current -> thread .svcr , SYS_SVCR_EL0 );
394
+
395
+ if (thread_sm_enabled (& current -> thread )) {
396
+ restore_sve_regs = true;
397
+ restore_ffr = system_supports_fa64 ();
398
+ }
399
+ }
400
+
401
+ if (restore_sve_regs )
368
402
sve_load_state (sve_pffr (& current -> thread ),
369
- & current -> thread .uw .fpsimd_state .fpsr , true);
370
- } else {
403
+ & current -> thread .uw .fpsimd_state .fpsr ,
404
+ restore_ffr );
405
+ else
371
406
fpsimd_load_state (& current -> thread .uw .fpsimd_state );
372
- }
373
407
}
374
408
375
409
/*
@@ -387,22 +421,43 @@ static void fpsimd_save(void)
387
421
struct fpsimd_last_state_struct const * last =
388
422
this_cpu_ptr (& fpsimd_last_state );
389
423
/* set by fpsimd_bind_task_to_cpu() or fpsimd_bind_state_to_cpu() */
424
+ bool save_sve_regs = false;
425
+ bool save_ffr ;
426
+ unsigned int vl ;
390
427
391
428
WARN_ON (!system_supports_fpsimd ());
392
429
WARN_ON (!have_cpu_fpsimd_context ());
393
430
394
431
if (test_thread_flag (TIF_FOREIGN_FPSTATE ))
395
432
return ;
396
433
397
- if (IS_ENABLED (CONFIG_ARM64_SME ) &&
398
- test_thread_flag (TIF_SME )) {
434
+ if (test_thread_flag (TIF_SVE )) {
435
+ save_sve_regs = true;
436
+ save_ffr = true;
437
+ vl = last -> sve_vl ;
438
+ }
439
+
440
+ if (system_supports_sme ()) {
399
441
u64 * svcr = last -> svcr ;
400
442
* svcr = read_sysreg_s (SYS_SVCR_EL0 );
443
+
444
+ if (thread_za_enabled (& current -> thread )) {
445
+ /* ZA state management is not implemented yet */
446
+ force_signal_inject (SIGKILL , SI_KERNEL , 0 , 0 );
447
+ return ;
448
+ }
449
+
450
+ /* If we are in streaming mode override regular SVE. */
451
+ if (* svcr & SYS_SVCR_EL0_SM_MASK ) {
452
+ save_sve_regs = true;
453
+ save_ffr = system_supports_fa64 ();
454
+ vl = last -> sme_vl ;
455
+ }
401
456
}
402
457
403
- if (IS_ENABLED (CONFIG_ARM64_SVE ) &&
404
- test_thread_flag ( TIF_SVE )) {
405
- if (WARN_ON (sve_get_vl () != last -> sve_vl )) {
458
+ if (IS_ENABLED (CONFIG_ARM64_SVE ) && save_sve_regs ) {
459
+ /* Get the configured VL from RDVL, will account for SM */
460
+ if (WARN_ON (sve_get_vl () != vl )) {
406
461
/*
407
462
* Can't save the user regs, so current would
408
463
* re-enter user with corrupt state.
@@ -413,8 +468,8 @@ static void fpsimd_save(void)
413
468
}
414
469
415
470
sve_save_state ((char * )last -> sve_state +
416
- sve_ffr_offset (last -> sve_vl ),
417
- & last -> st -> fpsr , true );
471
+ sve_ffr_offset (vl ),
472
+ & last -> st -> fpsr , save_ffr );
418
473
} else {
419
474
fpsimd_save_state (last -> st );
420
475
}
@@ -619,7 +674,14 @@ static void sve_to_fpsimd(struct task_struct *task)
619
674
*/
620
675
static size_t sve_state_size (struct task_struct const * task )
621
676
{
622
- return SVE_SIG_REGS_SIZE (sve_vq_from_vl (task_get_sve_vl (task )));
677
+ unsigned int vl = 0 ;
678
+
679
+ if (system_supports_sve ())
680
+ vl = task_get_sve_vl (task );
681
+ if (system_supports_sme ())
682
+ vl = max (vl , task_get_sme_vl (task ));
683
+
684
+ return SVE_SIG_REGS_SIZE (sve_vq_from_vl (vl ));
623
685
}
624
686
625
687
/*
@@ -748,7 +810,8 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type,
748
810
}
749
811
750
812
fpsimd_flush_task_state (task );
751
- if (test_and_clear_tsk_thread_flag (task , TIF_SVE ))
813
+ if (test_and_clear_tsk_thread_flag (task , TIF_SVE ) ||
814
+ thread_sm_enabled (& task -> thread ))
752
815
sve_to_fpsimd (task );
753
816
754
817
if (system_supports_sme () && type == ARM64_VEC_SME )
@@ -1375,6 +1438,9 @@ void fpsimd_flush_thread(void)
1375
1438
fpsimd_flush_thread_vl (ARM64_VEC_SVE );
1376
1439
}
1377
1440
1441
+ if (system_supports_sme ())
1442
+ fpsimd_flush_thread_vl (ARM64_VEC_SME );
1443
+
1378
1444
put_cpu_fpsimd_context ();
1379
1445
}
1380
1446
@@ -1418,6 +1484,7 @@ static void fpsimd_bind_task_to_cpu(void)
1418
1484
last -> st = & current -> thread .uw .fpsimd_state ;
1419
1485
last -> sve_state = current -> thread .sve_state ;
1420
1486
last -> sve_vl = task_get_sve_vl (current );
1487
+ last -> sme_vl = task_get_sme_vl (current );
1421
1488
last -> svcr = & current -> thread .svcr ;
1422
1489
current -> thread .fpsimd_cpu = smp_processor_id ();
1423
1490
@@ -1433,7 +1500,8 @@ static void fpsimd_bind_task_to_cpu(void)
1433
1500
}
1434
1501
1435
1502
void fpsimd_bind_state_to_cpu (struct user_fpsimd_state * st , void * sve_state ,
1436
- unsigned int sve_vl , u64 * svcr )
1503
+ unsigned int sve_vl , unsigned int sme_vl ,
1504
+ u64 * svcr )
1437
1505
{
1438
1506
struct fpsimd_last_state_struct * last =
1439
1507
this_cpu_ptr (& fpsimd_last_state );
@@ -1445,6 +1513,7 @@ void fpsimd_bind_state_to_cpu(struct user_fpsimd_state *st, void *sve_state,
1445
1513
last -> svcr = svcr ;
1446
1514
last -> sve_state = sve_state ;
1447
1515
last -> sve_vl = sve_vl ;
1516
+ last -> sme_vl = sme_vl ;
1448
1517
}
1449
1518
1450
1519
/*
0 commit comments