Skip to content

Commit 776b4a1

Browse files
brooniectmarinas
authored andcommitted
arm64/sme: Add ptrace support for ZA
The ZA array can be read and written with the NT_ARM_ZA. Similarly to our interface for the SVE vector registers the regset consists of a header with information on the current vector length followed by an optional register data payload, represented as for signals as a series of horizontal vectors from 0 to VL/8 in the endianness independent format used for vectors. On get if ZA is enabled then register data will be provided, otherwise it will be omitted. On set if register data is provided then ZA is enabled and initialized using the provided data, otherwise it is disabled. Signed-off-by: Mark Brown <[email protected]> Reviewed-by: Catalin Marinas <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Catalin Marinas <[email protected]>
1 parent e12310a commit 776b4a1

File tree

3 files changed

+201
-0
lines changed

3 files changed

+201
-0
lines changed

arch/arm64/include/uapi/asm/ptrace.h

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,62 @@ struct user_pac_generic_keys {
268268
__uint128_t apgakey;
269269
};
270270

271+
/* ZA state (NT_ARM_ZA) */
272+
273+
/*
 * Header found at the start of the NT_ARM_ZA regset. Any ZA register
 * data follows it at offset ZA_PT_ZA_OFFSET.
 */
struct user_za_header {
274+
__u32 size; /* total meaningful regset content in bytes */
275+
__u32 max_size; /* maximum possible size for this thread */
276+
__u16 vl; /* current vector length */
277+
__u16 max_vl; /* maximum possible vector length */
278+
__u16 flags; /* ZA_PT_* flags */
279+
__u16 __reserved;
280+
};
281+
282+
/*
283+
* Common ZA_PT_* flags:
284+
* These must be kept in sync with prctl interface in <linux/prctl.h>
285+
*/
286+
/*
 * Each flag is the corresponding prctl bit shifted down by 16 so that it
 * fits in the 16-bit flags field of struct user_za_header.
 */
#define ZA_PT_VL_INHERIT ((1 << 17) /* PR_SME_VL_INHERIT */ >> 16)
287+
/* Bit 18 of the prctl value, shifted down by 16 in the same way */
#define ZA_PT_VL_ONEXEC ((1 << 18) /* PR_SME_SET_VL_ONEXEC */ >> 16)
288+
289+
290+
/*
291+
* The remainder of the ZA state follows struct user_za_header. The
292+
* total size of the ZA state (including header) depends on the
293+
* metadata in the header: ZA_PT_SIZE(vq) gives the total size
294+
* of the state in bytes, including the header.
295+
*
296+
* Refer to <asm/sigcontext.h> for details of how to pass the correct
297+
* "vq" argument to these macros.
298+
*/
299+
300+
/* Offset from the start of struct user_za_header to the register data */
301+
/*
 * sizeof(struct user_za_header) rounded up to the next multiple of
 * __SVE_VQ_BYTES.
 */
#define ZA_PT_ZA_OFFSET \
302+
((sizeof(struct user_za_header) + (__SVE_VQ_BYTES - 1)) \
303+
/ __SVE_VQ_BYTES * __SVE_VQ_BYTES)
304+
305+
/*
306+
* The payload starts at offset ZA_PT_ZA_OFFSET, and is of size
307+
* ZA_PT_ZA_SIZE(vq).
308+
*
309+
* The ZA array is stored as a sequence of horizontal vectors ZAV of SVL/8
310+
* bytes each, starting from vector 0.
311+
*
312+
* Additional data might be appended in the future.
313+
*
314+
* The ZA matrix is represented in memory in an endianness-invariant layout
315+
* which differs from the layout used for the FPSIMD V-registers on big-endian
316+
* systems: see sigcontext.h for more explanation.
317+
*/
318+
319+
/*
 * Offset from the start of struct user_za_header to horizontal vector n
 * of the ZA payload, where each vector occupies vq * __SVE_VQ_BYTES bytes.
 *
 * Both arguments are parenthesized so that compound expressions such as
 * "vq + 1" or "i + 1" expand with the intended precedence.
 */
#define ZA_PT_ZAV_OFFSET(vq, n) \
	(ZA_PT_ZA_OFFSET + (((vq) * __SVE_VQ_BYTES) * (n)))
321+
322+
/*
 * Size in bytes of the ZA payload: (vq * __SVE_VQ_BYTES) horizontal
 * vectors of (vq * __SVE_VQ_BYTES) bytes each.
 *
 * The argument is parenthesized so that a compound expression such as
 * "vq + 1" is multiplied as a whole.
 */
#define ZA_PT_ZA_SIZE(vq) \
	(((vq) * __SVE_VQ_BYTES) * ((vq) * __SVE_VQ_BYTES))

/* Total size in bytes of the regset contents: header plus ZA payload */
#define ZA_PT_SIZE(vq) \
	(ZA_PT_ZA_OFFSET + ZA_PT_ZA_SIZE(vq))
326+
271327
#endif /* __ASSEMBLY__ */
272328

273329
#endif /* _UAPI__ASM_PTRACE_H */

arch/arm64/kernel/ptrace.c

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,141 @@ static int ssve_set(struct task_struct *target,
997997
ARM64_VEC_SME);
998998
}
999999

1000+
static int za_get(struct task_struct *target,
1001+
const struct user_regset *regset,
1002+
struct membuf to)
1003+
{
1004+
struct user_za_header header;
1005+
unsigned int vq;
1006+
unsigned long start, end;
1007+
1008+
if (!system_supports_sme())
1009+
return -EINVAL;
1010+
1011+
/* Header */
1012+
memset(&header, 0, sizeof(header));
1013+
1014+
if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT))
1015+
header.flags |= ZA_PT_VL_INHERIT;
1016+
1017+
header.vl = task_get_sme_vl(target);
1018+
vq = sve_vq_from_vl(header.vl);
1019+
header.max_vl = sme_max_vl();
1020+
header.max_size = ZA_PT_SIZE(vq);
1021+
1022+
/* If ZA is not active there is only the header */
1023+
if (thread_za_enabled(&target->thread))
1024+
header.size = ZA_PT_SIZE(vq);
1025+
else
1026+
header.size = ZA_PT_ZA_OFFSET;
1027+
1028+
membuf_write(&to, &header, sizeof(header));
1029+
1030+
BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header));
1031+
end = ZA_PT_ZA_OFFSET;
1032+
1033+
if (target == current)
1034+
fpsimd_preserve_current_state();
1035+
1036+
/* Any register data to include? */
1037+
if (thread_za_enabled(&target->thread)) {
1038+
start = end;
1039+
end = ZA_PT_SIZE(vq);
1040+
membuf_write(&to, target->thread.za_state, end - start);
1041+
}
1042+
1043+
/* Zero any trailing padding */
1044+
start = end;
1045+
end = ALIGN(header.size, SVE_VQ_BYTES);
1046+
return membuf_zero(&to, end - start);
1047+
}
1048+
1049+
/*
 * Write the NT_ARM_ZA regset of @target from @kbuf/@ubuf.
 *
 * The buffer must start with a struct user_za_header. Its vl and flags
 * are applied through vec_set_vector_length(). If no data follows the
 * header, ZA is disabled for the target; otherwise the data is copied
 * into target->thread.za_state and ZA is marked enabled.
 *
 * Returns 0 on success, -EINVAL if SME is unsupported or @count is
 * smaller than the header, -ENOMEM if SVE or ZA storage cannot be
 * allocated, -EIO if register data was supplied but the vector length
 * actually set differs from the one requested, or any error returned by
 * user_regset_copyin() or vec_set_vector_length().
 */
static int za_set(struct task_struct *target,
		  const struct user_regset *regset,
		  unsigned int pos, unsigned int count,
		  const void *kbuf, const void __user *ubuf)
{
	int ret;
	struct user_za_header header;
	unsigned int vq;
	unsigned long start, end;

	if (!system_supports_sme())
		return -EINVAL;

	/* Header */
	if (count < sizeof(header))
		return -EINVAL;
	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &header,
				 0, sizeof(header));
	if (ret)
		goto out;

	/*
	 * All current ZA_PT_* flags are consumed by
	 * vec_set_vector_length(), which will also validate them for
	 * us:
	 */
	ret = vec_set_vector_length(target, ARM64_VEC_SME, header.vl,
		((unsigned long)header.flags) << 16);
	if (ret)
		goto out;

	/* Actual VL set may be less than the user asked for: */
	vq = sve_vq_from_vl(task_get_sme_vl(target));

	/* Ensure there is some SVE storage for streaming mode */
	if (!target->thread.sve_state) {
		sve_alloc(target);
		if (!target->thread.sve_state) {
			/*
			 * The flag must be cleared on the target task, as
			 * the ZA allocation failure path below does; the
			 * single-argument clear_thread_flag() takes no task
			 * and so cannot refer to the target.
			 */
			clear_tsk_thread_flag(target, TIF_SME);
			ret = -ENOMEM;
			goto out;
		}
	}

	/* Allocate/reinit ZA storage */
	sme_alloc(target);
	if (!target->thread.za_state) {
		ret = -ENOMEM;
		clear_tsk_thread_flag(target, TIF_SME);
		goto out;
	}

	/* If there is no data then disable ZA */
	if (!count) {
		target->thread.svcr &= ~SYS_SVCR_EL0_ZA_MASK;
		goto out;
	}

	/*
	 * If setting a different VL from the requested VL and there is
	 * register data, the data layout will be wrong: don't even
	 * try to set the registers in this case.
	 */
	if (vq != sve_vq_from_vl(header.vl)) {
		ret = -EIO;
		goto out;
	}

	/* The payload is expected to start right after the header */
	BUILD_BUG_ON(ZA_PT_ZA_OFFSET != sizeof(header));
	start = ZA_PT_ZA_OFFSET;
	end = ZA_PT_SIZE(vq);
	ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf,
				 target->thread.za_state,
				 start, end);
	if (ret)
		goto out;

	/* Mark ZA as active and let userspace use it */
	set_tsk_thread_flag(target, TIF_SME);
	target->thread.svcr |= SYS_SVCR_EL0_ZA_MASK;

out:
	/* Runs on every path that got past the initial checks */
	fpsimd_flush_task_state(target);
	return ret;
}
1134+
10001135
#endif /* CONFIG_ARM64_SME */
10011136

10021137
#ifdef CONFIG_ARM64_PTR_AUTH
@@ -1218,6 +1353,7 @@ enum aarch64_regset {
12181353
#endif
12191354
#ifdef CONFIG_ARM64_SVE
12201355
REGSET_SSVE,
1356+
REGSET_ZA,
12211357
#endif
12221358
#ifdef CONFIG_ARM64_PTR_AUTH
12231359
REGSET_PAC_MASK,
@@ -1309,6 +1445,14 @@ static const struct user_regset aarch64_regsets[] = {
13091445
.regset_get = ssve_get,
13101446
.set = ssve_set,
13111447
},
1448+
[REGSET_ZA] = { /* SME ZA */
	.core_note_type = NT_ARM_ZA,
	/*
	 * The regset contents are a struct user_za_header followed by
	 * the ZA data, so size it with ZA_PT_SIZE() (header + payload)
	 * rather than ZA_PT_ZA_SIZE() (payload only). It is described
	 * as a number of SVE_VQ_BYTES sized units.
	 */
	.n = DIV_ROUND_UP(ZA_PT_SIZE(SVE_VQ_MAX), SVE_VQ_BYTES),
	.size = SVE_VQ_BYTES,
	.align = SVE_VQ_BYTES,
	.regset_get = za_get,
	.set = za_set,
},
13121456
#endif
13131457
#ifdef CONFIG_ARM64_PTR_AUTH
13141458
[REGSET_PAC_MASK] = {

include/uapi/linux/elf.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,7 @@ typedef struct elf64_shdr {
432432
#define NT_ARM_TAGGED_ADDR_CTRL 0x409 /* arm64 tagged address control (prctl()) */
433433
#define NT_ARM_PAC_ENABLED_KEYS 0x40a /* arm64 ptr auth enabled keys (prctl()) */
434434
#define NT_ARM_SSVE 0x40b /* ARM Streaming SVE registers */
435+
#define NT_ARM_ZA 0x40c /* ARM SME ZA array state (header + optional register data) */
435436
#define NT_ARC_V2 0x600 /* ARCv2 accumulator/extra registers */
436437
#define NT_VMCOREDD 0x700 /* Vmcore Device Dump Note */
437438
#define NT_MIPS_DSP 0x800 /* MIPS DSP ASE registers */

0 commit comments

Comments
 (0)