-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[lldb][debugserver] Read/write SME registers on arm64 #119171
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
2e3738e
87bdde7
1c94310
157a1b7
1fac08c
2e964a4
697340e
8daf124
110860f
6ad5249
76d88c2
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| C_SOURCES := main.c | ||
|
|
||
| CFLAGS_EXTRAS := -mcpu=apple-m4 | ||
|
|
||
| include Makefile.rules |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,163 @@ | ||
| import lldb | ||
| from lldbsuite.test.lldbtest import * | ||
| from lldbsuite.test.decorators import * | ||
| import lldbsuite.test.lldbutil as lldbutil | ||
| import os | ||
|
|
||
|
|
||
class TestSMERegistersDarwin(TestBase):
    """Read and write the SME/SVE register set of a Darwin arm64 inferior."""

    NO_DEBUG_INFO_TESTCASE = True
    mydir = TestBase.compute_mydir(__file__)

    def _write_offset_bytes(self, name, reg, count, delta):
        """Rewrite register `name` with every byte shifted by `delta`.

        Reads the first `count` byte children of `reg`, issues a
        `reg write` command with each byte replaced by (byte + delta), and
        returns the list of original byte values so the caller can verify
        the write later.
        """
        old_values = [
            reg.GetChildAtIndex(i).GetValueAsUnsigned() for i in range(count)
        ]
        new_bytes = "".join("0x%02x " % (value + delta) for value in old_values)
        self.runCmd("reg write %s %s" % (name, '"{' + new_bytes + '}"'))
        return old_values

    def _check_offset_bytes(self, frame, name, old_values, delta):
        """Assert every byte of register `name` equals its old value + delta."""
        reg = frame.register[name]
        for i in range(reg.GetNumChildren()):
            self.assertEqual(
                old_values[i] + delta, reg.GetChildAtIndex(i).GetValueAsUnsigned()
            )

    @skipIfRemote
    @skipUnlessDarwin
    @skipUnlessFeature("hw.optional.arm.FEAT_SME")
    @skipUnlessFeature("hw.optional.arm.FEAT_SME2")
    # thread_set_state/thread_get_state only available in macOS 15.4+
    @skipIf(macos_version=["<", "15.4"])
    def test(self):
        """Test that we can read and write the contents of the SME/SVE registers on Darwin"""
        self.build()
        (target, process, thread, bkpt) = lldbutil.run_to_source_breakpoint(
            self, "break here", lldb.SBFileSpec("main.c")
        )
        frame = thread.GetFrameAtIndex(0)
        self.assertTrue(frame.IsValid())

        if self.TraceOn():
            self.runCmd("reg read -a")

        # The checks below expect the z registers and each ZA row/column to
        # have exactly `svl` byte-sized children.
        svl = frame.register["svl"].GetValueAsUnsigned()
        # Floor division keeps the predicate length an int (one predicate
        # byte per eight vector bytes).
        pred_len = svl // 8

        # SSVE and SME modes should be enabled (reflecting PSTATE.SM and PSTATE.ZA)
        svcr = frame.register["svcr"]
        self.assertEqual(svcr.GetValueAsUnsigned(), 3)

        z0 = frame.register["z0"]
        self.assertEqual(z0.GetNumChildren(), svl)
        self.assertEqual(z0.GetChildAtIndex(0).GetValueAsUnsigned(), 0x1)
        self.assertEqual(z0.GetChildAtIndex(svl - 1).GetValueAsUnsigned(), 0x1)

        z31 = frame.register["z31"]
        self.assertEqual(z31.GetNumChildren(), svl)
        self.assertEqual(z31.GetChildAtIndex(0).GetValueAsUnsigned(), 32)
        self.assertEqual(z31.GetChildAtIndex(svl - 1).GetValueAsUnsigned(), 32)

        p0 = frame.register["p0"]
        self.assertEqual(p0.GetNumChildren(), pred_len)
        self.assertEqual(p0.GetChildAtIndex(0).GetValueAsUnsigned(), 0xFF)
        self.assertEqual(
            p0.GetChildAtIndex(p0.GetNumChildren() - 1).GetValueAsUnsigned(), 0xFF
        )

        p15 = frame.register["p15"]
        self.assertEqual(p15.GetNumChildren(), pred_len)
        self.assertEqual(p15.GetChildAtIndex(0).GetValueAsUnsigned(), 0xFF)
        self.assertEqual(
            p15.GetChildAtIndex(p15.GetNumChildren() - 1).GetValueAsUnsigned(), 0xFF
        )

        za = frame.register["za"]
        self.assertEqual(za.GetNumChildren(), (svl * svl))
        za_0 = za.GetChildAtIndex(0)
        self.assertEqual(za_0.GetValueAsUnsigned(), 4)
        za_final = za.GetChildAtIndex(za.GetNumChildren() - 1)
        self.assertEqual(za_final.GetValueAsUnsigned(), 67)

        zt0 = frame.register["zt0"]
        self.assertEqual(zt0.GetNumChildren(), 64)
        zt0_0 = zt0.GetChildAtIndex(0)
        self.assertEqual(zt0_0.GetValueAsUnsigned(), 0)
        zt0_final = zt0.GetChildAtIndex(63)
        self.assertEqual(zt0_final.GetValueAsUnsigned(), 63)

        # (register name, value read above, byte count, per-byte delta).
        # Each register gets a distinct delta so a write landing in the wrong
        # register would be detected by the verification pass.
        modifications = [
            ("z0", z0, svl, 5),
            ("z31", z31, svl, 3),
            ("p0", p0, pred_len, -5),
            ("p15", p15, pred_len, -8),
            ("za", za, svl * svl, 7),
            ("zt0", zt0, 64, 2),
        ]

        old_values = {}
        for name, reg, count, delta in modifications:
            old_values[name] = self._write_offset_bytes(name, reg, count, delta)

        # Resume for one instruction, then re-read the registers from a fresh
        # frame to confirm the written values are what the debugger reports.
        thread.StepInstruction(False)
        frame = thread.GetFrameAtIndex(0)

        if self.TraceOn():
            self.runCmd("reg read -a")

        for name, _, _, delta in modifications:
            self._check_offset_bytes(frame, name, old_values[name], delta)
DavidSpickett marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| /// Built with | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: |
||
| /// xcrun -sdk macosx.internal clang -mcpu=apple-m4 -g main.c -o sme | ||
|
|
||
| #include <stdint.h> | ||
| #include <stdio.h> | ||
| #include <stdlib.h> | ||
|
|
||
| void write_sve_regs() { | ||
| asm volatile("ptrue p0.b\n\t"); | ||
| asm volatile("ptrue p1.h\n\t"); | ||
| asm volatile("ptrue p2.s\n\t"); | ||
| asm volatile("ptrue p3.d\n\t"); | ||
| asm volatile("pfalse p4.b\n\t"); | ||
| asm volatile("ptrue p5.b\n\t"); | ||
| asm volatile("ptrue p6.h\n\t"); | ||
| asm volatile("ptrue p7.s\n\t"); | ||
| asm volatile("ptrue p8.d\n\t"); | ||
| asm volatile("pfalse p9.b\n\t"); | ||
| asm volatile("ptrue p10.b\n\t"); | ||
| asm volatile("ptrue p11.h\n\t"); | ||
| asm volatile("ptrue p12.s\n\t"); | ||
| asm volatile("ptrue p13.d\n\t"); | ||
| asm volatile("pfalse p14.b\n\t"); | ||
| asm volatile("ptrue p15.b\n\t"); | ||
|
|
||
| asm volatile("cpy z0.b, p0/z, #1\n\t"); | ||
| asm volatile("cpy z1.b, p5/z, #2\n\t"); | ||
| asm volatile("cpy z2.b, p10/z, #3\n\t"); | ||
| asm volatile("cpy z3.b, p15/z, #4\n\t"); | ||
| asm volatile("cpy z4.b, p0/z, #5\n\t"); | ||
| asm volatile("cpy z5.b, p5/z, #6\n\t"); | ||
| asm volatile("cpy z6.b, p10/z, #7\n\t"); | ||
| asm volatile("cpy z7.b, p15/z, #8\n\t"); | ||
| asm volatile("cpy z8.b, p0/z, #9\n\t"); | ||
| asm volatile("cpy z9.b, p5/z, #10\n\t"); | ||
| asm volatile("cpy z10.b, p10/z, #11\n\t"); | ||
| asm volatile("cpy z11.b, p15/z, #12\n\t"); | ||
| asm volatile("cpy z12.b, p0/z, #13\n\t"); | ||
| asm volatile("cpy z13.b, p5/z, #14\n\t"); | ||
| asm volatile("cpy z14.b, p10/z, #15\n\t"); | ||
| asm volatile("cpy z15.b, p15/z, #16\n\t"); | ||
| asm volatile("cpy z16.b, p0/z, #17\n\t"); | ||
| asm volatile("cpy z17.b, p5/z, #18\n\t"); | ||
| asm volatile("cpy z18.b, p10/z, #19\n\t"); | ||
| asm volatile("cpy z19.b, p15/z, #20\n\t"); | ||
| asm volatile("cpy z20.b, p0/z, #21\n\t"); | ||
| asm volatile("cpy z21.b, p5/z, #22\n\t"); | ||
| asm volatile("cpy z22.b, p10/z, #23\n\t"); | ||
| asm volatile("cpy z23.b, p15/z, #24\n\t"); | ||
| asm volatile("cpy z24.b, p0/z, #25\n\t"); | ||
| asm volatile("cpy z25.b, p5/z, #26\n\t"); | ||
| asm volatile("cpy z26.b, p10/z, #27\n\t"); | ||
| asm volatile("cpy z27.b, p15/z, #28\n\t"); | ||
| asm volatile("cpy z28.b, p0/z, #29\n\t"); | ||
| asm volatile("cpy z29.b, p5/z, #30\n\t"); | ||
| asm volatile("cpy z30.b, p10/z, #31\n\t"); | ||
| asm volatile("cpy z31.b, p15/z, #32\n\t"); | ||
| } | ||
|
|
||
| #define MAX_VL_BYTES 256 | ||
| void set_za_register(int svl, int value_offset) { | ||
| uint8_t data[MAX_VL_BYTES]; | ||
|
|
||
| // ldr za will actually wrap the selected vector row, by the number of rows | ||
| // you have. So setting one that didn't exist would actually set one that did. | ||
| // That's why we need the streaming vector length here. | ||
| for (int i = 0; i < svl; ++i) { | ||
| // This may involve instructions that require the smefa64 extension. | ||
| for (int j = 0; j < MAX_VL_BYTES; j++) | ||
| data[j] = i + value_offset; | ||
| // Each one of these loads a VL sized row of ZA. | ||
| asm volatile("mov w12, %w0\n\t" | ||
| "ldr za[w12, 0], [%1]\n\t" ::"r"(i), | ||
| "r"(&data) | ||
| : "w12"); | ||
| } | ||
| } | ||
|
|
||
| static uint16_t arm_sme_svl_b(void) { | ||
| uint64_t ret = 0; | ||
| asm volatile("rdsvl %[ret], #1" : [ret] "=r"(ret)); | ||
| return (uint16_t)ret; | ||
| } | ||
|
|
||
| void arm_sme2_set_zt0() { | ||
| #define ZTO_LEN (512 / 8) | ||
| uint8_t data[ZTO_LEN]; | ||
| for (unsigned i = 0; i < ZTO_LEN; ++i) | ||
| data[i] = i + 0; | ||
|
|
||
| asm volatile("ldr zt0, [%0]" ::"r"(&data)); | ||
| #undef ZT0_LEN | ||
| } | ||
|
|
||
| int main() { | ||
| printf("Enable SME mode\n"); | ||
|
|
||
| asm volatile("smstart"); | ||
|
|
||
| write_sve_regs(); | ||
|
|
||
| set_za_register(arm_sme_svl_b(), 4); | ||
|
|
||
| arm_sme2_set_zt0(); | ||
|
|
||
| int c = 10; // break here | ||
| c += 5; | ||
| c += 5; | ||
DavidSpickett marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| asm volatile("smstop"); | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -312,16 +312,21 @@ struct DNBRegisterValue { | |
| uint64_t uint64; | ||
| float float32; | ||
| double float64; | ||
| int8_t v_sint8[64]; | ||
| int16_t v_sint16[32]; | ||
| int32_t v_sint32[16]; | ||
| int64_t v_sint64[8]; | ||
| uint8_t v_uint8[64]; | ||
| uint16_t v_uint16[32]; | ||
| uint32_t v_uint32[16]; | ||
| uint64_t v_uint64[8]; | ||
| float v_float32[16]; | ||
| double v_float64[8]; | ||
| // AArch64 SME's ZA register max size is 64k, this object must be | ||
| // large enough to hold that much data. The current Apple cores | ||
| // have a much smaller maximum ZA reg size, but there are not | ||
| // multiple copies of this object so increase the static size to | ||
| // maximum possible. | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For Linux I remember heap allocating the object that represented the array register, because of the potential size. Perhaps that just uses a buffer in the background though. The problem you have with this is that even
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, this object is allocated to read/write a single register, so a read of x0 will be a 64k object. But looking at the debugserver sources, we don't store an array of them anywhere - we read / write individual registers one at a time with this object for a short time period, so I don't think the memory increase is a problem. It might be better to have a dynamically allocated size here though, as you did. I did that for the DNBArm64ArchImpl register contexts stored for each thread, where we will have one for each thread when stopped, that memory use made me more nervous.
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think for Linux we were also stack allocating the register value and I didn't want 64k stack frames everywhere we used one. d99d9d8 in case any of the concerns apply to debugserver also. (I am also very aware of these issues because in a previous job when we added MIPS MSA support we accidentally turned every register object into 512 bits, even the 8 and 16 bit ones we read from non-MIPS DSP chips)
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I looked at the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 on changing this to a heap object. This seems unnecessarily wasteful when not in SME mode, which I expect to remain the majority of the time.
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking of restructuring the internals of the object to heap-allocate the value space, which would require touching all of the DNBArchImpl back-ends, but actually just heap-allocating the object in RNBRemote (the main place this object is created) would be much easier than changing it at all. |
||
| int8_t v_sint8[65536]; | ||
| int16_t v_sint16[32768]; | ||
| int32_t v_sint32[16384]; | ||
| int64_t v_sint64[8192]; | ||
| uint8_t v_uint8[65536]; | ||
| uint16_t v_uint16[32768]; | ||
| uint32_t v_uint32[16384]; | ||
| uint64_t v_uint64[8192]; | ||
| float v_float32[16384]; | ||
| double v_float64[8192]; | ||
| void *pointer; | ||
| char *c_str; | ||
| } value; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: lots of magic values here but in all fairness that's consistent with the surrounding code. The comment covers the 8 byte granule so I'm not too concerned, though some constants might make this easier to read.