@@ -112,7 +112,7 @@ define void @za_zt0_shared_caller_za_zt0_shared_callee() "aarch64_inout_za" "aar
112112 ret void ;
113113}
114114
115- ; New-ZA Callee
115+ ; New-ZT0 Callee
116116
117117; Expect spill & fill of ZT0 around call
118118; Expect smstop/smstart za around call
@@ -134,6 +134,39 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
134134 ret void ;
135135}
136136
137+ ; New-ZT0 Callee
138+
139+ ; Expect commit of lazy-save if ZA is dormant
140+ ; Expect smstart ZA & clear ZT0
141+ ; Expect spill & fill of ZT0 and smstop/smstart za around call
142+ ; Before return, expect smstop ZA
; Test: a caller carrying "aarch64_new_zt0" calls @callee at a call site that is
; also marked "aarch64_new_zt0". The expected-assembly lines below pin the exact
; instruction order, so they must stay contiguous and in this order.
143+ define void @zt0_new_caller_zt0_new_callee () "aarch64_new_zt0" nounwind {
144+ ; CHECK-LABEL: zt0_new_caller_zt0_new_callee:
145+ ; CHECK: // %bb.0: // %prelude
; Prologue: reserve 80 bytes of stack and save x30/x19 at sp+64.
146+ ; CHECK-NEXT: sub sp, sp, #80
147+ ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; Read TPIDR2_EL0; when it is zero, branch past the save.za block. Otherwise
; call __arm_tpidr2_save and then write zero to TPIDR2_EL0.
148+ ; CHECK-NEXT: mrs x8, TPIDR2_EL0
149+ ; CHECK-NEXT: cbz x8, .LBB6_2
150+ ; CHECK-NEXT: // %bb.1: // %save.za
151+ ; CHECK-NEXT: bl __arm_tpidr2_save
152+ ; CHECK-NEXT: msr TPIDR2_EL0, xzr
153+ ; CHECK-NEXT: .LBB6_2:
; Enable ZA and zero ZT0 for this function's new ZT0 state.
154+ ; CHECK-NEXT: smstart za
155+ ; CHECK-NEXT: zero { zt0 }
; Spill ZT0 to the address held in x19 (a copy of sp), then disable ZA
; before the call.
156+ ; CHECK-NEXT: mov x19, sp
157+ ; CHECK-NEXT: str zt0, [x19]
158+ ; CHECK-NEXT: smstop za
159+ ; CHECK-NEXT: bl callee
; After the call: re-enable ZA and reload ZT0 from the same x19 address.
160+ ; CHECK-NEXT: smstart za
161+ ; CHECK-NEXT: ldr zt0, [x19]
; Disable ZA again before returning, then restore x30/x19 and release the frame.
162+ ; CHECK-NEXT: smstop za
163+ ; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
164+ ; CHECK-NEXT: add sp, sp, #80
165+ ; CHECK-NEXT: ret
; IR under test: one call carrying the "aarch64_new_zt0" call-site attribute.
166+ call void @callee () "aarch64_new_zt0" ;
167+ ret void ;
168+ }
169+
137170;
138171; New-ZA Caller
139172;
@@ -144,23 +177,18 @@ define void @zt0_in_caller_zt0_new_callee() "aarch64_in_zt0" nounwind {
144177define void @zt0_new_caller () "aarch64_new_zt0" nounwind {
145178; CHECK-LABEL: zt0_new_caller:
146179; CHECK: // %bb.0: // %prelude
147- ; CHECK-NEXT: sub sp, sp, #80
148- ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
180+ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
149181; CHECK-NEXT: mrs x8, TPIDR2_EL0
150- ; CHECK-NEXT: cbz x8, .LBB6_2
182+ ; CHECK-NEXT: cbz x8, .LBB7_2
151183; CHECK-NEXT: // %bb.1: // %save.za
152- ; CHECK-NEXT: mov x8, sp
153- ; CHECK-NEXT: str zt0, [x8]
154184; CHECK-NEXT: bl __arm_tpidr2_save
155- ; CHECK-NEXT: ldr zt0, [x8]
156185; CHECK-NEXT: msr TPIDR2_EL0, xzr
157- ; CHECK-NEXT: .LBB6_2 :
186+ ; CHECK-NEXT: .LBB7_2 :
158187; CHECK-NEXT: smstart za
159188; CHECK-NEXT: zero { zt0 }
160189; CHECK-NEXT: bl callee
161190; CHECK-NEXT: smstop za
162- ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
163- ; CHECK-NEXT: add sp, sp, #80
191+ ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
164192; CHECK-NEXT: ret
165193 call void @callee () "aarch64_in_zt0" ;
166194 ret void ;
@@ -172,24 +200,19 @@ define void @zt0_new_caller() "aarch64_new_zt0" nounwind {
172200define void @new_za_zt0_caller () "aarch64_new_za" "aarch64_new_zt0" nounwind {
173201; CHECK-LABEL: new_za_zt0_caller:
174202; CHECK: // %bb.0: // %prelude
175- ; CHECK-NEXT: sub sp, sp, #80
176- ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
203+ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
177204; CHECK-NEXT: mrs x8, TPIDR2_EL0
178- ; CHECK-NEXT: cbz x8, .LBB7_2
205+ ; CHECK-NEXT: cbz x8, .LBB8_2
179206; CHECK-NEXT: // %bb.1: // %save.za
180- ; CHECK-NEXT: mov x8, sp
181- ; CHECK-NEXT: str zt0, [x8]
182207; CHECK-NEXT: bl __arm_tpidr2_save
183- ; CHECK-NEXT: ldr zt0, [x8]
184208; CHECK-NEXT: msr TPIDR2_EL0, xzr
185- ; CHECK-NEXT: .LBB7_2 :
209+ ; CHECK-NEXT: .LBB8_2 :
186210; CHECK-NEXT: smstart za
187211; CHECK-NEXT: zero {za}
188212; CHECK-NEXT: zero { zt0 }
189213; CHECK-NEXT: bl callee
190214; CHECK-NEXT: smstop za
191- ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
192- ; CHECK-NEXT: add sp, sp, #80
215+ ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
193216; CHECK-NEXT: ret
194217 call void @callee () "aarch64_inout_za" "aarch64_in_zt0" ;
195218 ret void ;
0 commit comments