@@ -82,3 +82,121 @@ define i64 @shared_caller_agnostic_callee(i64 %v) nounwind "aarch64_inout_za" "a
8282 %res = call i64 @agnostic_decl (i64 %v )
8383 ret i64 %res
8484}
85+
86+ ; agnostic-ZA + streaming -> private-ZA + non-streaming
; Test: a caller marked "aarch64_za_state_agnostic" and
; "aarch64_pstate_sm_enabled" makes two chained calls to @private_za_decl
; (the second call consumes the first call's result) and returns the second
; result.
;
; What the expected assembly below pins down:
;   * the prologue calls __arm_get_current_vg and spills its result to
;     [sp, #80];
;   * __arm_sme_state_size is called once, its result is subtracted from sp,
;     and the resulting buffer address is kept in x20 for the whole function;
;   * each call to private_za_decl is individually wrapped as
;       __arm_sme_save(x20) -> smstop sm -> call -> smstart sm
;       -> __arm_sme_restore(x20)
;     so a restore is immediately followed by another save between the two
;     calls (the pairs are not merged);
;   * the mode switches are unconditional (no branch around smstop/smstart).
;
; NOTE(review): the expected-assembly sequence looks tool-generated; presumably
; it should be regenerated rather than edited by hand -- confirm against the
; header of this test file.
87+ define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee (i64 %v ) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_enabled" {
88+ ; CHECK-LABEL: streaming_agnostic_caller_nonstreaming_private_za_callee:
89+ ; CHECK: // %bb.0:
90+ ; CHECK-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
91+ ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
92+ ; CHECK-NEXT: mov x9, x0
93+ ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
94+ ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
95+ ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
96+ ; CHECK-NEXT: bl __arm_get_current_vg
97+ ; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
98+ ; CHECK-NEXT: mov x0, x9
99+ ; CHECK-NEXT: add x29, sp, #64
100+ ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
101+ ; CHECK-NEXT: mov x8, x0
102+ ; CHECK-NEXT: bl __arm_sme_state_size
103+ ; CHECK-NEXT: sub sp, sp, x0
104+ ; CHECK-NEXT: mov x20, sp
105+ ; CHECK-NEXT: mov x0, x20
106+ ; CHECK-NEXT: bl __arm_sme_save
107+ ; CHECK-NEXT: smstop sm
108+ ; CHECK-NEXT: mov x0, x8
109+ ; CHECK-NEXT: bl private_za_decl
110+ ; CHECK-NEXT: mov x1, x0
111+ ; CHECK-NEXT: smstart sm
112+ ; CHECK-NEXT: mov x0, x20
113+ ; CHECK-NEXT: bl __arm_sme_restore
114+ ; CHECK-NEXT: mov x0, x20
115+ ; CHECK-NEXT: bl __arm_sme_save
116+ ; CHECK-NEXT: smstop sm
117+ ; CHECK-NEXT: mov x0, x1
118+ ; CHECK-NEXT: bl private_za_decl
119+ ; CHECK-NEXT: mov x1, x0
120+ ; CHECK-NEXT: smstart sm
121+ ; CHECK-NEXT: mov x0, x20
122+ ; CHECK-NEXT: bl __arm_sme_restore
123+ ; CHECK-NEXT: mov x0, x1
124+ ; CHECK-NEXT: sub sp, x29, #64
125+ ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
126+ ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
127+ ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
128+ ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
129+ ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
130+ ; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload
131+ ; CHECK-NEXT: ret
132+ %res = call i64 @private_za_decl (i64 %v )
133+ %res2 = call i64 @private_za_decl (i64 %res )
134+ ret i64 %res2
135+ }
136+
137+ ; agnostic-ZA + streaming-compatible -> private-ZA + non-streaming
; Test: a caller marked "aarch64_za_state_agnostic" and
; "aarch64_pstate_sm_compatible" makes two chained calls to @private_za_decl
; (the second call consumes the first call's result) and returns the second
; result.
;
; What the expected assembly below pins down:
;   * the prologue calls __arm_get_current_vg and spills its result to
;     [sp, #80];
;   * __arm_sme_state_size is called once, its result is subtracted from sp,
;     and the resulting buffer address is kept in x19 for the whole function;
;   * around each call to private_za_decl the sequence is
;       __arm_sme_save(x19) -> __arm_sme_state -> x20 = x0 & 1
;       -> smstop sm only if bit 0 of x20 is set -> call
;       -> smstart sm only if bit 0 of x20 is set -> __arm_sme_restore(x19)
;     i.e. the mode switches are guarded by tbz branches instead of being
;     unconditional;
;   * __arm_sme_state is queried again before the second call, and a restore
;     is immediately followed by another save between the two calls.
;
; NOTE(review): bit 0 of the __arm_sme_state result is presumably the current
; streaming-mode flag -- confirm against the SME ABI support-routine
; description. The expected-assembly sequence looks tool-generated; presumably
; it should be regenerated rather than edited by hand.
138+ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee (i64 %v ) nounwind "aarch64_za_state_agnostic" "aarch64_pstate_sm_compatible" {
139+ ; CHECK-LABEL: streaming_compatible_agnostic_caller_nonstreaming_private_za_callee:
140+ ; CHECK: // %bb.0:
141+ ; CHECK-NEXT: stp d15, d14, [sp, #-112]! // 16-byte Folded Spill
142+ ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
143+ ; CHECK-NEXT: mov x9, x0
144+ ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
145+ ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
146+ ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
147+ ; CHECK-NEXT: bl __arm_get_current_vg
148+ ; CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill
149+ ; CHECK-NEXT: mov x0, x9
150+ ; CHECK-NEXT: add x29, sp, #64
151+ ; CHECK-NEXT: stp x20, x19, [sp, #96] // 16-byte Folded Spill
152+ ; CHECK-NEXT: mov x8, x0
153+ ; CHECK-NEXT: bl __arm_sme_state_size
154+ ; CHECK-NEXT: sub sp, sp, x0
155+ ; CHECK-NEXT: mov x19, sp
156+ ; CHECK-NEXT: mov x0, x19
157+ ; CHECK-NEXT: bl __arm_sme_save
158+ ; CHECK-NEXT: bl __arm_sme_state
159+ ; CHECK-NEXT: and x20, x0, #0x1
160+ ; CHECK-NEXT: tbz w20, #0, .LBB5_2
161+ ; CHECK-NEXT: // %bb.1:
162+ ; CHECK-NEXT: smstop sm
163+ ; CHECK-NEXT: .LBB5_2:
164+ ; CHECK-NEXT: mov x0, x8
165+ ; CHECK-NEXT: bl private_za_decl
166+ ; CHECK-NEXT: mov x2, x0
167+ ; CHECK-NEXT: tbz w20, #0, .LBB5_4
168+ ; CHECK-NEXT: // %bb.3:
169+ ; CHECK-NEXT: smstart sm
170+ ; CHECK-NEXT: .LBB5_4:
171+ ; CHECK-NEXT: mov x0, x19
172+ ; CHECK-NEXT: bl __arm_sme_restore
173+ ; CHECK-NEXT: mov x0, x19
174+ ; CHECK-NEXT: bl __arm_sme_save
175+ ; CHECK-NEXT: bl __arm_sme_state
176+ ; CHECK-NEXT: and x20, x0, #0x1
177+ ; CHECK-NEXT: tbz w20, #0, .LBB5_6
178+ ; CHECK-NEXT: // %bb.5:
179+ ; CHECK-NEXT: smstop sm
180+ ; CHECK-NEXT: .LBB5_6:
181+ ; CHECK-NEXT: mov x0, x2
182+ ; CHECK-NEXT: bl private_za_decl
183+ ; CHECK-NEXT: mov x1, x0
184+ ; CHECK-NEXT: tbz w20, #0, .LBB5_8
185+ ; CHECK-NEXT: // %bb.7:
186+ ; CHECK-NEXT: smstart sm
187+ ; CHECK-NEXT: .LBB5_8:
188+ ; CHECK-NEXT: mov x0, x19
189+ ; CHECK-NEXT: bl __arm_sme_restore
190+ ; CHECK-NEXT: mov x0, x1
191+ ; CHECK-NEXT: sub sp, x29, #64
192+ ; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
193+ ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
194+ ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
195+ ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
196+ ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
197+ ; CHECK-NEXT: ldp d15, d14, [sp], #112 // 16-byte Folded Reload
198+ ; CHECK-NEXT: ret
199+ %res = call i64 @private_za_decl (i64 %v )
200+ %res2 = call i64 @private_za_decl (i64 %res )
201+ ret i64 %res2
202+ }
0 commit comments