@@ -228,65 +228,34 @@ exit:
228228 ret void
229229}
230230
231- ; FIXME: The codegen for this case could be improved (by tuning weights).
232- ; Here the ZA save has been hoisted out of the conditional, but would be better
233- ; to sink it.
234231define void @cond_private_za_call (i1 %cond ) "aarch64_inout_za" nounwind {
235- ; CHECK-LABEL: cond_private_za_call:
236- ; CHECK: // %bb.0:
237- ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
238- ; CHECK-NEXT: mov x29, sp
239- ; CHECK-NEXT: sub sp, sp, #16
240- ; CHECK-NEXT: rdsvl x8, #1
241- ; CHECK-NEXT: mov x9, sp
242- ; CHECK-NEXT: msub x9, x8, x8, x9
243- ; CHECK-NEXT: mov sp, x9
244- ; CHECK-NEXT: stp x9, x8, [x29, #-16]
245- ; CHECK-NEXT: tbz w0, #0, .LBB3_4
246- ; CHECK-NEXT: // %bb.1: // %private_za_call
247- ; CHECK-NEXT: sub x8, x29, #16
248- ; CHECK-NEXT: msr TPIDR2_EL0, x8
249- ; CHECK-NEXT: bl private_za_call
250- ; CHECK-NEXT: smstart za
251- ; CHECK-NEXT: mrs x8, TPIDR2_EL0
252- ; CHECK-NEXT: sub x0, x29, #16
253- ; CHECK-NEXT: cbnz x8, .LBB3_3
254- ; CHECK-NEXT: // %bb.2: // %private_za_call
255- ; CHECK-NEXT: bl __arm_tpidr2_restore
256- ; CHECK-NEXT: .LBB3_3: // %private_za_call
257- ; CHECK-NEXT: msr TPIDR2_EL0, xzr
258- ; CHECK-NEXT: .LBB3_4: // %exit
259- ; CHECK-NEXT: mov sp, x29
260- ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
261- ; CHECK-NEXT: b shared_za_call
262- ;
263- ; CHECK-NEWLOWERING-LABEL: cond_private_za_call:
264- ; CHECK-NEWLOWERING: // %bb.0:
265- ; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
266- ; CHECK-NEWLOWERING-NEXT: mov x29, sp
267- ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
268- ; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
269- ; CHECK-NEWLOWERING-NEXT: mov x9, sp
270- ; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
271- ; CHECK-NEWLOWERING-NEXT: mov sp, x9
272- ; CHECK-NEWLOWERING-NEXT: sub x10, x29, #16
273- ; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
274- ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
275- ; CHECK-NEWLOWERING-NEXT: tbz w0, #0, .LBB3_2
276- ; CHECK-NEWLOWERING-NEXT: // %bb.1: // %private_za_call
277- ; CHECK-NEWLOWERING-NEXT: bl private_za_call
278- ; CHECK-NEWLOWERING-NEXT: .LBB3_2: // %exit
279- ; CHECK-NEWLOWERING-NEXT: smstart za
280- ; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
281- ; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
282- ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB3_4
283- ; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit
284- ; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
285- ; CHECK-NEWLOWERING-NEXT: .LBB3_4: // %exit
286- ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
287- ; CHECK-NEWLOWERING-NEXT: mov sp, x29
288- ; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
289- ; CHECK-NEWLOWERING-NEXT: b shared_za_call
232+ ; CHECK-COMMON-LABEL: cond_private_za_call:
233+ ; CHECK-COMMON: // %bb.0:
234+ ; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
235+ ; CHECK-COMMON-NEXT: mov x29, sp
236+ ; CHECK-COMMON-NEXT: sub sp, sp, #16
237+ ; CHECK-COMMON-NEXT: rdsvl x8, #1
238+ ; CHECK-COMMON-NEXT: mov x9, sp
239+ ; CHECK-COMMON-NEXT: msub x9, x8, x8, x9
240+ ; CHECK-COMMON-NEXT: mov sp, x9
241+ ; CHECK-COMMON-NEXT: stp x9, x8, [x29, #-16]
242+ ; CHECK-COMMON-NEXT: tbz w0, #0, .LBB3_4
243+ ; CHECK-COMMON-NEXT: // %bb.1: // %private_za_call
244+ ; CHECK-COMMON-NEXT: sub x8, x29, #16
245+ ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8
246+ ; CHECK-COMMON-NEXT: bl private_za_call
247+ ; CHECK-COMMON-NEXT: smstart za
248+ ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
249+ ; CHECK-COMMON-NEXT: sub x0, x29, #16
250+ ; CHECK-COMMON-NEXT: cbnz x8, .LBB3_3
251+ ; CHECK-COMMON-NEXT: // %bb.2: // %private_za_call
252+ ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
253+ ; CHECK-COMMON-NEXT: .LBB3_3: // %private_za_call
254+ ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
255+ ; CHECK-COMMON-NEXT: .LBB3_4: // %exit
256+ ; CHECK-COMMON-NEXT: mov sp, x29
257+ ; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
258+ ; CHECK-COMMON-NEXT: b shared_za_call
290259 br i1 %cond , label %private_za_call , label %exit
291260
292261private_za_call:
@@ -910,7 +879,7 @@ define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwin
910879; CHECK-NEWLOWERING-LABEL: loop_with_external_entry:
911880; CHECK-NEWLOWERING: // %bb.0: // %entry
912881; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
913- ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
882+ ; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
914883; CHECK-NEWLOWERING-NEXT: mov x29, sp
915884; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
916885; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
@@ -923,23 +892,27 @@ define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwin
923892; CHECK-NEWLOWERING-NEXT: // %bb.1: // %init
924893; CHECK-NEWLOWERING-NEXT: bl shared_za_call
925894; CHECK-NEWLOWERING-NEXT: .LBB11_2: // %loop.preheader
926- ; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
927- ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
895+ ; CHECK-NEWLOWERING-NEXT: sub x20, x29, #16
896+ ; CHECK-NEWLOWERING-NEXT: b .LBB11_4
928897; CHECK-NEWLOWERING-NEXT: .LBB11_3: // %loop
898+ ; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB11_4 Depth=1
899+ ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
900+ ; CHECK-NEWLOWERING-NEXT: tbz w19, #0, .LBB11_6
901+ ; CHECK-NEWLOWERING-NEXT: .LBB11_4: // %loop
929902; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
903+ ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x20
930904; CHECK-NEWLOWERING-NEXT: bl private_za_call
931- ; CHECK-NEWLOWERING-NEXT: tbnz w19, #0, .LBB11_3
932- ; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
933905; CHECK-NEWLOWERING-NEXT: smstart za
934906; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
935907; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
936- ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_6
937- ; CHECK-NEWLOWERING-NEXT: // %bb.5: // %exit
908+ ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_3
909+ ; CHECK-NEWLOWERING-NEXT: // %bb.5: // %loop
910+ ; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB11_4 Depth=1
938911; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
912+ ; CHECK-NEWLOWERING-NEXT: b .LBB11_3
939913; CHECK-NEWLOWERING-NEXT: .LBB11_6: // %exit
940- ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
941914; CHECK-NEWLOWERING-NEXT: mov sp, x29
942- ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
915+ ; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
943916; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
944917; CHECK-NEWLOWERING-NEXT: ret
945918entry:
0 commit comments