9696 ret void
9797}
9898
99- ; FIXME: In the new lowering we could weight edges to avoid doing the lazy save in the loop.
99+ ; This tests that with the new lowering we push state changes out of loops (at -O1 and above).
100100define void @private_za_loop_active_entry_and_exit(i32 %n) "aarch64_inout_za" nounwind {
101101; CHECK-LABEL: private_za_loop_active_entry_and_exit:
102102; CHECK: // %bb.0: // %entry
@@ -142,7 +142,7 @@ define void @private_za_loop_active_entry_and_exit(i32 %n) "aarch64_inout_za" no
142142; CHECK-NEWLOWERING-LABEL: private_za_loop_active_entry_and_exit:
143143; CHECK-NEWLOWERING: // %bb.0: // %entry
144144; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
145- ; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
145+ ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
146146; CHECK-NEWLOWERING-NEXT: mov x29, sp
147147; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
148148; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
@@ -152,31 +152,26 @@ define void @private_za_loop_active_entry_and_exit(i32 %n) "aarch64_inout_za" no
152152; CHECK-NEWLOWERING-NEXT: mov w19, w0
153153; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-16]
154154; CHECK-NEWLOWERING-NEXT: bl shared_za_call
155+ ; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
155156; CHECK-NEWLOWERING-NEXT: cmp w19, #1
156- ; CHECK-NEWLOWERING-NEXT: b.lt .LBB1_5
157- ; CHECK-NEWLOWERING-NEXT: // %bb.1: // %loop.preheader
158- ; CHECK-NEWLOWERING-NEXT: sub x20, x29, #16
159- ; CHECK-NEWLOWERING-NEXT: b .LBB1_3
160- ; CHECK-NEWLOWERING-NEXT: .LBB1_2: // %loop
161- ; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB1_3 Depth=1
162- ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
163- ; CHECK-NEWLOWERING-NEXT: cbz w19, .LBB1_5
164- ; CHECK-NEWLOWERING-NEXT: .LBB1_3: // %loop
157+ ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
158+ ; CHECK-NEWLOWERING-NEXT: b.lt .LBB1_2
159+ ; CHECK-NEWLOWERING-NEXT: .LBB1_1: // %loop
165160; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
166- ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x20
167161; CHECK-NEWLOWERING-NEXT: bl private_za_call
168- ; CHECK-NEWLOWERING-NEXT: sub w19, w19, #1
162+ ; CHECK-NEWLOWERING-NEXT: subs w19, w19, #1
163+ ; CHECK-NEWLOWERING-NEXT: b.ne .LBB1_1
164+ ; CHECK-NEWLOWERING-NEXT: .LBB1_2: // %exit
169165; CHECK-NEWLOWERING-NEXT: smstart za
170166; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
171167; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
172- ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB1_2
173- ; CHECK-NEWLOWERING-NEXT: // %bb.4: // %loop
174- ; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB1_3 Depth=1
168+ ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB1_4
169+ ; CHECK-NEWLOWERING-NEXT: // %bb.3: // %exit
175170; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
176- ; CHECK-NEWLOWERING-NEXT: b .LBB1_2
177- ; CHECK-NEWLOWERING-NEXT: .LBB1_5: // %exit
171+ ; CHECK-NEWLOWERING-NEXT: .LBB1_4: // %exit
172+ ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
178173; CHECK-NEWLOWERING-NEXT: mov sp, x29
179- ; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
174+ ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
180175; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
181176; CHECK-NEWLOWERING-NEXT: b shared_za_call
182177entry:
@@ -879,7 +874,7 @@ define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwin
879874; CHECK-NEWLOWERING-LABEL: loop_with_external_entry:
880875; CHECK-NEWLOWERING: // %bb.0: // %entry
881876; CHECK-NEWLOWERING-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
882- ; CHECK-NEWLOWERING-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
877+ ; CHECK-NEWLOWERING-NEXT: str x19, [sp, #16] // 8-byte Folded Spill
883878; CHECK-NEWLOWERING-NEXT: mov x29, sp
884879; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16
885880; CHECK-NEWLOWERING-NEXT: rdsvl x8, #1
@@ -892,27 +887,23 @@ define void @loop_with_external_entry(i1 %c1, i1 %c2) "aarch64_inout_za" nounwin
892887; CHECK-NEWLOWERING-NEXT: // %bb.1: // %init
893888; CHECK-NEWLOWERING-NEXT: bl shared_za_call
894889; CHECK-NEWLOWERING-NEXT: .LBB11_2: // %loop.preheader
895- ; CHECK-NEWLOWERING-NEXT: sub x20, x29, #16
896- ; CHECK-NEWLOWERING-NEXT: b .LBB11_4
890+ ; CHECK-NEWLOWERING-NEXT: sub x8, x29, #16
891+ ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
897892; CHECK-NEWLOWERING-NEXT: .LBB11_3: // %loop
898- ; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB11_4 Depth=1
899- ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
900- ; CHECK-NEWLOWERING-NEXT: tbz w19, #0, .LBB11_6
901- ; CHECK-NEWLOWERING-NEXT: .LBB11_4: // %loop
902893; CHECK-NEWLOWERING-NEXT: // =>This Inner Loop Header: Depth=1
903- ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x20
904894; CHECK-NEWLOWERING-NEXT: bl private_za_call
895+ ; CHECK-NEWLOWERING-NEXT: tbnz w19, #0, .LBB11_3
896+ ; CHECK-NEWLOWERING-NEXT: // %bb.4: // %exit
905897; CHECK-NEWLOWERING-NEXT: smstart za
906898; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
907899; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
908- ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_3
909- ; CHECK-NEWLOWERING-NEXT: // %bb.5: // %loop
910- ; CHECK-NEWLOWERING-NEXT: // in Loop: Header=BB11_4 Depth=1
900+ ; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB11_6
901+ ; CHECK-NEWLOWERING-NEXT: // %bb.5: // %exit
911902; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
912- ; CHECK-NEWLOWERING-NEXT: b .LBB11_3
913903; CHECK-NEWLOWERING-NEXT: .LBB11_6: // %exit
904+ ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
914905; CHECK-NEWLOWERING-NEXT: mov sp, x29
915- ; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
906+ ; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload
916907; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
917908; CHECK-NEWLOWERING-NEXT: ret
918909entry:
0 commit comments