Skip to content

Commit 9107d09

Browse files
authored
[AArch64][SME] Avoid clobbering X0 in the MachineSMEABIPass (llvm#170131)
This tweaks `findStateChangeInsertionPoint` to also avoid clobbering X0, which should be possible in most cases (since X0's live ranges are likely to be very short before register allocation). This improves codegen in a few cases, because the redundant copies to/from X0 that result from clobbering it are not always eliminated.
1 parent a09571e commit 9107d09

File tree

6 files changed

+32
-32
lines changed

6 files changed

+32
-32
lines changed

llvm/lib/Target/AArch64/MachineSMEABIPass.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -632,8 +632,8 @@ MachineSMEABI::findStateChangeInsertionPoint(
632632
PhysLiveRegs = Block.PhysLiveRegsAtExit;
633633
}
634634

635-
if (!(PhysLiveRegs & LiveRegs::NZCV))
636-
return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags).
635+
if (PhysLiveRegs == LiveRegs::None)
636+
return {InsertPt, PhysLiveRegs}; // Nothing to do (no live regs).
637637

638638
// Find the previous state change. We can not move before this point.
639639
MachineBasicBlock::iterator PrevStateChangeI;
@@ -650,15 +650,21 @@ MachineSMEABI::findStateChangeInsertionPoint(
650650
// Note: LiveUnits will only accurately track X0 and NZCV.
651651
LiveRegUnits LiveUnits(*TRI);
652652
setPhysLiveRegs(LiveUnits, PhysLiveRegs);
653+
auto BestCandidate = std::make_pair(InsertPt, PhysLiveRegs);
653654
for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
654655
// Don't move before/into a call (which may have a state change before it).
655656
if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
656657
break;
657658
LiveUnits.stepBackward(*I);
658-
if (LiveUnits.available(AArch64::NZCV))
659-
return {I, getPhysLiveRegs(LiveUnits)};
659+
LiveRegs CurrentPhysLiveRegs = getPhysLiveRegs(LiveUnits);
660+
// Find places where NZCV is available, but keep looking for locations where
661+
// both NZCV and X0 are available, which can avoid some copies.
662+
if (!(CurrentPhysLiveRegs & LiveRegs::NZCV))
663+
BestCandidate = {I, CurrentPhysLiveRegs};
664+
if (CurrentPhysLiveRegs == LiveRegs::None)
665+
break;
660666
}
661-
return {InsertPt, PhysLiveRegs};
667+
return BestCandidate;
662668
}
663669

664670
void MachineSMEABI::insertStateChanges(EmitContext &Context,

llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,14 +79,12 @@ body: |
7979
; CHECK-NEXT: RequiresZASavePseudo
8080
; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
8181
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
82-
; CHECK-NEXT: $x0 = IMPLICIT_DEF
83-
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
8482
; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv
8583
; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv
8684
; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0
8785
; CHECK-NEXT: RestoreZAPseudo [[MRS]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0
8886
; CHECK-NEXT: MSR 56965, $xzr
89-
; CHECK-NEXT: $x0 = COPY [[COPY2]]
87+
; CHECK-NEXT: $x0 = IMPLICIT_DEF
9088
; CHECK-NEXT: $nzcv = IMPLICIT_DEF
9189
; CHECK-NEXT: FAKE_USE $x0
9290
; CHECK-NEXT: $zab0 = IMPLICIT_DEF

llvm/test/CodeGen/AArch64/sme-agnostic-za.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,10 @@ define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state
6767
; CHECK-NEWLOWERING-NEXT: mov x0, x8
6868
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
6969
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
70-
; CHECK-NEWLOWERING-NEXT: mov x8, x0
70+
; CHECK-NEWLOWERING-NEXT: mov x1, x0
7171
; CHECK-NEWLOWERING-NEXT: mov x0, x19
7272
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
73-
; CHECK-NEWLOWERING-NEXT: mov x0, x8
73+
; CHECK-NEWLOWERING-NEXT: mov x0, x1
7474
; CHECK-NEWLOWERING-NEXT: mov sp, x29
7575
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload
7676
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
@@ -170,11 +170,11 @@ define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nou
170170
; CHECK-NEWLOWERING-NEXT: mov x0, x8
171171
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
172172
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
173+
; CHECK-NEWLOWERING-NEXT: mov x1, x0
173174
; CHECK-NEWLOWERING-NEXT: smstart sm
174-
; CHECK-NEWLOWERING-NEXT: mov x8, x0
175175
; CHECK-NEWLOWERING-NEXT: mov x0, x20
176176
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
177-
; CHECK-NEWLOWERING-NEXT: mov x0, x8
177+
; CHECK-NEWLOWERING-NEXT: mov x0, x1
178178
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
179179
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
180180
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
@@ -267,14 +267,14 @@ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(
267267
; CHECK-NEWLOWERING-NEXT: mov x0, x8
268268
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
269269
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
270+
; CHECK-NEWLOWERING-NEXT: mov x1, x0
270271
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB5_4
271272
; CHECK-NEWLOWERING-NEXT: // %bb.3:
272273
; CHECK-NEWLOWERING-NEXT: smstart sm
273274
; CHECK-NEWLOWERING-NEXT: .LBB5_4:
274-
; CHECK-NEWLOWERING-NEXT: mov x8, x0
275275
; CHECK-NEWLOWERING-NEXT: mov x0, x19
276276
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
277-
; CHECK-NEWLOWERING-NEXT: mov x0, x8
277+
; CHECK-NEWLOWERING-NEXT: mov x0, x1
278278
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
279279
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
280280
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
@@ -336,10 +336,10 @@ define i64 @test_many_callee_arguments(
336336
; CHECK-NEWLOWERING-NEXT: mov x0, x8
337337
; CHECK-NEWLOWERING-NEXT: bl many_args_private_za_callee
338338
; CHECK-NEWLOWERING-NEXT: add sp, sp, #16
339-
; CHECK-NEWLOWERING-NEXT: mov x8, x0
339+
; CHECK-NEWLOWERING-NEXT: mov x1, x0
340340
; CHECK-NEWLOWERING-NEXT: mov x0, x19
341341
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
342-
; CHECK-NEWLOWERING-NEXT: mov x0, x8
342+
; CHECK-NEWLOWERING-NEXT: mov x0, x1
343343
; CHECK-NEWLOWERING-NEXT: mov sp, x29
344344
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload
345345
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload

llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,7 @@ define i32 @load_tls_shared_za() nounwind "aarch64_inout_za" {
8787
; CHECK-NEXT: .tlsdesccall x
8888
; CHECK-NEXT: blr x1
8989
; CHECK-NEXT: mrs x8, TPIDR_EL0
90-
; CHECK-NEXT: ldr w0, [x8, x0]
91-
; CHECK-NEXT: mov w8, w0
90+
; CHECK-NEXT: ldr w8, [x8, x0]
9291
; CHECK-NEXT: smstart za
9392
; CHECK-NEXT: mrs x9, TPIDR2_EL0
9493
; CHECK-NEXT: sub x0, x29, #16
@@ -133,8 +132,7 @@ define i32 @load_tls_streaming_shared_za() nounwind "aarch64_inout_za" "aarch64_
133132
; CHECK-NEXT: blr x1
134133
; CHECK-NEXT: smstart sm
135134
; CHECK-NEXT: mrs x8, TPIDR_EL0
136-
; CHECK-NEXT: ldr w0, [x8, x0]
137-
; CHECK-NEXT: mov w8, w0
135+
; CHECK-NEXT: ldr w8, [x8, x0]
138136
; CHECK-NEXT: smstart za
139137
; CHECK-NEXT: mrs x9, TPIDR2_EL0
140138
; CHECK-NEXT: sub x0, x29, #80

llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -621,15 +621,15 @@ define i64 @test_many_callee_arguments(
621621
; CHECK-NEWLOWERING-NEXT: stp x10, x11, [sp, #-16]!
622622
; CHECK-NEWLOWERING-NEXT: bl many_args_private_za_callee
623623
; CHECK-NEWLOWERING-NEXT: add sp, sp, #16
624-
; CHECK-NEWLOWERING-NEXT: mov x8, x0
624+
; CHECK-NEWLOWERING-NEXT: mov x1, x0
625625
; CHECK-NEWLOWERING-NEXT: smstart za
626-
; CHECK-NEWLOWERING-NEXT: mrs x9, TPIDR2_EL0
626+
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
627627
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
628-
; CHECK-NEWLOWERING-NEXT: cbnz x9, .LBB9_2
628+
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB9_2
629629
; CHECK-NEWLOWERING-NEXT: // %bb.1:
630630
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
631631
; CHECK-NEWLOWERING-NEXT: .LBB9_2:
632-
; CHECK-NEWLOWERING-NEXT: mov x0, x8
632+
; CHECK-NEWLOWERING-NEXT: mov x0, x1
633633
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
634634
; CHECK-NEWLOWERING-NEXT: mov sp, x29
635635
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload

llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ define i32 @csr_d8_allocnxv4i32i32f64(double %d) "aarch64_pstate_sm_compatible"
3333
; CHECK-COMMON-NEXT: ldr x29, [sp, #8] // 8-byte Reload
3434
; CHECK-COMMON-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
3535
; CHECK-COMMON-NEXT: ret
36-
; CHECK-COMMON-NE
36+
; CHECK-NE
3737
entry:
3838
%a = alloca <vscale x 4 x i32>
3939
%b = alloca i32
@@ -626,23 +626,21 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
626626
; CHECK-NEWLOWERING-NEXT: mov x9, sp
627627
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
628628
; CHECK-NEWLOWERING-NEXT: mov sp, x9
629-
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #80
630629
; CHECK-NEWLOWERING-NEXT: mov w20, w0
630+
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #80
631631
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-80]
632632
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
633633
; CHECK-NEWLOWERING-NEXT: smstop sm
634634
; CHECK-NEWLOWERING-NEXT: bl other
635635
; CHECK-NEWLOWERING-NEXT: smstart sm
636-
; CHECK-NEWLOWERING-NEXT: mov w0, w20
637-
; CHECK-NEWLOWERING-NEXT: mov w8, w0
638636
; CHECK-NEWLOWERING-NEXT: smstart za
639-
; CHECK-NEWLOWERING-NEXT: mrs x9, TPIDR2_EL0
637+
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
640638
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #80
641-
; CHECK-NEWLOWERING-NEXT: cbnz x9, .LBB8_2
639+
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB8_2
642640
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %entry
643641
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
644642
; CHECK-NEWLOWERING-NEXT: .LBB8_2: // %entry
645-
; CHECK-NEWLOWERING-NEXT: mov w0, w8
643+
; CHECK-NEWLOWERING-NEXT: mov w0, w20
646644
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
647645
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
648646
; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa wsp, 112
@@ -671,4 +669,4 @@ entry:
671669
tail call void @other()
672670
ret i32 %x
673671
}
674-
declare void @other()
672+
declare void @other()

0 commit comments

Comments
 (0)