From 01056469350de53e093ceb605b7c6605d48c37cc Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 1 Oct 2025 12:44:04 +0100 Subject: [PATCH] [AArch64] Optimize CBZ wzr and friends. In certain situations, especially with zero phi operands propagated after tail duplication, we can end up with a CBZ/CBNZ/TBZ/TBNZ whose operand is the zero register (WZR/XZR). It only happens late in the pipeline. This patch adds a basic simplifyInstruction to fold them away, either replacing the instruction with a direct branch or removing it entirely. It needs some fixups, but seems to work. --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 5 +- llvm/lib/CodeGen/MachineCopyPropagation.cpp | 8 +- llvm/lib/CodeGen/ShrinkWrap.cpp | 2 + llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 49 ++++++++++++ llvm/lib/Target/AArch64/AArch64InstrInfo.h | 3 + llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 3 +- llvm/lib/Target/RISCV/RISCVInstrInfo.h | 3 +- llvm/test/CodeGen/AArch64/arm64-rev.ll | 10 +-- .../CodeGen/AArch64/arm64-shrink-wrapping.ll | 80 +++++-------------- .../block-placement-optimize-branches.ll | 34 +++----- llvm/test/CodeGen/AArch64/cbz_wzr.mir | 32 +++----- .../AArch64/lr-reserved-for-ra-live-in.ll | 2 - llvm/test/CodeGen/AArch64/tbz-tbnz.ll | 15 ++-- ...ch64_generated_funcs.ll.generated.expected | 21 ++--- ...64_generated_funcs.ll.nogenerated.expected | 20 ++--- 15 files changed, 136 insertions(+), 151 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 175f205328361..4bc5b50293db1 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -550,7 +550,10 @@ class LLVM_ABI TargetInstrInfo : public MCInstrInfo { /// MachineCopyPropagation, where their mutation of the MI operands may /// expose opportunities to convert the instruction to a simpler form (e.g. /// a load of 0). 
- virtual bool simplifyInstruction(MachineInstr &MI) const { return false; } + virtual bool simplifyInstruction(MachineInstr &MI, + bool &AlteredTerminators) const { + return false; + } /// A pair composed of a register and a sub-register index. /// Used to give some type checking when modeling Reg:SubReg. diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp index e35983138550f..0a0a7dc0a1ed0 100644 --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -928,9 +928,13 @@ void MachineCopyPropagation::ForwardCopyPropagateBlock(MachineBasicBlock &MBB) { // Attempt to canonicalize/optimize the instruction now its arguments have // been mutated. This may convert MI from a non-copy to a copy instruction. - if (TII->simplifyInstruction(MI)) { + bool AlteredTerminators = false; + if (TII->simplifyInstruction(MI, AlteredTerminators)) { Changed = true; - LLVM_DEBUG(dbgs() << "MCP: After simplifyInstruction: " << MI); + if (AlteredTerminators) + break; + else + LLVM_DEBUG(dbgs() << "MCP: After simplifyInstruction: " << MI); } CopyOperands = isCopyInstr(MI, *TII, UseCopyInstr); diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index 826e4126de44c..88441b1f6a7a8 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -618,6 +618,8 @@ bool ShrinkWrapImpl::postShrinkWrapping(bool HasCandidate, MachineFunction &MF, DenseSet DirtyBBs; for (MachineBasicBlock &MBB : MF) { + if (!MDT->isReachableFromEntry(&MBB)) + continue; if (MBB.isEHPad()) { DirtyBBs.insert(&MBB); continue; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 5a51c812732e6..99a18201283b1 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -685,6 +685,55 @@ unsigned AArch64InstrInfo::insertBranch( return 2; } +bool 
AArch64InstrInfo::simplifyInstruction(MachineInstr &MI, + bool &AlteredTerminators) const { + unsigned Opc = MI.getOpcode(); + switch (Opc) { + case AArch64::CBZW: + case AArch64::CBZX: + case AArch64::TBZW: + case AArch64::TBZX: + // CBZ XZR -> B + if (MI.getOperand(0).getReg() == AArch64::WZR || + MI.getOperand(0).getReg() == AArch64::XZR) { + MachineBasicBlock *Target = + MI.getOperand(Opc == AArch64::TBZW || Opc == AArch64::TBZX ? 2 : 1) + .getMBB(); + MachineBasicBlock *MBB = MI.getParent(); + SmallVector Succs(MBB->successors()); + for (auto *S : Succs) + if (S != Target) + MBB->removeSuccessor(S); + SmallVector DeadInstrs; + for (auto It = MI.getIterator(); It != MBB->end(); ++It) + DeadInstrs.push_back(&*It); + BuildMI(MBB, MI.getDebugLoc(), get(AArch64::B)).addMBB(Target); + for (auto It : DeadInstrs) + It->eraseFromParent(); + AlteredTerminators = true; + return true; + } + break; + case AArch64::CBNZW: + case AArch64::CBNZX: + case AArch64::TBNZW: + case AArch64::TBNZX: + // CBNZ XZR -> nop + if (MI.getOperand(0).getReg() == AArch64::WZR || + MI.getOperand(0).getReg() == AArch64::XZR) { + MachineBasicBlock *Target = + MI.getOperand(Opc == AArch64::TBNZW || Opc == AArch64::TBNZX ? 2 : 1) + .getMBB(); + MI.getParent()->removeSuccessor(Target); + MI.eraseFromParent(); + AlteredTerminators = true; + return true; + } + break; + } + return false; +} + // Find the original register that VReg is copied from. 
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) { while (Register::isVirtualRegister(VReg)) { diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 179574a73aa01..c27f0b7bc608f 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -401,6 +401,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo { const DebugLoc &DL, int *BytesAdded = nullptr) const override; + bool simplifyInstruction(MachineInstr &MI, + bool &AlteredTerminators) const override; + std::unique_ptr analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 1e6b04f8a4281..bd2e446212cb6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -4169,7 +4169,8 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI, #undef CASE_VFMA_OPCODE_VV #undef CASE_VFMA_SPLATS -bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI) const { +bool RISCVInstrInfo::simplifyInstruction(MachineInstr &MI, + bool &AlteredTerminators) const { switch (MI.getOpcode()) { default: break; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 42a0c4c01b472..e2ba31318f131 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -239,7 +239,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { unsigned OpIdx1, unsigned OpIdx2) const override; - bool simplifyInstruction(MachineInstr &MI) const override; + bool simplifyInstruction(MachineInstr &MI, + bool &AlteredTerminators) const override; MachineInstr *convertToThreeAddress(MachineInstr &MI, LiveVariables *LV, LiveIntervals *LIS) const override; diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll index 84557b441853a..5980e3214d0da 
100644 --- a/llvm/test/CodeGen/AArch64/arm64-rev.ll +++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll @@ -530,28 +530,22 @@ declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) nounwind readnone define void @test_rev16_truncstore() { ; CHECK-SD-LABEL: test_rev16_truncstore: ; CHECK-SD: // %bb.0: // %entry -; CHECK-SD-NEXT: cbnz wzr, .LBB38_2 ; CHECK-SD-NEXT: .LBB38_1: // %cleanup ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-SD-NEXT: ldrh w8, [x8] ; CHECK-SD-NEXT: rev16 w8, w8 ; CHECK-SD-NEXT: strh w8, [x8] -; CHECK-SD-NEXT: cbz wzr, .LBB38_1 -; CHECK-SD-NEXT: .LBB38_2: // %fail -; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: b .LBB38_1 ; ; CHECK-GI-LABEL: test_rev16_truncstore: ; CHECK-GI: // %bb.0: // %entry -; CHECK-GI-NEXT: tbnz wzr, #0, .LBB38_2 ; CHECK-GI-NEXT: .LBB38_1: // %cleanup ; CHECK-GI-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-GI-NEXT: ldrh w8, [x8] ; CHECK-GI-NEXT: rev w8, w8 ; CHECK-GI-NEXT: lsr w8, w8, #16 ; CHECK-GI-NEXT: strh w8, [x8] -; CHECK-GI-NEXT: tbz wzr, #0, .LBB38_1 -; CHECK-GI-NEXT: .LBB38_2: // %fail -; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: b .LBB38_1 entry: br label %body diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll index 5806bcf0dacf1..b837a361bd287 100644 --- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -735,22 +735,15 @@ define void @infiniteloop() { ; ENABLE-NEXT: .cfi_offset w29, -16 ; ENABLE-NEXT: .cfi_offset w19, -24 ; ENABLE-NEXT: .cfi_offset w20, -32 -; ENABLE-NEXT: cbnz wzr, LBB10_3 -; ENABLE-NEXT: ; %bb.1: ; %if.then ; ENABLE-NEXT: sub x19, sp, #16 ; ENABLE-NEXT: mov sp, x19 ; ENABLE-NEXT: mov w20, wzr -; ENABLE-NEXT: LBB10_2: ; %for.body +; ENABLE-NEXT: LBB10_1: ; %for.body ; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: bl _something ; ENABLE-NEXT: add w20, w0, w20 ; ENABLE-NEXT: str w20, [x19] -; ENABLE-NEXT: b LBB10_2 -; ENABLE-NEXT: LBB10_3: ; %if.end -; 
ENABLE-NEXT: sub sp, x29, #16 -; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload -; ENABLE-NEXT: ret +; ENABLE-NEXT: b LBB10_1 ; ; DISABLE-LABEL: infiniteloop: ; DISABLE: ; %bb.0: ; %entry @@ -762,22 +755,15 @@ define void @infiniteloop() { ; DISABLE-NEXT: .cfi_offset w29, -16 ; DISABLE-NEXT: .cfi_offset w19, -24 ; DISABLE-NEXT: .cfi_offset w20, -32 -; DISABLE-NEXT: cbnz wzr, LBB10_3 -; DISABLE-NEXT: ; %bb.1: ; %if.then ; DISABLE-NEXT: sub x19, sp, #16 ; DISABLE-NEXT: mov sp, x19 ; DISABLE-NEXT: mov w20, wzr -; DISABLE-NEXT: LBB10_2: ; %for.body +; DISABLE-NEXT: LBB10_1: ; %for.body ; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: bl _something ; DISABLE-NEXT: add w20, w0, w20 ; DISABLE-NEXT: str w20, [x19] -; DISABLE-NEXT: b LBB10_2 -; DISABLE-NEXT: LBB10_3: ; %if.end -; DISABLE-NEXT: sub sp, x29, #16 -; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload -; DISABLE-NEXT: ret +; DISABLE-NEXT: b LBB10_1 entry: br i1 undef, label %if.then, label %if.end @@ -808,12 +794,10 @@ define void @infiniteloop2() { ; ENABLE-NEXT: .cfi_offset w29, -16 ; ENABLE-NEXT: .cfi_offset w19, -24 ; ENABLE-NEXT: .cfi_offset w20, -32 -; ENABLE-NEXT: cbnz wzr, LBB11_3 -; ENABLE-NEXT: ; %bb.1: ; %if.then ; ENABLE-NEXT: sub x8, sp, #16 ; ENABLE-NEXT: mov sp, x8 ; ENABLE-NEXT: mov w9, wzr -; ENABLE-NEXT: LBB11_2: ; %for.body +; ENABLE-NEXT: LBB11_1: ; %for.body ; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: ; InlineAsm Start ; ENABLE-NEXT: mov x10, #0 ; =0x0 @@ -824,12 +808,7 @@ define void @infiniteloop2() { ; ENABLE-NEXT: ; InlineAsm Start ; ENABLE-NEXT: nop ; ENABLE-NEXT: ; InlineAsm End -; ENABLE-NEXT: b LBB11_2 -; ENABLE-NEXT: LBB11_3: ; %if.end -; ENABLE-NEXT: sub sp, x29, #16 -; ENABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; ENABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded 
Reload -; ENABLE-NEXT: ret +; ENABLE-NEXT: b LBB11_1 ; ; DISABLE-LABEL: infiniteloop2: ; DISABLE: ; %bb.0: ; %entry @@ -841,12 +820,10 @@ define void @infiniteloop2() { ; DISABLE-NEXT: .cfi_offset w29, -16 ; DISABLE-NEXT: .cfi_offset w19, -24 ; DISABLE-NEXT: .cfi_offset w20, -32 -; DISABLE-NEXT: cbnz wzr, LBB11_3 -; DISABLE-NEXT: ; %bb.1: ; %if.then ; DISABLE-NEXT: sub x8, sp, #16 ; DISABLE-NEXT: mov sp, x8 ; DISABLE-NEXT: mov w9, wzr -; DISABLE-NEXT: LBB11_2: ; %for.body +; DISABLE-NEXT: LBB11_1: ; %for.body ; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: ; InlineAsm Start ; DISABLE-NEXT: mov x10, #0 ; =0x0 @@ -857,12 +834,7 @@ define void @infiniteloop2() { ; DISABLE-NEXT: ; InlineAsm Start ; DISABLE-NEXT: nop ; DISABLE-NEXT: ; InlineAsm End -; DISABLE-NEXT: b LBB11_2 -; DISABLE-NEXT: LBB11_3: ; %if.end -; DISABLE-NEXT: sub sp, x29, #16 -; DISABLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload -; DISABLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload -; DISABLE-NEXT: ret +; DISABLE-NEXT: b LBB11_1 entry: br i1 undef, label %if.then, label %if.end @@ -893,51 +865,43 @@ if.end: define void @infiniteloop3() { ; ENABLE-LABEL: infiniteloop3: ; ENABLE: ; %bb.0: ; %entry -; ENABLE-NEXT: cbnz wzr, LBB12_5 -; ENABLE-NEXT: ; %bb.1: ; %loop2a.preheader ; ENABLE-NEXT: mov x8, xzr ; ENABLE-NEXT: mov x9, xzr ; ENABLE-NEXT: mov x11, xzr -; ENABLE-NEXT: b LBB12_3 -; ENABLE-NEXT: LBB12_2: ; %loop2b -; ENABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1 +; ENABLE-NEXT: b LBB12_2 +; ENABLE-NEXT: LBB12_1: ; %loop2b +; ENABLE-NEXT: ; in Loop: Header=BB12_2 Depth=1 ; ENABLE-NEXT: str x10, [x11] ; ENABLE-NEXT: mov x11, x10 -; ENABLE-NEXT: LBB12_3: ; %loop1 +; ENABLE-NEXT: LBB12_2: ; %loop1 ; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: mov x10, x9 ; ENABLE-NEXT: ldr x9, [x8] -; ENABLE-NEXT: cbnz x8, LBB12_2 -; ENABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1 +; ENABLE-NEXT: cbnz x8, LBB12_1 +; ENABLE-NEXT: ; %bb.3: ; in Loop: 
Header=BB12_2 Depth=1 ; ENABLE-NEXT: mov x8, x10 ; ENABLE-NEXT: mov x11, x10 -; ENABLE-NEXT: b LBB12_3 -; ENABLE-NEXT: LBB12_5: ; %end -; ENABLE-NEXT: ret +; ENABLE-NEXT: b LBB12_2 ; ; DISABLE-LABEL: infiniteloop3: ; DISABLE: ; %bb.0: ; %entry -; DISABLE-NEXT: cbnz wzr, LBB12_5 -; DISABLE-NEXT: ; %bb.1: ; %loop2a.preheader ; DISABLE-NEXT: mov x8, xzr ; DISABLE-NEXT: mov x9, xzr ; DISABLE-NEXT: mov x11, xzr -; DISABLE-NEXT: b LBB12_3 -; DISABLE-NEXT: LBB12_2: ; %loop2b -; DISABLE-NEXT: ; in Loop: Header=BB12_3 Depth=1 +; DISABLE-NEXT: b LBB12_2 +; DISABLE-NEXT: LBB12_1: ; %loop2b +; DISABLE-NEXT: ; in Loop: Header=BB12_2 Depth=1 ; DISABLE-NEXT: str x10, [x11] ; DISABLE-NEXT: mov x11, x10 -; DISABLE-NEXT: LBB12_3: ; %loop1 +; DISABLE-NEXT: LBB12_2: ; %loop1 ; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: mov x10, x9 ; DISABLE-NEXT: ldr x9, [x8] -; DISABLE-NEXT: cbnz x8, LBB12_2 -; DISABLE-NEXT: ; %bb.4: ; in Loop: Header=BB12_3 Depth=1 +; DISABLE-NEXT: cbnz x8, LBB12_1 +; DISABLE-NEXT: ; %bb.3: ; in Loop: Header=BB12_2 Depth=1 ; DISABLE-NEXT: mov x8, x10 ; DISABLE-NEXT: mov x11, x10 -; DISABLE-NEXT: b LBB12_3 -; DISABLE-NEXT: LBB12_5: ; %end -; DISABLE-NEXT: ret +; DISABLE-NEXT: b LBB12_2 entry: br i1 undef, label %loop2a, label %body diff --git a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll index 3645718968f9e..6e6fb6f367867 100644 --- a/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll +++ b/llvm/test/CodeGen/AArch64/block-placement-optimize-branches.ll @@ -8,21 +8,14 @@ define i8 @foo_optsize(i32 %v4) optsize { ; CHECK-LABEL: foo_optsize: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cbz wzr, .LBB0_2 -; CHECK-NEXT: .LBB0_1: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_2: // %b1 -; CHECK-NEXT: cbnz w0, .LBB0_4 -; CHECK-NEXT: .LBB0_3: // %b2 +; CHECK-NEXT: cbnz w0, .LBB0_2 +; CHECK-NEXT: // %bb.1: // %b2 ; CHECK-NEXT: mov w0, #1 // 
=0x1 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB0_4: // %b1 +; CHECK-NEXT: .LBB0_2: // %b1 ; CHECK-NEXT: cmp w0, #1 -; CHECK-NEXT: b.ne .LBB0_1 -; CHECK-NEXT: // %bb.5: // %b3 -; CHECK-NEXT: cbz wzr, .LBB0_1 -; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret entry: %v2 = icmp eq i32 0, 0 br i1 %v2, label %b1, label %b4 @@ -48,21 +41,14 @@ b4: define i8 @foo_optspeed(i32 %v4) { ; CHECK-LABEL: foo_optspeed: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cbz wzr, .LBB1_2 -; CHECK-NEXT: .LBB1_1: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_2: // %b1 -; CHECK-NEXT: cbnz w0, .LBB1_4 -; CHECK-NEXT: .LBB1_3: // %b2 +; CHECK-NEXT: cbnz w0, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %b2 ; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_4: // %b1 +; CHECK-NEXT: .LBB1_2: // %b1 ; CHECK-NEXT: cmp w0, #1 -; CHECK-NEXT: b.ne .LBB1_1 -; CHECK-NEXT: // %bb.5: // %b3 -; CHECK-NEXT: cbnz wzr, .LBB1_3 -; CHECK-NEXT: b .LBB1_1 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret entry: %v2 = icmp eq i32 0, 0 br i1 %v2, label %b1, label %b4 diff --git a/llvm/test/CodeGen/AArch64/cbz_wzr.mir b/llvm/test/CodeGen/AArch64/cbz_wzr.mir index 7deea56ba23a1..5d841c26014b2 100644 --- a/llvm/test/CodeGen/AArch64/cbz_wzr.mir +++ b/llvm/test/CodeGen/AArch64/cbz_wzr.mir @@ -7,10 +7,10 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: cbz_wzr ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: successors: %bb.2(0x40000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: CBZW $wzr, %bb.2 + ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: $w0 = MOVZWi 10, 0 @@ -39,11 +39,9 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: cbnz_wzr ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: successors: %bb.1(0x40000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: CBNZW $wzr, %bb.2 - ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: bb.1: ; CHECK-NEXT: $w0 = MOVZWi 10, 0 ; CHECK-NEXT: RET undef $lr, implicit $w0 @@ -71,10 +69,10 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: tbz_wzr ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: successors: %bb.2(0x40000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBZW $wzr, 0, %bb.2 + ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: $w0 = MOVZWi 10, 0 @@ -103,11 +101,9 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: tbnz_wzr ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: successors: %bb.1(0x40000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBNZW $wzr, 0, %bb.2 - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: $w0 = MOVZWi 10, 0 ; CHECK-NEXT: RET undef $lr, implicit $w0 @@ -136,10 +132,10 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: cbz_xzr ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: successors: %bb.2(0x40000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: CBZX $xzr, %bb.2 + ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: $w0 = MOVZWi 10, 0 @@ -168,11 +164,9 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: cbnz_xzr ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: successors: %bb.1(0x40000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: CBNZX $xzr, %bb.2 - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: $w0 = MOVZWi 10, 0 ; CHECK-NEXT: RET undef $lr, implicit $w0 @@ -200,10 +194,10 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: tbz_xzr ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: successors: %bb.2(0x40000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBZX $xzr, 0, %bb.2 + ; CHECK-NEXT: 
B %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: $w0 = MOVZWi 10, 0 @@ -232,11 +226,9 @@ tracksRegLiveness: true body: | ; CHECK-LABEL: name: tbnz_xzr ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: successors: %bb.1(0x40000000) ; CHECK-NEXT: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: TBNZX $xzr, 0, %bb.2 - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: $w0 = MOVZWi 10, 0 ; CHECK-NEXT: RET undef $lr, implicit $w0 diff --git a/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll b/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll index 29427146e8a43..708ba621c26d8 100644 --- a/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll +++ b/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll @@ -21,10 +21,8 @@ define i32 @check_lr_liveness(ptr %arg) #1 { ; CHECK-NEXT: B %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1.bb: - ; CHECK-NEXT: successors: %bb.3(0x2aaaaaab), %bb.2(0x55555555) ; CHECK-NEXT: liveins: $w0, $lr ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: CBNZW $wzr, %bb.3 ; CHECK-NEXT: B %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.bb1: diff --git a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll index 6946cc23d867d..72e3a2ef59677 100644 --- a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll +++ b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll @@ -586,13 +586,12 @@ define ptr @tbnz_wzr(i1 %cmp1.not.i, ptr %locflg) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: tbz w0, #0, .LBB20_2 ; CHECK-SD-NEXT: // %bb.1: -; CHECK-SD-NEXT: tbnz wzr, #0, .LBB20_3 ; CHECK-SD-NEXT: b .LBB20_4 ; CHECK-SD-NEXT: .LBB20_2: // %opnfil.exit.thread ; CHECK-SD-NEXT: mov w8, #1 // =0x1 ; CHECK-SD-NEXT: str wzr, [x1] ; CHECK-SD-NEXT: tbz w8, #0, .LBB20_4 -; CHECK-SD-NEXT: .LBB20_3: // %if.else25 +; CHECK-SD-NEXT: // %bb.3: // %if.else25 ; CHECK-SD-NEXT: str wzr, [x1] ; CHECK-SD-NEXT: .LBB20_4: // %common.ret ; CHECK-SD-NEXT: mov x0, xzr @@ -656,7 +655,7 @@ define ptr @tbz_wzr(i1 %cmp1.not.i, ptr 
%locflg) { ; CHECK-SD-NEXT: b .LBB21_4 ; CHECK-SD-NEXT: .LBB21_2: // %opnfil.exit.thread ; CHECK-SD-NEXT: str wzr, [x1] -; CHECK-SD-NEXT: tbz wzr, #0, .LBB21_4 +; CHECK-SD-NEXT: b .LBB21_4 ; CHECK-SD-NEXT: .LBB21_3: // %if.else25 ; CHECK-SD-NEXT: str wzr, [x1] ; CHECK-SD-NEXT: .LBB21_4: // %common.ret @@ -716,13 +715,12 @@ define ptr @cbnz_wzr(i1 %cmp1.not.i, ptr %locflg) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: tbz w0, #0, .LBB22_2 ; CHECK-SD-NEXT: // %bb.1: -; CHECK-SD-NEXT: cbnz wzr, .LBB22_3 ; CHECK-SD-NEXT: b .LBB22_4 ; CHECK-SD-NEXT: .LBB22_2: // %opnfil.exit.thread ; CHECK-SD-NEXT: mov w8, #10 // =0xa ; CHECK-SD-NEXT: str wzr, [x1] ; CHECK-SD-NEXT: cbz w8, .LBB22_4 -; CHECK-SD-NEXT: .LBB22_3: // %if.else25 +; CHECK-SD-NEXT: // %bb.3: // %if.else25 ; CHECK-SD-NEXT: str wzr, [x1] ; CHECK-SD-NEXT: .LBB22_4: // %common.ret ; CHECK-SD-NEXT: mov x0, xzr @@ -784,7 +782,7 @@ define ptr @cbz_wzr(i1 %cmp1.not.i, ptr %locflg) { ; CHECK-SD-NEXT: b .LBB23_4 ; CHECK-SD-NEXT: .LBB23_2: // %opnfil.exit.thread ; CHECK-SD-NEXT: str wzr, [x1] -; CHECK-SD-NEXT: cbz wzr, .LBB23_4 +; CHECK-SD-NEXT: b .LBB23_4 ; CHECK-SD-NEXT: .LBB23_3: // %if.else25 ; CHECK-SD-NEXT: str wzr, [x1] ; CHECK-SD-NEXT: .LBB23_4: // %common.ret @@ -848,12 +846,11 @@ define i1 @avifSequenceHeaderParse() { ; CHECK-SD-NEXT: .LBB24_2: // %bb1 ; CHECK-SD-NEXT: cbz w8, .LBB24_4 ; CHECK-SD-NEXT: // %bb.3: -; CHECK-SD-NEXT: tbz xzr, #63, .LBB24_1 -; CHECK-SD-NEXT: b .LBB24_5 +; CHECK-SD-NEXT: b .LBB24_1 ; CHECK-SD-NEXT: .LBB24_4: // %bb2 ; CHECK-SD-NEXT: mov w8, #1 // =0x1 ; CHECK-SD-NEXT: tbz x8, #63, .LBB24_1 -; CHECK-SD-NEXT: .LBB24_5: // %bb4 +; CHECK-SD-NEXT: // %bb.5: // %bb4 ; CHECK-SD-NEXT: mov w8, #1 // =0x1 ; CHECK-SD-NEXT: mov w0, wzr ; CHECK-SD-NEXT: ret diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected index 
97b17d98d3472..215b11a746759 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected @@ -72,24 +72,20 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: stur xzr, [x29, #-8] -; CHECK-NEXT: cbz wzr, .LBB0_3 -; CHECK-NEXT: // %bb.1: -; CHECK-NEXT: str w8, [sp, #16] +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: ldur w8, [x29, #-8] -; CHECK-NEXT: cbz w8, .LBB0_4 -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: cbz w8, .LBB0_2 +; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: str w8, [sp, #16] -; CHECK-NEXT: b .LBB0_5 -; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: bl OUTLINED_FUNCTION_0 -; CHECK-NEXT: ldur w8, [x29, #-8] -; CHECK-NEXT: cbnz w8, .LBB0_2 -; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 -; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: .cfi_def_cfa wsp, 48 ; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload @@ -132,7 +128,6 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; ; CHECK-LABEL: OUTLINED_FUNCTION_0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: stp w9, w8, [x29, #-12] ; CHECK-NEXT: mov w9, #3 // =0x3 ; CHECK-NEXT: mov w8, #4 // =0x4 diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected index 3d379ea1faf5f..bf7cf2b54983b 100644 --- 
a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected @@ -13,24 +13,20 @@ define dso_local i32 @check_boundaries() #0 { ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: stur xzr, [x29, #-8] -; CHECK-NEXT: cbz wzr, .LBB0_3 -; CHECK-NEXT: // %bb.1: -; CHECK-NEXT: str w8, [sp, #16] +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 ; CHECK-NEXT: ldur w8, [x29, #-8] -; CHECK-NEXT: cbz w8, .LBB0_4 -; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: cbz w8, .LBB0_2 +; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: str w8, [sp, #16] -; CHECK-NEXT: b .LBB0_5 -; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: bl OUTLINED_FUNCTION_0 -; CHECK-NEXT: ldur w8, [x29, #-8] -; CHECK-NEXT: cbnz w8, .LBB0_2 -; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: bl OUTLINED_FUNCTION_0 -; CHECK-NEXT: .LBB0_5: +; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: .cfi_def_cfa wsp, 48 ; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload