diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp index 5218e39b88222..b4e0b4bf4585e 100644 --- a/llvm/lib/CodeGen/BranchFolding.cpp +++ b/llvm/lib/CodeGen/BranchFolding.cpp @@ -467,7 +467,7 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, DebugLoc dl = CurMBB->findBranchDebugLoc(); if (!dl) dl = BranchDL; - if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { + if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond)) { MachineBasicBlock *NextBB = &*I; if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->reverseBranchCondition(Cond)) { @@ -1107,7 +1107,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { MachineBasicBlock *TBB = nullptr, *FBB = nullptr; SmallVector Cond; - if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond, true)) { + if (!TII->analyzeBranch(*PBB, TBB, FBB, Cond)) { // Failing case: IBB is the target of a cbr, and we cannot reverse the // branch. SmallVector NewCond(Cond); @@ -1564,7 +1564,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // Loop: xxx; jcc Out; jmp Loop // we want: // Loop: xxx; jncc Loop; jmp Out - if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) { + if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB && + !CurCond.empty()) { SmallVector NewCond(CurCond); if (!TII->reverseBranchCondition(NewCond)) { DebugLoc Dl = MBB->findBranchDebugLoc(); diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index e8d00f4df7c86..3fff898411d7a 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -35,6 +35,7 @@ add_llvm_target(RISCVCodeGen RISCVConstantPoolValue.cpp RISCVDeadRegisterDefinitions.cpp RISCVMakeCompressible.cpp + RISCVLatePeephole.cpp RISCVExpandAtomicPseudoInsts.cpp RISCVExpandPseudoInsts.cpp RISCVFoldMemOffset.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 641e2eb4094f9..cd6045355a9ef 100644 
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -43,6 +43,9 @@ FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM,
 FunctionPass *createRISCVMakeCompressibleOptPass();
 void initializeRISCVMakeCompressibleOptPass(PassRegistry &);
 
+FunctionPass *createRISCVLatePeepholeOptPass();
+void initializeRISCVLatePeepholeOptPass(PassRegistry &);
+
 FunctionPass *createRISCVGatherScatterLoweringPass();
 void initializeRISCVGatherScatterLoweringPass(PassRegistry &);
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2fdf6bd36e88f..e0d364c4a1306 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -1005,6 +1005,102 @@ RISCVCC::CondCode RISCVCC::getOppositeBranchCondition(RISCVCC::CondCode CC) {
   }
 }
 
+// Return true if MO definitely contains the value one.
+static bool isOne(const MachineOperand &MO) {
+  if (MO.isImm())
+    return MO.getImm() == 1;
+
+  if (!MO.isReg() || !MO.getReg().isVirtual())
+    return false;
+
+  // An operand that was not copied out of an instruction has no parent
+  // through which the register info could be reached.
+  const MachineInstr *UseMI = MO.getParent();
+  if (!UseMI || !UseMI->getParent())
+    return false;
+
+  const MachineRegisterInfo &MRI =
+      UseMI->getParent()->getParent()->getRegInfo();
+  const MachineInstr *DefMI = MRI.getUniqueVRegDef(MO.getReg());
+  if (!DefMI || DefMI->getOpcode() != RISCV::ADDI)
+    return false;
+
+  // For now, just check the canonical one value (ADDI rd, x0, 1). Test the
+  // operand kinds first; getReg()/getImm() are only valid on a matching kind.
+  const MachineOperand &Src = DefMI->getOperand(1);
+  const MachineOperand &Imm = DefMI->getOperand(2);
+  return Src.isReg() && Src.getReg() == RISCV::X0 && Imm.isImm() &&
+         Imm.getImm() == 1;
+}
+
+// Return true if MO definitely contains the value zero.
+static bool isZero(const MachineOperand &MO) {
+  if (MO.isImm())
+    return MO.getImm() == 0;
+  return MO.isReg() && MO.getReg() == RISCV::X0;
+}
+
+// Fold the conditional branch described by TBB, FBB and Cond when both
+// compared operands are known to be zero or one. Only COND_EQ and COND_NE are
+// handled. On success MBB's terminators and successor list are rewritten,
+// Cond is cleared and true is returned.
+//
+// NOTE(review): TBB and FBB are passed by value, so a caller's copies still
+// describe the old terminators after a successful fold.
+bool RISCVInstrInfo::trySimplifyCondBr(
+    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    SmallVectorImpl<MachineOperand> &Cond) const {
+  if (!TBB || Cond.size() != 3)
+    return false;
+
+  // TODO: Implement for more CCs
+  const auto CC = static_cast<RISCVCC::CondCode>(Cond[0].getImm());
+  if (CC != RISCVCC::COND_EQ && CC != RISCVCC::COND_NE)
+    return false;
+
+  // The comparison can only be evaluated statically when each side is known
+  // to be either zero or one.
+  const MachineOperand &LHS = Cond[1];
+  const MachineOperand &RHS = Cond[2];
+  const bool LHSIsZero = isZero(LHS);
+  const bool RHSIsZero = isZero(RHS);
+  if ((!LHSIsZero && !isOne(LHS)) || (!RHSIsZero && !isOne(RHS)))
+    return false;
+
+  // Both sides are in {0, 1}, so they are equal exactly when they agree on
+  // being zero. A null Folded means MBB falls through.
+  const bool OperandsEqual = LHSIsZero == RHSIsZero;
+  const bool Taken = (CC == RISCVCC::COND_EQ) == OperandsEqual;
+  MachineBasicBlock *Folded = Taken ? TBB : FBB;
+
+  // Look up the debug location while the branch carrying it still exists.
+  DebugLoc DL = MBB.findBranchDebugLoc();
+
+  // At this point, it's legal to optimize. LHS and RHS refer into Cond and
+  // must not be used past the clear().
+  removeBranch(MBB);
+  Cond.clear();
+
+  // Only need to insert a branch if we're not falling through.
+  if (Folded)
+    insertBranch(MBB, Folded, nullptr, {}, DL);
+
+  // Update the successors. Remove them all and add back the correct one.
+  while (!MBB.succ_empty())
+    MBB.removeSuccessor(MBB.succ_end() - 1);
+
+  if (Folded) {
+    MBB.addSuccessor(Folded);
+  } else {
+    // If it's a fallthrough, we need to figure out where MBB is going.
+    MachineFunction::iterator Fallthrough = ++MBB.getIterator();
+    if (Fallthrough != MBB.getParent()->end())
+      MBB.addSuccessor(&*Fallthrough);
+  }
+
+  return true;
+}
+
 bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
@@ -1062,6 +1158,9 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
   // Handle a single conditional branch.
   if (NumTerminators == 1 && I->getDesc().isConditionalBranch()) {
     parseCondBranch(*I, TBB, Cond);
+    // Try to fold the conditional branch into a jump or the fallthrough.
+    if (AllowModify)
+      trySimplifyCondBr(MBB, TBB, FBB, Cond);
     return false;
   }
 
@@ -1070,6 +1169,10 @@ bool RISCVInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
       I->getDesc().isUnconditionalBranch()) {
     parseCondBranch(*std::prev(I), TBB, Cond);
     FBB = getBranchDestBlock(*I);
+    // Try to fold the conditional branch and the jump after it into a single
+    // unconditional branch.
+    if (AllowModify)
+      trySimplifyCondBr(MBB, TBB, FBB, Cond);
     return false;
   }
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 656cb38e11297..d00b6f57d10e0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -306,6 +306,26 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
   static bool isLdStSafeToPair(const MachineInstr &LdSt,
                                const TargetRegisterInfo *TRI);
 
+  /// Try to statically evaluate the conditional branch described by TBB, FBB
+  /// and Cond. Return true if the terminators of MBB were rewritten.
+  ///
+  /// If FBB is nullptr, then the input represents a conditional branch with
+  /// a fallthrough.
+  ///
+  /// For example:
+  ///   BRCOND EQ 0, 0, BB1
+  ///   BR BB2
+  ///
+  /// can be simplified to BR BB1 since 0 == 0 statically.
On the other hand,
+  ///
+  ///   BRCOND EQ 0, 1, BB1
+  ///   BR BB2
+  ///
+  /// can be simplified to BR BB2 because 0 != 1 statically. TBB and FBB are
+  /// passed by value, so the caller's copies are not updated by a fold.
+  bool trySimplifyCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                         MachineBasicBlock *FBB,
+                         SmallVectorImpl<MachineOperand> &Cond) const;
 protected:
   const RISCVSubtarget &STI;
 
diff --git a/llvm/lib/Target/RISCV/RISCVLatePeephole.cpp b/llvm/lib/Target/RISCV/RISCVLatePeephole.cpp
new file mode 100644
index 0000000000000..9537f413e2246
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVLatePeephole.cpp
@@ -0,0 +1,88 @@
+//===-- RISCVLatePeephole.cpp - Late stage peephole optimization ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file provides RISC-V late peephole optimizations.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-late-peephole"
+#define RISCV_LATE_PEEPHOLE_NAME "RISC-V Late Stage Peephole"
+
+namespace {
+
+/// Machine function pass that runs RISCVInstrInfo::trySimplifyCondBr on every
+/// block whose terminators analyzeBranch understands.
+struct RISCVLatePeepholeOpt : public MachineFunctionPass {
+  static char ID;
+
+  RISCVLatePeepholeOpt() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return RISCV_LATE_PEEPHOLE_NAME; }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+  /// Return true if the terminators of MBB were simplified.
+  bool optimizeBlock(MachineBasicBlock &MBB);
+
+  const RISCVInstrInfo *TII = nullptr;
+};
+} // namespace
+
+char RISCVLatePeepholeOpt::ID = 0;
+INITIALIZE_PASS(RISCVLatePeepholeOpt, "riscv-late-peephole",
+                RISCV_LATE_PEEPHOLE_NAME, false, false)
+
+bool RISCVLatePeepholeOpt::optimizeBlock(MachineBasicBlock &MBB) {
+  MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+  SmallVector<MachineOperand, 3> Cond;
+  if (TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false))
+    return false;
+
+  // Call trySimplifyCondBr directly (instead of letting analyzeBranch do it)
+  // so that we learn whether the block was changed.
+  return TII->trySimplifyCondBr(MBB, TBB, FBB, Cond);
+}
+
+bool RISCVLatePeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  TII = MF.getSubtarget<RISCVSubtarget>().getInstrInfo();
+
+  bool MadeChange = false;
+
+  for (MachineBasicBlock &MBB : MF)
+    MadeChange |= optimizeBlock(MBB);
+
+  return MadeChange;
+}
+
+/// Returns an instance of the RISC-V late stage peephole optimization pass.
+FunctionPass *llvm::createRISCVLatePeepholeOptPass() { + return new RISCVLatePeepholeOpt(); +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index f78e5f8147d98..a283bd02bf8fa 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -128,6 +128,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeKCFIPass(*PR); initializeRISCVDeadRegisterDefinitionsPass(*PR); initializeRISCVMakeCompressibleOptPass(*PR); + initializeRISCVLatePeepholeOptPass(*PR); initializeRISCVGatherScatterLoweringPass(*PR); initializeRISCVCodeGenPreparePass(*PR); initializeRISCVPostRAExpandPseudoPass(*PR); @@ -567,6 +568,7 @@ void RISCVPassConfig::addPreEmitPass() { addPass(createMachineCopyPropagationPass(true)); addPass(&BranchRelaxationPassID); addPass(createRISCVMakeCompressibleOptPass()); + addPass(createRISCVLatePeepholeOptPass()); } void RISCVPassConfig::addPreEmitPass2() { diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll index 338925059862c..74ec7308cb646 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv32zbb.ll @@ -357,11 +357,6 @@ define i64 @ctpop_i64(i64 %a) nounwind { define i1 @ctpop_i64_ugt_two(i64 %a) nounwind { ; RV32I-LABEL: ctpop_i64_ugt_two: ; RV32I: # %bb.0: -; RV32I-NEXT: beqz zero, .LBB6_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltiu a0, zero, 0 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB6_2: ; RV32I-NEXT: srli a2, a0, 1 ; RV32I-NEXT: lui a3, 349525 ; RV32I-NEXT: lui a4, 209715 @@ -404,11 +399,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind { ; ; RV32ZBB-LABEL: ctpop_i64_ugt_two: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: beqz zero, .LBB6_2 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltiu a0, zero, 0 -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB6_2: ; RV32ZBB-NEXT: cpop a0, a0 ; RV32ZBB-NEXT: cpop a1, a1 ; RV32ZBB-NEXT: add a0, a1, 
a0 @@ -422,11 +412,6 @@ define i1 @ctpop_i64_ugt_two(i64 %a) nounwind { define i1 @ctpop_i64_ugt_one(i64 %a) nounwind { ; RV32I-LABEL: ctpop_i64_ugt_one: ; RV32I: # %bb.0: -; RV32I-NEXT: beqz zero, .LBB7_2 -; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: snez a0, zero -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB7_2: ; RV32I-NEXT: srli a2, a0, 1 ; RV32I-NEXT: lui a3, 349525 ; RV32I-NEXT: lui a4, 209715 @@ -470,11 +455,6 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind { ; ; RV32ZBB-LABEL: ctpop_i64_ugt_one: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: beqz zero, .LBB7_2 -; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: snez a0, zero -; RV32ZBB-NEXT: ret -; RV32ZBB-NEXT: .LBB7_2: ; RV32ZBB-NEXT: cpop a0, a0 ; RV32ZBB-NEXT: cpop a1, a1 ; RV32ZBB-NEXT: add a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index f93cb65897210..29ec19b7e35a7 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -64,6 +64,7 @@ ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: RISC-V Make Compressible +; CHECK-NEXT: RISC-V Late Stage Peephole ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 976d1ee003a1f..b3698caf7f0f6 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -196,6 +196,7 @@ ; CHECK-NEXT: Machine Copy Propagation Pass ; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: RISC-V Make Compressible +; CHECK-NEXT: RISC-V Late Stage Peephole ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/RISCV/branch_zero.ll b/llvm/test/CodeGen/RISCV/branch_zero.ll index fd0979977ba3b..2c13c28647516 100644 --- 
a/llvm/test/CodeGen/RISCV/branch_zero.ll +++ b/llvm/test/CodeGen/RISCV/branch_zero.ll @@ -5,16 +5,11 @@ define void @foo(i16 %finder_idx) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .LBB0_1: # %for.body -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: # %bb.1: # %for.body ; CHECK-NEXT: slli a0, a0, 48 -; CHECK-NEXT: bltz a0, .LBB0_4 +; CHECK-NEXT: bltz a0, .LBB0_3 ; CHECK-NEXT: # %bb.2: # %while.cond.preheader.i -; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: bnez zero, .LBB0_1 -; CHECK-NEXT: # %bb.3: # %while.body -; CHECK-NEXT: .LBB0_4: # %while.cond1.preheader.i +; CHECK-NEXT: .LBB0_3: # %while.cond1.preheader.i entry: br label %for.body @@ -46,16 +41,11 @@ if.then: define void @bar(i16 %finder_idx) { ; CHECK-LABEL: bar: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: .LBB1_1: # %for.body -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: # %bb.1: # %for.body ; CHECK-NEXT: slli a0, a0, 48 -; CHECK-NEXT: bgez a0, .LBB1_4 +; CHECK-NEXT: bgez a0, .LBB1_3 ; CHECK-NEXT: # %bb.2: # %while.cond.preheader.i -; CHECK-NEXT: # in Loop: Header=BB1_1 Depth=1 -; CHECK-NEXT: li a0, 0 -; CHECK-NEXT: bnez zero, .LBB1_1 -; CHECK-NEXT: # %bb.3: # %while.body -; CHECK-NEXT: .LBB1_4: # %while.cond1.preheader.i +; CHECK-NEXT: .LBB1_3: # %while.cond1.preheader.i entry: br label %for.body diff --git a/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll b/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll index 1e72529b17f59..00689c3136517 100644 --- a/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll +++ b/llvm/test/CodeGen/RISCV/push-pop-opt-crash.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -mattr=+zcmp -verify-machineinstrs \ ; RUN: -mtriple=riscv32 -target-abi=ilp32 < %s \ ; RUN: | FileCheck %s -check-prefixes=RV32IZCMP @@ -11,40 +12,39 @@ declare dso_local void @f1() local_unnamed_addr declare dso_local void @f2() 
local_unnamed_addr -define dso_local void @f0() local_unnamed_addr { +define dso_local void @f0(i1 %c) local_unnamed_addr { ; RV32IZCMP-LABEL: f0: -; RV32IZCMP: .cfi_startproc -; RV32IZCMP-NEXT: # %bb.0: # %entry -; RV32IZCMP-NEXT: bnez zero, .LBB0_2 -; RV32IZCMP-NEXT: # %bb.1: # %if.T -; RV32IZCMP-NEXT: cm.push {ra}, -16 -; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 -; RV32IZCMP-NEXT: .cfi_offset ra, -4 -; RV32IZCMP-NEXT: call f1 -; RV32IZCMP-NEXT: cm.pop {ra}, 16 -; RV32IZCMP-NEXT: .cfi_restore ra -; RV32IZCMP-NEXT: .cfi_def_cfa_offset 0 -; RV32IZCMP-NEXT: .LBB0_2: # %if.F -; RV32IZCMP-NEXT: tail f2 -; RV32IZCMP-NEXT: .Lfunc_end0: - +; RV32IZCMP: # %bb.0: # %entry +; RV32IZCMP-NEXT: andi a0, a0, 1 +; RV32IZCMP-NEXT: beqz a0, .LBB0_2 +; RV32IZCMP-NEXT: # %bb.1: # %if.T +; RV32IZCMP-NEXT: cm.push {ra}, -16 +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV32IZCMP-NEXT: .cfi_offset ra, -4 +; RV32IZCMP-NEXT: call f1 +; RV32IZCMP-NEXT: cm.pop {ra}, 16 +; RV32IZCMP-NEXT: .cfi_restore ra +; RV32IZCMP-NEXT: .cfi_def_cfa_offset 0 +; RV32IZCMP-NEXT: .LBB0_2: # %if.F +; RV32IZCMP-NEXT: tail f2 +; ; RV64IZCMP-LABEL: f0: -; RV64IZCMP: .cfi_startproc -; RV64IZCMP-NEXT: # %bb.0: # %entry -; RV64IZCMP-NEXT: bnez zero, .LBB0_2 -; RV64IZCMP-NEXT: # %bb.1: # %if.T -; RV64IZCMP-NEXT: cm.push {ra}, -16 -; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 -; RV64IZCMP-NEXT: .cfi_offset ra, -8 -; RV64IZCMP-NEXT: call f1 -; RV64IZCMP-NEXT: cm.pop {ra}, 16 -; RV64IZCMP-NEXT: .cfi_restore ra -; RV64IZCMP-NEXT: .cfi_def_cfa_offset 0 -; RV64IZCMP-NEXT: .LBB0_2: # %if.F -; RV64IZCMP-NEXT: tail f2 -; RV64IZCMP-NEXT: .Lfunc_end0: +; RV64IZCMP: # %bb.0: # %entry +; RV64IZCMP-NEXT: andi a0, a0, 1 +; RV64IZCMP-NEXT: beqz a0, .LBB0_2 +; RV64IZCMP-NEXT: # %bb.1: # %if.T +; RV64IZCMP-NEXT: cm.push {ra}, -16 +; RV64IZCMP-NEXT: .cfi_def_cfa_offset 16 +; RV64IZCMP-NEXT: .cfi_offset ra, -8 +; RV64IZCMP-NEXT: call f1 +; RV64IZCMP-NEXT: cm.pop {ra}, 16 +; RV64IZCMP-NEXT: .cfi_restore ra +; RV64IZCMP-NEXT: 
.cfi_def_cfa_offset 0 +; RV64IZCMP-NEXT: .LBB0_2: # %if.F +; RV64IZCMP-NEXT: tail f2 + entry: - br i1 poison, label %if.T, label %if.F + br i1 %c, label %if.T, label %if.F if.T: tail call void @f1() diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll index c35f05be304cc..5251074717c93 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll @@ -14,9 +14,9 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_dst_stride, ptr nocapture noundef readonly %src1, i32 noundef signext %i_src1_stride, ptr nocapture noundef readonly %src2, i32 noundef signext %i_src2_stride, i32 noundef signext %i_width, i32 noundef signext %i_height) { ; RV32-LABEL: test1: ; RV32: # %bb.0: # %entry -; RV32-NEXT: blez a7, .LBB0_17 +; RV32-NEXT: blez a7, .LBB0_13 ; RV32-NEXT: # %bb.1: # %for.cond1.preheader.lr.ph -; RV32-NEXT: blez a6, .LBB0_17 +; RV32-NEXT: blez a6, .LBB0_13 ; RV32-NEXT: # %bb.2: # %for.cond1.preheader.us.preheader ; RV32-NEXT: addi t0, a7, -1 ; RV32-NEXT: csrr t2, vlenb @@ -25,11 +25,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: mul t5, a5, t0 ; RV32-NEXT: slli t1, t2, 1 ; RV32-NEXT: li t6, 32 -; RV32-NEXT: mv t0, t1 -; RV32-NEXT: bnez zero, .LBB0_4 -; RV32-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader ; RV32-NEXT: li t0, 32 -; RV32-NEXT: .LBB0_4: # %for.cond1.preheader.us.preheader ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: sw s0, 12(sp) # 4-byte Folded Spill @@ -41,17 +37,13 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: add t3, a0, t3 ; RV32-NEXT: add t4, a2, t4 ; RV32-NEXT: add s0, a4, t5 -; RV32-NEXT: bltu t6, t1, .LBB0_6 -; RV32-NEXT: # %bb.5: # %for.cond1.preheader.us.preheader +; RV32-NEXT: bltu t6, t1, .LBB0_4 +; RV32-NEXT: # %bb.3: # 
%for.cond1.preheader.us.preheader ; RV32-NEXT: li t1, 32 -; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us.preheader +; RV32-NEXT: .LBB0_4: # %for.cond1.preheader.us.preheader ; RV32-NEXT: add t3, t3, a6 ; RV32-NEXT: add t5, t4, a6 ; RV32-NEXT: add t4, s0, a6 -; RV32-NEXT: beqz zero, .LBB0_8 -; RV32-NEXT: # %bb.7: # %for.cond1.preheader.us.preheader -; RV32-NEXT: mv t1, t0 -; RV32-NEXT: .LBB0_8: # %for.cond1.preheader.us.preheader ; RV32-NEXT: li t0, 0 ; RV32-NEXT: sltu t5, a0, t5 ; RV32-NEXT: sltu t6, a2, t3 @@ -70,25 +62,25 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: or t1, t1, t3 ; RV32-NEXT: andi t1, t1, 1 ; RV32-NEXT: slli t2, t2, 1 -; RV32-NEXT: j .LBB0_10 -; RV32-NEXT: .LBB0_9: # %for.cond1.for.cond.cleanup3_crit_edge.us -; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1 +; RV32-NEXT: j .LBB0_6 +; RV32-NEXT: .LBB0_5: # %for.cond1.for.cond.cleanup3_crit_edge.us +; RV32-NEXT: # in Loop: Header=BB0_6 Depth=1 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: add a2, a2, a3 ; RV32-NEXT: addi t0, t0, 1 ; RV32-NEXT: add a4, a4, a5 -; RV32-NEXT: beq t0, a7, .LBB0_16 -; RV32-NEXT: .LBB0_10: # %for.cond1.preheader.us +; RV32-NEXT: beq t0, a7, .LBB0_12 +; RV32-NEXT: .LBB0_6: # %for.cond1.preheader.us ; RV32-NEXT: # =>This Loop Header: Depth=1 -; RV32-NEXT: # Child Loop BB0_13 Depth 2 -; RV32-NEXT: # Child Loop BB0_15 Depth 2 -; RV32-NEXT: beqz t1, .LBB0_12 -; RV32-NEXT: # %bb.11: # in Loop: Header=BB0_10 Depth=1 +; RV32-NEXT: # Child Loop BB0_9 Depth 2 +; RV32-NEXT: # Child Loop BB0_11 Depth 2 +; RV32-NEXT: beqz t1, .LBB0_8 +; RV32-NEXT: # %bb.7: # in Loop: Header=BB0_6 Depth=1 ; RV32-NEXT: li t4, 0 ; RV32-NEXT: li t3, 0 -; RV32-NEXT: j .LBB0_15 -; RV32-NEXT: .LBB0_12: # %vector.ph -; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1 +; RV32-NEXT: j .LBB0_11 +; RV32-NEXT: .LBB0_8: # %vector.ph +; RV32-NEXT: # in Loop: Header=BB0_6 Depth=1 ; RV32-NEXT: li t3, 0 ; RV32-NEXT: neg t4, t2 ; RV32-NEXT: and t4, t4, a6 @@ -96,8 +88,8 @@ define 
void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: li t6, 0 ; RV32-NEXT: li t5, 0 ; RV32-NEXT: vsetvli s0, zero, e8, m2, ta, ma -; RV32-NEXT: .LBB0_13: # %vector.body -; RV32-NEXT: # Parent Loop BB0_10 Depth=1 +; RV32-NEXT: .LBB0_9: # %vector.body +; RV32-NEXT: # Parent Loop BB0_6 Depth=1 ; RV32-NEXT: # => This Inner Loop Header: Depth=2 ; RV32-NEXT: add s0, a2, t6 ; RV32-NEXT: add s1, a4, t6 @@ -112,12 +104,12 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: or s2, t6, t5 ; RV32-NEXT: vs2r.v v8, (s0) ; RV32-NEXT: mv t6, s1 -; RV32-NEXT: bnez s2, .LBB0_13 -; RV32-NEXT: # %bb.14: # %middle.block -; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1 -; RV32-NEXT: beq t4, a6, .LBB0_9 -; RV32-NEXT: .LBB0_15: # %for.body4.us -; RV32-NEXT: # Parent Loop BB0_10 Depth=1 +; RV32-NEXT: bnez s2, .LBB0_9 +; RV32-NEXT: # %bb.10: # %middle.block +; RV32-NEXT: # in Loop: Header=BB0_6 Depth=1 +; RV32-NEXT: beq t4, a6, .LBB0_5 +; RV32-NEXT: .LBB0_11: # %for.body4.us +; RV32-NEXT: # Parent Loop BB0_6 Depth=1 ; RV32-NEXT: # => This Inner Loop Header: Depth=2 ; RV32-NEXT: add t5, a2, t4 ; RV32-NEXT: add t6, a4, t4 @@ -133,9 +125,9 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: srli t5, t5, 1 ; RV32-NEXT: or t6, t6, t3 ; RV32-NEXT: sb t5, 0(s0) -; RV32-NEXT: bnez t6, .LBB0_15 -; RV32-NEXT: j .LBB0_9 -; RV32-NEXT: .LBB0_16: +; RV32-NEXT: bnez t6, .LBB0_11 +; RV32-NEXT: j .LBB0_5 +; RV32-NEXT: .LBB0_12: ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload @@ -144,7 +136,7 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: .cfi_restore s2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 -; RV32-NEXT: .LBB0_17: # %for.cond.cleanup +; RV32-NEXT: .LBB0_13: # %for.cond.cleanup ; RV32-NEXT: ret ; ; RV64P670-LABEL: 
test1: diff --git a/llvm/test/CodeGen/RISCV/simplify-condbr.ll b/llvm/test/CodeGen/RISCV/simplify-condbr.ll new file mode 100644 index 0000000000000..3f9a73607103a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/simplify-condbr.ll @@ -0,0 +1,179 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #0 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) +declare void @llvm.assume(i1 noundef) #0 + +declare fastcc i1 @S_reginclass() + +declare fastcc ptr @Perl_av_store(i64) + +define fastcc i32 @S_regrepeat(ptr %startposp, i32 %max, i8 %0, i1 %cmp343) nounwind { +; CHECK-LABEL: S_regrepeat: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-NEXT: andi a2, a2, 255 +; CHECK-NEXT: addi a4, a2, -19 +; CHECK-NEXT: li a5, 2 +; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: bltu a4, a5, .LBB0_4 +; CHECK-NEXT: # %bb.1: # %entry +; CHECK-NEXT: li a1, 1 +; CHECK-NEXT: bltu a1, a2, .LBB0_8 +; CHECK-NEXT: # %bb.2: # %do_exactf +; CHECK-NEXT: andi a3, a3, 1 +; CHECK-NEXT: beqz a3, .LBB0_10 +; CHECK-NEXT: # %bb.3: # %land.rhs251 +; CHECK-NEXT: lw zero, 0(zero) +; CHECK-NEXT: li s0, 1 +; CHECK-NEXT: bnez s0, .LBB0_9 +; CHECK-NEXT: j .LBB0_8 +; CHECK-NEXT: .LBB0_4: # %sw.bb336 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: li s0, 0 +; CHECK-NEXT: andi s2, a3, 1 +; CHECK-NEXT: .LBB0_5: # %land.rhs345 +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: call S_reginclass +; CHECK-NEXT: andi a0, a0, 1 +; CHECK-NEXT: beqz a0, .LBB0_7 +; CHECK-NEXT: # %bb.6: # %while.body350 +; 
CHECK-NEXT: # in Loop: Header=BB0_5 Depth=1 +; CHECK-NEXT: addiw s0, s0, 1 +; CHECK-NEXT: bnez s2, .LBB0_5 +; CHECK-NEXT: j .LBB0_8 +; CHECK-NEXT: .LBB0_7: +; CHECK-NEXT: mv a0, s1 +; CHECK-NEXT: bnez s0, .LBB0_9 +; CHECK-NEXT: .LBB0_8: # %if.else1492 +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: .LBB0_9: # %if.end1497 +; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld s2, 0(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_10: +; CHECK-NEXT: j .LBB0_8 +entry: + switch i8 %0, label %if.else1492 [ + i8 19, label %sw.bb336 + i8 20, label %sw.bb336 + i8 1, label %do_exactf + i8 0, label %do_exactf + ] + +do_exactf: ; preds = %entry, %entry + br i1 %cmp343, label %land.rhs251, label %if.end334 + +land.rhs251: ; preds = %do_exactf + %bcmp414 = load volatile i32, ptr null, align 4 + br label %if.end334 + +if.end334: ; preds = %land.rhs251, %do_exactf + %hardcount.7 = phi i32 [ 0, %do_exactf ], [ 1, %land.rhs251 ] + call void @llvm.lifetime.end.p0(i64 0, ptr null) + br label %sw.epilog1489 + +sw.bb336: ; preds = %entry, %entry + br label %land.rhs345 + +land.rhs345: ; preds = %while.body350, %sw.bb336 + %hardcount.8634 = phi i32 [ %inc356, %while.body350 ], [ 0, %sw.bb336 ] + %call347 = call fastcc i1 @S_reginclass() + br i1 %call347, label %while.body350, label %sw.epilog1489 + +while.body350: ; preds = %land.rhs345 + %inc356 = add i32 %hardcount.8634, 1 + br i1 %cmp343, label %land.rhs345, label %if.end1497 + +sw.epilog1489: ; preds = %land.rhs345, %if.end334 + %hardcount.20 = phi i32 [ %hardcount.7, %if.end334 ], [ %hardcount.8634, %land.rhs345 ] + %tobool1490.not = icmp eq i32 %hardcount.20, 0 + br i1 %tobool1490.not, label %if.else1492, label %if.end1497 + +if.else1492: ; preds = %sw.epilog1489, %entry + br label %if.end1497 + +if.end1497: ; preds = %if.else1492, %sw.epilog1489, %while.body350 + %c.0 = phi 
i32 [ 0, %if.else1492 ], [ %max, %sw.epilog1489 ], [ 0, %while.body350 ] + ret i32 %c.0 +} + +define ptr @Perl_pp_refassign(ptr %PL_stack_sp, i1 %tobool.not, i1 %tobool3.not, i1 %cond1) nounwind { +; CHECK-LABEL: Perl_pp_refassign: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: beqz a1, .LBB1_3 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a1, 0 +; CHECK-NEXT: andi a2, a2, 1 +; CHECK-NEXT: bnez a2, .LBB1_4 +; CHECK-NEXT: .LBB1_2: # %cond.true4 +; CHECK-NEXT: ld a0, 0(a0) +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: bnez a0, .LBB1_5 +; CHECK-NEXT: j .LBB1_6 +; CHECK-NEXT: .LBB1_3: # %cond.true +; CHECK-NEXT: ld a1, 0(a0) +; CHECK-NEXT: andi a2, a2, 1 +; CHECK-NEXT: beqz a2, .LBB1_2 +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: j .LBB1_6 +; CHECK-NEXT: .LBB1_5: # %sw.bb85 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; CHECK-NEXT: ld a0, 0(a1) +; CHECK-NEXT: call Perl_av_store +; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: .LBB1_6: # %common.ret +; CHECK-NEXT: li a0, 0 +; CHECK-NEXT: ret +entry: + br i1 %tobool.not, label %cond.end, label %cond.true + +cond.true: ; preds = %entry + %0 = load ptr, ptr %PL_stack_sp, align 8 + br label %cond.end + +cond.end: ; preds = %cond.true, %entry + %cond = phi ptr [ %0, %cond.true ], [ null, %entry ] + br i1 %tobool3.not, label %cond.end7, label %cond.true4 + +cond.true4: ; preds = %cond.end + %1 = load ptr, ptr %PL_stack_sp, align 8 + %2 = icmp ne ptr %1, null + br label %cond.end7 + +cond.end7: ; preds = %cond.true4, %cond.end + %cond84 = phi i1 [ %2, %cond.true4 ], [ false, %cond.end ] + br i1 %cond1, label %if.end48, label %sw.bb + +sw.bb: ; preds = %cond.end7 + call void @llvm.assume(i1 %tobool.not) + br label %if.end48 + +if.end48: ; preds = %sw.bb, %cond.end7 + br i1 %cond84, label %sw.bb85, label %common.ret + +common.ret: ; preds = %sw.bb85, %if.end48 + ret ptr null + +sw.bb85: ; preds = %if.end48 + %3 = load i64, ptr 
%cond, align 8 + %call125 = call fastcc ptr @Perl_av_store(i64 %3) + br label %common.ret +} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }