diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 0ff178e1f1959..a14907b26f841 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -71,6 +71,7 @@ add_llvm_target(RISCVCodeGen RISCVVLOptimizer.cpp RISCVVMV0Elimination.cpp RISCVZacasABIFix.cpp + RISCVZilsdOptimizer.cpp GISel/RISCVCallLowering.cpp GISel/RISCVInstructionSelector.cpp GISel/RISCVLegalizerInfo.cpp diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index ae9410193efe1..d2378e1837ba3 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -93,6 +93,9 @@ void initializeRISCVPushPopOptPass(PassRegistry &); FunctionPass *createRISCVLoadStoreOptPass(); void initializeRISCVLoadStoreOptPass(PassRegistry &); +FunctionPass *createRISCVPreAllocZilsdOptPass(); +void initializeRISCVPreAllocZilsdOptPass(PassRegistry &); + FunctionPass *createRISCVZacasABIFixPass(); void initializeRISCVZacasABIFixPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 40c05e8602553..26ccff4eb9587 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -194,6 +194,10 @@ def HasStdExtZilsd : Predicate<"Subtarget->hasStdExtZilsd()">, AssemblerPredicate<(all_of FeatureStdExtZilsd), "'Zilsd' (Load/Store pair instructions)">; +def FeatureZilsd4ByteAlign + : SubtargetFeature<"zilsd-4byte-align", "AllowZilsd4ByteAlign", "true", + "Allow 4-byte alignment for Zilsd LD/SD instructions">; + // Multiply Extensions def FeatureStdExtZmmul diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td index a3203f288b545..4fc859f2547c1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZilsd.td @@ -47,6 +47,23 @@ let Predicates = [HasStdExtZilsd, IsRV32] in { def PseudoLD_RV32 : PseudoLoad<"ld", GPRPairRV32>; def PseudoSD_RV32 : PseudoStore<"sd", GPRPairRV32>; +// Pseudo instructions for load/store optimization with 2 separate registers +def PseudoLD_RV32_OPT : + Pseudo<(outs GPR:$rd1, GPR:$rd2), + (ins GPR:$rs1, simm12_lo:$imm12), [], "", ""> { + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 0; +} + +def PseudoSD_RV32_OPT : + Pseudo<(outs), + (ins GPR:$rs1, GPR:$rs2, GPR:$rs3, simm12_lo:$imm12), [], "", ""> { + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 1; +} + def : InstAlias<"ld $rd, (${rs1})", (LD_RV32 GPRPairRV32:$rd, GPR:$rs1, 0), 0>; def : InstAlias<"sd $rs2, (${rs1})", (SD_RV32 GPRPairRV32:$rs2, GPR:$rs1, 0), 0>; } diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp index 115a96e01f6c9..badc3f06b3e53 100644 --- a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp @@ -11,6 +11,9 @@ // paired instruction, leveraging hardware support for paired memory accesses. // Much of the pairing logic is adapted from the AArch64LoadStoreOpt pass. // +// Post-allocation Zilsd decomposition: Fixes invalid LD/SD instructions if +// register allocation didn't provide suitable consecutive registers. 
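+// For example, a PseudoLD_RV32_OPT whose destinations were allocated to x11
+// and x13 cannot be encoded as LD (there is no x11/x13 pair register), so it
+// is decomposed back into LW x11, 0(base) and LW x13, 4(base).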
+//
 // NOTE: The AArch64LoadStoreOpt pass performs additional optimizations such as
 // merging zero store instructions, promoting loads that read directly from a
 // preceding store, and merging base register updates with load/store
@@ -23,6 +26,7 @@
 #include "RISCV.h"
 #include "RISCVTargetMachine.h"
+#include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/MC/TargetRegistry.h"
@@ -38,6 +42,8 @@ using namespace llvm;
 // pairs.
 static cl::opt<unsigned> LdStLimit("riscv-load-store-scan-limit",
                                    cl::init(128), cl::Hidden);
+STATISTIC(NumLD2LW, "Number of LD instructions split back to LW");
+STATISTIC(NumSD2SW, "Number of SD instructions split back to SW");
 
 namespace {
 
@@ -75,6 +81,14 @@ struct RISCVLoadStoreOpt : public MachineFunctionPass {
   mergePairedInsns(MachineBasicBlock::iterator I,
                    MachineBasicBlock::iterator Paired, bool MergeForward);
 
+  // Post reg-alloc zilsd part
+  bool fixInvalidRegPairOp(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator &MBBI);
+  bool isConsecutiveRegPair(Register First, Register Second);
+  void splitLdSdIntoTwo(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator &MBBI, bool IsLoad);
+  int64_t getLoadStoreOffset(const MachineInstr &MI);
+
 private:
   AliasAnalysis *AA;
   MachineRegisterInfo *MRI;
@@ -91,9 +105,8 @@ INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false,
 bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   if (skipFunction(Fn.getFunction()))
     return false;
+
   const RISCVSubtarget &Subtarget = Fn.getSubtarget<RISCVSubtarget>();
-  if (!Subtarget.useMIPSLoadStorePairs())
-    return false;
 
   bool MadeChange = false;
   TII = Subtarget.getInstrInfo();
@@ -103,18 +116,34 @@ bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   ModifiedRegUnits.init(*TRI);
   UsedRegUnits.init(*TRI);
 
-  for (MachineBasicBlock &MBB : Fn) {
-    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+  if (Subtarget.useMIPSLoadStorePairs()) {
+    for (MachineBasicBlock &MBB : Fn) {
+      LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+
+      for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+           MBBI != E;) {
+        if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
+            tryToPairLdStInst(MBBI))
+          MadeChange = true;
+        else
+          ++MBBI;
+      }
+    }
+  }
 
-    for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
-         MBBI != E;) {
-      if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) &&
-          tryToPairLdStInst(MBBI))
-        MadeChange = true;
-      else
-        ++MBBI;
+  if (!Subtarget.is64Bit() && Subtarget.hasStdExtZilsd()) {
+    for (auto &MBB : Fn) {
+      for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E;) {
+        if (fixInvalidRegPairOp(MBB, MBBI)) {
+          MadeChange = true;
+          // Iterator was updated by fixInvalidRegPairOp
+        } else {
+          ++MBBI;
+        }
+      }
     }
   }
+
   return MadeChange;
 }
 
@@ -395,6 +424,236 @@ RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I,
   return NextI;
 }
 
+//===----------------------------------------------------------------------===//
+// Post reg-alloc zilsd pass implementation
+//===----------------------------------------------------------------------===//
+
+// Helper function to extract offset from load/store operands
+int64_t RISCVLoadStoreOpt::getLoadStoreOffset(const MachineInstr &MI) {
+  const MachineOperand &OffsetOp = MI.getOperand(2);
+
+  // Handle immediate offset
+  if (OffsetOp.isImm())
+    return OffsetOp.getImm();
+
+  // Handle symbolic operands with MO_LO flag (from MergeBaseOffset)
+  if (OffsetOp.getTargetFlags() & RISCVII::MO_LO)
+    if (OffsetOp.isGlobal() || OffsetOp.isCPI() || OffsetOp.isBlockAddress() ||
+        OffsetOp.isSymbol())
+      return OffsetOp.getOffset();
+
+  return 0;
+}
+
+bool RISCVLoadStoreOpt::isConsecutiveRegPair(Register First, Register Second) {
+  // Special case: X0 is always a valid first register. The X0 pair reads as
+  // zero and discards writes, so it does not need a consecutive partner.
+  if (First == RISCV::X0)
+    return true;
+
+  // Check if registers form a valid even/odd pair for Zilsd
+  unsigned FirstNum = TRI->getEncodingValue(First);
+  unsigned SecondNum = TRI->getEncodingValue(Second);
+
+  // Must be consecutive and first must be even
+  return (FirstNum % 2 == 0) && (SecondNum == FirstNum + 1);
+}
+
+void RISCVLoadStoreOpt::splitLdSdIntoTwo(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator &MBBI,
+                                         bool IsLoad) {
+  MachineInstr *MI = &*MBBI;
+  DebugLoc DL = MI->getDebugLoc();
+
+  Register FirstReg = MI->getOperand(0).getReg();
+  Register SecondReg = MI->getOperand(1).getReg();
+  Register BaseReg = MI->getOperand(2).getReg();
+
+  // Handle both immediate and symbolic operands for offset
+  const MachineOperand &OffsetOp = MI->getOperand(3);
+  int BaseOffset;
+  if (OffsetOp.isImm())
+    BaseOffset = OffsetOp.getImm();
+  else
+    // For symbolic operands, extract the embedded offset
+    BaseOffset = OffsetOp.getOffset();
+
+  unsigned Opc = IsLoad ? RISCV::LW : RISCV::SW;
+
+  // Create two separate instructions
+  if (IsLoad) {
+    auto MIB1 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+                    .addReg(FirstReg, RegState::Define)
+                    .addReg(BaseReg);
+
+    auto MIB2 = BuildMI(MBB, MBBI, DL, TII->get(Opc))
+                    .addReg(SecondReg, RegState::Define)
+                    .addReg(BaseReg);
+
+    // Add offset operands - preserve symbolic references
+    if (OffsetOp.isImm()) {
+      MIB1.addImm(BaseOffset);
+      MIB2.addImm(BaseOffset + 4);
+    } else if (OffsetOp.isGlobal()) {
+      MIB1.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset,
+                            OffsetOp.getTargetFlags());
+      MIB2.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset + 4,
+                            OffsetOp.getTargetFlags());
+    } else if (OffsetOp.isCPI()) {
+      MIB1.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset,
+                                OffsetOp.getTargetFlags());
+      MIB2.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset + 4,
+                                OffsetOp.getTargetFlags());
+    } else if (OffsetOp.isSymbol()) {
+      MIB1.addExternalSymbol(OffsetOp.getSymbolName(),
+                             OffsetOp.getTargetFlags());
+      MIB2.addExternalSymbol(OffsetOp.getSymbolName(),
+                             OffsetOp.getTargetFlags());
+    } else if (OffsetOp.isBlockAddress()) {
+      MIB1.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset,
+                           OffsetOp.getTargetFlags());
+      MIB2.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset + 4,
+                           OffsetOp.getTargetFlags());
+    }
+
+    // Copy memory operands if the original instruction had them
+    // FIXME: This is overly conservative; the new instruction accesses 4 bytes,
+    // not 8.
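+    // Cloning the original (8-byte) memory operand onto each 4-byte LW
+    // over-approximates the accessed location: that is safe for alias
+    // analysis, but it may block some later memory optimizations.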
+ if (MI->memoperands_begin() != MI->memoperands_end()) { + MIB1.cloneMemRefs(*MI); + MIB2.cloneMemRefs(*MI); + } + + ++NumLD2LW; + LLVM_DEBUG(dbgs() << "Split LD back to two LW instructions\n"); + } else { + auto MIB1 = + BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(FirstReg).addReg(BaseReg); + + auto MIB2 = + BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SecondReg).addReg(BaseReg); + + // Add offset operands - preserve symbolic references + if (OffsetOp.isImm()) { + MIB1.addImm(BaseOffset); + MIB2.addImm(BaseOffset + 4); + } else if (OffsetOp.isGlobal()) { + MIB1.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset, + OffsetOp.getTargetFlags()); + MIB2.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset + 4, + OffsetOp.getTargetFlags()); + } else if (OffsetOp.isCPI()) { + MIB1.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset, + OffsetOp.getTargetFlags()); + MIB2.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset + 4, + OffsetOp.getTargetFlags()); + } else if (OffsetOp.isSymbol()) { + MIB1.addExternalSymbol(OffsetOp.getSymbolName(), + OffsetOp.getTargetFlags()); + MIB2.addExternalSymbol(OffsetOp.getSymbolName(), + OffsetOp.getTargetFlags()); + } else if (OffsetOp.isBlockAddress()) { + MIB1.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset, + OffsetOp.getTargetFlags()); + MIB2.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset + 4, + OffsetOp.getTargetFlags()); + } + + // Copy memory operands if the original instruction had them + // FIXME: This is overly conservative; the new instruction accesses 4 bytes, + // not 8. + if (MI->memoperands_begin() != MI->memoperands_end()) { + MIB1.cloneMemRefs(*MI); + MIB2.cloneMemRefs(*MI); + } + + ++NumSD2SW; + LLVM_DEBUG(dbgs() << "Split SD back to two SW instructions\n"); + } + + // Remove the original paired instruction and update iterator + MBBI = MBB.erase(MBBI); +} + +bool RISCVLoadStoreOpt::fixInvalidRegPairOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = &*MBBI; + unsigned Opcode = MI->getOpcode(); + + // Check if this is a Zilsd pseudo that needs fixing + if (Opcode != RISCV::PseudoLD_RV32_OPT && Opcode != RISCV::PseudoSD_RV32_OPT) + return false; + + bool IsLoad = (Opcode == RISCV::PseudoLD_RV32_OPT); + + Register FirstReg = MI->getOperand(0).getReg(); + Register SecondReg = MI->getOperand(1).getReg(); + + // Check if we have valid consecutive registers + if (!isConsecutiveRegPair(FirstReg, SecondReg)) { + // Need to split back into two instructions + splitLdSdIntoTwo(MBB, MBBI, IsLoad); + return true; + } + + // Registers are valid, convert to real LD/SD instruction + Register BaseReg = MI->getOperand(2).getReg(); + DebugLoc DL = MI->getDebugLoc(); + // Handle both immediate and symbolic operands for offset + const MachineOperand &OffsetOp = MI->getOperand(3); + int BaseOffset; + if (OffsetOp.isImm()) + BaseOffset = OffsetOp.getImm(); + else + // For symbolic operands, extract the embedded offset + BaseOffset = OffsetOp.getOffset(); + + unsigned RealOpc = IsLoad ? 
RISCV::LD_RV32 : RISCV::SD_RV32;
+
+  // Create register pair from the two individual registers
+  unsigned RegPair = TRI->getMatchingSuperReg(FirstReg, RISCV::sub_gpr_even,
+                                              &RISCV::GPRPairRegClass);
+  // Create the real LD/SD instruction with register pair
+  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(RealOpc));
+
+  if (IsLoad) {
+    // For LD, the register pair is the destination
+    MIB.addReg(RegPair, RegState::Define);
+  } else {
+    // For SD, the register pair is the source
+    MIB.addReg(RegPair);
+  }
+
+  MIB.addReg(BaseReg);
+
+  // Add offset operand - preserve symbolic references
+  if (OffsetOp.isImm())
+    MIB.addImm(BaseOffset);
+  else if (OffsetOp.isGlobal())
+    MIB.addGlobalAddress(OffsetOp.getGlobal(), BaseOffset,
+                         OffsetOp.getTargetFlags());
+  else if (OffsetOp.isCPI())
+    MIB.addConstantPoolIndex(OffsetOp.getIndex(), BaseOffset,
+                             OffsetOp.getTargetFlags());
+  else if (OffsetOp.isSymbol())
+    MIB.addExternalSymbol(OffsetOp.getSymbolName(), OffsetOp.getTargetFlags());
+  else if (OffsetOp.isBlockAddress())
+    MIB.addBlockAddress(OffsetOp.getBlockAddress(), BaseOffset,
+                        OffsetOp.getTargetFlags());
+
+  // Copy memory operands if the original instruction had them
+  if (MI->memoperands_begin() != MI->memoperands_end())
+    MIB.cloneMemRefs(*MI);
+
+  LLVM_DEBUG(dbgs() << "Converted pseudo to real instruction: " << *MIB
+                    << "\n");
+
+  // Remove the pseudo instruction and update iterator
+  MBBI = MBB.erase(MBBI);
+
+  return true;
+}
+
 // Returns an instance of the Load / Store Optimization pass.
 FunctionPass *llvm::createRISCVLoadStoreOptPass() {
   return new RISCVLoadStoreOpt();
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 40b641680b2ce..58c37ae3bb29b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -853,6 +853,44 @@ bool RISCVRegisterInfo::getRegAllocationHints(
   const MachineRegisterInfo *MRI = &MF.getRegInfo();
   auto &Subtarget = MF.getSubtarget<RISCVSubtarget>();
 
+  // Handle RegPairEven/RegPairOdd hints for Zilsd register pairs
+  std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(VirtReg);
+  unsigned HintType = Hint.first;
+  Register Partner = Hint.second;
+
+  if (HintType == RISCVRI::RegPairEven || HintType == RISCVRI::RegPairOdd) {
+    // Check if we want the even or odd register of a consecutive pair
+    bool WantOdd = (HintType == RISCVRI::RegPairOdd);
+
+    // First priority: Check if partner is already allocated
+    if (Partner.isVirtual() && VRM && VRM->hasPhys(Partner)) {
+      MCPhysReg PartnerPhys = VRM->getPhys(Partner);
+      // Calculate the exact register we need for consecutive pairing
+      MCPhysReg TargetReg = PartnerPhys + (WantOdd ? 1 : -1);
+
+      // Verify it's valid and available
+      if (RISCV::GPRRegClass.contains(TargetReg) &&
+          is_contained(Order, TargetReg)) {
+        Hints.push_back(TargetReg);
+      }
+    }
+
+    // Second priority: prefer registers in the allocation order whose
+    // encoding parity matches the requested (even/odd) position
+    for (MCPhysReg PhysReg : Order) {
+      if (!PhysReg)
+        continue;
+
+      unsigned RegNum = getEncodingValue(PhysReg);
+      // Check if this register matches the even/odd requirement
+      bool IsOdd = (RegNum % 2 != 0);
+
+      if ((WantOdd && IsOdd) || (!WantOdd && !IsOdd))
+        Hints.push_back(PhysReg);
+    }
+  }
+
   bool BaseImplRetVal = TargetRegisterInfo::getRegAllocationHints(
       VirtReg, Order, Hints, MF, VRM, Matrix);
 
@@ -994,6 +1032,35 @@ bool RISCVRegisterInfo::getRegAllocationHints(
   return BaseImplRetVal;
 }
 
+void RISCVRegisterInfo::updateRegAllocHint(Register Reg, Register NewReg,
+                                           MachineFunction &MF) const {
+  MachineRegisterInfo *MRI = &MF.getRegInfo();
+  std::pair<unsigned, Register> Hint = MRI->getRegAllocationHint(Reg);
+
+  // Handle RegPairEven/RegPairOdd hints for Zilsd register pairs
+  if ((Hint.first == RISCVRI::RegPairOdd ||
+       Hint.first == RISCVRI::RegPairEven) &&
+      Hint.second.isVirtual()) {
+    // If 'Reg' is one half of an even/odd register pair and has now been
+    // changed (e.g. coalesced) into a different register, the hint on the
+    // other half must be updated to preserve the pairing relationship.
+    Register Partner = Hint.second;
+    std::pair<unsigned, Register> PartnerHint =
+        MRI->getRegAllocationHint(Partner);
+
+    // Make sure partner still points to us
+    if (PartnerHint.second == Reg) {
+      // Update partner to point to NewReg instead of Reg
+      MRI->setRegAllocationHint(Partner, PartnerHint.first, NewReg);
+
+      // If NewReg is virtual, set up the reciprocal hint.
+      // NewReg takes over Reg's role, so it gets the SAME hint type as Reg.
+      if (NewReg.isVirtual())
+        MRI->setRegAllocationHint(NewReg, Hint.first, Partner);
+    }
+  }
+}
+
 Register
 RISCVRegisterInfo::findVRegWithEncoding(const TargetRegisterClass &RegClass,
                                         uint16_t Encoding) const {
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index 67726db504122..f29f85e4987f6 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -37,6 +37,13 @@ enum : uint8_t {
   NFShiftMask = 0b111 << NFShift,
 };
 
+/// Register allocation hints for Zilsd register pairs
+enum {
+  // Used for Zilsd LD/SD register pairs
+  RegPairOdd = 1,
+  RegPairEven = 2,
+};
+
 /// \returns the IsVRegClass for the register class.
static inline bool isVRegClass(uint8_t TSFlags) { return (TSFlags & IsVRegClassShiftMask) >> IsVRegClassShift; @@ -143,6 +150,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo { const MachineFunction &MF, const VirtRegMap *VRM, const LiveRegMatrix *Matrix) const override; + void updateRegAllocHint(Register Reg, Register NewReg, + MachineFunction &MF) const override; + Register findVRegWithEncoding(const TargetRegisterClass &RegClass, uint16_t Encoding) const; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index f81b1e1260ee3..7a104353f6c60 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -136,6 +136,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVPushPopOptPass(*PR); initializeRISCVIndirectBranchTrackingPass(*PR); initializeRISCVLoadStoreOptPass(*PR); + initializeRISCVPreAllocZilsdOptPass(*PR); initializeRISCVExpandAtomicPseudoPass(*PR); initializeRISCVRedundantCopyEliminationPass(*PR); initializeRISCVAsmPrinterPass(*PR); @@ -594,6 +595,8 @@ void RISCVPassConfig::addPreRegAlloc() { if (TM->getOptLevel() != CodeGenOptLevel::None) { addPass(createRISCVMergeBaseOffsetOptPass()); addPass(createRISCVVLOptimizerPass()); + // Add Zilsd pre-allocation load/store optimization + addPass(createRISCVPreAllocZilsdOptPass()); } addPass(createRISCVInsertReadWriteCSRPass()); diff --git a/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp new file mode 100644 index 0000000000000..1c563ed9ad2fd --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp @@ -0,0 +1,612 @@ +//===-- RISCVZilsdOptimizer.cpp - RISC-V Zilsd Load/Store Optimizer ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that performs load/store optimizations for the +// RISC-V Zilsd extension. It combines pairs of 32-bit load/store instructions +// into single 64-bit LD/SD instructions when possible. +// +// The pass runs in two phases: +// 1. Pre-allocation: Reschedules loads/stores to bring consecutive memory +// accesses closer together and forms LD/SD pairs with register hints. +// 2. Post-allocation: Fixes invalid LD/SD instructions if register allocation +// didn't provide suitable consecutive registers. 
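+//
+// For example, "LW a, 0(base); LW b, 4(base)" is fused into one
+// PseudoLD_RV32_OPT, and the register allocator is hinted to assign a/b to an
+// even/odd pair such as x12/x13 so the pseudo can later become a real LD.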
+//
+// Note: the second phase is integrated into the RISCVLoadStoreOptimizer pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVMachineFunctionInfo.h"
+#include "RISCVRegisterInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-zilsd-opt"
+
+STATISTIC(NumLDFormed, "Number of LD instructions formed");
+STATISTIC(NumSDFormed, "Number of SD instructions formed");
+
+static cl::opt<bool>
+    DisableZilsdOpt("disable-riscv-zilsd-opt", cl::Hidden, cl::init(false),
+                    cl::desc("Disable Zilsd load/store optimization"));
+
+static cl::opt<unsigned> MaxRescheduleDistance(
+    "riscv-zilsd-max-reschedule-distance", cl::Hidden, cl::init(10),
+    cl::desc("Maximum distance for rescheduling load/store instructions"));
+
+namespace {
+
+//===----------------------------------------------------------------------===//
+// Pre-allocation Zilsd optimization pass
+//===----------------------------------------------------------------------===//
+class RISCVPreAllocZilsdOpt : public MachineFunctionPass {
+public:
+  static char ID;
+
+  RISCVPreAllocZilsdOpt() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "RISC-V pre-allocation Zilsd load/store optimization";
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AAResultsWrapperPass>();
+    AU.addRequired<MachineDominatorTreeWrapperPass>();
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+private:
+  bool isMemoryOp(const MachineInstr &MI);
+  bool rescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+  bool canFormLdSdPair(MachineInstr *Op0, MachineInstr *Op1, unsigned &NewOpc,
+                       Register &FirstReg, Register &SecondReg,
+                       Register &BaseReg, int &Offset);
+  bool rescheduleOps(MachineBasicBlock *MBB,
+                     SmallVectorImpl<MachineInstr *> &Ops, unsigned Base,
+                     bool IsLoad,
+                     DenseMap<MachineInstr *, unsigned> &MI2LocMap);
+  bool isSafeToMove(MachineInstr *MI, MachineInstr *Target, bool MoveForward);
+  int getMemoryOpOffset(const MachineInstr &MI);
+
+  const RISCVSubtarget *STI;
+  const RISCVInstrInfo *TII;
+  const RISCVRegisterInfo *TRI;
+  MachineRegisterInfo *MRI;
+  AliasAnalysis *AA;
+  MachineDominatorTree *DT;
+};
+
+} // end anonymous namespace
+
+char RISCVPreAllocZilsdOpt::ID = 0;
+
+INITIALIZE_PASS_BEGIN(RISCVPreAllocZilsdOpt, "riscv-prera-zilsd-opt",
+                      "RISC-V pre-allocation Zilsd optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(RISCVPreAllocZilsdOpt, "riscv-prera-zilsd-opt",
+                    "RISC-V pre-allocation Zilsd optimization", false, false)
+
+//===----------------------------------------------------------------------===//
+// Pre-allocation pass implementation
+//===----------------------------------------------------------------------===//
+
+bool RISCVPreAllocZilsdOpt::runOnMachineFunction(MachineFunction &MF) {
+  if (DisableZilsdOpt || skipFunction(MF.getFunction()))
+    return false;
+
+  STI = &MF.getSubtarget<RISCVSubtarget>();
+
+  // Only run on RV32 with Zilsd extension
+  if (STI->is64Bit() || !STI->hasStdExtZilsd())
+    return false;
+
+  TII = STI->getInstrInfo();
+  TRI = STI->getRegisterInfo();
+  MRI = &MF.getRegInfo();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
+  DT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
+
+  bool Modified = false;
+  for (auto &MBB : MF) {
+    Modified |= rescheduleLoadStoreInstrs(&MBB);
+  }
+
+  return Modified;
+}
+
+int RISCVPreAllocZilsdOpt::getMemoryOpOffset(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case RISCV::LW:
+  case RISCV::SW: {
+    // For LW/SW, the offset is in operand 2
+    const MachineOperand &OffsetOp = MI.getOperand(2);
+
+    // Handle immediate offset
+    if (OffsetOp.isImm())
+      return OffsetOp.getImm();
+
+    // Handle symbolic operands with MO_LO flag (from MergeBaseOffset)
+    if (OffsetOp.getTargetFlags() & RISCVII::MO_LO)
+      if (OffsetOp.isGlobal() || OffsetOp.isCPI() ||
+          OffsetOp.isBlockAddress() || OffsetOp.isSymbol())
+        return OffsetOp.getOffset();
+
+    break;
+  }
+  default:
+    break;
+  }
+  return 0;
+}
+
+bool RISCVPreAllocZilsdOpt::canFormLdSdPair(MachineInstr *Op0,
+                                            MachineInstr *Op1, unsigned &NewOpc,
+                                            Register &FirstReg,
+                                            Register &SecondReg,
+                                            Register &BaseReg, int &Offset) {
+  unsigned Opcode = Op0->getOpcode();
+
+  // Check if we have two LW or two SW instructions
+  if (Opcode != Op1->getOpcode())
+    return false;
+
+  if (Opcode == RISCV::LW)
+    NewOpc = RISCV::PseudoLD_RV32_OPT;
+  else if (Opcode == RISCV::SW)
+    NewOpc = RISCV::PseudoSD_RV32_OPT;
+  else
+    return false;
+
+  if (!Op0->hasOneMemOperand() || !Op1->hasOneMemOperand())
+    return false;
+
+  // Check if operands are compatible for merging
+  const MachineOperand &OffsetOp0 = Op0->getOperand(2);
+  const MachineOperand &OffsetOp1 = Op1->getOperand(2);
+
+  // Both must be the same type
+  if (OffsetOp0.getType() != OffsetOp1.getType())
+    return false;
+
+  // If they're symbolic, they must reference the same symbol
+  if (!OffsetOp0.isImm()) {
+    // Check if both have MO_LO flag
+    if ((OffsetOp0.getTargetFlags() & RISCVII::MO_LO) !=
+        (OffsetOp1.getTargetFlags() & RISCVII::MO_LO))
+      return false;
+
+    // For global addresses, must be the same global
+    if (OffsetOp0.isGlobal()) {
+      if (!OffsetOp1.isGlobal() ||
+          OffsetOp0.getGlobal() != OffsetOp1.getGlobal())
+        return false;
+    }
+    // For constant pool indices, must be the same index
+    else if (OffsetOp0.isCPI()) {
+      if (!OffsetOp1.isCPI() || OffsetOp0.getIndex() != OffsetOp1.getIndex())
+        return false;
+    }
+    // For symbols, must be the same symbol name
+    else if (OffsetOp0.isSymbol()) {
+      if (!OffsetOp1.isSymbol() ||
+          strcmp(OffsetOp0.getSymbolName(), OffsetOp1.getSymbolName()) != 0)
+        return false;
+    }
+    // For block addresses, must be the same block
+    else if (OffsetOp0.isBlockAddress()) {
+      if (!OffsetOp1.isBlockAddress() ||
+          OffsetOp0.getBlockAddress() != OffsetOp1.getBlockAddress())
+        return false;
+    }
+  }
+
+  // Get offsets and check they are consecutive
+  int Offset0 = getMemoryOpOffset(*Op0);
+  int Offset1 = getMemoryOpOffset(*Op1);
+
+  // Offsets must be 4 bytes apart
+  if (std::abs(Offset1 - Offset0) != 4)
+    return false;
+
+  // Make sure we have the same base register
+  Register Base0 = Op0->getOperand(1).getReg();
+  Register Base1 = Op1->getOperand(1).getReg();
+  if (Base0 != Base1)
+    return false;
+
+  // Set output parameters
+  if (Offset0 < Offset1) {
+    FirstReg = Op0->getOperand(0).getReg();
+    SecondReg = Op1->getOperand(0).getReg();
+    Offset = Offset0;
+  } else {
+    FirstReg = Op1->getOperand(0).getReg();
+    SecondReg = Op0->getOperand(0).getReg();
+    Offset = Offset1;
+  }
+
+  BaseReg = Base0;
+
+  // Check that the two destination registers are different
+  if (FirstReg == SecondReg)
+    return false;
+
+  // For loads, check that neither destination register is the same as the base
+  // register. This prevents register reuse issues where the first load
+  // overwrites the base.
+  if (Opcode == RISCV::LW) {
+    if (FirstReg == BaseReg || SecondReg == BaseReg)
+      return false;
+  }
+
+  return true;
+}
+
+bool RISCVPreAllocZilsdOpt::isSafeToMove(MachineInstr *MI, MachineInstr *Target,
+                                         bool MoveForward) {
+  MachineBasicBlock *MBB = MI->getParent();
+  MachineBasicBlock::iterator Start = MI->getIterator();
+  MachineBasicBlock::iterator End = Target->getIterator();
+
+  if (!MoveForward)
+    std::swap(Start, End);
+
+  // Increment Start to skip the current instruction
+  if (Start != MBB->end())
+    ++Start;
+
+  Register DefReg = MI->getOperand(0).getReg();
+  Register BaseReg = MI->getOperand(1).getReg();
+
+  unsigned ScanCount = 0;
+  for (auto It = Start; It != End; ++It, ++ScanCount) {
+    // Don't move across calls or terminators
+    if (It->isCall() || It->isTerminator()) {
+      LLVM_DEBUG(dbgs() << "Cannot move across call/terminator: " << *It);
+      return false;
+    }
+
+    // Don't move across memory barriers or other instructions with unmodeled
+    // side effects
+    if (It->hasUnmodeledSideEffects()) {
+      LLVM_DEBUG(dbgs() << "Cannot move across instruction with side effects: "
+                        << *It);
+      return false;
+    }
+
+    // Check if the base register is modified
+    if (It->modifiesRegister(BaseReg, TRI)) {
+      LLVM_DEBUG(dbgs() << "Base register " << BaseReg
+                        << " modified by: " << *It);
+      return false;
+    }
+
+    // For loads, check if the loaded value is used
+    if (MI->mayLoad() &&
+        (It->readsRegister(DefReg, TRI) || It->modifiesRegister(DefReg, TRI))) {
+      LLVM_DEBUG(dbgs() << "Destination register " << DefReg
+                        << " used by: " << *It);
+      return false;
+    }
+
+    // For stores, check if the stored register is modified
+    if (MI->mayStore() && It->modifiesRegister(DefReg, TRI)) {
+      LLVM_DEBUG(dbgs() << "Source register " << DefReg
+                        << " modified by: " << *It);
+      return false;
+    }
+
+    // Check for memory operation interference
+    if (MI->mayLoadOrStore() && It->mayLoadOrStore() &&
+        It->mayAlias(AA, *MI, /*UseTBAA*/ false)) {
+      LLVM_DEBUG(dbgs() << "Memory operation interference detected\n");
+      return false;
+    }
+  }
+
+  return true;
+}
+
+bool RISCVPreAllocZilsdOpt::rescheduleOps(
+    MachineBasicBlock *MBB, SmallVectorImpl<MachineInstr *> &Ops,
+    unsigned Base, bool IsLoad,
+    DenseMap<MachineInstr *, unsigned> &MI2LocMap) {
+  // Sort by offset
+  llvm::sort(Ops.begin(), Ops.end(), [this](MachineInstr *A, MachineInstr *B) {
+    return getMemoryOpOffset(*A) < getMemoryOpOffset(*B);
+  });
+
+  bool Modified = false;
+
+  // Try to pair consecutive operations
+  for (size_t i = 0; i + 1 < Ops.size(); i++) {
+    MachineInstr *Op0 = Ops[i];
+    MachineInstr *Op1 = Ops[i + 1];
+
+    // Skip if either instruction was already processed
+    if (!Op0->getParent() || !Op1->getParent())
+      continue;
+
+    unsigned NewOpc;
+    Register FirstReg, SecondReg, BaseReg;
+    int Offset;
+
+    if (!canFormLdSdPair(Op0, Op1, NewOpc, FirstReg, SecondReg, BaseReg,
+                         Offset))
+      continue;
+
+    // Check if we can safely and profitably move the instructions together
+    SmallPtrSet<MachineInstr *, 4> MemOps;
+    SmallSet<unsigned, 4> MemRegs;
+    MemOps.insert(Op0);
+    MemRegs.insert(Op0->getOperand(0).getReg().id());
+
+    // Use MI2LocMap to determine which instruction appears later in program
+    // order
+    bool Op1IsLater = MI2LocMap[Op1] >
MI2LocMap[Op0]; + + // For loads: move later instruction up (backwards) to earlier instruction + // For stores: move earlier instruction down (forwards) to later instruction + MachineInstr *MoveInstr, *TargetInstr; + if (IsLoad) { + // For loads: move the later instruction to the earlier one + MoveInstr = Op1IsLater ? Op1 : Op0; + TargetInstr = Op1IsLater ? Op0 : Op1; + } else { + // For stores: move the earlier instruction to the later one + MoveInstr = Op1IsLater ? Op0 : Op1; + TargetInstr = Op1IsLater ? Op1 : Op0; + } + + unsigned Distance = Op1IsLater ? MI2LocMap[Op1] - MI2LocMap[Op0] + : MI2LocMap[Op0] - MI2LocMap[Op1]; + if (!isSafeToMove(MoveInstr, TargetInstr, !IsLoad) || + Distance > MaxRescheduleDistance) + continue; + + // Move the instruction to the target position + MachineBasicBlock::iterator InsertPos = TargetInstr->getIterator(); + ++InsertPos; + + // If we need to move an instruction, do it now + if (MoveInstr != TargetInstr) + MBB->splice(InsertPos, MBB, MoveInstr->getIterator()); + + // Create the paired instruction + MachineInstrBuilder MIB; + DebugLoc DL = Op0->getDebugLoc(); + + if (IsLoad) { + MIB = BuildMI(*MBB, InsertPos, DL, TII->get(NewOpc)) + .addReg(FirstReg, RegState::Define) + .addReg(SecondReg, RegState::Define) + .addReg(BaseReg); + ++NumLDFormed; + LLVM_DEBUG(dbgs() << "Formed LD: " << *MIB << "\n"); + } else { + MIB = BuildMI(*MBB, InsertPos, DL, TII->get(NewOpc)) + .addReg(FirstReg) + .addReg(SecondReg) + .addReg(BaseReg); + ++NumSDFormed; + LLVM_DEBUG(dbgs() << "Formed SD: " << *MIB << "\n"); + } + + // Add the offset operand - preserve symbolic references + const MachineOperand &OffsetOp = (Offset == getMemoryOpOffset(*Op0)) + ? Op0->getOperand(2) + : Op1->getOperand(2); + + if (OffsetOp.isImm()) + MIB.addImm(Offset); + else if (OffsetOp.isGlobal()) + MIB.addGlobalAddress(OffsetOp.getGlobal(), Offset, + OffsetOp.getTargetFlags()); + else if (OffsetOp.isCPI()) + MIB.addConstantPoolIndex(OffsetOp.getIndex(), Offset, + OffsetOp.getTargetFlags()); + else if (OffsetOp.isSymbol()) + MIB.addExternalSymbol(OffsetOp.getSymbolName(), + OffsetOp.getTargetFlags()); + else if (OffsetOp.isBlockAddress()) + MIB.addBlockAddress(OffsetOp.getBlockAddress(), Offset, + OffsetOp.getTargetFlags()); + + // Copy memory operands + MIB.cloneMergedMemRefs({Op0, Op1}); + + // Add register allocation hints for consecutive registers + // RISC-V Zilsd requires even/odd register pairs + // Only set hints for virtual registers (physical registers already have + // encoding) + if (FirstReg.isVirtual() && SecondReg.isVirtual()) { + // For virtual registers, we can't determine even/odd yet, but we can hint + // that they should be allocated as a consecutive pair + MRI->setRegAllocationHint(FirstReg, RISCVRI::RegPairEven, SecondReg); + MRI->setRegAllocationHint(SecondReg, RISCVRI::RegPairOdd, FirstReg); + } + + // Remove the original instructions + Op0->eraseFromParent(); + Op1->eraseFromParent(); + + Modified = true; + + // Skip the next instruction since we've already processed it + i++; + } + + return Modified; +} + +bool RISCVPreAllocZilsdOpt::isMemoryOp(const MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + if (Opcode != RISCV::LW && Opcode != RISCV::SW) + return false; + + if (!MI.getOperand(1).isReg()) + return false; + + // When no memory operands are present, conservatively assume unaligned, + // volatile, unfoldable. 
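+  // With only +zilsd, LW/SW are fused only when the access is known 8-byte
+  // aligned; +zilsd-4byte-align relaxes this to 4 bytes, and
+  // unaligned-scalar-mem drops the alignment requirement entirely (see below).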
+  if (!MI.hasOneMemOperand())
+    return false;
+
+  const MachineMemOperand *MMO = *MI.memoperands_begin();
+
+  // Check alignment: the default requirement is 8 bytes, but 4-byte alignment
+  // is allowed with the zilsd-4byte-align tune feature. If unaligned scalar
+  // memory is enabled, allow any alignment.
+  Align RequiredAlign = STI->enableUnalignedScalarMem() ? Align(1)
+                        : STI->allowZilsd4ByteAlign()   ? Align(4)
+                                                        : Align(8);
+  if (MMO->getAlign() < RequiredAlign)
+    return false;
+
+  if (MMO->isVolatile() || MMO->isAtomic())
+    return false;
+
+  // A store of an undef value could probably be eliminated entirely, but for
+  // now we just want to avoid making a mess of it.
+  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
+    return false;
+
+  // Likewise don't mess with references to undefined addresses.
+  if (MI.getOperand(1).isUndef())
+    return false;
+
+  return true;
+}
+
+bool RISCVPreAllocZilsdOpt::rescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+  bool Modified = false;
+
+  // Process the basic block in windows delimited by calls, terminators,
+  // or instructions with duplicate base+offset pairs
+  MachineBasicBlock::iterator MBBI = MBB->begin();
+  MachineBasicBlock::iterator E = MBB->end();
+
+  while (MBBI != E) {
+    // Map from instruction to its location in the current window
+    DenseMap<MachineInstr *, unsigned> MI2LocMap;
+
+    // Map from base register to list of load/store instructions
+    using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
+    using BaseVec = SmallVector<unsigned, 4>;
+    Base2InstMap Base2LdsMap;
+    Base2InstMap Base2StsMap;
+    BaseVec LdBases;
+    BaseVec StBases;
+
+    unsigned Loc = 0;
+
+    // Build the current window of instructions
+    for (; MBBI != E; ++MBBI) {
+      MachineInstr &MI = *MBBI;
+
+      // Stop at barriers (calls and terminators)
+      if (MI.isCall() || MI.isTerminator()) {
+        // Move past the barrier for next iteration
+        ++MBBI;
+        break;
+      }
+
+      // Track instruction location in window
+      if (!MI.isDebugInstr())
+        MI2LocMap[&MI] = ++Loc;
+
+      // Skip non-memory operations
+      if (!isMemoryOp(MI))
+        continue;
+
+      bool IsLd = (MI.getOpcode() == RISCV::LW);
+      Register Base = MI.getOperand(1).getReg();
+      int Offset = getMemoryOpOffset(MI);
+      bool StopHere = false;
+
+      // Lambda to find or add base register entries
+      auto FindBases = [&](Base2InstMap &Base2Ops, BaseVec &Bases) {
+        auto [BI, Inserted] = Base2Ops.try_emplace(Base.id());
+        if (Inserted) {
+          // First time seeing this base register
+          BI->second.push_back(&MI);
+          Bases.push_back(Base.id());
+          return;
+        }
+        // Check if we've seen this exact base+offset before
+        for (const MachineInstr *PrevMI : BI->second) {
+          if (Offset == getMemoryOpOffset(*PrevMI)) {
+            // Found duplicate base+offset - stop here to process current window
+            StopHere = true;
+            break;
+          }
+        }
+        if (!StopHere)
+          BI->second.push_back(&MI);
+      };
+
+      if (IsLd)
+        FindBases(Base2LdsMap, LdBases);
+      else
+        FindBases(Base2StsMap, StBases);
+
+      if (StopHere) {
+        // Found a duplicate (a base+offset combination already seen in this
+        // window). Backtrack and process the current window; the scan resumes
+        // at this instruction, which opens the next window, and --Loc undoes
+        // the location it was assigned above.
+        --Loc;
+        break;
+      }
+    }
+
+    // Process the current window - reschedule loads
+    for (unsigned Base : LdBases) {
+      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
+      if (Lds.size() > 1) {
+        Modified |= rescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+      }
+    }
+
+    // Process the current window - reschedule stores
+    for (unsigned Base : StBases) {
+      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
+      if (Sts.size() > 1) {
+        Modified |= rescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+      }
+    }
+  }
+
+  return Modified;
+}
+
+//===----------------------------------------------------------------------===//
+// Pass creation functions
+//===----------------------------------------------------------------------===//
+
+FunctionPass *llvm::createRISCVPreAllocZilsdOptPass() {
+  return new RISCVPreAllocZilsdOpt();
+}
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index ea08061221fd4..ff6dda44fafe3 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -125,6 +125,7 @@
 ; CHECK-NEXT:       RISC-V Merge Base Offset
 ; CHECK-NEXT:       MachineDominator Tree Construction
 ; CHECK-NEXT:       RISC-V VL Optimizer
+; CHECK-NEXT:       RISC-V pre-allocation Zilsd load/store optimization
 ; CHECK-NEXT:       RISC-V Insert Read/Write CSR Pass
 ; CHECK-NEXT:       RISC-V Insert Write VXRM Pass
 ; CHECK-NEXT:       RISC-V Landing Pad Setup
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 1a7a72d3e072b..a1a40ada87a88 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -287,6 +287,7 @@
 ; CHECK-NEXT:   zihintpause - 'Zihintpause' (Pause Hint).
 ; CHECK-NEXT:   zihpm - 'Zihpm' (Hardware Performance Counters).
 ; CHECK-NEXT:   zilsd - 'Zilsd' (Load/Store Pair Instructions).
+; CHECK-NEXT:   zilsd-4byte-align - Allow 4-byte alignment for Zilsd LD/SD instructions.
 ; CHECK-NEXT:   zimop - 'Zimop' (May-Be-Operations).
 ; CHECK-NEXT:   zk - 'Zk' (Standard scalar cryptography extension).
 ; CHECK-NEXT:   zkn - 'Zkn' (NIST Algorithm Suite).
diff --git a/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-postra.mir b/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-postra.mir new file mode 100644 index 0000000000000..4303576e282b4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-postra.mir @@ -0,0 +1,165 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# NOTE: Test expansion of PseudoLD_RV32_OPT/PseudoSD_RV32_OPT after register allocation +# RUN: llc -mtriple=riscv32 -mattr=+zilsd -run-pass riscv-load-store-opt %s -o - | FileCheck %s +--- | + define i32 @expand_pseudold_valid(ptr %0) { + %2 = load i32, ptr %0, align 4 + %3 = getelementptr inbounds i32, ptr %0, i32 1 + %4 = load i32, ptr %3, align 4 + %5 = add i32 %2, %4 + ret i32 %5 + } + + define void @expand_pseudosd_valid(ptr %0, i32 %1, i32 %2) { + store i32 %1, ptr %0, align 4 + %4 = getelementptr inbounds i32, ptr %0, i32 1 + store i32 %2, ptr %4, align 4 + ret void + } + + define i32 @expand_pseudold_invalid_pair(ptr %0) { + %2 = load i32, ptr %0, align 4 + %3 = getelementptr inbounds i32, ptr %0, i32 1 + %4 = load i32, ptr %3, align 4 + %5 = add i32 %2, %4 + ret i32 %5 + } + + define void @expand_pseudosd_invalid_pair(ptr %0, i32 %1, i32 %2) { + store i32 %1, ptr %0, align 4 + %4 = getelementptr inbounds i32, ptr %0, i32 1 + store i32 %2, ptr %4, align 4 + ret void + } + + define void @store_zero_combine_valid(ptr %0) { + store i32 0, ptr %0, align 8 + %2 = getelementptr inbounds i32, ptr %0, i32 1 + store i32 0, ptr %2, align 8 + ret void + } + + @global_array = external global [100 x i32] + + define i32 @expand_pseudold_invalid_symbolic() { + ret i32 0 + } +... +--- +# Valid consecutive even/odd register pair - should expand to LD_RV32 +name: expand_pseudold_valid +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + + ; PseudoLD_RV32_OPT with consecutive even/odd registers (x12, x13) + ; CHECK-LABEL: name: expand_pseudold_valid + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x12_x13 = LD_RV32 $x10, 0 + ; CHECK-NEXT: $x10 = ADD killed $x12, killed $x13 + ; CHECK-NEXT: PseudoRET implicit $x10 + $x12, $x13 = PseudoLD_RV32_OPT $x10, 0 + $x10 = ADD killed $x12, killed $x13 + PseudoRET implicit $x10 + +... +--- +# Valid consecutive even/odd register pair - should expand to SD_RV32 +name: expand_pseudosd_valid +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x12, $x13 + + ; PseudoSD_RV32_OPT with consecutive even/odd registers (x12, x13) + ; CHECK-LABEL: name: expand_pseudosd_valid + ; CHECK: liveins: $x10, $x12, $x13 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SD_RV32 $x12_x13, $x10, 0 + ; CHECK-NEXT: PseudoRET + PseudoSD_RV32_OPT $x12, $x13, $x10, 0 + PseudoRET + +... +--- +# Invalid register pair (not consecutive) - should decompose back to LW +name: expand_pseudold_invalid_pair +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + + ; PseudoLD_RV32_OPT with non-consecutive registers (x11, x13) + ; Should decompose back to two LW instructions + ; CHECK-LABEL: name: expand_pseudold_invalid_pair + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x11 = LW $x10, 0 + ; CHECK-NEXT: $x13 = LW $x10, 4 + ; CHECK-NEXT: $x10 = ADD killed $x11, killed $x13 + ; CHECK-NEXT: PseudoRET implicit $x10 + $x11, $x13 = PseudoLD_RV32_OPT $x10, 0 + $x10 = ADD killed $x11, killed $x13 + PseudoRET implicit $x10 + +... 
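+# Zilsd addresses a register pair through its even member, so (x12,x13) maps
+# onto $x12_x13, while a non-consecutive pair such as (x11,x13) has no pair
+# register and must be decomposed.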
+--- +# Invalid register pair (not even/odd) - should decompose back to SW +name: expand_pseudosd_invalid_pair +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10, $x11, $x14 + + ; PseudoSD_RV32_OPT with non-consecutive registers (x11, x14) + ; Should decompose back to two SW instructions + ; CHECK-LABEL: name: expand_pseudosd_invalid_pair + ; CHECK: liveins: $x10, $x11, $x14 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SW $x11, $x10, 0 + ; CHECK-NEXT: SW $x14, $x10, 4 + ; CHECK-NEXT: PseudoRET + PseudoSD_RV32_OPT $x11, $x14, $x10, 0 + PseudoRET + +... +--- +# Test store zero combinations - zeros don't need consecutive pairs +name: store_zero_combine_valid +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + ; CHECK-LABEL: name: store_zero_combine_valid + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: SD_RV32 $x0_pair, $x10, 0 + ; CHECK-NEXT: PseudoRET + PseudoSD_RV32_OPT $x0, $x0, $x10, 0 + PseudoRET + +... +--- +# Test invalid register pair with symbolic operands - should split back to LW +name: expand_pseudold_invalid_symbolic +tracksRegLiveness: false +body: | + bb.0: + liveins: $x10 + + ; PseudoLD_RV32_OPT with symbolic operand and non-consecutive registers (x11, x14) + ; Should decompose back to two LW instructions preserving symbolic references + ; CHECK-LABEL: name: expand_pseudold_invalid_symbolic + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $x11 = LW $x10, target-flags(riscv-lo) @global_array + ; CHECK-NEXT: $x14 = LW $x10, target-flags(riscv-lo) @global_array + 4 + ; CHECK-NEXT: $x10 = ADD killed $x11, killed $x14 + ; CHECK-NEXT: PseudoRET implicit $x10 + $x11, $x14 = PseudoLD_RV32_OPT $x10, target-flags(riscv-lo) @global_array + $x10 = ADD killed $x11, killed $x14 + PseudoRET implicit $x10 + +... 
diff --git a/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir b/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir new file mode 100644 index 0000000000000..21cdf100fad6c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/zilsd-ldst-opt-prera.mir @@ -0,0 +1,1277 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=riscv32 -mattr=+zilsd -run-pass riscv-prera-zilsd-opt %s -o - | FileCheck %s +# RUN: llc -mtriple=riscv32 -mattr=+zilsd,+zilsd-4byte-align -run-pass riscv-prera-zilsd-opt %s -o - | FileCheck %s --check-prefix=CHECK-4BYTE +--- | + declare void @external_func() + + define i32 @basic_load_combine(ptr %0) { + %2 = load i32, ptr %0, align 4 + %3 = getelementptr inbounds i32, ptr %0, i32 1 + %4 = load i32, ptr %3, align 4 + %5 = add i32 %2, %4 + ret i32 %5 + } + + define void @basic_store_combine(ptr %0, i32 %1, i32 %2) { + store i32 %1, ptr %0, align 4 + %4 = getelementptr inbounds i32, ptr %0, i32 1 + store i32 %2, ptr %4, align 4 + ret void + } + + define i32 @basic_load_combine_8_byte_aligned(ptr %0) { + %2 = load i32, ptr %0, align 8 + %3 = getelementptr inbounds i32, ptr %0, i32 1 + %4 = load i32, ptr %3, align 8 + %5 = add i32 %2, %4 + ret i32 %5 + } + + define void @basic_store_combine_8_byte_aligned(ptr %0, i32 %1, i32 %2) { + store i32 %1, ptr %0, align 8 + %4 = getelementptr inbounds i32, ptr %0, i32 1 + store i32 %2, ptr %4, align 8 + ret void + } + + + define i32 @non_consecutive_offsets(ptr %0) { + %2 = load i32, ptr %0, align 4 + %3 = getelementptr inbounds i32, ptr %0, i32 2 + %4 = load i32, ptr %3, align 4 + %5 = add i32 %2, %4 + ret i32 %5 + } + + define i32 @different_base_regs(ptr %0, ptr %1) { + %3 = load i32, ptr %0, align 4 + %4 = getelementptr inbounds i32, ptr %1, i32 1 + %5 = load i32, ptr %4, align 4 + %6 = add i32 %3, %5 + ret i32 %6 + } + + define i32 @call_blocks_optimization(ptr %0) { + %2 = load i32, ptr %0, align 4 + call void @external_func() + %3 = getelementptr inbounds i32, ptr %0, i32 1 + %4 = load i32, ptr %3, align 4 + %5 = add i32 %2, %4 + ret i32 %5 + } + + define i32 @terminator_blocks_optimization(ptr %0, i32 %1) { + %3 = load i32, ptr %0, align 4 + %4 = icmp eq i32 %1, %3 + br i1 %4, label %5, label %5 + 5: + %6 = getelementptr inbounds i32, ptr %0, i32 1 + %7 = load i32, ptr %6, align 4 + %8 = add i32 %3, %7 + ret i32 %8 + } + + define i32 @dependency_interference(ptr %0) { + %2 = load i32, ptr %0, align 4 + %3 = add i32 %2, 1 + %4 = getelementptr inbounds i32, ptr %0, i32 1 + %5 = load i32, ptr %4, align 4 + %6 = add i32 %3, %5 + ret i32 %6 + } + + define i32 @memory_aliasing(ptr %0, i32 %1) { + %3 = load i32, ptr %0, align 4 + %4 = getelementptr inbounds i32, ptr %0, i32 2 + store i32 %1, ptr %4, align 4 + %5 = getelementptr inbounds i32, ptr %0, i32 1 + %6 = load i32, ptr %5, align 4 + %7 = add i32 %3, %6 + ret i32 %7 + } + + define i32 @multiple_pairs(ptr %0, ptr %1) { + %3 = load i32, ptr %0, align 4 + %4 = getelementptr inbounds i32, ptr %0, i32 1 + %5 = load i32, ptr %4, align 4 + %6 = add i32 %3, %5 + %7 = getelementptr inbounds i32, ptr %1, i32 2 + %8 = load i32, ptr %7, align 4 + %9 = getelementptr inbounds i32, ptr %1, i32 3 + %10 = load i32, ptr %9, align 4 + %11 = add i32 %8, %10 + %12 = add i32 %6, %11 + ret i32 %12 + } + + define i32 @high_register_pressure(ptr %0) { + %2 = getelementptr inbounds i32, ptr %0, i32 4 + %3 = load i32, ptr %2, align 4 + %4 = getelementptr inbounds i32, ptr %0, i32 5 + %5 = load i32, ptr %4, align 4 + %6 = getelementptr inbounds i32, 
ptr %0, i32 6 + %7 = load i32, ptr %6, align 4 + %8 = getelementptr inbounds i32, ptr %0, i32 7 + %9 = load i32, ptr %8, align 4 + %10 = getelementptr inbounds i32, ptr %0, i32 8 + %11 = load i32, ptr %10, align 4 + %12 = getelementptr inbounds i32, ptr %0, i32 9 + %13 = load i32, ptr %12, align 4 + %14 = getelementptr inbounds i32, ptr %0, i32 10 + %15 = load i32, ptr %14, align 4 + %16 = getelementptr inbounds i32, ptr %0, i32 11 + %17 = load i32, ptr %16, align 4 + %18 = getelementptr inbounds i32, ptr %0, i32 12 + %19 = load i32, ptr %18, align 4 + %20 = getelementptr inbounds i32, ptr %0, i32 13 + %21 = load i32, ptr %20, align 4 + %22 = getelementptr inbounds i32, ptr %0, i32 14 + %23 = load i32, ptr %22, align 4 + %24 = getelementptr inbounds i32, ptr %0, i32 15 + %25 = load i32, ptr %24, align 4 + %26 = load i32, ptr %0, align 4 + %27 = getelementptr inbounds i32, ptr %0, i32 1 + %28 = load i32, ptr %27, align 4 + %29 = add i32 %3, %5 + %30 = add i32 %7, %9 + %31 = add i32 %11, %13 + %32 = add i32 %15, %17 + %33 = add i32 %19, %21 + %34 = add i32 %23, %25 + %35 = add i32 %26, %28 + %36 = add i32 %29, %30 + %37 = add i32 %31, %32 + %38 = add i32 %33, %34 + %39 = add i32 %35, %36 + %40 = add i32 %37, %38 + %41 = add i32 %39, %40 + ret i32 %41 + } + + define i32 @reverse_order_loads(ptr %0) { + %2 = getelementptr inbounds i32, ptr %0, i32 1 + %3 = load i32, ptr %2, align 4 + %4 = load i32, ptr %0, align 4 + %5 = add i32 %3, %4 + ret i32 %5 + } + + define i32 @offset_calculation(ptr %0) { + %2 = getelementptr inbounds i8, ptr %0, i32 100 + %3 = load i32, ptr %2, align 4 + %4 = getelementptr inbounds i8, ptr %0, i32 104 + %5 = load i32, ptr %4, align 4 + %6 = add i32 %3, %5 + ret i32 %6 + } + + define i32 @large_offsets(ptr %0) { + %2 = getelementptr inbounds i8, ptr %0, i32 2040 + %3 = load i32, ptr %2, align 4 + %4 = getelementptr inbounds i8, ptr %0, i32 2044 + %5 = load i32, ptr %4, align 4 + %6 = add i32 %3, %5 + ret i32 %6 + } + + define i32 @negative_offsets(ptr %0) { + %2 = getelementptr inbounds i8, ptr %0, i32 -8 + %3 = load i32, ptr %2, align 4 + %4 = getelementptr inbounds i8, ptr %0, i32 -4 + %5 = load i32, ptr %4, align 4 + %6 = add i32 %3, %5 + ret i32 %6 + } + + define i32 @register_reuse(ptr %0) { + %2 = load i32, ptr %0, align 4 + %3 = getelementptr inbounds i32, ptr %0, i32 1 + %4 = load i32, ptr %3, align 4 + %5 = add i32 %2, %4 + ret i32 %5 + } + + define i32 @volatile_loads(ptr %0) { + %2 = load volatile i32, ptr %0, align 4 + %3 = getelementptr inbounds i32, ptr %0, i32 1 + %4 = load volatile i32, ptr %3, align 4 + %5 = add i32 %2, %4 + ret i32 %5 + } + + define i32 @store_dependency(ptr %0, i32 %1) { + %3 = load i32, ptr %0, align 4 + %4 = getelementptr inbounds i32, ptr %0, i32 1 + store i32 %1, ptr %4, align 4 + %5 = load i32, ptr %4, align 4 + %6 = add i32 %3, %5 + ret i32 %6 + } + + define i32 @three_loads(ptr %0) { + %2 = load i32, ptr %0, align 4 + %3 = getelementptr inbounds i32, ptr %0, i32 1 + %4 = load i32, ptr %3, align 4 + %5 = getelementptr inbounds i32, ptr %0, i32 2 + %6 = load i32, ptr %5, align 4 + %7 = add i32 %2, %4 + %8 = add i32 %7, %6 + ret i32 %8 + } + + define i32 @distance_exceeds_max(ptr %0, i32 %1) { + %3 = load i32, ptr %0, align 4 + %4 = add i32 %3, %1 + %5 = add i32 %4, %1 + %6 = add i32 %5, %1 + %7 = add i32 %6, %1 + %8 = add i32 %7, %1 + %9 = add i32 %8, %1 + %10 = add i32 %9, %1 + %11 = add i32 %10, %1 + %12 = add i32 %11, %1 + %13 = add i32 %12, %1 + %14 = add i32 %13, %1 + %15 = getelementptr inbounds i32, ptr %0, i32 1 + %16 = 
load i32, ptr %15, align 4 + %17 = add i32 %14, %16 + ret i32 %17 + } + + @global_var = external global [100 x i32] + + define i32 @symbolic_operands_global() { + ret i32 0 + } + + define i32 @symbolic_operands_different_globals() { + ret i32 0 + } + + define i32 @symbolic_operands_constantpool() { + ret i32 0 + } +--- +# Basic case: two consecutive 32-bit loads that can be combined into LD +name: basic_load_combine +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x10', virtual-reg: '%0' } +body: | + bb.0: + liveins: $x10 + + ; CHECK-LABEL: name: basic_load_combine + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32)) + ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32)) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]] + ; CHECK-NEXT: PseudoRET + ; + ; CHECK-4BYTE-LABEL: name: basic_load_combine + ; CHECK-4BYTE: liveins: $x10 + ; CHECK-4BYTE-NEXT: {{ $}} + ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32)) + ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]] + ; CHECK-4BYTE-NEXT: PseudoRET + %0:gpr = COPY $x10 + %1:gpr = LW %0, 0 :: (load (s32)) + %2:gpr = LW %0, 4 :: (load (s32)) + %3:gpr = ADD %1, %2 + PseudoRET + +... +--- +# Basic case: two consecutive 32-bit stores that can be combined into SD +name: basic_store_combine +alignment: 4 +tracksRegLiveness: true +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$x11', virtual-reg: '%1' } + - { reg: '$x12', virtual-reg: '%2' } +body: | + bb.0: + liveins: $x10, $x11, $x12 + + ; CHECK-LABEL: name: basic_store_combine + ; CHECK: liveins: $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12 + ; CHECK-NEXT: SW [[COPY1]], [[COPY]], 0 :: (store (s32)) + ; CHECK-NEXT: SW [[COPY2]], [[COPY]], 4 :: (store (s32)) + ; CHECK-NEXT: PseudoRET + ; + ; CHECK-4BYTE-LABEL: name: basic_store_combine + ; CHECK-4BYTE: liveins: $x10, $x11, $x12 + ; CHECK-4BYTE-NEXT: {{ $}} + ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-4BYTE-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12 + ; CHECK-4BYTE-NEXT: PseudoSD_RV32_OPT [[COPY1]], [[COPY2]], [[COPY]], 0 :: (store (s32)) + ; CHECK-4BYTE-NEXT: PseudoRET + %0:gpr = COPY $x10 + %1:gpr = COPY $x11 + %2:gpr = COPY $x12 + SW %1, %0, 0 :: (store (s32)) + SW %2, %0, 4 :: (store (s32)) + PseudoRET + +... 
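+# The 8-byte-aligned variants below pair under both RUN lines: the default
+# configuration already accepts align-8 accesses, so +zilsd-4byte-align makes
+# no difference here.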
+--- +name: basic_load_combine_8_byte_aligned +alignment: 8 +tracksRegLiveness: true +liveins: + - { reg: '$x10', virtual-reg: '%0' } +body: | + bb.0: + liveins: $x10 + + ; CHECK-LABEL: name: basic_load_combine_8_byte_aligned + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32), align 8) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]] + ; CHECK-NEXT: PseudoRET + ; + ; CHECK-4BYTE-LABEL: name: basic_load_combine_8_byte_aligned + ; CHECK-4BYTE: liveins: $x10 + ; CHECK-4BYTE-NEXT: {{ $}} + ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32), align 8) + ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]] + ; CHECK-4BYTE-NEXT: PseudoRET + %0:gpr = COPY $x10 + %1:gpr = LW %0, 0 :: (load (s32), align 8) + %2:gpr = LW %0, 4 :: (load (s32), align 8) + %3:gpr = ADD %1, %2 + PseudoRET + +... +--- +# Basic case: two consecutive 32-bit stores that can be combined into SD +name: basic_store_combine_8_byte_aligned +alignment: 8 +tracksRegLiveness: true +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$x11', virtual-reg: '%1' } + - { reg: '$x12', virtual-reg: '%2' } +body: | + bb.0: + liveins: $x10, $x11, $x12 + + ; CHECK-LABEL: name: basic_store_combine_8_byte_aligned + ; CHECK: liveins: $x10, $x11, $x12 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12 + ; CHECK-NEXT: PseudoSD_RV32_OPT [[COPY1]], [[COPY2]], [[COPY]], 0 :: (store (s32), align 8) + ; CHECK-NEXT: PseudoRET + ; + ; CHECK-4BYTE-LABEL: name: basic_store_combine_8_byte_aligned + ; CHECK-4BYTE: liveins: $x10, $x11, $x12 + ; CHECK-4BYTE-NEXT: {{ $}} + ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 + ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11 + ; CHECK-4BYTE-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x12 + ; CHECK-4BYTE-NEXT: PseudoSD_RV32_OPT [[COPY1]], [[COPY2]], [[COPY]], 0 :: (store (s32), align 8) + ; CHECK-4BYTE-NEXT: PseudoRET + %0:gpr = COPY $x10 + %1:gpr = COPY $x11 + %2:gpr = COPY $x12 + SW %1, %0, 0 :: (store (s32), align 8) + SW %2, %0, 4 :: (store (s32), align 8) + PseudoRET + +... 
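+# Negative cases: pairing requires two LW/SW on the same base register with
+# offsets exactly 4 apart (see canFormLdSdPair).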
+---
+# Non-consecutive offsets - should not combine
+name: non_consecutive_offsets
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: non_consecutive_offsets
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: non_consecutive_offsets
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; Should not combine - offset gap is 8, not 4
+    %1:gpr = LW %0, 0 :: (load (s32))
+    %2:gpr = LW %0, 8 :: (load (s32))
+    %3:gpr = ADD %1, %2
+    PseudoRET
+
+...
+---
+# Different base registers - should not combine
+name: different_base_regs
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+body: |
+  bb.0:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: different_base_regs
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY1]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: different_base_regs
+    ; CHECK-4BYTE: liveins: $x10, $x11
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY1]], 4 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    %1:gpr = COPY $x11
+    ; Should not combine - different base registers
+    %2:gpr = LW %0, 0 :: (load (s32))
+    %3:gpr = LW %1, 4 :: (load (s32))
+    %4:gpr = ADD %2, %3
+    PseudoRET
+
+...
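+# The two negative tests above pin down the structural requirements for
+# pairing: both accesses must use the same base register and their immediate
+# offsets must differ by exactly 4.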
+---
+# Call instruction blocks optimization
+name: call_blocks_optimization
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: call_blocks_optimization
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: PseudoCALL target-flags(riscv-call) @external_func, csr_ilp32_lp64, implicit-def $x1
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: call_blocks_optimization
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: PseudoCALL target-flags(riscv-call) @external_func, csr_ilp32_lp64, implicit-def $x1
+    ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    %1:gpr = LW %0, 0 :: (load (s32))
+    ; Call instruction should block combining across it
+    PseudoCALL target-flags(riscv-call) @external_func, csr_ilp32_lp64, implicit-def $x1
+    %2:gpr = LW %0, 4 :: (load (s32))
+    %3:gpr = ADD %1, %2
+    PseudoRET
+
+...
+---
+# Terminator instruction blocks optimization
+name: terminator_blocks_optimization
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+body: |
+  ; CHECK-LABEL: name: terminator_blocks_optimization
+  ; CHECK: bb.0:
+  ; CHECK-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: liveins: $x10, $x11
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+  ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+  ; CHECK-NEXT: BEQ [[COPY1]], [[LW]], %bb.1
+  ; CHECK-NEXT: {{ $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+  ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+  ; CHECK-NEXT: PseudoRET
+  ;
+  ; CHECK-4BYTE-LABEL: name: terminator_blocks_optimization
+  ; CHECK-4BYTE: bb.0:
+  ; CHECK-4BYTE-NEXT: successors: %bb.1(0x80000000)
+  ; CHECK-4BYTE-NEXT: liveins: $x10, $x11
+  ; CHECK-4BYTE-NEXT: {{ $}}
+  ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+  ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+  ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+  ; CHECK-4BYTE-NEXT: BEQ [[COPY1]], [[LW]], %bb.1
+  ; CHECK-4BYTE-NEXT: {{ $}}
+  ; CHECK-4BYTE-NEXT: bb.1:
+  ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+  ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+  ; CHECK-4BYTE-NEXT: PseudoRET
+  bb.0:
+    liveins: $x10, $x11
+
+    %0:gpr = COPY $x10
+    %1:gpr = COPY $x11
+    %2:gpr = LW %0, 0 :: (load (s32))
+    BEQ %1, %2, %bb.1
+
+  bb.1:
+    ; Should not combine across basic block boundary
+    %3:gpr = LW %0, 4 :: (load (s32))
+    %4:gpr = ADD %2, %3
+    PseudoRET
+
+...
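+# Both barriers above follow from the pass scanning for a partner access only
+# within the current basic block: the search stops at calls and never crosses
+# a terminator, so no pair forms in either configuration.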
+---
+# Dependency interference - load result used before second load
+name: dependency_interference
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: dependency_interference
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[LW]], 1
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[ADDI]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: dependency_interference
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI [[PseudoLD_RV32_OPT]], 1
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[ADDI]], [[PseudoLD_RV32_OPT1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    %1:gpr = LW %0, 0 :: (load (s32))
+    ; Result of the first load is used between the loads - pairing has to
+    ; hoist the second load above this ADDI
+    %3:gpr = ADDI %1, 1
+    %2:gpr = LW %0, 4 :: (load (s32))
+    %4:gpr = ADD %3, %2
+    PseudoRET
+
+...
+---
+# Memory aliasing - an intervening store to a non-overlapping address does
+# not block combining
+name: memory_aliasing
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+body: |
+  bb.0:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: memory_aliasing
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: SW [[COPY1]], [[COPY]], 8 :: (store (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: memory_aliasing
+    ; CHECK-4BYTE: liveins: $x10, $x11
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: SW [[COPY1]], [[COPY]], 8 :: (store (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    %1:gpr = COPY $x11
+    %2:gpr = LW %0, 0 :: (load (s32))
+    SW %1, %0, 8 :: (store (s32))
+    %3:gpr = LW %0, 4 :: (load (s32))
+    %4:gpr = ADD %2, %3
+    PseudoRET
+
+...
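+# The two cases above exercise rescheduling: the pass may hoist the second
+# load across instructions that neither define its operands nor alias its
+# memory, which is why the CHECK-4BYTE runs still form a pair here.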
+---
+# Multiple pairs in same function
+name: multiple_pairs
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+body: |
+  bb.0:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: multiple_pairs
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY1]], 8 :: (load (s32))
+    ; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY1]], 12 :: (load (s32))
+    ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[LW2]], [[LW3]]
+    ; CHECK-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[ADD]], [[ADD1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: multiple_pairs
+    ; CHECK-4BYTE: liveins: $x10, $x11
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT2:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT3:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY1]], 8 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT2]], [[PseudoLD_RV32_OPT3]]
+    ; CHECK-4BYTE-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[ADD]], [[ADD1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    %1:gpr = COPY $x11
+    ; First pair should combine
+    %2:gpr = LW %0, 0 :: (load (s32))
+    %3:gpr = LW %0, 4 :: (load (s32))
+    %4:gpr = ADD %2, %3
+
+    ; Second pair should also combine
+    %5:gpr = LW %1, 8 :: (load (s32))
+    %6:gpr = LW %1, 12 :: (load (s32))
+    %7:gpr = ADD %5, %6
+    %8:gpr = ADD %4, %7
+    PseudoRET
+
+...
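+# After a pair is formed the scan resumes at the following instruction, so
+# several independent pairs can be merged within a single block.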
+---
+# Register pressure test - exercises pairing profitability when many values
+# are live
+name: high_register_pressure
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: high_register_pressure
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 16 :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 20 :: (load (s32))
+    ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 24 :: (load (s32))
+    ; CHECK-NEXT: [[LW3:%[0-9]+]]:gpr = LW [[COPY]], 28 :: (load (s32))
+    ; CHECK-NEXT: [[LW4:%[0-9]+]]:gpr = LW [[COPY]], 32 :: (load (s32))
+    ; CHECK-NEXT: [[LW5:%[0-9]+]]:gpr = LW [[COPY]], 36 :: (load (s32))
+    ; CHECK-NEXT: [[LW6:%[0-9]+]]:gpr = LW [[COPY]], 40 :: (load (s32))
+    ; CHECK-NEXT: [[LW7:%[0-9]+]]:gpr = LW [[COPY]], 44 :: (load (s32))
+    ; CHECK-NEXT: [[LW8:%[0-9]+]]:gpr = LW [[COPY]], 48 :: (load (s32))
+    ; CHECK-NEXT: [[LW9:%[0-9]+]]:gpr = LW [[COPY]], 52 :: (load (s32))
+    ; CHECK-NEXT: [[LW10:%[0-9]+]]:gpr = LW [[COPY]], 56 :: (load (s32))
+    ; CHECK-NEXT: [[LW11:%[0-9]+]]:gpr = LW [[COPY]], 60 :: (load (s32))
+    ; CHECK-NEXT: [[LW12:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[LW13:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[LW2]], [[LW3]]
+    ; CHECK-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[LW4]], [[LW5]]
+    ; CHECK-NEXT: [[ADD3:%[0-9]+]]:gpr = ADD [[LW6]], [[LW7]]
+    ; CHECK-NEXT: [[ADD4:%[0-9]+]]:gpr = ADD [[LW8]], [[LW9]]
+    ; CHECK-NEXT: [[ADD5:%[0-9]+]]:gpr = ADD [[LW10]], [[LW11]]
+    ; CHECK-NEXT: [[ADD6:%[0-9]+]]:gpr = ADD [[LW12]], [[LW13]]
+    ; CHECK-NEXT: [[ADD7:%[0-9]+]]:gpr = ADD [[ADD]], [[ADD1]]
+    ; CHECK-NEXT: [[ADD8:%[0-9]+]]:gpr = ADD [[ADD2]], [[ADD3]]
+    ; CHECK-NEXT: [[ADD9:%[0-9]+]]:gpr = ADD [[ADD4]], [[ADD5]]
+    ; CHECK-NEXT: [[ADD10:%[0-9]+]]:gpr = ADD [[ADD6]], [[ADD7]]
+    ; CHECK-NEXT: [[ADD11:%[0-9]+]]:gpr = ADD [[ADD8]], [[ADD9]]
+    ; CHECK-NEXT: [[ADD12:%[0-9]+]]:gpr = ADD [[ADD10]], [[ADD11]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: high_register_pressure
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 16 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT2:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT3:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 24 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT4:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT5:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 32 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT6:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT7:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 40 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT8:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT9:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 48 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT10:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT11:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 56 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT12:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT13:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+    ; CHECK-4BYTE-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT2]], [[PseudoLD_RV32_OPT3]]
+    ; CHECK-4BYTE-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT4]], [[PseudoLD_RV32_OPT5]]
+    ; CHECK-4BYTE-NEXT: [[ADD3:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT6]], [[PseudoLD_RV32_OPT7]]
+    ; CHECK-4BYTE-NEXT: [[ADD4:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT8]], [[PseudoLD_RV32_OPT9]]
+    ; CHECK-4BYTE-NEXT: [[ADD5:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT10]], [[PseudoLD_RV32_OPT11]]
+    ; CHECK-4BYTE-NEXT: [[ADD6:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT12]], [[PseudoLD_RV32_OPT13]]
+    ; CHECK-4BYTE-NEXT: [[ADD7:%[0-9]+]]:gpr = ADD [[ADD]], [[ADD1]]
+    ; CHECK-4BYTE-NEXT: [[ADD8:%[0-9]+]]:gpr = ADD [[ADD2]], [[ADD3]]
+    ; CHECK-4BYTE-NEXT: [[ADD9:%[0-9]+]]:gpr = ADD [[ADD4]], [[ADD5]]
+    ; CHECK-4BYTE-NEXT: [[ADD10:%[0-9]+]]:gpr = ADD [[ADD6]], [[ADD7]]
+    ; CHECK-4BYTE-NEXT: [[ADD11:%[0-9]+]]:gpr = ADD [[ADD8]], [[ADD9]]
+    ; CHECK-4BYTE-NEXT: [[ADD12:%[0-9]+]]:gpr = ADD [[ADD10]], [[ADD11]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; Create high register pressure with many live values
+    %1:gpr = LW %0, 16 :: (load (s32))
+    %2:gpr = LW %0, 20 :: (load (s32))
+    %3:gpr = LW %0, 24 :: (load (s32))
+    %4:gpr = LW %0, 28 :: (load (s32))
+    %5:gpr = LW %0, 32 :: (load (s32))
+    %6:gpr = LW %0, 36 :: (load (s32))
+    %7:gpr = LW %0, 40 :: (load (s32))
+    %8:gpr = LW %0, 44 :: (load (s32))
+    %9:gpr = LW %0, 48 :: (load (s32))
+    %10:gpr = LW %0, 52 :: (load (s32))
+    %11:gpr = LW %0, 56 :: (load (s32))
+    %12:gpr = LW %0, 60 :: (load (s32))
+
+    ; With high register pressure, these loads might not be combined
+    ; depending on the profitability analysis
+    %13:gpr = LW %0, 0 :: (load (s32))
+    %14:gpr = LW %0, 4 :: (load (s32))
+
+    ; Use all the loaded values to keep them live
+    %15:gpr = ADD %1, %2
+    %16:gpr = ADD %3, %4
+    %17:gpr = ADD %5, %6
+    %18:gpr = ADD %7, %8
+    %19:gpr = ADD %9, %10
+    %20:gpr = ADD %11, %12
+    %21:gpr = ADD %13, %14
+    %22:gpr = ADD %15, %16
+    %23:gpr = ADD %17, %18
+    %24:gpr = ADD %19, %20
+    %25:gpr = ADD %21, %22
+    %26:gpr = ADD %23, %24
+    %27:gpr = ADD %25, %26
+    PseudoRET
+
+...
+---
+# Test reverse order - second load has lower offset than first
+name: reverse_order_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: reverse_order_loads
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: reverse_order_loads
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT1]], [[PseudoLD_RV32_OPT]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; Load at higher offset first, then lower offset
+    ; Should still be combined as LD with lower offset
+    %1:gpr = LW %0, 4 :: (load (s32))
+    %2:gpr = LW %0, 0 :: (load (s32))
+    %3:gpr = ADD %1, %2
+    PseudoRET
+
+...
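+# When the lower-offset access appears second, the pair is still anchored at
+# the lower offset and the two results are simply swapped, as the reversed
+# ADD operands in the CHECK-4BYTE block above show.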
+---
+# Test with immediate offset calculation
+name: offset_calculation
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: offset_calculation
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 100 :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 104 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: offset_calculation
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 100 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; Test with different immediate values that are consecutive
+    %1:gpr = LW %0, 100 :: (load (s32))
+    %2:gpr = LW %0, 104 :: (load (s32))
+    %3:gpr = ADD %1, %2
+    PseudoRET
+
+...
+---
+# Test large offset values
+name: large_offsets
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: large_offsets
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 2040 :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 2044 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: large_offsets
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 2040 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; Test with large offset values
+    %1:gpr = LW %0, 2040 :: (load (s32))
+    %2:gpr = LW %0, 2044 :: (load (s32))
+    %3:gpr = ADD %1, %2
+    PseudoRET
+
+...
+---
+# Test with negative offsets
+name: negative_offsets
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: negative_offsets
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], -8 :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], -4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: negative_offsets
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], -8 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; Test with negative consecutive offsets
+    %1:gpr = LW %0, -8 :: (load (s32))
+    %2:gpr = LW %0, -4 :: (load (s32))
+    %3:gpr = ADD %1, %2
+    PseudoRET
+
+...
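+# The offsets exercised above (100, 2040/2044, -8/-4) all fit in the signed
+# 12-bit immediate of the pair pseudos (simm12_lo in the pseudo definitions);
+# presumably no pair can be formed once an offset leaves that range.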
+---
+# Test register reuse between loads
+name: register_reuse
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: register_reuse
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[LW]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: register_reuse
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[LW]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; First load overwrites input register - should prevent combination
+    %0:gpr = LW %0, 0 :: (load (s32))
+    %1:gpr = LW %0, 4 :: (load (s32))
+    %2:gpr = ADD %0, %1
+    PseudoRET
+
+...
+---
+# Test with volatile loads - should not combine
+name: volatile_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: volatile_loads
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (volatile load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (volatile load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: volatile_loads
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (volatile load (s32))
+    ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (volatile load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; Volatile loads should not be combined
+    %1:gpr = LW %0, 0 :: (volatile load (s32))
+    %2:gpr = LW %0, 4 :: (volatile load (s32))
+    %3:gpr = ADD %1, %2
+    PseudoRET
+
+...
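+# register_reuse is unsound to merge because the first LW redefines the base
+# register, so the second LW reads from a different address; volatile
+# accesses are never pairing candidates regardless of alignment.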
+---
+# Test store dependency - store modifies same location as load
+name: store_dependency
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+body: |
+  bb.0:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: store_dependency
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: SW [[COPY1]], [[COPY]], 4 :: (store (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: store_dependency
+    ; CHECK-4BYTE: liveins: $x10, $x11
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: SW [[COPY1]], [[COPY]], 4 :: (store (s32))
+    ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    %1:gpr = COPY $x11
+    %2:gpr = LW %0, 0 :: (load (s32))
+    ; Store to same location as second load - should prevent combination
+    SW %1, %0, 4 :: (store (s32))
+    %3:gpr = LW %0, 4 :: (load (s32))
+    %4:gpr = ADD %2, %3
+    PseudoRET
+
+...
+---
+# Test three loads - only first two should combine
+name: three_loads
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: three_loads
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: [[LW2:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[ADD]], [[LW2]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: three_loads
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 8 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+    ; CHECK-4BYTE-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[ADD]], [[LW]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; First two loads should combine, third should remain separate
+    %1:gpr = LW %0, 0 :: (load (s32))
+    %2:gpr = LW %0, 4 :: (load (s32))
+    %3:gpr = LW %0, 8 :: (load (s32))
+    %4:gpr = ADD %1, %2
+    %5:gpr = ADD %4, %3
+    PseudoRET
+...
+---
+# Test where distance between loads exceeds MaxRescheduleDistance
+name: distance_exceeds_max
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+  - { reg: '$x11', virtual-reg: '%1' }
+body: |
+  bb.0:
+    liveins: $x10, $x11
+
+    ; CHECK-LABEL: name: distance_exceeds_max
+    ; CHECK: liveins: $x10, $x11
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[COPY1]]
+    ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[ADD]], [[COPY1]]
+    ; CHECK-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[ADD1]], [[COPY1]]
+    ; CHECK-NEXT: [[ADD3:%[0-9]+]]:gpr = ADD [[ADD2]], [[COPY1]]
+    ; CHECK-NEXT: [[ADD4:%[0-9]+]]:gpr = ADD [[ADD3]], [[COPY1]]
+    ; CHECK-NEXT: [[ADD5:%[0-9]+]]:gpr = ADD [[ADD4]], [[COPY1]]
+    ; CHECK-NEXT: [[ADD6:%[0-9]+]]:gpr = ADD [[ADD5]], [[COPY1]]
+    ; CHECK-NEXT: [[ADD7:%[0-9]+]]:gpr = ADD [[ADD6]], [[COPY1]]
+    ; CHECK-NEXT: [[ADD8:%[0-9]+]]:gpr = ADD [[ADD7]], [[COPY1]]
+    ; CHECK-NEXT: [[ADD9:%[0-9]+]]:gpr = ADD [[ADD8]], [[COPY1]]
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: distance_exceeds_max
+    ; CHECK-4BYTE: liveins: $x10, $x11
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
+    ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], 0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[COPY1]]
+    ; CHECK-4BYTE-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD [[ADD]], [[COPY1]]
+    ; CHECK-4BYTE-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD [[ADD1]], [[COPY1]]
+    ; CHECK-4BYTE-NEXT: [[ADD3:%[0-9]+]]:gpr = ADD [[ADD2]], [[COPY1]]
+    ; CHECK-4BYTE-NEXT: [[ADD4:%[0-9]+]]:gpr = ADD [[ADD3]], [[COPY1]]
+    ; CHECK-4BYTE-NEXT: [[ADD5:%[0-9]+]]:gpr = ADD [[ADD4]], [[COPY1]]
+    ; CHECK-4BYTE-NEXT: [[ADD6:%[0-9]+]]:gpr = ADD [[ADD5]], [[COPY1]]
+    ; CHECK-4BYTE-NEXT: [[ADD7:%[0-9]+]]:gpr = ADD [[ADD6]], [[COPY1]]
+    ; CHECK-4BYTE-NEXT: [[ADD8:%[0-9]+]]:gpr = ADD [[ADD7]], [[COPY1]]
+    ; CHECK-4BYTE-NEXT: [[ADD9:%[0-9]+]]:gpr = ADD [[ADD8]], [[COPY1]]
+    ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], 4 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    %1:gpr = COPY $x11
+    %2:gpr = LW %0, 0 :: (load (s32))
+    ; Insert 10 ADD instructions between the two loads so the distance
+    ; exceeds MaxRescheduleDistance (10)
+    %3:gpr = ADD %2, %1
+    %4:gpr = ADD %3, %1
+    %5:gpr = ADD %4, %1
+    %6:gpr = ADD %5, %1
+    %7:gpr = ADD %6, %1
+    %8:gpr = ADD %7, %1
+    %9:gpr = ADD %8, %1
+    %10:gpr = ADD %9, %1
+    %11:gpr = ADD %10, %1
+    %12:gpr = ADD %11, %1
+    ; Second load at offset 4 - too far from first load
+    %14:gpr = LW %0, 4 :: (load (s32))
+    PseudoRET
+...
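+# The reschedule window is capped (MaxRescheduleDistance above), so the pass
+# does not drag a load across long instruction sequences; presumably this
+# bounds both compile time and the length of the live ranges a pair creates.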
+---
+# Test combining loads with symbolic operands (global address)
+name: symbolic_operands_global
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: symbolic_operands_global
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) @global_var :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) @global_var + 4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: symbolic_operands_global
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], target-flags(riscv-lo) @global_var :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; Two consecutive loads with symbolic global address operands
+    %1:gpr = LW %0, target-flags(riscv-lo) @global_var :: (load (s32))
+    %2:gpr = LW %0, target-flags(riscv-lo) @global_var + 4 :: (load (s32))
+    %3:gpr = ADD %1, %2
+    PseudoRET
+
+...
+---
+# Test that loads whose symbolic operands differ in kind (global symbol vs.
+# stack slot) are not combined
+name: symbolic_operands_different_globals
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+stack:
+  - { id: 0, offset: -4, size: 4 }
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: symbolic_operands_different_globals
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) @global_var :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], %stack.0 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: symbolic_operands_different_globals
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) @global_var :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], %stack.0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; Should not combine - different symbol types
+    %1:gpr = LW %0, target-flags(riscv-lo) @global_var :: (load (s32))
+    %2:gpr = LW %0, %stack.0 :: (load (s32))
+    %3:gpr = ADD %1, %2
+    PseudoRET
+
+...
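+# Symbolic offsets are matched through the riscv-lo target flag (handled by
+# getLoadStoreOffset in the pass): two accesses pair only when they reference
+# the same symbol with immediate parts exactly 4 apart.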
+---
+# Test combining loads with constant pool operands
+name: symbolic_operands_constantpool
+alignment: 4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$x10', virtual-reg: '%0' }
+constants:
+  - id: 0
+    value: 'double 3.140000e+00'
+    alignment: 8
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; CHECK-LABEL: name: symbolic_operands_constantpool
+    ; CHECK: liveins: $x10
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-NEXT: [[LW:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) %const.0 :: (load (s32))
+    ; CHECK-NEXT: [[LW1:%[0-9]+]]:gpr = LW [[COPY]], target-flags(riscv-lo) %const.0 + 4 :: (load (s32))
+    ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[LW]], [[LW1]]
+    ; CHECK-NEXT: PseudoRET
+    ;
+    ; CHECK-4BYTE-LABEL: name: symbolic_operands_constantpool
+    ; CHECK-4BYTE: liveins: $x10
+    ; CHECK-4BYTE-NEXT: {{ $}}
+    ; CHECK-4BYTE-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+    ; CHECK-4BYTE-NEXT: [[PseudoLD_RV32_OPT:%[0-9]+]]:gpr, [[PseudoLD_RV32_OPT1:%[0-9]+]]:gpr = PseudoLD_RV32_OPT [[COPY]], target-flags(riscv-lo) %const.0 :: (load (s32))
+    ; CHECK-4BYTE-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[PseudoLD_RV32_OPT]], [[PseudoLD_RV32_OPT1]]
+    ; CHECK-4BYTE-NEXT: PseudoRET
+    %0:gpr = COPY $x10
+    ; Two consecutive loads with constant pool operands
+    %1:gpr = LW %0, target-flags(riscv-lo) %const.0 :: (load (s32))
+    %2:gpr = LW %0, target-flags(riscv-lo) %const.0 + 4 :: (load (s32))
+    %3:gpr = ADD %1, %2
+    PseudoRET
+
+...
diff --git a/llvm/test/CodeGen/RISCV/zilsd-regalloc-hints.mir b/llvm/test/CodeGen/RISCV/zilsd-regalloc-hints.mir
new file mode 100644
index 0000000000000..7f21f421f9af8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/zilsd-regalloc-hints.mir
@@ -0,0 +1,83 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=riscv32 -mattr=+zilsd -run-pass=greedy,virtregrewriter %s -o - | FileCheck --check-prefix=WITHOUT-HINT %s
+# RUN: llc -mtriple=riscv32 -mattr=+zilsd -run-pass=riscv-prera-zilsd-opt,greedy,virtregrewriter %s -o - | FileCheck --check-prefix=WITH-HINT %s
+
+--- |
+  define i32 @test_load_pair_hints(ptr %p) {
+    %v1 = load i32, ptr %p, align 4
+    %p2 = getelementptr inbounds i8, ptr %p, i32 4
+    %v2 = load i32, ptr %p2, align 4
+    %sum = add i32 %v1, %v2
+    ret i32 %sum
+  }
+
+  define void @test_store_pair_hints(ptr %p, i32 %a, i32 %b) {
+    store i32 %a, ptr %p, align 4
+    %p2 = getelementptr inbounds i8, ptr %p, i32 4
+    store i32 %b, ptr %p2, align 4
+    ret void
+  }
+...
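+# The WITH-HINT run schedules riscv-prera-zilsd-opt before the allocator; the
+# pass presumably records allocation hints for candidate pairs so that greedy
+# can choose a consecutive even/odd GPR pair, and the checks below pin down
+# the resulting allocation with and without those hints.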
+---
+# Test that load pairs get register hints and allocate consecutive registers
+# After register allocation, should either keep LD_RV32 or split back to LW
+name: test_load_pair_hints
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x10
+
+    ; WITHOUT-HINT-LABEL: name: test_load_pair_hints
+    ; WITHOUT-HINT: liveins: $x10
+    ; WITHOUT-HINT-NEXT: {{ $}}
+    ; WITHOUT-HINT-NEXT: renamable $x11 = LW renamable $x10, 0 :: (load (s32) from %ir.p)
+    ; WITHOUT-HINT-NEXT: renamable $x10 = LW killed renamable $x10, 4 :: (load (s32) from %ir.p2)
+    ; WITHOUT-HINT-NEXT: renamable $x10 = ADD killed renamable $x11, killed renamable $x10
+    ; WITHOUT-HINT-NEXT: PseudoRET implicit $x10
+    ;
+    ; WITH-HINT-LABEL: name: test_load_pair_hints
+    ; WITH-HINT: liveins: $x10
+    ; WITH-HINT-NEXT: {{ $}}
+    ; WITH-HINT-NEXT: renamable $x11 = LW renamable $x10, 0 :: (load (s32) from %ir.p)
+    ; WITH-HINT-NEXT: renamable $x10 = LW killed renamable $x10, 4 :: (load (s32) from %ir.p2)
+    ; WITH-HINT-NEXT: renamable $x10 = ADD killed renamable $x11, killed renamable $x10
+    ; WITH-HINT-NEXT: PseudoRET implicit $x10
+    %10:gpr = COPY $x10
+    ; Candidate pair: the two LW at offsets 0 and 4 may receive
+    ; consecutive-register allocation hints
+    %0:gpr = LW %10, 0 :: (load (s32) from %ir.p)
+    %1:gpr = LW %10, 4 :: (load (s32) from %ir.p2)
+    %2:gpr = ADD %0, %1
+    $x10 = COPY %2
+    PseudoRET implicit $x10
+
+...
+---
+# Test that store pairs get register hints and allocate consecutive registers
+# After register allocation, should either keep SD_RV32 or split back to SW
+name: test_store_pair_hints
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x10, $x11, $x12
+
+    ; WITHOUT-HINT-LABEL: name: test_store_pair_hints
+    ; WITHOUT-HINT: liveins: $x10, $x11, $x12
+    ; WITHOUT-HINT-NEXT: {{ $}}
+    ; WITHOUT-HINT-NEXT: SW killed renamable $x11, renamable $x10, 0 :: (store (s32) into %ir.p)
+    ; WITHOUT-HINT-NEXT: SW killed renamable $x12, killed renamable $x10, 4 :: (store (s32) into %ir.p2)
+    ; WITHOUT-HINT-NEXT: PseudoRET
+    ;
+    ; WITH-HINT-LABEL: name: test_store_pair_hints
+    ; WITH-HINT: liveins: $x10, $x11, $x12
+    ; WITH-HINT-NEXT: {{ $}}
+    ; WITH-HINT-NEXT: SW killed renamable $x11, renamable $x10, 0 :: (store (s32) into %ir.p)
+    ; WITH-HINT-NEXT: SW killed renamable $x12, killed renamable $x10, 4 :: (store (s32) into %ir.p2)
+    ; WITH-HINT-NEXT: PseudoRET
+    %10:gpr = COPY $x10
+    %11:gpr = COPY $x11
+    %12:gpr = COPY $x12
+    ; Candidate pair: the two SW at offsets 0 and 4 may receive
+    ; consecutive-register allocation hints
+    SW %11, %10, 0 :: (store (s32) into %ir.p)
+    SW %12, %10, 4 :: (store (s32) into %ir.p2)
+    PseudoRET
+...