diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index baa5476cec94..18e8038ec7af 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -99,6 +99,8 @@ class VirtRegMap; LiveIntervals(); ~LiveIntervals() override; + const TargetInstrInfo &getTargetInstrInfo() const { return *TII; } + /// Calculate the spill weight to assign to a single instruction. static float getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index f850767270a4..d4c9f5654f98 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -219,6 +219,8 @@ namespace llvm { /// FunctionPass *createGreedyRegisterAllocator(); FunctionPass *createGreedyRegisterAllocator(RegClassFilterFunc F); + FunctionPass *createGreedyRegisterAllocator(RegClassFilterFunc F, + LiveIntervalFilterFunc LIF); /// PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean /// Quadratic Prograaming (PBQP) based register allocator. diff --git a/llvm/include/llvm/CodeGen/RegAllocCommon.h b/llvm/include/llvm/CodeGen/RegAllocCommon.h index 757ca8e112ee..24125b71f745 100644 --- a/llvm/include/llvm/CodeGen/RegAllocCommon.h +++ b/llvm/include/llvm/CodeGen/RegAllocCommon.h @@ -16,6 +16,10 @@ namespace llvm { class TargetRegisterClass; class TargetRegisterInfo; +class MachineRegisterInfo; +class TargetInstrInfo; +class LiveInterval; + typedef std::function RegClassFilterFunc; @@ -26,6 +30,17 @@ static inline bool allocateAllRegClasses(const TargetRegisterInfo &, return true; } +typedef std::function + LiveIntervalFilterFunc; +/// Default live interval filter function for register allocation. All live +/// intervals should be allocated. 
+static inline bool allocateAllLiveIntervals(MachineRegisterInfo &, + const TargetInstrInfo &, + const LiveInterval *) { + return true; } +} // namespace llvm + #endif // LLVM_CODEGEN_REGALLOCCOMMON_H diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index 36a6e02a028f..63ed006cbb60 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -188,8 +188,13 @@ void RegAllocBase::enqueue(const LiveInterval *LI) { const TargetRegisterClass &RC = *MRI->getRegClass(Reg); if (ShouldAllocateClass(*TRI, RC)) { - LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n'); - enqueueImpl(LI); + if (ShouldAllocateLiveInterval(*MRI, LIS->getTargetInstrInfo(), LI)) { + LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n'); + enqueueImpl(LI); + } else { + LLVM_DEBUG(dbgs() << "Not enqueueing " << printReg(Reg, TRI) + << " in skipped live interval\n"); + } } else { LLVM_DEBUG(dbgs() << "Not enqueueing " << printReg(Reg, TRI) << " in skipped register class\n"); diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index 9ac9caeb093d..e672591e2117 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -72,6 +72,7 @@ class RegAllocBase { LiveRegMatrix *Matrix = nullptr; RegisterClassInfo RegClassInfo; const RegClassFilterFunc ShouldAllocateClass; + const LiveIntervalFilterFunc ShouldAllocateLiveInterval; /// Inst which is a def of an original reg and whose defs are already all /// dead after remat is saved in DeadRemats. The deletion of such inst is @@ -79,8 +80,9 @@ class RegAllocBase { /// always available for the remat of all the siblings of the original reg. 
SmallPtrSet DeadRemats; - RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses) : - ShouldAllocateClass(F) {} + RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses, + const LiveIntervalFilterFunc LIF = allocateAllLiveIntervals) + : ShouldAllocateClass(F), ShouldAllocateLiveInterval(LIF) {} virtual ~RegAllocBase() = default; diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 590ff74a11c0..4f664c18e94a 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -205,11 +205,15 @@ FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) { return new RAGreedy(Ftor); } -RAGreedy::RAGreedy(RegClassFilterFunc F): - MachineFunctionPass(ID), - RegAllocBase(F) { +FunctionPass * +llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor, + LiveIntervalFilterFunc LIFtor) { + return new RAGreedy(Ftor, LIFtor); } +RAGreedy::RAGreedy(RegClassFilterFunc F, LiveIntervalFilterFunc LIF) + : MachineFunctionPass(ID), RegAllocBase(F, LIF) {} + void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h index 9aebe9343bb8..46f556748bd2 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -284,7 +284,8 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass, bool ReverseLocalAssignment = false; public: - RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); + RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses, + const LiveIntervalFilterFunc LIF = allocateAllLiveIntervals); /// Return the pass name. 
StringRef getPassName() const override { return "Greedy Register Allocator"; } diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp index 2731ceeaf8ea..1acb7366021e 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp @@ -835,7 +835,6 @@ AIE2InstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { {AIE2::LDA_dms_spill, AIE2::sub_dim_size}, {AIE2::LDA_dms_spill, AIE2::sub_dim_stride}, {AIE2::LDA_dms_spill, AIE2::sub_dim_count}, - {AIE2::LDA_dms_spill, AIE2::sub_hi_dim_then_sub_mod}, {AIE2::LDA_dms_spill, AIE2::sub_hi_dim_then_sub_dim_size}, {AIE2::LDA_dms_spill, AIE2::sub_hi_dim_then_sub_dim_stride}, {AIE2::LDA_dms_spill, AIE2::sub_hi_dim_then_sub_dim_count}}; @@ -844,7 +843,6 @@ AIE2InstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { {AIE2::ST_dms_spill, AIE2::sub_dim_size}, {AIE2::ST_dms_spill, AIE2::sub_dim_stride}, {AIE2::ST_dms_spill, AIE2::sub_dim_count}, - {AIE2::ST_dms_spill, AIE2::sub_hi_dim_then_sub_mod}, {AIE2::ST_dms_spill, AIE2::sub_hi_dim_then_sub_dim_size}, {AIE2::ST_dms_spill, AIE2::sub_hi_dim_then_sub_dim_stride}, {AIE2::ST_dms_spill, AIE2::sub_hi_dim_then_sub_dim_count}}; @@ -1205,7 +1203,6 @@ AIE2InstrInfo::getTiedRegInfo(unsigned Opcode) const { SubRegSplit(AIE2::sub_dim_size), SubRegSplit(AIE2::sub_dim_stride), SubRegSplit(AIE2::sub_dim_count), - SubRegSplit(AIE2::sub_hi_dim_then_sub_mod, /*IsUndef=*/true), SubRegSplit(AIE2::sub_hi_dim_then_sub_dim_size), SubRegSplit(AIE2::sub_hi_dim_then_sub_dim_stride), SubRegSplit(AIE2::sub_hi_dim_then_sub_dim_count)}; diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.td b/llvm/lib/Target/AIE/AIE2InstrInfo.td index f91ebb99b24d..5a53bb4b151b 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.td +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.td @@ -605,7 +605,7 @@ foreach instr = [VST_2D_SRS_D8_S32, VST_2D_SRS_D16_S64, VST_2D_SRS_D16_S32, // Define _split variants for instructions using 3D registers class Split3DInstr 
: SplitPseudo {} + eDN:$dim_size2, eDJ:$dim_stride2, eDC:$dim_count2)> {} foreach instr = [VLDA_3D_dmw_lda_w, VLDA_3D_dmw_lda_am, VLDA_3D_CONV_FP32_BF16, VLDB_3D, VLDB_3D_128, LDA_3D_dmv_lda_q, VLDB_3D_UNPACK_S8_S4, VLDB_3D_UNPACK_S16_S8, VLDB_3D_UNPACK_D8_D4, VLDB_3D_UNPACK_D16_D8, diff --git a/llvm/lib/Target/AIE/AIE2RegisterInfo.cpp b/llvm/lib/Target/AIE/AIE2RegisterInfo.cpp index d86d09e1196b..f1ccad91345b 100644 --- a/llvm/lib/Target/AIE/AIE2RegisterInfo.cpp +++ b/llvm/lib/Target/AIE/AIE2RegisterInfo.cpp @@ -475,7 +475,6 @@ const std::set &AIE2RegisterInfo::getSubRegSplit(int RegClassId) const { AIE2::sub_dim_size, AIE2::sub_dim_stride, AIE2::sub_dim_count, - AIE2::sub_hi_dim_then_sub_mod, AIE2::sub_hi_dim_then_sub_dim_size, AIE2::sub_hi_dim_then_sub_dim_stride, AIE2::sub_hi_dim_then_sub_dim_count}; diff --git a/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp b/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp index ba6c482b1ae1..0ec24c9a48a9 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp @@ -649,15 +649,29 @@ void AIEBaseInstrInfo::copyThroughSubRegs(MachineBasicBlock &MBB, MCRegister SrcReg, bool KillSrc) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); - - SmallSet SrcSubRegs; - collectSubRegs(SrcReg, SrcSubRegs, TRI); + auto &TRI = + *static_cast(MRI.getTargetRegisterInfo()); + + const auto *RC = Register::isPhysicalRegister(SrcReg.id()) + ? 
TRI.getMinimalPhysRegClass(SrcReg) + : MRI.getRegClass(SrcReg); + auto &SubRegSplit = TRI.getSubRegSplit(RC->getID()); + + if (SubRegSplit.size() > 1) { + for (const auto &SubRegIdx : SubRegSplit) { + MCRegister SrcSubReg = TRI.getSubReg(SrcReg, SubRegIdx); + MCRegister DstSubReg = TRI.getSubReg(DstReg, SubRegIdx); + copyPhysReg(MBB, MBBI, DL, DstSubReg, SrcSubReg, KillSrc); + } + } else { + SmallSet SrcSubRegs; + collectSubRegs(SrcReg, SrcSubRegs, TRI); - for (MCRegister SrcSubReg : SrcSubRegs) { - unsigned SubRegIdx = TRI.getSubRegIndex(SrcReg, SrcSubReg); - MCRegister DstSubReg = TRI.getSubReg(DstReg, SubRegIdx); - copyPhysReg(MBB, MBBI, DL, DstSubReg, SrcSubReg, KillSrc); + for (MCRegister SrcSubReg : SrcSubRegs) { + unsigned SubRegIdx = TRI.getSubRegIndex(SrcReg, SrcSubReg); + MCRegister DstSubReg = TRI.getSubReg(DstReg, SubRegIdx); + copyPhysReg(MBB, MBBI, DL, DstSubReg, SrcSubReg, KillSrc); + } } } diff --git a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp index 2c7b4a8e218e..910a8ef451bc 100644 --- a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp @@ -11,6 +11,10 @@ #include "AIEBaseInstrInfo.h" #include "AIEBaseRegisterInfo.h" +#include "aie2p/AIE2PRegisterBankInfo.h" +#include "aie2p/AIE2PRegisterInfo.h" +#include "aie2p/AIE2PSubtarget.h" + #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveDebugVariables.h" @@ -65,9 +69,14 @@ class AIESuperRegRewriter : public MachineFunctionPass { private: void rewriteSuperReg(Register Reg, Register AssignedPhysReg, - MachineRegisterInfo &MRI, const AIEBaseRegisterInfo &TRI, - VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS, + MachineFunction &MF, MachineRegisterInfo &MRI, + const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM, + LiveRegMatrix &LRM, LiveIntervals &LIS, SlotIndexes &Indexes, LiveDebugVariables &DebugVars); + + void expandCopyBundle(MachineInstr &MI, MachineFunction &MF, + 
const AIEBaseRegisterInfo &TRI, SlotIndexes &Indexes, + SmallSet &RecomputeLIandLRM); }; /// Returns the subreg indices that can be used to rewrite \p Reg into smaller @@ -149,6 +158,7 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { SlotIndexes &Indexes = getAnalysis(); LiveDebugVariables &DebugVars = getAnalysis(); std::map AssignedPhysRegs; + std::list UnAssignedPhysRegs; // Collect already-assigned VRegs that can be split into smaller ones. LLVM_DEBUG(VRM.dump()); @@ -156,10 +166,12 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { ++VRegIdx) { Register Reg = Register::index2VirtReg(VRegIdx); - // Ignore un-used registers and un-allocated registers - if (MRI.reg_nodbg_empty(Reg) || !VRM.hasPhys(Reg)) + // Ignore un-used registers registers + if (MRI.reg_nodbg_empty(Reg)) continue; + const bool VirtualRegIsAllocated = VRM.hasPhys(Reg); + // Skip vregs that are spilled, they would anyway be disregarded by // getRewritableSubRegs due to the spill instructions using the whole reg // without any subreg indices. 
@@ -172,17 +184,62 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) {
     LLVM_DEBUG(dbgs() << "Analysing " << printReg(Reg, &TRI, 0, &MRI) << ":"
                       << printRegClassOrBank(Reg, MRI, &TRI) << '\n');
     if (!getRewritableSubRegs(Reg, MRI, TRI).empty()) {
-      AssignedPhysRegs[Reg] = VRM.getPhys(Reg);
-      LRM.unassign(LIS.getInterval(Reg));
+      if (VirtualRegIsAllocated) {
+        AssignedPhysRegs[Reg] = VRM.getPhys(Reg);
+        LRM.unassign(LIS.getInterval(Reg));
+      } else {
+        UnAssignedPhysRegs.push_back(Reg);
+      }
     } else {
       LLVM_DEBUG(dbgs() << "Could not rewrite " << printReg(Reg, &TRI, 0, &MRI)
                         << '\n');
     }
   }
 
-  // Re-write all the collected VRegs
+  // Re-write all the collected assigned VRegs
   for (auto &[VReg, PhysReg] : AssignedPhysRegs) {
-    rewriteSuperReg(VReg, PhysReg, MRI, TRI, VRM, LRM, LIS, Indexes, DebugVars);
+    rewriteSuperReg(VReg, PhysReg, MF, MRI, TRI, VRM, LRM, LIS, Indexes,
+                    DebugVars);
+  }
+
+  // Re-write the collected unassigned VRegs (no physical register to split)
+  for (auto &VReg : UnAssignedPhysRegs) {
+    MCRegister DummyPhysReg;
+    const TargetRegisterClass *SuperRC = MRI.getRegClass(VReg);
+    // TODO : Remove ARCH specific check
+    if (SuperRC == &AIE2P::eDSRegClass)
+      rewriteSuperReg(VReg, DummyPhysReg, MF, MRI, TRI, VRM, LRM, LIS, Indexes,
+                      DebugVars);
+  }
+
+  // Expand copy bundles; collect the registers whose intervals need refreshing
+  SmallSet RecomputeLIandLRM;
+  for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
+       MBBI != MBBE; ++MBBI) {
+    LLVM_DEBUG(MBBI->print(dbgs(), &Indexes));
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBBI->instrs())) {
+      expandCopyBundle(MI, MF, TRI, Indexes, RecomputeLIandLRM);
+    }
+  }
+
+  for (Register Reg : RecomputeLIandLRM) {
+    if (Reg.isVirtual() && LIS.hasInterval(Reg)) {
+      LLVM_DEBUG(dbgs() << "Recomputing live range for " << printReg(Reg, &TRI)
+                        << '\n');
+      // Recompute the interval. Unassign from the LiveRegMatrix first: it still
+      // reads the old interval, which removeInterval() would invalidate.
+      if (VRM.hasPhys(Reg)) {
+        const MCRegister PhysReg = VRM.getPhys(Reg);
+        const LiveInterval &OldLI = LIS.getInterval(Reg);
+        LRM.unassign(OldLI);
+        LIS.removeInterval(Reg);
+        const LiveInterval &LI = LIS.getInterval(Reg);
+        LRM.assign(LI, PhysReg);
+      } else {
+        LIS.removeInterval(Reg);
+        LIS.getInterval(Reg);
+      }
+    }
   }
 
   LLVM_DEBUG(VRM.dump());
@@ -238,10 +295,13 @@ static void rewriteFullCopy(MachineInstr &MI, const std::set &CopySubRegs,
 }
 
 void AIESuperRegRewriter::rewriteSuperReg(
-    Register Reg, Register AssignedPhysReg, MachineRegisterInfo &MRI,
-    const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM, LiveRegMatrix &LRM,
-    LiveIntervals &LIS, SlotIndexes &Indexes, LiveDebugVariables &DebugVars) {
-  LLVM_DEBUG(dbgs() << "Rewriting " << printReg(Reg, &TRI, 0, &MRI) << '\n');
+    Register Reg, Register AssignedPhysReg, MachineFunction &MF,
+    MachineRegisterInfo &MRI, const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM,
+    LiveRegMatrix &LRM, LiveIntervals &LIS, SlotIndexes &Indexes,
+    LiveDebugVariables &DebugVars) {
+  bool AssignPhysRegIsValid = AssignedPhysReg.isPhysical();
+  LLVM_DEBUG(dbgs() << "Rewriting " << printReg(Reg, &TRI, 0, &MRI)
+                    << " Assigned " << AssignPhysRegIsValid << '\n');
   auto *TII = static_cast(
       VRM.getMachineFunction().getSubtarget().getInstrInfo());
 
@@ -251,7 +311,9 @@ void AIESuperRegRewriter::rewriteSuperReg(
   SmallSet SubRegs = getRewritableSubRegs(Reg, MRI, TRI);
   assert(!SubRegs.empty());
   for (int SubReg : SubRegs) {
-    const TargetRegisterClass *SubRC = TRI.getSubRegisterClass(SuperRC, SubReg);
+    const TargetRegisterClass *SubRC = TRI.getLargestLegalSuperClass(
+        TRI.getSubRegisterClass(SuperRC, SubReg), MF);
+
     SubRegToVReg[SubReg] = MRI.createVirtualRegister(SubRC);
   }
 
@@ -288,8 +350,31 @@ void AIESuperRegRewriter::rewriteSuperReg(
   VRM.grow();
   LIS.removeInterval(Reg);
 
+  // The liverange splitting logic sometimes produces bundles of copies when
+  // subregisters are involved.
Sometimes some of the copies are not used,
+  // and since the super-reg-rewriter turns them into individual virtual
+  // registers with separate live ranges, the dead copy must be removed
+  // from the bundle of copies.
+  SmallVector SubRegsToRemove;
+  for (auto &[SubRegIdx, VReg] : make_early_inc_range(SubRegToVReg)) {
+    if (MRI.use_nodbg_empty(VReg))
+      for (auto &MI : MRI.reg_nodbg_instructions(VReg)) {
+        if (MI.isBundled() && MI.isCopy()) {
+          Indexes.removeSingleMachineInstrFromMaps(MI);
+          MI.eraseFromBundle();
+          SubRegsToRemove.push_back(SubRegIdx);
+        }
+        break; // Only the first non-debug instruction of VReg is examined.
+      }
+  }
+
+  for (auto SubRegIdx : SubRegsToRemove)
+    SubRegToVReg.erase(SubRegIdx);
+
   for (auto &[SubRegIdx, VReg] : SubRegToVReg) {
-    MCRegister SubPhysReg = TRI.getSubReg(AssignedPhysReg, SubRegIdx);
+    MCRegister SubPhysReg;
+    if (AssignPhysRegIsValid)
+      SubPhysReg = TRI.getSubReg(AssignedPhysReg, SubRegIdx);
     LiveInterval &SubRegLI = LIS.getInterval(VReg);
     LLVM_DEBUG(dbgs() << " Assigning Range: " << SubRegLI << '\n');
 
@@ -300,11 +385,12 @@ void AIESuperRegRewriter::rewriteSuperReg(
     LIComponents.push_back(&SubRegLI);
     VRM.grow();
 
-    for (LiveInterval *LI : LIComponents) {
-      LRM.assign(*LI, SubPhysReg);
-      VRM.setRequiredPhys(LI->reg(), SubPhysReg);
-      LLVM_DEBUG(dbgs() << " Assigned " << printReg(LI->reg()) << "\n");
-    }
+    if (AssignPhysRegIsValid)
+      for (LiveInterval *LI : LIComponents) {
+        LRM.assign(*LI, SubPhysReg);
+        VRM.setRequiredPhys(LI->reg(), SubPhysReg);
+        LLVM_DEBUG(dbgs() << " Assigned " << printReg(LI->reg()) << "\n");
+      }
   }
 
   // Announce new VRegs so DBG locations can be updated.
@@ -313,6 +399,84 @@ void AIESuperRegRewriter::rewriteSuperReg(
   DebugVars.splitRegister(Reg, NewVRegs, LIS);
 }
 
+// The liverange splitting logic sometimes produces bundles of copies when
+// subregisters are involved. Once the last instruction of such a bundle is
+// reached, expand it into a sequence of copy instructions so that each one
+// gets its own SlotIndex (better for RA on still-unassigned virtual regs).
+// Registers of the re-indexed copies are recorded in RecomputeLIandLRM.
+void AIESuperRegRewriter::expandCopyBundle(
+    MachineInstr &MI, MachineFunction &MF, const AIEBaseRegisterInfo &TRI,
+    SlotIndexes &Indexes, SmallSet &RecomputeLIandLRM) {
+  if (!MI.isCopy() && !MI.isKill())
+    return;
+
+  if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) {
+    SmallVector MIs({&MI});
+
+    // Only do this when the complete bundle is made out of COPYs and KILLs.
+    MachineBasicBlock &MBB = *MI.getParent();
+    for (MachineBasicBlock::reverse_instr_iterator
+             I = std::next(MI.getReverseIterator()),
+             E = MBB.instr_rend();
+         I != E && I->isBundledWithSucc(); ++I) {
+      if (!I->isCopy() && !I->isKill())
+        return;
+      MIs.push_back(&*I);
+    }
+    MachineInstr *FirstMI = MIs.back();
+
+    auto anyRegsAlias = [](const MachineInstr *Dst,
+                           ArrayRef Srcs,
+                           const TargetRegisterInfo &TRI) {
+      for (const MachineInstr *Src : Srcs)
+        if (Src != Dst)
+          if (TRI.regsOverlap(Dst->getOperand(0).getReg(),
+                              Src->getOperand(1).getReg()))
+            return true;
+      return false;
+    };
+
+    // If any of the destination registers in the bundle of copies alias any of
+    // the source registers, try to schedule the instructions to avoid any
+    // clobbering.
+    for (int E = MIs.size(), PrevE = E; E > 1; PrevE = E) {
+      for (int I = E; I--;)
+        if (!anyRegsAlias(MIs[I], ArrayRef(MIs).take_front(E), TRI)) {
+          if (I + 1 != E)
+            std::swap(MIs[I], MIs[E - 1]);
+          --E;
+        }
+      if (PrevE == E) { // No instruction could be scheduled in this pass.
+        MF.getFunction().getContext().emitError(
+            "super-reg-rewriter register rewriting failed: cycle in copy "
+            "bundle");
+        break;
+      }
+    }
+
+    MachineInstr *BundleStart = FirstMI;
+    for (MachineInstr *BundledMI : llvm::reverse(MIs)) {
+      // If instruction is in the middle of the bundle, move it before the
+      // bundle starts, otherwise, just unbundle it. When we get to the last
+      // instruction, the bundle will have been completely undone.
+      if (BundledMI != BundleStart) {
+        BundledMI->removeFromBundle();
+        MBB.insert(BundleStart, BundledMI);
+      } else if (BundledMI->isBundledWithSucc()) {
+        BundledMI->unbundleFromSucc();
+        BundleStart = &*std::next(BundledMI->getIterator());
+      }
+
+      if (BundledMI != FirstMI) {
+        Indexes.insertMachineInstrInMaps(*BundledMI);
+        RecomputeLIandLRM.insert(BundledMI->getOperand(0).getReg());
+        RecomputeLIandLRM.insert(BundledMI->getOperand(1).getReg());
+        BundledMI->getOperand(0).setIsInternalRead(false);
+      }
+    }
+  }
+}
+
 } // end anonymous namespace
 
 char AIESuperRegRewriter::ID = 0;
diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
index 49d892737bf3..e9139454577e 100644
--- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
+++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
@@ -1051,7 +1051,6 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const {
         {AIE2P::ST_dms_sts_spill, AIE2P::sub_dim_size},
         {AIE2P::ST_dms_sts_spill, AIE2P::sub_dim_stride},
         {AIE2P::ST_dms_sts_spill, AIE2P::sub_dim_count},
-        {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_mod},
         {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_dim_size},
         {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_dim_stride},
         {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_dim_count}};
@@ -1089,7 +1088,6 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const {
         {AIE2P::LDA_dms_lda_spill, AIE2P::sub_dim_size},
         {AIE2P::LDA_dms_lda_spill, AIE2P::sub_dim_stride},
         {AIE2P::LDA_dms_lda_spill, AIE2P::sub_dim_count},
-        {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_dim_then_sub_mod},
         {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_dim_then_sub_dim_size},
         {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_dim_then_sub_dim_stride},
         {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_dim_then_sub_dim_count}};
@@ -1277,7 +1275,6 @@ AIE2PInstrInfo::getTiedRegInfo(unsigned Opcode) const {
SubRegSplit(AIE2P::sub_dim_size), SubRegSplit(AIE2P::sub_dim_stride), SubRegSplit(AIE2P::sub_dim_count), - SubRegSplit(AIE2P::sub_hi_dim_then_sub_mod, /*IsUndef=*/true), SubRegSplit(AIE2P::sub_hi_dim_then_sub_dim_size), SubRegSplit(AIE2P::sub_hi_dim_then_sub_dim_stride), SubRegSplit(AIE2P::sub_hi_dim_then_sub_dim_count)}; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td index 14822e9880b6..52329a838ffc 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td @@ -225,7 +225,7 @@ foreach instr = [ // Define _split variants for instructions using 3D registers class Split3DInstr : SplitPseudo {} + eDN:$dim_size2, eDJ:$dim_stride2, eDC:$dim_count2)> {} foreach instr = [PADDA_3D, PADDB_3D, PADDS_3D, PADD_3D_pseudo] in def instr # _split : Split3DInstr; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp index c0f856aef2f8..410ef25be5a6 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp @@ -52,6 +52,16 @@ AIE2PRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_AIE2P_SaveList; } +const TargetRegisterClass * +AIE2PRegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, + unsigned Idx) const { + if (Idx == AIE2P::sub_lo_dim) { + return nullptr; + } + // Forward to TableGen's default version. 
+ return AIE2PGenRegisterInfo::getSubClassWithSubReg(RC, Idx); +} + BitVector AIE2PRegisterInfo::getReservedRegs(const MachineFunction &MF) const { const TargetFrameLowering *TFI = getFrameLowering(MF); BitVector Reserved(getNumRegs()); @@ -495,7 +505,6 @@ const std::set &AIE2PRegisterInfo::getSubRegSplit(int RegClassId) const { AIE2P::sub_dim_size, AIE2P::sub_dim_stride, AIE2P::sub_dim_count, - AIE2P::sub_hi_dim_then_sub_mod, AIE2P::sub_hi_dim_then_sub_dim_size, AIE2P::sub_hi_dim_then_sub_dim_stride, AIE2P::sub_hi_dim_then_sub_dim_count}; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h index c7db1ac989a8..30178b383105 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h @@ -48,6 +48,10 @@ struct AIE2PRegisterInfo : public AIE2PGenRegisterInfo { const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, unsigned Kind) const override; + const TargetRegisterClass * + getSubClassWithSubReg(const TargetRegisterClass *RC, + unsigned Idx) const override; + bool requiresRegisterScavenging(const MachineFunction &MF) const override { return true; } diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp index ab0158f21334..6f13c154db14 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp @@ -14,6 +14,7 @@ #include "AIE2PTargetMachine.h" #include "AIE2PTargetTransformInfo.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" using namespace llvm; @@ -66,6 +67,173 @@ void AIE2PPassConfig::addPreRegBankSelect() { } } +static bool onlyAllocateLIwith3DInstruction(MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + const LiveInterval *LI) { + const Register Reg = LI->reg(); + return std::any_of( + MRI.use_nodbg_instructions(Reg).begin(), + MRI.use_nodbg_instructions(Reg).end(), 
[&](const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AIE2P::LDA_3D_dms_lda: + case AIE2P::LDA_3D_dmv_lda_q: + case AIE2P::LDA_3D_s16: + case AIE2P::LDA_3D_s8: + case AIE2P::LDA_3D_u16: + case AIE2P::LDA_3D_u8: + case AIE2P::LDA_TM_3D: + case AIE2P::ST_3D_dms_sts: + case AIE2P::ST_3D_dmv_sts_q: + case AIE2P::ST_3D_s16: + case AIE2P::ST_3D_s8: + case AIE2P::ST_TM_3D: + case AIE2P::VLDA_3D_128: + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmw_lda_ups_bf: + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf: + case AIE2P::VLDA_3D_dmw_lda_w: + case AIE2P::VLDA_3D_dmx_lda_bm: + case AIE2P::VLDA_3D_dmx_lda_fifohl: + case AIE2P::VLDA_3D_dmx_lda_x: + case AIE2P::VLDB_3D_128: + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign0: + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign1: + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign0: + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign1: + case AIE2P::VLDB_3D_dmw_ldb: + case AIE2P::VLDB_3D_dmx_ldb_x: + case AIE2P::VST_3D_128: + case AIE2P::VST_3D_CONV_bf16_fp32_dmw_sts_srs_bf: + case AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf: + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0: + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1: + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0: + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1: + case AIE2P::VST_3D_dmw_sts_w: + case AIE2P::VST_3D_dmx_sts_bm: + case AIE2P::VST_3D_dmx_sts_fifohl: + case AIE2P::VST_3D_dmx_sts_x: + case AIE2P::VLD_3D_w_pseudo: + case AIE2P::VLD_3D_x_pseudo: + case AIE2P::VLD_3D_128_pseudo: + case AIE2P::PADDA_3D: + case AIE2P::PADDB_3D: + case AIE2P::PADDS_3D: + case AIE2P::PADD_3D_pseudo: + case AIE2P::VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1: + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign0: + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign1: + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign0: + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign1: + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0: + case 
AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign1: + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign0: + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign1: + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign0: + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign1: + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign0: + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign1: + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign0: + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign1: + case AIE2P::VST_FLUSH_512_3D: + case AIE2P::VST_FLUSH_512_CONV_3D: + case AIE2P::VLDA_POP_512_3D: + case AIE2P::VLDA_POP_544_3D: + case AIE2P::VLDA_POP_576_3D: + case AIE2P::VLDA_POP_640_3D: + case AIE2P::VLDA_POP_704_3D: + case AIE2P::VLDB_POP_512_3D: + case AIE2P::VLDB_POP_544_3D: + case AIE2P::VLDB_POP_576_3D: + case AIE2P::VLDB_POP_640_3D: + case AIE2P::VLDB_POP_704_3D: + case AIE2P::VLD_POP_512_3D_pseudo: + case AIE2P::VLD_POP_544_3D_pseudo: + case AIE2P::VLD_POP_576_3D_pseudo: + case AIE2P::VLD_POP_640_3D_pseudo: + case AIE2P::VLD_POP_704_3D_pseudo: + case AIE2P::LDA_3D_dms_lda_split: + case AIE2P::LDA_3D_dmv_lda_q_split: + case AIE2P::LDA_3D_s16_split: + case AIE2P::LDA_3D_s8_split: + case AIE2P::LDA_3D_u16_split: + case AIE2P::LDA_3D_u8_split: + case AIE2P::LDA_TM_3D_split: + case AIE2P::ST_3D_dms_sts_split: + case AIE2P::ST_3D_dmv_sts_q_split: + case AIE2P::ST_3D_s16_split: + case AIE2P::ST_3D_s8_split: + case AIE2P::ST_TM_3D_split: + case AIE2P::VLDA_3D_128_split: + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmw_lda_ups_bf_split: + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf_split: + case AIE2P::VLDA_3D_dmw_lda_w_split: + case AIE2P::VLDA_3D_dmx_lda_bm_split: + case AIE2P::VLDA_3D_dmx_lda_fifohl_split: + case AIE2P::VLDA_3D_dmx_lda_x_split: + case AIE2P::VLDB_3D_128_split: + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign0_split: + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign1_split: + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign0_split: + case 
AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign1_split: + case AIE2P::VLDB_3D_dmw_ldb_split: + case AIE2P::VLDB_3D_dmx_ldb_x_split: + case AIE2P::VST_3D_128_split: + case AIE2P::VST_3D_CONV_bf16_fp32_dmw_sts_srs_bf_split: + case AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf_split: + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0_split: + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1_split: + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0_split: + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1_split: + case AIE2P::VST_3D_dmw_sts_w_split: + case AIE2P::VST_3D_dmx_sts_bm_split: + case AIE2P::VST_3D_dmx_sts_fifohl_split: + case AIE2P::VST_3D_dmx_sts_x_split: + case AIE2P::VLD_3D_w_pseudo_split: + case AIE2P::VLD_3D_x_pseudo_split: + case AIE2P::VLD_3D_128_pseudo_split: + case AIE2P::PADDA_3D_split: + case AIE2P::PADDB_3D_split: + case AIE2P::PADDS_3D_split: + case AIE2P::PADD_3D_pseudo_split: + case AIE2P::VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1_split: + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign0_split: + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign1_split: + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign0_split: + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign1_split: + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split: + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign1_split: + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign0_split: + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign1_split: + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign0_split: + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign1_split: + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign0_split: + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign1_split: + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign0_split: + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign1_split: + case AIE2P::VST_FLUSH_512_3D_split: + case AIE2P::VST_FLUSH_512_CONV_3D_split: + case AIE2P::VLDA_POP_512_3D_split: + case AIE2P::VLDA_POP_544_3D_split: + case AIE2P::VLDA_POP_576_3D_split: + case 
AIE2P::VLDA_POP_640_3D_split: + case AIE2P::VLDA_POP_704_3D_split: + case AIE2P::VLDB_POP_512_3D_split: + case AIE2P::VLDB_POP_544_3D_split: + case AIE2P::VLDB_POP_576_3D_split: + case AIE2P::VLDB_POP_640_3D_split: + case AIE2P::VLDB_POP_704_3D_split: + case AIE2P::VLD_POP_512_3D_pseudo_split: + case AIE2P::VLD_POP_544_3D_pseudo_split: + case AIE2P::VLD_POP_576_3D_pseudo_split: + case AIE2P::VLD_POP_640_3D_pseudo_split: + case AIE2P::VLD_POP_704_3D_pseudo_split: + return true; + default: + return false; + } + }); +} + static bool onlyAllocate3DRegisters(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) { return AIE2P::eDSRegClass.hasSubClassEq(&RC); @@ -97,7 +265,8 @@ bool AIE2PPassConfig::addRegAssignAndRewriteOptimized() { if (AllocateMRegsFirst) addPass(createGreedyRegisterAllocator(onlyAllocateMRegisters)); if (EnableStagedRA) { - addPass(createGreedyRegisterAllocator(onlyAllocate3DRegisters)); + addPass(createGreedyRegisterAllocator(onlyAllocate3DRegisters, + onlyAllocateLIwith3DInstruction)); addPass(createAIESuperRegRewriter()); addPass(createGreedyRegisterAllocator(onlyAllocate3D2DRegisters)); addPass(createAIESuperRegRewriter()); diff --git a/llvm/test/CodeGen/AIE/aie2/postrapseudos/dim_spill.mir b/llvm/test/CodeGen/AIE/aie2/postrapseudos/dim_spill.mir index 743562fc33a0..eaa70a19c4f3 100644 --- a/llvm/test/CodeGen/AIE/aie2/postrapseudos/dim_spill.mir +++ b/llvm/test/CodeGen/AIE/aie2/postrapseudos/dim_spill.mir @@ -119,18 +119,16 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_spill -36, implicit $sp :: (load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_spill -32, implicit $sp :: (load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_spill -28, implicit $sp :: (load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_spill -24, implicit $sp :: (load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_spill -20, implicit $sp :: (load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_spill -16, implicit $sp :: (load (s32) 
from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_spill -12, implicit $sp :: (load (s32) from %stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_spill -24, implicit $sp :: (load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_spill -20, implicit $sp :: (load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_spill -16, implicit $sp :: (load (s32) from %stack.0 + 24) ; CHECK-NEXT: ST_dms_spill $m1, -80, implicit $sp :: (store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_spill $dn1, -76, implicit $sp :: (store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_spill $dj1, -72, implicit $sp :: (store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_spill $dc1, -68, implicit $sp :: (store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_spill $m5, -64, implicit $sp :: (store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_spill $dn5, -60, implicit $sp :: (store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_spill $dj5, -56, implicit $sp :: (store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_spill $dc5, -52, implicit $sp :: (store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_spill $dn5, -64, implicit $sp :: (store (s32) into %stack.1 + 16) + ; CHECK-NEXT: ST_dms_spill $dj5, -60, implicit $sp :: (store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_spill $dc5, -56, implicit $sp :: (store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL -40, implicit $sp :: (load (s256) from %stack.0, align 4) ST_DS_SPILL $d1_3d, -80, implicit $sp :: (store (s256) into %stack.1, align 4) ... 
@@ -148,9 +146,9 @@ body: | bb.0 (align 16): ; CHECK-LABEL: name: test_ds_partial - ; CHECK: $dj5 = LDA_dms_spill -16, implicit $sp :: (load (s32) from %stack.0 + 24) + ; CHECK: $dj5 = LDA_dms_spill -20, implicit $sp :: (load (s32) from %stack.0 + 20) ; CHECK-NEXT: $dj6 = MOV_mv_scl $dj5 - ; CHECK-NEXT: ST_dms_spill $dj6, -56, implicit $sp :: (store (s32) into %stack.1 + 24) + ; CHECK-NEXT: ST_dms_spill $dj6, -60, implicit $sp :: (store (s32) into %stack.1 + 20) $d1_3d = LDA_DS_SPILL -40, implicit $sp :: (load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, -80, implicit $sp :: (store (s256) into %stack.1, align 4) @@ -171,19 +169,17 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_spill -36, implicit $sp :: (load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_spill -32, implicit $sp :: (load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_spill -28, implicit $sp :: (load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_spill -24, implicit $sp :: (load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_spill -20, implicit $sp :: (load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_spill -16, implicit $sp :: (load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_spill -12, implicit $sp :: (load (s32) from %stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_spill -24, implicit $sp :: (load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_spill -20, implicit $sp :: (load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_spill -16, implicit $sp :: (load (s32) from %stack.0 + 24) ; CHECK-NEXT: $dj6 = MOV_mv_scl $dj5 ; CHECK-NEXT: ST_dms_spill $m2, -80, implicit $sp :: (store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_spill $dn2, -76, implicit $sp :: (store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_spill $dj2, -72, implicit $sp :: (store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_spill $dc2, -68, implicit $sp :: (store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_spill $m6, -64, implicit $sp :: 
(store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_spill $dn6, -60, implicit $sp :: (store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_spill $dj6, -56, implicit $sp :: (store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_spill $dc6, -52, implicit $sp :: (store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_spill $dn6, -64, implicit $sp :: (store (s32) into %stack.1 + 16) + ; CHECK-NEXT: ST_dms_spill $dj6, -60, implicit $sp :: (store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_spill $dc6, -56, implicit $sp :: (store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL -40, implicit $sp :: (load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, -80, implicit $sp :: (store (s256) into %stack.1, align 4) @@ -206,19 +202,17 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_spill -36, implicit $sp :: (volatile load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_spill -32, implicit $sp :: (volatile load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_spill -28, implicit $sp :: (volatile load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_spill -24, implicit $sp :: (volatile load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_spill -20, implicit $sp :: (volatile load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_spill -16, implicit $sp :: (volatile load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_spill -12, implicit $sp :: (volatile load (s32) from %stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_spill -24, implicit $sp :: (volatile load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_spill -20, implicit $sp :: (volatile load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_spill -16, implicit $sp :: (volatile load (s32) from %stack.0 + 24) ; CHECK-NEXT: $dj6 = MOV_mv_scl $dj5 ; CHECK-NEXT: ST_dms_spill undef $m2, -80, implicit $sp :: (volatile store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_spill undef $dn2, -76, implicit $sp :: (volatile store (s32) into %stack.1 + 4) ; 
CHECK-NEXT: ST_dms_spill undef $dj2, -72, implicit $sp :: (volatile store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_spill undef $dc2, -68, implicit $sp :: (volatile store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_spill undef $m6, -64, implicit $sp :: (volatile store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_spill undef $dn6, -60, implicit $sp :: (volatile store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_spill $dj6, -56, implicit $sp :: (volatile store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_spill undef $dc6, -52, implicit $sp :: (volatile store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_spill undef $dn6, -64, implicit $sp :: (volatile store (s32) into %stack.1 + 16) + ; CHECK-NEXT: ST_dms_spill $dj6, -60, implicit $sp :: (volatile store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_spill undef $dc6, -56, implicit $sp :: (volatile store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL -40, implicit $sp :: (volatile load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, -80, implicit $sp :: (volatile store (s256) into %stack.1, align 4) diff --git a/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-create.mir b/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-create.mir index 28cc3f4271ab..91ce9b12cf09 100644 --- a/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-create.mir +++ b/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-create.mir @@ -25,9 +25,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = PADDA_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = PADDB_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count - ; CHECK-NEXT: dead [[COPY]]:ep, dead 
[[COPY1]].sub_dim_count:ed = PADDS_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = PADDA_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = PADDB_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = PADDS_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count %20:ep = COPY $p0 %100:ed = COPY $d1 %20:ep, %100.sub_dim_count:ed = PADDA_2D killed %20, killed %100 @@ -51,9 +51,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = PADDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = PADDS_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDS_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count %20:ep = COPY $p0 %100:eds = COPY $d1_3d %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D killed %20, killed %100 @@ -76,9 +76,9 @@ body: | ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:mwa, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_dmw_lda_w_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %3:mamm, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_dmw_lda_am_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %4:mwa, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = VLDB_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_2D_dmw_lda_w_split:%[0-9]+]]:mwa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_dmw_lda_w_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_2D_dmw_lda_am_split:%[0-9]+]]:mamm, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_dmw_lda_am_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_2D_split:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %0:mwa, %20:ep, %100.sub_dim_count:ed = VLDA_2D_dmw_lda_w killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -101,9 +101,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:mwa, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_dmw_lda_w_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %3:mamm, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_dmw_lda_am_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %4:mwa, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_3D_dmw_lda_w_split:%[0-9]+]]:mwa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_dmw_lda_w_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, 
killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_3D_dmw_lda_am_split:%[0-9]+]]:mamm, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_dmw_lda_am_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_3D_split:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %0:mwa, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_dmw_lda_w killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -149,8 +149,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead [[VLD_3D_pseudo_split:%[0-9]+]]:mwa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_3D_pseudo_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead 
[[VLD_3D_pseudo_split1:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_3D_pseudo_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLD_3D_pseudo_split:%[0-9]+]]:mwa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_3D_pseudo_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLD_3D_pseudo_split1:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_3D_pseudo_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %0:mwa, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLD_3D_pseudo killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -172,7 +172,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:mbms, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = VLDA_2D_CONV_FP32_BF16_split 
killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_2D_CONV_FP32_BF16_split:%[0-9]+]]:mbms, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_CONV_FP32_BF16_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %1:mbms, dead %20:ep, dead %100.sub_dim_count:ed = VLDA_2D_CONV_FP32_BF16 killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -193,7 +193,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:mbms, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_CONV_FP32_BF16_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_3D_CONV_FP32_BF16_split:%[0-9]+]]:mbms, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_CONV_FP32_BF16_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %1:mbms, dead %20:ep, dead %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = 
VLDA_3D_CONV_FP32_BF16 killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -215,10 +215,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDB_2D_UNPACK_S8_S4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %3:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDB_2D_UNPACK_S16_S8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %4:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDB_2D_UNPACK_D8_D4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %5:mxs, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = VLDB_2D_UNPACK_D16_D8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_2D_UNPACK_S8_S4_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_UNPACK_S8_S4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_2D_UNPACK_S16_S8_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_UNPACK_S16_S8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead 
[[VLDB_2D_UNPACK_D8_D4_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_UNPACK_D8_D4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_2D_UNPACK_D16_D8_split:%[0-9]+]]:mxs, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_UNPACK_D16_D8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %0:mxs, %20:ep, %100.sub_dim_count:ed = VLDB_2D_UNPACK_S8_S4 killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -242,10 +242,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_S8_S4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %3:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_S16_S8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %4:mxs, 
[[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_D8_D4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %5:mxs, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_D16_D8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_3D_UNPACK_S8_S4_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_S8_S4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_3D_UNPACK_S16_S8_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_S16_S8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_3D_UNPACK_D8_D4_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_D8_D4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_3D_UNPACK_D16_D8_split:%[0-9]+]]:mxs, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_D16_D8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %0:mxs, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_S8_S4 killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -272,9 +272,9 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mws = COPY $wl0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mams = COPY $wl0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_dmw_sts_w_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_128_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed 
[[COPY1]].sub_dim_count :: (store (<8 x s32>)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = VST_2D_dmw_sts_am_split killed [[COPY3]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_dmw_sts_w_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_128_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_dmw_sts_am_split killed [[COPY3]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %0:mws = COPY $wl0 @@ -300,7 +300,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mqqa = COPY $wl0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = ST_2D_dmv_sts_q_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<4 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = ST_2D_dmv_sts_q_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<4 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %0:mqqa = COPY $wl0 @@ -325,9 +325,9 @@ body: | ; 
CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mws = COPY $wl0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mams = COPY $wl0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_dmw_sts_w_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_128_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_dmw_sts_am_split killed [[COPY3]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_dmw_sts_w_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_128_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_dmw_sts_am_split killed [[COPY3]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %0:mws = COPY $wl0 @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mqqa = COPY $wl0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_dmv_sts_q_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<4 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_dmv_sts_q_split killed [[COPY2]], killed [[COPY]], killed 
[[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<4 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %0:mqqa = COPY $wl0 @@ -375,14 +375,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S32_D16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S64_D32_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S32_D8_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S64_D16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S32_S16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit 
$crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S64_S32_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S32_S8_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S64_S16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S32_D16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S64_D32_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S32_D8_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; 
CHECK-NEXT: [[VLDA_2D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S64_D16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S32_S16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S64_S32_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S32_S8_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S64_S16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %0:mbms, %20:ep, %100.sub_dim_count:ed = VLDA_2D_UPS_S32_D16 $s0, %20, killed %100, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) @@ -410,14 +410,14 @@ body: | ; CHECK-NEXT: {{ 
$}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_D32_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_D16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed 
[[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_S16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_S32_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_S8_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, 
[[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_S16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_D32_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_D16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_S16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_S32_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, 
[[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_S8_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_S16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %0:mbms, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D16 $s0, killed %20, killed %100, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) @@ -446,10 +446,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mxs = COPY $x0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_PACK_D4_D8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_PACK_D8_D16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) - ; CHECK-NEXT: 
[[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_PACK_S4_S8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_PACK_S8_S16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_PACK_D4_D8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_PACK_D8_D16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_PACK_S4_S8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_PACK_S8_S16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %200:mxs = COPY $x0 @@ -475,10 +475,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mxs = COPY $x0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_D4_D8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef 
[[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_D8_D16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_S4_S8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_S8_S16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_D4_D8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat, 
implicit $crpacksign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_D8_D16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_S4_S8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_S8_S16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %200:mxs = COPY $x0 @@ -503,14 +503,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_D8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_D16_S64_split killed [[COPY]], killed 
[[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_D16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_D32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_S8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_S16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_S16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_S32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed 
[[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_D8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_D16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_D16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_D32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_S8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_S16_S64_split killed [[COPY]], 
killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_S16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_S32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %20:ep, %100.sub_dim_count:ed = VST_2D_SRS_D8_S32 killed %20, killed %100, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) @@ -539,14 +539,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D16_S64_split killed [[COPY]], killed 
[[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed 
[[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D8_S32 killed %20, killed %100, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) @@ -574,7 +574,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_CONV_2D_BF16_FP32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_CONV_2D_BF16_FP32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %20:ep, %100.sub_dim_count:ed = VST_CONV_2D_BF16_FP32 killed %20, killed %100, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>) into unknown-address) @@ -595,7 +595,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_CONV_3D_BF16_FP32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_CONV_3D_BF16_FP32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VST_CONV_3D_BF16_FP32 killed %20, killed %100, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>) into unknown-address) @@ -616,7 +616,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:mwa, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = VLDB_2D_128_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s16>)) + ; CHECK-NEXT: dead [[VLDB_2D_128_split:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_128_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s16>)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %2:mwa, dead %20:ep, dead %100.sub_dim_count:ed = VLDB_2D_128 killed %20, killed %100 :: (load (<8 x s16>) from unknown-address) @@ -637,7 +637,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY 
$d1_3d - ; CHECK-NEXT: dead %2:mwa, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_128_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s16>)) + ; CHECK-NEXT: dead [[VLDB_3D_128_split:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_128_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s16>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %2:mwa, dead %20:ep, dead %100.sub_dim_count:eds, dead %100.sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_128 killed %20, killed %100 :: (load (<8 x s16>) from unknown-address) @@ -658,11 +658,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:msclst, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_dms_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %3:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_S8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %4:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_U8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, 
[[COPY1]].sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %5:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_S16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %6:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_U16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_dms_lda_split:%[0-9]+]]:msclst, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_dms_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_S8_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_S8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_U8_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_U8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_S16_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_S16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_U16_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_U16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %2:msclst, %20:ep, %100.sub_dim_count:ed = LDA_2D_dms_lda %20, %100 :: (load (s32) from unknown-address) @@ -687,11 +687,11 @@ body: | ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:msclst, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %3:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_S8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %4:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_U8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %5:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_S16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %6:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_U16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, 
[[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_3D_dms_lda_split:%[0-9]+]]:msclst, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_3D_S8_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_S8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_3D_U8_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_U8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_3D_S16_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_S16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; 
CHECK-NEXT: dead [[LDA_3D_U16_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_U16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %2:msclst, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda %20, %100 :: (load (s32) from unknown-address) @@ -716,7 +716,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:mqqa, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_dmv_lda_q_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_dmv_lda_q_split:%[0-9]+]]:mqqa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_dmv_lda_q_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %2:mqqa, %20:ep, %100.sub_dim_count:ed = LDA_2D_dmv_lda_q %20, %100 :: (load (s32) from unknown-address) @@ -737,7 +737,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:mqqa, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dmv_lda_q_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead 
[[LDA_3D_dmv_lda_q_split:%[0-9]+]]:mqqa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dmv_lda_q_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %2:mqqa, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dmv_lda_q %20, %100 :: (load (s32) from unknown-address) @@ -759,9 +759,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = ST_2D_dms_sts_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (store (s32)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = ST_2D_S8_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (store (s8)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = ST_2D_S16_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (s16)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = ST_2D_dms_sts_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (store (s32)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = ST_2D_S8_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (store (s8)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead 
[[COPY1:%[0-9]+]].sub_dim_count:ed = ST_2D_S16_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (s16)) %20:ep = COPY $p0 %100:ed = COPY $d1 %2:er = COPY $r0 @@ -786,9 +786,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_dms_sts_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s32)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_S8_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s8)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_S16_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s16)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_dms_sts_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, 
[[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s32)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_S8_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s8)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_S16_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s16)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %2:er = COPY $r0 diff --git a/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-replace.mir b/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-replace.mir index 00bc697cb385..8261b55e0cd8 100644 --- a/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-replace.mir +++ b/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-replace.mir @@ -156,7 +156,7 @@ body: | bb.3: liveins: $dc4, $dj4, $dn4, $p0, $m0, $dn0, $dj0, $dc0 - $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 + $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -193,9 +193,9 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDB_3D killed $p0, $d0_3d ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDS_3D killed $p0, $d0_3d ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 - $p0, $dc0, $dc4 = PADDB_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 - $p0, $dc0, $dc4 = PADDS_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 + $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 + $p0, $dc0, $dc4 = PADDB_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 + $p0, $dc0, $dc4 = PADDS_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -232,9 +232,9 @@ body: | ; CHECK-NEXT: dead $amll0, $p0, $dc0, $dc4 = VLDA_3D_dmw_lda_am killed $p0, $d0_3d :: (load (<8 x s32>)) ; CHECK-NEXT: dead $wl0, $p0, $dc0, $dc4 = VLDB_3D killed $p0, $d0_3d :: (load (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - dead $wl0, $p0, $dc0, $dc4 = VLDA_3D_dmw_lda_w_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) - dead $amll0, $p0, $dc0, $dc4 = VLDA_3D_dmw_lda_am_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) - dead $wl0, $p0, $dc0, $dc4 = VLDB_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (load (<8 x s32>) from unknown-address) + dead $wl0, $p0, $dc0, $dc4 = VLDA_3D_dmw_lda_w_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) + dead $amll0, $p0, $dc0, $dc4 = VLDA_3D_dmw_lda_am_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) + dead $wl0, $p0, $dc0, $dc4 = VLDB_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 :: (load (<8 x s32>) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -289,10 +289,10 @@ body: | ; CHECK-NEXT: dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D8_D4 killed $p0, $d0_3d, implicit $crunpacksign :: (load (<8 x s32>)) ; CHECK-NEXT: dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D16_D8 killed $p0, $d0_3d, implicit $crunpacksign :: (load (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_S8_S4_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) - dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_S16_S8_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) - dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D8_D4_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit $crunpacksign :: (load (<8 x s32>) from unknown-address) - dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D16_D8_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4, implicit $crunpacksign :: (load (<8 x s32>) from unknown-address) + dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_S8_S4_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) + dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_S16_S8_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) + dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D8_D4_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit $crunpacksign :: (load (<8 x s32>) from unknown-address) + dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D16_D8_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4, implicit $crunpacksign :: (load (<8 x s32>) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable 
$dc4 ... @@ -308,7 +308,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead $bml0, $p0, $dc0, $dc4 = VLDA_3D_CONV_FP32_BF16 killed $p0, $d0_3d :: (load (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - dead $bml0, $p0, $dc0, $dc4 = VLDA_3D_CONV_FP32_BF16_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) + dead $bml0, $p0, $dc0, $dc4 = VLDA_3D_CONV_FP32_BF16_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -369,10 +369,10 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_PACK_S4_S8 killed $p0, $d0_3d, $x0, implicit $crsat :: (store (<8 x s32>)) ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_PACK_S8_S16 killed $p0, $d0_3d, $x0, implicit $crsat :: (store (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = VST_3D_PACK_D4_D8_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $x0, implicit $crsat, implicit $crpacksign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_PACK_D8_D16_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $x0, implicit $crsat, implicit $crpacksign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_PACK_S4_S8_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $x0, implicit $crsat :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_PACK_S8_S16_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4, $x0, implicit $crsat :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_PACK_D4_D8_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $x0, 
implicit $crsat, implicit $crpacksign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_PACK_D8_D16_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $x0, implicit $crsat, implicit $crpacksign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_PACK_S4_S8_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $x0, implicit $crsat :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_PACK_S8_S16_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4, $x0, implicit $crsat :: (store (<8 x s32>) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc4 ... @@ -406,9 +406,9 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_128 killed $wl0, killed $p0, $d0_3d :: (store (<8 x s32>)) ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_dmw_sts_am killed $amll0, killed $p0, $d0_3d :: (store (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = VST_3D_dmw_sts_w_split $wl0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_128_split killed $wl0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_dmw_sts_am_split killed $amll0, killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_dmw_sts_w_split $wl0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_128_split killed $wl0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_dmw_sts_am_split killed $amll0, killed $p0, killed 
$m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 :: (store (<8 x s32>) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -423,7 +423,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $p0, $dc0, $dc4 = ST_3D_dmv_sts_q killed $q0, killed $p0, $d0_3d :: (store (<4 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = ST_3D_dmv_sts_q_split killed $q0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (store (<4 x s32>) into unknown-address) + $p0, $dc0, $dc4 = ST_3D_dmv_sts_q_split killed $q0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (store (<4 x s32>) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -475,14 +475,14 @@ body: | ; CHECK-NEXT: $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_S8 $s0, killed $p0, $d0_3d, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) ; CHECK-NEXT: $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_S16 $s0, killed $p0, $d0_3d, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_D16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) - $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_D32_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) - $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_D8_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def 
$srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) - $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_D16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) - $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_S16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) - $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_S32_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) - $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_S8_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) - $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_S16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) + $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_D16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) + $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_D32_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) + $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_D8_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) + $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_D16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) + $bml0, 
$p0, $dc0, $dc4 = VLDA_3D_UPS_S32_S16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) + $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_S32_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) + $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_S8_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) + $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_S16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -534,14 +534,14 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_SRS_S16_S32 killed $p0, $d0_3d, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_SRS_S32_S64 killed $p0, $d0_3d, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = VST_3D_SRS_D8_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_D16_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_D16_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def 
$srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_D32_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_S8_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_S16_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_S16_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_S32_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_D8_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_D16_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_D16_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_D32_S64_split killed $p0, $m0, $dn0, 
$dj0, $dc0, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_S8_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_S16_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_S16_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_S32_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -573,7 +573,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_CONV_3D_BF16_FP32 killed $p0, $d0_3d, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = VST_CONV_3D_BF16_FP32_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_CONV_3D_BF16_FP32_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -604,7 +604,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead $wl0, $p0, $dc0, $dc4 = VLDB_3D_128 killed $p0, $d0_3d :: (load (<8 x s16>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - dead $wl0, $p0, $dc0, $dc4 = VLDB_3D_128_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (load (<8 x s16>) from unknown-address) + dead $wl0, $p0, $dc0, $dc4 = VLDB_3D_128_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 :: (load (<8 x s16>) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -649,11 +649,11 @@ body: | ; CHECK-NEXT: $r0, $p0, $dc0, $dc4 = LDA_3D_S16_dmhb_lda killed $p0, $d0_3d :: (load (s32)) ; CHECK-NEXT: $r0, $p0, $dc0, $dc4 = LDA_3D_U16_dmhb_lda killed $p0, $d0_3d :: (load (s32)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $r0, $p0, $dc0, $dc4 = LDA_3D_dms_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) - $r0, $p0, $dc0, $dc4 = LDA_3D_S8_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) - $r0, $p0, $dc0, $dc4 = LDA_3D_U8_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) - $r0, $p0, $dc0, $dc4 = LDA_3D_S16_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) - $r0, $p0, $dc0, $dc4 = LDA_3D_U16_dmhb_lda_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (load (s32) from unknown-address) + $r0, $p0, $dc0, $dc4 = LDA_3D_dms_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) + $r0, $p0, $dc0, $dc4 = LDA_3D_S8_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) + $r0, $p0, $dc0, $dc4 = LDA_3D_U8_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) + $r0, $p0, $dc0, $dc4 = LDA_3D_S16_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) + $r0, $p0, $dc0, $dc4 = LDA_3D_U16_dmhb_lda_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 :: (load (s32) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed 
renamable $dc4 ... @@ -684,7 +684,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $q0, $p0, $dc0, $dc4 = LDA_3D_dmv_lda_q killed $p0, $d0_3d :: (load (s32)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $q0, $p0, $dc0, $dc4 = LDA_3D_dmv_lda_q_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) + $q0, $p0, $dc0, $dc4 = LDA_3D_dmv_lda_q_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -714,7 +714,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $p0, $dc0, $dc4 = ST_3D_dms_sts $r0, killed $p0, $d0_3d :: (store (s32)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = ST_3D_dms_sts_split $r0, killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (store (s32) into unknown-address) + $p0, $dc0, $dc4 = ST_3D_dms_sts_split $r0, killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 :: (store (s32) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -747,7 +747,7 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = ST_3D_S8 $r0, $p0, $d0_3d :: (store (s8)) ; CHECK-NEXT: $p0, $dc0, $dc4 = ST_3D_S16 $r0, killed $p0, $d0_3d :: (store (s16)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = ST_3D_S8_split $r0, $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (store (s8) into unknown-address) - $p0, $dc0, $dc4 = ST_3D_S16_split $r0, killed $p0, $m0, $dn0, $dj0, $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (store (s16) into unknown-address) + $p0, $dc0, $dc4 = ST_3D_S8_split $r0, $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (store (s8) into unknown-address) + $p0, $dc0, $dc4 = ST_3D_S16_split $r0, killed $p0, $m0, $dn0, $dj0, $dc0, killed $dn4, killed $dj4, killed $dc4 :: (store (s16) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
diff --git a/llvm/test/CodeGen/AIE/aie2/ra/tie-subregs-flow-3d.mir b/llvm/test/CodeGen/AIE/aie2/ra/tie-subregs-flow-3d.mir index 7f92425e94a4..563d492d2d85 100644 --- a/llvm/test/CodeGen/AIE/aie2/ra/tie-subregs-flow-3d.mir +++ b/llvm/test/CodeGen/AIE/aie2/ra/tie-subregs-flow-3d.mir @@ -34,7 +34,6 @@ body: | ; CHECK-NEXT: $dj4 = MOV_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 ; CHECK-NEXT: $p0, dead $dc0, $dc4 = PADDA_3D killed $p0, killed $d0_3d ; CHECK-NEXT: DelayedSchedBarrier implicit killed renamable $p0, implicit killed renamable $dc4 %0:em = COPY $r0 @@ -47,10 +46,10 @@ body: | %7:edc = COPY $r7 %8:ep = COPY $p0 - %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count + %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count %200:ep, %300:edc, %400:edc = PADDA_3D %8, %100 - %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %300, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %400, %subreg.sub_hi_dim_then_sub_dim_count + %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %300, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %400, %subreg.sub_hi_dim_then_sub_dim_count %201:ep, %301:edc, %401:edc = PADDA_3D %200, %101 PseudoRET implicit $lr, implicit %201, 
implicit %401 @@ -88,27 +87,26 @@ body: | ; CHECK-NEXT: $dn2 = MOV_mv_scl killed $r1 ; CHECK-NEXT: $dj2 = MOV_mv_scl killed $r2 ; CHECK-NEXT: $dc2 = MOV_mv_scl killed $r3 - ; CHECK-NEXT: $m6 = MOV_mv_scl killed $r4 ; CHECK-NEXT: $dn6 = MOV_mv_scl killed $r5 ; CHECK-NEXT: $dj6 = MOV_mv_scl killed $r6 ; CHECK-NEXT: $dc6 = MOV_mv_scl killed $r7 ; CHECK-NEXT: ST_dms_spill killed $m1, -32, implicit $sp :: (store (s32) into %stack.0) - ; CHECK-NEXT: $m1 = MOV_mv_scl $m2 + ; CHECK-NEXT: $dc1 = MOV_mv_scl $dc2 ; CHECK-NEXT: $dn1 = MOV_mv_scl $dn2 ; CHECK-NEXT: $dj1 = MOV_mv_scl $dj2 - ; CHECK-NEXT: $dc1 = MOV_mv_scl $dc2 - ; CHECK-NEXT: $m5 = MOV_mv_scl $m6 + ; CHECK-NEXT: $m1 = MOV_mv_scl $m2 + ; CHECK-NEXT: $dc5 = MOV_mv_scl $dc6 ; CHECK-NEXT: $dn5 = MOV_mv_scl $dn6 ; CHECK-NEXT: $dj5 = MOV_mv_scl $dj6 - ; CHECK-NEXT: $dc5 = MOV_mv_scl $dc6 - ; CHECK-NEXT: $p0, $dc1, $dc5 = PADDA_3D killed $p0, $d1_3d + ; CHECK-NEXT: $p0, $dc1, $dc5 = PADDA_3D killed $p0, killed $d1_3d ; CHECK-NEXT: $m1 = LDA_dms_spill -32, implicit $sp :: (load (s32) from %stack.0) ; CHECK-NEXT: $p2 = MOV_mv_scl $p1 ; CHECK-NEXT: $p2, $dc2, $dc6 = PADDA_3D killed $p2, $d2_3d ; CHECK-NEXT: $dn1 = MOV_mv_scl killed $r9 ; CHECK-NEXT: $dj1 = MOV_mv_scl killed $r10 - ; CHECK-NEXT: $dn5 = MOV_mv_scl killed $r13 + ; CHECK-NEXT: $m5 = MOV_mv_scl killed $r12 ; CHECK-NEXT: frame-destroy PADDB_sp_imm -32, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $dn5 = MOV_mv_scl killed $r13 ; CHECK-NEXT: $dj5 = MOV_mv_scl killed $r14 ; CHECK-NEXT: $dc2 = MOV_mv_scl killed $dc1 ; CHECK-NEXT: RET implicit $lr @@ -117,7 +115,6 @@ body: | ; CHECK-NEXT: $dc5 = MOV_mv_scl killed $r15 ; CHECK-NEXT: $p0, dead $dc2, dead $dc6 = PADDA_3D killed $p0, killed $d2_3d ; CHECK-NEXT: $p1, dead $dc1, dead $dc5 = PADDA_3D killed $p1, killed $d1_3d - ; CHECK-NEXT: dead renamable $m5 = KILL killed $r12 ; CHECK-NEXT: DelayedSchedBarrier implicit killed renamable $p0, implicit killed renamable $p1, implicit killed renamable $p2, implicit killed 
$m0, implicit killed $d3_3d %0:em = COPY $r0 %1:edn = COPY $r1 @@ -140,16 +137,16 @@ body: | %20:ep = COPY $p0 %21:ep = COPY $p1 - %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count + %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count %200:ep, %300:edc, %400:edc = PADDA_3D %20, %100 - %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count + %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count %201:ep, %301:edc, %401:edc = PADDA_3D %21, %101 %102:eds = REG_SEQUENCE %10, %subreg.sub_mod, %11, %subreg.sub_dim_size, %12, %subreg.sub_dim_stride, %13, %subreg.sub_dim_count, %14, %subreg.sub_hi_dim_then_sub_mod, %15, %subreg.sub_hi_dim_then_sub_dim_size, %16, %subreg.sub_hi_dim_then_sub_dim_stride, %17, %subreg.sub_hi_dim_then_sub_dim_count %202:ep, %302:edc, %402:edc = PADDA_3D %21, %102 - %103:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %300, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %400, %subreg.sub_hi_dim_then_sub_dim_count + %103:eds = REG_SEQUENCE %0, 
%subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %300, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %400, %subreg.sub_hi_dim_then_sub_dim_count %203:ep, %303:edc, %403:edc = PADDA_3D %200, %103 PseudoRET implicit $lr, implicit %203, implicit %202, implicit %201, implicit $m0, implicit $d3_3d @@ -172,8 +169,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $m0 = MOV_mv_scl killed $r0 ; CHECK-NEXT: renamable $r0 = MOVA_lda_cg 0 - ; CHECK-NEXT: renamable $r9 = GE renamable $r0, renamable $r8 - ; CHECK-NEXT: JNZ killed renamable $r9, %bb.3 + ; CHECK-NEXT: renamable $r4 = GE renamable $r0, renamable $r8 + ; CHECK-NEXT: JNZ killed renamable $r4, %bb.3 ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP @@ -241,7 +238,7 @@ body: | %100:edc = PHI %3, %bb.1, %102, %bb.3 %200:edc = PHI %7, %bb.1, %102, %bb.3 ST_dms_sts_idx_imm %60, %70, 0 :: (store (s32)) - %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %100, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %200, %subreg.sub_hi_dim_then_sub_dim_count + %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %100, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %200, %subreg.sub_hi_dim_then_sub_dim_count %10:ep, %102:edc, %202:edc = PADDA_3D %70, %101 %13:er = nuw nsw ADD_add_r_ri killed %60, 1, implicit-def $srcarry %19:er = EQ %9, %13 @@ -270,7 +267,6 @@ body: | ; CHECK-NEXT: $dj4 = MOV_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -302,11 +298,11 @@ body: | %7:edc = COPY $r7 %8:ep = COPY 
killed $p0 %9:er = COPY killed $r8 - %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count + %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count %101:ep, %102:edc, %103:edc = PADDA_3D %8, %100 PseudoJNZ killed %9, %bb.2 bb.1: - %200:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %102, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count + %200:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %102, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count %201:ep, %202:edc, %203:edc = PADDA_3D %101, %200 bb.2: %10:edc = PHI %202, %bb.1, %102, %bb.0 @@ -336,7 +332,6 @@ body: | ; CHECK-NEXT: $dj4 = MOV_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -375,15 +370,15 @@ body: | %8:ep = COPY killed $p0 %9:er = COPY killed $r8 %30:edc = COPY $r9 - %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, 
%subreg.sub_hi_dim_then_sub_dim_count + %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count %101:ep, %102:edc, %103:edc = PADDA_3D %8, %100 PseudoJNZ killed %9, %bb.2 bb.1: - %200:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %102, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count + %200:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %102, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count %201:ep, %202:edc, %203:edc = PADDA_3D %101, %200 PseudoJ_jump_imm %bb.3 bb.2: - %300:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %30, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count + %300:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %30, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count %301:ep, %302:edc, %303:edc = PADDA_3D %101, %300 bb.3: %10:edc = PHI %202, %bb.1, %302, %bb.2 diff --git a/llvm/test/CodeGen/AIE/aie2/run-physreg-copy.mir b/llvm/test/CodeGen/AIE/aie2/run-physreg-copy.mir index 120bebb1e3c1..f78b8ca11d0f 100644 --- a/llvm/test/CodeGen/AIE/aie2/run-physreg-copy.mir +++ b/llvm/test/CodeGen/AIE/aie2/run-physreg-copy.mir @@ -26,14 +26,13 @@ name: test_copy_ds body: | bb.0: ; CHECK-LABEL: name: 
test_copy_ds - ; CHECK: $m1 = MOV_mv_scl $m2 + ; CHECK: $dc1 = MOV_mv_scl $dc2 ; CHECK-NEXT: $dn1 = MOV_mv_scl $dn2 ; CHECK-NEXT: $dj1 = MOV_mv_scl $dj2 - ; CHECK-NEXT: $dc1 = MOV_mv_scl $dc2 - ; CHECK-NEXT: $m5 = MOV_mv_scl $m6 + ; CHECK-NEXT: $m1 = MOV_mv_scl $m2 + ; CHECK-NEXT: $dc5 = MOV_mv_scl $dc6 ; CHECK-NEXT: $dn5 = MOV_mv_scl $dn6 ; CHECK-NEXT: $dj5 = MOV_mv_scl $dj6 - ; CHECK-NEXT: $dc5 = MOV_mv_scl $dc6 $d1_3d = COPY $d2_3d ... diff --git a/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir b/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir index 5a7bfa5776ef..aa0d678c6d40 100644 --- a/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir +++ b/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir @@ -417,12 +417,10 @@ body: | ; CHECK-NEXT: $dj1 = MOVXM -2228 ; CHECK-NEXT: $dc0 = LDA_dms_lda_idx $p0, killed $dj1 ; CHECK-NEXT: $dj1 = MOVXM -2224 - ; CHECK-NEXT: $m4 = LDA_dms_lda_idx $p0, killed $dj1 - ; CHECK-NEXT: $dj1 = MOVXM -2220 ; CHECK-NEXT: $dn4 = LDA_dms_lda_idx $p0, killed $dj1 - ; CHECK-NEXT: $dj1 = MOVXM -2216 + ; CHECK-NEXT: $dj1 = MOVXM -2220 ; CHECK-NEXT: $dj4 = LDA_dms_lda_idx $p0, killed $dj1 - ; CHECK-NEXT: $dj1 = MOVXM -2212 + ; CHECK-NEXT: $dj1 = MOVXM -2216 ; CHECK-NEXT: $dc4 = LDA_dms_lda_idx killed $p0, killed $dj1 ; CHECK-NEXT: $p0 = MOV_alu_mv_mv_mv_scl $sp ; CHECK-NEXT: $dj1 = MOVXM -2240 @@ -434,12 +432,10 @@ body: | ; CHECK-NEXT: $dj0 = MOVXM -2228 ; CHECK-NEXT: ST_dms_sts_idx $dc0, $p0, killed $dj0 ; CHECK-NEXT: $dj0 = MOVXM -2224 - ; CHECK-NEXT: ST_dms_sts_idx $m4, $p0, killed $dj0 - ; CHECK-NEXT: $dj0 = MOVXM -2220 ; CHECK-NEXT: ST_dms_sts_idx $dn4, $p0, killed $dj0 - ; CHECK-NEXT: $dj0 = MOVXM -2216 + ; CHECK-NEXT: $dj0 = MOVXM -2220 ; CHECK-NEXT: ST_dms_sts_idx $dj4, $p0, killed $dj0 - ; CHECK-NEXT: $dj0 = MOVXM -2212 + ; CHECK-NEXT: $dj0 = MOVXM -2216 ; CHECK-NEXT: ST_dms_sts_idx $dc4, killed $p0, killed $dj0 ; CHECK-NEXT: frame-destroy PADDXM_pstm_sp_imm -2240, implicit-def $sp, implicit $sp ; CHECK-NEXT: 
PseudoRET implicit $lr diff --git a/llvm/test/CodeGen/AIE/aie2p/issue_1.ll b/llvm/test/CodeGen/AIE/aie2p/issue_1.ll new file mode 100644 index 000000000000..5867c08d1ca4 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/issue_1.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2024-25 Advanced Micro Devices, Inc. or its affiliates +; RUN: llc -mtriple=aie2p -verify-machineinstrs -o - < %s | FileCheck %s + + +target datalayout = "e-m:e-p:20:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-f32:32:32-i64:32-f64:32-a:0:32-n32" +target triple = "aie2p-none-unknown-elf" + +define void @issue_1(i1 %exitcond.not.i) { +; CHECK-LABEL: issue_1: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mova dn3, #0; nopb ; movx r1, #1; mov crupsmode, #0 +; CHECK-NEXT: and r7, r0, r1; mov r0, dn3 +; CHECK-NEXT: mova r2, #0; mov r1, dn3 +; CHECK-NEXT: movs dn7, dn3; vbcst.16 x0, r2 +; CHECK-NEXT: movs dc3, dn3; mov s0, r2 +; CHECK-NEXT: movs dc7, dn3; mov r2, dn3 +; CHECK-NEXT: movs dn0, dn3; mov r3, dn3 +; CHECK-NEXT: movs dc0, dn3; mov r4, dn3 +; CHECK-NEXT: movs dc5, dn3; mov r5, dn3 +; CHECK-NEXT: movs dj3, dn3; mov r6, dn3 +; CHECK-NEXT: movs m2, dn3; mov dj6, dn3 +; CHECK-NEXT: movs m1, dn3; mov dj2, dn3 +; CHECK-NEXT: movs dn1, dn3; mov dj1, dn3 +; CHECK-NEXT: movs dn5, dn3; mov dj5, dn3 +; CHECK-NEXT: movs dn4, dn3; mov dj0, dn3 +; CHECK-NEXT: movs m0, dn3; mov dj4, dn3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: // %for.body.i +; CHECK-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NEXT: // Child Loop BB0_2 Depth 2 +; CHECK-NEXT: nopa ; nopb ; movs dc4, dn3; nopx ; vups.2x cml0, x0, s0, upssign0; nopv +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: // %for.body58.i +; CHECK-NEXT: // Parent Loop 
BB0_1 Depth=1 +; CHECK-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NEXT: nopa ; nopb ; nopx ; mov dn2, dn7; movs dc2, dc3 +; CHECK-NEXT: mova p0, #0; movs dc6, dc7; mov dn6, r0 +; CHECK-NEXT: movs dn2, r1; paddb.3d [p0], d2; jz r7, #.LBB0_2 +; CHECK-NEXT: mov dn6, r2 // Delay Slot 5 +; CHECK-NEXT: movs dc2, dc4; mov dc6, r3 // Delay Slot 4 +; CHECK-NEXT: paddb.3d [p0], d2 // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; mov dc1, r5 // Delay Slot 2 +; CHECK-NEXT: paddb.3d [p0], d1; mov dc4, dc2 // Delay Slot 1 +; CHECK-NEXT: // %bb.3: // %for.cond.cleanup57.i +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: nopa ; nopb ; nops ; j #.LBB0_1; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: nop // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; mov dc4, dn3 // Delay Slot 2 +; CHECK-NEXT: paddb.3d [p0], d0 // Delay Slot 1 +entry: + br label %for.body.i + +for.body.i: ; preds = %for.cond.cleanup57.i, %entry + %iterator_pout_cnt0.0489.i = phi i32 [ 0, %entry ], [ %4, %for.cond.cleanup57.i ] + %Ky_cnt.0485.i = phi i32 [ 0, %entry ], [ %14, %for.cond.cleanup57.i ] + %0 = tail call <32 x i32> @llvm.aie2p.acc32.v32.I512.ups(<32 x i16> zeroinitializer, i32 0, i32 0) + br label %for.body58.i + +for.cond.cleanup57.i: ; preds = %for.body58.i + %1 = trunc i32 %iterator_pout_cnt0.0489.i to i20 + %2 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %1, i20 0, i20 0) + %3 = extractvalue { ptr, i20, i20 } %2, 1 + %4 = zext i20 %3 to i32 + br label %for.body.i + +for.body58.i: ; preds = %for.body58.i, %for.body.i + %iterator_inner1_cnt0.1478.i = phi i32 [ 0, %for.body.i ], [ %10, %for.body58.i ] + %Ky_cnt.1476.i = phi i32 [ %Ky_cnt.0485.i, %for.body.i ], [ %14, %for.body58.i ] + %5 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0) + %6 = extractvalue { ptr, i20, i20 } %5, 0 + %7 = trunc i32 %iterator_inner1_cnt0.1478.i to i20 + %8 = tail 
call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr %6, i20 0, i20 0, i20 0, i20 0, i20 %7, i20 0, i20 0) + %9 = extractvalue { ptr, i20, i20 } %8, 1 + %10 = zext i20 %9 to i32 + %11 = trunc i32 %Ky_cnt.1476.i to i20 + %12 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 %11) + %13 = extractvalue { ptr, i20, i20 } %12, 2 + %14 = zext i20 %13 to i32 + br i1 %exitcond.not.i, label %for.cond.cleanup57.i, label %for.body58.i + +; uselistorder directives + uselistorder i32 %14, { 1, 0 } +} + +; Function Attrs: nounwind memory(inaccessiblemem: read) +declare <32 x i32> @llvm.aie2p.acc32.v32.I512.ups(<32 x i16>, i32, i32) #0 + +; Function Attrs: nounwind memory(none) +declare { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) #1 + +; uselistorder directives +uselistorder ptr @llvm.aie2p.add.3d, { 3, 2, 1, 0 } + +attributes #0 = { nounwind memory(inaccessiblemem: read) } +attributes #1 = { nounwind memory(none) } diff --git a/llvm/test/CodeGen/AIE/aie2p/issue_2.ll b/llvm/test/CodeGen/AIE/aie2p/issue_2.ll new file mode 100644 index 000000000000..607f00db45e8 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/issue_2.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2024-25 Advanced Micro Devices, Inc. 
or its affiliates +; RUN: llc -mtriple=aie2p -verify-machineinstrs -o - < %s | FileCheck %s + + +target datalayout = "e-m:e-p:20:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-f32:32:32-i64:32-f64:32-a:0:32-n32" +target triple = "aie2p-none-unknown-elf" + +define void @issue_2(i32 %0, i1 %exitcond.not.i) { +; CHECK-LABEL: issue_2: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mova m0, #0; nopb ; nopx +; CHECK-NEXT: movs dc5, m0; mov dj0, m0 +; CHECK-NEXT: movs dj4, m0; mov dn0, m0 +; CHECK-NEXT: movs dj2, m0; mov dn4, m0 +; CHECK-NEXT: movs dj6, m0; mov dn2, m0 +; CHECK-NEXT: movs dn6, m0; mov dc0, m0 +; CHECK-NEXT: movs dc4, m0; mov r4, m0 +; CHECK-NEXT: movs dc3, m0; mov r6, m0 +; CHECK-NEXT: mova dn5, #1; movs dc2, m0; mov r3, m0 +; CHECK-NEXT: movs dn3, m0; mov r5, m0 +; CHECK-NEXT: mova r16, #1; movs dj3, m0; mov r2, m0 +; CHECK-NEXT: movs dn7, m0; and r16, r1, r16; mov r1, m0 +; CHECK-NEXT: mova r7, #0; movs dj7, m0; mov m3, m0 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: // %for.body58.i +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: nopa ; nopb ; nops ; jz r16, #.LBB0_1; nopv +; CHECK-NEXT: nopx // Delay Slot 5 +; CHECK-NEXT: mova p0, #0 // Delay Slot 4 +; CHECK-NEXT: paddb.3d [p0], d0 // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; movs dc6, r7; mov m2, m0 // Delay Slot 2 +; CHECK-NEXT: paddb.3d [p0], d2; or r7, r0, r0; mov dc0, dn5 // Delay Slot 1 +; CHECK-NEXT: // %bb.2: // %for.cond.cleanup57.i +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: nopx ; mov dc7, dn5 +; CHECK-NEXT: movs dc0, dc5; mov dc1, r1 +; CHECK-NEXT: movs dj1, r2; mov r7, dn5 +; CHECK-NEXT: movs dj5, m0; mov dn1, m0 +; CHECK-NEXT: movs dn5, m0; mov dc5, m0 +; CHECK-NEXT: mova p0, #0; movs m1, m0; j #.LBB0_1 +; CHECK-NEXT: paddb.3d [p0], d1 // Delay Slot 5 +; CHECK-NEXT: mova p0, #0; movs dc2, m0; mov dn5, r7 // Delay Slot 4 +; CHECK-NEXT: movs dj1, m0; paddb.3d [p0], d3; mov r1, dc1 // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; movs 
dc5, dc0; mov dc1, m0 // Delay Slot 2 +; CHECK-NEXT: mova r7, #0; paddb.3d [p0], d1; movs dc4, m0; mov dc0, m0 // Delay Slot 1 +entry: + br label %for.body.i + +for.body.i: ; preds = %for.cond.cleanup57.i, %entry + %iterator_outer0_cnt0.0496.i = phi i32 [ 0, %entry ], [ %4, %for.cond.cleanup57.i ] + %iterator_weights_cnt0.0493.i = phi i32 [ 0, %entry ], [ %8, %for.cond.cleanup57.i ] + %y_cnt.0487.i = phi i32 [ 0, %entry ], [ %12, %for.cond.cleanup57.i ] + br label %for.body58.i + +for.cond.cleanup57.i: ; preds = %for.body58.i + %1 = trunc i32 %iterator_outer0_cnt0.0496.i to i20 + %2 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %1, i20 0, i20 1) + %3 = extractvalue { ptr, i20, i20 } %2, 1 + %4 = zext i20 %3 to i32 + %5 = trunc i32 %iterator_weights_cnt0.0493.i to i20 + %6 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %5, i20 0, i20 0) + %7 = extractvalue { ptr, i20, i20 } %6, 1 + %8 = zext i20 %7 to i32 + %9 = trunc i32 %y_cnt.0487.i to i20 + %10 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 1, i20 %9) + %11 = extractvalue { ptr, i20, i20 } %10, 2 + %12 = zext i20 %11 to i32 + br label %for.body.i + +for.body58.i: ; preds = %for.body58.i, %for.body.i + %iterator_inner0_cnt0.1480.i = phi i32 [ 0, %for.body.i ], [ 1, %for.body58.i ] + %iterator_inner0_cnt1.1479.i = phi i32 [ 0, %for.body.i ], [ %17, %for.body58.i ] + %iterator_inner1_cnt0.1478.i = phi i32 [ 0, %for.body.i ], [ %22, %for.body58.i ] + %iterator_inner1_cnt1.1477.i = phi i32 [ 0, %for.body.i ], [ %0, %for.body58.i ] + %13 = trunc i32 %iterator_inner0_cnt0.1480.i to i20 + %14 = trunc i32 %iterator_inner0_cnt1.1479.i to i20 + %15 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %13, i20 0, i20 %14) + %16 = extractvalue { ptr, i20, i20 } %15, 2 + %17 = zext i20 %16 to i32 + %18 = trunc i32 %iterator_inner1_cnt0.1478.i to i20 + 
%19 = trunc i32 %iterator_inner1_cnt1.1477.i to i20 + %20 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %18, i20 0, i20 %19) + %21 = extractvalue { ptr, i20, i20 } %20, 1 + %22 = zext i20 %21 to i32 + br i1 %exitcond.not.i, label %for.cond.cleanup57.i, label %for.body58.i +} + +; Function Attrs: nounwind memory(none) +declare { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) #0 + +; uselistorder directives +uselistorder ptr @llvm.aie2p.add.3d, { 4, 3, 2, 1, 0 } + +attributes #0 = { nounwind memory(none) } diff --git a/llvm/test/CodeGen/AIE/aie2p/issue_3.ll b/llvm/test/CodeGen/AIE/aie2p/issue_3.ll new file mode 100644 index 000000000000..0001552bf518 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/issue_3.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2024-25 Advanced Micro Devices, Inc. 
or its affiliates +; RUN: llc -mtriple=aie2p -verify-machineinstrs -o - < %s | FileCheck %s + +target datalayout = "e-m:e-p:20:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-f32:32:32-i64:32-f64:32-a:0:32-n32" +target triple = "aie2p-none-unknown-elf" + +define void @issue_3(i1 %exitcond.not.i) { +; CHECK-LABEL: issue_3: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mova p3, #0; nopb ; nops ; paddxm [sp], #192; nopv +; CHECK-NEXT: mova r1, #0; nopb ; jl p3; nopm ; nops +; CHECK-NEXT: st r8, [sp, #-192]; vbcst.32 x0, r1 // 4-byte Folded Spill Delay Slot 5 +; CHECK-NEXT: st lr, [sp, #-188]; vmov x1, x0 // 4-byte Folded Spill Delay Slot 4 +; CHECK-NEXT: mova p0, #0; vst x0, [sp, #-128] // 64-byte Folded Spill Delay Slot 3 +; CHECK-NEXT: mova p1, #0; vst x1, [sp, #-64] // 64-byte Folded Spill Delay Slot 2 +; CHECK-NEXT: mova p2, #0; mov r8, r0 // Delay Slot 1 +; CHECK-NEXT: mova m4, #0; nopb ; nops ; nopxm ; nopv +; CHECK-NEXT: mov dn0, m4 +; CHECK-NEXT: mov dn4, m4 +; CHECK-NEXT: mov dn1, m4 +; CHECK-NEXT: mov dn5, m4 +; CHECK-NEXT: mov dn2, m4 +; CHECK-NEXT: movs dc5, m4; mov dc1, m4 +; CHECK-NEXT: vlda x2, [sp, #-128]; movs dc2, m4; mov r1, m4 // 64-byte Folded Reload +; CHECK-NEXT: vlda x3, [sp, #-64]; movs dc3, m4; movx r0, #1; mov r2, m4 // 64-byte Folded Reload +; CHECK-NEXT: movs dc0, m4; and r3, r8, r0; mov r0, m4 +; CHECK-NEXT: movs m1, m4; mov dj7, m4 +; CHECK-NEXT: movs m3, m4; mov dj1, r1 +; CHECK-NEXT: movs m2, m4; mov dj5, r1 +; CHECK-NEXT: movs dn7, m4; mov dj2, r1 +; CHECK-NEXT: movs dj6, r1; vmov lfl0, x2 +; CHECK-NEXT: mova dc4, #0; movs dj3, r1; movx r4, #0; vmov lfh0, x3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: // %for.body.i +; CHECK-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NEXT: // Child Loop BB0_2 Depth 2 +; CHECK-NEXT: nopx ; vmov lfl1, lfl0 +; CHECK-NEXT: mova p1, #0; mov r25, r4 +; CHECK-NEXT: vldb.pop.576.3d ex0, [p1, lf1, r25, d1]; mov dj4, r1 +; CHECK-NEXT: mova p0, #0; movs m0, m4; mov dj0, r1 +; CHECK-NEXT: 
movs dn6, dn0; paddb.3d [p0], d0; vmov lfh1, lfh0 +; CHECK-NEXT: mova p0, #0; movs dn3, dn4; mov dc6, dc4 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: // %for.body103.i +; CHECK-NEXT: // Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NEXT: nopa ; nopb ; nops ; nopx ; mov m0, m4; nopv +; CHECK-NEXT: movs dn0, r0; jz r3, #.LBB0_2 +; CHECK-NEXT: movs dj0, r1; mov dc0, m4 // Delay Slot 5 +; CHECK-NEXT: movs dn4, r2; mov dc4, m4 // Delay Slot 4 +; CHECK-NEXT: movs dj4, r1; mov r25, r4 // Delay Slot 3 +; CHECK-NEXT: movs p1, p0; vmov lfl1, x2 // Delay Slot 2 +; CHECK-NEXT: vldb.pop.576.3d ex0, [p1, lf1, r25, d0]; vmov lfh1, x3 // Delay Slot 1 +; CHECK-NEXT: // %bb.3: // %for.cond.cleanup102.i +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: nopa ; nopb ; nopxm +; CHECK-NEXT: movs dc5, dc4; j #.LBB0_1 +; CHECK-NEXT: movs dn0, dn6; mov dc4, dc6 // Delay Slot 5 +; CHECK-NEXT: mova p0, #0; movs dn6, m4; mov dc6, m4 // Delay Slot 4 +; CHECK-NEXT: paddb.3d [p0], d2; mov dn4, dn3 // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; movs dc7, m4; mov dn3, m4 // Delay Slot 2 +; CHECK-NEXT: mova dc0, #1; paddb.3d [p0], d3; movs dc1, dc0 // Delay Slot 1 +entry: + tail call void null(ptr null, ptr null, ptr null) + br label %for.body.i + +for.body.i: ; preds = %for.cond.cleanup102.i, %entry + %dimsAI.sroa.17.0665.i = phi i32 [ 0, %entry ], [ %20, %for.cond.cleanup102.i ] + %dimsAI.sroa.13.0664.i = phi i32 [ 0, %entry ], [ %18, %for.cond.cleanup102.i ] + %dimsAO.sroa.8.0662.i = phi i32 [ 0, %entry ], [ %11, %for.cond.cleanup102.i ] + %dimsW.sroa.8.0660.i = phi i32 [ 0, %entry ], [ %15, %for.cond.cleanup102.i ] + %iterator_psum_cnt0.0659.i = phi i32 [ 0, %entry ], [ 1, %for.cond.cleanup102.i ] + %iterator_psum_cnt1.0658.i = phi i32 [ 0, %entry ], [ %7, %for.cond.cleanup102.i ] + %0 = trunc i32 %iterator_psum_cnt0.0659.i to i20 + %1 = trunc i32 %iterator_psum_cnt1.0658.i to i20 + %2 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr 
null, i20 0, i20 0, i20 0, i20 0, i20 %0, i20 0, i20 %1) + %3 = extractvalue { ptr, i20, i20 } %2, 2 + %4 = trunc i32 %dimsAI.sroa.13.0664.i to i20 + %5 = trunc i32 %dimsAI.sroa.17.0665.i to i20 + %6 = tail call { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5) null, <32 x i32> zeroinitializer, i32 0, i20 0, i20 0, i20 %4, i20 0, i20 0, i20 %5, i20 0) + br label %for.body103.i + +for.cond.cleanup102.i: ; preds = %for.body103.i + %7 = zext i20 %3 to i32 + %8 = trunc i32 %dimsAO.sroa.8.0662.i to i20 + %9 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %8, i20 0, i20 0) + %10 = extractvalue { ptr, i20, i20 } %9, 1 + %11 = zext i20 %10 to i32 + %12 = trunc i32 %dimsW.sroa.8.0660.i to i20 + %13 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %12, i20 0, i20 0) + %14 = extractvalue { ptr, i20, i20 } %13, 1 + %15 = zext i20 %14 to i32 + br label %for.body.i + +for.body103.i: ; preds = %for.body103.i, %for.body.i + %16 = tail call { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5) null, <32 x i32> zeroinitializer, i32 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0) + %17 = extractvalue { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } %16, 3 + %18 = zext i20 %17 to i32 + %19 = extractvalue { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } %16, 4 + %20 = zext i20 %19 to i32 + br i1 %exitcond.not.i, label %for.cond.cleanup102.i, label %for.body103.i +} + +; Function Attrs: nounwind memory(none) +declare { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) #0 + +; Function Attrs: nounwind memory(argmem: read) +declare { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5), <32 x i32>, i32, i20, i20, 
i20, i20, i20, i20, i20) #1 + +; uselistorder directives +uselistorder ptr @llvm.aie2p.add.3d, { 2, 1, 0 } +uselistorder ptr @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5, { 1, 0 } + +attributes #0 = { nounwind memory(none) } +attributes #1 = { nounwind memory(argmem: read) } diff --git a/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-create.mir b/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-create.mir index c115d130cfda..3c5d08b0a7c8 100644 --- a/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-create.mir +++ b/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-create.mir @@ -166,9 +166,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDS_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, 
killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDS_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count %20:ep = COPY $p0 %100:eds = COPY $d1_3d %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D killed %20, killed %100 @@ -190,7 +190,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: $r4, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed 
undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $r4, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count %20:ep = COPY $p0 %100:eds = COPY $d1_3d $r4, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda killed %20, killed %100 @@ -210,7 +210,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: [[VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split:%[0-9]+]]:edm, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split $s0, [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 + ; CHECK-NEXT: [[VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split:%[0-9]+]]:edm, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split $s0, [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, 
implicit $crupsmode, implicit $upssign0 %20:ep = COPY $p0 %100:eds = COPY $d1_3d %0:edm, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0 $s0, %20, %100, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 @@ -238,7 +238,7 @@ body: | ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[MOV_PD_imm11_pseudo]].sub_mod ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[MOV_PD_imm11_pseudo]].sub_dim_size ; CHECK-NEXT: [[COPY:%[0-9]+]]:mpfs = COPY $p0 - ; CHECK-NEXT: $sf, %2:mpfs, $r26, dead [[MOV_PD_imm11_pseudo]].sub_dim_count:eds, dead [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count:eds = VST_FLUSH_512_3D_split $sf, %2, $r26, [[MOV_PD_imm11_pseudo]].sub_mod, [[MOV_PD_imm11_pseudo]].sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_dim_count, undef [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_mod, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count, implicit-def $srfifo_of + ; CHECK-NEXT: $sf, %2:mpfs, $r26, dead [[MOV_PD_imm11_pseudo]].sub_dim_count:eds, dead [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count:eds = VST_FLUSH_512_3D_split $sf, %2, $r26, [[MOV_PD_imm11_pseudo]].sub_mod, [[MOV_PD_imm11_pseudo]].sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_dim_count, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count, implicit-def $srfifo_of ; CHECK-NEXT: PseudoRET implicit $lr undef %12.sub_mod:eds = MOV_PD_imm11_pseudo 0 %12.sub_hi_dim_then_sub_dim_stride:eds = MOV_PD_imm11_pseudo 128 @@ -275,7 +275,7 @@ body: | ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_count:eds = COPY 
[[MOV_PD_imm11_pseudo]].sub_mod ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[MOV_PD_imm11_pseudo]].sub_mod ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[MOV_PD_imm11_pseudo]].sub_dim_size - ; CHECK-NEXT: [[VLD_POP_512_3D_pseudo_split:%[0-9]+]]:vec512, dead [[COPY:%[0-9]+]].sub_ptr:epsrfldf, dead [[COPY:%[0-9]+]].sub_fifo:epsrfldf, dead [[COPY:%[0-9]+]].sub_avail:epsrfldf, dead [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_count:eds, dead [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_POP_512_3D_pseudo_split [[COPY]].sub_ptr, undef [[COPY]].sub_fifo, [[COPY]].sub_avail, [[MOV_PD_imm11_pseudo]].sub_mod, [[MOV_PD_imm11_pseudo]].sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_dim_count, undef [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_mod, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count, implicit-def $srfifo_uf + ; CHECK-NEXT: [[VLD_POP_512_3D_pseudo_split:%[0-9]+]]:vec512, dead [[COPY:%[0-9]+]].sub_ptr:epsrfldf, dead [[COPY:%[0-9]+]].sub_fifo:epsrfldf, dead [[COPY:%[0-9]+]].sub_avail:epsrfldf, dead [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_count:eds, dead [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_POP_512_3D_pseudo_split [[COPY]].sub_ptr, undef [[COPY]].sub_fifo, [[COPY]].sub_avail, [[MOV_PD_imm11_pseudo]].sub_mod, [[MOV_PD_imm11_pseudo]].sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_dim_count, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count, implicit-def $srfifo_uf ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLD_POP_512_3D_pseudo_split]] undef %12.sub_mod:eds = MOV_PD_imm11_pseudo 0 %12.sub_hi_dim_then_sub_dim_stride:eds = 
MOV_PD_imm11_pseudo 128 diff --git a/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-replace.mir b/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-replace.mir index 4cf0baf18aac..0878183f69f5 100644 --- a/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-replace.mir +++ b/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-replace.mir @@ -143,7 +143,7 @@ body: | bb.3: liveins: $dc4, $dj4, $dn4, $p0, $m0, $dn0, $dj0, $dc0 - $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 + $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -263,9 +263,9 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDB_3D killed $p0, $d0_3d ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDS_3D killed $p0, $d0_3d ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 - $p0, $dc0, $dc4 = PADDB_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 - $p0, $dc0, $dc4 = PADDS_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 + $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 + $p0, $dc0, $dc4 = PADDB_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 + $p0, $dc0, $dc4 = PADDS_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -280,7 +280,7 @@ body: | ; CHECK: liveins: $p0, $d1_3d ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $r4, dead $p0, dead $dc1, dead $dc5 = LDA_3D_dms_lda killed $p0, $d1_3d - $r4, dead $p0, dead $dc1, dead $dc5 = LDA_3D_dms_lda_split killed $p0, $m1, $dn1, $dj1, $dc1, undef $m5, $dn5, $dj5, $dc5 + $r4, dead $p0, dead $dc1, dead $dc5 = LDA_3D_dms_lda_split killed $p0, $m1, $dn1, $dj1, $dc1, $dn5, $dj5, $dc5 ... --- @@ -294,7 +294,7 @@ body: | ; CHECK: liveins: $p0, $s0, $d1_3d ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead $dm0, dead $p0, dead $dc1, dead $dc5 = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0 $s0, killed $p0, $d1_3d, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 - dead $dm0, dead $p0, dead $dc1, dead $dc5 = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split $s0, killed $p0, $m1, $dn1, $dj1, $dc1, undef $m5, $dn5, $dj5, $dc5, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 + dead $dm0, dead $p0, dead $dc1, dead $dc5 = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split $s0, killed $p0, $m1, $dn1, $dj1, $dc1, $dn5, $dj5, $dc5, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 ... @@ -326,7 +326,7 @@ body: | renamable $dc4 = COPY renamable $m0 renamable $dn4 = COPY renamable $dn0 renamable $p2 = COPY $p0 - $sf, dead $p2, $r26, dead $dc0, dead $dc4 = VST_FLUSH_512_3D_split $sf, killed $p2, $r26, killed $m0, killed $dn0, killed $dj0, killed $dc0, undef $m4, killed $dn4, killed $dj4, killed $dc4, implicit-def $srfifo_of + $sf, dead $p2, $r26, dead $dc0, dead $dc4 = VST_FLUSH_512_3D_split $sf, killed $p2, $r26, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4, implicit-def $srfifo_of PseudoRET implicit $lr ... 
@@ -358,6 +358,6 @@ body: | renamable $dc0 = COPY renamable $m0 renamable $dc4 = COPY renamable $m0 renamable $dn4 = COPY renamable $dn0 - $x0, dead $p0, dead $lf0, dead $r24, dead $dc0, dead $dc4 = VLD_POP_512_3D_pseudo_split killed $p0, undef $lf0, $r24, killed $m0, killed $dn0, killed $dj0, killed $dc0, undef $m4, killed $dn4, killed $dj4, killed $dc4, implicit-def $srfifo_uf + $x0, dead $p0, dead $lf0, dead $r24, dead $dc0, dead $dc4 = VLD_POP_512_3D_pseudo_split killed $p0, undef $lf0, $r24, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4, implicit-def $srfifo_uf PseudoRET implicit $lr, implicit killed renamable $x0 ... diff --git a/llvm/test/CodeGen/AIE/aie2p/ra/tie-subregs-flow-3d.mir b/llvm/test/CodeGen/AIE/aie2p/ra/tie-subregs-flow-3d.mir index d77448271de2..40bf6b110211 100644 --- a/llvm/test/CodeGen/AIE/aie2p/ra/tie-subregs-flow-3d.mir +++ b/llvm/test/CodeGen/AIE/aie2p/ra/tie-subregs-flow-3d.mir @@ -26,13 +26,13 @@ body: | ; CHECK-NEXT: $dn0 = MOV_alu_mv_mv_mv_scl killed $r1 ; CHECK-NEXT: $dj0 = MOV_alu_mv_mv_mv_scl killed $r2 ; CHECK-NEXT: $dc0 = MOV_alu_mv_mv_mv_scl killed $r3 + ; CHECK-NEXT: $m4 = MOV_alu_mv_mv_mv_scl killed $r4 ; CHECK-NEXT: $dn4 = MOV_alu_mv_mv_mv_scl killed $r5 ; CHECK-NEXT: RET implicit $lr ; CHECK-NEXT: $dj4 = MOV_alu_mv_mv_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_alu_mv_mv_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 - ; CHECK-NEXT: $p0, dead $dc0, $dc4 = PADDA_3D killed $p0, killed $d0_3d + ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, killed $d0_3d ; CHECK-NEXT: NOP ; CHECK-NEXT: DelayedSchedBarrier implicit killed renamable $p0, implicit killed renamable $dc4 %0:em = COPY $r0 @@ -139,12 +139,12 @@ body: | ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP - ; CHECK-NEXT: NOP + ; CHECK-NEXT: $m4 = MOV_alu_mv_mv_mv_scl killed $r4 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $m0, $p0, $r0, $r1, $r2, $r3, $r5, $r6, $r7, $r8 + ; CHECK-NEXT: liveins: $p0, $r0, $r1, $r2, $r3, $r5, $r6, $r7, $r8, $d0_3d:0x0001C00000200E00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $dn0 = MOV_alu_mv_mv_mv_scl killed $r1 ; CHECK-NEXT: $dj0 = MOV_alu_mv_mv_mv_scl killed $r2 @@ -155,7 +155,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2 (align 16): ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: liveins: $dc0, $dc4, $dj0, $dj4, $dn0, $dn4, $m0, $p0, $r0, $r8, $d0_3d:0x0001C00000200E00 + ; CHECK-NEXT: liveins: $p0, $r0, $r8, $d0_3d:0x0001C00000200E00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: ST_dms_sts_idx_imm renamable $r0, renamable $p0, 0 :: (store (s32)) ; CHECK-NEXT: renamable $r0 = nuw nsw ADD_add_r_ri killed renamable $r0, 1, implicit-def $srcarry @@ -164,7 +164,7 @@ body: | ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP - ; CHECK-NEXT: $p0, $dc0, dead $dc4 = PADDA_3D killed $p0, $d0_3d + ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d ; CHECK-NEXT: $dc4 = MOV_alu_mv_mv_mv_scl $dc0 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} @@ -225,23 +225,23 @@ body: | ; CHECK-NEXT: $m0 = MOV_alu_mv_mv_mv_scl killed $r0 ; CHECK-NEXT: $dn0 = MOV_alu_mv_mv_mv_scl killed $r1 ; CHECK-NEXT: $dj0 = MOV_alu_mv_mv_mv_scl killed $r2 - ; CHECK-NEXT: JNZ renamable $r8, %bb.2 ; CHECK-NEXT: $dc0 = MOV_alu_mv_mv_mv_scl killed $r3 + ; CHECK-NEXT: JNZ renamable $r8, %bb.2 + ; CHECK-NEXT: $m4 = MOV_alu_mv_mv_mv_scl killed $r4 ; CHECK-NEXT: $dn4 = MOV_alu_mv_mv_mv_scl killed $r5 ; CHECK-NEXT: $dj4 = MOV_alu_mv_mv_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_alu_mv_mv_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $dc0, $dc4, $dj0, 
$dj4, $dn0, $dn4, $m0, $p0, $d0_3d:0x0001C00000200E00 + ; CHECK-NEXT: liveins: $p0, $d0_3d:0x0001C00000200E00 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $p0, $dc0, dead $dc4 = PADDA_3D killed $p0, killed $d0_3d + ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, killed $d0_3d ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2 (align 16): - ; CHECK-NEXT: liveins: $dc0, $p0 + ; CHECK-NEXT: liveins: $p0, $d0_3d:0x0000000000000200 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: RET implicit $lr ; CHECK-NEXT: NOP @@ -291,18 +291,18 @@ body: | ; CHECK-NEXT: $m0 = MOV_alu_mv_mv_mv_scl killed $r0 ; CHECK-NEXT: $dn0 = MOV_alu_mv_mv_mv_scl killed $r1 ; CHECK-NEXT: $dj0 = MOV_alu_mv_mv_mv_scl killed $r2 - ; CHECK-NEXT: JZ renamable $r8, %bb.2 ; CHECK-NEXT: $dc0 = MOV_alu_mv_mv_mv_scl killed $r3 + ; CHECK-NEXT: JZ renamable $r8, %bb.2 + ; CHECK-NEXT: $m4 = MOV_alu_mv_mv_mv_scl killed $r4 ; CHECK-NEXT: $dn4 = MOV_alu_mv_mv_mv_scl killed $r5 ; CHECK-NEXT: $dj4 = MOV_alu_mv_mv_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_alu_mv_mv_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: - ; CHECK-NEXT: liveins: $dc4, $dj0, $dj4, $dn0, $dn4, $m0, $p0, $r9, $d0_3d:0x0001C00000200C00 + ; CHECK-NEXT: liveins: $p0, $r9, $d0_3d:0x0001C00000200C00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: RET implicit $lr ; CHECK-NEXT: NOP @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: DelayedSchedBarrier implicit killed renamable $dc0, implicit killed renamable $p0, implicit killed renamable $dc4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2 (align 16): - ; CHECK-NEXT: liveins: $dc0, $dc4, $dj0, $dj4, $dn0, $dn4, $m0, $p0, $d0_3d:0x0001C00000200E00 + ; CHECK-NEXT: liveins: $p0, $d0_3d:0x0001C00000200E00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: RET implicit $lr ; CHECK-NEXT: NOP diff --git a/llvm/test/CodeGen/AIE/aie2p/spill/dim_spill.mir 
b/llvm/test/CodeGen/AIE/aie2p/spill/dim_spill.mir index b00ce925bde0..b02205a75416 100644 --- a/llvm/test/CodeGen/AIE/aie2p/spill/dim_spill.mir +++ b/llvm/test/CodeGen/AIE/aie2p/spill/dim_spill.mir @@ -119,18 +119,16 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_lda_spill -60, implicit $sp :: (load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_lda_spill -56, implicit $sp :: (load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_lda_spill -52, implicit $sp :: (load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_lda_spill -48, implicit $sp :: (load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -44, implicit $sp :: (load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -40, implicit $sp :: (load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill -36, implicit $sp :: (load (s32) from %stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -48, implicit $sp :: (load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -44, implicit $sp :: (load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill -40, implicit $sp :: (load (s32) from %stack.0 + 24) ; CHECK-NEXT: ST_dms_sts_spill $m1, -32, implicit $sp :: (store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_sts_spill $dn1, -28, implicit $sp :: (store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_sts_spill $dj1, -24, implicit $sp :: (store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_sts_spill $dc1, -20, implicit $sp :: (store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_sts_spill $m5, -16, implicit $sp :: (store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_sts_spill $dn5, -12, implicit $sp :: (store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_sts_spill $dj5, -8, implicit $sp :: (store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_sts_spill $dc5, -4, implicit $sp :: (store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_sts_spill $dn5, -16, implicit $sp :: (store (s32) into %stack.1 + 16) + ; CHECK-NEXT: 
ST_dms_sts_spill $dj5, -12, implicit $sp :: (store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_sts_spill $dc5, -8, implicit $sp :: (store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL %stack.0, implicit $sp :: (load (s256) from %stack.0, align 4) ST_DS_SPILL $d1_3d, %stack.1, implicit $sp :: (store (s256) into %stack.1, align 4) ... @@ -148,9 +146,9 @@ body: | bb.0 (align 16): ; CHECK-LABEL: name: test_ds_partial ; CHECK: frame-setup PADDXM_pstm_sp_imm 64, implicit-def $sp, implicit $sp - ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -40, implicit $sp :: (load (s32) from %stack.0 + 24) + ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -44, implicit $sp :: (load (s32) from %stack.0 + 20) ; CHECK-NEXT: $dj6 = COPY $dj5 - ; CHECK-NEXT: ST_dms_sts_spill $dj6, -8, implicit $sp :: (store (s32) into %stack.1 + 24) + ; CHECK-NEXT: ST_dms_sts_spill $dj6, -12, implicit $sp :: (store (s32) into %stack.1 + 20) $d1_3d = LDA_DS_SPILL %stack.0, implicit $sp :: (load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, %stack.1, implicit $sp :: (store (s256) into %stack.1, align 4) @@ -171,19 +169,17 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_lda_spill -60, implicit $sp :: (load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_lda_spill -56, implicit $sp :: (load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_lda_spill -52, implicit $sp :: (load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_lda_spill -48, implicit $sp :: (load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -44, implicit $sp :: (load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -40, implicit $sp :: (load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill -36, implicit $sp :: (load (s32) from %stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -48, implicit $sp :: (load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -44, implicit $sp :: (load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill 
-40, implicit $sp :: (load (s32) from %stack.0 + 24) ; CHECK-NEXT: $dj6 = COPY $dj5 ; CHECK-NEXT: ST_dms_sts_spill $m2, -32, implicit $sp :: (store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_sts_spill $dn2, -28, implicit $sp :: (store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_sts_spill $dj2, -24, implicit $sp :: (store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_sts_spill $dc2, -20, implicit $sp :: (store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_sts_spill $m6, -16, implicit $sp :: (store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_sts_spill $dn6, -12, implicit $sp :: (store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_sts_spill $dj6, -8, implicit $sp :: (store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_sts_spill $dc6, -4, implicit $sp :: (store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_sts_spill $dn6, -16, implicit $sp :: (store (s32) into %stack.1 + 16) + ; CHECK-NEXT: ST_dms_sts_spill $dj6, -12, implicit $sp :: (store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_sts_spill $dc6, -8, implicit $sp :: (store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL %stack.0, implicit $sp :: (load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, %stack.1, implicit $sp :: (store (s256) into %stack.1, align 4) @@ -206,19 +202,17 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_lda_spill -60, implicit $sp :: (volatile load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_lda_spill -56, implicit $sp :: (volatile load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_lda_spill -52, implicit $sp :: (volatile load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_lda_spill -48, implicit $sp :: (volatile load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -44, implicit $sp :: (volatile load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -40, implicit $sp :: (volatile load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill -36, implicit $sp :: (volatile load (s32) from 
%stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -48, implicit $sp :: (volatile load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -44, implicit $sp :: (volatile load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill -40, implicit $sp :: (volatile load (s32) from %stack.0 + 24) ; CHECK-NEXT: $dj6 = COPY $dj5 ; CHECK-NEXT: ST_dms_sts_spill undef $m2, -32, implicit $sp :: (volatile store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_sts_spill undef $dn2, -28, implicit $sp :: (volatile store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_sts_spill undef $dj2, -24, implicit $sp :: (volatile store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_sts_spill undef $dc2, -20, implicit $sp :: (volatile store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_sts_spill undef $m6, -16, implicit $sp :: (volatile store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_sts_spill undef $dn6, -12, implicit $sp :: (volatile store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_sts_spill $dj6, -8, implicit $sp :: (volatile store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_sts_spill undef $dc6, -4, implicit $sp :: (volatile store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_sts_spill undef $dn6, -16, implicit $sp :: (volatile store (s32) into %stack.1 + 16) + ; CHECK-NEXT: ST_dms_sts_spill $dj6, -12, implicit $sp :: (volatile store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_sts_spill undef $dc6, -8, implicit $sp :: (volatile store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL %stack.0, implicit $sp :: (volatile load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, %stack.1, implicit $sp :: (volatile store (s256) into %stack.1, align 4) diff --git a/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir b/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir index a5bf4107fc8a..bec4dbba0b92 100644 --- a/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir +++ b/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir @@ -31,13 +31,13 @@ body: | ; 
AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] - ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm3]] + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY5:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY3]], [[COPY2]], [[COPY4]], [[COPY5]] + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY5:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY3]], [[COPY2]], [[COPY4]], [[COPY5]] ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY5]] ; ; AIE2-RA-LABEL: name: test_split_2d_from_1d @@ -71,13 +71,13 @@ body: | ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] - ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm3]] + ; AIE2P-VREGS-NEXT: 
[[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY5:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY3]], [[COPY2]], [[COPY4]], [[COPY5]] + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY5:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY3]], [[COPY2]], [[COPY4]], [[COPY5]] ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY5]] ; ; AIE2P-RA-LABEL: name: test_split_2d_from_1d @@ -134,9 +134,9 @@ body: | ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: @@ -174,9 +174,9 @@ body: | ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; 
AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: @@ -236,13 +236,13 @@ body: | ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:em = COPY [[COPY2]].sub_mod - ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY6:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:em_as_32bit = COPY [[COPY2]].sub_mod + ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY6:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY6:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY6]] + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY6:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY6]] ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY6]] ; ; AIE2-RA-LABEL: name: test_split_2d_from_other_2d @@ 
-274,13 +274,13 @@ body: | ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:em = COPY [[COPY2]].sub_mod - ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY6:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:em_as_32bit = COPY [[COPY2]].sub_mod + ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY6:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY6:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY6]] + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY6:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY6]] ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY6]] ; ; AIE2P-RA-LABEL: name: test_split_2d_from_other_2d @@ -391,17 +391,17 @@ body: | ; AIE2-VREGS-NEXT: liveins: $p0, $m1, $dn1, $dj1 ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 - ; AIE2-VREGS-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m1 - ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY $dn1 - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj = COPY $dj1 - ; AIE2-VREGS-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 0 + ; AIE2-VREGS-NEXT: [[COPY1:%[0-9]+]]:spill_em_to_er = COPY $m1 + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:spill_edn_to_er = COPY $dn1 + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:spill_edj_to_er = COPY $dj1 + ; 
AIE2-VREGS-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edc_as_32bit = MOV_PD_imm11_pseudo 0 ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]] - ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edn = COPY [[COPY2]] - ; AIE2-VREGS-NEXT: [[COPY6:%[0-9]+]]:edj = COPY [[COPY3]] - ; AIE2-VREGS-NEXT: [[COPY7:%[0-9]+]]:em = COPY [[COPY1]] - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY7]], [[COPY5]], [[COPY6]], [[COPY4]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc_as_32bit = COPY [[MOV_PD_imm11_pseudo]] + ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edn_as_32bit = COPY [[COPY2]] + ; AIE2-VREGS-NEXT: [[COPY6:%[0-9]+]]:edj_as_32bit = COPY [[COPY3]] + ; AIE2-VREGS-NEXT: [[COPY7:%[0-9]+]]:em_as_32bit = COPY [[COPY1]] + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY7]], [[COPY5]], [[COPY6]], [[COPY4]] ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY4]] ; ; AIE2-RA-LABEL: name: test_split_2d_from_full_copy_def_by_1d @@ -423,17 +423,17 @@ body: | ; AIE2P-VREGS-NEXT: liveins: $p0, $m1, $dn1, $dj1 ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 - ; AIE2P-VREGS-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m1 - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY $dn1 - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj = COPY $dj1 - ; AIE2P-VREGS-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 0 + ; AIE2P-VREGS-NEXT: [[COPY1:%[0-9]+]]:spill_em_to_er = COPY $m1 + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:spill_edn_to_er = COPY $dn1 + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:spill_edj_to_er = COPY $dj1 + ; AIE2P-VREGS-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edc_as_32bit = MOV_PD_imm11_pseudo 0 ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]] - ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edn = COPY [[COPY2]] - ; 
AIE2P-VREGS-NEXT: [[COPY6:%[0-9]+]]:edj = COPY [[COPY3]] - ; AIE2P-VREGS-NEXT: [[COPY7:%[0-9]+]]:em = COPY [[COPY1]] - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY7]], [[COPY5]], [[COPY6]], [[COPY4]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc_as_32bit = COPY [[MOV_PD_imm11_pseudo]] + ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edn_as_32bit = COPY [[COPY2]] + ; AIE2P-VREGS-NEXT: [[COPY6:%[0-9]+]]:edj_as_32bit = COPY [[COPY3]] + ; AIE2P-VREGS-NEXT: [[COPY7:%[0-9]+]]:em_as_32bit = COPY [[COPY1]] + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY7]], [[COPY5]], [[COPY6]], [[COPY4]] ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY4]] ; ; AIE2P-RA-LABEL: name: test_split_2d_from_full_copy_def_by_1d @@ -476,12 +476,12 @@ body: | ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], undef %9:em, [[COPY2]], [[COPY3]], [[COPY4]] + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], undef %9:em_as_32bit, [[COPY2]], [[COPY3]], [[COPY4]] ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, 
implicit [[COPY]], implicit [[COPY4]] ; ; AIE2-RA-LABEL: name: test_split_2d_undef @@ -512,12 +512,12 @@ body: | ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], undef %9:em, [[COPY2]], [[COPY3]], [[COPY4]] + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], undef %9:em_as_32bit, [[COPY2]], [[COPY3]], [[COPY4]] ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY4]] ; ; AIE2P-RA-LABEL: name: test_split_2d_undef @@ -569,15 +569,15 @@ body: | ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; AIE2-VREGS-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1 - ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:edn_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 4 - ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:edj_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 8 - ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:edc_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 12 + ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:spill_edn_to_er = LDA_dms_lda_idx_imm [[COPY1]], 4 + ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:spill_edj_to_er = LDA_dms_lda_idx_imm 
[[COPY1]], 8 + ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:spill_edc_to_er = LDA_dms_lda_idx_imm [[COPY1]], 12 ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY2:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], undef %11:em, [[COPY3]], [[COPY4]], [[COPY2]] + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY2:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], undef %11:em_as_32bit, [[COPY3]], [[COPY4]], [[COPY2]] ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY2]] ; ; AIE2-RA-LABEL: name: test_split_2d_undef_through_copy @@ -602,15 +602,15 @@ body: | ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; AIE2P-VREGS-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1 - ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:edn_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 4 - ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:edj_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 8 - ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:edc_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 12 + ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:spill_edn_to_er = LDA_dms_lda_idx_imm [[COPY1]], 4 + ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:spill_edj_to_er = LDA_dms_lda_idx_imm [[COPY1]], 8 + ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:spill_edc_to_er = LDA_dms_lda_idx_imm [[COPY1]], 12 ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] - ; 
AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY2:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], undef %11:em, [[COPY3]], [[COPY4]], [[COPY2]] + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY2:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], undef %11:em_as_32bit, [[COPY3]], [[COPY4]], [[COPY2]] ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY2]] ; ; AIE2P-RA-LABEL: name: test_split_2d_undef_through_copy @@ -671,7 +671,7 @@ body: | ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]].sub_hi_dim:eds = COPY [[COPY2]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, undef [[COPY3]].sub_hi_dim_then_sub_mod, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY3]].sub_dim_count, implicit [[COPY3]].sub_hi_dim_then_sub_dim_count ; ; AIE2-RA-LABEL: name: test_split_3d_from_various @@ -692,7 +692,7 @@ body: | ; 
AIE2-RA-NEXT: bb.1: ; AIE2-RA-NEXT: liveins: $p0, $d0_3d:0x000000000001C870 ; AIE2-RA-NEXT: {{ $}} - ; AIE2-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 + ; AIE2-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 ; AIE2-RA-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit renamable $dc4 ; ; AIE2P-VREGS-LABEL: name: test_split_3d_from_various @@ -717,7 +717,7 @@ body: | ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]].sub_hi_dim:eds = COPY [[COPY2]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, undef [[COPY3]].sub_hi_dim_then_sub_mod, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY3]].sub_dim_count, implicit [[COPY3]].sub_hi_dim_then_sub_dim_count ; ; AIE2P-RA-LABEL: name: test_split_3d_from_various @@ -738,7 +738,7 @@ body: | ; AIE2P-RA-NEXT: bb.1: ; AIE2P-RA-NEXT: liveins: $p0, $d0_3d:0x0001C00000200E00 ; AIE2P-RA-NEXT: {{ $}} - ; AIE2P-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 + ; AIE2P-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 ; 
AIE2P-RA-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit renamable $dc4 bb.1.entry: successors: %bb.2 @@ -763,7 +763,7 @@ body: | bb.2: - %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split %20, %100.sub_mod, %100.sub_dim_size, %100.sub_dim_stride, %100.sub_dim_count, undef %100.sub_hi_dim_then_sub_mod, %100.sub_hi_dim_then_sub_dim_size, %100.sub_hi_dim_then_sub_dim_stride, %100.sub_hi_dim_then_sub_dim_count + %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split %20, %100.sub_mod, %100.sub_dim_size, %100.sub_dim_stride, %100.sub_dim_count, %100.sub_hi_dim_then_sub_dim_size, %100.sub_hi_dim_then_sub_dim_stride, %100.sub_hi_dim_then_sub_dim_count PseudoRET implicit $lr, implicit %20, implicit %100.sub_dim_count, implicit %100.sub_hi_dim_then_sub_dim_count ... @@ -785,7 +785,7 @@ body: | ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]].sub_hi_dim:eds = COPY [[COPY2]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, undef [[COPY3]].sub_hi_dim_then_sub_mod, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY3]].sub_dim_count, implicit [[COPY3]].sub_hi_dim_then_sub_dim_count ; ; AIE2-RA-LABEL: name: 
test_split_3d_from_2d @@ -799,7 +799,7 @@ body: | ; AIE2-RA-NEXT: bb.1: ; AIE2-RA-NEXT: liveins: $p0, $d0_3d:0x000000000001C870 ; AIE2-RA-NEXT: {{ $}} - ; AIE2-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 + ; AIE2-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 ; AIE2-RA-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit renamable $dc4 ; ; AIE2P-VREGS-LABEL: name: test_split_3d_from_2d @@ -814,7 +814,7 @@ body: | ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]].sub_hi_dim:eds = COPY [[COPY2]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, undef [[COPY3]].sub_hi_dim_then_sub_mod, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY3]].sub_dim_count, implicit [[COPY3]].sub_hi_dim_then_sub_dim_count ; ; AIE2P-RA-LABEL: name: test_split_3d_from_2d @@ -828,7 +828,7 @@ body: | ; AIE2P-RA-NEXT: bb.1: ; AIE2P-RA-NEXT: liveins: $p0, $d0_3d:0x0001C00000200E00 ; AIE2P-RA-NEXT: {{ $}} - ; AIE2P-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 + ; AIE2P-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, 
$dn0, $dj0, $dc0, $dn4, $dj4, $dc4 ; AIE2P-RA-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit renamable $dc4 bb.1.entry: successors: %bb.2 @@ -841,7 +841,7 @@ body: | %100.sub_hi_dim:eds = COPY %5 bb.2: - %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split %20, %100.sub_mod, %100.sub_dim_size, %100.sub_dim_stride, %100.sub_dim_count, undef %100.sub_hi_dim_then_sub_mod, %100.sub_hi_dim_then_sub_dim_size, %100.sub_hi_dim_then_sub_dim_stride, %100.sub_hi_dim_then_sub_dim_count + %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split %20, %100.sub_mod, %100.sub_dim_size, %100.sub_dim_stride, %100.sub_dim_count, %100.sub_hi_dim_then_sub_dim_size, %100.sub_hi_dim_then_sub_dim_stride, %100.sub_hi_dim_then_sub_dim_count PseudoRET implicit $lr, implicit %20, implicit %100.sub_dim_count, implicit %100.sub_hi_dim_then_sub_dim_count ... @@ -864,9 +864,9 @@ body: | ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: @@ -912,9 +912,9 @@ body: | ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; 
AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: