From 043eb9ed09283a36d5b18b19870879ea07420d4f Mon Sep 17 00:00:00 2001 From: Krishnam Tibrewala Date: Tue, 22 Apr 2025 11:54:48 -0700 Subject: [PATCH 1/7] [Reg-Alloc] Provide a way to select LiveInterval for GreedyRegAlloc --- llvm/include/llvm/CodeGen/LiveIntervals.h | 2 ++ llvm/include/llvm/CodeGen/Passes.h | 2 ++ llvm/include/llvm/CodeGen/RegAllocCommon.h | 15 +++++++++++++++ llvm/lib/CodeGen/RegAllocBase.cpp | 9 +++++++-- llvm/lib/CodeGen/RegAllocBase.h | 6 ++++-- llvm/lib/CodeGen/RegAllocGreedy.cpp | 10 +++++++--- llvm/lib/CodeGen/RegAllocGreedy.h | 3 ++- 7 files changed, 39 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index baa5476cec94..18e8038ec7af 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -99,6 +99,8 @@ class VirtRegMap; LiveIntervals(); ~LiveIntervals() override; + const TargetInstrInfo &getTargetInstrInfo() const { return *TII; } + /// Calculate the spill weight to assign to a single instruction. static float getSpillWeight(bool isDef, bool isUse, const MachineBlockFrequencyInfo *MBFI, diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index f850767270a4..d4c9f5654f98 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -219,6 +219,8 @@ namespace llvm { /// FunctionPass *createGreedyRegisterAllocator(); FunctionPass *createGreedyRegisterAllocator(RegClassFilterFunc F); + FunctionPass *createGreedyRegisterAllocator(RegClassFilterFunc F, + LiveIntervalFilterFunc LIF); /// PBQPRegisterAllocation Pass - This pass implements the Partitioned Boolean /// Quadratic Prograaming (PBQP) based register allocator. 
diff --git a/llvm/include/llvm/CodeGen/RegAllocCommon.h b/llvm/include/llvm/CodeGen/RegAllocCommon.h index 757ca8e112ee..24125b71f745 100644 --- a/llvm/include/llvm/CodeGen/RegAllocCommon.h +++ b/llvm/include/llvm/CodeGen/RegAllocCommon.h @@ -16,6 +16,10 @@ namespace llvm { class TargetRegisterClass; class TargetRegisterInfo; +class MachineRegisterInfo; +class TargetInstrInfo; +class LiveInterval; + typedef std::function<bool(const TargetRegisterInfo &, const TargetRegisterClass &)> RegClassFilterFunc; @@ -26,6 +30,17 @@ static inline bool allocateAllRegClasses(const TargetRegisterInfo &, return true; } +typedef std::function<bool(MachineRegisterInfo &, const TargetInstrInfo &, + const LiveInterval *)> + LiveIntervalFilterFunc; +/// Default live interval filter function for register allocation. All live +/// intervals should be allocated. +static inline bool allocateAllLiveIntervals(MachineRegisterInfo &, + const TargetInstrInfo &, + const LiveInterval *) { + return true; } +} // namespace llvm + #endif // LLVM_CODEGEN_REGALLOCCOMMON_H diff --git a/llvm/lib/CodeGen/RegAllocBase.cpp b/llvm/lib/CodeGen/RegAllocBase.cpp index 36a6e02a028f..63ed006cbb60 100644 --- a/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/llvm/lib/CodeGen/RegAllocBase.cpp @@ -188,8 +188,13 @@ void RegAllocBase::enqueue(const LiveInterval *LI) { const TargetRegisterClass &RC = *MRI->getRegClass(Reg); if (ShouldAllocateClass(*TRI, RC)) { - LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n'); - enqueueImpl(LI); + if (ShouldAllocateLiveInterval(*MRI, LIS->getTargetInstrInfo(), LI)) { + LLVM_DEBUG(dbgs() << "Enqueuing " << printReg(Reg, TRI) << '\n'); + enqueueImpl(LI); + } else { + LLVM_DEBUG(dbgs() << "Not enqueueing " << printReg(Reg, TRI) + << " in skipped live interval\n"); + } } else { LLVM_DEBUG(dbgs() << "Not enqueueing " << printReg(Reg, TRI) << " in skipped register class\n"); diff --git a/llvm/lib/CodeGen/RegAllocBase.h b/llvm/lib/CodeGen/RegAllocBase.h index 9ac9caeb093d..e672591e2117 100644 --- a/llvm/lib/CodeGen/RegAllocBase.h +++ b/llvm/lib/CodeGen/RegAllocBase.h @@ -72,6 +72,7 @@ class RegAllocBase { LiveRegMatrix
*Matrix = nullptr; RegisterClassInfo RegClassInfo; const RegClassFilterFunc ShouldAllocateClass; + const LiveIntervalFilterFunc ShouldAllocateLiveInterval; /// Inst which is a def of an original reg and whose defs are already all /// dead after remat is saved in DeadRemats. The deletion of such inst is @@ -79,8 +80,9 @@ class RegAllocBase { /// always available for the remat of all the siblings of the original reg. SmallPtrSet DeadRemats; - RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses) : - ShouldAllocateClass(F) {} + RegAllocBase(const RegClassFilterFunc F = allocateAllRegClasses, + const LiveIntervalFilterFunc LIF = allocateAllLiveIntervals) + : ShouldAllocateClass(F), ShouldAllocateLiveInterval(LIF) {} virtual ~RegAllocBase() = default; diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 590ff74a11c0..4f664c18e94a 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -205,11 +205,15 @@ FunctionPass *llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor) { return new RAGreedy(Ftor); } -RAGreedy::RAGreedy(RegClassFilterFunc F): - MachineFunctionPass(ID), - RegAllocBase(F) { +FunctionPass * +llvm::createGreedyRegisterAllocator(RegClassFilterFunc Ftor, + LiveIntervalFilterFunc LIFtor) { + return new RAGreedy(Ftor, LIFtor); } +RAGreedy::RAGreedy(RegClassFilterFunc F, LiveIntervalFilterFunc LIF) + : MachineFunctionPass(ID), RegAllocBase(F, LIF) {} + void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h index 9aebe9343bb8..46f556748bd2 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -284,7 +284,8 @@ class LLVM_LIBRARY_VISIBILITY RAGreedy : public MachineFunctionPass, bool ReverseLocalAssignment = false; public: - RAGreedy(const RegClassFilterFunc F = allocateAllRegClasses); + RAGreedy(const 
RegClassFilterFunc F = allocateAllRegClasses, + const LiveIntervalFilterFunc LIF = allocateAllLiveIntervals); /// Return the pass name. StringRef getPassName() const override { return "Greedy Register Allocator"; } From 526c03a43365dd2f4be96f4930fdf5ca85425d6a Mon Sep 17 00:00:00 2001 From: Krishnam Tibrewala Date: Tue, 22 Apr 2025 11:55:42 -0700 Subject: [PATCH 2/7] [AIE2P] Use ShouldAllocateLiveInterval to focus on alloc 3D virtual reg that are used by 3D instruction --- .../Target/AIE/aie2p/AIE2PTargetMachine.cpp | 171 +++++++++++++++++- 1 file changed, 170 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp index ab0158f21334..6f13c154db14 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PTargetMachine.cpp @@ -14,6 +14,7 @@ #include "AIE2PTargetMachine.h" #include "AIE2PTargetTransformInfo.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" using namespace llvm; @@ -66,6 +67,173 @@ void AIE2PPassConfig::addPreRegBankSelect() { } } +static bool onlyAllocateLIwith3DInstruction(MachineRegisterInfo &MRI, + const TargetInstrInfo &TII, + const LiveInterval *LI) { + const Register Reg = LI->reg(); + return std::any_of( + MRI.use_nodbg_instructions(Reg).begin(), + MRI.use_nodbg_instructions(Reg).end(), [&](const MachineInstr &MI) { + switch (MI.getOpcode()) { + case AIE2P::LDA_3D_dms_lda: + case AIE2P::LDA_3D_dmv_lda_q: + case AIE2P::LDA_3D_s16: + case AIE2P::LDA_3D_s8: + case AIE2P::LDA_3D_u16: + case AIE2P::LDA_3D_u8: + case AIE2P::LDA_TM_3D: + case AIE2P::ST_3D_dms_sts: + case AIE2P::ST_3D_dmv_sts_q: + case AIE2P::ST_3D_s16: + case AIE2P::ST_3D_s8: + case AIE2P::ST_TM_3D: + case AIE2P::VLDA_3D_128: + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmw_lda_ups_bf: + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf: + case AIE2P::VLDA_3D_dmw_lda_w: + case AIE2P::VLDA_3D_dmx_lda_bm: + case 
AIE2P::VLDA_3D_dmx_lda_fifohl: + case AIE2P::VLDA_3D_dmx_lda_x: + case AIE2P::VLDB_3D_128: + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign0: + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign1: + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign0: + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign1: + case AIE2P::VLDB_3D_dmw_ldb: + case AIE2P::VLDB_3D_dmx_ldb_x: + case AIE2P::VST_3D_128: + case AIE2P::VST_3D_CONV_bf16_fp32_dmw_sts_srs_bf: + case AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf: + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0: + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1: + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0: + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1: + case AIE2P::VST_3D_dmw_sts_w: + case AIE2P::VST_3D_dmx_sts_bm: + case AIE2P::VST_3D_dmx_sts_fifohl: + case AIE2P::VST_3D_dmx_sts_x: + case AIE2P::VLD_3D_w_pseudo: + case AIE2P::VLD_3D_x_pseudo: + case AIE2P::VLD_3D_128_pseudo: + case AIE2P::PADDA_3D: + case AIE2P::PADDB_3D: + case AIE2P::PADDS_3D: + case AIE2P::PADD_3D_pseudo: + case AIE2P::VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1: + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign0: + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign1: + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign0: + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign1: + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0: + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign1: + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign0: + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign1: + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign0: + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign1: + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign0: + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign1: + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign0: + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign1: + case AIE2P::VST_FLUSH_512_3D: + case AIE2P::VST_FLUSH_512_CONV_3D: + case AIE2P::VLDA_POP_512_3D: + case AIE2P::VLDA_POP_544_3D: + case 
AIE2P::VLDA_POP_576_3D: + case AIE2P::VLDA_POP_640_3D: + case AIE2P::VLDA_POP_704_3D: + case AIE2P::VLDB_POP_512_3D: + case AIE2P::VLDB_POP_544_3D: + case AIE2P::VLDB_POP_576_3D: + case AIE2P::VLDB_POP_640_3D: + case AIE2P::VLDB_POP_704_3D: + case AIE2P::VLD_POP_512_3D_pseudo: + case AIE2P::VLD_POP_544_3D_pseudo: + case AIE2P::VLD_POP_576_3D_pseudo: + case AIE2P::VLD_POP_640_3D_pseudo: + case AIE2P::VLD_POP_704_3D_pseudo: + case AIE2P::LDA_3D_dms_lda_split: + case AIE2P::LDA_3D_dmv_lda_q_split: + case AIE2P::LDA_3D_s16_split: + case AIE2P::LDA_3D_s8_split: + case AIE2P::LDA_3D_u16_split: + case AIE2P::LDA_3D_u8_split: + case AIE2P::LDA_TM_3D_split: + case AIE2P::ST_3D_dms_sts_split: + case AIE2P::ST_3D_dmv_sts_q_split: + case AIE2P::ST_3D_s16_split: + case AIE2P::ST_3D_s8_split: + case AIE2P::ST_TM_3D_split: + case AIE2P::VLDA_3D_128_split: + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmw_lda_ups_bf_split: + case AIE2P::VLDA_3D_CONV_fp32_bf16_dmx_lda_ups_bf_split: + case AIE2P::VLDA_3D_dmw_lda_w_split: + case AIE2P::VLDA_3D_dmx_lda_bm_split: + case AIE2P::VLDA_3D_dmx_lda_fifohl_split: + case AIE2P::VLDA_3D_dmx_lda_x_split: + case AIE2P::VLDB_3D_128_split: + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign0_split: + case AIE2P::VLDB_3D_UNPACK_dmw_ldb_unpack_unpackSign1_split: + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign0_split: + case AIE2P::VLDB_3D_UNPACK_dmx_ldb_unpack_unpackSign1_split: + case AIE2P::VLDB_3D_dmw_ldb_split: + case AIE2P::VLDB_3D_dmx_ldb_x_split: + case AIE2P::VST_3D_128_split: + case AIE2P::VST_3D_CONV_bf16_fp32_dmw_sts_srs_bf_split: + case AIE2P::VST_3D_CONV_bf16_fp32_dmx_sts_srs_bf_split: + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign0_split: + case AIE2P::VST_3D_PACK_dmw_sts_pack_packSign1_split: + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign0_split: + case AIE2P::VST_3D_PACK_dmx_sts_pack_packSign1_split: + case AIE2P::VST_3D_dmw_sts_w_split: + case AIE2P::VST_3D_dmx_sts_bm_split: + case AIE2P::VST_3D_dmx_sts_fifohl_split: + case 
AIE2P::VST_3D_dmx_sts_x_split: + case AIE2P::VLD_3D_w_pseudo_split: + case AIE2P::VLD_3D_x_pseudo_split: + case AIE2P::VLD_3D_128_pseudo_split: + case AIE2P::PADDA_3D_split: + case AIE2P::PADDB_3D_split: + case AIE2P::PADDS_3D_split: + case AIE2P::PADD_3D_pseudo_split: + case AIE2P::VLDA_3D_UPS_2x_dmw_lda_ups_w2b_upsSign1_split: + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign0_split: + case AIE2P::VLDA_3D_UPS_2x_dmx_lda_ups_x2c_upsSign1_split: + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign0_split: + case AIE2P::VLDA_3D_UPS_4x_dmw_lda_ups_w2c_upsSign1_split: + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split: + case AIE2P::VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign1_split: + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign0_split: + case AIE2P::VST_3D_SRS_2x_dm_sts_srs_cm_srsSign1_split: + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign0_split: + case AIE2P::VST_3D_SRS_2x_dmw_sts_srs_bm_srsSign1_split: + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign0_split: + case AIE2P::VST_3D_SRS_4x_dm_sts_srs_cm_srsSign1_split: + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign0_split: + case AIE2P::VST_3D_SRS_4x_dmx_sts_srs_dm_srsSign1_split: + case AIE2P::VST_FLUSH_512_3D_split: + case AIE2P::VST_FLUSH_512_CONV_3D_split: + case AIE2P::VLDA_POP_512_3D_split: + case AIE2P::VLDA_POP_544_3D_split: + case AIE2P::VLDA_POP_576_3D_split: + case AIE2P::VLDA_POP_640_3D_split: + case AIE2P::VLDA_POP_704_3D_split: + case AIE2P::VLDB_POP_512_3D_split: + case AIE2P::VLDB_POP_544_3D_split: + case AIE2P::VLDB_POP_576_3D_split: + case AIE2P::VLDB_POP_640_3D_split: + case AIE2P::VLDB_POP_704_3D_split: + case AIE2P::VLD_POP_512_3D_pseudo_split: + case AIE2P::VLD_POP_544_3D_pseudo_split: + case AIE2P::VLD_POP_576_3D_pseudo_split: + case AIE2P::VLD_POP_640_3D_pseudo_split: + case AIE2P::VLD_POP_704_3D_pseudo_split: + return true; + default: + return false; + } + }); +} + static bool onlyAllocate3DRegisters(const TargetRegisterInfo &TRI, const TargetRegisterClass &RC) { return 
AIE2P::eDSRegClass.hasSubClassEq(&RC); @@ -97,7 +265,8 @@ bool AIE2PPassConfig::addRegAssignAndRewriteOptimized() { if (AllocateMRegsFirst) addPass(createGreedyRegisterAllocator(onlyAllocateMRegisters)); if (EnableStagedRA) { - addPass(createGreedyRegisterAllocator(onlyAllocate3DRegisters)); + addPass(createGreedyRegisterAllocator(onlyAllocate3DRegisters, + onlyAllocateLIwith3DInstruction)); addPass(createAIESuperRegRewriter()); addPass(createGreedyRegisterAllocator(onlyAllocate3D2DRegisters)); addPass(createAIESuperRegRewriter()); From 02e9180bcc0f71b062aa18b27a6fdc69c5e0c6f8 Mon Sep 17 00:00:00 2001 From: Krishnam Tibrewala Date: Tue, 22 Apr 2025 13:44:13 -0700 Subject: [PATCH 3/7] [WIP] Enhance Super-Reg-Rewriter to work with unassigned virtual reg --- llvm/lib/Target/AIE/AIESuperRegRewriter.cpp | 69 +++++++--- llvm/test/CodeGen/AIE/staged-ra-rewrite.mir | 144 ++++++++++---------- 2 files changed, 122 insertions(+), 91 deletions(-) diff --git a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp index 2c7b4a8e218e..0b8aff0c78c0 100644 --- a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp @@ -11,6 +11,10 @@ #include "AIEBaseInstrInfo.h" #include "AIEBaseRegisterInfo.h" +#include "aie2p/AIE2PRegisterBankInfo.h" +#include "aie2p/AIE2PRegisterInfo.h" +#include "aie2p/AIE2PSubtarget.h" + #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveDebugVariables.h" @@ -65,8 +69,9 @@ class AIESuperRegRewriter : public MachineFunctionPass { private: void rewriteSuperReg(Register Reg, Register AssignedPhysReg, - MachineRegisterInfo &MRI, const AIEBaseRegisterInfo &TRI, - VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS, + MachineFunction &MF, MachineRegisterInfo &MRI, + const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM, + LiveRegMatrix &LRM, LiveIntervals &LIS, SlotIndexes &Indexes, LiveDebugVariables &DebugVars); }; @@ -149,6 +154,7 @@ bool 
AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { SlotIndexes &Indexes = getAnalysis(); LiveDebugVariables &DebugVars = getAnalysis(); std::map AssignedPhysRegs; + std::list UnAssignedPhysRegs; // Collect already-assigned VRegs that can be split into smaller ones. LLVM_DEBUG(VRM.dump()); @@ -156,10 +162,12 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { ++VRegIdx) { Register Reg = Register::index2VirtReg(VRegIdx); - // Ignore un-used registers and un-allocated registers - if (MRI.reg_nodbg_empty(Reg) || !VRM.hasPhys(Reg)) + // Ignore un-used registers + if (MRI.reg_nodbg_empty(Reg)) continue; + const bool VirtualRegIsAllocated = VRM.hasPhys(Reg); + // Skip vregs that are spilled, they would anyway be disregarded by // getRewritableSubRegs due to the spill instructions using the whole reg // without any subreg indices. @@ -172,17 +180,32 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { LLVM_DEBUG(dbgs() << "Analysing " << printReg(Reg, &TRI, 0, &MRI) << ":" << printRegClassOrBank(Reg, MRI, &TRI) << '\n'); if (!getRewritableSubRegs(Reg, MRI, TRI).empty()) { - AssignedPhysRegs[Reg] = VRM.getPhys(Reg); - LRM.unassign(LIS.getInterval(Reg)); + if (VirtualRegIsAllocated) { + AssignedPhysRegs[Reg] = VRM.getPhys(Reg); + LRM.unassign(LIS.getInterval(Reg)); + } else { + UnAssignedPhysRegs.push_back(Reg); + } } else { LLVM_DEBUG(dbgs() << "Could not rewrite " << printReg(Reg, &TRI, 0, &MRI) << '\n'); } } - // Re-write all the collected VRegs + // Re-write all the collected assigned VRegs for (auto &[VReg, PhysReg] : AssignedPhysRegs) { - rewriteSuperReg(VReg, PhysReg, MRI, TRI, VRM, LRM, LIS, Indexes, DebugVars); + rewriteSuperReg(VReg, PhysReg, MF, MRI, TRI, VRM, LRM, LIS, Indexes, + DebugVars); + } + + // Re-write all the collected unassigned VRegs + for (auto &VReg : UnAssignedPhysRegs) { + MCRegister DummyPhysReg; + const TargetRegisterClass *SuperRC = MRI.getRegClass(VReg); + // TODO : Remove ARCH
specific check + if (SuperRC == &AIE2P::eDSRegClass) + rewriteSuperReg(VReg, DummyPhysReg, MF, MRI, TRI, VRM, LRM, LIS, Indexes, + DebugVars); } LLVM_DEBUG(VRM.dump()); @@ -238,10 +261,13 @@ static void rewriteFullCopy(MachineInstr &MI, const std::set &CopySubRegs, } void AIESuperRegRewriter::rewriteSuperReg( - Register Reg, Register AssignedPhysReg, MachineRegisterInfo &MRI, - const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM, LiveRegMatrix &LRM, - LiveIntervals &LIS, SlotIndexes &Indexes, LiveDebugVariables &DebugVars) { - LLVM_DEBUG(dbgs() << "Rewriting " << printReg(Reg, &TRI, 0, &MRI) << '\n'); + Register Reg, Register AssignedPhysReg, MachineFunction &MF, + MachineRegisterInfo &MRI, const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM, + LiveRegMatrix &LRM, LiveIntervals &LIS, SlotIndexes &Indexes, + LiveDebugVariables &DebugVars) { + bool AssignPhysRegIsValid = AssignedPhysReg.isValid(); + LLVM_DEBUG(dbgs() << "Rewriting " << printReg(Reg, &TRI, 0, &MRI) + << " Assigned " << AssignPhysRegIsValid << '\n'); auto *TII = static_cast( VRM.getMachineFunction().getSubtarget().getInstrInfo()); @@ -251,7 +277,9 @@ void AIESuperRegRewriter::rewriteSuperReg( SmallSet SubRegs = getRewritableSubRegs(Reg, MRI, TRI); assert(!SubRegs.empty()); for (int SubReg : SubRegs) { - const TargetRegisterClass *SubRC = TRI.getSubRegisterClass(SuperRC, SubReg); + const TargetRegisterClass *SubRC = TRI.getLargestLegalSuperClass( + TRI.getSubRegisterClass(SuperRC, SubReg), MF); + SubRegToVReg[SubReg] = MRI.createVirtualRegister(SubRC); } @@ -289,7 +317,9 @@ void AIESuperRegRewriter::rewriteSuperReg( LIS.removeInterval(Reg); for (auto &[SubRegIdx, VReg] : SubRegToVReg) { - MCRegister SubPhysReg = TRI.getSubReg(AssignedPhysReg, SubRegIdx); + MCRegister SubPhysReg; + if (AssignPhysRegIsValid) + SubPhysReg = TRI.getSubReg(AssignedPhysReg, SubRegIdx); LiveInterval &SubRegLI = LIS.getInterval(VReg); LLVM_DEBUG(dbgs() << " Assigning Range: " << SubRegLI << '\n'); @@ -300,11 +330,12 @@ void 
AIESuperRegRewriter::rewriteSuperReg( LIComponents.push_back(&SubRegLI); VRM.grow(); - for (LiveInterval *LI : LIComponents) { - LRM.assign(*LI, SubPhysReg); - VRM.setRequiredPhys(LI->reg(), SubPhysReg); - LLVM_DEBUG(dbgs() << " Assigned " << printReg(LI->reg()) << "\n"); - } + if (AssignPhysRegIsValid) + for (LiveInterval *LI : LIComponents) { + LRM.assign(*LI, SubPhysReg); + VRM.setRequiredPhys(LI->reg(), SubPhysReg); + LLVM_DEBUG(dbgs() << " Assigned " << printReg(LI->reg()) << "\n"); + } } // Announce new VRegs so DBG locations can be updated. diff --git a/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir b/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir index a5bf4107fc8a..c24cadc0b912 100644 --- a/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir +++ b/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir @@ -31,13 +31,13 @@ body: | ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] - ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm3]] + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY5:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY3]], [[COPY2]], [[COPY4]], [[COPY5]] + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY5:%[0-9]+]]:edc_as_32bit = 
PADDA_2D_split [[COPY]], [[COPY3]], [[COPY2]], [[COPY4]], [[COPY5]] ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY5]] ; ; AIE2-RA-LABEL: name: test_split_2d_from_1d @@ -71,13 +71,13 @@ body: | ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] - ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm3]] + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY5:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY3]], [[COPY2]], [[COPY4]], [[COPY5]] + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY5:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY3]], [[COPY2]], [[COPY4]], [[COPY5]] ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY5]] ; ; AIE2P-RA-LABEL: name: test_split_2d_from_1d @@ -134,9 +134,9 @@ body: | ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: 
[[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: @@ -174,9 +174,9 @@ body: | ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: @@ -236,13 +236,13 @@ body: | ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:em = COPY [[COPY2]].sub_mod - ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY6:%[0-9]+]]:edc = COPY 
[[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:em_as_32bit = COPY [[COPY2]].sub_mod + ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY6:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY6:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY6]] + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY6:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY6]] ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY6]] ; ; AIE2-RA-LABEL: name: test_split_2d_from_other_2d @@ -274,13 +274,13 @@ body: | ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:em = COPY [[COPY2]].sub_mod - ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY6:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:em_as_32bit = COPY [[COPY2]].sub_mod + ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY6:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY6:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY6]] + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, 
[[COPY6:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY6]] ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY6]] ; ; AIE2P-RA-LABEL: name: test_split_2d_from_other_2d @@ -391,17 +391,17 @@ body: | ; AIE2-VREGS-NEXT: liveins: $p0, $m1, $dn1, $dj1 ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 - ; AIE2-VREGS-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m1 - ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY $dn1 - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj = COPY $dj1 - ; AIE2-VREGS-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 0 + ; AIE2-VREGS-NEXT: [[COPY1:%[0-9]+]]:spill_em_to_er = COPY $m1 + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:spill_edn_to_er = COPY $dn1 + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:spill_edj_to_er = COPY $dj1 + ; AIE2-VREGS-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edc_as_32bit = MOV_PD_imm11_pseudo 0 ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]] - ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edn = COPY [[COPY2]] - ; AIE2-VREGS-NEXT: [[COPY6:%[0-9]+]]:edj = COPY [[COPY3]] - ; AIE2-VREGS-NEXT: [[COPY7:%[0-9]+]]:em = COPY [[COPY1]] - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY7]], [[COPY5]], [[COPY6]], [[COPY4]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc_as_32bit = COPY [[MOV_PD_imm11_pseudo]] + ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edn_as_32bit = COPY [[COPY2]] + ; AIE2-VREGS-NEXT: [[COPY6:%[0-9]+]]:edj_as_32bit = COPY [[COPY3]] + ; AIE2-VREGS-NEXT: [[COPY7:%[0-9]+]]:em_as_32bit = COPY [[COPY1]] + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY7]], [[COPY5]], [[COPY6]], [[COPY4]] ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY4]] ; ; AIE2-RA-LABEL: name: test_split_2d_from_full_copy_def_by_1d @@ -423,17 +423,17 @@ body: | ; AIE2P-VREGS-NEXT: liveins: 
$p0, $m1, $dn1, $dj1 ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 - ; AIE2P-VREGS-NEXT: [[COPY1:%[0-9]+]]:em = COPY $m1 - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY $dn1 - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj = COPY $dj1 - ; AIE2P-VREGS-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 0 + ; AIE2P-VREGS-NEXT: [[COPY1:%[0-9]+]]:spill_em_to_er = COPY $m1 + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:spill_edn_to_er = COPY $dn1 + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:spill_edj_to_er = COPY $dj1 + ; AIE2P-VREGS-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edc_as_32bit = MOV_PD_imm11_pseudo 0 ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc = COPY [[MOV_PD_imm11_pseudo]] - ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edn = COPY [[COPY2]] - ; AIE2P-VREGS-NEXT: [[COPY6:%[0-9]+]]:edj = COPY [[COPY3]] - ; AIE2P-VREGS-NEXT: [[COPY7:%[0-9]+]]:em = COPY [[COPY1]] - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], [[COPY7]], [[COPY5]], [[COPY6]], [[COPY4]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc_as_32bit = COPY [[MOV_PD_imm11_pseudo]] + ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edn_as_32bit = COPY [[COPY2]] + ; AIE2P-VREGS-NEXT: [[COPY6:%[0-9]+]]:edj_as_32bit = COPY [[COPY3]] + ; AIE2P-VREGS-NEXT: [[COPY7:%[0-9]+]]:em_as_32bit = COPY [[COPY1]] + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], [[COPY7]], [[COPY5]], [[COPY6]], [[COPY4]] ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY4]] ; ; AIE2P-RA-LABEL: name: test_split_2d_from_full_copy_def_by_1d @@ -476,12 +476,12 @@ body: | ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2-VREGS-NEXT: 
[[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], undef %9:em, [[COPY2]], [[COPY3]], [[COPY4]] + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], undef %9:em_as_32bit, [[COPY2]], [[COPY3]], [[COPY4]] ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY4]] ; ; AIE2-RA-LABEL: name: test_split_2d_undef @@ -512,12 +512,12 @@ body: | ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], undef %9:em, [[COPY2]], [[COPY3]], [[COPY4]] + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY4:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], 
undef %9:em_as_32bit, [[COPY2]], [[COPY3]], [[COPY4]] ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY4]] ; ; AIE2P-RA-LABEL: name: test_split_2d_undef @@ -569,15 +569,15 @@ body: | ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; AIE2-VREGS-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1 - ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:edn_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 4 - ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:edj_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 8 - ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:edc_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 12 + ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:spill_edn_to_er = LDA_dms_lda_idx_imm [[COPY1]], 4 + ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:spill_edj_to_er = LDA_dms_lda_idx_imm [[COPY1]], 8 + ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:spill_edc_to_er = LDA_dms_lda_idx_imm [[COPY1]], 12 ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY2:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], undef %11:em, [[COPY3]], [[COPY4]], [[COPY2]] + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY2:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], undef %11:em_as_32bit, [[COPY3]], [[COPY4]], [[COPY2]] ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY2]] ; ; AIE2-RA-LABEL: name: test_split_2d_undef_through_copy @@ -602,15 +602,15 @@ body: | ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: 
[[COPY:%[0-9]+]]:ep = COPY $p0 ; AIE2P-VREGS-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p1 - ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:edn_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 4 - ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:edj_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 8 - ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:edc_as_32bit = LDA_dms_lda_idx_imm [[COPY1]], 12 + ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm:%[0-9]+]]:spill_edn_to_er = LDA_dms_lda_idx_imm [[COPY1]], 4 + ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:spill_edj_to_er = LDA_dms_lda_idx_imm [[COPY1]], 8 + ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:spill_edc_to_er = LDA_dms_lda_idx_imm [[COPY1]], 12 ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edc = COPY [[LDA_dms_lda_idx_imm2]] - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY2:%[0-9]+]]:edc = PADDA_2D_split [[COPY]], undef %11:em, [[COPY3]], [[COPY4]], [[COPY2]] + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY2:%[0-9]+]]:edc_as_32bit = PADDA_2D_split [[COPY]], undef %11:em_as_32bit, [[COPY3]], [[COPY4]], [[COPY2]] ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY2]] ; ; AIE2P-RA-LABEL: name: test_split_2d_undef_through_copy @@ -864,9 +864,9 @@ body: | ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2-VREGS-NEXT: 
[[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: @@ -912,9 +912,9 @@ body: | ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm1:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 4 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm2:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 8 ; AIE2P-VREGS-NEXT: [[LDA_dms_lda_idx_imm3:%[0-9]+]]:er = LDA_dms_lda_idx_imm [[COPY1]], 12 - ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn = COPY [[LDA_dms_lda_idx_imm1]] - ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em = COPY [[LDA_dms_lda_idx_imm]] - ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj = COPY [[LDA_dms_lda_idx_imm2]] + ; AIE2P-VREGS-NEXT: [[COPY2:%[0-9]+]]:edn_as_32bit = COPY [[LDA_dms_lda_idx_imm1]] + ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]]:em_as_32bit = COPY [[LDA_dms_lda_idx_imm]] + ; AIE2P-VREGS-NEXT: [[COPY4:%[0-9]+]]:edj_as_32bit = COPY [[LDA_dms_lda_idx_imm2]] ; AIE2P-VREGS-NEXT: [[COPY5:%[0-9]+]]:edc_as_32bit = COPY [[LDA_dms_lda_idx_imm3]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: From c6bc5a8bc42b3442c57bfcb1eb606be39dab44c3 Mon Sep 17 00:00:00 2001 From: Krishnam Tibrewala Date: Tue, 22 Apr 2025 18:18:47 -0700 Subject: [PATCH 4/7] [AIE2P] Force to create individual sub-copy when splitting live ranges --- llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp | 10 ++++++++++ llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp 
index c0f856aef2f8..bfcc2a049a94 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp @@ -52,6 +52,16 @@ AIE2PRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_AIE2P_SaveList; } +const TargetRegisterClass * +AIE2PRegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC, + unsigned Idx) const { + if (Idx == AIE2P::sub_lo_dim) { + return nullptr; + } + // Forward to TableGen's default version. + return AIE2PGenRegisterInfo::getSubClassWithSubReg(RC, Idx); +} + BitVector AIE2PRegisterInfo::getReservedRegs(const MachineFunction &MF) const { const TargetFrameLowering *TFI = getFrameLowering(MF); BitVector Reserved(getNumRegs()); diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h index c7db1ac989a8..30178b383105 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.h @@ -48,6 +48,10 @@ struct AIE2PRegisterInfo : public AIE2PGenRegisterInfo { const TargetRegisterClass *getPointerRegClass(const MachineFunction &MF, unsigned Kind) const override; + const TargetRegisterClass * + getSubClassWithSubReg(const TargetRegisterClass *RC, + unsigned Idx) const override; + bool requiresRegisterScavenging(const MachineFunction &MF) const override { return true; } From a681b6ec5cfef563739a68382eb03970d452d487 Mon Sep 17 00:00:00 2001 From: Krishnam Tibrewala Date: Wed, 30 Apr 2025 13:26:08 -0700 Subject: [PATCH 5/7] [AIE2x] sub_hi_dim_then_sub_mod not required for 3D register --- llvm/lib/Target/AIE/AIE2InstrInfo.cpp | 3 - llvm/lib/Target/AIE/AIE2InstrInfo.td | 2 +- llvm/lib/Target/AIE/AIE2RegisterInfo.cpp | 1 - llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp | 30 ++- llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp | 3 - llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td | 2 +- .../Target/AIE/aie2p/AIE2PRegisterInfo.cpp | 1 - .../AIE/aie2/postrapseudos/dim_spill.mir | 46 ++--- 
.../AIE/aie2/ra/split-instrs-create.mir | 188 +++++++++--------- .../AIE/aie2/ra/split-instrs-replace.mir | 94 ++++----- .../AIE/aie2/ra/tie-subregs-flow-3d.mir | 43 ++-- .../CodeGen/AIE/aie2/run-physreg-copy.mir | 7 +- .../AIE/aie2p/eliminate-frame-index.mir | 12 +- .../AIE/aie2p/ra/split-instrs-create.mir | 14 +- .../AIE/aie2p/ra/split-instrs-replace.mir | 16 +- .../AIE/aie2p/ra/tie-subregs-flow-3d.mir | 30 +-- .../CodeGen/AIE/aie2p/spill/dim_spill.mir | 46 ++--- llvm/test/CodeGen/AIE/staged-ra-rewrite.mir | 20 +- 18 files changed, 271 insertions(+), 287 deletions(-) diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp index 2731ceeaf8ea..1acb7366021e 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp @@ -835,7 +835,6 @@ AIE2InstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { {AIE2::LDA_dms_spill, AIE2::sub_dim_size}, {AIE2::LDA_dms_spill, AIE2::sub_dim_stride}, {AIE2::LDA_dms_spill, AIE2::sub_dim_count}, - {AIE2::LDA_dms_spill, AIE2::sub_hi_dim_then_sub_mod}, {AIE2::LDA_dms_spill, AIE2::sub_hi_dim_then_sub_dim_size}, {AIE2::LDA_dms_spill, AIE2::sub_hi_dim_then_sub_dim_stride}, {AIE2::LDA_dms_spill, AIE2::sub_hi_dim_then_sub_dim_count}}; @@ -844,7 +843,6 @@ AIE2InstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { {AIE2::ST_dms_spill, AIE2::sub_dim_size}, {AIE2::ST_dms_spill, AIE2::sub_dim_stride}, {AIE2::ST_dms_spill, AIE2::sub_dim_count}, - {AIE2::ST_dms_spill, AIE2::sub_hi_dim_then_sub_mod}, {AIE2::ST_dms_spill, AIE2::sub_hi_dim_then_sub_dim_size}, {AIE2::ST_dms_spill, AIE2::sub_hi_dim_then_sub_dim_stride}, {AIE2::ST_dms_spill, AIE2::sub_hi_dim_then_sub_dim_count}}; @@ -1205,7 +1203,6 @@ AIE2InstrInfo::getTiedRegInfo(unsigned Opcode) const { SubRegSplit(AIE2::sub_dim_size), SubRegSplit(AIE2::sub_dim_stride), SubRegSplit(AIE2::sub_dim_count), - SubRegSplit(AIE2::sub_hi_dim_then_sub_mod, /*IsUndef=*/true), 
SubRegSplit(AIE2::sub_hi_dim_then_sub_dim_size), SubRegSplit(AIE2::sub_hi_dim_then_sub_dim_stride), SubRegSplit(AIE2::sub_hi_dim_then_sub_dim_count)}; diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.td b/llvm/lib/Target/AIE/AIE2InstrInfo.td index f91ebb99b24d..5a53bb4b151b 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.td +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.td @@ -605,7 +605,7 @@ foreach instr = [VST_2D_SRS_D8_S32, VST_2D_SRS_D16_S64, VST_2D_SRS_D16_S32, // Define _split variants for instructions using 3D registers class Split3DInstr : SplitPseudo {} + eDN:$dim_size2, eDJ:$dim_stride2, eDC:$dim_count2)> {} foreach instr = [VLDA_3D_dmw_lda_w, VLDA_3D_dmw_lda_am, VLDA_3D_CONV_FP32_BF16, VLDB_3D, VLDB_3D_128, LDA_3D_dmv_lda_q, VLDB_3D_UNPACK_S8_S4, VLDB_3D_UNPACK_S16_S8, VLDB_3D_UNPACK_D8_D4, VLDB_3D_UNPACK_D16_D8, diff --git a/llvm/lib/Target/AIE/AIE2RegisterInfo.cpp b/llvm/lib/Target/AIE/AIE2RegisterInfo.cpp index d86d09e1196b..f1ccad91345b 100644 --- a/llvm/lib/Target/AIE/AIE2RegisterInfo.cpp +++ b/llvm/lib/Target/AIE/AIE2RegisterInfo.cpp @@ -475,7 +475,6 @@ const std::set &AIE2RegisterInfo::getSubRegSplit(int RegClassId) const { AIE2::sub_dim_size, AIE2::sub_dim_stride, AIE2::sub_dim_count, - AIE2::sub_hi_dim_then_sub_mod, AIE2::sub_hi_dim_then_sub_dim_size, AIE2::sub_hi_dim_then_sub_dim_stride, AIE2::sub_hi_dim_then_sub_dim_count}; diff --git a/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp b/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp index ba6c482b1ae1..0ec24c9a48a9 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIEBaseInstrInfo.cpp @@ -649,15 +649,29 @@ void AIEBaseInstrInfo::copyThroughSubRegs(MachineBasicBlock &MBB, MCRegister SrcReg, bool KillSrc) const { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); - - SmallSet SrcSubRegs; - collectSubRegs(SrcReg, SrcSubRegs, TRI); + auto &TRI = + *static_cast(MRI.getTargetRegisterInfo()); + + const auto *RC = 
Register::isPhysicalRegister(SrcReg.id()) + ? TRI.getMinimalPhysRegClass(SrcReg) + : MRI.getRegClass(SrcReg); + auto &SubRegSplit = TRI.getSubRegSplit(RC->getID()); + + if (SubRegSplit.size() > 1) { + for (const auto &SubRegIdx : SubRegSplit) { + MCRegister SrcSubReg = TRI.getSubReg(SrcReg, SubRegIdx); + MCRegister DstSubReg = TRI.getSubReg(DstReg, SubRegIdx); + copyPhysReg(MBB, MBBI, DL, DstSubReg, SrcSubReg, KillSrc); + } + } else { + SmallSet SrcSubRegs; + collectSubRegs(SrcReg, SrcSubRegs, TRI); - for (MCRegister SrcSubReg : SrcSubRegs) { - unsigned SubRegIdx = TRI.getSubRegIndex(SrcReg, SrcSubReg); - MCRegister DstSubReg = TRI.getSubReg(DstReg, SubRegIdx); - copyPhysReg(MBB, MBBI, DL, DstSubReg, SrcSubReg, KillSrc); + for (MCRegister SrcSubReg : SrcSubRegs) { + unsigned SubRegIdx = TRI.getSubRegIndex(SrcReg, SrcSubReg); + MCRegister DstSubReg = TRI.getSubReg(DstReg, SubRegIdx); + copyPhysReg(MBB, MBBI, DL, DstSubReg, SrcSubReg, KillSrc); + } } } diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp index 49d892737bf3..e9139454577e 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp @@ -1051,7 +1051,6 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { {AIE2P::ST_dms_sts_spill, AIE2P::sub_dim_size}, {AIE2P::ST_dms_sts_spill, AIE2P::sub_dim_stride}, {AIE2P::ST_dms_sts_spill, AIE2P::sub_dim_count}, - {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_mod}, {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_dim_size}, {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_dim_stride}, {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_dim_then_sub_dim_count}}; @@ -1089,7 +1088,6 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { {AIE2P::LDA_dms_lda_spill, AIE2P::sub_dim_size}, {AIE2P::LDA_dms_lda_spill, AIE2P::sub_dim_stride}, {AIE2P::LDA_dms_lda_spill, AIE2P::sub_dim_count}, - {AIE2P::LDA_dms_lda_spill, 
AIE2P::sub_hi_dim_then_sub_mod}, {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_dim_then_sub_dim_size}, {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_dim_then_sub_dim_stride}, {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_dim_then_sub_dim_count}}; @@ -1277,7 +1275,6 @@ AIE2PInstrInfo::getTiedRegInfo(unsigned Opcode) const { SubRegSplit(AIE2P::sub_dim_size), SubRegSplit(AIE2P::sub_dim_stride), SubRegSplit(AIE2P::sub_dim_count), - SubRegSplit(AIE2P::sub_hi_dim_then_sub_mod, /*IsUndef=*/true), SubRegSplit(AIE2P::sub_hi_dim_then_sub_dim_size), SubRegSplit(AIE2P::sub_hi_dim_then_sub_dim_stride), SubRegSplit(AIE2P::sub_hi_dim_then_sub_dim_count)}; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td index 14822e9880b6..52329a838ffc 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td @@ -225,7 +225,7 @@ foreach instr = [ // Define _split variants for instructions using 3D registers class Split3DInstr : SplitPseudo {} + eDN:$dim_size2, eDJ:$dim_stride2, eDC:$dim_count2)> {} foreach instr = [PADDA_3D, PADDB_3D, PADDS_3D, PADD_3D_pseudo] in def instr # _split : Split3DInstr; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp index bfcc2a049a94..410ef25be5a6 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp @@ -505,7 +505,6 @@ const std::set &AIE2PRegisterInfo::getSubRegSplit(int RegClassId) const { AIE2P::sub_dim_size, AIE2P::sub_dim_stride, AIE2P::sub_dim_count, - AIE2P::sub_hi_dim_then_sub_mod, AIE2P::sub_hi_dim_then_sub_dim_size, AIE2P::sub_hi_dim_then_sub_dim_stride, AIE2P::sub_hi_dim_then_sub_dim_count}; diff --git a/llvm/test/CodeGen/AIE/aie2/postrapseudos/dim_spill.mir b/llvm/test/CodeGen/AIE/aie2/postrapseudos/dim_spill.mir index 743562fc33a0..eaa70a19c4f3 100644 --- a/llvm/test/CodeGen/AIE/aie2/postrapseudos/dim_spill.mir +++ 
b/llvm/test/CodeGen/AIE/aie2/postrapseudos/dim_spill.mir @@ -119,18 +119,16 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_spill -36, implicit $sp :: (load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_spill -32, implicit $sp :: (load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_spill -28, implicit $sp :: (load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_spill -24, implicit $sp :: (load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_spill -20, implicit $sp :: (load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_spill -16, implicit $sp :: (load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_spill -12, implicit $sp :: (load (s32) from %stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_spill -24, implicit $sp :: (load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_spill -20, implicit $sp :: (load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_spill -16, implicit $sp :: (load (s32) from %stack.0 + 24) ; CHECK-NEXT: ST_dms_spill $m1, -80, implicit $sp :: (store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_spill $dn1, -76, implicit $sp :: (store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_spill $dj1, -72, implicit $sp :: (store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_spill $dc1, -68, implicit $sp :: (store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_spill $m5, -64, implicit $sp :: (store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_spill $dn5, -60, implicit $sp :: (store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_spill $dj5, -56, implicit $sp :: (store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_spill $dc5, -52, implicit $sp :: (store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_spill $dn5, -64, implicit $sp :: (store (s32) into %stack.1 + 16) + ; CHECK-NEXT: ST_dms_spill $dj5, -60, implicit $sp :: (store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_spill $dc5, -56, implicit $sp :: (store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL -40, implicit $sp :: (load (s256) 
from %stack.0, align 4) ST_DS_SPILL $d1_3d, -80, implicit $sp :: (store (s256) into %stack.1, align 4) ... @@ -148,9 +146,9 @@ body: | bb.0 (align 16): ; CHECK-LABEL: name: test_ds_partial - ; CHECK: $dj5 = LDA_dms_spill -16, implicit $sp :: (load (s32) from %stack.0 + 24) + ; CHECK: $dj5 = LDA_dms_spill -20, implicit $sp :: (load (s32) from %stack.0 + 20) ; CHECK-NEXT: $dj6 = MOV_mv_scl $dj5 - ; CHECK-NEXT: ST_dms_spill $dj6, -56, implicit $sp :: (store (s32) into %stack.1 + 24) + ; CHECK-NEXT: ST_dms_spill $dj6, -60, implicit $sp :: (store (s32) into %stack.1 + 20) $d1_3d = LDA_DS_SPILL -40, implicit $sp :: (load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, -80, implicit $sp :: (store (s256) into %stack.1, align 4) @@ -171,19 +169,17 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_spill -36, implicit $sp :: (load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_spill -32, implicit $sp :: (load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_spill -28, implicit $sp :: (load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_spill -24, implicit $sp :: (load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_spill -20, implicit $sp :: (load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_spill -16, implicit $sp :: (load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_spill -12, implicit $sp :: (load (s32) from %stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_spill -24, implicit $sp :: (load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_spill -20, implicit $sp :: (load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_spill -16, implicit $sp :: (load (s32) from %stack.0 + 24) ; CHECK-NEXT: $dj6 = MOV_mv_scl $dj5 ; CHECK-NEXT: ST_dms_spill $m2, -80, implicit $sp :: (store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_spill $dn2, -76, implicit $sp :: (store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_spill $dj2, -72, implicit $sp :: (store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_spill $dc2, -68, 
implicit $sp :: (store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_spill $m6, -64, implicit $sp :: (store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_spill $dn6, -60, implicit $sp :: (store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_spill $dj6, -56, implicit $sp :: (store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_spill $dc6, -52, implicit $sp :: (store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_spill $dn6, -64, implicit $sp :: (store (s32) into %stack.1 + 16) + ; CHECK-NEXT: ST_dms_spill $dj6, -60, implicit $sp :: (store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_spill $dc6, -56, implicit $sp :: (store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL -40, implicit $sp :: (load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, -80, implicit $sp :: (store (s256) into %stack.1, align 4) @@ -206,19 +202,17 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_spill -36, implicit $sp :: (volatile load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_spill -32, implicit $sp :: (volatile load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_spill -28, implicit $sp :: (volatile load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_spill -24, implicit $sp :: (volatile load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_spill -20, implicit $sp :: (volatile load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_spill -16, implicit $sp :: (volatile load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_spill -12, implicit $sp :: (volatile load (s32) from %stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_spill -24, implicit $sp :: (volatile load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_spill -20, implicit $sp :: (volatile load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_spill -16, implicit $sp :: (volatile load (s32) from %stack.0 + 24) ; CHECK-NEXT: $dj6 = MOV_mv_scl $dj5 ; CHECK-NEXT: ST_dms_spill undef $m2, -80, implicit $sp :: (volatile store (s32) into %stack.1) ; 
CHECK-NEXT: ST_dms_spill undef $dn2, -76, implicit $sp :: (volatile store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_spill undef $dj2, -72, implicit $sp :: (volatile store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_spill undef $dc2, -68, implicit $sp :: (volatile store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_spill undef $m6, -64, implicit $sp :: (volatile store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_spill undef $dn6, -60, implicit $sp :: (volatile store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_spill $dj6, -56, implicit $sp :: (volatile store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_spill undef $dc6, -52, implicit $sp :: (volatile store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_spill undef $dn6, -64, implicit $sp :: (volatile store (s32) into %stack.1 + 16) + ; CHECK-NEXT: ST_dms_spill $dj6, -60, implicit $sp :: (volatile store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_spill undef $dc6, -56, implicit $sp :: (volatile store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL -40, implicit $sp :: (volatile load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, -80, implicit $sp :: (volatile store (s256) into %stack.1, align 4) diff --git a/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-create.mir b/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-create.mir index 28cc3f4271ab..91ce9b12cf09 100644 --- a/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-create.mir +++ b/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-create.mir @@ -25,9 +25,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = PADDA_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = PADDB_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed 
[[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = PADDS_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = PADDA_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = PADDB_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = PADDS_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count %20:ep = COPY $p0 %100:ed = COPY $d1 %20:ep, %100.sub_dim_count:ed = PADDA_2D killed %20, killed %100 @@ -51,9 +51,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = PADDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = PADDS_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDS_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count %20:ep = COPY $p0 %100:eds = COPY $d1_3d %20:ep, %100.sub_dim_count:eds, 
%100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D killed %20, killed %100 @@ -76,9 +76,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:mwa, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_dmw_lda_w_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %3:mamm, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_dmw_lda_am_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %4:mwa, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = VLDB_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_2D_dmw_lda_w_split:%[0-9]+]]:mwa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_dmw_lda_w_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_2D_dmw_lda_am_split:%[0-9]+]]:mamm, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_dmw_lda_am_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_2D_split:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %0:mwa, %20:ep, %100.sub_dim_count:ed = VLDA_2D_dmw_lda_w killed %20, 
killed %100 :: (load (<8 x s32>) from unknown-address) @@ -101,9 +101,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:mwa, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_dmw_lda_w_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %3:mamm, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_dmw_lda_am_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %4:mwa, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_3D_dmw_lda_w_split:%[0-9]+]]:mwa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_dmw_lda_w_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed 
[[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_3D_dmw_lda_am_split:%[0-9]+]]:mamm, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_dmw_lda_am_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_3D_split:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %0:mwa, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_dmw_lda_w killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -149,8 +149,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead [[VLD_3D_pseudo_split:%[0-9]+]]:mwa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_3D_pseudo_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead [[VLD_3D_pseudo_split1:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_3D_pseudo_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLD_3D_pseudo_split:%[0-9]+]]:mwa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_3D_pseudo_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLD_3D_pseudo_split1:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_3D_pseudo_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %0:mwa, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLD_3D_pseudo killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -172,7 +172,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:mbms, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = VLDA_2D_CONV_FP32_BF16_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_2D_CONV_FP32_BF16_split:%[0-9]+]]:mbms, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_CONV_FP32_BF16_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %1:mbms, dead %20:ep, dead %100.sub_dim_count:ed = VLDA_2D_CONV_FP32_BF16 killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -193,7 +193,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:mbms, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_CONV_FP32_BF16_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDA_3D_CONV_FP32_BF16_split:%[0-9]+]]:mbms, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_CONV_FP32_BF16_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) %20:ep = 
COPY $p0 %100:eds = COPY $d1_3d dead %1:mbms, dead %20:ep, dead %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_CONV_FP32_BF16 killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -215,10 +215,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDB_2D_UNPACK_S8_S4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %3:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDB_2D_UNPACK_S16_S8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %4:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDB_2D_UNPACK_D8_D4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %5:mxs, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = VLDB_2D_UNPACK_D16_D8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_2D_UNPACK_S8_S4_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_UNPACK_S8_S4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_2D_UNPACK_S16_S8_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_UNPACK_S16_S8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed 
[[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_2D_UNPACK_D8_D4_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_UNPACK_D8_D4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_2D_UNPACK_D16_D8_split:%[0-9]+]]:mxs, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_UNPACK_D16_D8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %0:mxs, %20:ep, %100.sub_dim_count:ed = VLDB_2D_UNPACK_S8_S4 killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -242,10 +242,10 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_S8_S4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %3:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_S16_S8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, 
killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %4:mxs, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_D8_D4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) - ; CHECK-NEXT: dead %5:mxs, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_D16_D8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_3D_UNPACK_S8_S4_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_S8_S4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_3D_UNPACK_S16_S8_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_S16_S8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_3D_UNPACK_D8_D4_split:%[0-9]+]]:mxs, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_D8_D4_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) + ; CHECK-NEXT: dead [[VLDB_3D_UNPACK_D16_D8_split:%[0-9]+]]:mxs, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_D16_D8_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit $crunpacksign :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %0:mxs, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_UNPACK_S8_S4 killed %20, killed %100 :: (load (<8 x s32>) from unknown-address) @@ -272,9 +272,9 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mws = COPY $wl0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mams = COPY $wl0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_dmw_sts_w_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_128_split killed [[COPY2]], killed [[COPY]], killed 
[[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = VST_2D_dmw_sts_am_split killed [[COPY3]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_dmw_sts_w_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_128_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_dmw_sts_am_split killed [[COPY3]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %0:mws = COPY $wl0 @@ -300,7 +300,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mqqa = COPY $wl0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = ST_2D_dmv_sts_q_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<4 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = ST_2D_dmv_sts_q_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (<4 x s32>)) %20:ep 
= COPY $p0 %100:ed = COPY $d1 %0:mqqa = COPY $wl0 @@ -325,9 +325,9 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mws = COPY $wl0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mams = COPY $wl0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_dmw_sts_w_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_128_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_dmw_sts_am_split killed [[COPY3]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_dmw_sts_w_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed 
[[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_128_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_dmw_sts_am_split killed [[COPY3]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %0:mws = COPY $wl0 @@ -353,7 +353,7 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mqqa = COPY $wl0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_dmv_sts_q_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<4 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, 
[[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_dmv_sts_q_split killed [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (<4 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %0:mqqa = COPY $wl0 @@ -375,14 +375,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S32_D16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S64_D32_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S32_D8_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S64_D16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S32_S16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed 
[[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S64_S32_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S32_S8_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VLDA_2D_UPS_S64_S16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S32_D16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S64_D32_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S32_D8_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed 
[[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S64_D16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S32_S16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S64_S32_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S32_S8_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_2D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDA_2D_UPS_S64_S16_split $s0, [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %0:mbms, %20:ep, %100.sub_dim_count:ed = VLDA_2D_UPS_S32_D16 $s0, %20, killed %100, 
implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) @@ -410,14 +410,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_D32_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, 
[[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_D16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_S16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %2:mbms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_S32_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_S8_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) - ; CHECK-NEXT: %3:mcms, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_S16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_D32_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D8_split $s0, killed [[COPY]], 
killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_D16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_S16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D16_split:%[0-9]+]]:mbms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_S32_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load 
(<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_S8_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) + ; CHECK-NEXT: [[VLDA_3D_UPS_S32_D8_split:%[0-9]+]]:mcms, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S64_S16_split $s0, killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %0:mbms, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_S32_D16 $s0, killed %20, killed %100, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) @@ -446,10 +446,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mxs = COPY $x0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_PACK_D4_D8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_PACK_D8_D16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, 
[[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_PACK_S4_S8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_PACK_S8_S16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_PACK_D4_D8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_PACK_D8_D16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_PACK_S4_S8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_PACK_S8_S16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %200:mxs = COPY $x0 @@ -475,10 +475,10 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mxs = COPY $x0 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_D4_D8_split [[COPY]], [[COPY1]].sub_mod, 
[[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_D8_D16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_S4_S8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_S8_S16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_D4_D8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, 
[[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_D8_D16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat, implicit $crpacksign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_S4_S8_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_PACK_S8_S16_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, [[COPY2]], implicit $crsat :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %200:mxs = COPY $x0 @@ -503,14 +503,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_D8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x 
s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_D16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_D16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_D32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_S8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_S16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_2D_SRS_S16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, 
[[COPY1]].sub_dim_count:ed = VST_2D_SRS_S32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_D8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_D16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_D16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_D32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_S8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: 
(store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_S16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_S16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_2D_SRS_S32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %20:ep, %100.sub_dim_count:ed = VST_2D_SRS_D8_S32 killed %20, killed %100, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) @@ -539,14 +539,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, 
[[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef 
[[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, 
[[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed 
[[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S8_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S16_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S16_S32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_S32_S64_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, 
killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VST_3D_SRS_D8_S32 killed %20, killed %100, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) @@ -574,7 +574,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = VST_CONV_2D_BF16_FP32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = VST_CONV_2D_BF16_FP32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:ed = COPY $d1 %20:ep, %100.sub_dim_count:ed = VST_CONV_2D_BF16_FP32 killed %20, killed %100, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>) into unknown-address) @@ -595,7 +595,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VST_CONV_3D_BF16_FP32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, 
killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VST_CONV_3D_BF16_FP32_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VST_CONV_3D_BF16_FP32 killed %20, killed %100, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>) into unknown-address) @@ -616,7 +616,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:mwa, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = VLDB_2D_128_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s16>)) + ; CHECK-NEXT: dead [[VLDB_2D_128_split:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = VLDB_2D_128_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (load (<8 x s16>)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %2:mwa, dead %20:ep, dead %100.sub_dim_count:ed = VLDB_2D_128 killed %20, killed %100 :: (load (<8 x s16>) from unknown-address) @@ -637,7 +637,7 @@ body: | ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:mwa, dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_128_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s16>)) + ; CHECK-NEXT: dead [[VLDB_3D_128_split:%[0-9]+]]:mwa, dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_128_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (<8 x s16>)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %2:mwa, dead %20:ep, dead %100.sub_dim_count:eds, dead %100.sub_hi_dim_then_sub_dim_count:eds = VLDB_3D_128 killed %20, killed %100 :: (load (<8 x s16>) from unknown-address) @@ -658,11 +658,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:msclst, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_dms_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %3:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_S8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %4:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = 
LDA_2D_U8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %5:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_S16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %6:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_U16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_dms_lda_split:%[0-9]+]]:msclst, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_dms_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_S8_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_S8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_U8_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_U8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_S16_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_S16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_U16_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_U16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %2:msclst, %20:ep, %100.sub_dim_count:ed = LDA_2D_dms_lda %20, 
%100 :: (load (s32) from unknown-address) @@ -687,11 +687,11 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:msclst, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %3:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_S8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %4:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_U8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %5:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_S16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) - ; CHECK-NEXT: dead %6:er, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds 
= LDA_3D_U16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_3D_dms_lda_split:%[0-9]+]]:msclst, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_3D_S8_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_S8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_3D_U8_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_U8_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_3D_S16_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_S16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, 
[[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_3D_U16_dmhb_lda_split:%[0-9]+]]:er, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_U16_dmhb_lda_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %2:msclst, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda %20, %100 :: (load (s32) from unknown-address) @@ -716,7 +716,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 - ; CHECK-NEXT: dead %2:mqqa, [[COPY]]:ep, [[COPY1]].sub_dim_count:ed = LDA_2D_dmv_lda_q_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_2D_dmv_lda_q_split:%[0-9]+]]:mqqa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:ed = LDA_2D_dmv_lda_q_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (load (s32)) %20:ep = COPY $p0 %100:ed = COPY $d1 dead %2:mqqa, %20:ep, %100.sub_dim_count:ed = LDA_2D_dmv_lda_q %20, %100 :: (load (s32) from unknown-address) @@ -737,7 +737,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: dead %2:mqqa, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dmv_lda_q_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, 
[[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) + ; CHECK-NEXT: dead [[LDA_3D_dmv_lda_q_split:%[0-9]+]]:mqqa, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dmv_lda_q_split [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (load (s32)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d dead %2:mqqa, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dmv_lda_q %20, %100 :: (load (s32) from unknown-address) @@ -759,9 +759,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ed = COPY $d1 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = ST_2D_dms_sts_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (store (s32)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = ST_2D_S8_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (store (s8)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:ed = ST_2D_S16_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (s16)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = ST_2D_dms_sts_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count :: (store (s32)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = ST_2D_S8_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, 
[[COPY1]].sub_dim_count :: (store (s8)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:ed = ST_2D_S16_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count :: (store (s16)) %20:ep = COPY $p0 %100:ed = COPY $d1 %2:er = COPY $r0 @@ -786,9 +786,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_dms_sts_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s32)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_S8_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s8)) - ; CHECK-NEXT: dead [[COPY]]:ep, dead [[COPY1]].sub_dim_count:eds, dead [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_S16_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s16)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead 
[[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_dms_sts_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s32)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_S8_split [[COPY2]], [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s8)) + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = ST_3D_S16_split [[COPY2]], killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count :: (store (s16)) %20:ep = COPY $p0 %100:eds = COPY $d1_3d %2:er = COPY $r0 diff --git a/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-replace.mir b/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-replace.mir index 00bc697cb385..8261b55e0cd8 100644 --- a/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-replace.mir +++ b/llvm/test/CodeGen/AIE/aie2/ra/split-instrs-replace.mir @@ -156,7 +156,7 @@ body: | bb.3: liveins: $dc4, $dj4, $dn4, $p0, $m0, $dn0, $dj0, $dc0 - $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 + $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 PseudoRET implicit $lr, implicit killed renamable $p0, implicit 
killed renamable $dc0, implicit killed renamable $dc4 ... @@ -193,9 +193,9 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDB_3D killed $p0, $d0_3d ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDS_3D killed $p0, $d0_3d ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 - $p0, $dc0, $dc4 = PADDB_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 - $p0, $dc0, $dc4 = PADDS_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 + $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 + $p0, $dc0, $dc4 = PADDB_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 + $p0, $dc0, $dc4 = PADDS_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -232,9 +232,9 @@ body: | ; CHECK-NEXT: dead $amll0, $p0, $dc0, $dc4 = VLDA_3D_dmw_lda_am killed $p0, $d0_3d :: (load (<8 x s32>)) ; CHECK-NEXT: dead $wl0, $p0, $dc0, $dc4 = VLDB_3D killed $p0, $d0_3d :: (load (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - dead $wl0, $p0, $dc0, $dc4 = VLDA_3D_dmw_lda_w_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) - dead $amll0, $p0, $dc0, $dc4 = VLDA_3D_dmw_lda_am_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) - dead $wl0, $p0, $dc0, $dc4 = VLDB_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (load (<8 x s32>) from unknown-address) + dead $wl0, $p0, $dc0, $dc4 = VLDA_3D_dmw_lda_w_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) + dead $amll0, $p0, $dc0, $dc4 = VLDA_3D_dmw_lda_am_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) + dead $wl0, $p0, $dc0, $dc4 = VLDB_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 :: (load (<8 x s32>) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -289,10 +289,10 @@ body: | ; CHECK-NEXT: dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D8_D4 killed $p0, $d0_3d, implicit $crunpacksign :: (load (<8 x s32>)) ; CHECK-NEXT: dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D16_D8 killed $p0, $d0_3d, implicit $crunpacksign :: (load (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_S8_S4_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) - dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_S16_S8_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) - dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D8_D4_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit $crunpacksign :: (load (<8 x s32>) from unknown-address) - dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D16_D8_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4, implicit $crunpacksign :: (load (<8 x s32>) from unknown-address) + dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_S8_S4_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) + dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_S16_S8_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) + dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D8_D4_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit $crunpacksign :: (load (<8 x s32>) from unknown-address) + dead $x0, $p0, $dc0, $dc4 = VLDB_3D_UNPACK_D16_D8_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4, implicit $crunpacksign :: (load (<8 x s32>) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable 
$dc4 ... @@ -308,7 +308,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead $bml0, $p0, $dc0, $dc4 = VLDA_3D_CONV_FP32_BF16 killed $p0, $d0_3d :: (load (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - dead $bml0, $p0, $dc0, $dc4 = VLDA_3D_CONV_FP32_BF16_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) + dead $bml0, $p0, $dc0, $dc4 = VLDA_3D_CONV_FP32_BF16_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (<8 x s32>) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -369,10 +369,10 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_PACK_S4_S8 killed $p0, $d0_3d, $x0, implicit $crsat :: (store (<8 x s32>)) ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_PACK_S8_S16 killed $p0, $d0_3d, $x0, implicit $crsat :: (store (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = VST_3D_PACK_D4_D8_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $x0, implicit $crsat, implicit $crpacksign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_PACK_D8_D16_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $x0, implicit $crsat, implicit $crpacksign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_PACK_S4_S8_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $x0, implicit $crsat :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_PACK_S8_S16_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4, $x0, implicit $crsat :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_PACK_D4_D8_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $x0, 
implicit $crsat, implicit $crpacksign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_PACK_D8_D16_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $x0, implicit $crsat, implicit $crpacksign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_PACK_S4_S8_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $x0, implicit $crsat :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_PACK_S8_S16_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4, $x0, implicit $crsat :: (store (<8 x s32>) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc4 ... @@ -406,9 +406,9 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_128 killed $wl0, killed $p0, $d0_3d :: (store (<8 x s32>)) ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_dmw_sts_am killed $amll0, killed $p0, $d0_3d :: (store (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = VST_3D_dmw_sts_w_split $wl0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_128_split killed $wl0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_dmw_sts_am_split killed $amll0, killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_dmw_sts_w_split $wl0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_128_split killed $wl0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_dmw_sts_am_split killed $amll0, killed $p0, killed 
$m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 :: (store (<8 x s32>) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -423,7 +423,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $p0, $dc0, $dc4 = ST_3D_dmv_sts_q killed $q0, killed $p0, $d0_3d :: (store (<4 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = ST_3D_dmv_sts_q_split killed $q0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (store (<4 x s32>) into unknown-address) + $p0, $dc0, $dc4 = ST_3D_dmv_sts_q_split killed $q0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (store (<4 x s32>) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -475,14 +475,14 @@ body: | ; CHECK-NEXT: $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_S8 $s0, killed $p0, $d0_3d, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) ; CHECK-NEXT: $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_S16 $s0, killed $p0, $d0_3d, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_D16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) - $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_D32_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) - $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_D8_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def 
$srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) - $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_D16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) - $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_S16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) - $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_S32_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) - $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_S8_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) - $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_S16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) + $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_D16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) + $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_D32_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) + $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_D8_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) + $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_D16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat, implicit $crupssign :: (load (<8 x s32>) from unknown-address) + $bml0, 
$p0, $dc0, $dc4 = VLDA_3D_UPS_S32_S16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) + $bml0, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_S32_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) + $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S32_S8_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) + $cm1, $p0, $dc0, $dc4 = VLDA_3D_UPS_S64_S16_split $s0, killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, implicit-def $srups_of, implicit $crsat :: (load (<8 x s32>) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -534,14 +534,14 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_SRS_S16_S32 killed $p0, $d0_3d, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_3D_SRS_S32_S64 killed $p0, $d0_3d, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = VST_3D_SRS_D8_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_D16_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_D16_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def 
$srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_D32_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_S8_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_S16_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_S16_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) - $p0, $dc0, $dc4 = VST_3D_SRS_S32_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_D8_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_D16_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_D16_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_D32_S64_split killed $p0, $m0, $dn0, 
$dj0, $dc0, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd, implicit $crsrssign :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_S8_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_S16_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $cm1, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_S16_S32_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_3D_SRS_S32_S64_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $bml0, $s0, implicit-def $srsrs_of, implicit $crsat, implicit $crrnd :: (store (<8 x s32>) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -573,7 +573,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $p0, $dc0, $dc4 = VST_CONV_3D_BF16_FP32 killed $p0, $d0_3d, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = VST_CONV_3D_BF16_FP32_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>) into unknown-address) + $p0, $dc0, $dc4 = VST_CONV_3D_BF16_FP32_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4, $bml0, implicit-def $srf2fflags, implicit $crrnd, implicit $crf2fmask :: (store (<8 x s32>) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -604,7 +604,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead $wl0, $p0, $dc0, $dc4 = VLDB_3D_128 killed $p0, $d0_3d :: (load (<8 x s16>)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - dead $wl0, $p0, $dc0, $dc4 = VLDB_3D_128_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (load (<8 x s16>) from unknown-address) + dead $wl0, $p0, $dc0, $dc4 = VLDB_3D_128_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 :: (load (<8 x s16>) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -649,11 +649,11 @@ body: | ; CHECK-NEXT: $r0, $p0, $dc0, $dc4 = LDA_3D_S16_dmhb_lda killed $p0, $d0_3d :: (load (s32)) ; CHECK-NEXT: $r0, $p0, $dc0, $dc4 = LDA_3D_U16_dmhb_lda killed $p0, $d0_3d :: (load (s32)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $r0, $p0, $dc0, $dc4 = LDA_3D_dms_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) - $r0, $p0, $dc0, $dc4 = LDA_3D_S8_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) - $r0, $p0, $dc0, $dc4 = LDA_3D_U8_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) - $r0, $p0, $dc0, $dc4 = LDA_3D_S16_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) - $r0, $p0, $dc0, $dc4 = LDA_3D_U16_dmhb_lda_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (load (s32) from unknown-address) + $r0, $p0, $dc0, $dc4 = LDA_3D_dms_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) + $r0, $p0, $dc0, $dc4 = LDA_3D_S8_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) + $r0, $p0, $dc0, $dc4 = LDA_3D_U8_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) + $r0, $p0, $dc0, $dc4 = LDA_3D_S16_dmhb_lda_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) + $r0, $p0, $dc0, $dc4 = LDA_3D_U16_dmhb_lda_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 :: (load (s32) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed 
renamable $dc4 ... @@ -684,7 +684,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $q0, $p0, $dc0, $dc4 = LDA_3D_dmv_lda_q killed $p0, $d0_3d :: (load (s32)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $q0, $p0, $dc0, $dc4 = LDA_3D_dmv_lda_q_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) + $q0, $p0, $dc0, $dc4 = LDA_3D_dmv_lda_q_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (load (s32) from unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -714,7 +714,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $p0, $dc0, $dc4 = ST_3D_dms_sts $r0, killed $p0, $d0_3d :: (store (s32)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = ST_3D_dms_sts_split $r0, killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (store (s32) into unknown-address) + $p0, $dc0, $dc4 = ST_3D_dms_sts_split $r0, killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 :: (store (s32) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -747,7 +747,7 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = ST_3D_S8 $r0, $p0, $d0_3d :: (store (s8)) ; CHECK-NEXT: $p0, $dc0, $dc4 = ST_3D_S16 $r0, killed $p0, $d0_3d :: (store (s16)) ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = ST_3D_S8_split $r0, $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 :: (store (s8) into unknown-address) - $p0, $dc0, $dc4 = ST_3D_S16_split $r0, killed $p0, $m0, $dn0, $dj0, $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 :: (store (s16) into unknown-address) + $p0, $dc0, $dc4 = ST_3D_S8_split $r0, $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 :: (store (s8) into unknown-address) + $p0, $dc0, $dc4 = ST_3D_S16_split $r0, killed $p0, $m0, $dn0, $dj0, $dc0, killed $dn4, killed $dj4, killed $dc4 :: (store (s16) into unknown-address) PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
diff --git a/llvm/test/CodeGen/AIE/aie2/ra/tie-subregs-flow-3d.mir b/llvm/test/CodeGen/AIE/aie2/ra/tie-subregs-flow-3d.mir index 7f92425e94a4..563d492d2d85 100644 --- a/llvm/test/CodeGen/AIE/aie2/ra/tie-subregs-flow-3d.mir +++ b/llvm/test/CodeGen/AIE/aie2/ra/tie-subregs-flow-3d.mir @@ -34,7 +34,6 @@ body: | ; CHECK-NEXT: $dj4 = MOV_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 ; CHECK-NEXT: $p0, dead $dc0, $dc4 = PADDA_3D killed $p0, killed $d0_3d ; CHECK-NEXT: DelayedSchedBarrier implicit killed renamable $p0, implicit killed renamable $dc4 %0:em = COPY $r0 @@ -47,10 +46,10 @@ body: | %7:edc = COPY $r7 %8:ep = COPY $p0 - %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count + %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count %200:ep, %300:edc, %400:edc = PADDA_3D %8, %100 - %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %300, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %400, %subreg.sub_hi_dim_then_sub_dim_count + %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %300, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %400, %subreg.sub_hi_dim_then_sub_dim_count %201:ep, %301:edc, %401:edc = PADDA_3D %200, %101 PseudoRET implicit $lr, implicit %201, 
implicit %401 @@ -88,27 +87,26 @@ body: | ; CHECK-NEXT: $dn2 = MOV_mv_scl killed $r1 ; CHECK-NEXT: $dj2 = MOV_mv_scl killed $r2 ; CHECK-NEXT: $dc2 = MOV_mv_scl killed $r3 - ; CHECK-NEXT: $m6 = MOV_mv_scl killed $r4 ; CHECK-NEXT: $dn6 = MOV_mv_scl killed $r5 ; CHECK-NEXT: $dj6 = MOV_mv_scl killed $r6 ; CHECK-NEXT: $dc6 = MOV_mv_scl killed $r7 ; CHECK-NEXT: ST_dms_spill killed $m1, -32, implicit $sp :: (store (s32) into %stack.0) - ; CHECK-NEXT: $m1 = MOV_mv_scl $m2 + ; CHECK-NEXT: $dc1 = MOV_mv_scl $dc2 ; CHECK-NEXT: $dn1 = MOV_mv_scl $dn2 ; CHECK-NEXT: $dj1 = MOV_mv_scl $dj2 - ; CHECK-NEXT: $dc1 = MOV_mv_scl $dc2 - ; CHECK-NEXT: $m5 = MOV_mv_scl $m6 + ; CHECK-NEXT: $m1 = MOV_mv_scl $m2 + ; CHECK-NEXT: $dc5 = MOV_mv_scl $dc6 ; CHECK-NEXT: $dn5 = MOV_mv_scl $dn6 ; CHECK-NEXT: $dj5 = MOV_mv_scl $dj6 - ; CHECK-NEXT: $dc5 = MOV_mv_scl $dc6 - ; CHECK-NEXT: $p0, $dc1, $dc5 = PADDA_3D killed $p0, $d1_3d + ; CHECK-NEXT: $p0, $dc1, $dc5 = PADDA_3D killed $p0, killed $d1_3d ; CHECK-NEXT: $m1 = LDA_dms_spill -32, implicit $sp :: (load (s32) from %stack.0) ; CHECK-NEXT: $p2 = MOV_mv_scl $p1 ; CHECK-NEXT: $p2, $dc2, $dc6 = PADDA_3D killed $p2, $d2_3d ; CHECK-NEXT: $dn1 = MOV_mv_scl killed $r9 ; CHECK-NEXT: $dj1 = MOV_mv_scl killed $r10 - ; CHECK-NEXT: $dn5 = MOV_mv_scl killed $r13 + ; CHECK-NEXT: $m5 = MOV_mv_scl killed $r12 ; CHECK-NEXT: frame-destroy PADDB_sp_imm -32, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $dn5 = MOV_mv_scl killed $r13 ; CHECK-NEXT: $dj5 = MOV_mv_scl killed $r14 ; CHECK-NEXT: $dc2 = MOV_mv_scl killed $dc1 ; CHECK-NEXT: RET implicit $lr @@ -117,7 +115,6 @@ body: | ; CHECK-NEXT: $dc5 = MOV_mv_scl killed $r15 ; CHECK-NEXT: $p0, dead $dc2, dead $dc6 = PADDA_3D killed $p0, killed $d2_3d ; CHECK-NEXT: $p1, dead $dc1, dead $dc5 = PADDA_3D killed $p1, killed $d1_3d - ; CHECK-NEXT: dead renamable $m5 = KILL killed $r12 ; CHECK-NEXT: DelayedSchedBarrier implicit killed renamable $p0, implicit killed renamable $p1, implicit killed renamable $p2, implicit killed 
$m0, implicit killed $d3_3d %0:em = COPY $r0 %1:edn = COPY $r1 @@ -140,16 +137,16 @@ body: | %20:ep = COPY $p0 %21:ep = COPY $p1 - %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count + %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count %200:ep, %300:edc, %400:edc = PADDA_3D %20, %100 - %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count + %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count %201:ep, %301:edc, %401:edc = PADDA_3D %21, %101 %102:eds = REG_SEQUENCE %10, %subreg.sub_mod, %11, %subreg.sub_dim_size, %12, %subreg.sub_dim_stride, %13, %subreg.sub_dim_count, %14, %subreg.sub_hi_dim_then_sub_mod, %15, %subreg.sub_hi_dim_then_sub_dim_size, %16, %subreg.sub_hi_dim_then_sub_dim_stride, %17, %subreg.sub_hi_dim_then_sub_dim_count %202:ep, %302:edc, %402:edc = PADDA_3D %21, %102 - %103:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %300, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %400, %subreg.sub_hi_dim_then_sub_dim_count + %103:eds = REG_SEQUENCE %0, 
%subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %300, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %400, %subreg.sub_hi_dim_then_sub_dim_count %203:ep, %303:edc, %403:edc = PADDA_3D %200, %103 PseudoRET implicit $lr, implicit %203, implicit %202, implicit %201, implicit $m0, implicit $d3_3d @@ -172,8 +169,8 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $m0 = MOV_mv_scl killed $r0 ; CHECK-NEXT: renamable $r0 = MOVA_lda_cg 0 - ; CHECK-NEXT: renamable $r9 = GE renamable $r0, renamable $r8 - ; CHECK-NEXT: JNZ killed renamable $r9, %bb.3 + ; CHECK-NEXT: renamable $r4 = GE renamable $r0, renamable $r8 + ; CHECK-NEXT: JNZ killed renamable $r4, %bb.3 ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP @@ -241,7 +238,7 @@ body: | %100:edc = PHI %3, %bb.1, %102, %bb.3 %200:edc = PHI %7, %bb.1, %102, %bb.3 ST_dms_sts_idx_imm %60, %70, 0 :: (store (s32)) - %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %100, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %200, %subreg.sub_hi_dim_then_sub_dim_count + %101:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %100, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %200, %subreg.sub_hi_dim_then_sub_dim_count %10:ep, %102:edc, %202:edc = PADDA_3D %70, %101 %13:er = nuw nsw ADD_add_r_ri killed %60, 1, implicit-def $srcarry %19:er = EQ %9, %13 @@ -270,7 +267,6 @@ body: | ; CHECK-NEXT: $dj4 = MOV_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -302,11 +298,11 @@ body: | %7:edc = COPY $r7 %8:ep = COPY 
killed $p0 %9:er = COPY killed $r8 - %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count + %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count %101:ep, %102:edc, %103:edc = PADDA_3D %8, %100 PseudoJNZ killed %9, %bb.2 bb.1: - %200:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %102, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count + %200:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %102, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count %201:ep, %202:edc, %203:edc = PADDA_3D %101, %200 bb.2: %10:edc = PHI %202, %bb.1, %102, %bb.0 @@ -336,7 +332,6 @@ body: | ; CHECK-NEXT: $dj4 = MOV_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: @@ -375,15 +370,15 @@ body: | %8:ep = COPY killed $p0 %9:er = COPY killed $r8 %30:edc = COPY $r9 - %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, 
%subreg.sub_hi_dim_then_sub_dim_count + %100:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %3, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %7, %subreg.sub_hi_dim_then_sub_dim_count %101:ep, %102:edc, %103:edc = PADDA_3D %8, %100 PseudoJNZ killed %9, %bb.2 bb.1: - %200:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %102, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count + %200:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %102, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count %201:ep, %202:edc, %203:edc = PADDA_3D %101, %200 PseudoJ_jump_imm %bb.3 bb.2: - %300:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %30, %subreg.sub_dim_count, %4, %subreg.sub_hi_dim_then_sub_mod, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count + %300:eds = REG_SEQUENCE %0, %subreg.sub_mod, %1, %subreg.sub_dim_size, %2, %subreg.sub_dim_stride, %30, %subreg.sub_dim_count, %5, %subreg.sub_hi_dim_then_sub_dim_size, %6, %subreg.sub_hi_dim_then_sub_dim_stride, %103, %subreg.sub_hi_dim_then_sub_dim_count %301:ep, %302:edc, %303:edc = PADDA_3D %101, %300 bb.3: %10:edc = PHI %202, %bb.1, %302, %bb.2 diff --git a/llvm/test/CodeGen/AIE/aie2/run-physreg-copy.mir b/llvm/test/CodeGen/AIE/aie2/run-physreg-copy.mir index 120bebb1e3c1..f78b8ca11d0f 100644 --- a/llvm/test/CodeGen/AIE/aie2/run-physreg-copy.mir +++ b/llvm/test/CodeGen/AIE/aie2/run-physreg-copy.mir @@ -26,14 +26,13 @@ name: test_copy_ds body: | bb.0: ; CHECK-LABEL: name: 
test_copy_ds - ; CHECK: $m1 = MOV_mv_scl $m2 + ; CHECK: $dc1 = MOV_mv_scl $dc2 ; CHECK-NEXT: $dn1 = MOV_mv_scl $dn2 ; CHECK-NEXT: $dj1 = MOV_mv_scl $dj2 - ; CHECK-NEXT: $dc1 = MOV_mv_scl $dc2 - ; CHECK-NEXT: $m5 = MOV_mv_scl $m6 + ; CHECK-NEXT: $m1 = MOV_mv_scl $m2 + ; CHECK-NEXT: $dc5 = MOV_mv_scl $dc6 ; CHECK-NEXT: $dn5 = MOV_mv_scl $dn6 ; CHECK-NEXT: $dj5 = MOV_mv_scl $dj6 - ; CHECK-NEXT: $dc5 = MOV_mv_scl $dc6 $d1_3d = COPY $d2_3d ... diff --git a/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir b/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir index 5a7bfa5776ef..aa0d678c6d40 100644 --- a/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir +++ b/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir @@ -417,12 +417,10 @@ body: | ; CHECK-NEXT: $dj1 = MOVXM -2228 ; CHECK-NEXT: $dc0 = LDA_dms_lda_idx $p0, killed $dj1 ; CHECK-NEXT: $dj1 = MOVXM -2224 - ; CHECK-NEXT: $m4 = LDA_dms_lda_idx $p0, killed $dj1 - ; CHECK-NEXT: $dj1 = MOVXM -2220 ; CHECK-NEXT: $dn4 = LDA_dms_lda_idx $p0, killed $dj1 - ; CHECK-NEXT: $dj1 = MOVXM -2216 + ; CHECK-NEXT: $dj1 = MOVXM -2220 ; CHECK-NEXT: $dj4 = LDA_dms_lda_idx $p0, killed $dj1 - ; CHECK-NEXT: $dj1 = MOVXM -2212 + ; CHECK-NEXT: $dj1 = MOVXM -2216 ; CHECK-NEXT: $dc4 = LDA_dms_lda_idx killed $p0, killed $dj1 ; CHECK-NEXT: $p0 = MOV_alu_mv_mv_mv_scl $sp ; CHECK-NEXT: $dj1 = MOVXM -2240 @@ -434,12 +432,10 @@ body: | ; CHECK-NEXT: $dj0 = MOVXM -2228 ; CHECK-NEXT: ST_dms_sts_idx $dc0, $p0, killed $dj0 ; CHECK-NEXT: $dj0 = MOVXM -2224 - ; CHECK-NEXT: ST_dms_sts_idx $m4, $p0, killed $dj0 - ; CHECK-NEXT: $dj0 = MOVXM -2220 ; CHECK-NEXT: ST_dms_sts_idx $dn4, $p0, killed $dj0 - ; CHECK-NEXT: $dj0 = MOVXM -2216 + ; CHECK-NEXT: $dj0 = MOVXM -2220 ; CHECK-NEXT: ST_dms_sts_idx $dj4, $p0, killed $dj0 - ; CHECK-NEXT: $dj0 = MOVXM -2212 + ; CHECK-NEXT: $dj0 = MOVXM -2216 ; CHECK-NEXT: ST_dms_sts_idx $dc4, killed $p0, killed $dj0 ; CHECK-NEXT: frame-destroy PADDXM_pstm_sp_imm -2240, implicit-def $sp, implicit $sp ; CHECK-NEXT: 
PseudoRET implicit $lr diff --git a/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-create.mir b/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-create.mir index c115d130cfda..3c5d08b0a7c8 100644 --- a/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-create.mir +++ b/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-create.mir @@ -166,9 +166,9 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count - ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDS_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, 
[[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDB_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:ep, dead [[COPY1:%[0-9]+]].sub_dim_count:eds, dead [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDS_3D_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count %20:ep = COPY $p0 %100:eds = COPY $d1_3d %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D killed %20, killed %100 @@ -190,7 +190,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: $r4, [[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed undef [[COPY1]].sub_hi_dim_then_sub_mod, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: $r4, 
[[COPY]]:ep, [[COPY1]].sub_dim_count:eds, [[COPY1]].sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda_split killed [[COPY]], killed [[COPY1]].sub_mod, killed [[COPY1]].sub_dim_size, killed [[COPY1]].sub_dim_stride, killed [[COPY1]].sub_dim_count, killed [[COPY1]].sub_hi_dim_then_sub_dim_size, killed [[COPY1]].sub_hi_dim_then_sub_dim_stride, killed [[COPY1]].sub_hi_dim_then_sub_dim_count %20:ep = COPY $p0 %100:eds = COPY $d1_3d $r4, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = LDA_3D_dms_lda killed %20, killed %100 @@ -210,7 +210,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:eds = COPY $d1_3d - ; CHECK-NEXT: [[VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split:%[0-9]+]]:edm, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split $s0, [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, undef [[COPY1]].sub_hi_dim_then_sub_mod, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 + ; CHECK-NEXT: [[VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split:%[0-9]+]]:edm, [[COPY:%[0-9]+]]:ep, [[COPY1:%[0-9]+]].sub_dim_count:eds, [[COPY1:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split $s0, [[COPY]], [[COPY1]].sub_mod, [[COPY1]].sub_dim_size, [[COPY1]].sub_dim_stride, [[COPY1]].sub_dim_count, [[COPY1]].sub_hi_dim_then_sub_dim_size, [[COPY1]].sub_hi_dim_then_sub_dim_stride, [[COPY1]].sub_hi_dim_then_sub_dim_count, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 %20:ep = COPY $p0 %100:eds = COPY $d1_3d %0:edm, %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0 
$s0, %20, %100, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 @@ -238,7 +238,7 @@ body: | ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[MOV_PD_imm11_pseudo]].sub_mod ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = COPY [[MOV_PD_imm11_pseudo]].sub_dim_size ; CHECK-NEXT: [[COPY:%[0-9]+]]:mpfs = COPY $p0 - ; CHECK-NEXT: $sf, %2:mpfs, $r26, dead [[MOV_PD_imm11_pseudo]].sub_dim_count:eds, dead [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count:eds = VST_FLUSH_512_3D_split $sf, %2, $r26, [[MOV_PD_imm11_pseudo]].sub_mod, [[MOV_PD_imm11_pseudo]].sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_dim_count, undef [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_mod, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count, implicit-def $srfifo_of + ; CHECK-NEXT: $sf, %2:mpfs, $r26, dead [[MOV_PD_imm11_pseudo]].sub_dim_count:eds, dead [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count:eds = VST_FLUSH_512_3D_split $sf, %2, $r26, [[MOV_PD_imm11_pseudo]].sub_mod, [[MOV_PD_imm11_pseudo]].sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_dim_count, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count, implicit-def $srfifo_of ; CHECK-NEXT: PseudoRET implicit $lr undef %12.sub_mod:eds = MOV_PD_imm11_pseudo 0 %12.sub_hi_dim_then_sub_dim_stride:eds = MOV_PD_imm11_pseudo 128 @@ -275,7 +275,7 @@ body: | ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_count:eds = COPY [[MOV_PD_imm11_pseudo]].sub_mod ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = COPY [[MOV_PD_imm11_pseudo]].sub_mod ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_size:eds = 
COPY [[MOV_PD_imm11_pseudo]].sub_dim_size - ; CHECK-NEXT: [[VLD_POP_512_3D_pseudo_split:%[0-9]+]]:vec512, dead [[COPY:%[0-9]+]].sub_ptr:epsrfldf, dead [[COPY:%[0-9]+]].sub_fifo:epsrfldf, dead [[COPY:%[0-9]+]].sub_avail:epsrfldf, dead [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_count:eds, dead [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_POP_512_3D_pseudo_split [[COPY]].sub_ptr, undef [[COPY]].sub_fifo, [[COPY]].sub_avail, [[MOV_PD_imm11_pseudo]].sub_mod, [[MOV_PD_imm11_pseudo]].sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_dim_count, undef [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_mod, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count, implicit-def $srfifo_uf + ; CHECK-NEXT: [[VLD_POP_512_3D_pseudo_split:%[0-9]+]]:vec512, dead [[COPY:%[0-9]+]].sub_ptr:epsrfldf, dead [[COPY:%[0-9]+]].sub_fifo:epsrfldf, dead [[COPY:%[0-9]+]].sub_avail:epsrfldf, dead [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_dim_count:eds, dead [[MOV_PD_imm11_pseudo:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = VLD_POP_512_3D_pseudo_split [[COPY]].sub_ptr, undef [[COPY]].sub_fifo, [[COPY]].sub_avail, [[MOV_PD_imm11_pseudo]].sub_mod, [[MOV_PD_imm11_pseudo]].sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_dim_count, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo]].sub_hi_dim_then_sub_dim_count, implicit-def $srfifo_uf ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLD_POP_512_3D_pseudo_split]] undef %12.sub_mod:eds = MOV_PD_imm11_pseudo 0 %12.sub_hi_dim_then_sub_dim_stride:eds = MOV_PD_imm11_pseudo 128 diff --git a/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-replace.mir b/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-replace.mir index 4cf0baf18aac..0878183f69f5 100644 --- 
a/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-replace.mir +++ b/llvm/test/CodeGen/AIE/aie2p/ra/split-instrs-replace.mir @@ -143,7 +143,7 @@ body: | bb.3: liveins: $dc4, $dj4, $dn4, $p0, $m0, $dn0, $dj0, $dc0 - $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 + $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... @@ -263,9 +263,9 @@ body: | ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDB_3D killed $p0, $d0_3d ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDS_3D killed $p0, $d0_3d ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 - $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 - $p0, $dc0, $dc4 = PADDB_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 - $p0, $dc0, $dc4 = PADDS_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed undef $m4, killed $dn4, killed $dj4, killed $dc4 + $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 + $p0, $dc0, $dc4 = PADDB_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 + $p0, $dc0, $dc4 = PADDS_3D_split killed $p0, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4 PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit killed renamable $dc4 ... 
@@ -280,7 +280,7 @@ body: | ; CHECK: liveins: $p0, $d1_3d ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $r4, dead $p0, dead $dc1, dead $dc5 = LDA_3D_dms_lda killed $p0, $d1_3d - $r4, dead $p0, dead $dc1, dead $dc5 = LDA_3D_dms_lda_split killed $p0, $m1, $dn1, $dj1, $dc1, undef $m5, $dn5, $dj5, $dc5 + $r4, dead $p0, dead $dc1, dead $dc5 = LDA_3D_dms_lda_split killed $p0, $m1, $dn1, $dj1, $dc1, $dn5, $dj5, $dc5 ... --- @@ -294,7 +294,7 @@ body: | ; CHECK: liveins: $p0, $s0, $d1_3d ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: dead $dm0, dead $p0, dead $dc1, dead $dc5 = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0 $s0, killed $p0, $d1_3d, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 - dead $dm0, dead $p0, dead $dc1, dead $dc5 = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split $s0, killed $p0, $m1, $dn1, $dj1, $dc1, undef $m5, $dn5, $dj5, $dc5, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 + dead $dm0, dead $p0, dead $dc1, dead $dc5 = VLDA_3D_UPS_4x_dmx_lda_ups_x2d_upsSign0_split $s0, killed $p0, $m1, $dn1, $dj1, $dc1, $dn5, $dj5, $dc5, implicit-def $srups_of, implicit $crsat, implicit $crupsmode, implicit $upssign0 ... @@ -326,7 +326,7 @@ body: | renamable $dc4 = COPY renamable $m0 renamable $dn4 = COPY renamable $dn0 renamable $p2 = COPY $p0 - $sf, dead $p2, $r26, dead $dc0, dead $dc4 = VST_FLUSH_512_3D_split $sf, killed $p2, $r26, killed $m0, killed $dn0, killed $dj0, killed $dc0, undef $m4, killed $dn4, killed $dj4, killed $dc4, implicit-def $srfifo_of + $sf, dead $p2, $r26, dead $dc0, dead $dc4 = VST_FLUSH_512_3D_split $sf, killed $p2, $r26, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4, implicit-def $srfifo_of PseudoRET implicit $lr ... 
@@ -358,6 +358,6 @@ body: | renamable $dc0 = COPY renamable $m0 renamable $dc4 = COPY renamable $m0 renamable $dn4 = COPY renamable $dn0 - $x0, dead $p0, dead $lf0, dead $r24, dead $dc0, dead $dc4 = VLD_POP_512_3D_pseudo_split killed $p0, undef $lf0, $r24, killed $m0, killed $dn0, killed $dj0, killed $dc0, undef $m4, killed $dn4, killed $dj4, killed $dc4, implicit-def $srfifo_uf + $x0, dead $p0, dead $lf0, dead $r24, dead $dc0, dead $dc4 = VLD_POP_512_3D_pseudo_split killed $p0, undef $lf0, $r24, killed $m0, killed $dn0, killed $dj0, killed $dc0, killed $dn4, killed $dj4, killed $dc4, implicit-def $srfifo_uf PseudoRET implicit $lr, implicit killed renamable $x0 ... diff --git a/llvm/test/CodeGen/AIE/aie2p/ra/tie-subregs-flow-3d.mir b/llvm/test/CodeGen/AIE/aie2p/ra/tie-subregs-flow-3d.mir index d77448271de2..40bf6b110211 100644 --- a/llvm/test/CodeGen/AIE/aie2p/ra/tie-subregs-flow-3d.mir +++ b/llvm/test/CodeGen/AIE/aie2p/ra/tie-subregs-flow-3d.mir @@ -26,13 +26,13 @@ body: | ; CHECK-NEXT: $dn0 = MOV_alu_mv_mv_mv_scl killed $r1 ; CHECK-NEXT: $dj0 = MOV_alu_mv_mv_mv_scl killed $r2 ; CHECK-NEXT: $dc0 = MOV_alu_mv_mv_mv_scl killed $r3 + ; CHECK-NEXT: $m4 = MOV_alu_mv_mv_mv_scl killed $r4 ; CHECK-NEXT: $dn4 = MOV_alu_mv_mv_mv_scl killed $r5 ; CHECK-NEXT: RET implicit $lr ; CHECK-NEXT: $dj4 = MOV_alu_mv_mv_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_alu_mv_mv_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 - ; CHECK-NEXT: $p0, dead $dc0, $dc4 = PADDA_3D killed $p0, killed $d0_3d + ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, killed $d0_3d ; CHECK-NEXT: NOP ; CHECK-NEXT: DelayedSchedBarrier implicit killed renamable $p0, implicit killed renamable $dc4 %0:em = COPY $r0 @@ -139,12 +139,12 @@ body: | ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP - ; CHECK-NEXT: NOP + ; CHECK-NEXT: $m4 = MOV_alu_mv_mv_mv_scl killed $r4 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} ; 
CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $m0, $p0, $r0, $r1, $r2, $r3, $r5, $r6, $r7, $r8 + ; CHECK-NEXT: liveins: $p0, $r0, $r1, $r2, $r3, $r5, $r6, $r7, $r8, $d0_3d:0x0001C00000200E00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $dn0 = MOV_alu_mv_mv_mv_scl killed $r1 ; CHECK-NEXT: $dj0 = MOV_alu_mv_mv_mv_scl killed $r2 @@ -155,7 +155,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2 (align 16): ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.2(0x40000000) - ; CHECK-NEXT: liveins: $dc0, $dc4, $dj0, $dj4, $dn0, $dn4, $m0, $p0, $r0, $r8, $d0_3d:0x0001C00000200E00 + ; CHECK-NEXT: liveins: $p0, $r0, $r8, $d0_3d:0x0001C00000200E00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: ST_dms_sts_idx_imm renamable $r0, renamable $p0, 0 :: (store (s32)) ; CHECK-NEXT: renamable $r0 = nuw nsw ADD_add_r_ri killed renamable $r0, 1, implicit-def $srcarry @@ -164,7 +164,7 @@ body: | ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP ; CHECK-NEXT: NOP - ; CHECK-NEXT: $p0, $dc0, dead $dc4 = PADDA_3D killed $p0, $d0_3d + ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d ; CHECK-NEXT: $dc4 = MOV_alu_mv_mv_mv_scl $dc0 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} @@ -225,23 +225,23 @@ body: | ; CHECK-NEXT: $m0 = MOV_alu_mv_mv_mv_scl killed $r0 ; CHECK-NEXT: $dn0 = MOV_alu_mv_mv_mv_scl killed $r1 ; CHECK-NEXT: $dj0 = MOV_alu_mv_mv_mv_scl killed $r2 - ; CHECK-NEXT: JNZ renamable $r8, %bb.2 ; CHECK-NEXT: $dc0 = MOV_alu_mv_mv_mv_scl killed $r3 + ; CHECK-NEXT: JNZ renamable $r8, %bb.2 + ; CHECK-NEXT: $m4 = MOV_alu_mv_mv_mv_scl killed $r4 ; CHECK-NEXT: $dn4 = MOV_alu_mv_mv_mv_scl killed $r5 ; CHECK-NEXT: $dj4 = MOV_alu_mv_mv_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_alu_mv_mv_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: liveins: $dc0, $dc4, $dj0, 
$dj4, $dn0, $dn4, $m0, $p0, $d0_3d:0x0001C00000200E00 + ; CHECK-NEXT: liveins: $p0, $d0_3d:0x0001C00000200E00 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $p0, $dc0, dead $dc4 = PADDA_3D killed $p0, killed $d0_3d + ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, killed $d0_3d ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2 (align 16): - ; CHECK-NEXT: liveins: $dc0, $p0 + ; CHECK-NEXT: liveins: $p0, $d0_3d:0x0000000000000200 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: RET implicit $lr ; CHECK-NEXT: NOP @@ -291,18 +291,18 @@ body: | ; CHECK-NEXT: $m0 = MOV_alu_mv_mv_mv_scl killed $r0 ; CHECK-NEXT: $dn0 = MOV_alu_mv_mv_mv_scl killed $r1 ; CHECK-NEXT: $dj0 = MOV_alu_mv_mv_mv_scl killed $r2 - ; CHECK-NEXT: JZ renamable $r8, %bb.2 ; CHECK-NEXT: $dc0 = MOV_alu_mv_mv_mv_scl killed $r3 + ; CHECK-NEXT: JZ renamable $r8, %bb.2 + ; CHECK-NEXT: $m4 = MOV_alu_mv_mv_mv_scl killed $r4 ; CHECK-NEXT: $dn4 = MOV_alu_mv_mv_mv_scl killed $r5 ; CHECK-NEXT: $dj4 = MOV_alu_mv_mv_mv_scl killed $r6 ; CHECK-NEXT: $dc4 = MOV_alu_mv_mv_mv_scl killed $r7 ; CHECK-NEXT: $p0, $dc0, $dc4 = PADDA_3D killed $p0, $d0_3d - ; CHECK-NEXT: dead renamable $m4 = KILL killed $r4 ; CHECK-NEXT: DelayedSchedBarrier ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: - ; CHECK-NEXT: liveins: $dc4, $dj0, $dj4, $dn0, $dn4, $m0, $p0, $r9, $d0_3d:0x0001C00000200C00 + ; CHECK-NEXT: liveins: $p0, $r9, $d0_3d:0x0001C00000200C00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: RET implicit $lr ; CHECK-NEXT: NOP @@ -313,7 +313,7 @@ body: | ; CHECK-NEXT: DelayedSchedBarrier implicit killed renamable $dc0, implicit killed renamable $p0, implicit killed renamable $dc4 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2 (align 16): - ; CHECK-NEXT: liveins: $dc0, $dc4, $dj0, $dj4, $dn0, $dn4, $m0, $p0, $d0_3d:0x0001C00000200E00 + ; CHECK-NEXT: liveins: $p0, $d0_3d:0x0001C00000200E00 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: RET implicit $lr ; CHECK-NEXT: NOP diff --git a/llvm/test/CodeGen/AIE/aie2p/spill/dim_spill.mir 
b/llvm/test/CodeGen/AIE/aie2p/spill/dim_spill.mir index b00ce925bde0..b02205a75416 100644 --- a/llvm/test/CodeGen/AIE/aie2p/spill/dim_spill.mir +++ b/llvm/test/CodeGen/AIE/aie2p/spill/dim_spill.mir @@ -119,18 +119,16 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_lda_spill -60, implicit $sp :: (load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_lda_spill -56, implicit $sp :: (load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_lda_spill -52, implicit $sp :: (load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_lda_spill -48, implicit $sp :: (load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -44, implicit $sp :: (load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -40, implicit $sp :: (load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill -36, implicit $sp :: (load (s32) from %stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -48, implicit $sp :: (load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -44, implicit $sp :: (load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill -40, implicit $sp :: (load (s32) from %stack.0 + 24) ; CHECK-NEXT: ST_dms_sts_spill $m1, -32, implicit $sp :: (store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_sts_spill $dn1, -28, implicit $sp :: (store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_sts_spill $dj1, -24, implicit $sp :: (store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_sts_spill $dc1, -20, implicit $sp :: (store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_sts_spill $m5, -16, implicit $sp :: (store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_sts_spill $dn5, -12, implicit $sp :: (store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_sts_spill $dj5, -8, implicit $sp :: (store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_sts_spill $dc5, -4, implicit $sp :: (store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_sts_spill $dn5, -16, implicit $sp :: (store (s32) into %stack.1 + 16) + ; CHECK-NEXT: 
ST_dms_sts_spill $dj5, -12, implicit $sp :: (store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_sts_spill $dc5, -8, implicit $sp :: (store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL %stack.0, implicit $sp :: (load (s256) from %stack.0, align 4) ST_DS_SPILL $d1_3d, %stack.1, implicit $sp :: (store (s256) into %stack.1, align 4) ... @@ -148,9 +146,9 @@ body: | bb.0 (align 16): ; CHECK-LABEL: name: test_ds_partial ; CHECK: frame-setup PADDXM_pstm_sp_imm 64, implicit-def $sp, implicit $sp - ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -40, implicit $sp :: (load (s32) from %stack.0 + 24) + ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -44, implicit $sp :: (load (s32) from %stack.0 + 20) ; CHECK-NEXT: $dj6 = COPY $dj5 - ; CHECK-NEXT: ST_dms_sts_spill $dj6, -8, implicit $sp :: (store (s32) into %stack.1 + 24) + ; CHECK-NEXT: ST_dms_sts_spill $dj6, -12, implicit $sp :: (store (s32) into %stack.1 + 20) $d1_3d = LDA_DS_SPILL %stack.0, implicit $sp :: (load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, %stack.1, implicit $sp :: (store (s256) into %stack.1, align 4) @@ -171,19 +169,17 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_lda_spill -60, implicit $sp :: (load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_lda_spill -56, implicit $sp :: (load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_lda_spill -52, implicit $sp :: (load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_lda_spill -48, implicit $sp :: (load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -44, implicit $sp :: (load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -40, implicit $sp :: (load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill -36, implicit $sp :: (load (s32) from %stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -48, implicit $sp :: (load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -44, implicit $sp :: (load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill 
-40, implicit $sp :: (load (s32) from %stack.0 + 24) ; CHECK-NEXT: $dj6 = COPY $dj5 ; CHECK-NEXT: ST_dms_sts_spill $m2, -32, implicit $sp :: (store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_sts_spill $dn2, -28, implicit $sp :: (store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_sts_spill $dj2, -24, implicit $sp :: (store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_sts_spill $dc2, -20, implicit $sp :: (store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_sts_spill $m6, -16, implicit $sp :: (store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_sts_spill $dn6, -12, implicit $sp :: (store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_sts_spill $dj6, -8, implicit $sp :: (store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_sts_spill $dc6, -4, implicit $sp :: (store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_sts_spill $dn6, -16, implicit $sp :: (store (s32) into %stack.1 + 16) + ; CHECK-NEXT: ST_dms_sts_spill $dj6, -12, implicit $sp :: (store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_sts_spill $dc6, -8, implicit $sp :: (store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL %stack.0, implicit $sp :: (load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, %stack.1, implicit $sp :: (store (s256) into %stack.1, align 4) @@ -206,19 +202,17 @@ body: | ; CHECK-NEXT: $dn1 = LDA_dms_lda_spill -60, implicit $sp :: (volatile load (s32) from %stack.0 + 4) ; CHECK-NEXT: $dj1 = LDA_dms_lda_spill -56, implicit $sp :: (volatile load (s32) from %stack.0 + 8) ; CHECK-NEXT: $dc1 = LDA_dms_lda_spill -52, implicit $sp :: (volatile load (s32) from %stack.0 + 12) - ; CHECK-NEXT: $m5 = LDA_dms_lda_spill -48, implicit $sp :: (volatile load (s32) from %stack.0 + 16) - ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -44, implicit $sp :: (volatile load (s32) from %stack.0 + 20) - ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -40, implicit $sp :: (volatile load (s32) from %stack.0 + 24) - ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill -36, implicit $sp :: (volatile load (s32) from 
%stack.0 + 28) + ; CHECK-NEXT: $dn5 = LDA_dms_lda_spill -48, implicit $sp :: (volatile load (s32) from %stack.0 + 16) + ; CHECK-NEXT: $dj5 = LDA_dms_lda_spill -44, implicit $sp :: (volatile load (s32) from %stack.0 + 20) + ; CHECK-NEXT: $dc5 = LDA_dms_lda_spill -40, implicit $sp :: (volatile load (s32) from %stack.0 + 24) ; CHECK-NEXT: $dj6 = COPY $dj5 ; CHECK-NEXT: ST_dms_sts_spill undef $m2, -32, implicit $sp :: (volatile store (s32) into %stack.1) ; CHECK-NEXT: ST_dms_sts_spill undef $dn2, -28, implicit $sp :: (volatile store (s32) into %stack.1 + 4) ; CHECK-NEXT: ST_dms_sts_spill undef $dj2, -24, implicit $sp :: (volatile store (s32) into %stack.1 + 8) ; CHECK-NEXT: ST_dms_sts_spill undef $dc2, -20, implicit $sp :: (volatile store (s32) into %stack.1 + 12) - ; CHECK-NEXT: ST_dms_sts_spill undef $m6, -16, implicit $sp :: (volatile store (s32) into %stack.1 + 16) - ; CHECK-NEXT: ST_dms_sts_spill undef $dn6, -12, implicit $sp :: (volatile store (s32) into %stack.1 + 20) - ; CHECK-NEXT: ST_dms_sts_spill $dj6, -8, implicit $sp :: (volatile store (s32) into %stack.1 + 24) - ; CHECK-NEXT: ST_dms_sts_spill undef $dc6, -4, implicit $sp :: (volatile store (s32) into %stack.1 + 28) + ; CHECK-NEXT: ST_dms_sts_spill undef $dn6, -16, implicit $sp :: (volatile store (s32) into %stack.1 + 16) + ; CHECK-NEXT: ST_dms_sts_spill $dj6, -12, implicit $sp :: (volatile store (s32) into %stack.1 + 20) + ; CHECK-NEXT: ST_dms_sts_spill undef $dc6, -8, implicit $sp :: (volatile store (s32) into %stack.1 + 24) $d1_3d = LDA_DS_SPILL %stack.0, implicit $sp :: (volatile load (s256) from %stack.0, align 4) $dj6 = COPY $dj5 ST_DS_SPILL $d2_3d, %stack.1, implicit $sp :: (volatile store (s256) into %stack.1, align 4) diff --git a/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir b/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir index c24cadc0b912..bec4dbba0b92 100644 --- a/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir +++ b/llvm/test/CodeGen/AIE/staged-ra-rewrite.mir @@ -671,7 +671,7 @@ body: | ; 
AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]].sub_hi_dim:eds = COPY [[COPY2]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, undef [[COPY3]].sub_hi_dim_then_sub_mod, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY3]].sub_dim_count, implicit [[COPY3]].sub_hi_dim_then_sub_dim_count ; ; AIE2-RA-LABEL: name: test_split_3d_from_various @@ -692,7 +692,7 @@ body: | ; AIE2-RA-NEXT: bb.1: ; AIE2-RA-NEXT: liveins: $p0, $d0_3d:0x000000000001C870 ; AIE2-RA-NEXT: {{ $}} - ; AIE2-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 + ; AIE2-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 ; AIE2-RA-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit renamable $dc4 ; ; AIE2P-VREGS-LABEL: name: test_split_3d_from_various @@ -717,7 +717,7 @@ body: | ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]].sub_hi_dim:eds = COPY [[COPY2]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, 
[[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, undef [[COPY3]].sub_hi_dim_then_sub_mod, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY3]].sub_dim_count, implicit [[COPY3]].sub_hi_dim_then_sub_dim_count ; ; AIE2P-RA-LABEL: name: test_split_3d_from_various @@ -738,7 +738,7 @@ body: | ; AIE2P-RA-NEXT: bb.1: ; AIE2P-RA-NEXT: liveins: $p0, $d0_3d:0x0001C00000200E00 ; AIE2P-RA-NEXT: {{ $}} - ; AIE2P-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 + ; AIE2P-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 ; AIE2P-RA-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit renamable $dc4 bb.1.entry: successors: %bb.2 @@ -763,7 +763,7 @@ body: | bb.2: - %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split %20, %100.sub_mod, %100.sub_dim_size, %100.sub_dim_stride, %100.sub_dim_count, undef %100.sub_hi_dim_then_sub_mod, %100.sub_hi_dim_then_sub_dim_size, %100.sub_hi_dim_then_sub_dim_stride, %100.sub_hi_dim_then_sub_dim_count + %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split %20, %100.sub_mod, %100.sub_dim_size, %100.sub_dim_stride, %100.sub_dim_count, %100.sub_hi_dim_then_sub_dim_size, %100.sub_hi_dim_then_sub_dim_stride, %100.sub_hi_dim_then_sub_dim_count PseudoRET implicit $lr, implicit %20, implicit %100.sub_dim_count, implicit 
%100.sub_hi_dim_then_sub_dim_count ... @@ -785,7 +785,7 @@ body: | ; AIE2-VREGS-NEXT: [[COPY3:%[0-9]+]].sub_hi_dim:eds = COPY [[COPY2]] ; AIE2-VREGS-NEXT: {{ $}} ; AIE2-VREGS-NEXT: bb.1: - ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, undef [[COPY3]].sub_hi_dim_then_sub_mod, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count + ; AIE2-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count ; AIE2-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY3]].sub_dim_count, implicit [[COPY3]].sub_hi_dim_then_sub_dim_count ; ; AIE2-RA-LABEL: name: test_split_3d_from_2d @@ -799,7 +799,7 @@ body: | ; AIE2-RA-NEXT: bb.1: ; AIE2-RA-NEXT: liveins: $p0, $d0_3d:0x000000000001C870 ; AIE2-RA-NEXT: {{ $}} - ; AIE2-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 + ; AIE2-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 ; AIE2-RA-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit renamable $dc4 ; ; AIE2P-VREGS-LABEL: name: test_split_3d_from_2d @@ -814,7 +814,7 @@ body: | ; AIE2P-VREGS-NEXT: [[COPY3:%[0-9]+]].sub_hi_dim:eds = COPY [[COPY2]] ; AIE2P-VREGS-NEXT: {{ $}} ; AIE2P-VREGS-NEXT: bb.1: - ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split 
[[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, undef [[COPY3]].sub_hi_dim_then_sub_mod, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count + ; AIE2P-VREGS-NEXT: [[COPY:%[0-9]+]]:ep, [[COPY3:%[0-9]+]].sub_dim_count:eds, [[COPY3:%[0-9]+]].sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split [[COPY]], [[COPY3]].sub_mod, [[COPY3]].sub_dim_size, [[COPY3]].sub_dim_stride, [[COPY3]].sub_dim_count, [[COPY3]].sub_hi_dim_then_sub_dim_size, [[COPY3]].sub_hi_dim_then_sub_dim_stride, [[COPY3]].sub_hi_dim_then_sub_dim_count ; AIE2P-VREGS-NEXT: PseudoRET implicit $lr, implicit [[COPY]], implicit [[COPY3]].sub_dim_count, implicit [[COPY3]].sub_hi_dim_then_sub_dim_count ; ; AIE2P-RA-LABEL: name: test_split_3d_from_2d @@ -828,7 +828,7 @@ body: | ; AIE2P-RA-NEXT: bb.1: ; AIE2P-RA-NEXT: liveins: $p0, $d0_3d:0x0001C00000200E00 ; AIE2P-RA-NEXT: {{ $}} - ; AIE2P-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, undef $m4, $dn4, $dj4, $dc4 + ; AIE2P-RA-NEXT: $p0, $dc0, $dc4 = PADDA_3D_split killed $p0, $m0, $dn0, $dj0, $dc0, $dn4, $dj4, $dc4 ; AIE2P-RA-NEXT: PseudoRET implicit $lr, implicit killed renamable $p0, implicit killed renamable $dc0, implicit renamable $dc4 bb.1.entry: successors: %bb.2 @@ -841,7 +841,7 @@ body: | %100.sub_hi_dim:eds = COPY %5 bb.2: - %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split %20, %100.sub_mod, %100.sub_dim_size, %100.sub_dim_stride, %100.sub_dim_count, undef %100.sub_hi_dim_then_sub_mod, %100.sub_hi_dim_then_sub_dim_size, %100.sub_hi_dim_then_sub_dim_stride, %100.sub_hi_dim_then_sub_dim_count + %20:ep, %100.sub_dim_count:eds, %100.sub_hi_dim_then_sub_dim_count:eds = PADDA_3D_split %20, %100.sub_mod, %100.sub_dim_size, %100.sub_dim_stride, %100.sub_dim_count, %100.sub_hi_dim_then_sub_dim_size, %100.sub_hi_dim_then_sub_dim_stride, %100.sub_hi_dim_then_sub_dim_count 
PseudoRET implicit $lr, implicit %20, implicit %100.sub_dim_count, implicit %100.sub_hi_dim_then_sub_dim_count ... From 6b97af9e0dd8fdfb097c4a0f4df9e77bc5f01ada Mon Sep 17 00:00:00 2001 From: Krishnam Tibrewala Date: Mon, 28 Apr 2025 13:30:37 -0700 Subject: [PATCH 6/7] [AIEx] Modify Super-Reg-Rewrite pass to remove dead-MI from bundles of copies With the new strategy, live-range splitting ends up creating bundles of copies in which some of the sub-registers are no longer in use at all. When we split them in Super-Reg-Rewrite, we end up creating a live range that starts at a slot index and ends as dead on the same slot index. But there is another register on the same slot index (since we have a MOV bundle) which actually has a valid live range. --- llvm/lib/Target/AIE/AIESuperRegRewriter.cpp | 21 ++++ llvm/test/CodeGen/AIE/aie2p/issue_1.ll | 104 ++++++++++++++++++++ llvm/test/CodeGen/AIE/aie2p/issue_2.ll | 99 +++++++++++++++++++ 3 files changed, 224 insertions(+) create mode 100644 llvm/test/CodeGen/AIE/aie2p/issue_1.ll create mode 100644 llvm/test/CodeGen/AIE/aie2p/issue_2.ll diff --git a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp index 0b8aff0c78c0..922f31e172f6 100644 --- a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp @@ -316,6 +316,27 @@ void AIESuperRegRewriter::rewriteSuperReg( VRM.grow(); LIS.removeInterval(Reg); + // The liverange splitting logic sometimes produces bundles of copies when + // subregisters are involved.
Sometimes some of the copies are not used, + // since super-reg-rewriter is going to modify them into individual virtual + // register with separate live ranges we need to make sure we remove the + // dead-MI from the bundle of copies + SmallVector SubRegsToRemove; + for (auto &[SubRegIdx, VReg] : make_early_inc_range(SubRegToVReg)) { + if (MRI.use_nodbg_empty(VReg)) + for (auto &MI : MRI.reg_nodbg_instructions(VReg)) { + if (MI.isBundled() && MI.isCopy()) { + Indexes.removeSingleMachineInstrFromMaps(MI); + MI.eraseFromBundle(); + SubRegsToRemove.push_back(SubRegIdx); + } + break; + } + } + + for (auto SubRegIdx : SubRegsToRemove) + SubRegToVReg.erase(SubRegIdx); + for (auto &[SubRegIdx, VReg] : SubRegToVReg) { MCRegister SubPhysReg; if (AssignPhysRegIsValid) diff --git a/llvm/test/CodeGen/AIE/aie2p/issue_1.ll b/llvm/test/CodeGen/AIE/aie2p/issue_1.ll new file mode 100644 index 000000000000..5867c08d1ca4 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/issue_1.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2024-25 Advanced Micro Devices, Inc.
or its affiliates +; RUN: llc -mtriple=aie2p -verify-machineinstrs -o - < %s | FileCheck %s + + +target datalayout = "e-m:e-p:20:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-f32:32:32-i64:32-f64:32-a:0:32-n32" +target triple = "aie2p-none-unknown-elf" + +define void @issue_1(i1 %exitcond.not.i) { +; CHECK-LABEL: issue_1: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mova dn3, #0; nopb ; movx r1, #1; mov crupsmode, #0 +; CHECK-NEXT: and r7, r0, r1; mov r0, dn3 +; CHECK-NEXT: mova r2, #0; mov r1, dn3 +; CHECK-NEXT: movs dn7, dn3; vbcst.16 x0, r2 +; CHECK-NEXT: movs dc3, dn3; mov s0, r2 +; CHECK-NEXT: movs dc7, dn3; mov r2, dn3 +; CHECK-NEXT: movs dn0, dn3; mov r3, dn3 +; CHECK-NEXT: movs dc0, dn3; mov r4, dn3 +; CHECK-NEXT: movs dc5, dn3; mov r5, dn3 +; CHECK-NEXT: movs dj3, dn3; mov r6, dn3 +; CHECK-NEXT: movs m2, dn3; mov dj6, dn3 +; CHECK-NEXT: movs m1, dn3; mov dj2, dn3 +; CHECK-NEXT: movs dn1, dn3; mov dj1, dn3 +; CHECK-NEXT: movs dn5, dn3; mov dj5, dn3 +; CHECK-NEXT: movs dn4, dn3; mov dj0, dn3 +; CHECK-NEXT: movs m0, dn3; mov dj4, dn3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: // %for.body.i +; CHECK-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NEXT: // Child Loop BB0_2 Depth 2 +; CHECK-NEXT: nopa ; nopb ; movs dc4, dn3; nopx ; vups.2x cml0, x0, s0, upssign0; nopv +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: // %for.body58.i +; CHECK-NEXT: // Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NEXT: nopa ; nopb ; nopx ; mov dn2, dn7; movs dc2, dc3 +; CHECK-NEXT: mova p0, #0; movs dc6, dc7; mov dn6, r0 +; CHECK-NEXT: movs dn2, r1; paddb.3d [p0], d2; jz r7, #.LBB0_2 +; CHECK-NEXT: mov dn6, r2 // Delay Slot 5 +; CHECK-NEXT: movs dc2, dc4; mov dc6, r3 // Delay Slot 4 +; CHECK-NEXT: paddb.3d [p0], d2 // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; mov dc1, r5 // Delay Slot 2 +; CHECK-NEXT: paddb.3d [p0], d1; mov dc4, dc2 // Delay Slot 1 +; CHECK-NEXT: // %bb.3: // %for.cond.cleanup57.i +; CHECK-NEXT: 
// in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: nopa ; nopb ; nops ; j #.LBB0_1; nopv +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nop // Delay Slot 4 +; CHECK-NEXT: nop // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; mov dc4, dn3 // Delay Slot 2 +; CHECK-NEXT: paddb.3d [p0], d0 // Delay Slot 1 +entry: + br label %for.body.i + +for.body.i: ; preds = %for.cond.cleanup57.i, %entry + %iterator_pout_cnt0.0489.i = phi i32 [ 0, %entry ], [ %4, %for.cond.cleanup57.i ] + %Ky_cnt.0485.i = phi i32 [ 0, %entry ], [ %14, %for.cond.cleanup57.i ] + %0 = tail call <32 x i32> @llvm.aie2p.acc32.v32.I512.ups(<32 x i16> zeroinitializer, i32 0, i32 0) + br label %for.body58.i + +for.cond.cleanup57.i: ; preds = %for.body58.i + %1 = trunc i32 %iterator_pout_cnt0.0489.i to i20 + %2 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %1, i20 0, i20 0) + %3 = extractvalue { ptr, i20, i20 } %2, 1 + %4 = zext i20 %3 to i32 + br label %for.body.i + +for.body58.i: ; preds = %for.body58.i, %for.body.i + %iterator_inner1_cnt0.1478.i = phi i32 [ 0, %for.body.i ], [ %10, %for.body58.i ] + %Ky_cnt.1476.i = phi i32 [ %Ky_cnt.0485.i, %for.body.i ], [ %14, %for.body58.i ] + %5 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0) + %6 = extractvalue { ptr, i20, i20 } %5, 0 + %7 = trunc i32 %iterator_inner1_cnt0.1478.i to i20 + %8 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr %6, i20 0, i20 0, i20 0, i20 0, i20 %7, i20 0, i20 0) + %9 = extractvalue { ptr, i20, i20 } %8, 1 + %10 = zext i20 %9 to i32 + %11 = trunc i32 %Ky_cnt.1476.i to i20 + %12 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 %11) + %13 = extractvalue { ptr, i20, i20 } %12, 2 + %14 = zext i20 %13 to i32 + br i1 %exitcond.not.i, label %for.cond.cleanup57.i, label %for.body58.i + +; uselistorder directives + uselistorder i32 %14, { 1, 0 } +} + +; Function Attrs: nounwind 
memory(inaccessiblemem: read) +declare <32 x i32> @llvm.aie2p.acc32.v32.I512.ups(<32 x i16>, i32, i32) #0 + +; Function Attrs: nounwind memory(none) +declare { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) #1 + +; uselistorder directives +uselistorder ptr @llvm.aie2p.add.3d, { 3, 2, 1, 0 } + +attributes #0 = { nounwind memory(inaccessiblemem: read) } +attributes #1 = { nounwind memory(none) } diff --git a/llvm/test/CodeGen/AIE/aie2p/issue_2.ll b/llvm/test/CodeGen/AIE/aie2p/issue_2.ll new file mode 100644 index 000000000000..c1f99c456634 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/issue_2.ll @@ -0,0 +1,99 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2024-25 Advanced Micro Devices, Inc. or its affiliates +; RUN: llc -mtriple=aie2p -verify-machineinstrs -o - < %s | FileCheck %s + + +target datalayout = "e-m:e-p:20:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-f32:32:32-i64:32-f64:32-a:0:32-n32" +target triple = "aie2p-none-unknown-elf" + +define void @issue_2(i32 %0, i1 %exitcond.not.i) { +; CHECK-LABEL: issue_2: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mova m0, #0; nopb ; nopx +; CHECK-NEXT: movs dc5, m0; mov dj0, m0 +; CHECK-NEXT: movs dj4, m0; mov dn0, m0 +; CHECK-NEXT: movs dj2, m0; mov dn4, m0 +; CHECK-NEXT: movs dj6, m0; mov dn2, m0 +; CHECK-NEXT: movs dj3, m0; mov dn6, m0 +; CHECK-NEXT: movs dj7, m0; mov dn3, m0 +; CHECK-NEXT: mova r2, #1; movs dn7, m0; mov dc0, m0 +; CHECK-NEXT: movs dc4, m0; and r5, r1, r2; mov r2, m0 +; CHECK-NEXT: movs dc3, m0; mov r1, m0 +; CHECK-NEXT: movs dc2, m0; mov m2, m0 +; CHECK-NEXT: mova dn5, #1; movs dj5, m0; mov m3, m0 +; CHECK-NEXT: mova r3, #0; movs dn1, m0; mov m1, m0 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: 
// %for.body58.i +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jz r5, #.LBB0_1 +; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: mova p0, #0 // Delay Slot 4 +; CHECK-NEXT: paddb.3d [p0], d0 // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; mov dc6, r3 // Delay Slot 2 +; CHECK-NEXT: paddb.3d [p0], d2; or r3, r0, r0; mov dc0, dn5 // Delay Slot 1 +; CHECK-NEXT: // %bb.2: // %for.cond.cleanup57.i +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: nopa ; nopb ; movs dc7, dn5; nopx ; mov dc0, dc5; nopv +; CHECK-NEXT: nopa ; nopb ; nopx ; mov r3, dn5; movs dc1, r2 +; CHECK-NEXT: movs dj1, r1; mov dn5, m0 +; CHECK-NEXT: mova p0, #0; movs dc5, m0; j #.LBB0_1 +; CHECK-NEXT: paddb.3d [p0], d1 // Delay Slot 5 +; CHECK-NEXT: mova p0, #0; movs dc2, m0; mov dn5, r3 // Delay Slot 4 +; CHECK-NEXT: movs dj1, m0; paddb.3d [p0], d3; mov r2, dc1 // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; movs dc5, dc0; mov dc1, m0 // Delay Slot 2 +; CHECK-NEXT: mova r3, #0; paddb.3d [p0], d1; movs dc4, m0; mov dc0, m0 // Delay Slot 1 +entry: + br label %for.body.i + +for.body.i: ; preds = %for.cond.cleanup57.i, %entry + %iterator_outer0_cnt0.0496.i = phi i32 [ 0, %entry ], [ %4, %for.cond.cleanup57.i ] + %iterator_weights_cnt0.0493.i = phi i32 [ 0, %entry ], [ %8, %for.cond.cleanup57.i ] + %y_cnt.0487.i = phi i32 [ 0, %entry ], [ %12, %for.cond.cleanup57.i ] + br label %for.body58.i + +for.cond.cleanup57.i: ; preds = %for.body58.i + %1 = trunc i32 %iterator_outer0_cnt0.0496.i to i20 + %2 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %1, i20 0, i20 1) + %3 = extractvalue { ptr, i20, i20 } %2, 1 + %4 = zext i20 %3 to i32 + %5 = trunc i32 %iterator_weights_cnt0.0493.i to i20 + %6 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %5, i20 0, i20 0) + %7 = extractvalue { ptr, i20, i20 } %6, 1 + %8 = zext i20 %7 to i32 + %9 = trunc i32 %y_cnt.0487.i to i20 + %10 = tail call { ptr, i20, i20 } 
@llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 0, i20 1, i20 %9) + %11 = extractvalue { ptr, i20, i20 } %10, 2 + %12 = zext i20 %11 to i32 + br label %for.body.i + +for.body58.i: ; preds = %for.body58.i, %for.body.i + %iterator_inner0_cnt0.1480.i = phi i32 [ 0, %for.body.i ], [ 1, %for.body58.i ] + %iterator_inner0_cnt1.1479.i = phi i32 [ 0, %for.body.i ], [ %17, %for.body58.i ] + %iterator_inner1_cnt0.1478.i = phi i32 [ 0, %for.body.i ], [ %22, %for.body58.i ] + %iterator_inner1_cnt1.1477.i = phi i32 [ 0, %for.body.i ], [ %0, %for.body58.i ] + %13 = trunc i32 %iterator_inner0_cnt0.1480.i to i20 + %14 = trunc i32 %iterator_inner0_cnt1.1479.i to i20 + %15 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %13, i20 0, i20 %14) + %16 = extractvalue { ptr, i20, i20 } %15, 2 + %17 = zext i20 %16 to i32 + %18 = trunc i32 %iterator_inner1_cnt0.1478.i to i20 + %19 = trunc i32 %iterator_inner1_cnt1.1477.i to i20 + %20 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %18, i20 0, i20 %19) + %21 = extractvalue { ptr, i20, i20 } %20, 1 + %22 = zext i20 %21 to i32 + br i1 %exitcond.not.i, label %for.cond.cleanup57.i, label %for.body58.i +} + +; Function Attrs: nounwind memory(none) +declare { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) #0 + +; uselistorder directives +uselistorder ptr @llvm.aie2p.add.3d, { 4, 3, 2, 1, 0 } + +attributes #0 = { nounwind memory(none) } From 6ee4bd0f863d25fe04698caa8cc1372a86ab3378 Mon Sep 17 00:00:00 2001 From: Krishnam Tibrewala Date: Thu, 1 May 2025 14:15:34 -0500 Subject: [PATCH 7/7] [AIEx] Enhance Super-Reg-Rewrite pass to expandCopyBundle The new strategy exposes a fundamental problem on how bundled instruction in case of sub-reg are created by live range splitting logic(Refer : SplitEditor::buildSingleSubRegCopy) . 
From a standard LLVM perspective this is not a problem, but it becomes one for AIE because of what we do in the Super-Reg-Rewrite pass. We rewrite the sub-registers into complete registers (which we want/need to do), but now there are COPY instructions where we end a live range on the bundle and create a new live range with a different COPY instruction in the same bundle, both using the same reg class for src & dst. The major issue arises when reg-alloc ends up assigning the same register to such COPYs in the same bundle; AFAIK this happens because the whole bundle is assigned a single SlotIndex. By expanding the CopyBundle we give each COPY MI a unique slot and the associated operands a proper LiveInterval --- llvm/lib/Target/AIE/AIESuperRegRewriter.cpp | 114 +++++++++++++++++- llvm/test/CodeGen/AIE/aie2p/issue_2.ll | 40 ++++--- llvm/test/CodeGen/AIE/aie2p/issue_3.ll | 122 ++++++++++++++++++++ 3 files changed, 256 insertions(+), 20 deletions(-) create mode 100644 llvm/test/CodeGen/AIE/aie2p/issue_3.ll diff --git a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp index 922f31e172f6..910a8ef451bc 100644 --- a/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp +++ b/llvm/lib/Target/AIE/AIESuperRegRewriter.cpp @@ -73,6 +73,10 @@ class AIESuperRegRewriter : public MachineFunctionPass { const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS, SlotIndexes &Indexes, LiveDebugVariables &DebugVars); + + void expandCopyBundle(MachineInstr &MI, MachineFunction &MF, + const AIEBaseRegisterInfo &TRI, SlotIndexes &Indexes, + SmallSet &RecomputeLIandLRM); }; /// Returns the subreg indices that can be used to rewrite \p Reg into smaller @@ -208,6 +212,36 @@ bool AIESuperRegRewriter::runOnMachineFunction(MachineFunction &MF) { DebugVars); } + // Expand CopyBundle + SmallSet RecomputeLIandLRM; + for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end(); + MBBI != MBBE; ++MBBI) { + LLVM_DEBUG(MBBI->print(dbgs(), &Indexes)); + for (MachineInstr &MI :
llvm::make_early_inc_range(MBBI->instrs())) { + expandCopyBundle(MI, MF, TRI, Indexes, RecomputeLIandLRM); + } + } + + for (Register Reg : RecomputeLIandLRM) { + if (LIS.hasInterval(Reg)) { + LLVM_DEBUG(dbgs() << "Recomputing live range for " << printReg(Reg, &TRI) + << '\n'); + // Recompute the LiveIntervals for the register and update the + // LiveRegMatrix + if (Reg.isPhysical()) { + const MCRegister PhysReg = VRM.getPhys(Reg); + const LiveInterval &OldLI = LIS.getInterval(Reg); + LIS.removeInterval(Reg); + LRM.unassign(OldLI); + const LiveInterval &LI = LIS.getInterval(Reg); + LRM.assign(LI, PhysReg); + } else { + LIS.removeInterval(Reg); + LIS.getInterval(Reg); + } + } + } + LLVM_DEBUG(VRM.dump()); return !AssignedPhysRegs.empty(); } @@ -265,7 +299,7 @@ void AIESuperRegRewriter::rewriteSuperReg( MachineRegisterInfo &MRI, const AIEBaseRegisterInfo &TRI, VirtRegMap &VRM, LiveRegMatrix &LRM, LiveIntervals &LIS, SlotIndexes &Indexes, LiveDebugVariables &DebugVars) { - bool AssignPhysRegIsValid = AssignedPhysReg.isValid(); + bool AssignPhysRegIsValid = AssignedPhysReg.isPhysical(); LLVM_DEBUG(dbgs() << "Rewriting " << printReg(Reg, &TRI, 0, &MRI) << " Assigned " << AssignPhysRegIsValid << '\n'); auto *TII = static_cast( @@ -365,6 +399,84 @@ void AIESuperRegRewriter::rewriteSuperReg( DebugVars.splitRegister(Reg, NewVRegs, LIS); } +// The liverange splitting logic sometimes produces bundles of copies when +// subregisters are involved. Expand these into a sequence of copy instructions +// after processing the last in the bundle. This is needed to ensure that the +// un-assigned virtual reg operands of COPY that were part of these bundles have +// a unique SlotIndex and thus a LiveInterval which is better for RA. 
+void AIESuperRegRewriter::expandCopyBundle( + MachineInstr &MI, MachineFunction &MF, const AIEBaseRegisterInfo &TRI, + SlotIndexes &Indexes, SmallSet &RecomputeLIandLRM) { + if (!MI.isCopy() && !MI.isKill()) + return; + + if (MI.isBundledWithPred() && !MI.isBundledWithSucc()) { + SmallVector MIs({&MI}); + + // Only do this when the complete bundle is made out of COPYs and KILLs. + MachineBasicBlock &MBB = *MI.getParent(); + for (MachineBasicBlock::reverse_instr_iterator + I = std::next(MI.getReverseIterator()), + E = MBB.instr_rend(); + I != E && I->isBundledWithSucc(); ++I) { + if (!I->isCopy() && !I->isKill()) + return; + MIs.push_back(&*I); + } + MachineInstr *FirstMI = MIs.back(); + + auto anyRegsAlias = [](const MachineInstr *Dst, + ArrayRef Srcs, + const TargetRegisterInfo &TRI) { + for (const MachineInstr *Src : Srcs) + if (Src != Dst) + if (TRI.regsOverlap(Dst->getOperand(0).getReg(), + Src->getOperand(1).getReg())) + return true; + return false; + }; + + // If any of the destination registers in the bundle of copies alias any of + // the source registers, try to schedule the instructions to avoid any + // clobbering. + for (int E = MIs.size(), PrevE = E; E > 1; PrevE = E) { + for (int I = E; I--;) + if (!anyRegsAlias(MIs[I], ArrayRef(MIs).take_front(E), TRI)) { + if (I + 1 != E) + std::swap(MIs[I], MIs[E - 1]); + --E; + } + if (PrevE == E) { + MF.getFunction().getContext().emitError( + "super-reg-rewriter register rewriting failed: cycle in copy " + "bundle"); + break; + } + } + + MachineInstr *BundleStart = FirstMI; + for (MachineInstr *BundledMI : llvm::reverse(MIs)) { + // If instruction is in the middle of the bundle, move it before the + // bundle starts, otherwise, just unbundle it. When we get to the last + // instruction, the bundle will have been completely undone. 
+ if (BundledMI != BundleStart) { + BundledMI->removeFromBundle(); + MBB.insert(BundleStart, BundledMI); + } else if (BundledMI->isBundledWithSucc()) { + BundledMI->unbundleFromSucc(); + BundleStart = &*std::next(BundledMI->getIterator()); + } + + if (BundledMI != FirstMI) { + Indexes.insertMachineInstrInMaps(*BundledMI); + RecomputeLIandLRM.insert(BundledMI->getOperand(0).getReg()); + RecomputeLIandLRM.insert(BundledMI->getOperand(1).getReg()); + BundledMI->getOperand(0).setIsInternalRead(false); + } + } + } +} + } // end anonymous namespace char AIESuperRegRewriter::ID = 0; diff --git a/llvm/test/CodeGen/AIE/aie2p/issue_2.ll b/llvm/test/CodeGen/AIE/aie2p/issue_2.ll index c1f99c456634..607f00db45e8 100644 --- a/llvm/test/CodeGen/AIE/aie2p/issue_2.ll +++ b/llvm/test/CodeGen/AIE/aie2p/issue_2.ll @@ -20,34 +20,36 @@ define void @issue_2(i32 %0, i1 %exitcond.not.i) { ; CHECK-NEXT: movs dj4, m0; mov dn0, m0 ; CHECK-NEXT: movs dj2, m0; mov dn4, m0 ; CHECK-NEXT: movs dj6, m0; mov dn2, m0 -; CHECK-NEXT: movs dj3, m0; mov dn6, m0 -; CHECK-NEXT: movs dj7, m0; mov dn3, m0 -; CHECK-NEXT: mova r2, #1; movs dn7, m0; mov dc0, m0 -; CHECK-NEXT: movs dc4, m0; and r5, r1, r2; mov r2, m0 -; CHECK-NEXT: movs dc3, m0; mov r1, m0 -; CHECK-NEXT: movs dc2, m0; mov m2, m0 -; CHECK-NEXT: mova dn5, #1; movs dj5, m0; mov m3, m0 -; CHECK-NEXT: mova r3, #0; movs dn1, m0; mov m1, m0 +; CHECK-NEXT: movs dn6, m0; mov dc0, m0 +; CHECK-NEXT: movs dc4, m0; mov r4, m0 +; CHECK-NEXT: movs dc3, m0; mov r6, m0 +; CHECK-NEXT: mova dn5, #1; movs dc2, m0; mov r3, m0 +; CHECK-NEXT: movs dn3, m0; mov r5, m0 +; CHECK-NEXT: mova r16, #1; movs dj3, m0; mov r2, m0 +; CHECK-NEXT: movs dn7, m0; and r16, r1, r16; mov r1, m0 +; CHECK-NEXT: mova r7, #0; movs dj7, m0; mov m3, m0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_1: // %for.body58.i ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jz r5, #.LBB0_1 -; CHECK-NEXT: nop // Delay Slot 5 +; CHECK-NEXT: nopa ; nopb ; nops ; jz r16, #.LBB0_1; nopv +; 
CHECK-NEXT: nopx // Delay Slot 5 ; CHECK-NEXT: mova p0, #0 // Delay Slot 4 ; CHECK-NEXT: paddb.3d [p0], d0 // Delay Slot 3 -; CHECK-NEXT: mova p0, #0; mov dc6, r3 // Delay Slot 2 -; CHECK-NEXT: paddb.3d [p0], d2; or r3, r0, r0; mov dc0, dn5 // Delay Slot 1 +; CHECK-NEXT: mova p0, #0; movs dc6, r7; mov m2, m0 // Delay Slot 2 +; CHECK-NEXT: paddb.3d [p0], d2; or r7, r0, r0; mov dc0, dn5 // Delay Slot 1 ; CHECK-NEXT: // %bb.2: // %for.cond.cleanup57.i ; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 -; CHECK-NEXT: nopa ; nopb ; movs dc7, dn5; nopx ; mov dc0, dc5; nopv -; CHECK-NEXT: nopa ; nopb ; nopx ; mov r3, dn5; movs dc1, r2 -; CHECK-NEXT: movs dj1, r1; mov dn5, m0 -; CHECK-NEXT: mova p0, #0; movs dc5, m0; j #.LBB0_1 +; CHECK-NEXT: nopx ; mov dc7, dn5 +; CHECK-NEXT: movs dc0, dc5; mov dc1, r1 +; CHECK-NEXT: movs dj1, r2; mov r7, dn5 +; CHECK-NEXT: movs dj5, m0; mov dn1, m0 +; CHECK-NEXT: movs dn5, m0; mov dc5, m0 +; CHECK-NEXT: mova p0, #0; movs m1, m0; j #.LBB0_1 ; CHECK-NEXT: paddb.3d [p0], d1 // Delay Slot 5 -; CHECK-NEXT: mova p0, #0; movs dc2, m0; mov dn5, r3 // Delay Slot 4 -; CHECK-NEXT: movs dj1, m0; paddb.3d [p0], d3; mov r2, dc1 // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; movs dc2, m0; mov dn5, r7 // Delay Slot 4 +; CHECK-NEXT: movs dj1, m0; paddb.3d [p0], d3; mov r1, dc1 // Delay Slot 3 ; CHECK-NEXT: mova p0, #0; movs dc5, dc0; mov dc1, m0 // Delay Slot 2 -; CHECK-NEXT: mova r3, #0; paddb.3d [p0], d1; movs dc4, m0; mov dc0, m0 // Delay Slot 1 +; CHECK-NEXT: mova r7, #0; paddb.3d [p0], d1; movs dc4, m0; mov dc0, m0 // Delay Slot 1 entry: br label %for.body.i diff --git a/llvm/test/CodeGen/AIE/aie2p/issue_3.ll b/llvm/test/CodeGen/AIE/aie2p/issue_3.ll new file mode 100644 index 000000000000..0001552bf518 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/issue_3.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. 
+; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2024-25 Advanced Micro Devices, Inc. or its affiliates +; RUN: llc -mtriple=aie2p -verify-machineinstrs -o - < %s | FileCheck %s + +target datalayout = "e-m:e-p:20:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-f32:32:32-i64:32-f64:32-a:0:32-n32" +target triple = "aie2p-none-unknown-elf" + +define void @issue_3(i1 %exitcond.not.i) { +; CHECK-LABEL: issue_3: +; CHECK: .p2align 4 +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: mova p3, #0; nopb ; nops ; paddxm [sp], #192; nopv +; CHECK-NEXT: mova r1, #0; nopb ; jl p3; nopm ; nops +; CHECK-NEXT: st r8, [sp, #-192]; vbcst.32 x0, r1 // 4-byte Folded Spill Delay Slot 5 +; CHECK-NEXT: st lr, [sp, #-188]; vmov x1, x0 // 4-byte Folded Spill Delay Slot 4 +; CHECK-NEXT: mova p0, #0; vst x0, [sp, #-128] // 64-byte Folded Spill Delay Slot 3 +; CHECK-NEXT: mova p1, #0; vst x1, [sp, #-64] // 64-byte Folded Spill Delay Slot 2 +; CHECK-NEXT: mova p2, #0; mov r8, r0 // Delay Slot 1 +; CHECK-NEXT: mova m4, #0; nopb ; nops ; nopxm ; nopv +; CHECK-NEXT: mov dn0, m4 +; CHECK-NEXT: mov dn4, m4 +; CHECK-NEXT: mov dn1, m4 +; CHECK-NEXT: mov dn5, m4 +; CHECK-NEXT: mov dn2, m4 +; CHECK-NEXT: movs dc5, m4; mov dc1, m4 +; CHECK-NEXT: vlda x2, [sp, #-128]; movs dc2, m4; mov r1, m4 // 64-byte Folded Reload +; CHECK-NEXT: vlda x3, [sp, #-64]; movs dc3, m4; movx r0, #1; mov r2, m4 // 64-byte Folded Reload +; CHECK-NEXT: movs dc0, m4; and r3, r8, r0; mov r0, m4 +; CHECK-NEXT: movs m1, m4; mov dj7, m4 +; CHECK-NEXT: movs m3, m4; mov dj1, r1 +; CHECK-NEXT: movs m2, m4; mov dj5, r1 +; CHECK-NEXT: movs dn7, m4; mov dj2, r1 +; CHECK-NEXT: movs dj6, r1; vmov lfl0, x2 +; CHECK-NEXT: mova dc4, #0; movs dj3, r1; movx r4, #0; vmov lfh0, x3 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_1: // %for.body.i +; CHECK-NEXT: // =>This Loop Header: Depth=1 +; CHECK-NEXT: // Child Loop BB0_2 Depth 2 +; CHECK-NEXT: nopx ; vmov lfl1, 
lfl0 +; CHECK-NEXT: mova p1, #0; mov r25, r4 +; CHECK-NEXT: vldb.pop.576.3d ex0, [p1, lf1, r25, d1]; mov dj4, r1 +; CHECK-NEXT: mova p0, #0; movs m0, m4; mov dj0, r1 +; CHECK-NEXT: movs dn6, dn0; paddb.3d [p0], d0; vmov lfh1, lfh0 +; CHECK-NEXT: mova p0, #0; movs dn3, dn4; mov dc6, dc4 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: .LBB0_2: // %for.body103.i +; CHECK-NEXT: // Parent Loop BB0_1 Depth=1 +; CHECK-NEXT: // => This Inner Loop Header: Depth=2 +; CHECK-NEXT: nopa ; nopb ; nops ; nopx ; mov m0, m4; nopv +; CHECK-NEXT: movs dn0, r0; jz r3, #.LBB0_2 +; CHECK-NEXT: movs dj0, r1; mov dc0, m4 // Delay Slot 5 +; CHECK-NEXT: movs dn4, r2; mov dc4, m4 // Delay Slot 4 +; CHECK-NEXT: movs dj4, r1; mov r25, r4 // Delay Slot 3 +; CHECK-NEXT: movs p1, p0; vmov lfl1, x2 // Delay Slot 2 +; CHECK-NEXT: vldb.pop.576.3d ex0, [p1, lf1, r25, d0]; vmov lfh1, x3 // Delay Slot 1 +; CHECK-NEXT: // %bb.3: // %for.cond.cleanup102.i +; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1 +; CHECK-NEXT: nopa ; nopb ; nopxm +; CHECK-NEXT: movs dc5, dc4; j #.LBB0_1 +; CHECK-NEXT: movs dn0, dn6; mov dc4, dc6 // Delay Slot 5 +; CHECK-NEXT: mova p0, #0; movs dn6, m4; mov dc6, m4 // Delay Slot 4 +; CHECK-NEXT: paddb.3d [p0], d2; mov dn4, dn3 // Delay Slot 3 +; CHECK-NEXT: mova p0, #0; movs dc7, m4; mov dn3, m4 // Delay Slot 2 +; CHECK-NEXT: mova dc0, #1; paddb.3d [p0], d3; movs dc1, dc0 // Delay Slot 1 +entry: + tail call void null(ptr null, ptr null, ptr null) + br label %for.body.i + +for.body.i: ; preds = %for.cond.cleanup102.i, %entry + %dimsAI.sroa.17.0665.i = phi i32 [ 0, %entry ], [ %20, %for.cond.cleanup102.i ] + %dimsAI.sroa.13.0664.i = phi i32 [ 0, %entry ], [ %18, %for.cond.cleanup102.i ] + %dimsAO.sroa.8.0662.i = phi i32 [ 0, %entry ], [ %11, %for.cond.cleanup102.i ] + %dimsW.sroa.8.0660.i = phi i32 [ 0, %entry ], [ %15, %for.cond.cleanup102.i ] + %iterator_psum_cnt0.0659.i = phi i32 [ 0, %entry ], [ 1, %for.cond.cleanup102.i ] + %iterator_psum_cnt1.0658.i = phi i32 [ 0, %entry ], [ %7, 
%for.cond.cleanup102.i ] + %0 = trunc i32 %iterator_psum_cnt0.0659.i to i20 + %1 = trunc i32 %iterator_psum_cnt1.0658.i to i20 + %2 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %0, i20 0, i20 %1) + %3 = extractvalue { ptr, i20, i20 } %2, 2 + %4 = trunc i32 %dimsAI.sroa.13.0664.i to i20 + %5 = trunc i32 %dimsAI.sroa.17.0665.i to i20 + %6 = tail call { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5) null, <32 x i32> zeroinitializer, i32 0, i20 0, i20 0, i20 %4, i20 0, i20 0, i20 %5, i20 0) + br label %for.body103.i + +for.cond.cleanup102.i: ; preds = %for.body103.i + %7 = zext i20 %3 to i32 + %8 = trunc i32 %dimsAO.sroa.8.0662.i to i20 + %9 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %8, i20 0, i20 0) + %10 = extractvalue { ptr, i20, i20 } %9, 1 + %11 = zext i20 %10 to i32 + %12 = trunc i32 %dimsW.sroa.8.0660.i to i20 + %13 = tail call { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr null, i20 0, i20 0, i20 0, i20 0, i20 %12, i20 0, i20 0) + %14 = extractvalue { ptr, i20, i20 } %13, 1 + %15 = zext i20 %14 to i32 + br label %for.body.i + +for.body103.i: ; preds = %for.body103.i, %for.body.i + %16 = tail call { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5) null, <32 x i32> zeroinitializer, i32 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0, i20 0) + %17 = extractvalue { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } %16, 3 + %18 = zext i20 %17 to i32 + %19 = extractvalue { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } %16, 4 + %20 = zext i20 %19 to i32 + br i1 %exitcond.not.i, label %for.cond.cleanup102.i, label %for.body103.i +} + +; Function Attrs: nounwind memory(none) +declare { ptr, i20, i20 } @llvm.aie2p.add.3d(ptr, i20, i20, i20, i20, i20, i20, i20) #0 + +; Function Attrs: nounwind 
memory(argmem: read) +declare { ptr addrspace(5), <32 x i32>, i32, i20, i20, <64 x i8>, <8 x i8> } @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5(ptr addrspace(5), <32 x i32>, i32, i20, i20, i20, i20, i20, i20, i20) #1 + +; uselistorder directives +uselistorder ptr @llvm.aie2p.add.3d, { 2, 1, 0 } +uselistorder ptr @llvm.aie2p.fifo.ld.pop.576.3d.bfp16.p5.p5, { 1, 0 } + +attributes #0 = { nounwind memory(none) } +attributes #1 = { nounwind memory(argmem: read) }