From c6768ca2fa658d28e4090fbca95283289186abbc Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Tue, 15 Jul 2025 15:10:41 -0700 Subject: [PATCH 01/17] [AMDGPU] Add scheduling stage to rewrite MFMA from VGPR to AGPR Change-Id: I47b2a4274a35f3cf0a6d064674d1d29526e4dfd2 --- .../llvm/CodeGen/MachineInstrBuilder.h | 15 + llvm/lib/Target/AMDGPU/GCNRegPressure.h | 30 + llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 641 ++ llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 70 +- .../AMDGPU/sched_mfma_rewrite_copies.mir | 5591 +++++++++++++++++ .../AMDGPU/sched_mfma_rewrite_cost.mir | 524 ++ 6 files changed, 6866 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir create mode 100644 llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index 060f0c41de73a..1d5d2b6376f5c 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -464,6 +464,21 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD); } +/// This version of the builder inserts the newly-built instruction after the +/// given position in the given MachineBasicBlock, and does NOT take a +/// destination register. 
+inline MachineInstrBuilder BuildMIAfter(MachineBasicBlock &BB, + MachineBasicBlock::iterator I, + const MIMetadata &MIMD, + const MCInstrDesc &MCID) { + MachineFunction &MF = *BB.getParent(); + MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); + BB.insertAfter(I, MI); + return MachineInstrBuilder(MF, MI) + .setPCSections(MIMD.getPCSections()) + .setMMRAMetadata(MIMD.getMMRAMetadata()); +} + inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, const MIMetadata &MIMD, diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index f9d3ce039092e..7ca8ea7be09f0 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -102,6 +102,36 @@ struct GCNRegPressure { DynamicVGPRBlockSize)); } + unsigned getVGPRSpills(const GCNSubtarget &ST, MachineFunction &MF) { + if (!ST.hasGFX90AInsts()) + return 0; + + auto MaxVectorRegs = ST.getMaxNumVectorRegs(MF.getFunction()); + unsigned ArchVGPRThreshold = MaxVectorRegs.first; + unsigned AGPRThreshold = MaxVectorRegs.second; + + unsigned ArchPressure = getArchVGPRNum(); + unsigned AGPRPressure = getAGPRNum(); + + unsigned ArchSpill = ArchPressure > ArchVGPRThreshold + ? (ArchPressure - ArchVGPRThreshold) + : 0; + unsigned AGPRSpill = + AGPRPressure > AGPRThreshold ? (AGPRPressure - AGPRThreshold) : 0; + + unsigned UnifiedSpill = 0; + + if (ST.hasGFX90AInsts()) { + unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF); + unsigned UnifiedPressure = getVGPRNum(true); + UnifiedSpill = UnifiedPressure > CombinedThreshold + ? 
(UnifiedPressure - CombinedThreshold) + : 0; + } + + return std::max(UnifiedSpill, (ArchSpill + AGPRSpill)); + } + void inc(unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask, diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index c8ce3aab3f303..453e9d28f2a2e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -30,6 +30,7 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/ErrorHandling.h" @@ -690,6 +691,7 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( const MachineSchedContext *C, bool IsLegacyScheduler) : GCNSchedStrategy(C) { SchedStages.push_back(GCNSchedStageID::OccInitialSchedule); + SchedStages.push_back(GCNSchedStageID::RewriteSchedule); SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule); SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule); SchedStages.push_back(GCNSchedStageID::PreRARematerialize); @@ -946,6 +948,8 @@ GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) { switch (SchedStageID) { case GCNSchedStageID::OccInitialSchedule: return std::make_unique(SchedStageID, *this); + case GCNSchedStageID::RewriteSchedule: + return std::make_unique(SchedStageID, *this); case GCNSchedStageID::UnclusteredHighRPReschedule: return std::make_unique(SchedStageID, *this); case GCNSchedStageID::ClusteredLowOccupancyReschedule: @@ -1183,6 +1187,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) { case GCNSchedStageID::OccInitialSchedule: OS << "Max Occupancy Initial Schedule"; break; + case GCNSchedStageID::RewriteSchedule: + OS << "Instruction Rewriting Reschedule"; + break; case GCNSchedStageID::UnclusteredHighRPReschedule: OS << "Unclustered 
High Register Pressure Reschedule"; break; @@ -1216,6 +1223,112 @@ bool GCNSchedStage::initGCNSchedStage() { return true; } +SlotIndex +RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO, + LiveIntervals *LIS, + SmallVectorImpl &DefIdxs) { + assert(UseMO.isReg()); + MachineInstr *UseMI = UseMO.getParent(); + LiveInterval &UseLI = LIS->getInterval(UseMO.getReg()); + auto VNInfo = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI)); + + SlotIndex DefMBBStart = + LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNInfo->def)); + + // If the def is in the block, then it must be the only reaching def. + if (DefMBBStart != VNInfo->def) { + DefIdxs.push_back(VNInfo->def); + return VNInfo->def; + } + + SmallPtrSet Visited; + SmallVector Worklist; + + Visited.insert(UseMI->getParent()); + + // Mark the predecessor blocks for traversal + for (auto PredMBB : UseMI->getParent()->predecessors()) { + Worklist.push_back(PredMBB); + Visited.insert(PredMBB); + } + + while (!Worklist.empty()) { + MachineBasicBlock *CurrMBB = Worklist.pop_back_val(); + + SlotIndex CurrMBBEnd = LIS->getMBBEndIdx(CurrMBB); + auto VNInfo = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot()); + + MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNInfo->def); + SlotIndex DefMBBStart = LIS->getMBBStartIdx(DefMBB); + + // If there is a def in this block, then add it to the list. This is the + // reaching def of this path. 
+ if (DefMBBStart != VNInfo->def) { + DefIdxs.push_back(VNInfo->def); + continue; + } + + for (auto PredMBB : DefMBB->predecessors()) { + if (Visited.insert(PredMBB).second) + Worklist.push_back(PredMBB); + } + } + + return VNInfo->def; +} + +void RewriteScheduleStage::findReachingUses( + MachineInstr *DefMI, LiveIntervals *LIS, + SmallVectorImpl &ReachingUses) { + SlotIndex DefIdx = LIS->getInstructionIndex(*DefMI); + for (auto &UseMO : + DAG.MRI.use_nodbg_operands(DefMI->getOperand(0).getReg())) { + SmallVector ReachingDefIndexes; + findReachingDefs(UseMO, LIS, ReachingDefIndexes); + + // If we find a use that contains this DefMI in its reachingDefs, then it is + // a reaching use. + if (find_if(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) { + return SlotIndex::isSameInstr(RDIdx, DefIdx); + }) != ReachingDefIndexes.end()) + ReachingUses.push_back(&UseMO); + } +} + +bool RewriteScheduleStage::initGCNSchedStage() { + const GCNSubtarget &ST = MF.getSubtarget(); + + RegionsWithExcessArchVGPR.resize(DAG.Regions.size()); + RegionsWithExcessArchVGPR.reset(); + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { + auto PressureBefore = DAG.Pressure[Region]; + if (PressureBefore.getArchVGPRNum() > ST.getAddressableNumArchVGPRs()) + RegionsWithExcessArchVGPR[Region] = true; + } + + if (!ST.hasGFX90AInsts() || RegionsWithExcessArchVGPR.none()) + return false; + + TII = ST.getInstrInfo(); + SRI = ST.getRegisterInfo(); + + std::vector> RewriteCands; + DenseMap> CopyForUse; + SmallPtrSet CopyForDef; + + if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef)) + return false; + + int64_t Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef); + + // If we haven't found the beneficial conditions, prefer the VGPR form which + // may result in less cross RC copies. 
+ if (Cost > 0) + return false; + + return rewrite(RewriteCands); +} + bool UnclusteredHighRPStage::initGCNSchedStage() { if (DisableUnclusterHighRP) return false; @@ -1837,6 +1950,534 @@ void GCNSchedStage::revertScheduling() { DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd); } +bool RewriteScheduleStage::isRewriteCandidate(MachineInstr *MI) const { + + if (!static_cast(DAG.TII)->isMAI(*MI)) + return false; + return AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()) != -1; +} + +bool RewriteScheduleStage::initHeuristics( + std::vector> &RewriteCands, + DenseMap> &CopyForUse, + SmallPtrSetImpl &CopyForDef) { + // Prepare for the heuristics + for (auto &MBB : MF) { + for (auto &MI : MBB) { + if (isRewriteCandidate(&MI)) { + int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()); + if (ReplacementOp == -1) + continue; + + RewriteCands.push_back({&MI, MI.getOpcode()}); + MI.setDesc(TII->get(ReplacementOp)); + + MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (Src2->isReg()) { + SmallVector Src2ReachingDefs; + findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs); + + // For any definition of the src2 register which is non-MFMA, we + // insert a copy. + for (SlotIndex RDIdx : Src2ReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx); + if (!TII->isMAI(*RD)) + CopyForDef.insert(RD); + } + } + + MachineOperand &Dst = MI.getOperand(0); + SmallVector DstReachingUses; + + findReachingUses(&MI, DAG.LIS, DstReachingUses); + + for (MachineOperand *RUOp : DstReachingUses) { + if (TII->isMAI(*RUOp->getParent())) + continue; + + // For any user of the result of the MFMA which is not an MFMA, we + // insert a copy. For a given register, we will only insert one copy + // per user block. 
+ CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg()); + + SmallVector DstUsesReachingDefs; + findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs); + + for (auto RDIndex : DstUsesReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); + if (TII->isMAI(*RD)) + continue; + + // For any definition of the user of the MFMA which is not an MFMA, + // we insert a copy. We do this to transform all the reaching defs + // of this use to AGPR. By doing this, we can insert a copy from + // AGPR to VGPR at the user rather than after the MFMA. + CopyForDef.insert(RD); + } + } + + // Do the rewrite to allow for updated RP calculation. + const TargetRegisterClass *VGPRRC = DAG.MRI.getRegClass(Dst.getReg()); + const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(VGPRRC); + DAG.MRI.setRegClass(Dst.getReg(), AGPRRC); + if (Src2->isReg()) + DAG.MRI.setRegClass(Src2->getReg(), AGPRRC); + } + } + } + + return true; +} + +int64_t RewriteScheduleStage::getRewriteCost( + std::vector> &RewriteCands, + DenseMap> &CopyForUse, + SmallPtrSetImpl &CopyForDef) { + MBFI.calculate(MF, MBPI, *DAG.MLI); + int64_t BestSpillCost = 0; + int64_t Cost = 0; + + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { + if (!RegionsWithExcessArchVGPR[Region]) + continue; + + auto PressureBefore = DAG.Pressure[Region]; + unsigned SpillCostBefore = PressureBefore.getVGPRSpills(ST, MF); + + // For the cases we care about (i.e. ArchVGPR usage is greater than the + // addressable limit), rewriting alone should bring pressure to manageable + // level. If we find any such region, then the rewrite is potentially + // beneficial. 
+ auto PressureAfter = DAG.getRealRegPressure(Region); + unsigned SpillCostAfter = PressureAfter.getVGPRSpills(ST, MF); + + uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency(); + uint64_t BlockFreq = + MBFI.getBlockFreq(DAG.Regions[Region].first->getParent()) + .getFrequency(); + + bool RelativeFreqIsDenom = EntryFreq > BlockFreq; + uint64_t RelativeFreq = EntryFreq && BlockFreq + ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq + : BlockFreq / EntryFreq) + : 1; + + // This assumes perfect spilling / splitting -- using one spill / copy + // instruction and one restoreFrom / copy for each excess register, + int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2; + + // Also account for the block frequency. + if (RelativeFreqIsDenom) + SpillCost /= (int64_t)RelativeFreq; + else + SpillCost *= (int64_t)RelativeFreq; + + // If we have increased spilling in any block, just bail. + if (SpillCost > 0) + return SpillCost; + + if (SpillCost < BestSpillCost) + BestSpillCost = SpillCost; + } + + // Set the cost to the largest decrease in spill cost in order to not double + // count spill reductions. + Cost = BestSpillCost; + + assert(Cost <= 0); + + unsigned CopyCost = 0; + + uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency(); + + // For each CopyForDef, increase the cost by the register size while + // accounting for block frequency. + for (auto *DefMI : CopyForDef) { + auto DefReg = DefMI->getOperand(0).getReg(); + uint64_t DefFreq = + EntryFreq + ? MBFI.getBlockFreq(DefMI->getParent()).getFrequency() / EntryFreq + : 1; + + unsigned RegSize = DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(DefReg)); + unsigned NumRegs = std::max(RegSize / 32, (unsigned)1); + CopyCost += NumRegs * DefFreq; + } + + // Account for CopyForUse copies in each block that the register is used. + for (auto &UseEntry : CopyForUse) { + uint64_t UseFreq = + EntryFreq ? 
MBFI.getBlockFreq(UseEntry.first).getFrequency() / EntryFreq + : 1; + + for (auto UseReg : UseEntry.second) { + unsigned RegSize = + DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(UseReg)); + unsigned NumRegs = std::max(RegSize / 32, (unsigned)1); + CopyCost += NumRegs * UseFreq; + } + } + + Cost += CopyCost; + + // Reset to the vgpr form. We must do rewriting after copy-insertion, as some + // defs of the register may require VGPR. + for (auto RI : RewriteCands) { + MachineInstr *MI = RI.first; + + assert(TII->isMAI(*MI)); + const TargetRegisterClass *AGPRRC = + DAG.MRI.getRegClass(MI->getOperand(0).getReg()); + const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(AGPRRC); + + MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2); + assert(Src2); + + if (Src2->isReg()) { + DAG.MRI.setRegClass(Src2->getReg(), VGPRRC); + } + DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VGPRRC); + MI->setDesc(TII->get(RI.second)); + } + + return Cost; +} + +bool RewriteScheduleStage::rewrite( + std::vector> &RewriteCands) { + DenseMap FirstMIToRegion; + DenseMap LastMIToRegion; + + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { + auto Entry = DAG.Regions[Region]; + if (Entry.first == Entry.second) + continue; + + FirstMIToRegion[&*Entry.first] = Region; + if (Entry.second != Entry.first->getParent()->end()) + LastMIToRegion[&*Entry.second] = Region; + } + + // Rewrite the MFMAs to AGPR, and insert any copies as needed. + // The general assumption of the algorithm (and the previous cost calculation) + // is that it is better to insert the copies in the MBB of the def of the src2 + // operands, and in the MBB of the user of the dest operands. This is based on + // the assumption that the MFMAs are likely to appear in loop bodies, while + // the src2 and dest operands are live-in / live-out of the loop. Due to this + // design, the algorithm for finding copy insertion points is more + // complicated. 
+ // + // There are three main cases to handle: 1. the reaching defs of the src2 + // operands, 2. the reaching uses of the dst operands, and 3. the reaching + // defs of the reaching uses of the dst operand. + // + // In the first case, we simply insert copies after each of the reaching + // definitions. In the second case, we collect all the uses of a given dest + // and organize them by MBB. Then, we insert 1 copy for each MBB before the + // earliest use. Since the use may have multiple reaching defs, and since we + // want to replace the register it is using with the result of the copy, we + // must handle case 3. In the third case, we simply insert a copy after each + // of the reaching defs to connect to the copy of the reaching uses of the dst + // reg. This allows us to avoid inserting copies next to the MFMAs. + // + // While inserting the copies, we maintain a map of operands which will use + // different regs (i.e. the result of the copies). For example, a case 1 src2 + // operand will use the register result of the copies after the reaching defs, + // as opposed to the original register. Now that we have completed our copy + // analysis and placement, we can bulk update the registers. We do this + // separately so as to avoid complicating the reachingDef and reachingUse + // queries. + // + // While inserting the copies, we also maintain a list of registers which we + // will want to reclassify as AGPR. After doing the copy insertion and the + // register replacement, we can finally do the reclassification. This uses the + // redef map, as the registers we are interested in reclassifying may be + // replaced by the result of a copy. We must do this after the copy analysis + // and placement as we must have an accurate redef map -- otherwise we may end + // up creating illegal instructions.
+ + // The original registers of the MFMA that need to be reclassified as AGPR + std::set RewriteRegs; + // The map of an original register in the MFMA to a new register (result of a + // copy) that it should be replaced with. + DenseMap RedefMap; + // The map of the original MFMA registers to the relevant MFMA operands. + DenseMap> ReplaceMap; + // The map of reaching defs for a given register -- to avoid duplicate copies. + DenseMap> ReachingDefCopyMap; + // The map of reaching uses for a given register by basic block -- to avoid + // duplicate copies and to calculate per MBB insert pts. + DenseMap>> + ReachingUseTracker; + + for (auto &RI : RewriteCands) { + MachineInstr &MI = *RI.first; + + int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()); + if (ReplacementOp == -1) + continue; + MI.setDesc(TII->get(ReplacementOp)); + + // Case 1: insert copies for the reaching defs of the Src2Reg. + MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); + + if (Src2->isReg()) { + Register Src2Reg = Src2->getReg(); + if (!Src2Reg.isVirtual()) + return false; + + Register MappedReg = Src2->getReg(); + SmallVector Src2ReachingDefs; + findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs); + SmallVector Src2DefsReplace; + + for (auto RDIndex : Src2ReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); + if (TII->isMAI(*RD)) + continue; + + // If there is a non mai reaching def, then we need a copy. + if (find(Src2DefsReplace, RD) == Src2DefsReplace.end()) + Src2DefsReplace.push_back(RD); + } + + if (!Src2DefsReplace.empty()) { + if (RedefMap.contains(Src2Reg)) + MappedReg = RedefMap[Src2Reg]; + else { + assert(!ReachingDefCopyMap.contains(Src2Reg)); + const TargetRegisterClass *Src2RC = DAG.MRI.getRegClass(Src2Reg); + const TargetRegisterClass *VGPRRC = + SRI->getEquivalentVGPRClass(Src2RC); + + // Track the mapping of the original register to the new register. 
+ MappedReg = DAG.MRI.createVirtualRegister(VGPRRC); + RedefMap[Src2Reg] = MappedReg; + } + + // If none exists, create a copy from this reaching def. + // We may have inserted a copy already in an earlier iteration. + for (MachineInstr *RD : Src2DefsReplace) { + // Do not create redundant copies. + if (ReachingDefCopyMap[Src2Reg].insert(RD).second) { + MachineInstrBuilder VGPRCopy = + BuildMIAfter(*RD->getParent(), RD->getIterator(), + RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(MappedReg, 0, 0) + .addUse(Src2Reg, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + + // If this reaching def was the last MI in the region, update the + // region boundaries. + if (LastMIToRegion.contains(RD)) { + unsigned UpdateRegion = LastMIToRegion[RD]; + DAG.Regions[UpdateRegion].second = VGPRCopy; + LastMIToRegion.erase(RD); + } + } + } + } + + // Track the register for reclassification + RewriteRegs.insert(Src2Reg); + + // Always insert the operand for replacement. If this corresponds with a + // chain of tied-def we may not see the VGPR requirement until later. + ReplaceMap[Src2Reg].insert(Src2); + } + + // Case 2 and Case 3: insert copies before the reaching uses of the dsts, + // and after the reaching defs of the reaching uses of the dsts. + + MachineOperand *Dst = &MI.getOperand(0); + Register DstReg = Dst->getReg(); + if (!DstReg.isVirtual()) + return false; + + Register MappedReg = DstReg; + SmallVector DstReachingUses; + + SmallVector DstReachingUseCopies; + SmallVector DstUseDefsReplace; + + findReachingUses(&MI, DAG.LIS, DstReachingUses); + + for (MachineOperand *RUOp : DstReachingUses) { + if (TII->isMAI(*RUOp->getParent())) + continue; + + // If there is a non mai reaching use, then we need a copy. 
+ if (find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.end()) + DstReachingUseCopies.push_back(RUOp); + SmallVector DstUsesReachingDefs; + findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs); + + for (auto RDIndex : DstUsesReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); + if (TII->isMAI(*RD)) + continue; + + // If there is a non mai reaching def of this reaching use, then we will + // need a copy. + if (find(DstUseDefsReplace, RD) == DstUseDefsReplace.end()) + DstUseDefsReplace.push_back(RD); + } + } + + if (!DstUseDefsReplace.empty()) { + if (RedefMap.contains(DstReg)) + MappedReg = RedefMap[DstReg]; + else { + assert(!ReachingDefCopyMap.contains(DstReg)); + const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg); + const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC); + + // Track the mapping of the original register to the new register. + MappedReg = DAG.MRI.createVirtualRegister(VGPRRC); + RedefMap[DstReg] = MappedReg; + } + + // If none exists, create a copy from this reaching def. + // We may have inserted a copy already in an earlier iteration. + for (MachineInstr *RD : DstUseDefsReplace) { + // Do not create redundant copies. + if (ReachingDefCopyMap[DstReg].insert(RD).second) { + MachineInstrBuilder VGPRCopy = + BuildMIAfter(*RD->getParent(), RD->getIterator(), + RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(MappedReg, 0, 0) + .addUse(DstReg, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + + // If this reaching def was the last MI in the region, update the + // region boundaries. + if (LastMIToRegion.contains(RD)) { + unsigned UpdateRegion = LastMIToRegion[RD]; + DAG.Regions[UpdateRegion].second = VGPRCopy; + LastMIToRegion.erase(RD); + } + } + } + } + + for (MachineOperand *RU : DstReachingUseCopies) { + MachineBasicBlock *RUBlock = RU->getParent()->getParent(); + // Just keep track of the reaching use of this register by block.
After we + // have scanned all the MFMAs we can find optimal insert pts. + if (RUBlock != MI.getParent()) { + ReachingUseTracker[RUBlock->getNumber()][DstReg].insert(RU); + continue; + } + + // Special case, the use is in the same block as the MFMA. Insert the copy + // just before the use. + const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg); + const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC); + Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC); + MachineInstr *UseInst = RU->getParent(); + MachineInstrBuilder VGPRCopy = + BuildMI(*UseInst->getParent(), UseInst->getIterator(), + UseInst->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(NewUseReg, 0, 0) + .addUse(DstReg, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + // Since we know this use has only one reaching def, we can replace the + // use reg. + RU->setReg(NewUseReg); + // Track the copy source operand for replacement. + ReplaceMap[DstReg].insert(&VGPRCopy->getOperand(1)); + } + + // Track the register for reclassification + RewriteRegs.insert(DstReg); + // Insert the dst operand for replacement. If this dst is in a chain of + // tied-def MFMAs, and the first src2 needs to be replaced with a new reg, + // all the correspond operands need to be replaced. + ReplaceMap[DstReg].insert(Dst); + } + + // Handle the copies for dst uses. + for (auto RUBlockEntry : ReachingUseTracker) { + for (auto RUDst : RUBlockEntry.second) { + MachineOperand *OpBegin = *RUDst.second.begin(); + SlotIndex InstPt = DAG.LIS->getInstructionIndex(*OpBegin->getParent()); + + // Find the earliest use in this block. 
+ for (auto User : RUDst.second) { + SlotIndex NewInstPt = DAG.LIS->getInstructionIndex(*User->getParent()); + if (SlotIndex::isEarlierInstr(NewInstPt, InstPt)) + InstPt = NewInstPt; + } + + const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(RUDst.first); + const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC); + Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC); + MachineInstr *UseInst = DAG.LIS->getInstructionFromIndex(InstPt); + + MachineInstrBuilder VGPRCopy = + BuildMI(*UseInst->getParent(), UseInst->getIterator(), + UseInst->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(NewUseReg, 0, 0) + .addUse(RUDst.first, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + + // If this UseInst was the first MI in its region, the copy inserted + // before it becomes the new region begin. + if (FirstMIToRegion.contains(UseInst)) { + unsigned UpdateRegion = FirstMIToRegion[UseInst]; + DAG.Regions[UpdateRegion].first = VGPRCopy; + FirstMIToRegion.erase(UseInst); + } + + // Replace the operand for all users. + for (auto User : RUDst.second) { + User->setReg(NewUseReg); + } + + // Track the copy source operand for replacement. + ReplaceMap[RUDst.first].insert(&VGPRCopy->getOperand(1)); + } + } + + // We may have needed to insert copies after the reaching defs of the MFMAs. + // Replace the original register with the result of the copy for all relevant + // operands. + for (auto NewDef : RedefMap) { + Register OldReg = NewDef.first; + Register NewReg = NewDef.second; + + // Replace the register for any associated operand in the MFMA chain. + for (MachineOperand *ReplaceOp : ReplaceMap[OldReg]) { + ReplaceOp->setReg(NewReg); + } + } + + // Finally, do the reclassification of the MFMA registers. + for (auto RewriteReg : RewriteRegs) { + Register RegToRewrite = RewriteReg; + + // Be sure to update the replacement register and not the original.
+ if (RedefMap.contains(RewriteReg)) + RegToRewrite = RedefMap[RewriteReg]; + + const TargetRegisterClass *CurrRC = DAG.MRI.getRegClass(RegToRewrite); + const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC); + + DAG.MRI.setRegClass(RegToRewrite, AGPRRC); + } + + // Bulk update the LIS. + DAG.LIS->reanalyze(DAG.MF); + // Liveins may have been modified for cross RC copies + RegionPressureMap LiveInUpdater(&DAG, false); + LiveInUpdater.buildLiveRegMap(); + + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) + DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region); + + return true; +} + bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() { const Function &F = MF.getFunction(); diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 95a931b9beb2a..e2d4f49b4ef16 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -16,6 +16,9 @@ #include "GCNRegPressure.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineScheduler.h" @@ -28,11 +31,12 @@ class GCNSchedStage; enum class GCNSchedStageID : unsigned { OccInitialSchedule = 0, - UnclusteredHighRPReschedule = 1, - ClusteredLowOccupancyReschedule = 2, - PreRARematerialize = 3, - ILPInitialSchedule = 4, - MemoryClauseInitialSchedule = 5 + RewriteSchedule = 1, + UnclusteredHighRPReschedule = 2, + ClusteredLowOccupancyReschedule = 3, + PreRARematerialize = 4, + ILPInitialSchedule = 5, + MemoryClauseInitialSchedule = 6 }; #ifndef NDEBUG @@ -239,6 +243,7 @@ using RegionBoundaries = class GCNScheduleDAGMILive final : public ScheduleDAGMILive { friend class GCNSchedStage; friend class OccInitialScheduleStage; + friend class RewriteScheduleStage; friend 
class UnclusteredHighRPStage; friend class ClusteredLowOccStage; friend class PreRARematStage; @@ -413,6 +418,61 @@ class OccInitialScheduleStage : public GCNSchedStage { : GCNSchedStage(StageID, DAG) {} }; +class RewriteScheduleStage : public GCNSchedStage { +private: + // Record regions with excess archvgpr register pressure over the physical + // register limit. Register pressure in these regions usually will result in + // spilling. + BitVector RegionsWithExcessArchVGPR; + + MachineBranchProbabilityInfo MBPI; + MachineBlockFrequencyInfo MBFI; + + const SIInstrInfo *TII; + const SIRegisterInfo *SRI; + + /// Do a speculative rewrite and collect copy locations. The speculative + /// rewrite allows us to calculate the RP of the code after the rewrite, and + /// the copy locations allow us to calculate the total cost of copies required + /// for the rewrite. Stores the rewritten instructions in \p RewriteCands , + /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the + /// copy locations for defs (of the MFMA operands) in \p CopyForDef + bool + initHeuristics(std::vector> &RewriteCands, + DenseMap> &CopyForUse, + SmallPtrSetImpl &CopyForDef); + + /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done + /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy + /// costs, and \p RewriteCands to undo rewriting. + int64_t + getRewriteCost(std::vector> &RewriteCands, + DenseMap> &CopyForUse, + SmallPtrSetImpl &CopyForDef); + + /// Do the final rewrite on \p RewriteCands and insert any needed copies. + bool rewrite(std::vector> &RewriteCands); + + /// \returns true if this MI is a rewrite candidate.
+ bool isRewriteCandidate(MachineInstr *MI) const; + + /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p + /// DefIdx + SlotIndex findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS, + SmallVectorImpl &DefIdxs); + + /// Finds all the reaching uses of \p DefMI and stores the use operands in \p + /// ReachingUses + void findReachingUses(MachineInstr *DefMI, LiveIntervals *LIS, + SmallVectorImpl &ReachingUses); + +public: + bool initGCNSchedStage() override; + + RewriteScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) + : GCNSchedStage(StageID, DAG) {} +}; + class UnclusteredHighRPStage : public GCNSchedStage { private: // Save the initial occupancy before starting this stage. diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir new file mode 100644 index 0000000000000..73eeafb6bccc5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir @@ -0,0 +1,5591 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- | + define void @src2_singledef_singleuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void 
@src2_multidef_multiuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_singleedef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_singleuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void 
@src2_singledef_multiuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_singledef_mixed() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_multidef_mixed() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_multidef_mixed() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_multidef_mixed() #0 { + entry: + unreachable + } + + define void @no_copy_for_mfma() #0 { + entry: + unreachable + } + + attributes #0 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"} +... 
+ + +--- +name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 
4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, 
implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = 
IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit 
killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; 
CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, 
%84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + + +--- +name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = 
V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], 
[[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec 
+ $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; 
CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + 
%2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 
= IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + KILL %89, %90, %91, %92, %93, %193 + + bb.7: + undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = 
IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, 
%63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + KILL %89, %90, %91, %92, %93, %193 + + bb.7: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, 
[[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} 
+ ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 
%60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + S_BRANCH %bb.5 + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + + bb.5: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, 
[[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: 
[[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 
+ S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.5 + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.5: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... 
+ +--- +name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit 
killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; 
CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = 
IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, 
%64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.8 + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.8: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: 
[[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, 
implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.8 + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.8: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7]] + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6]] + ; CHECK-NEXT: KILL [[COPY10]], [[COPY5]], [[COPY12]], [[COPY7]], [[COPY14]], [[COPY9]], [[COPY16]], [[COPY11]], [[COPY6]], [[COPY13]], [[COPY8]], [[COPY15]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY18]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY19]], [[COPY21]], [[COPY20]], [[COPY22]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, 
implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 
0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %194:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %195:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %196:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %197:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %198:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %199:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + + bb.2: + KILL %89, %90, %91, %92, %93, %193, %194, %195, %196, %197, %198, %199 + + + bb.3: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.5, implicit killed $scc + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + S_BRANCH %bb.5 + + bb.5: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + + bb.6: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], 
[[DEF14]], [[DEF17]], [[COPY13]], [[COPY15]], [[COPY14]], [[COPY16]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + KILL %89, %90, %91, %92, %93, %193 + + bb.3: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.5, implicit killed $scc + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.6 + + bb.5: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + 
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.6: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.8, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: 
+ ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY15]], [[COPY13]], [[COPY16]], [[COPY14]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef 
%84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + KILL %89, %90, %91, %92, %93, %193 + + + bb.7: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.9, implicit killed $scc + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.10 + + bb.9: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.10: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY 
[[COPY3]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.9: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... 
+ +--- +name: src2_singledef_singleuse_dst_singleuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY7]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]] + ; 
CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, 
%84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+# Test src2_multidef_singleuse_dst_singleuse_singledef_agpr.
+# Input: %84 is loaded by DS_READ_B128_gfx9 in bb.0. In bb.1 four ..._vgprcd_e64 MFMAs
+# read %84 as their third operand and overwrite %84; six further MFMAs read %84 and
+# define %89-%93 and %193, whose only use is the KILL in bb.2. %84.sub0 is also read by
+# the V_ADD_U32_e32 in bb.3.
+# Expected (CHECK lines): every MFMA uses the non-vgprcd opcode with an areg_128_align2
+# result, a vreg->areg COPY follows the DS_READ in bb.0, and areg->vreg COPYs feed the
+# KILL in bb.2 and the V_ADD_U32_e32 in bb.3.
+ + + +--- +name: src2_multidef_singleuse_dst_singleuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], 
[[DEF15]], [[DEF17]], [[COPY7]], [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.1: + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + KILL %89, %90, %91, %92, %93, %193 + + + bb.3: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+# Test src2_singledef_multiuse_dst_singleuse_singleedef_agpr (first function below).
+# NOTE(review): "singleedef" looks like a typo for "singledef"; the name is left as is
+# because the CHECK-LABEL line has to match it.
+# Input: %84 is defined by a DS_READ_B128_gfx9 in each of the two branch arms (offsets 0
+# and 128); four ..._vgprcd_e64 MFMAs in the join block then read and overwrite %84, and
+# the last block only uses %84 in a KILL.
+# Expected (CHECK lines): a vreg->areg COPY follows each DS_READ, the MFMAs use the
+# non-vgprcd opcode on an areg_128_align2 register, and one areg->vreg COPY feeds the KILL.
+#
+# Test src2_multidef_multiuse_dst_singleuse_singledef_agpr (second function below).
+# Input: same control flow and DS_READ definitions of %84, but the four MFMAs form a chain
+# %84 -> %85 -> %86 -> %87 -> %88; %88 is stored by DS_WRITE_B128_gfx9 and %85-%88 are all
+# listed in the final KILL.
+# Expected (CHECK lines): the chain is rewritten to areg_128_align2 results, the last result
+# is copied back to a vreg for the DS_WRITE, and the other three results are copied back to
+# vregs for the KILL.
+ + +--- +name: src2_singledef_multiuse_dst_singleuse_singleedef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singleedef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, 
implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 128, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, 
implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + + bb.4: + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + SCHED_BARRIER 0 + KILL %1, %2, 
%3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_singleuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 128, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 
%60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_singledef_singleuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, 
implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, 
%64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_singleuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 128, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 
= IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_singledef_multiuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], 
[[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 128, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = 
IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_multiuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], 
[[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 384, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + 
%72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 384, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + + +--- +name: src2_singledef_singleuse_dst_multiuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: 
successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY2]], 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, 
implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec + + bb.5: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_singleuse_dst_multiuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 256, 0, implicit $exec + ; 
CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + 
%84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit $exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec + + bb.5: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... 
+ + +--- +name: src2_singledef_multiuse_dst_multiuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit 
killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: 
successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = 
IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.8 + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + 
DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.8: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_multidef_multiuse_dst_multiuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], 
[[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, 
implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.8 + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.8: + DS_WRITE_B32_gfx9 %64:vgpr_32, %84.sub0:vreg_128_align2, 256, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_singledef_singleuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = 
IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, 
[[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY6]], 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = 
IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, 
%88:vreg_128_align2, 0, 0, implicit $exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 256, 0, implicit $exec + + bb.5: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_multidef_singleuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, 
implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], 
[[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 256, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + 
%7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit 
$exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec + + bb.5: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_singledef_multiuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: 
[[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: 
[[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit 
$mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.9 + + bb.8: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.9: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_multidef_multiuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], 
[[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], 
[[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.9 + + bb.8: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.9: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 256, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... 
+ +--- +name: src2_singledef_singleuse_dst_singleuse_singledef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: KILL [[DEF]], 
[[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + 
DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_multidef_singleuse_dst_multiuse_multidef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY5]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: undef [[DEF21:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF21:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY 
[[COPY3]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[DEF21]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + %94:vreg_128_align2 = IMPLICIT_DEF + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.9: + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... 
+ +--- +name: src2_singledef_multiuse_dst_singleuse_multidef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY11]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + 
undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 
0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_multiuse_multidef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = 
IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY2]], 0, 0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
{{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, 
%61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.8 + + bb.7: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + %94:vreg_128_align2 = IMPLICIT_DEF + + bb.8: + %95:vgpr_32 = V_ADD_U32_e32 %84.sub0, %64, implicit $exec + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: no_copy_for_mfma +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: no_copy_for_mfma + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: dead [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = COPY [[DEF21]] + ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = COPY [[DEF22]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: KILL [[DEF]], 
[[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY2]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %88:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %88:vreg_128_align2 = IMPLICIT_DEF + S_BRANCH %bb.4 + + + bb.3: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.5, implicit killed $scc + + bb.4: + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + + bb.5: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir new file mode 100644 index 0000000000000..050e4bc5e941c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir @@ -0,0 +1,524 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- | + define void @more_copies_than_spills() #0 { + entry: + unreachable + } + + define void @less_copies_than_spills() #0 { + entry: + unreachable + } + + define void @low_pressure() { + entry: + unreachable + } + + attributes #0 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"} +... + + +--- +name: more_copies_than_spills +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: more_copies_than_spills + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.9(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.9, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_3:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: successors: %bb.10(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_4:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF18]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub2, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.10: + ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF18]].sub0, implicit $exec + ; 
CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + 
%94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.10, implicit killed $scc + + bb.9: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + + bb.10: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %85.sub0, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub2, %64, implicit $exec + + bb.11: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %85.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... + + +--- +name: less_copies_than_spills +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: less_copies_than_spills + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 
%bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY4]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, 
implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + + bb.9: + %104:vgpr_32 = V_ADD_U32_e32 
%72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: low_pressure +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: low_pressure + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %5 + ; CHECK-NEXT: S_NOP 0, implicit-def %6 + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF8:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF9:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; 
CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub0, [[DEF5]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF5]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub0, [[DEF5]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF7]].sub0, [[DEF5]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF6]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: KILL [[DEF4]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF12]], [[DEF6]], [[DEF7]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %5, implicit %6 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 
%85.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + + bb.9: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %8, %10, %11, %12, %13, %62, %72, %85, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... From a1c766989e72b986565d2c0468cd8d491e1139aa Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Fri, 22 Aug 2025 16:14:19 -0700 Subject: [PATCH 02/17] Do not rewrite to AGPR if waves-per-eu >= 2 Change-Id: I4ab71a3c739a203399a201e47d6b37ceba723bf2 --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 453e9d28f2a2e..54a46e4ef222e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1297,6 +1297,8 @@ void RewriteScheduleStage::findReachingUses( bool RewriteScheduleStage::initGCNSchedStage() { const GCNSubtarget &ST = MF.getSubtarget(); + if (!ST.hasGFX90AInsts() || MFI.getMinWavesPerEU() > 1) + return false; RegionsWithExcessArchVGPR.resize(DAG.Regions.size()); RegionsWithExcessArchVGPR.reset(); @@ -1306,7 +1308,7 @@ bool RewriteScheduleStage::initGCNSchedStage() { RegionsWithExcessArchVGPR[Region] = true; } - if (!ST.hasGFX90AInsts() || RegionsWithExcessArchVGPR.none()) + if (RegionsWithExcessArchVGPR.none()) return false; TII = ST.getInstrInfo(); From 7e9fc5f628ac16a144182b9fd5cdd7eba9d305c0 Mon Sep 17 00:00:00 2001 From: Jeffrey Byrnes Date: Thu, 4 Sep 2025 16:22:01 -0700 Subject: [PATCH 03/17] Review comments Change-Id: I99db02cea2777024b4948a55d6a298c384f40534 --- llvm/lib/Target/AMDGPU/GCNRegPressure.h | 3 +- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 130 
++++++++++---------- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 22 ++-- 3 files changed, 74 insertions(+), 81 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index 7ca8ea7be09f0..d13d1ddd9c0eb 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -102,7 +102,8 @@ struct GCNRegPressure { DynamicVGPRBlockSize)); } - unsigned getVGPRSpills(const GCNSubtarget &ST, MachineFunction &MF) { + unsigned getVGPRSpills(MachineFunction &MF) { + const GCNSubtarget &ST = MF.getSubtarget(); if (!ST.hasGFX90AInsts()) return 0; diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 54a46e4ef222e..7f945f5c094ee 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1223,22 +1223,20 @@ bool GCNSchedStage::initGCNSchedStage() { return true; } -SlotIndex -RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO, - LiveIntervals *LIS, - SmallVectorImpl &DefIdxs) { +void RewriteScheduleStage::findReachingDefs( + MachineOperand &UseMO, LiveIntervals *LIS, + SmallVectorImpl &DefIdxs) { assert(UseMO.isReg()); MachineInstr *UseMI = UseMO.getParent(); LiveInterval &UseLI = LIS->getInterval(UseMO.getReg()); - auto VNInfo = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI)); + VNInfo *VNI = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI)); - SlotIndex DefMBBStart = - LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNInfo->def)); + SlotIndex DefMBBStart = LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNI->def)); // If the def is in the block, then it must be the only reaching def. 
- if (DefMBBStart != VNInfo->def) { - DefIdxs.push_back(VNInfo->def); - return VNInfo->def; + if (DefMBBStart != VNI->def) { + DefIdxs.push_back(VNI->def); + return; } SmallPtrSet Visited; @@ -1256,15 +1254,15 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO, MachineBasicBlock *CurrMBB = Worklist.pop_back_val(); SlotIndex CurrMBBEnd = LIS->getMBBEndIdx(CurrMBB); - auto VNInfo = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot()); + VNInfo *VNI = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot()); - MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNInfo->def); + MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNI->def); SlotIndex DefMBBStart = LIS->getMBBStartIdx(DefMBB); // If there is a def in this block, then add it to the list. This is the // reaching def of this path. - if (DefMBBStart != VNInfo->def) { - DefIdxs.push_back(VNInfo->def); + if (DefMBBStart != VNI->def) { + DefIdxs.push_back(VNI->def); continue; } @@ -1273,8 +1271,6 @@ RewriteScheduleStage::findReachingDefs(MachineOperand &UseMO, Worklist.push_back(PredMBB); } } - - return VNInfo->def; } void RewriteScheduleStage::findReachingUses( @@ -1288,9 +1284,9 @@ void RewriteScheduleStage::findReachingUses( // If we find a use that contains this DefMI in its reachingDefs, then it is // a reaching use. 
- if (find_if(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) { + if (any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) { return SlotIndex::isSameInstr(RDIdx, DefIdx); - }) != ReachingDefIndexes.end()) + })) ReachingUses.push_back(&UseMO); } } @@ -1966,27 +1962,29 @@ bool RewriteScheduleStage::initHeuristics( // Prepare for the heuristics for (auto &MBB : MF) { for (auto &MI : MBB) { - if (isRewriteCandidate(&MI)) { - int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()); - if (ReplacementOp == -1) - continue; + if (!isRewriteCandidate(&MI)) + continue; - RewriteCands.push_back({&MI, MI.getOpcode()}); - MI.setDesc(TII->get(ReplacementOp)); + int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()); + if (ReplacementOp == -1) + continue; - MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); - if (Src2->isReg()) { - SmallVector Src2ReachingDefs; - findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs); + RewriteCands.push_back({&MI, MI.getOpcode()}); + MI.setDesc(TII->get(ReplacementOp)); - // For any definition of the src2 register which is non-MFMA, we - // insert a copy. - for (SlotIndex RDIdx : Src2ReachingDefs) { - MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx); - if (!TII->isMAI(*RD)) - CopyForDef.insert(RD); - } + MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (Src2->isReg()) { + SmallVector Src2ReachingDefs; + findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs); + + // For any definition of the src2 register which is non-MFMA, we + // insert a copy. 
+ for (SlotIndex RDIdx : Src2ReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx); + if (!TII->isMAI(*RD)) + CopyForDef.insert(RD); } + } MachineOperand &Dst = MI.getOperand(0); SmallVector DstReachingUses; @@ -2024,7 +2022,6 @@ bool RewriteScheduleStage::initHeuristics( DAG.MRI.setRegClass(Dst.getReg(), AGPRRC); if (Src2->isReg()) DAG.MRI.setRegClass(Src2->getReg(), AGPRRC); - } } } @@ -2032,28 +2029,32 @@ bool RewriteScheduleStage::initHeuristics( } int64_t RewriteScheduleStage::getRewriteCost( - std::vector> &RewriteCands, - DenseMap> &CopyForUse, - SmallPtrSetImpl &CopyForDef) { + const std::vector> &RewriteCands, + const DenseMap> &CopyForUse, + const SmallPtrSetImpl &CopyForDef) { + MachineBranchProbabilityInfo MBPI; + MachineBlockFrequencyInfo MBFI; + MBFI.calculate(MF, MBPI, *DAG.MLI); int64_t BestSpillCost = 0; int64_t Cost = 0; + uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency(); + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { if (!RegionsWithExcessArchVGPR[Region]) continue; - auto PressureBefore = DAG.Pressure[Region]; - unsigned SpillCostBefore = PressureBefore.getVGPRSpills(ST, MF); + GCNRegPressure &PressureBefore = DAG.Pressure[Region]; + unsigned SpillCostBefore = PressureBefore.getVGPRSpills(MF); // For the cases we care about (i.e. ArchVGPR usage is greater than the // addressable limit), rewriting alone should bring pressure to manageable // level. If we find any such region, then the rewrite is potentially // beneficial. 
- auto PressureAfter = DAG.getRealRegPressure(Region); - unsigned SpillCostAfter = PressureAfter.getVGPRSpills(ST, MF); + GCNRegPressure PressureAfter = DAG.getRealRegPressure(Region); + unsigned SpillCostAfter = PressureAfter.getVGPRSpills(MF); - uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency(); uint64_t BlockFreq = MBFI.getBlockFreq(DAG.Regions[Region].first->getParent()) .getFrequency(); @@ -2090,8 +2091,6 @@ int64_t RewriteScheduleStage::getRewriteCost( unsigned CopyCost = 0; - uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency(); - // For each CopyForDef, increase the cost by the register size while // accounting for block frequency. for (auto *DefMI : CopyForDef) { @@ -2107,12 +2106,11 @@ int64_t RewriteScheduleStage::getRewriteCost( } // Account for CopyForUse copies in each block that the register is used. - for (auto &UseEntry : CopyForUse) { + for (auto &[UseBlock, UseRegs] : CopyForUse) { uint64_t UseFreq = - EntryFreq ? MBFI.getBlockFreq(UseEntry.first).getFrequency() / EntryFreq - : 1; + EntryFreq ? MBFI.getBlockFreq(UseBlock).getFrequency() / EntryFreq : 1; - for (auto UseReg : UseEntry.second) { + for (auto UseReg : UseRegs) { unsigned RegSize = DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(UseReg)); unsigned NumRegs = std::max(RegSize / 32, (unsigned)1); @@ -2124,9 +2122,7 @@ int64_t RewriteScheduleStage::getRewriteCost( // Reset to the vgpr form. We must do rewriting after copy-insertion, as some // defs of the register may require VGPR. 
- for (auto RI : RewriteCands) { - MachineInstr *MI = RI.first; - + for (auto &[MI, OriginalOpcode] : RewriteCands) { assert(TII->isMAI(*MI)); const TargetRegisterClass *AGPRRC = DAG.MRI.getRegClass(MI->getOperand(0).getReg()); @@ -2135,18 +2131,17 @@ int64_t RewriteScheduleStage::getRewriteCost( MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2); assert(Src2); - if (Src2->isReg()) { + if (Src2->isReg()) DAG.MRI.setRegClass(Src2->getReg(), VGPRRC); - } DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VGPRRC); - MI->setDesc(TII->get(RI.second)); + MI->setDesc(TII->get(OriginalOpcode)); } return Cost; } bool RewriteScheduleStage::rewrite( - std::vector> &RewriteCands) { + const std::vector> &RewriteCands) { DenseMap FirstMIToRegion; DenseMap LastMIToRegion; @@ -2180,7 +2175,7 @@ bool RewriteScheduleStage::rewrite( // want to replace the register it is using with the result of the copy, we // must handle case 3. In the third case, we simply insert a copy after each // of the reaching defs to connect to the copy of the reaching uses of the dst - // reg. This allows us to avoid inserting copies next to the' MFMAs. + // reg. This allows us to avoid inserting copies next to the MFMAs. // // While inserting the copies, we maintain a map of operands which will use // different regs (i.e. the result of the copies). For example, a case 1 src2 @@ -2191,14 +2186,14 @@ bool RewriteScheduleStage::rewrite( // queries. // // While inserting the copies, we also maintain a list or registers which we - // will want to reclassify as AGPR. After doing the copy isnertion and the + // will want to reclassify as AGPR. After doing the copy insertion and the // register replacement, we can finally do the reclassification. This uses the // redef map, as the registers we are interested in reclassifying may be // replaced by the result of a copy. 
We must do this after the copy analysis // and placement as we must have an accurate redef map -- otherwise we may end // up creating illegal instructions. - // The original registers of the MFMA that need to be reclassified as AGPR + // The original registers of the MFMA that need to be reclassified as AGPR. std::set RewriteRegs; // The map of an original register in the MFMA to a new register (result of a // copy) that it should be replaced with. @@ -2212,16 +2207,15 @@ bool RewriteScheduleStage::rewrite( DenseMap>> ReachingUseTracker; - for (auto &RI : RewriteCands) { - MachineInstr &MI = *RI.first; + for (auto &[MI, OriginalOpcode] : RewriteCands) { - int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()); + int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); if (ReplacementOp == -1) continue; - MI.setDesc(TII->get(ReplacementOp)); + MI->setDesc(TII->get(ReplacementOp)); // Case 1: insert copies for the reaching defs of the Src2Reg. - MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); + MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2); if (Src2->isReg()) { Register Src2Reg = Src2->getReg(); @@ -2291,7 +2285,7 @@ bool RewriteScheduleStage::rewrite( // Case 2 and Case 3: insert copies before the reaching uses of the dsts, // and after the reaching defs of the reaching uses of the dsts. 
- MachineOperand *Dst = &MI.getOperand(0); + MachineOperand *Dst = &MI->getOperand(0); Register DstReg = Dst->getReg(); if (!DstReg.isVirtual()) return false; @@ -2302,7 +2296,7 @@ bool RewriteScheduleStage::rewrite( SmallVector DstReachingUseCopies; SmallVector DstUseDefsReplace; - findReachingUses(&MI, DAG.LIS, DstReachingUses); + findReachingUses(MI, DAG.LIS, DstReachingUses); for (MachineOperand *RUOp : DstReachingUses) { if (TII->isMAI(*RUOp->getParent())) @@ -2366,7 +2360,7 @@ bool RewriteScheduleStage::rewrite( MachineBasicBlock *RUBlock = RU->getParent()->getParent(); // Just keep track of the reaching use of this register by block. After we // have scanned all the MFMAs we can find optimal insert pts. - if (RUBlock != MI.getParent()) { + if (RUBlock != MI->getParent()) { ReachingUseTracker[RUBlock->getNumber()][DstReg].insert(RU); continue; } diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index e2d4f49b4ef16..f7a9f79bf7364 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -425,9 +425,6 @@ class RewriteScheduleStage : public GCNSchedStage { // spilling. BitVector RegionsWithExcessArchVGPR; - MachineBranchProbabilityInfo MBPI; - MachineBlockFrequencyInfo MBFI; - const SIInstrInfo *TII; const SIRegisterInfo *SRI; @@ -443,23 +440,24 @@ class RewriteScheduleStage : public GCNSchedStage { SmallPtrSetImpl &CopyForDef); /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done - /// in initHueristics. Uses \p CopyForUse and \p CopyForDef to calculate copy + /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy /// costs, and \p RewriteCands to undo rewriting. 
- int64_t - getRewriteCost(std::vector> &RewriteCands, - DenseMap> &CopyForUse, - SmallPtrSetImpl &CopyForDef); + int64_t getRewriteCost( + const std::vector> &RewriteCands, + const DenseMap> &CopyForUse, + const SmallPtrSetImpl &CopyForDef); /// Do the final rewrite on \p RewriteCands and insert any needed copies. - bool rewrite(std::vector> &RewriteCands); + bool + rewrite(const std::vector> &RewriteCands); /// \returns true if this MI is a rewrite candidate. bool isRewriteCandidate(MachineInstr *MI) const; /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p - /// DefIdx - SlotIndex findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS, - SmallVectorImpl &DefIdxs); + /// DefIdxs + void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS, + SmallVectorImpl &DefIdxs); /// Finds all the reaching uses of \p DefMI and stores the use operands in \p /// ReachingUses From ebb7c21739b18999075f190b5382fba5bb35478f Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Thu, 20 Nov 2025 09:30:11 -0600 Subject: [PATCH 04/17] merge of PR 149367 --- llvm/include/llvm/CodeGen/MachineInstrBuilder.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index 1d5d2b6376f5c..5b4d59f2518a1 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -475,8 +475,7 @@ inline MachineInstrBuilder BuildMIAfter(MachineBasicBlock &BB, MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); BB.insertAfter(I, MI); return MachineInstrBuilder(MF, MI) - .setPCSections(MIMD.getPCSections()) - .setMMRAMetadata(MIMD.getMMRAMetadata()); + .copyMIMetadata(MIMD); } inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, From d51f0cbf33445f57c94c158ceec10f7a0360138d Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Tue, 25 Nov 2025 08:20:48 -0600 Subject: [PATCH 05/17] Address PR 149367 
review comments --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 18 ++++++++---------- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 2 +- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 7f945f5c094ee..f1d79820ae232 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1245,7 +1245,7 @@ void RewriteScheduleStage::findReachingDefs( Visited.insert(UseMI->getParent()); // Mark the predecessor blocks for traversal - for (auto PredMBB : UseMI->getParent()->predecessors()) { + for (auto *PredMBB : UseMI->getParent()->predecessors()) { Worklist.push_back(PredMBB); Visited.insert(PredMBB); } @@ -1266,7 +1266,7 @@ void RewriteScheduleStage::findReachingDefs( continue; } - for (auto PredMBB : DefMBB->predecessors()) { + for (auto *PredMBB : DefMBB->predecessors()) { if (Visited.insert(PredMBB).second) Worklist.push_back(PredMBB); } @@ -1966,8 +1966,7 @@ bool RewriteScheduleStage::initHeuristics( continue; int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()); - if (ReplacementOp == -1) - continue; + assert(ReplacementOp != -1) RewriteCands.push_back({&MI, MI.getOpcode()}); MI.setDesc(TII->get(ReplacementOp)); @@ -2238,9 +2237,9 @@ bool RewriteScheduleStage::rewrite( } if (!Src2DefsReplace.empty()) { - if (RedefMap.contains(Src2Reg)) + if (RedefMap.contains(Src2Reg)) { MappedReg = RedefMap[Src2Reg]; - else { + } else { assert(!ReachingDefCopyMap.contains(Src2Reg)); const TargetRegisterClass *Src2RC = DAG.MRI.getRegClass(Src2Reg); const TargetRegisterClass *VGPRRC = @@ -2399,7 +2398,7 @@ bool RewriteScheduleStage::rewrite( SlotIndex InstPt = DAG.LIS->getInstructionIndex(*OpBegin->getParent()); // Find the earliest use in this block. 
- for (auto User : RUDst.second) { + for (auto *User : RUDst.second) { SlotIndex NewInstPt = DAG.LIS->getInstructionIndex(*User->getParent()); if (SlotIndex::isEarlierInstr(NewInstPt, InstPt)) InstPt = NewInstPt; @@ -2426,7 +2425,7 @@ bool RewriteScheduleStage::rewrite( } // Replace the operand for all users. - for (auto User : RUDst.second) { + for (auto *User : RUDst.second) { User->setReg(NewUseReg); } @@ -2443,9 +2442,8 @@ bool RewriteScheduleStage::rewrite( Register NewReg = NewDef.second; // Replace the register for any associated operand in the MFMA chain. - for (MachineOperand *ReplaceOp : ReplaceMap[OldReg]) { + for (MachineOperand *ReplaceOp : ReplaceMap[OldReg]) ReplaceOp->setReg(NewReg); - } } // Finally, do the reclassification of the MFMA registers. diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index f7a9f79bf7364..76dba27d6f83f 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -429,7 +429,7 @@ class RewriteScheduleStage : public GCNSchedStage { const SIRegisterInfo *SRI; /// Do a speculative rewrite and collect copy locations. The speculative - /// rewrite allows us to calulcate the RP of the code after the rewrite, and + /// rewrite allows us to calculate the RP of the code after the rewrite, and /// the copy locations allow us to calculate the total cost of copies required /// for the rewrite. 
Stores the rewritten instructions in \p RewriteCands , /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the From 53b6f907a74fbc487ae36965949f9fcf1ade77db Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Mon, 1 Dec 2025 11:09:22 -0600 Subject: [PATCH 06/17] Address PR 149367 review comments --- .../llvm/CodeGen/MachineInstrBuilder.h | 23 ++++++++----------- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 20 ++++++++-------- 2 files changed, 20 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index 5b4d59f2518a1..9895e8d9cb768 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -380,6 +380,15 @@ class MachineInstrBuilder { return *this; } + /// Inserts the newly-built instruction after the given position in the + /// given MachineBasicBlock. + const MachineInstrBuilder &insertAfter(MachineInstr *MInstr) const { + MachineBasicBlock *MBB = MInstr->getParent(); + MachineBasicBlock::iterator I = MInstr->getIterator(); + MBB->insertAfter(I, MI); + return *this; + } + bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const { @@ -464,20 +473,6 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, return MachineInstrBuilder(MF, MI).copyMIMetadata(MIMD); } -/// This version of the builder inserts the newly-built instruction after the -/// given position in the given MachineBasicBlock, and does NOT take a -/// destination register. 
-inline MachineInstrBuilder BuildMIAfter(MachineBasicBlock &BB, - MachineBasicBlock::iterator I, - const MIMetadata &MIMD, - const MCInstrDesc &MCID) { - MachineFunction &MF = *BB.getParent(); - MachineInstr *MI = MF.CreateMachineInstr(MCID, MIMD.getDL()); - BB.insertAfter(I, MI); - return MachineInstrBuilder(MF, MI) - .copyMIMetadata(MIMD); -} - inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB, MachineBasicBlock::instr_iterator I, const MIMetadata &MIMD, diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index f1d79820ae232..0773789c0ace2 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1966,7 +1966,7 @@ bool RewriteScheduleStage::initHeuristics( continue; int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()); - assert(ReplacementOp != -1) + assert(ReplacementOp != -1); RewriteCands.push_back({&MI, MI.getOpcode()}); MI.setDesc(TII->get(ReplacementOp)); @@ -2256,10 +2256,10 @@ bool RewriteScheduleStage::rewrite( // Do not create redundant copies. if (ReachingDefCopyMap[Src2Reg].insert(RD).second) { MachineInstrBuilder VGPRCopy = - BuildMIAfter(*RD->getParent(), RD->getIterator(), - RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) + BuildMI(DAG.MF, RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) .addDef(MappedReg, 0, 0) - .addUse(Src2Reg, 0, 0); + .addUse(Src2Reg, 0, 0) + .insertAfter(RD); DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); // If this reaching def was the last MI in the region, update the @@ -2338,10 +2338,10 @@ bool RewriteScheduleStage::rewrite( // Do not create reundant copies. 
if (ReachingDefCopyMap[DstReg].insert(RD).second) { MachineInstrBuilder VGPRCopy = - BuildMIAfter(*RD->getParent(), RD->getIterator(), - RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) + BuildMI(DAG.MF, RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) .addDef(MappedReg, 0, 0) - .addUse(DstReg, 0, 0); + .addUse(DstReg, 0, 0) + .insertAfter(RD); DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); // If this reaching def was the last MI in the region, update the @@ -2418,10 +2418,10 @@ bool RewriteScheduleStage::rewrite( // If this UseInst was the first MI in the region, update the region // boundaries. - if (LastMIToRegion.contains(UseInst)) { + if (FirstMIToRegion.contains(UseInst)) { unsigned UpdateRegion = FirstMIToRegion[UseInst]; DAG.Regions[UpdateRegion].first = VGPRCopy; - LastMIToRegion.erase(UseInst); + FirstMIToRegion.erase(UseInst); } // Replace the operand for all users. @@ -2469,6 +2469,8 @@ bool RewriteScheduleStage::rewrite( for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region); + DAG.Pressure[RegionIdx] = DAG.getRealRegPressure(RegionIdx); + return true; } From 0fd782bab2fc7920e9398a4e23902706ef7e01a6 Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Tue, 2 Dec 2025 10:03:06 -0600 Subject: [PATCH 07/17] [AMDGPU] Add scheduling stage to rewrite MFMA from VGPR to AGPR --- .../AMDGPU/sched_mfma_rewrite_copies.mir | 2082 ++++++++--------- .../AMDGPU/sched_mfma_rewrite_cost.mir | 114 +- 2 files changed, 1098 insertions(+), 1098 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir index 73eeafb6bccc5..f485b088c8034 100644 --- a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir @@ -215,42 +215,42 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: 
[[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, 
implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -316,42 +316,42 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF 
; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: 
%bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, 
[[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -418,46 +418,42 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = 
IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = 
contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: SCHED_BARRIER 0 @@ -465,7 +461,11 @@ body: | ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], 
[[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -538,55 +538,55 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; 
CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: 
[[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -662,32 +662,28 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; 
CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc @@ -695,16 +691,16 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -719,12 +715,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], 
[[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -805,32 +805,28 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; 
CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc @@ -838,16 +834,16 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: 
%bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -861,14 +857,18 @@ body: | ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: KILL [[DEF]], 
[[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -951,30 +951,26 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc @@ -982,14 +978,14 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -1002,16 +998,16 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) @@ -1026,12 +1022,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 
= V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1124,30 +1124,26 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY 
[[V_ADD_U32_e32_]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc @@ -1155,14 +1151,14 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -1175,16 +1171,16 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 
4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, 
[[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, 
[[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) @@ -1198,14 +1194,18 @@ body: | ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1299,33 +1299,29 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = 
IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -1337,21 +1333,25 @@ body: | ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], 
[[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1427,33 +1427,29 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 
4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -1465,25 +1461,29 @@ body: | ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 
[[COPY1]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 
= IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1563,46 +1563,42 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -1614,23 +1610,27 @@ body: | ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec - ; CHECK-NEXT: 
[[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1716,46 +1716,42 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -1767,25 +1763,29 @@ body: | ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec - ; CHECK-NEXT: 
[[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], 
[[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1871,33 +1871,28 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 
%bb.4, implicit killed $scc @@ -1905,22 +1900,22 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY2]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, 
[[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -1949,21 +1944,26 @@ body: | ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY17]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY18]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY18]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] ; CHECK-NEXT: [[COPY22:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY19]], [[COPY21]], [[COPY20]], [[COPY22]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF17]], 
[[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[COPY19]], [[COPY21]], [[COPY20]], [[COPY22]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -2061,32 +2061,28 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc @@ -2094,16 +2090,16 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], 
[[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], 
[[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -2126,25 +2122,29 @@ body: | ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
bb.6: - ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY13]], [[COPY15]], [[COPY14]], [[COPY16]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY13]], [[COPY15]], [[COPY14]], [[COPY16]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -2238,30 +2238,26 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc @@ -2269,14 +2265,14 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -2289,16 +2285,16 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, 
implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit 
$exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) @@ -2321,23 +2317,27 @@ body: | ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.9: ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = 
COPY [[COPY2]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY15]], [[COPY13]], [[COPY16]], [[COPY14]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY15]], [[COPY13]], [[COPY16]], [[COPY14]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -2441,30 +2441,26 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc @@ -2472,14 +2468,14 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -2492,10 +2488,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -2507,25 +2503,29 @@ body: | ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub1, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = 
V_ADD_U32_e32 [[COPY6]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: - ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -2622,41 +2622,36 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = 
IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], 
[[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], 
[[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -2671,9 +2666,14 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY7]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY7]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY7]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -2752,41 +2752,37 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 
= IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], 
[[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -2801,9 +2797,13 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[COPY7]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[V_ADD_U32_e32_]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY7]], [[V_ADD_U32_e32_]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -2882,51 +2882,51 @@ body: | ; CHECK-NEXT: 
[[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: 
[[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, 
implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY1]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3000,55 +3000,55 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; 
CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3122,52 +3122,52 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = 
IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = 
IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], 
[[DEF16]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3240,53 +3240,53 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 
0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 128, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], 
[[COPY5]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3360,45 +3360,40 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 
= IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -3411,19 +3406,24 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; 
CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 128, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3506,45 +3506,40 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; 
CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = 
IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -3557,20 +3552,25 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, 
implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 384, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 384, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]] + ; 
CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3656,33 +3656,28 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -3694,21 +3689,26 @@ body: | ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY1]], 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY2]], 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY2]], 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] 
; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3783,33 +3783,28 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: 
S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, 
[[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -3821,25 +3816,30 @@ body: | ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 256, 0, implicit $exec + ; CHECK-NEXT: 
DS_WRITE_B32_gfx9 [[DEF9]], [[COPY1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY1]].sub1, 256, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY2]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY2]].sub0, 256, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], 
[[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3920,46 +3920,41 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -3971,23 +3966,28 @@ body: | ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 128, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: 
successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4074,46 +4074,41 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = 
IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: 
bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -4125,25 +4120,30 @@ body: | ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY1]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 128, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[COPY2]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 128, 0, implicit 
$exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF16]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4230,43 +4230,38 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, 
[[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -4278,21 +4273,26 @@ body: | ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY6]], 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY6]], 256, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4375,43 +4375,38 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = 
DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY2]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, 
[[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -4423,25 +4418,30 @@ body: | ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub1, 256, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub0, 256, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4528,45 +4528,40 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: 
[[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = 
IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -4579,10 +4574,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -4594,23 +4589,28 @@ body: | ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: 
DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub1, 128, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub0, 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4705,45 +4705,40 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 512, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -4756,10 +4751,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY4]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit 
$mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF13]], [[DEF14]], [[COPY3]], 4, 4, [[DEF16]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -4771,25 +4766,30 @@ body: | ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY5]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub1, 128, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; 
CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY6]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub0, 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF15]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4886,42 +4886,42 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; 
CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4989,30 +4989,26 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; 
CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY 
[[DEF14]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc @@ -5020,14 +5016,14 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -5040,10 +5036,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY4]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, 
[[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -5055,16 +5051,16 @@ body: | ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY5]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; 
CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[DEF21:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[DEF21:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[DEF17:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[DEF17:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: SCHED_BARRIER 0 @@ -5072,7 +5068,11 @@ body: | ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[DEF21]] + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF19]], [[DEF20]], [[DEF21]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF18]], [[DEF13]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[DEF17]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -5168,32 +5168,28 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY 
[[DEF19]] - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc @@ -5201,16 +5197,16 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY3]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -5224,14 +5220,18 @@ body: | ; CHECK-NEXT: 
KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY11]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY11]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -5314,46 +5314,42 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF16]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -5365,24 +5361,28 @@ body: | ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF16]], [[COPY2]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY2]], 0, 0, implicit $exec ; CHECK-NEXT: 
[[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -5468,68 +5468,68 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 
= IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: dead [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF19]] + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = COPY [[DEF20]] - ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = COPY [[DEF21]] - ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = COPY 
[[DEF22]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = COPY [[DEF17]] + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: 
SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY2]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF20]], [[DEF21]], [[DEF22]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF19]], [[DEF13]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY2]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir index 050e4bc5e941c..2982c99c3fa7b 100644 --- a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir @@ -45,40 +45,33 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; 
CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: dead [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -90,7 +83,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF12]], [[DEF13]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -101,8 +97,8 @@ body: | ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF16]], implicit $exec - ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: dead undef 
[[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF9]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: @@ -114,19 +110,23 @@ body: | ; CHECK-NEXT: bb.8: ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_3:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_3:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF9]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.9: ; CHECK-NEXT: successors: %bb.10(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_4:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub1, [[DEF18]].sub0, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF18]].sub2, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_4:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF11]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub2, [[DEF9]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.10: - ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF18]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF11]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[DEF18]], [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]] + ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF10]], [[DEF11]], [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -229,42 +229,35 @@ body: | ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: dead [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: dead [[DEF20:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF11]] + ; CHECK-NEXT: dead [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF17]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} @@ -277,7 +270,10 @@ body: | ; 
CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF12]], [[DEF13]], [[COPY1]], 4, 4, [[DEF15]].sub0, [[DEF16]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF14]], [[DEF15]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -289,22 +285,26 @@ body: | ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF9]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub0, [[DEF16]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub1, [[DEF16]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = 
V_ADD_U32_e32 [[COPY3]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub1, [[DEF9]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: - ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF17]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF10]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: KILL [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF9]], [[DEF10]], [[DEF11]], [[DEF14]], [[DEF17]], [[COPY4]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF10]], [[COPY4]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: From 08ed9f76b63275b3dbf8e0b1c25ae52040744fc4 Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Tue, 2 Dec 2025 12:59:08 -0600 Subject: [PATCH 08/17] Remove unnecessary second hasGFX90AInsts conditional check --- llvm/lib/Target/AMDGPU/GCNRegPressure.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index d13d1ddd9c0eb..f04788c5d5715 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -120,15 +120,11 @@ struct GCNRegPressure { unsigned AGPRSpill = AGPRPressure > AGPRThreshold ?
(AGPRPressure - AGPRThreshold) : 0; - unsigned UnifiedSpill = 0; - - if (ST.hasGFX90AInsts()) { - unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF); - unsigned UnifiedPressure = getVGPRNum(true); - UnifiedSpill = UnifiedPressure > CombinedThreshold - ? (UnifiedPressure - CombinedThreshold) - : 0; - } + unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF); + unsigned UnifiedPressure = getVGPRNum(true); + unsigned UnifiedSpill = UnifiedPressure > CombinedThreshold + ? (UnifiedPressure - CombinedThreshold) + : 0; return std::max(UnifiedSpill, (ArchSpill + AGPRSpill)); } From 81b0444062333b98cefbd9f911fc0ab34e834bdf Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Thu, 4 Dec 2025 10:38:47 -0600 Subject: [PATCH 09/17] Format fixes from review comments. --- llvm/lib/Target/AMDGPU/GCNRegPressure.h | 5 +- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 58 ++++++++++----------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index f04788c5d5715..878b6d00ba4c6 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -107,7 +107,8 @@ struct GCNRegPressure { if (!ST.hasGFX90AInsts()) return 0; - auto MaxVectorRegs = ST.getMaxNumVectorRegs(MF.getFunction()); + std::pair MaxVectorRegs = + ST.getMaxNumVectorRegs(MF.getFunction()); unsigned ArchVGPRThreshold = MaxVectorRegs.first; unsigned AGPRThreshold = MaxVectorRegs.second; @@ -121,7 +122,7 @@ struct GCNRegPressure { AGPRPressure > AGPRThreshold ? (AGPRPressure - AGPRThreshold) : 0; unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF); - unsigned UnifiedPressure = getVGPRNum(true); + unsigned UnifiedPressure = getVGPRNum(/*UnifiedVGPRFile=*/true); unsigned UnifiedSpill = UnifiedPressure > CombinedThreshold ? 
(UnifiedPressure - CombinedThreshold) : 0; diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 0773789c0ace2..15afe4f52228c 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1299,7 +1299,7 @@ bool RewriteScheduleStage::initGCNSchedStage() { RegionsWithExcessArchVGPR.resize(DAG.Regions.size()); RegionsWithExcessArchVGPR.reset(); for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { - auto PressureBefore = DAG.Pressure[Region]; + GCNRegPressure PressureBefore = DAG.Pressure[Region]; if (PressureBefore.getArchVGPRNum() > ST.getAddressableNumArchVGPRs()) RegionsWithExcessArchVGPR[Region] = true; } @@ -1985,42 +1985,42 @@ bool RewriteScheduleStage::initHeuristics( } } - MachineOperand &Dst = MI.getOperand(0); - SmallVector DstReachingUses; + MachineOperand &Dst = MI.getOperand(0); + SmallVector DstReachingUses; - findReachingUses(&MI, DAG.LIS, DstReachingUses); + findReachingUses(&MI, DAG.LIS, DstReachingUses); - for (MachineOperand *RUOp : DstReachingUses) { - if (TII->isMAI(*RUOp->getParent())) - continue; + for (MachineOperand *RUOp : DstReachingUses) { + if (TII->isMAI(*RUOp->getParent())) + continue; - // For any user of the result of the MFMA which is not an MFMA, we - // insert a copy. For a given register, we will only insert one copy - // per user block. - CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg()); + // For any user of the result of the MFMA which is not an MFMA, we + // insert a copy. For a given register, we will only insert one copy + // per user block. 
+ CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg()); - SmallVector DstUsesReachingDefs; - findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs); + SmallVector DstUsesReachingDefs; + findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs); - for (auto RDIndex : DstUsesReachingDefs) { - MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); - if (TII->isMAI(*RD)) - continue; + for (auto RDIndex : DstUsesReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); + if (TII->isMAI(*RD)) + continue; - // For any definition of the user of the MFMA which is not an MFMA, - // we insert a copy. We do this to transform all the reaching defs - // of this use to AGPR. By doing this, we can insert a copy from - // AGPR to VGPR at the user rather than after the MFMA. - CopyForDef.insert(RD); - } + // For any definition of the user of the MFMA which is not an MFMA, + // we insert a copy. We do this to transform all the reaching defs + // of this use to AGPR. By doing this, we can insert a copy from + // AGPR to VGPR at the user rather than after the MFMA. + CopyForDef.insert(RD); } + } - // Do the rewrite to allow for updated RP calculation. - const TargetRegisterClass *VGPRRC = DAG.MRI.getRegClass(Dst.getReg()); - const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(VGPRRC); - DAG.MRI.setRegClass(Dst.getReg(), AGPRRC); - if (Src2->isReg()) - DAG.MRI.setRegClass(Src2->getReg(), AGPRRC); + // Do the rewrite to allow for updated RP calculation. 
+ const TargetRegisterClass *VGPRRC = DAG.MRI.getRegClass(Dst.getReg()); + const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(VGPRRC); + DAG.MRI.setRegClass(Dst.getReg(), AGPRRC); + if (Src2->isReg()) + DAG.MRI.setRegClass(Src2->getReg(), AGPRRC); } } From 1fdd2acd2a4a91026f9f71b3a7d62df1135bd9e9 Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Thu, 4 Dec 2025 11:51:47 -0600 Subject: [PATCH 10/17] Use VNInfo api to check for PHI def rather than doing it manually --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 15afe4f52228c..51e2c48a63fd2 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1231,10 +1231,8 @@ void RewriteScheduleStage::findReachingDefs( LiveInterval &UseLI = LIS->getInterval(UseMO.getReg()); VNInfo *VNI = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI)); - SlotIndex DefMBBStart = LIS->getMBBStartIdx(LIS->getMBBFromIndex(VNI->def)); - - // If the def is in the block, then it must be the only reaching def. - if (DefMBBStart != VNI->def) { + // If the def is not a PHI, then it must be the only reaching def. + if (!VNI->isPHIDef()) { DefIdxs.push_back(VNI->def); return; } @@ -1257,11 +1255,10 @@ void RewriteScheduleStage::findReachingDefs( VNInfo *VNI = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot()); MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNI->def); - SlotIndex DefMBBStart = LIS->getMBBStartIdx(DefMBB); // If there is a def in this block, then add it to the list. This is the // reaching def of this path. 
- if (DefMBBStart != VNI->def) { + if (!VNI->isPHIDef()) { DefIdxs.push_back(VNI->def); continue; } From f5c3045eec444bf3f10431ab912b7bc1b36eaba0 Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Fri, 5 Dec 2025 11:36:43 -0600 Subject: [PATCH 11/17] Review comments --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 49 ++++++++++++--------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 51e2c48a63fd2..b16344f46ab44 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1999,7 +1999,7 @@ bool RewriteScheduleStage::initHeuristics( SmallVector DstUsesReachingDefs; findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs); - for (auto RDIndex : DstUsesReachingDefs) { + for (SlotIndex RDIndex : DstUsesReachingDefs) { MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); if (TII->isMAI(*RD)) continue; @@ -2106,7 +2106,7 @@ int64_t RewriteScheduleStage::getRewriteCost( uint64_t UseFreq = EntryFreq ? MBFI.getBlockFreq(UseBlock).getFrequency() / EntryFreq : 1; - for (auto UseReg : UseRegs) { + for (Register UseReg : UseRegs) { unsigned RegSize = DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(UseReg)); unsigned NumRegs = std::max(RegSize / 32, (unsigned)1); @@ -2142,7 +2142,7 @@ bool RewriteScheduleStage::rewrite( DenseMap LastMIToRegion; for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { - auto Entry = DAG.Regions[Region]; + RegionBoundaries Entry = DAG.Regions[Region]; if (Entry.first == Entry.second) continue; @@ -2190,7 +2190,7 @@ bool RewriteScheduleStage::rewrite( // up creating illegal instructions. // The original registers of the MFMA that need to be reclassified as AGPR. - std::set RewriteRegs; + DenseSet RewriteRegs; // The map of an original register in the MFMA to a new register (result of a // copy) that it should be replaced with. 
DenseMap RedefMap; @@ -2204,7 +2204,6 @@ bool RewriteScheduleStage::rewrite( ReachingUseTracker; for (auto &[MI, OriginalOpcode] : RewriteCands) { - int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); if (ReplacementOp == -1) continue; @@ -2212,7 +2211,6 @@ bool RewriteScheduleStage::rewrite( // Case 1: insert copies for the reaching defs of the Src2Reg. MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2); - if (Src2->isReg()) { Register Src2Reg = Src2->getReg(); if (!Src2Reg.isVirtual()) @@ -2223,7 +2221,7 @@ bool RewriteScheduleStage::rewrite( findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs); SmallVector Src2DefsReplace; - for (auto RDIndex : Src2ReachingDefs) { + for (SlotIndex RDIndex : Src2ReachingDefs) { MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); if (TII->isMAI(*RD)) continue; @@ -2234,8 +2232,9 @@ bool RewriteScheduleStage::rewrite( } if (!Src2DefsReplace.empty()) { - if (RedefMap.contains(Src2Reg)) { - MappedReg = RedefMap[Src2Reg]; + DenseMap::iterator RI = RedefMap.find(Src2Reg); + if (RI != RedefMap.end()) { + MappedReg = RI->second; } else { assert(!ReachingDefCopyMap.contains(Src2Reg)); const TargetRegisterClass *Src2RC = DAG.MRI.getRegClass(Src2Reg); @@ -2304,7 +2303,7 @@ bool RewriteScheduleStage::rewrite( SmallVector DstUsesReachingDefs; findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs); - for (auto RDIndex : DstUsesReachingDefs) { + for (SlotIndex RDIndex : DstUsesReachingDefs) { MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); if (TII->isMAI(*RD)) continue; @@ -2317,9 +2316,10 @@ bool RewriteScheduleStage::rewrite( } if (!DstUseDefsReplace.empty()) { - if (RedefMap.contains(DstReg)) - MappedReg = RedefMap[DstReg]; - else { + DenseMap::iterator RI = RedefMap.find(DstReg); + if (RI != RedefMap.end()) { + MappedReg = RI->second; + } else { assert(!ReachingDefCopyMap.contains(DstReg)); const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg); const TargetRegisterClass 
*VGPRRC = SRI->getEquivalentVGPRClass(DstRC); @@ -2343,8 +2343,10 @@ bool RewriteScheduleStage::rewrite( // If this reaching def was the last MI in the region, update the // region boundaries. - if (LastMIToRegion.contains(RD)) { - unsigned UpdateRegion = LastMIToRegion[RD]; + DenseMap::iterator LMI = + LastMIToRegion.find(RD); + if (LMI != LastMIToRegion.end()) { + unsigned UpdateRegion = LMI->second; DAG.Regions[UpdateRegion].second = VGPRCopy; LastMIToRegion.erase(RD); } @@ -2389,13 +2391,16 @@ bool RewriteScheduleStage::rewrite( } // Handle the copies for dst uses. - for (auto RUBlockEntry : ReachingUseTracker) { - for (auto RUDst : RUBlockEntry.second) { + using RUBType = + std::pair>>; + for (RUBType RUBlockEntry : ReachingUseTracker) { + using RUDType = std::pair>; + for (RUDType RUDst : RUBlockEntry.second) { MachineOperand *OpBegin = *RUDst.second.begin(); SlotIndex InstPt = DAG.LIS->getInstructionIndex(*OpBegin->getParent()); // Find the earliest use in this block. - for (auto *User : RUDst.second) { + for (MachineOperand *User : RUDst.second) { SlotIndex NewInstPt = DAG.LIS->getInstructionIndex(*User->getParent()); if (SlotIndex::isEarlierInstr(NewInstPt, InstPt)) InstPt = NewInstPt; @@ -2415,8 +2420,10 @@ bool RewriteScheduleStage::rewrite( // If this UseInst was the first MI in the region, update the region // boundaries. - if (FirstMIToRegion.contains(UseInst)) { - unsigned UpdateRegion = FirstMIToRegion[UseInst]; + DenseMap::iterator FI = + FirstMIToRegion.find(UseInst); + if (FI != FirstMIToRegion.end()) { + unsigned UpdateRegion = FI->second; DAG.Regions[UpdateRegion].first = VGPRCopy; FirstMIToRegion.erase(UseInst); } @@ -2434,7 +2441,7 @@ bool RewriteScheduleStage::rewrite( // We may have needed to insert copies after the reaching defs of the MFMAs. // Replace the original register with the result of the copy for all relevant // operands. 
- for (auto NewDef : RedefMap) { + for (std::pair NewDef : RedefMap) { Register OldReg = NewDef.first; Register NewReg = NewDef.second; From d041e80143a8229ddca3f8ef065ed3e588ff0acd Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Mon, 8 Dec 2025 10:26:44 -0600 Subject: [PATCH 12/17] Review comments --- .../llvm/CodeGen/MachineInstrBuilder.h | 9 - llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 25 +- .../AMDGPU/sched_mfma_rewrite_copies.mir | 1232 ++++++----------- .../AMDGPU/sched_mfma_rewrite_cost.mir | 18 +- 4 files changed, 455 insertions(+), 829 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h index 9895e8d9cb768..060f0c41de73a 100644 --- a/llvm/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/llvm/include/llvm/CodeGen/MachineInstrBuilder.h @@ -380,15 +380,6 @@ class MachineInstrBuilder { return *this; } - /// Inserts the newly-built instruction after the given position in the - /// given MachineBasicBlock. - const MachineInstrBuilder &insertAfter(MachineInstr *MInstr) const { - MachineBasicBlock *MBB = MInstr->getParent(); - MachineBasicBlock::iterator I = MInstr->getIterator(); - MBB->insertAfter(I, MI); - return *this; - } - bool constrainAllUses(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI, const RegisterBankInfo &RBI) const { diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index b16344f46ab44..8bdf623067cd2 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -2096,9 +2096,8 @@ int64_t RewriteScheduleStage::getRewriteCost( ? 
MBFI.getBlockFreq(DefMI->getParent()).getFrequency() / EntryFreq : 1; - unsigned RegSize = DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(DefReg)); - unsigned NumRegs = std::max(RegSize / 32, (unsigned)1); - CopyCost += NumRegs * DefFreq; + const TargetRegisterClass *RC = DAG.MRI.getRegClass(DefReg); + CopyCost += RC->getCopyCost() * DefFreq; } // Account for CopyForUse copies in each block that the register is used. @@ -2107,10 +2106,8 @@ int64_t RewriteScheduleStage::getRewriteCost( EntryFreq ? MBFI.getBlockFreq(UseBlock).getFrequency() / EntryFreq : 1; for (Register UseReg : UseRegs) { - unsigned RegSize = - DAG.TRI->getRegSizeInBits(*DAG.MRI.getRegClass(UseReg)); - unsigned NumRegs = std::max(RegSize / 32, (unsigned)1); - CopyCost += NumRegs * UseFreq; + const TargetRegisterClass *RC = DAG.MRI.getRegClass(UseReg); + CopyCost += RC->getCopyCost() * UseFreq; } } @@ -2118,7 +2115,7 @@ int64_t RewriteScheduleStage::getRewriteCost( // Reset to the vgpr form. We must do rewriting after copy-insertion, as some // defs of the register may require VGPR. - for (auto &[MI, OriginalOpcode] : RewriteCands) { + for (auto [MI, OriginalOpcode] : RewriteCands) { assert(TII->isMAI(*MI)); const TargetRegisterClass *AGPRRC = DAG.MRI.getRegClass(MI->getOperand(0).getReg()); @@ -2252,10 +2249,10 @@ bool RewriteScheduleStage::rewrite( // Do not create redundant copies. if (ReachingDefCopyMap[Src2Reg].insert(RD).second) { MachineInstrBuilder VGPRCopy = - BuildMI(DAG.MF, RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) + BuildMI(*RD->getParent(), std::next(RD->getIterator()), + RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) .addDef(MappedReg, 0, 0) - .addUse(Src2Reg, 0, 0) - .insertAfter(RD); + .addUse(Src2Reg, 0, 0); DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); // If this reaching def was the last MI in the region, update the @@ -2335,10 +2332,10 @@ bool RewriteScheduleStage::rewrite( // Do not create reundant copies. 
if (ReachingDefCopyMap[DstReg].insert(RD).second) { MachineInstrBuilder VGPRCopy = - BuildMI(DAG.MF, RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) + BuildMI(*RD->getParent(), std::next(RD->getIterator()), + RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) .addDef(MappedReg, 0, 0) - .addUse(DstReg, 0, 0) - .insertAfter(RD); + .addUse(DstReg, 0, 0); DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); // If this reaching def was the last MI in the region, update the diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir index f485b088c8034..44e5563ce7adb 100644 --- a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir @@ -228,29 +228,24 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], 
[[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -330,28 +325,23 @@ body: | ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: 
successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; 
CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -438,34 +428,28 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], 
[[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -558,35 +542,29 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: 
SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY1]], [[COPY3]], [[COPY2]], [[COPY4]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -661,70 +639,55 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + 
; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], 
[[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] - ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], 
[[COPY7]] + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -804,71 +767,56 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: 
[[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] - ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 
0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] - ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF12]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; 
CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -950,43 +898,36 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = 
IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -998,40 +939,31 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: 
successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] - ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1123,43 +1055,36 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = 
IMPLICIT_DEF - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -1171,41 +1096,32 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] - ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF12]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY13]], [[COPY12]], [[COPY14]], [[COPY11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1312,16 +1228,15 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], 
[[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, 
implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -1332,26 +1247,20 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; 
CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1441,15 +1350,14 @@ body: | ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -1460,30 +1368,24 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: 
%bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; 
CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1583,22 +1485,20 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, 
[[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -1609,28 +1509,22 @@ body: | ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: 
[[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1736,22 +1630,20 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; 
CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -1762,30 +1654,24 @@ body: | ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1870,69 +1756,51 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF10]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY2]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, 
[[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF8]], [[DEF9]], [[COPY3]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF10]], implicit $exec + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF12]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF13]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_6:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_7:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, 
[[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_8:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_9:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_10:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_11:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_8]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_10]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_7]] - ; 
CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_9]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_11]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_6]] - ; CHECK-NEXT: KILL [[COPY10]], [[COPY5]], [[COPY12]], [[COPY7]], [[COPY14]], [[COPY9]], [[COPY16]], [[COPY11]], [[COPY6]], [[COPY13]], [[COPY8]], [[COPY15]] + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_6]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_7]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_8]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_11]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) @@ -1943,27 +1811,22 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY17:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY17]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF10]], implicit $exec ; CHECK-NEXT: 
S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY18:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY18]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF10]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: [[COPY19:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY20:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY21:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY22:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[COPY19]], [[COPY21]], [[COPY20]], [[COPY22]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: KILL [[DEF16]], [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF15]], [[DEF11]], [[DEF12]], [[DEF13]], [[DEF14]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -2060,57 +1923,45 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF 
- ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] - ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] - ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) @@ -2121,30 +1972,25 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.6 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF12]], implicit $exec + ; 
CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF12]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY13]], [[COPY15]], [[COPY14]], [[COPY16]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -2237,43 +2083,36 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 
= IMPLICIT_DEF - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -2285,27 +2124,21 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] - ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.7(0x40000000) @@ -2316,28 +2149,23 @@ body: | ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY11]].sub0, [[DEF12]], implicit $exec 
+ ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.9 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: successors: %bb.9(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY12]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.9: - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY16:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY15]], [[COPY13]], [[COPY16]], [[COPY14]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], 
[[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -2455,13 +2283,9 @@ body: | ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -2469,14 +2293,12 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -2488,10 +2310,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF14]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF16]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -2502,30 +2324,24 @@ body: | ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY 
[[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY5]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], 
[[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DEF16]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -2901,32 +2717,29 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY1]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; 
CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3019,36 +2832,30 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = 
contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3137,37 +2944,28 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY 
[[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]] + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3255,38 +3053,29 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit 
$exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: 
[[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY6]], [[COPY8]], [[COPY7]], [[COPY5]] + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3374,12 +3163,8 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -3387,14 +3172,12 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY 
[[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -3406,24 +3189,20 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 
[[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 128, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]] + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3520,12 +3299,8 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = 
IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -3533,14 +3308,12 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -3552,25 +3325,21 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], 
[[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 384, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 384, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], 
[[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY6]], [[COPY8]], [[COPY5]] + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3666,7 +3435,6 @@ body: | ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF @@ -3674,10 +3442,10 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -3688,27 +3456,21 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: 
{{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY2]], 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3793,7 +3555,6 @@ body: | ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: 
[[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF @@ -3801,10 +3562,10 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -3815,31 +3576,25 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY1]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY1]].sub1, 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 256, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY2]].sub1, 0, 0, 
implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY2]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 256, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -3939,22 +3694,20 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) 
; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -3965,29 +3718,23 @@ body: | ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 128, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4093,22 +3840,20 @@ body: | ; CHECK-NEXT: successors: 
%bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], 
[[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -4119,31 +3864,25 @@ body: | ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY1]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 128, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 
= COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[COPY2]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] ; CHECK-NEXT: S_NOP 0, 
implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4245,23 +3984,18 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 
4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -4272,27 +4006,21 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY5]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[COPY6]], 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]] + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4390,23 +4118,18 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 
4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -4417,31 +4140,25 @@ body: | ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub1, 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: 
DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]].sub1, 256, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]].sub0, 256, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY7]], [[COPY9]], [[COPY8]], [[COPY10]] + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4542,12 +4259,8 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: 
[[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -4555,14 +4268,12 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -4574,10 +4285,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -4588,29 +4299,23 @@ body: | ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub1, 128, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: 
%bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub0, 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]] + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4719,12 +4424,8 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF13]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; 
CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -4732,14 +4433,12 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -4751,10 +4450,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY4]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY3]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit 
$exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -4765,31 +4464,25 @@ body: | ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub0, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY5]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub1, 128, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub1, 0, 0, implicit $exec - ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[COPY6]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], 
[[DS_READ_B128_gfx9_]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub0, 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]] + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -4899,29 +4592,24 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = 
contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY1]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY2]], [[COPY4]], [[COPY3]], [[COPY1]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] ; CHECK-NEXT: 
S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -5003,13 +4691,9 @@ body: | ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} @@ -5017,14 +4701,12 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -5036,10 +4718,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY4]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = 
contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF14]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF16]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -5050,29 +4732,23 @@ body: | ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY5]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[V_ADD_U32_e32_]], 0, 0, implicit $exec ; CHECK-NEXT: 
[[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: undef [[DEF17:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[DEF17:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY6]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[DEF17:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[DEF17:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF19]], [[DEF20]], [[DEF21]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF18]], [[DEF13]], [[COPY9]], [[COPY7]], [[COPY10]], [[COPY8]], [[DEF17]] + ; CHECK-NEXT: KILL [[DEF19]], [[DEF20]], [[DEF21]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF18]], [[DEF13]], [[DEF14]], [[DEF15]], [[DEF16]], [[V_ADD_U32_e32_]], [[DEF17]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -5167,71 +4843,56 @@ body: | ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: 
[[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF14]] ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; 
CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY3]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] - ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] - ; CHECK-NEXT: KILL [[COPY8]], [[COPY5]], [[COPY9]], [[COPY6]], [[COPY10]], [[COPY7]] + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec - ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vreg_128_align2 = COPY [[COPY4]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY11]], 0, 0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF12]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[V_ADD_U32_e32_1]], 0, 0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vreg_128_align2 = COPY [[COPY3]] - ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vreg_128_align2 = COPY [[COPY2]] - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], 
[[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[COPY12]], [[COPY14]], [[COPY13]], [[COPY11]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -5334,22 +4995,20 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -5360,29 +5019,23 @@ body: | ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; 
CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub1, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY1]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.7 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] - ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[COPY2]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], 
[[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY3]], [[COPY5]], [[COPY4]], [[COPY6]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -5481,8 +5134,6 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_]] ; CHECK-NEXT: dead [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc @@ -5491,45 +5142,38 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[DEF15]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = COPY [[DEF16]] - ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = COPY [[DEF17]] - ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = COPY [[DEF18]] - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF9]], [[DEF10]], [[COPY1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY1]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] - ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF22:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF20]], [[DEF21]], 
[[DEF22]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF19]], [[DEF13]], [[COPY4]], [[COPY3]], [[COPY5]], [[COPY2]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[DEF15]], [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir index 2982c99c3fa7b..ab5a5cfd345a4 100644 --- a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir @@ -240,7 +240,6 @@ body: | ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DEF11]] ; CHECK-NEXT: dead [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: dead [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec @@ -251,14 +250,12 @@ body: | ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; 
CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:areg_128_align2 = COPY [[V_ADD_U32_e32_1]] ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -273,7 +270,7 @@ body: | ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF14]], [[DEF15]], [[COPY1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -284,27 +281,24 @@ body: | ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub1, [[DEF9]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY2]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF9]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.8 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) ; CHECK-NEXT: {{ $}} - ; 
CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub0, [[DEF9]], implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[COPY3]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF9]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF10]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF10]], [[COPY4]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF10]], [[DEF11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: From a67ff761d742cdcff120d1c018fa237b3f67007a Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Tue, 9 Dec 2025 08:49:29 -0600 Subject: [PATCH 13/17] review comments: make MachineScheduler require MachineBlockFrequencyInfo and delete local calculation. 
--- llvm/include/llvm/CodeGen/MachineScheduler.h | 5 ++++- llvm/lib/CodeGen/MachineScheduler.cpp | 11 +++++++++-- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 11 +++++------ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index 7b965d400ed08..199d24028777e 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -82,6 +82,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" @@ -147,6 +148,7 @@ struct LLVM_ABI MachineSchedContext { const TargetMachine *TM = nullptr; AAResults *AA = nullptr; LiveIntervals *LIS = nullptr; + MachineBlockFrequencyInfo *MBFI = nullptr; RegisterClassInfo *RegClassInfo; @@ -309,6 +311,7 @@ class LLVM_ABI ScheduleDAGMI : public ScheduleDAGInstrs { protected: AAResults *AA; LiveIntervals *LIS; + MachineBlockFrequencyInfo *MBFI; std::unique_ptr SchedImpl; /// Ordered list of DAG postprocessing steps. 
@@ -330,7 +333,7 @@ class LLVM_ABI ScheduleDAGMI : public ScheduleDAGInstrs { ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr S, bool RemoveKillFlags) : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), - LIS(C->LIS), SchedImpl(std::move(S)) {} + LIS(C->LIS), MBFI(C->MBFI), SchedImpl(std::move(S)) {} // Provide a vtable anchor ~ScheduleDAGMI() override; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index de29a9fab876e..d59c0e76bcb91 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -332,6 +332,7 @@ class MachineSchedulerImpl : public MachineSchedulerBase { MachineDominatorTree &MDT; AAResults &AA; LiveIntervals &LIS; + MachineBlockFrequencyInfo &MBFI; }; MachineSchedulerImpl() = default; @@ -415,6 +416,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass); INITIALIZE_PASS_END(MachineSchedulerLegacy, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) @@ -432,6 +434,8 @@ void MachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -555,6 +559,7 @@ bool MachineSchedulerImpl::run(MachineFunction &Func, const TargetMachine &TM, this->TM = &TM; AA = &Analyses.AA; LIS = &Analyses.LIS; + MBFI = &Analyses.MBFI; if (VerifyScheduling) { LLVM_DEBUG(LIS->dump()); @@ -660,8 +665,9 @@ bool MachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) { auto &TM = getAnalysis().getTM(); auto &AA = getAnalysis().getAAResults(); auto &LIS = getAnalysis().getLIS(); + auto &MBFI = getAnalysis().getMBFI(); Impl.setLegacyPass(this); - return Impl.run(MF, TM, {MLI, MDT, AA, LIS}); 
+ return Impl.run(MF, TM, {MLI, MDT, AA, LIS, MBFI}); } MachineSchedulerPass::MachineSchedulerPass(const TargetMachine *TM) @@ -693,8 +699,9 @@ MachineSchedulerPass::run(MachineFunction &MF, .getManager(); auto &AA = FAM.getResult(MF.getFunction()); auto &LIS = MFAM.getResult(MF); + auto &MBFI = MFAM.getResult(MF); Impl->setMFAM(&MFAM); - bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS}); + bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS, MBFI}); if (!Changed) return PreservedAnalyses::all(); diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 8bdf623067cd2..de7362b209dbc 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -2029,13 +2029,12 @@ int64_t RewriteScheduleStage::getRewriteCost( const DenseMap> &CopyForUse, const SmallPtrSetImpl &CopyForDef) { MachineBranchProbabilityInfo MBPI; - MachineBlockFrequencyInfo MBFI; + MachineBlockFrequencyInfo *MBFI = DAG.MBFI; - MBFI.calculate(MF, MBPI, *DAG.MLI); int64_t BestSpillCost = 0; int64_t Cost = 0; - uint64_t EntryFreq = MBFI.getEntryFreq().getFrequency(); + uint64_t EntryFreq = MBFI->getEntryFreq().getFrequency(); for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { if (!RegionsWithExcessArchVGPR[Region]) @@ -2052,7 +2051,7 @@ int64_t RewriteScheduleStage::getRewriteCost( unsigned SpillCostAfter = PressureAfter.getVGPRSpills(MF); uint64_t BlockFreq = - MBFI.getBlockFreq(DAG.Regions[Region].first->getParent()) + MBFI->getBlockFreq(DAG.Regions[Region].first->getParent()) .getFrequency(); bool RelativeFreqIsDenom = EntryFreq > BlockFreq; @@ -2093,7 +2092,7 @@ int64_t RewriteScheduleStage::getRewriteCost( auto DefReg = DefMI->getOperand(0).getReg(); uint64_t DefFreq = EntryFreq - ? MBFI.getBlockFreq(DefMI->getParent()).getFrequency() / EntryFreq + ? 
MBFI->getBlockFreq(DefMI->getParent()).getFrequency() / EntryFreq : 1; const TargetRegisterClass *RC = DAG.MRI.getRegClass(DefReg); @@ -2103,7 +2102,7 @@ int64_t RewriteScheduleStage::getRewriteCost( // Account for CopyForUse copies in each block that the register is used. for (auto &[UseBlock, UseRegs] : CopyForUse) { uint64_t UseFreq = - EntryFreq ? MBFI.getBlockFreq(UseBlock).getFrequency() / EntryFreq : 1; + EntryFreq ? MBFI->getBlockFreq(UseBlock).getFrequency() / EntryFreq : 1; for (Register UseReg : UseRegs) { const TargetRegisterClass *RC = DAG.MRI.getRegClass(UseReg); From 75e7be839d04107d2f53f2e72de5276e30d40453 Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Tue, 9 Dec 2025 11:56:37 -0600 Subject: [PATCH 14/17] Review comments --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 26 ++++++++++----------- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 8 +++---- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index de7362b209dbc..b9c4e45f9ba94 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -691,7 +691,7 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( const MachineSchedContext *C, bool IsLegacyScheduler) : GCNSchedStrategy(C) { SchedStages.push_back(GCNSchedStageID::OccInitialSchedule); - SchedStages.push_back(GCNSchedStageID::RewriteSchedule); + SchedStages.push_back(GCNSchedStageID::RewriteMFMAForm); SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule); SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule); SchedStages.push_back(GCNSchedStageID::PreRARematerialize); @@ -948,8 +948,8 @@ GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) { switch (SchedStageID) { case GCNSchedStageID::OccInitialSchedule: return std::make_unique(SchedStageID, *this); - case GCNSchedStageID::RewriteSchedule: - return std::make_unique(SchedStageID, 
*this); + case GCNSchedStageID::RewriteMFMAForm: + return std::make_unique(SchedStageID, *this); case GCNSchedStageID::UnclusteredHighRPReschedule: return std::make_unique(SchedStageID, *this); case GCNSchedStageID::ClusteredLowOccupancyReschedule: @@ -1187,7 +1187,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) { case GCNSchedStageID::OccInitialSchedule: OS << "Max Occupancy Initial Schedule"; break; - case GCNSchedStageID::RewriteSchedule: + case GCNSchedStageID::RewriteMFMAForm: OS << "Instruction Rewriting Reschedule"; break; case GCNSchedStageID::UnclusteredHighRPReschedule: @@ -1223,10 +1223,9 @@ bool GCNSchedStage::initGCNSchedStage() { return true; } -void RewriteScheduleStage::findReachingDefs( +void RewriteMFMAFormStage::findReachingDefs( MachineOperand &UseMO, LiveIntervals *LIS, SmallVectorImpl &DefIdxs) { - assert(UseMO.isReg()); MachineInstr *UseMI = UseMO.getParent(); LiveInterval &UseLI = LIS->getInterval(UseMO.getReg()); VNInfo *VNI = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI)); @@ -1239,7 +1238,6 @@ void RewriteScheduleStage::findReachingDefs( SmallPtrSet Visited; SmallVector Worklist; - Visited.insert(UseMI->getParent()); // Mark the predecessor blocks for traversal @@ -1270,11 +1268,11 @@ void RewriteScheduleStage::findReachingDefs( } } -void RewriteScheduleStage::findReachingUses( +void RewriteMFMAFormStage::findReachingUses( MachineInstr *DefMI, LiveIntervals *LIS, SmallVectorImpl &ReachingUses) { SlotIndex DefIdx = LIS->getInstructionIndex(*DefMI); - for (auto &UseMO : + for (MachineOperand &UseMO : DAG.MRI.use_nodbg_operands(DefMI->getOperand(0).getReg())) { SmallVector ReachingDefIndexes; findReachingDefs(UseMO, LIS, ReachingDefIndexes); @@ -1288,7 +1286,7 @@ void RewriteScheduleStage::findReachingUses( } } -bool RewriteScheduleStage::initGCNSchedStage() { +bool RewriteMFMAFormStage::initGCNSchedStage() { const GCNSubtarget &ST = MF.getSubtarget(); if (!ST.hasGFX90AInsts() || 
MFI.getMinWavesPerEU() > 1) return false; @@ -1945,14 +1943,14 @@ void GCNSchedStage::revertScheduling() { DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd); } -bool RewriteScheduleStage::isRewriteCandidate(MachineInstr *MI) const { +bool RewriteMFMAFormStage::isRewriteCandidate(MachineInstr *MI) const { if (!static_cast(DAG.TII)->isMAI(*MI)) return false; return AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()) != -1; } -bool RewriteScheduleStage::initHeuristics( +bool RewriteMFMAFormStage::initHeuristics( std::vector> &RewriteCands, DenseMap> &CopyForUse, SmallPtrSetImpl &CopyForDef) { @@ -2024,7 +2022,7 @@ bool RewriteScheduleStage::initHeuristics( return true; } -int64_t RewriteScheduleStage::getRewriteCost( +int64_t RewriteMFMAFormStage::getRewriteCost( const std::vector> &RewriteCands, const DenseMap> &CopyForUse, const SmallPtrSetImpl &CopyForDef) { @@ -2132,7 +2130,7 @@ int64_t RewriteScheduleStage::getRewriteCost( return Cost; } -bool RewriteScheduleStage::rewrite( +bool RewriteMFMAFormStage::rewrite( const std::vector> &RewriteCands) { DenseMap FirstMIToRegion; DenseMap LastMIToRegion; diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 76dba27d6f83f..0a79da061ab8e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -31,7 +31,7 @@ class GCNSchedStage; enum class GCNSchedStageID : unsigned { OccInitialSchedule = 0, - RewriteSchedule = 1, + RewriteMFMAForm = 1, UnclusteredHighRPReschedule = 2, ClusteredLowOccupancyReschedule = 3, PreRARematerialize = 4, @@ -243,7 +243,7 @@ using RegionBoundaries = class GCNScheduleDAGMILive final : public ScheduleDAGMILive { friend class GCNSchedStage; friend class OccInitialScheduleStage; - friend class RewriteScheduleStage; + friend class RewriteMFMAFormStage; friend class UnclusteredHighRPStage; friend class ClusteredLowOccStage; friend class PreRARematStage; @@ -418,7 +418,7 @@ class 
OccInitialScheduleStage : public GCNSchedStage { : GCNSchedStage(StageID, DAG) {} }; -class RewriteScheduleStage : public GCNSchedStage { +class RewriteMFMAFormStage : public GCNSchedStage { private: // Record regions with excess archvgpr register pressure over the physical // register limit. Register pressure in these regions usually will result in @@ -467,7 +467,7 @@ class RewriteScheduleStage : public GCNSchedStage { public: bool initGCNSchedStage() override; - RewriteScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) + RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) : GCNSchedStage(StageID, DAG) {} }; From 90f46d8e9227d7378f506d3463e8bef7cc761a64 Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Tue, 9 Dec 2025 11:58:20 -0600 Subject: [PATCH 15/17] Review comments: add MachineBlockFrequencyAnalysis as preserved to PHI elimination, unreachable block elimination and SI lower control flow (AMDGPU) passes. --- llvm/lib/CodeGen/PHIElimination.cpp | 3 +++ llvm/lib/CodeGen/UnreachableBlockElim.cpp | 7 ++++++- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 3 +++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp index 74e46121e65c7..f74af9d3f0765 100644 --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDomTreeUpdater.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -181,6 +182,7 @@ PHIEliminationPass::run(MachineFunction &MF, PA.preserve(); PA.preserve(); PA.preserve(); + PA.preserve(); return PA; } @@ -208,6 +210,7 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + 
AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp index cf8c1a7bd08d0..9c97f090ae1aa 100644 --- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -51,6 +52,7 @@ class UnreachableBlockElimLegacyPass : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved(); + AU.addPreserved(); } }; } @@ -69,6 +71,7 @@ PreservedAnalyses UnreachableBlockElimPass::run(Function &F, return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); + PA.preserve(); return PA; } @@ -106,6 +109,7 @@ void UnreachableMachineBlockElimLegacy::getAnalysisUsage( AnalysisUsage &AU) const { AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -120,7 +124,8 @@ UnreachableMachineBlockElimPass::run(MachineFunction &MF, return getMachineFunctionPassPreservedAnalyses() .preserve() - .preserve(); + .preserve() + .preserve(); } bool UnreachableMachineBlockElimLegacy::runOnMachineFunction( diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 8586d6c18b361..bb912be85de74 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -56,6 +56,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include 
"llvm/CodeGen/MachinePostDominators.h" @@ -160,6 +161,7 @@ class SILowerControlFlowLegacy : public MachineFunctionPass { AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -880,5 +882,6 @@ SILowerControlFlowPass::run(MachineFunction &MF, PA.preserve(); PA.preserve(); PA.preserve(); + PA.preserve(); return PA; } From afeb238adccafc57e1b3df184973834314ea5310 Mon Sep 17 00:00:00 2001 From: Tony Linthicum Date: Tue, 9 Dec 2025 14:24:54 -0600 Subject: [PATCH 16/17] Review comments: only reset instruction descriptor when the rewrite is actually going to be performed. --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 31 +- .../AMDGPU/sched_mfma_rewrite_copies.mir | 688 +++++++++--------- .../AMDGPU/sched_mfma_rewrite_cost.mir | 16 +- 3 files changed, 356 insertions(+), 379 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index b9c4e45f9ba94..cd5b66d5d4bb8 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -1955,8 +1955,8 @@ bool RewriteMFMAFormStage::initHeuristics( DenseMap> &CopyForUse, SmallPtrSetImpl &CopyForDef) { // Prepare for the heuristics - for (auto &MBB : MF) { - for (auto &MI : MBB) { + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { if (!isRewriteCandidate(&MI)) continue; @@ -1964,7 +1964,6 @@ bool RewriteMFMAFormStage::initHeuristics( assert(ReplacementOp != -1); RewriteCands.push_back({&MI, MI.getOpcode()}); - MI.setDesc(TII->get(ReplacementOp)); MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); if (Src2->isReg()) { @@ -1975,7 +1974,7 @@ bool RewriteMFMAFormStage::initHeuristics( // insert a copy. 
for (SlotIndex RDIdx : Src2ReachingDefs) { MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx); - if (!TII->isMAI(*RD)) + if (!isRewriteCandidate(RD)) CopyForDef.insert(RD); } } @@ -2026,12 +2025,9 @@ int64_t RewriteMFMAFormStage::getRewriteCost( const std::vector> &RewriteCands, const DenseMap> &CopyForUse, const SmallPtrSetImpl &CopyForDef) { - MachineBranchProbabilityInfo MBPI; MachineBlockFrequencyInfo *MBFI = DAG.MBFI; - int64_t BestSpillCost = 0; int64_t Cost = 0; - uint64_t EntryFreq = MBFI->getEntryFreq().getFrequency(); for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { @@ -2108,26 +2104,7 @@ int64_t RewriteMFMAFormStage::getRewriteCost( } } - Cost += CopyCost; - - // Reset to the vgpr form. We must do rewriting after copy-insertion, as some - // defs of the register may require VGPR. - for (auto [MI, OriginalOpcode] : RewriteCands) { - assert(TII->isMAI(*MI)); - const TargetRegisterClass *AGPRRC = - DAG.MRI.getRegClass(MI->getOperand(0).getReg()); - const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(AGPRRC); - - MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2); - assert(Src2); - - if (Src2->isReg()) - DAG.MRI.setRegClass(Src2->getReg(), VGPRRC); - DAG.MRI.setRegClass(MI->getOperand(0).getReg(), VGPRRC); - MI->setDesc(TII->get(OriginalOpcode)); - } - - return Cost; + return Cost + CopyCost; } bool RewriteMFMAFormStage::rewrite( diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir index 44e5563ce7adb..56a307d2afb56 100644 --- a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir @@ -228,15 +228,15 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = 
V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec @@ -324,15 +324,15 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec @@ -427,21 +427,21 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: SCHED_BARRIER 0 @@ -541,21 +541,21 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + 
; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec @@ -652,27 +652,27 @@ body: | ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = 
IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, 
[[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, 
[[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -680,14 +680,14 @@ body: | ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -780,27 +780,27 @@ body: | ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 
[[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -911,23 +911,23 @@ body: | ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = 
IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -939,16 +939,16 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; 
CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, 
implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, 
implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) @@ -1068,23 +1068,23 @@ body: | ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -1096,16 +1096,16 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, 
implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, 
[[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: 
%bb.6(0x80000000) @@ -1228,15 +1228,15 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 
[[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -1349,15 +1349,15 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, 
implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -1484,21 +1484,21 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -1629,21 +1629,21 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -1756,46 +1756,47 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_512 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_64 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF ; 
CHECK-NEXT: S_NOP 0, implicit-def %12 ; CHECK-NEXT: S_NOP 0, implicit-def %13 - ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 - ; CHECK-NEXT: [[DEF7:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF10]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF10]], implicit $exec - ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF9]].sub0, 
[[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF12]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF13]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], 
[[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_6:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_7:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_8:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_9:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_10:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_11:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF7]], [[DEF8]], [[DEF14]], 4, 4, [[DEF9]].sub0, [[DEF10]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_6:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_7:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_8:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_9:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_10:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_11:%[0-9]+]]:areg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -1811,22 +1812,21 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF10]], implicit $exec + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF10]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF - ; CHECK-NEXT: KILL [[DEF16]], [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF15]], [[DEF11]], [[DEF12]], [[DEF13]], [[DEF14]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], 
[[V_ADD_U32_e32_3]] ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 ; CHECK-NEXT: S_ENDPGM 0 bb.0: @@ -1936,27 +1936,27 @@ body: | ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 
[[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -2096,23 +2096,23 @@ body: | ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; 
CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -2124,16 +2124,16 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.6(0x80000000) @@ -2282,23 +2282,23 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -2310,10 +2310,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF14]], 4, 4, 
[[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF16]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF14]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF16]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -2447,7 +2447,7 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY 
[[DS_READ_B128_gfx9_]] ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF @@ -2577,7 +2577,7 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF @@ -2716,21 +2716,21 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 
4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: SCHED_BARRIER 0 @@ -2831,21 +2831,21 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; 
CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + 
; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec @@ -2938,24 +2938,24 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 128, 0, implicit $exec ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = 
IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 0, 0, implicit $exec @@ -3047,24 +3047,24 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 128, 0, implicit $exec ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec @@ -3158,26 +3158,26 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF 
; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -3189,10 +3189,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 
[[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 128, 0, implicit $exec @@ -3294,26 +3294,26 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: 
[[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -3325,10 +3325,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 
4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec @@ -3434,7 +3434,7 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; 
CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF @@ -3442,10 +3442,10 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, 
[[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -3554,7 +3554,7 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF @@ -3562,10 +3562,10 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 
[[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; 
CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -3693,21 +3693,21 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -3839,21 +3839,21 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -3978,24 +3978,24 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec ; CHECK-NEXT: 
[[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -4112,24 +4112,24 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, 
implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ 
$}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) @@ -4254,26 +4254,26 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -4285,10 +4285,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: 
successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), 
%bb.6(0x40000000) @@ -4419,26 +4419,26 @@ body: | ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -4450,10 +4450,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: 
[[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -4592,15 +4592,15 @@ body: | ; CHECK-NEXT: 
[[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, 
[[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec @@ -4690,23 +4690,23 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, 
[[DEF12]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -4718,10 +4718,10 @@ body: | ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.5(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF14]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF16]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = 
contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF14]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF16]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -4856,27 +4856,27 @@ body: | ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, 
implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, 
implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) @@ -4994,21 +4994,21 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef 
[[DS_READ_B128_gfx9_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) @@ -5134,6 +5134,7 @@ body: | ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec ; CHECK-NEXT: dead [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: $scc = IMPLICIT_DEF ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc @@ -5141,30 +5142,29 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec - ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: $scc = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, 
implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4: ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, [[DEF12]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir index ab5a5cfd345a4..40f87e838d314 100644 --- a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir @@ -55,7 +55,7 @@ body: | ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: dead [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: dead [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: dead undef 
[[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec @@ -65,13 +65,13 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -86,7 +86,7 @@ body: | ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) @@ -239,7 +239,7 @@ body: | ; CHECK-NEXT: SCHED_BARRIER 0 ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF11:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: dead [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: dead [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec @@ -249,13 +249,13 @@ body: | ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec ; CHECK-NEXT: S_BRANCH %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: @@ -270,7 +270,7 @@ body: | ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF15:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) From 111750d438f2ea4f0a034ec245ddfe2f45996037 Mon Sep 17 00:00:00 
2001 From: Tony Linthicum Date: Tue, 9 Dec 2025 16:44:35 -0600 Subject: [PATCH 17/17] Review comments: remove map lookup inside loop --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index cd5b66d5d4bb8..37e80d729c275 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -2325,6 +2325,7 @@ bool RewriteMFMAFormStage::rewrite( } } + std::set<MachineOperand *> &DstRegSet = ReplaceMap[DstReg]; for (MachineOperand *RU : DstReachingUseCopies) { MachineBasicBlock *RUBlock = RU->getParent()->getParent(); // Just keep track of the reaching use of this register by block. After we @@ -2350,15 +2351,16 @@ bool RewriteMFMAFormStage::rewrite( // use reg. RU->setReg(NewUseReg); // Track the copy source operand for replacement. - ReplaceMap[DstReg].insert(&VGPRCopy->getOperand(1)); + DstRegSet.insert(&VGPRCopy->getOperand(1)); } // Track the register for reclassification RewriteRegs.insert(DstReg); + // Insert the dst operand for replacement. If this dst is in a chain of // tied-def MFMAs, and the first src2 needs to be replaced with a new reg, // all the correspond operands need to be replaced. - ReplaceMap[DstReg].insert(Dst); + DstRegSet.insert(Dst); } // Handle the copies for dst uses.