diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index 7b965d400ed08..199d24028777e 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -82,6 +82,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachinePassRegistry.h" #include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/ScheduleDAG.h" @@ -147,6 +148,7 @@ struct LLVM_ABI MachineSchedContext { const TargetMachine *TM = nullptr; AAResults *AA = nullptr; LiveIntervals *LIS = nullptr; + MachineBlockFrequencyInfo *MBFI = nullptr; RegisterClassInfo *RegClassInfo; @@ -309,6 +311,7 @@ class LLVM_ABI ScheduleDAGMI : public ScheduleDAGInstrs { protected: AAResults *AA; LiveIntervals *LIS; + MachineBlockFrequencyInfo *MBFI; std::unique_ptr SchedImpl; /// Ordered list of DAG postprocessing steps. 
@@ -330,7 +333,7 @@ class LLVM_ABI ScheduleDAGMI : public ScheduleDAGInstrs { ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr S, bool RemoveKillFlags) : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), - LIS(C->LIS), SchedImpl(std::move(S)) {} + LIS(C->LIS), MBFI(C->MBFI), SchedImpl(std::move(S)) {} // Provide a vtable anchor ~ScheduleDAGMI() override; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index de29a9fab876e..d59c0e76bcb91 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -332,6 +332,7 @@ class MachineSchedulerImpl : public MachineSchedulerBase { MachineDominatorTree &MDT; AAResults &AA; LiveIntervals &LIS; + MachineBlockFrequencyInfo &MBFI; }; MachineSchedulerImpl() = default; @@ -415,6 +416,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(SlotIndexesWrapperPass) INITIALIZE_PASS_DEPENDENCY(LiveIntervalsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass); INITIALIZE_PASS_END(MachineSchedulerLegacy, DEBUG_TYPE, "Machine Instruction Scheduler", false, false) @@ -432,6 +434,8 @@ void MachineSchedulerLegacy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -555,6 +559,7 @@ bool MachineSchedulerImpl::run(MachineFunction &Func, const TargetMachine &TM, this->TM = &TM; AA = &Analyses.AA; LIS = &Analyses.LIS; + MBFI = &Analyses.MBFI; if (VerifyScheduling) { LLVM_DEBUG(LIS->dump()); @@ -660,8 +665,9 @@ bool MachineSchedulerLegacy::runOnMachineFunction(MachineFunction &MF) { auto &TM = getAnalysis().getTM(); auto &AA = getAnalysis().getAAResults(); auto &LIS = getAnalysis().getLIS(); + auto &MBFI = getAnalysis().getMBFI(); Impl.setLegacyPass(this); - return Impl.run(MF, TM, {MLI, MDT, AA, LIS}); 
+ return Impl.run(MF, TM, {MLI, MDT, AA, LIS, MBFI}); } MachineSchedulerPass::MachineSchedulerPass(const TargetMachine *TM) @@ -693,8 +699,9 @@ MachineSchedulerPass::run(MachineFunction &MF, .getManager(); auto &AA = FAM.getResult(MF.getFunction()); auto &LIS = MFAM.getResult(MF); + auto &MBFI = MFAM.getResult(MF); Impl->setMFAM(&MFAM); - bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS}); + bool Changed = Impl->run(MF, *TM, {MLI, MDT, AA, LIS, MBFI}); if (!Changed) return PreservedAnalyses::all(); diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp index 74e46121e65c7..f74af9d3f0765 100644 --- a/llvm/lib/CodeGen/PHIElimination.cpp +++ b/llvm/lib/CodeGen/PHIElimination.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDomTreeUpdater.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" @@ -181,6 +182,7 @@ PHIEliminationPass::run(MachineFunction &MF, PA.preserve(); PA.preserve(); PA.preserve(); + PA.preserve(); return PA; } @@ -208,6 +210,7 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } diff --git a/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/llvm/lib/CodeGen/UnreachableBlockElim.cpp index cf8c1a7bd08d0..9c97f090ae1aa 100644 --- a/llvm/lib/CodeGen/UnreachableBlockElim.cpp +++ b/llvm/lib/CodeGen/UnreachableBlockElim.cpp @@ -23,6 +23,7 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -51,6 +52,7 @@ class 
UnreachableBlockElimLegacyPass : public FunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addPreserved(); + AU.addPreserved(); } }; } @@ -69,6 +71,7 @@ PreservedAnalyses UnreachableBlockElimPass::run(Function &F, return PreservedAnalyses::all(); PreservedAnalyses PA; PA.preserve(); + PA.preserve(); return PA; } @@ -106,6 +109,7 @@ void UnreachableMachineBlockElimLegacy::getAnalysisUsage( AnalysisUsage &AU) const { AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -120,7 +124,8 @@ UnreachableMachineBlockElimPass::run(MachineFunction &MF, return getMachineFunctionPassPreservedAnalyses() .preserve() - .preserve(); + .preserve() + .preserve(); } bool UnreachableMachineBlockElimLegacy::runOnMachineFunction( diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h index f9d3ce039092e..878b6d00ba4c6 100644 --- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h +++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h @@ -102,6 +102,34 @@ struct GCNRegPressure { DynamicVGPRBlockSize)); } + unsigned getVGPRSpills(MachineFunction &MF) { + const GCNSubtarget &ST = MF.getSubtarget(); + if (!ST.hasGFX90AInsts()) + return 0; + + std::pair MaxVectorRegs = + ST.getMaxNumVectorRegs(MF.getFunction()); + unsigned ArchVGPRThreshold = MaxVectorRegs.first; + unsigned AGPRThreshold = MaxVectorRegs.second; + + unsigned ArchPressure = getArchVGPRNum(); + unsigned AGPRPressure = getAGPRNum(); + + unsigned ArchSpill = ArchPressure > ArchVGPRThreshold + ? (ArchPressure - ArchVGPRThreshold) + : 0; + unsigned AGPRSpill = + AGPRPressure > AGPRThreshold ? (AGPRPressure - AGPRThreshold) : 0; + + unsigned CombinedThreshold = ST.getMaxNumVGPRs(MF); + unsigned UnifiedPressure = getVGPRNum(/*UnifiedVGPRFile=*/true); + unsigned UnifiedSpill = UnifiedPressure > CombinedThreshold + ? 
(UnifiedPressure - CombinedThreshold) + : 0; + + return std::max(UnifiedSpill, (ArchSpill + AGPRSpill)); + } + void inc(unsigned Reg, LaneBitmask PrevMask, LaneBitmask NewMask, diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index c8ce3aab3f303..37e80d729c275 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -30,6 +30,7 @@ #include "Utils/AMDGPUBaseInfo.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/CalcSpillWeights.h" +#include "llvm/CodeGen/MachineCycleAnalysis.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/MC/LaneBitmask.h" #include "llvm/Support/ErrorHandling.h" @@ -690,6 +691,7 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( const MachineSchedContext *C, bool IsLegacyScheduler) : GCNSchedStrategy(C) { SchedStages.push_back(GCNSchedStageID::OccInitialSchedule); + SchedStages.push_back(GCNSchedStageID::RewriteMFMAForm); SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule); SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule); SchedStages.push_back(GCNSchedStageID::PreRARematerialize); @@ -946,6 +948,8 @@ GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) { switch (SchedStageID) { case GCNSchedStageID::OccInitialSchedule: return std::make_unique(SchedStageID, *this); + case GCNSchedStageID::RewriteMFMAForm: + return std::make_unique(SchedStageID, *this); case GCNSchedStageID::UnclusteredHighRPReschedule: return std::make_unique(SchedStageID, *this); case GCNSchedStageID::ClusteredLowOccupancyReschedule: @@ -1183,6 +1187,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) { case GCNSchedStageID::OccInitialSchedule: OS << "Max Occupancy Initial Schedule"; break; + case GCNSchedStageID::RewriteMFMAForm: + OS << "Instruction Rewriting Reschedule"; + break; case GCNSchedStageID::UnclusteredHighRPReschedule: OS << "Unclustered High 
Register Pressure Reschedule"; break; @@ -1216,6 +1223,105 @@ bool GCNSchedStage::initGCNSchedStage() { return true; } +void RewriteMFMAFormStage::findReachingDefs( + MachineOperand &UseMO, LiveIntervals *LIS, + SmallVectorImpl &DefIdxs) { + MachineInstr *UseMI = UseMO.getParent(); + LiveInterval &UseLI = LIS->getInterval(UseMO.getReg()); + VNInfo *VNI = UseLI.getVNInfoAt(LIS->getInstructionIndex(*UseMI)); + + // If the def is not a PHI, then it must be the only reaching def. + if (!VNI->isPHIDef()) { + DefIdxs.push_back(VNI->def); + return; + } + + SmallPtrSet Visited; + SmallVector Worklist; + Visited.insert(UseMI->getParent()); + + // Mark the predecessor blocks for traversal + for (auto *PredMBB : UseMI->getParent()->predecessors()) { + Worklist.push_back(PredMBB); + Visited.insert(PredMBB); + } + + while (!Worklist.empty()) { + MachineBasicBlock *CurrMBB = Worklist.pop_back_val(); + + SlotIndex CurrMBBEnd = LIS->getMBBEndIdx(CurrMBB); + VNInfo *VNI = UseLI.getVNInfoAt(CurrMBBEnd.getPrevSlot()); + + MachineBasicBlock *DefMBB = LIS->getMBBFromIndex(VNI->def); + + // If there is a def in this block, then add it to the list. This is the + // reaching def of this path. + if (!VNI->isPHIDef()) { + DefIdxs.push_back(VNI->def); + continue; + } + + for (auto *PredMBB : DefMBB->predecessors()) { + if (Visited.insert(PredMBB).second) + Worklist.push_back(PredMBB); + } + } +} + +void RewriteMFMAFormStage::findReachingUses( + MachineInstr *DefMI, LiveIntervals *LIS, + SmallVectorImpl &ReachingUses) { + SlotIndex DefIdx = LIS->getInstructionIndex(*DefMI); + for (MachineOperand &UseMO : + DAG.MRI.use_nodbg_operands(DefMI->getOperand(0).getReg())) { + SmallVector ReachingDefIndexes; + findReachingDefs(UseMO, LIS, ReachingDefIndexes); + + // If we find a use that contains this DefMI in its reachingDefs, then it is + // a reaching use. 
+ if (any_of(ReachingDefIndexes, [DefIdx](SlotIndex RDIdx) { + return SlotIndex::isSameInstr(RDIdx, DefIdx); + })) + ReachingUses.push_back(&UseMO); + } +} + +bool RewriteMFMAFormStage::initGCNSchedStage() { + const GCNSubtarget &ST = MF.getSubtarget(); + if (!ST.hasGFX90AInsts() || MFI.getMinWavesPerEU() > 1) + return false; + + RegionsWithExcessArchVGPR.resize(DAG.Regions.size()); + RegionsWithExcessArchVGPR.reset(); + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { + GCNRegPressure PressureBefore = DAG.Pressure[Region]; + if (PressureBefore.getArchVGPRNum() > ST.getAddressableNumArchVGPRs()) + RegionsWithExcessArchVGPR[Region] = true; + } + + if (RegionsWithExcessArchVGPR.none()) + return false; + + TII = ST.getInstrInfo(); + SRI = ST.getRegisterInfo(); + + std::vector> RewriteCands; + DenseMap> CopyForUse; + SmallPtrSet CopyForDef; + + if (!initHeuristics(RewriteCands, CopyForUse, CopyForDef)) + return false; + + int64_t Cost = getRewriteCost(RewriteCands, CopyForUse, CopyForDef); + + // If we haven't found the beneficial conditions, prefer the VGPR form which + // may result in less cross RC copies. 
+ if (Cost > 0) + return false; + + return rewrite(RewriteCands); +} + bool UnclusteredHighRPStage::initGCNSchedStage() { if (DisableUnclusterHighRP) return false; @@ -1837,6 +1943,514 @@ void GCNSchedStage::revertScheduling() { DAG.Regions[RegionIdx] = std::pair(DAG.RegionBegin, DAG.RegionEnd); } +bool RewriteMFMAFormStage::isRewriteCandidate(MachineInstr *MI) const { + + if (!static_cast(DAG.TII)->isMAI(*MI)) + return false; + return AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()) != -1; +} + +bool RewriteMFMAFormStage::initHeuristics( + std::vector> &RewriteCands, + DenseMap> &CopyForUse, + SmallPtrSetImpl &CopyForDef) { + // Prepare for the heuristics + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : MBB) { + if (!isRewriteCandidate(&MI)) + continue; + + int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI.getOpcode()); + assert(ReplacementOp != -1); + + RewriteCands.push_back({&MI, MI.getOpcode()}); + + MachineOperand *Src2 = TII->getNamedOperand(MI, AMDGPU::OpName::src2); + if (Src2->isReg()) { + SmallVector Src2ReachingDefs; + findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs); + + // For any definition of the src2 register which is non-MFMA, we + // insert a copy. + for (SlotIndex RDIdx : Src2ReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIdx); + if (!isRewriteCandidate(RD)) + CopyForDef.insert(RD); + } + } + + MachineOperand &Dst = MI.getOperand(0); + SmallVector DstReachingUses; + + findReachingUses(&MI, DAG.LIS, DstReachingUses); + + for (MachineOperand *RUOp : DstReachingUses) { + if (TII->isMAI(*RUOp->getParent())) + continue; + + // For any user of the result of the MFMA which is not an MFMA, we + // insert a copy. For a given register, we will only insert one copy + // per user block. 
+ CopyForUse[RUOp->getParent()->getParent()].insert(RUOp->getReg()); + + SmallVector DstUsesReachingDefs; + findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs); + + for (SlotIndex RDIndex : DstUsesReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); + if (TII->isMAI(*RD)) + continue; + + // For any definition of the user of the MFMA which is not an MFMA, + // we insert a copy. We do this to transform all the reaching defs + // of this use to AGPR. By doing this, we can insert a copy from + // AGPR to VGPR at the user rather than after the MFMA. + CopyForDef.insert(RD); + } + } + + // Do the rewrite to allow for updated RP calculation. + const TargetRegisterClass *VGPRRC = DAG.MRI.getRegClass(Dst.getReg()); + const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(VGPRRC); + DAG.MRI.setRegClass(Dst.getReg(), AGPRRC); + if (Src2->isReg()) + DAG.MRI.setRegClass(Src2->getReg(), AGPRRC); + } + } + + return true; +} + +int64_t RewriteMFMAFormStage::getRewriteCost( + const std::vector> &RewriteCands, + const DenseMap> &CopyForUse, + const SmallPtrSetImpl &CopyForDef) { + MachineBlockFrequencyInfo *MBFI = DAG.MBFI; + int64_t BestSpillCost = 0; + int64_t Cost = 0; + uint64_t EntryFreq = MBFI->getEntryFreq().getFrequency(); + + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { + if (!RegionsWithExcessArchVGPR[Region]) + continue; + + GCNRegPressure &PressureBefore = DAG.Pressure[Region]; + unsigned SpillCostBefore = PressureBefore.getVGPRSpills(MF); + + // For the cases we care about (i.e. ArchVGPR usage is greater than the + // addressable limit), rewriting alone should bring pressure to manageable + // level. If we find any such region, then the rewrite is potentially + // beneficial. 
+ GCNRegPressure PressureAfter = DAG.getRealRegPressure(Region); + unsigned SpillCostAfter = PressureAfter.getVGPRSpills(MF); + + uint64_t BlockFreq = + MBFI->getBlockFreq(DAG.Regions[Region].first->getParent()) + .getFrequency(); + + bool RelativeFreqIsDenom = EntryFreq > BlockFreq; + uint64_t RelativeFreq = EntryFreq && BlockFreq + ? (RelativeFreqIsDenom ? EntryFreq / BlockFreq + : BlockFreq / EntryFreq) + : 1; + + // This assumes perfect spilling / splitting -- using one spill / copy + // instruction and one restoreFrom / copy for each excess register, + int64_t SpillCost = ((int)SpillCostAfter - (int)SpillCostBefore) * 2; + + // Also account for the block frequency. + if (RelativeFreqIsDenom) + SpillCost /= (int64_t)RelativeFreq; + else + SpillCost *= (int64_t)RelativeFreq; + + // If we have increased spilling in any block, just bail. + if (SpillCost > 0) + return SpillCost; + + if (SpillCost < BestSpillCost) + BestSpillCost = SpillCost; + } + + // Set the cost to the largest decrease in spill cost in order to not double + // count spill reductions. + Cost = BestSpillCost; + + assert(Cost <= 0); + + unsigned CopyCost = 0; + + // For each CopyForDef, increase the cost by the register size while + // accounting for block frequency. + for (auto *DefMI : CopyForDef) { + auto DefReg = DefMI->getOperand(0).getReg(); + uint64_t DefFreq = + EntryFreq + ? MBFI->getBlockFreq(DefMI->getParent()).getFrequency() / EntryFreq + : 1; + + const TargetRegisterClass *RC = DAG.MRI.getRegClass(DefReg); + CopyCost += RC->getCopyCost() * DefFreq; + } + + // Account for CopyForUse copies in each block that the register is used. + for (auto &[UseBlock, UseRegs] : CopyForUse) { + uint64_t UseFreq = + EntryFreq ? 
MBFI->getBlockFreq(UseBlock).getFrequency() / EntryFreq : 1; + + for (Register UseReg : UseRegs) { + const TargetRegisterClass *RC = DAG.MRI.getRegClass(UseReg); + CopyCost += RC->getCopyCost() * UseFreq; + } + } + + return Cost + CopyCost; +} + +bool RewriteMFMAFormStage::rewrite( + const std::vector> &RewriteCands) { + DenseMap FirstMIToRegion; + DenseMap LastMIToRegion; + + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) { + RegionBoundaries Entry = DAG.Regions[Region]; + if (Entry.first == Entry.second) + continue; + + FirstMIToRegion[&*Entry.first] = Region; + if (Entry.second != Entry.first->getParent()->end()) + LastMIToRegion[&*Entry.second] = Region; + } + + // Rewrite the MFMAs to AGPR, and insert any copies as needed. + // The general assumption of the algorithm (and the previous cost calculation) + // is that it is better to insert the copies in the MBB of the def of the src2 + // operands, and in the MBB of the user of the dest operands. This is based on + // the assumption that the MFMAs are likely to appear in loop bodies, while + // the src2 and dest operands are live-in / live-out of the loop. Due to this + // design, the algorithm for finding copy insertion points is more + // complicated. + // + // There are three main cases to handle: 1. the reaching defs of the src2 + // operands, 2. the reaching uses of the dst operands, and 3. the reaching + // defs of the reaching uses of the dst operand. + // + // In the first case, we simply insert copies after each of the reaching + // definitions. In the second case, we collect all the uses of a given dest + // and organize them by MBB. Then, we insert 1 copy for each MBB before the + // earliest use. Since the use may have multiple reaching defs, and since we + // want to replace the register it is using with the result of the copy, we + // must handle case 3. 
In the third case, we simply insert a copy after each + // of the reaching defs to connect to the copy of the reaching uses of the dst + // reg. This allows us to avoid inserting copies next to the MFMAs. + // + // While inserting the copies, we maintain a map of operands which will use + // different regs (i.e. the result of the copies). For example, a case 1 src2 + // operand will use the register result of the copies after the reaching defs, + // as opposed to the original register. Now that we have completed our copy + // analysis and placement, we can bulk update the registers. We do this + // separately as to avoid complicating the reachingDef and reachingUse + // queries. + // + // While inserting the copies, we also maintain a list or registers which we + // will want to reclassify as AGPR. After doing the copy insertion and the + // register replacement, we can finally do the reclassification. This uses the + // redef map, as the registers we are interested in reclassifying may be + // replaced by the result of a copy. We must do this after the copy analysis + // and placement as we must have an accurate redef map -- otherwise we may end + // up creating illegal instructions. + + // The original registers of the MFMA that need to be reclassified as AGPR. + DenseSet RewriteRegs; + // The map of an original register in the MFMA to a new register (result of a + // copy) that it should be replaced with. + DenseMap RedefMap; + // The map of the original MFMA registers to the relevant MFMA operands. + DenseMap> ReplaceMap; + // The map of reaching defs for a given register -- to avoid duplicate copies. + DenseMap> ReachingDefCopyMap; + // The map of reaching uses for a given register by basic block -- to avoid + // duplicate copies and to calculate per MBB insert pts. 
+ DenseMap>> + ReachingUseTracker; + + for (auto &[MI, OriginalOpcode] : RewriteCands) { + int ReplacementOp = AMDGPU::getMFMASrcCVDstAGPROp(MI->getOpcode()); + if (ReplacementOp == -1) + continue; + MI->setDesc(TII->get(ReplacementOp)); + + // Case 1: insert copies for the reaching defs of the Src2Reg. + MachineOperand *Src2 = TII->getNamedOperand(*MI, AMDGPU::OpName::src2); + if (Src2->isReg()) { + Register Src2Reg = Src2->getReg(); + if (!Src2Reg.isVirtual()) + return false; + + Register MappedReg = Src2->getReg(); + SmallVector Src2ReachingDefs; + findReachingDefs(*Src2, DAG.LIS, Src2ReachingDefs); + SmallVector Src2DefsReplace; + + for (SlotIndex RDIndex : Src2ReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); + if (TII->isMAI(*RD)) + continue; + + // If there is a non mai reaching def, then we need a copy. + if (find(Src2DefsReplace, RD) == Src2DefsReplace.end()) + Src2DefsReplace.push_back(RD); + } + + if (!Src2DefsReplace.empty()) { + DenseMap::iterator RI = RedefMap.find(Src2Reg); + if (RI != RedefMap.end()) { + MappedReg = RI->second; + } else { + assert(!ReachingDefCopyMap.contains(Src2Reg)); + const TargetRegisterClass *Src2RC = DAG.MRI.getRegClass(Src2Reg); + const TargetRegisterClass *VGPRRC = + SRI->getEquivalentVGPRClass(Src2RC); + + // Track the mapping of the original register to the new register. + MappedReg = DAG.MRI.createVirtualRegister(VGPRRC); + RedefMap[Src2Reg] = MappedReg; + } + + // If none exists, create a copy from this reaching def. + // We may have inserted a copy already in an earlier iteration. + for (MachineInstr *RD : Src2DefsReplace) { + // Do not create redundant copies. 
+ if (ReachingDefCopyMap[Src2Reg].insert(RD).second) { + MachineInstrBuilder VGPRCopy = + BuildMI(*RD->getParent(), std::next(RD->getIterator()), + RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(MappedReg, 0, 0) + .addUse(Src2Reg, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + + // If this reaching def was the last MI in the region, update the + // region boundaries. + if (LastMIToRegion.contains(RD)) { + unsigned UpdateRegion = LastMIToRegion[RD]; + DAG.Regions[UpdateRegion].second = VGPRCopy; + LastMIToRegion.erase(RD); + } + } + } + } + + // Track the register for reclassification + RewriteRegs.insert(Src2Reg); + + // Always insert the operand for replacement. If this corresponds with a + // chain of tied-def we may not see the VGPR requirement until later. + ReplaceMap[Src2Reg].insert(Src2); + } + + // Case 2 and Case 3: insert copies before the reaching uses of the dsts, + // and after the reaching defs of the reaching uses of the dsts. + + MachineOperand *Dst = &MI->getOperand(0); + Register DstReg = Dst->getReg(); + if (!DstReg.isVirtual()) + return false; + + Register MappedReg = DstReg; + SmallVector DstReachingUses; + + SmallVector DstReachingUseCopies; + SmallVector DstUseDefsReplace; + + findReachingUses(MI, DAG.LIS, DstReachingUses); + + for (MachineOperand *RUOp : DstReachingUses) { + if (TII->isMAI(*RUOp->getParent())) + continue; + + // If there is a non mai reaching use, then we need a copy. + if (find(DstReachingUseCopies, RUOp) == DstReachingUseCopies.end()) + DstReachingUseCopies.push_back(RUOp); + SmallVector DstUsesReachingDefs; + findReachingDefs(*RUOp, DAG.LIS, DstUsesReachingDefs); + + for (SlotIndex RDIndex : DstUsesReachingDefs) { + MachineInstr *RD = DAG.LIS->getInstructionFromIndex(RDIndex); + if (TII->isMAI(*RD)) + continue; + + // If there is a non mai reaching def of this reaching use, then we will + // need a copy. 
+ if (find(DstUseDefsReplace, RD) == DstUseDefsReplace.end()) + DstUseDefsReplace.push_back(RD); + } + } + + if (!DstUseDefsReplace.empty()) { + DenseMap::iterator RI = RedefMap.find(DstReg); + if (RI != RedefMap.end()) { + MappedReg = RI->second; + } else { + assert(!ReachingDefCopyMap.contains(DstReg)); + const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg); + const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC); + + // Track the mapping of the original register to the new register. + MappedReg = DAG.MRI.createVirtualRegister(VGPRRC); + RedefMap[DstReg] = MappedReg; + } + + // If none exists, create a copy from this reaching def. + // We may have inserted a copy already in an earlier iteration. + for (MachineInstr *RD : DstUseDefsReplace) { + // Do not create reundant copies. + if (ReachingDefCopyMap[DstReg].insert(RD).second) { + MachineInstrBuilder VGPRCopy = + BuildMI(*RD->getParent(), std::next(RD->getIterator()), + RD->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(MappedReg, 0, 0) + .addUse(DstReg, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + + // If this reaching def was the last MI in the region, update the + // region boundaries. + DenseMap::iterator LMI = + LastMIToRegion.find(RD); + if (LMI != LastMIToRegion.end()) { + unsigned UpdateRegion = LMI->second; + DAG.Regions[UpdateRegion].second = VGPRCopy; + LastMIToRegion.erase(RD); + } + } + } + } + + std::set &DstRegSet = ReplaceMap[DstReg]; + for (MachineOperand *RU : DstReachingUseCopies) { + MachineBasicBlock *RUBlock = RU->getParent()->getParent(); + // Just keep track of the reaching use of this register by block. After we + // have scanned all the MFMAs we can find optimal insert pts. + if (RUBlock != MI->getParent()) { + ReachingUseTracker[RUBlock->getNumber()][DstReg].insert(RU); + continue; + } + + // Special case, the use is in the same block as the MFMA. Insert the copy + // just before the use. 
+ const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(DstReg); + const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC); + Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC); + MachineInstr *UseInst = RU->getParent(); + MachineInstrBuilder VGPRCopy = + BuildMI(*UseInst->getParent(), UseInst->getIterator(), + UseInst->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(NewUseReg, 0, 0) + .addUse(DstReg, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + // Since we know this use has only one reaching def, we can replace the + // use reg. + RU->setReg(NewUseReg); + // Track the copy source operand for replacement. + DstRegSet.insert(&VGPRCopy->getOperand(1)); + } + + // Track the register for reclassification + RewriteRegs.insert(DstReg); + + // Insert the dst operand for replacement. If this dst is in a chain of + // tied-def MFMAs, and the first src2 needs to be replaced with a new reg, + // all the correspond operands need to be replaced. + DstRegSet.insert(Dst); + } + + // Handle the copies for dst uses. + using RUBType = + std::pair>>; + for (RUBType RUBlockEntry : ReachingUseTracker) { + using RUDType = std::pair>; + for (RUDType RUDst : RUBlockEntry.second) { + MachineOperand *OpBegin = *RUDst.second.begin(); + SlotIndex InstPt = DAG.LIS->getInstructionIndex(*OpBegin->getParent()); + + // Find the earliest use in this block. 
+ for (MachineOperand *User : RUDst.second) { + SlotIndex NewInstPt = DAG.LIS->getInstructionIndex(*User->getParent()); + if (SlotIndex::isEarlierInstr(NewInstPt, InstPt)) + InstPt = NewInstPt; + } + + const TargetRegisterClass *DstRC = DAG.MRI.getRegClass(RUDst.first); + const TargetRegisterClass *VGPRRC = SRI->getEquivalentVGPRClass(DstRC); + Register NewUseReg = DAG.MRI.createVirtualRegister(VGPRRC); + MachineInstr *UseInst = DAG.LIS->getInstructionFromIndex(InstPt); + + MachineInstrBuilder VGPRCopy = + BuildMI(*UseInst->getParent(), UseInst->getIterator(), + UseInst->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addDef(NewUseReg, 0, 0) + .addUse(RUDst.first, 0, 0); + DAG.LIS->InsertMachineInstrInMaps(*VGPRCopy); + + // If this UseInst was the first MI in the region, update the region + // boundaries. + DenseMap::iterator FI = + FirstMIToRegion.find(UseInst); + if (FI != FirstMIToRegion.end()) { + unsigned UpdateRegion = FI->second; + DAG.Regions[UpdateRegion].first = VGPRCopy; + FirstMIToRegion.erase(UseInst); + } + + // Replace the operand for all users. + for (auto *User : RUDst.second) { + User->setReg(NewUseReg); + } + + // Track the copy source operand for replacement. + ReplaceMap[RUDst.first].insert(&VGPRCopy->getOperand(1)); + } + } + + // We may have needed to insert copies after the reaching defs of the MFMAs. + // Replace the original register with the result of the copy for all relevant + // operands. + for (std::pair NewDef : RedefMap) { + Register OldReg = NewDef.first; + Register NewReg = NewDef.second; + + // Replace the register for any associated operand in the MFMA chain. + for (MachineOperand *ReplaceOp : ReplaceMap[OldReg]) + ReplaceOp->setReg(NewReg); + } + + // Finally, do the reclassification of the MFMA registers. + for (auto RewriteReg : RewriteRegs) { + Register RegToRewrite = RewriteReg; + + // Be sure to update the replacement register and not the original. 
+ if (RedefMap.contains(RewriteReg)) + RegToRewrite = RedefMap[RewriteReg]; + + const TargetRegisterClass *CurrRC = DAG.MRI.getRegClass(RegToRewrite); + const TargetRegisterClass *AGPRRC = SRI->getEquivalentAGPRClass(CurrRC); + + DAG.MRI.setRegClass(RegToRewrite, AGPRRC); + } + + // Bulk update the LIS. + DAG.LIS->reanalyze(DAG.MF); + // Liveins may have been modified for cross RC copies + RegionPressureMap LiveInUpdater(&DAG, false); + LiveInUpdater.buildLiveRegMap(); + + for (unsigned Region = 0; Region < DAG.Regions.size(); Region++) + DAG.LiveIns[Region] = LiveInUpdater.getLiveRegsForRegionIdx(Region); + + DAG.Pressure[RegionIdx] = DAG.getRealRegPressure(RegionIdx); + + return true; +} + bool PreRARematStage::canIncreaseOccupancyOrReduceSpill() { const Function &F = MF.getFunction(); diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 95a931b9beb2a..0a79da061ab8e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -16,6 +16,9 @@ #include "GCNRegPressure.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineScheduler.h" @@ -28,11 +31,12 @@ class GCNSchedStage; enum class GCNSchedStageID : unsigned { OccInitialSchedule = 0, - UnclusteredHighRPReschedule = 1, - ClusteredLowOccupancyReschedule = 2, - PreRARematerialize = 3, - ILPInitialSchedule = 4, - MemoryClauseInitialSchedule = 5 + RewriteMFMAForm = 1, + UnclusteredHighRPReschedule = 2, + ClusteredLowOccupancyReschedule = 3, + PreRARematerialize = 4, + ILPInitialSchedule = 5, + MemoryClauseInitialSchedule = 6 }; #ifndef NDEBUG @@ -239,6 +243,7 @@ using RegionBoundaries = class GCNScheduleDAGMILive final : public ScheduleDAGMILive { friend class GCNSchedStage; friend class 
OccInitialScheduleStage; + friend class RewriteMFMAFormStage; friend class UnclusteredHighRPStage; friend class ClusteredLowOccStage; friend class PreRARematStage; @@ -413,6 +418,59 @@ class OccInitialScheduleStage : public GCNSchedStage { : GCNSchedStage(StageID, DAG) {} }; +class RewriteMFMAFormStage : public GCNSchedStage { +private: + // Record regions with excess archvgpr register pressure over the physical + // register limit. Register pressure in these regions usually will result in + // spilling. + BitVector RegionsWithExcessArchVGPR; + + const SIInstrInfo *TII; + const SIRegisterInfo *SRI; + + /// Do a speculative rewrite and collect copy locations. The speculative + /// rewrite allows us to calculate the RP of the code after the rewrite, and + /// the copy locations allow us to calculate the total cost of copies required + /// for the rewrite. Stores the rewritten instructions in \p RewriteCands , + /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the + /// copy locations for defs (of the MFMA operands) in \p CopyForDef + bool + initHeuristics(std::vector> &RewriteCands, + DenseMap> &CopyForUse, + SmallPtrSetImpl &CopyForDef); + + /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done + /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy + /// costs, and \p RewriteCands to undo rewriting. + int64_t getRewriteCost( + const std::vector> &RewriteCands, + const DenseMap> &CopyForUse, + const SmallPtrSetImpl &CopyForDef); + + /// Do the final rewrite on \p RewriteCands and insert any needed copies. + bool + rewrite(const std::vector> &RewriteCands); + + /// \returns true if this MI is a rewrite candidate. 
+ bool isRewriteCandidate(MachineInstr *MI) const; + + /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p + /// DefIdxs + void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS, + SmallVectorImpl &DefIdxs); + + /// Finds all the reaching uses of \p DefMI and stores the use operands in \p + /// ReachingUses + void findReachingUses(MachineInstr *DefMI, LiveIntervals *LIS, + SmallVectorImpl &ReachingUses); + +public: + bool initGCNSchedStage() override; + + RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG) + : GCNSchedStage(StageID, DAG) {} +}; + class UnclusteredHighRPStage : public GCNSchedStage { private: // Save the initial occupancy before starting this stage. diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 8586d6c18b361..bb912be85de74 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -56,6 +56,7 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachinePostDominators.h" @@ -160,6 +161,7 @@ class SILowerControlFlowLegacy : public MachineFunctionPass { AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } }; @@ -880,5 +882,6 @@ SILowerControlFlowPass::run(MachineFunction &MF, PA.preserve(); PA.preserve(); PA.preserve(); + PA.preserve(); return PA; } diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir new file mode 100644 index 0000000000000..56a307d2afb56 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_copies.mir @@ -0,0 +1,5235 @@ +# NOTE: Assertions have been autogenerated by 
utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- | + define void @src2_singledef_singleuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_singleuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_singleuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_singledef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_multidef_vgpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_singledef_agpr() #0 { + 
entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_singleedef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_singleuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_singleuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_singledef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_multidef_agpr() #0 { + entry: + unreachable + } + + define void @src2_singledef_singleuse_dst_singleuse_singledef_mixed() #0 { + entry: + unreachable + } + + define void @src2_multidef_singleuse_dst_multiuse_multidef_mixed() #0 { + entry: + unreachable + } + + define void @src2_singledef_multiuse_dst_singleuse_multidef_mixed() #0 { + entry: + unreachable + } + + define void @src2_multidef_multiuse_dst_multiuse_multidef_mixed() #0 { + entry: + unreachable + } + + define void @no_copy_for_mfma() #0 { + entry: + unreachable + } + + attributes #0 = { "amdgpu-waves-per-eu"="1,1" 
"amdgpu-flat-work-group-size"="64,64"} +... + + +--- +name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 
%88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ 
$}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + 
+ bb.2: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit 
killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: 
[[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 
%60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, 
implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 
= IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + + +--- +name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, 
[[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], 
[[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, 
[[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF12]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: 
KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, 
%63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, 
%62, %72, %85, %86, %87, %88, %94, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 
[[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + 
S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, 
%87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + KILL %89, %90, %91, %92, %93, %193 + + bb.7: + undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = 
IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, 
[[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], 
[[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF12]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = 
IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, 
implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + KILL %89, %90, %91, %92, %93, %193 + + bb.7: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + undef %95.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = 
contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} 
+ ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, 
implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + S_BRANCH %bb.5 + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + + bb.5: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, 
[[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + 
S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.5 + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.5: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... + +--- +name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; 
CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.8 + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.8: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; 
CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = 
IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, 
implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.8 + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.8: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, 
[[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_6:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_7:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_8:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_9:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_10:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_6]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_7]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_8]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_11]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 
4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %194:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %195:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %196:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %197:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %198:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %199:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + + bb.2: + KILL %89, %90, %91, %92, %93, %193, %194, %195, %196, %197, %198, %199 + + + bb.3: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.5, implicit killed $scc + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + S_BRANCH %bb.5 + + bb.5: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + + bb.6: + KILL %1, 
%2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = 
IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = 
V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_1]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF12]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + 
%72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, 
implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + KILL %89, %90, %91, %92, %93, %193 + + bb.3: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.5, implicit killed $scc + + bb.4: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.6 + + bb.5: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.6: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 
%bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.6(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] + ; 
CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.7(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.8, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + 
%11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, 
%61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + KILL %89, %90, %91, %92, %93, %193 + + + bb.7: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.9, implicit killed $scc + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.10 + + bb.9: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.10: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_vgpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; 
CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF14]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF16]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), 
%bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DEF16]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + 
%10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + 
undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.9: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... + +--- +name: src2_singledef_singleuse_dst_singleuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF 
+ ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: DS_WRITE_B128_gfx9 
[[DEF9]], [[COPY7]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[COPY7]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, 
%2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_multidef_singleuse_dst_singleuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; 
CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 
[[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64 [[DEF10]], [[DEF11]], [[COPY]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_5]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_2]] + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_4]] + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_1]] + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vreg_128_align2 = COPY [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_e64_3]] + ; CHECK-NEXT: KILL [[COPY2]], [[COPY5]], [[COPY3]], [[COPY6]], [[COPY4]], [[COPY1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vreg_128_align2 = COPY [[COPY]] + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF13]].sub1, [[COPY7]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], 
[[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[COPY7]], [[V_ADD_U32_e32_]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.1: + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit 
$mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + KILL %89, %90, %91, %92, %93, %193 + + + bb.3: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: src2_singledef_multiuse_dst_singleuse_singleedef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_singleedef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, 
$sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + + bb.4: + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %84:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %84 + S_NOP 0, implicit 
%50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_singleuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + 
; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, 
implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.7: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_singledef_singleuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def 
%13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], 
[[DS_READ_B128_gfx9_1]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract 
nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_singleuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; 
CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 128, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], 
[[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 128, 0, implicit $exec + + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + 
%85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_singledef_multiuse_dst_singleuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit 
killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 128, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, 
%63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ +--- +name: src2_multidef_multiuse_dst_singleuse_multidef_agpr # %84 (src2 of the first MFMA): defs in bb.2 and bb.3, read by that MFMA and by a DS_WRITE in bb.6. %88 (dst of the last MFMA): also defined by the DS_READ in bb.0, stored by one DS_WRITE in bb.6.
+tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_singleuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit
killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 128, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], 
[[DS_READ_B128_gfx9_]], 384, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 384, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + + +--- +name: src2_singledef_singleuse_dst_multiuse_singledef_agpr # %84 (src2 of the first MFMA): one def (DS_READ in bb.0), read only by that MFMA. %88 (dst of the last MFMA): one def, stored by DS_WRITEs in both bb.3 and bb.4.
+tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]],
[[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, 
%61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 128, 0, implicit $exec + + bb.5: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_multidef_singleuse_dst_multiuse_singledef_agpr # NOTE(review): in this body %84 (src2 of the first MFMA) has one def (DS_READ in bb.0) and two readers (that MFMA and the DS_WRITE_B128 in bb.5), i.e. singledef/multiuse; the name looks swapped with src2_singledef_multiuse_dst_multiuse_singledef_agpr - confirm.
+tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ;
CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = 
IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 256, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 
= IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit $exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec + + bb.5: + DS_WRITE_B128_gfx9 %64:vgpr_32, 
%84:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... + + +--- +name: src2_singledef_multiuse_dst_multiuse_singledef_agpr # NOTE(review): in this body %84 (src2 of the first MFMA) has two defs (DS_READs in bb.2 and bb.3) and one reader (that MFMA), i.e. multidef/singleuse; the name looks swapped with src2_multidef_singleuse_dst_multiuse_singledef_agpr - confirm.
+tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: 
{{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, 
implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.8 + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.8: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, 
%13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_multidef_multiuse_dst_multiuse_singledef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_singledef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 
0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: 
DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF12]], [[DS_READ_B128_gfx9_]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF13]], [[DEF14]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = 
IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.8 + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.8: + DS_WRITE_B32_gfx9 %64:vgpr_32, 
%84.sub0:vreg_128_align2, 256, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: src2_singledef_singleuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B128_gfx9 
[[DEF9]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + 
%86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 256, 0, implicit $exec + + bb.5: + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + + +--- +name: src2_multidef_singleuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]].sub1, 256, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 
[[DEF9]], [[DS_READ_B128_gfx9_1]].sub0, 256, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.1: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, 
%61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.2: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.3: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 256, 0, implicit $exec + S_BRANCH %bb.5 + + bb.4: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 256, 0, implicit $exec + + bb.5: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + + +--- +name: src2_singledef_multiuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit 
killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: 
S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + 
%72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.9 + + bb.8: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.9: + KILL %1, %2, %3, %4, 
%5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_multiuse_multidef_agpr +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_agpr + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 256, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF9]], 512, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DS_READ_B128_gfx9_1]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF13]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF14]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[DEF15]], 4, 4, [[DEF12]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: 
successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub0, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub1, 128, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub1, 0, 0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B32_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_]].sub0, 128, 0, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF9]], [[DS_READ_B128_gfx9_1]], 256, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF17]], [[DEF13]], [[DEF14]], [[DEF15]], [[DS_READ_B128_gfx9_]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, 
implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + %88:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 256, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 512, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, 
%88.sub1:vreg_128_align2, 128, 0, implicit $exec + S_BRANCH %bb.9 + + bb.8: + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub1:vreg_128_align2, 0, 0, implicit $exec + DS_WRITE_B32_gfx9 %64:vgpr_32, %88.sub0:vreg_128_align2, 128, 0, implicit $exec + + bb.9: + DS_WRITE_B128_gfx9 %64:vgpr_32, %84:vreg_128_align2, 256, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... + +--- +name: src2_singledef_singleuse_dst_singleuse_singledef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_singleuse_dst_singleuse_singledef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, 
%63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + + +--- +name: src2_multidef_singleuse_dst_multiuse_multidef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_singleuse_dst_multiuse_multidef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF14]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[DEF16:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF16]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[V_ADD_U32_e32_]], 0, 0, implicit $exec + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[DEF17:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[DEF17:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_ADD_U32_e32_]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF21:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF19]], [[DEF20]], [[DEF21]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF18]], [[DEF13]], [[DEF14]], [[DEF15]], [[DEF16]], [[V_ADD_U32_e32_]], [[DEF17]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + 
%1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + %94:vreg_128_align2 = IMPLICIT_DEF + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + + bb.9: + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... + +--- +name: src2_singledef_multiuse_dst_singleuse_multidef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_singledef_multiuse_dst_singleuse_multidef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: 
[[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub0, [[DEF11]], implicit $exec + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF12]].sub1, [[DEF11]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.3, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[V_ADD_U32_e32_]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF13]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF14]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF8]], [[DEF9]], [[DEF15]], 4, 4, [[DEF10]].sub0, [[DEF11]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: KILL [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_4]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_5]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF12]].sub1, [[V_ADD_U32_e32_]].sub0, implicit $exec + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF11]], [[V_ADD_U32_e32_1]], 0, 0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF16]], [[DEF12]], [[DEF13]], [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], [[V_ADD_U32_e32_2]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = 
IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.4, implicit killed $scc + + bb.2: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %89:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %90:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %91:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %92:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %93:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %193:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.3: + KILL %89, %90, %91, %92, %93, %193 + + bb.4: + %94:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + + +--- +name: src2_multidef_multiuse_dst_multiuse_multidef_mixed +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: src2_multidef_multiuse_dst_multiuse_multidef_mixed + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 
0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:areg_128_align2 = DS_READ_B128_gfx9 [[DEF12]], 0, 0, implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[DS_READ_B128_gfx9_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DS_READ_B128_gfx9_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.5(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.6, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub1, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.7(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: DS_WRITE_B128_gfx9 [[DEF12]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], 0, 0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DS_READ_B128_gfx9_]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF15]], [[DEF16]], [[DEF17]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], 
[[DEF6]], [[DEF7]], [[DEF8]], [[DEF14]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_3]], [[V_ADD_U32_e32_]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %84:vreg_128_align2 = DS_READ_B128_gfx9 %64:vgpr_32, 0, 0, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + + bb.4: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, 
%63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.5: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.7, implicit killed $scc + + bb.6: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + S_BRANCH %bb.8 + + bb.7: + DS_WRITE_B128_gfx9 %64:vgpr_32, %88:vreg_128_align2, 0, 0, implicit $exec + %94:vreg_128_align2 = IMPLICIT_DEF + + bb.8: + %95:vgpr_32 = V_ADD_U32_e32 %84.sub0, %64, implicit $exec + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94, %95 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... + +--- +name: no_copy_for_mfma +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: no_copy_for_mfma + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: 
[[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF13]].sub0, [[DEF12]], implicit $exec + ; CHECK-NEXT: dead [[DEF14:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_ADD_U32_e32_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], 
[[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.4, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF9]], [[DEF10]], [[DEF15]], 4, 4, [[DEF11]].sub0, [[DEF12]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF15]].sub0, 
[[DEF12]], implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF17]], [[DEF18]], [[DEF19]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF16]], [[DEF13]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_1]], [[V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64_2]], [[DEF15]], [[V_ADD_U32_e32_1]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + %88:vreg_128_align2 = IMPLICIT_DEF + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + %88:vreg_128_align2 = IMPLICIT_DEF + S_BRANCH %bb.4 + + + bb.3: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = contract nofpexcept 
V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %85:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %87:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %86:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %87:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.5, implicit killed $scc + + bb.4: + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %88:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %88:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + %86:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + + bb.5: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %88.sub0, %64, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %86, %87, %88, %94 + S_NOP 0, 
implicit %50, implicit %51 + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir new file mode 100644 index 0000000000000..40f87e838d314 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched_mfma_rewrite_cost.mir @@ -0,0 +1,518 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck %s + +--- | + define void @more_copies_than_spills() #0 { + entry: + unreachable + } + + define void @less_copies_than_spills() #0 { + entry: + unreachable + } + + define void @low_pressure() { + entry: + unreachable + } + + attributes #0 = { "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="64,64"} +... + + +--- +name: more_copies_than_spills +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: more_copies_than_spills + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def 
%12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit 
$exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.9(0x40000000), %bb.8(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.9, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: successors: %bb.9(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_3:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_3:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9: + ; CHECK-NEXT: successors: %bb.10(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_4:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF11]].sub0, implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub2, [[DEF9]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.10: + ; CHECK-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF11]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; 
CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF10]], [[DEF11]], [[V_ADD_U32_e32_4]], [[V_ADD_U32_e32_5]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit 
$exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.10, implicit killed $scc + + bb.9: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + + bb.10: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %85.sub0, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub2, %64, implicit $exec + + bb.11: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %85.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +... + + +--- +name: less_copies_than_spills +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: less_copies_than_spills + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %12 + ; CHECK-NEXT: S_NOP 0, implicit-def %13 + ; CHECK-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 
= IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF13:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:areg_128_align2 = V_ADD_U32_e32 [[DEF10]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF14:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF15:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF16:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:areg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF14]], [[DEF15]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF16]].sub0, [[DEF9]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: 
%bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub0, [[DEF9]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF11]].sub1, [[DEF9]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF10]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF18:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF19:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF20:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: KILL [[DEF18]], [[DEF19]], [[DEF20]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF4]], [[DEF5]], [[DEF6]], [[DEF7]], [[DEF8]], [[DEF17]], [[DEF10]], [[DEF11]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %12, implicit %13 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %1:vreg_1024 = IMPLICIT_DEF + %2:vreg_1024 = IMPLICIT_DEF + %3:vreg_1024 = IMPLICIT_DEF + %4:vreg_1024 = IMPLICIT_DEF + %5:vreg_1024 = IMPLICIT_DEF + %6:vreg_1024 = IMPLICIT_DEF + %7:vreg_1024 = IMPLICIT_DEF + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + 
%12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + + bb.9: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %1, %2, %3, %4, %5, %6, %7, %8, %10, %11, %12, %13, %62, %72, %85, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 +... 
+ + +--- +name: low_pressure +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + workGroupIDX: { reg: '$sgpr6' } + privateSegmentWaveByteOffset: { reg: '$sgpr7' } + workItemIDX: { reg: '$vgpr0' } + sgprForEXECCopy: '$sgpr100_sgpr101' +body: | + ; CHECK-LABEL: name: low_pressure + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: liveins: $vgpr0, $sgpr4_sgpr5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vreg_128 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF + ; CHECK-NEXT: S_NOP 0, implicit-def %5 + ; CHECK-NEXT: S_NOP 0, implicit-def %6 + ; CHECK-NEXT: [[DEF4:%[0-9]+]]:vreg_512 = IMPLICIT_DEF + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF6:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF8:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead [[DEF9:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: dead undef [[V_ADD_U32_e32_:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF10:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF11:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF12:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF13:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.2, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef 
[[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub0, [[DEF5]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_1:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF6]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.4(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.5, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF7:%[0-9]+]]:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 [[DEF10]], [[DEF11]], [[V_ADD_U32_e32_1]], 4, 4, [[DEF13]].sub0, [[DEF5]], 0, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.6(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $scc = IMPLICIT_DEF + ; CHECK-NEXT: S_CBRANCH_SCC1 %bb.7, implicit killed $scc + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub0, [[DEF5]], implicit $exec + ; CHECK-NEXT: S_BRANCH %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7: + ; CHECK-NEXT: successors: %bb.8(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: undef [[V_ADD_U32_e32_2:%[0-9]+]].sub0:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub0, [[DEF5]], implicit $exec + ; CHECK-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]].sub1:vreg_128_align2 = V_ADD_U32_e32 [[DEF7]].sub1, [[DEF5]], implicit $exec + ; CHECK-NEXT: {{ $}} + ; 
CHECK-NEXT: bb.8: + ; CHECK-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF6]].sub1, [[V_ADD_U32_e32_1]].sub0, implicit $exec + ; CHECK-NEXT: SCHED_BARRIER 0 + ; CHECK-NEXT: KILL [[DEF4]], [[DEF]], [[DEF1]], [[DEF2]], [[DEF3]], [[DEF12]], [[DEF6]], [[DEF7]], [[V_ADD_U32_e32_2]], [[V_ADD_U32_e32_3]] + ; CHECK-NEXT: S_NOP 0, implicit %5, implicit %6 + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5 + %8:vreg_512 = IMPLICIT_DEF + %10:vreg_64 = IMPLICIT_DEF + %11:vgpr_32 = IMPLICIT_DEF + %12:vreg_128 = IMPLICIT_DEF + %13:vreg_1024 = IMPLICIT_DEF + S_NOP 0, implicit-def %50:av_512 + S_NOP 0, implicit-def %51:av_512 + SCHED_BARRIER 0 + %60:av_128_align2 = IMPLICIT_DEF + %61:av_128_align2 = IMPLICIT_DEF + %62:vreg_128_align2 = IMPLICIT_DEF + %63:vreg_64_align2 = IMPLICIT_DEF + %64:vgpr_32 = IMPLICIT_DEF + %72:vreg_128_align2 = IMPLICIT_DEF + %85:vreg_128_align2 = IMPLICIT_DEF + %86:vreg_128_align2 = IMPLICIT_DEF + %87:vreg_128_align2 = IMPLICIT_DEF + undef %88.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.3, implicit killed $scc + + bb.2: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub0, %64, implicit $exec + S_BRANCH %bb.4 + + bb.3: + undef %84.sub0:vreg_128_align2 = V_ADD_U32_e32 %72.sub1, %64, implicit $exec + S_BRANCH %bb.4 + + bb.4: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.6, implicit killed $scc + + bb.5: + %85:vreg_128_align2 = contract nofpexcept V_MFMA_SCALE_F32_16X16X128_F8F6F4_f4_f4_vgprcd_e64 %60:av_128_align2, %61:av_128_align2, %84:vreg_128_align2, 4, 4, %63.sub0:vreg_64_align2, %64:vgpr_32, 0, 0, implicit $mode, implicit $exec + + bb.6: + $scc = IMPLICIT_DEF + S_CBRANCH_SCC1 %bb.8, implicit killed $scc + + bb.7: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, implicit $exec + S_BRANCH %bb.9 + + bb.8: + undef %94.sub0:vreg_128_align2 = V_ADD_U32_e32 %85.sub0, %64, 
implicit $exec + %94.sub1:vreg_128_align2 = V_ADD_U32_e32 %85.sub1, %64, implicit $exec + + bb.9: + %104:vgpr_32 = V_ADD_U32_e32 %72.sub1, %84.sub0, implicit $exec + SCHED_BARRIER 0 + KILL %8, %10, %11, %12, %13, %62, %72, %85, %94, %104 + S_NOP 0, implicit %50, implicit %51 + S_ENDPGM 0 + +...