diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 03d93cecaa596..5cb5c0bf40a08 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -45,6 +45,7 @@ #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -113,6 +114,8 @@ STATISTIC(NumSplit, "Number of critical edges split"); STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumPostRACopySink, "Number of copies sunk after RA"); +using RegSubRegPair = TargetInstrInfo::RegSubRegPair; + namespace { class MachineSinking : public MachineFunctionPass { @@ -128,6 +131,7 @@ class MachineSinking : public MachineFunctionPass { const MachineBranchProbabilityInfo *MBPI = nullptr; AliasAnalysis *AA = nullptr; RegisterClassInfo RegClassInfo; + TargetSchedModel SchedModel; // Remember which edges have been considered for breaking. SmallSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>, 8> @@ -161,6 +165,8 @@ class MachineSinking : public MachineFunctionPass { /// would re-order assignments. using SeenDbgUser = PointerIntPair<MachineInstr *, 1>; + using SinkItem = std::pair<MachineInstr *, MachineBasicBlock *>; + /// Record of DBG_VALUE uses of vregs in a block, so that we can identify /// debug instructions to sink. SmallDenseMap<Register, TinyPtrVector<SeenDbgUser>> SeenDbgUsers; @@ -255,7 +261,10 @@ class MachineSinking : public MachineFunctionPass { void FindCycleSinkCandidates(MachineCycle *Cycle, MachineBasicBlock *BB, SmallVectorImpl<MachineInstr *> &Candidates); - bool SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I); + + bool + aggressivelySinkIntoCycle(MachineCycle *Cycle, MachineInstr &I, + DenseMap<SinkItem, MachineInstr *> &SunkInstrs); bool isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -271,11 +280,14 @@ class MachineSinking : public MachineFunctionPass { GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB, AllSuccsCache &AllSuccessors) const; - std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB); + std::vector<unsigned> &getBBRegisterPressure(const MachineBasicBlock &MBB, + bool UseCache = true); bool registerPressureSetExceedsLimit(unsigned NRegs, const TargetRegisterClass *RC, const MachineBasicBlock &MBB); + + bool registerPressureExceedsLimit(const MachineBasicBlock &MBB); }; } // end anonymous namespace @@ -680,6 +692,10 @@ void MachineSinking::FindCycleSinkCandidates( SmallVectorImpl<MachineInstr *> &Candidates) { for (auto &MI : *BB) { LLVM_DEBUG(dbgs() << "CycleSink: Analysing candidate: " << MI); + if (MI.isMetaInstruction()) { + LLVM_DEBUG(dbgs() << "CycleSink: not sinking meta instruction\n"); + continue; + } if (!TII->shouldSink(MI)) { LLVM_DEBUG(dbgs() << "CycleSink: Instruction not a candidate for this " "target\n"); @@ -775,31 +791,62 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { if (SinkInstsIntoCycle) { SmallVector<MachineCycle *, 8> Cycles(CI->toplevel_cycles()); - for (auto *Cycle : Cycles) { - MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); - if (!Preheader) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); - continue; - } - SmallVector<MachineInstr *, 8> Candidates; - FindCycleSinkCandidates(Cycle, Preheader, Candidates); - - // Walk the candidates in reverse order so that we start with the use - // of a def-use chain, if there is any. - // TODO: Sort the candidates using a cost-model.
- unsigned i = 0; - for (MachineInstr *I : llvm::reverse(Candidates)) { - if (i++ == SinkIntoCycleLimit) { - LLVM_DEBUG(dbgs() << "CycleSink: Limit reached of instructions to " - "be analysed."); - break; + SchedModel.init(STI); + bool HasHighPressure; + + DenseMap<SinkItem, MachineInstr *> SunkInstrs; + + enum CycleSinkStage { COPY, LOW_LATENCY, AGGRESSIVE, END }; + for (unsigned Stage = CycleSinkStage::COPY; Stage != CycleSinkStage::END; + ++Stage, SunkInstrs.clear()) { + HasHighPressure = false; + + for (auto *Cycle : Cycles) { + MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); + if (!Preheader) { + LLVM_DEBUG(dbgs() << "CycleSink: Can't find preheader\n"); + continue; } + SmallVector<MachineInstr *, 8> Candidates; + FindCycleSinkCandidates(Cycle, Preheader, Candidates); + + unsigned i = 0; + + // Walk the candidates in reverse order so that we start with the use + // of a def-use chain, if there is any. + // TODO: Sort the candidates using a cost-model. + for (MachineInstr *I : llvm::reverse(Candidates)) { + // CycleSinkStage::COPY: Sink a limited number of copies + if (Stage == CycleSinkStage::COPY) { + if (i++ == SinkIntoCycleLimit) { + LLVM_DEBUG(dbgs() + << "CycleSink: Limit reached of instructions to " + "be analyzed."); + break; + } + + if (!I->isCopy()) + continue; + } - if (!SinkIntoCycle(Cycle, *I)) - break; - EverMadeChange = true; - ++NumCycleSunk; + // CycleSinkStage::LOW_LATENCY: sink unlimited number of instructions + // which the target specifies as low-latency + if (Stage == CycleSinkStage::LOW_LATENCY && + !TII->hasLowDefLatency(SchedModel, *I, 0)) + continue; + + if (!aggressivelySinkIntoCycle(Cycle, *I, SunkInstrs)) + break; + EverMadeChange = true; + ++NumCycleSunk; + } + + // Recalculate the pressure after sinking + if (!HasHighPressure) + HasHighPressure = registerPressureExceedsLimit(*Preheader); } + if (!HasHighPressure) + break; } } @@ -1055,13 +1102,15 @@ bool MachineSinking::PostponeSplitCriticalEdge(MachineInstr &MI, } std::vector<unsigned> & -MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) { +MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB, + bool UseCache) { // Currently to save compiling time, MBB's register pressure will not change // in one ProcessBlock iteration because of CachedRegisterPressure. but MBB's // register pressure is changed after sinking any instructions into it. // FIXME: need a accurate and cheap register pressure estiminate model here. + auto RP = CachedRegisterPressure.find(&MBB); - if (RP != CachedRegisterPressure.end()) + if (UseCache && RP != CachedRegisterPressure.end()) return RP->second; RegionPressure Pressure; @@ -1085,6 +1134,12 @@ MachineSinking::getBBRegisterPressure(const MachineBasicBlock &MBB) { } RPTracker.closeRegion(); + + if (RP != CachedRegisterPressure.end()) { + CachedRegisterPressure[&MBB] = RPTracker.getPressure().MaxSetPressure; + return CachedRegisterPressure[&MBB]; + } + auto It = CachedRegisterPressure.insert( std::make_pair(&MBB, RPTracker.getPressure().MaxSetPressure)); return It.first->second; @@ -1103,6 +1158,21 @@ bool MachineSinking::registerPressureSetExceedsLimit( return false; } +// Recalculate RP and check if any pressure set exceeds the set limit.
+bool MachineSinking::registerPressureExceedsLimit( + const MachineBasicBlock &MBB) { + std::vector<unsigned> BBRegisterPressure = getBBRegisterPressure(MBB, false); + + for (unsigned PS = 0; PS < BBRegisterPressure.size(); ++PS) { + if (BBRegisterPressure[PS] >= + TRI->getRegPressureSetLimit(*MBB.getParent(), PS)) { + return true; + } + } + + return false; +} + /// isProfitableToSinkTo - Return true if it is profitable to sink MI. bool MachineSinking::isProfitableToSinkTo(Register Reg, MachineInstr &MI, MachineBasicBlock *MBB, @@ -1581,83 +1651,98 @@ bool MachineSinking::hasStoreBetween(MachineBasicBlock *From, return HasAliasedStore; } -/// Sink instructions into cycles if profitable. This especially tries to -/// prevent register spills caused by register pressure if there is little to no -/// overhead moving instructions into cycles. -bool MachineSinking::SinkIntoCycle(MachineCycle *Cycle, MachineInstr &I) { - LLVM_DEBUG(dbgs() << "CycleSink: Finding sink block for: " << I); +/// Aggressively sink instructions into cycles. This will try to sink all +/// instructions in the top-most preheaders in an attempt to reduce RP. In +/// particular, it will sink into multiple successor blocks without limits +/// based on the amount of sinking, or the type of ops being sunk (so long as +/// they are safe to sink). +bool MachineSinking::aggressivelySinkIntoCycle( + MachineCycle *Cycle, MachineInstr &I, + DenseMap<SinkItem, MachineInstr *> &SunkInstrs) { + // TODO: support instructions with multiple defs + if (I.getNumDefs() > 1) + return false; + + LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Finding sink block for: " << I); MachineBasicBlock *Preheader = Cycle->getCyclePreheader(); assert(Preheader && "Cycle sink needs a preheader block"); - MachineBasicBlock *SinkBlock = nullptr; - bool CanSink = true; - const MachineOperand &MO = I.getOperand(0); - - for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) { - LLVM_DEBUG(dbgs() << "CycleSink: Analysing use: " << MI); - if (!Cycle->contains(MI.getParent())) { - LLVM_DEBUG(dbgs() << "CycleSink: Use not in cycle, can't sink.\n"); - CanSink = false; - break; - } + SmallVector<std::pair<RegSubRegPair, MachineInstr *>> Uses; - // FIXME: Come up with a proper cost model that estimates whether sinking - // the instruction (and thus possibly executing it on every cycle - // iteration) is more expensive than a register. - // For now assumes that copies are cheap and thus almost always worth it.
- if (!MI.isCopy()) { - LLVM_DEBUG(dbgs() << "CycleSink: Use is not a copy\n"); - CanSink = false; - break; + MachineOperand &DefMO = I.getOperand(0); + for (MachineInstr &MI : MRI->use_instructions(DefMO.getReg())) { + Uses.push_back({{DefMO.getReg(), DefMO.getSubReg()}, &MI}); + } + + for (std::pair<RegSubRegPair, MachineInstr *> Entry : Uses) { + MachineInstr *MI = Entry.second; + LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Analysing use: " << MI); + if (MI->isPHI()) { + LLVM_DEBUG( + dbgs() << "AggressiveCycleSink: Not attempting to sink for PHI.\n"); + continue; } - if (!SinkBlock) { - SinkBlock = MI.getParent(); - LLVM_DEBUG(dbgs() << "CycleSink: Setting sink block to: " - << printMBBReference(*SinkBlock) << "\n"); + // We cannot sink before the prologue + if (MI->isPosition() || TII->isBasicBlockPrologue(*MI)) { + LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Use is BasicBlock prologue, " + "can't sink.\n"); continue; } - SinkBlock = DT->findNearestCommonDominator(SinkBlock, MI.getParent()); - if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't find nearest dominator\n"); - CanSink = false; - break; + if (!Cycle->contains(MI->getParent())) { + LLVM_DEBUG( + dbgs() << "AggressiveCycleSink: Use not in cycle, can't sink.\n"); + continue; } - LLVM_DEBUG(dbgs() << "CycleSink: Setting nearest common dom block: " - << printMBBReference(*SinkBlock) << "\n"); - } - if (!CanSink) { - LLVM_DEBUG(dbgs() << "CycleSink: Can't sink instruction.\n"); - return false; - } - if (!SinkBlock) { - LLVM_DEBUG(dbgs() << "CycleSink: Not sinking, can't find sink block.\n"); - return false; - } - if (SinkBlock == Preheader) { - LLVM_DEBUG( - dbgs() << "CycleSink: Not sinking, sink block is the preheader\n"); - return false; - } - if (SinkBlock->sizeWithoutDebugLargerThan(SinkLoadInstsPerBlockThreshold)) { - LLVM_DEBUG( - dbgs() << "CycleSink: Not Sinking, block too large to analyse.\n"); - return false; - } + MachineBasicBlock *SinkBlock = MI->getParent(); + MachineInstr *NewMI = nullptr; + SinkItem MapEntry(&I, SinkBlock); + + auto SI = SunkInstrs.find(MapEntry); + + // Check for the case in which we have already sunk a copy of this + // instruction into the user block. + if (SI != SunkInstrs.end()) { + LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Already sunk to block: " + << printMBBReference(*SinkBlock) << "\n"); + NewMI = SI->second; + } - LLVM_DEBUG(dbgs() << "CycleSink: Sinking instruction!\n"); - SinkBlock->splice(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), Preheader, - I); + // Create a copy of the instruction in the use block. + if (!NewMI) { + LLVM_DEBUG(dbgs() << "AggressiveCycleSink: Sinking instruction to block: " + << printMBBReference(*SinkBlock) << "\n"); + + NewMI = I.getMF()->CloneMachineInstr(&I); + if (DefMO.getReg().isVirtual()) { + const TargetRegisterClass *TRC = MRI->getRegClass(DefMO.getReg()); + Register DestReg = MRI->createVirtualRegister(TRC); + NewMI->substituteRegister(DefMO.getReg(), DestReg, DefMO.getSubReg(), + *TRI); + } + SinkBlock->insert(SinkBlock->SkipPHIsAndLabels(SinkBlock->begin()), + NewMI); + SunkInstrs.insert({MapEntry, NewMI}); + } - // Conservatively clear any kill flags on uses of sunk instruction - for (MachineOperand &MO : I.operands()) { - if (MO.isReg() && MO.readsReg()) + // Conservatively clear any kill flags on uses of sunk instruction + for (MachineOperand &MO : NewMI->all_uses()) { + assert(MO.isReg() && MO.isUse()); RegsToClearKillFlags.insert(MO.getReg()); - } + } - // The instruction is moved from its basic block, so do not retain the - // debug information.
- assert(!I.isDebugInstr() && "Should not sink debug inst"); - I.setDebugLoc(DebugLoc()); + // The instruction is moved from its basic block, so do not retain the + // debug information. + assert(!NewMI->isDebugInstr() && "Should not sink debug inst"); + NewMI->setDebugLoc(DebugLoc()); + + // Replace the use with the newly created virtual register. + RegSubRegPair &UseReg = Entry.first; + MI->substituteRegister(UseReg.Reg, NewMI->getOperand(0).getReg(), + UseReg.SubReg, *TRI); + } + // If we have replaced all uses, then delete the dead instruction + if (I.isDead(*MRI)) + I.eraseFromParent(); return true; } diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir new file mode 100644 index 0000000000000..bca1517ed183a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-cycle.mir @@ -0,0 +1,1272 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 --sink-insts-to-avoid-spills=1 --stop-after=machine-sink -o - %s | FileCheck -check-prefixes=GFX9-SUNK %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 --sink-insts-to-avoid-spills=1 -mattr=+wavefrontsize64 --stop-after=machine-sink -o - %s | FileCheck -check-prefixes=GFX10-SUNK %s + +--- +name: test_sink_copy +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_sink_copy + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* 
sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.4: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.5: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.6.entry: + ; + ; GFX10-SUNK-LABEL: name: test_sink_copy + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.4: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.5: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.6.entry: + bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + 
%2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vreg_256 = IMPLICIT_DEF + %5:vreg_256 = COPY %4 + %6:vreg_256 = COPY %4 + %7:vreg_256 = COPY %4 + %8:vreg_256 = COPY %4 + %9:vreg_256 = COPY %4 + + + S_BRANCH %bb.1 + + bb.1: + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9 + S_BRANCH %bb.4 + + bb.3: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9 + S_BRANCH %bb.4 + + bb.4: + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.5 + + bb.5: + S_ENDPGM 0 +... + +# For gfx9, after sinking the copies, pressure is within the desired limit + +--- +name: test_sink_multi_stage +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_sink_multi_stage + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: 
[[COPY5:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256_align2 = COPY [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.4: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.5: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.6.entry: + ; + ; GFX10-SUNK-LABEL: name: test_sink_multi_stage + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[COPY:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY1:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY2:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY3:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY4:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY]], implicit [[COPY1]], implicit [[COPY2]], implicit [[COPY3]], implicit [[COPY4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit 
[[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[COPY5:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY6:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY7:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY8:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: [[COPY9:%[0-9]+]]:vreg_256 = COPY [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[COPY5]], implicit [[COPY6]], implicit [[COPY7]], implicit [[COPY8]], implicit [[COPY9]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.4: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.5: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.6.entry: + bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + %2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vreg_256 = IMPLICIT_DEF + %5:vreg_256 = COPY %4 + %6:vreg_256 = COPY %4 + %7:vreg_256 = COPY %4 + %8:vreg_256 = COPY %4 + %9:vreg_256 = COPY %4 + %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + S_BRANCH %bb.1 + + bb.1: + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9 + INLINEASM &"", 1, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14 + S_BRANCH %bb.4 + + bb.3: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9 + INLINEASM &"", 1, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14 + S_BRANCH %bb.4 + + bb.4: + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + 
S_BRANCH %bb.5 + + bb.5: + S_ENDPGM 0 +... + +--- +name: test_sink_low_rp +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_sink_low_rp + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; 
GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.4: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.5: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.6.entry: + ; + ; GFX10-SUNK-LABEL: name: test_sink_low_rp + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, 
[[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = 
V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]], implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.4: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.5: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.6.entry: + 
bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + %2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vreg_256 = IMPLICIT_DEF + %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + + S_BRANCH %bb.1 + + bb.1: + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25 + S_BRANCH %bb.4 + + bb.3: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25 + S_BRANCH %bb.4 + + bb.4: + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.5 + + bb.5: + S_ENDPGM 0 +... 
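+ +# Descriptive note on the preceding test (matching the comment style used after test_sink_copy): for gfx9 the adds in test_sink_low_rp are left in the preheader, while for gfx10 (wave64) they are duplicated into both use blocks.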
+ +--- +name: test_sink_high_rp +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_sink_high_rp + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: 
[[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 
[[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.4: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.5: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.6.entry: + ; + ; GFX10-SUNK-LABEL: name: test_sink_high_rp + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + 
; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; 
GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: 
[[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.4: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.5: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.6.entry: + bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + %2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vreg_256 = IMPLICIT_DEF + %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec 
+ %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %26:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %27:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %28:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %29:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub3:vreg_256, 0, implicit $exec + %30:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub3:vreg_256, 0, implicit $exec + %31:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub3:vreg_256, 0, implicit $exec + + S_BRANCH %bb.1 + + bb.1: + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31 + S_BRANCH %bb.4 + + bb.3: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31 + S_BRANCH %bb.4 + + bb.4: + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.5 + + bb.5: + S_ENDPGM 0 +... 
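+# Summary of the -SUNK checks above (observable from the GFX9-SUNK/GFX10-SUNK
+# expectations, not a statement about the pass internals): each of the 27
+# V_ADD_U32_e64 results defined in bb.0 of the input is no longer kept live
+# across the cycle; the add is instead duplicated into both of its use blocks
+# inside the cycle (bb.2 and bb.3), relieving register pressure in the
+# preheader.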
+ +# Do not sink convergent op (MFMA) + +--- +name: test_sink_convergent +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_sink_convergent + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF6:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[DEF5]], [[DEF7]], [[DEF6]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF8]], implicit [[V_MFMA_F32_4X4X1F32_e64_]] + ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: 
[[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 
[[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.4: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.5 + ; 
GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.5: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.6.entry: + ; + ; GFX10-SUNK-LABEL: name: test_sink_convergent + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF6:%[0-9]+]]:areg_128_align2 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[V_MFMA_F32_4X4X1F32_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 [[DEF5]], [[DEF7]], [[DEF6]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF8]], implicit [[V_MFMA_F32_4X4X1F32_e64_]] + ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = 
V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_27:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_28:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_29:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_30:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_31:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_32:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_33:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_34:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, 
[[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_37:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_38:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_39:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_40:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_43:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_44:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_47:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_49:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_51:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_53:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_27]], implicit [[V_ADD_U32_e64_28]], implicit [[V_ADD_U32_e64_29]], implicit [[V_ADD_U32_e64_30]], implicit [[V_ADD_U32_e64_31]], implicit [[V_ADD_U32_e64_32]], implicit [[V_ADD_U32_e64_33]], implicit [[V_ADD_U32_e64_34]], implicit [[V_ADD_U32_e64_35]], implicit [[V_ADD_U32_e64_36]], implicit [[V_ADD_U32_e64_37]], implicit [[V_ADD_U32_e64_38]], implicit [[V_ADD_U32_e64_39]], implicit [[V_ADD_U32_e64_40]], implicit [[V_ADD_U32_e64_41]], implicit [[V_ADD_U32_e64_42]], implicit [[V_ADD_U32_e64_43]], implicit [[V_ADD_U32_e64_44]], implicit [[V_ADD_U32_e64_45]], implicit [[V_ADD_U32_e64_46]], implicit [[V_ADD_U32_e64_47]], implicit [[V_ADD_U32_e64_48]], implicit [[V_ADD_U32_e64_49]], implicit [[V_ADD_U32_e64_50]], implicit [[V_ADD_U32_e64_51]], implicit [[V_ADD_U32_e64_52]], implicit [[V_ADD_U32_e64_53]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.4: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: 
S_BRANCH %bb.5 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.5: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.6.entry: + bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + %2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vreg_256 = IMPLICIT_DEF + %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %26:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %27:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %28:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %29:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub3:vreg_256, 0, implicit $exec + %30:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub3:vreg_256, 0, implicit $exec + %31:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub3:vreg_256, 0, implicit $exec + %40:vgpr_32 = IMPLICIT_DEF + %41:areg_128_align2 = IMPLICIT_DEF + %42:vgpr_32 = IMPLICIT_DEF + %43:vgpr_32 = IMPLICIT_DEF + %44:areg_128_align2 = V_MFMA_F32_4X4X1F32_e64 %40, %42, %41, 0, 0, 0, implicit $mode, implicit $exec + + + S_BRANCH %bb.1 + + bb.1: + INLINEASM &"", 1, implicit %43, implicit %44 + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31 + S_BRANCH %bb.4 + + bb.3: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 
1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31 + S_BRANCH %bb.4 + + bb.4: + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.5 + + bb.5: + S_ENDPGM 0 +... + +# Do not sink instructions with multiple defs + +--- +name: test_sink_multi_def +alignment: 1 +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + ; GFX9-SUNK-LABEL: name: test_sink_multi_def + ; GFX9-SUNK: bb.0: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256_align2 = IMPLICIT_DEF + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, implicit $exec + ; 
GFX9-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX9-SUNK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF]].sub2, [[DEF1]].sub4, 0, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.1: + ; GFX9-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_1]] + ; GFX9-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX9-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.2: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.3: + ; GFX9-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX9-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit 
[[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX9-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.4: + ; GFX9-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX9-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.5: + ; GFX9-SUNK-NEXT: S_ENDPGM 0 + ; GFX9-SUNK-NEXT: {{ $}} + ; GFX9-SUNK-NEXT: bb.6.entry: + ; + ; GFX10-SUNK-LABEL: name: test_sink_multi_def + ; GFX10-SUNK: bb.0: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: [[DEF:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF1:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF2:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF3:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[DEF4:%[0-9]+]]:vreg_256 = IMPLICIT_DEF + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub0, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_14:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_15:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub1, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_18:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_19:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub3, [[DEF1]].sub2, 0, 
implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_20:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub4, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_21:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub5, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub6, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub7, [[DEF1]].sub2, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_24:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub0, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_25:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub1, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_U32_e64_26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[DEF]].sub2, [[DEF1]].sub3, 0, implicit $exec + ; GFX10-SUNK-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[DEF]].sub2, [[DEF1]].sub4, 0, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.1 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.1: + ; GFX10-SUNK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_CO_U32_e64_]], implicit [[V_ADD_CO_U32_e64_1]] + ; GFX10-SUNK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec + ; GFX10-SUNK-NEXT: S_BRANCH %bb.3 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.2: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.3: + ; GFX10-SUNK-NEXT: successors: %bb.4(0x80000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]] + ; GFX10-SUNK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[V_ADD_U32_e64_]], implicit [[V_ADD_U32_e64_1]], implicit [[V_ADD_U32_e64_2]], implicit [[V_ADD_U32_e64_3]], implicit [[V_ADD_U32_e64_4]], implicit [[V_ADD_U32_e64_5]], implicit [[V_ADD_U32_e64_6]], implicit [[V_ADD_U32_e64_7]], implicit [[V_ADD_U32_e64_8]], implicit [[V_ADD_U32_e64_9]], implicit [[V_ADD_U32_e64_10]], implicit [[V_ADD_U32_e64_11]], implicit [[V_ADD_U32_e64_12]], implicit [[V_ADD_U32_e64_13]], implicit [[V_ADD_U32_e64_14]], implicit [[V_ADD_U32_e64_15]], implicit [[V_ADD_U32_e64_16]], 
implicit [[V_ADD_U32_e64_17]], implicit [[V_ADD_U32_e64_18]], implicit [[V_ADD_U32_e64_19]], implicit [[V_ADD_U32_e64_20]], implicit [[V_ADD_U32_e64_21]], implicit [[V_ADD_U32_e64_22]], implicit [[V_ADD_U32_e64_23]], implicit [[V_ADD_U32_e64_24]], implicit [[V_ADD_U32_e64_25]], implicit [[V_ADD_U32_e64_26]] + ; GFX10-SUNK-NEXT: S_BRANCH %bb.4 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.4: + ; GFX10-SUNK-NEXT: successors: %bb.1(0x40000000), %bb.5(0x40000000) + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc + ; GFX10-SUNK-NEXT: S_BRANCH %bb.5 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.5: + ; GFX10-SUNK-NEXT: S_ENDPGM 0 + ; GFX10-SUNK-NEXT: {{ $}} + ; GFX10-SUNK-NEXT: bb.6.entry: + bb.0: + %0:vreg_256 = IMPLICIT_DEF + %1:vreg_256 = IMPLICIT_DEF + %2:vreg_256 = IMPLICIT_DEF + %3:vreg_256 = IMPLICIT_DEF + %4:vreg_256 = IMPLICIT_DEF + %5:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %6:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %7:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %8:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %9:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %10:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %11:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %12:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub0:vreg_256, 0, implicit $exec + %13:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %14:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %15:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %16:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %17:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %18:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %19:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %20:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub1:vreg_256, 0, implicit $exec + %21:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %22:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %23:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %24:vgpr_32 = V_ADD_U32_e64 %0.sub3:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %25:vgpr_32 = V_ADD_U32_e64 %0.sub4:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %26:vgpr_32 = V_ADD_U32_e64 %0.sub5:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %27:vgpr_32 = V_ADD_U32_e64 %0.sub6:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %28:vgpr_32 = V_ADD_U32_e64 %0.sub7:vreg_256, %1.sub2:vreg_256, 0, implicit $exec + %29:vgpr_32 = V_ADD_U32_e64 %0.sub0:vreg_256, %1.sub3:vreg_256, 0, implicit $exec + %30:vgpr_32 = V_ADD_U32_e64 %0.sub1:vreg_256, %1.sub3:vreg_256, 0, implicit $exec + %31:vgpr_32 = V_ADD_U32_e64 %0.sub2:vreg_256, %1.sub3:vreg_256, 0, implicit $exec + %32:vgpr_32, %33:sreg_64_xexec = V_ADD_CO_U32_e64 %0.sub2:vreg_256, %1.sub4:vreg_256, 0, implicit $exec + + S_BRANCH %bb.1 + + bb.1: + INLINEASM &"", 1, implicit %32, implicit %33 + S_CBRANCH_EXECZ %bb.2, implicit $exec + S_BRANCH %bb.3 + + bb.2: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, 
implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31 + S_BRANCH %bb.4 + + bb.3: + INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4 + INLINEASM &"", 1, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31 + S_BRANCH %bb.4 + + bb.4: + S_CBRANCH_SCC1 %bb.1, implicit undef $scc + S_BRANCH %bb.5 + + bb.5: + S_ENDPGM 0 +... + diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir index efa21052e3ae2..0fc31ea9d6437 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-ignorable-exec-use.mir @@ -1,5 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass=machine-sink -o - %s | FileCheck -check-prefixes=GFX9 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass=machine-sink --sink-insts-to-avoid-spills=1 -o - %s | FileCheck -check-prefixes=GFX9 %s + --- name: test_sink_fmac_to_only_use diff --git a/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir b/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir index 04c80582f6f07..ef6771278b06f 100644 --- a/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir +++ b/llvm/test/CodeGen/AMDGPU/machine-sink-lane-mask.mir @@ -1,78 +1,79 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 -# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o - %s | FileCheck %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink -o - %s | FileCheck -check-prefixes=GFX10 %s +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -run-pass=machine-sink --sink-insts-to-avoid-spills=1 -o - %s | FileCheck -check-prefixes=GFX10 %s --- name: multi_else_break tracksRegLiveness: true body: | - ; CHECK-LABEL: name: multi_else_break - ; CHECK: bb.0: - ; CHECK-NEXT: successors: %bb.1(0x80000000) - ; CHECK-NEXT: liveins: $vgpr4, $vgpr5 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 - ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec - ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.2(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %9, %bb.6 - ; CHECK-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, %11, %bb.6 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2: - ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) - ; 
CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]], %bb.1, %13, %bb.5 - ; CHECK-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[DEF]], %bb.1, %15, %bb.5 - ; CHECK-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.1, %17, %bb.5 - ; CHECK-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, %19, %bb.5 - ; CHECK-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[PHI5]], [[COPY1]], implicit $exec - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF2]] - ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI3]], $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_LT_I32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3: - ; CHECK-NEXT: SI_END_CF %9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: S_ENDPGM 0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: successors: %bb.5(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI5]], [[S_MOV_B32_1]], 0, implicit $exec - ; CHECK-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[COPY]], [[V_ADD_U32_e64_]], implicit $exec - ; CHECK-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_]], $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_ANDN2_B32_]] - ; CHECK-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_1]], $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], $exec_lo, implicit-def $scc - ; CHECK-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_1]], [[S_AND_B32_]], implicit-def $scc - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.5: - ; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.2(0x7c000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]], %bb.2, [[S_OR_B32_2]], %bb.4 - ; CHECK-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.2, [[COPY4]], %bb.4 - ; CHECK-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.2, [[V_ADD_U32_e64_]], %bb.4 - ; CHECK-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI6]], [[PHI4]], implicit-def dead $scc - ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.6 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.6: - ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PHI9:%[0-9]+]]:vgpr_32 = PHI [[PHI8]], %bb.5 - ; CHECK-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: [[SI_IF_BREAK1:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI7]], [[PHI]], implicit-def dead $scc - ; CHECK-NEXT: SI_LOOP [[SI_IF_BREAK1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; CHECK-NEXT: S_BRANCH %bb.3 + ; GFX10-LABEL: name: multi_else_break + ; GFX10: bb.0: + ; GFX10-NEXT: successors: %bb.1(0x80000000) + ; GFX10-NEXT: liveins: $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0 + ; GFX10-NEXT: 
[[COPY2:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]], implicit $exec + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32 = IMPLICIT_DEF + ; GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.1: + ; GFX10-NEXT: successors: %bb.2(0x80000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.0, %9, %bb.6 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, %11, %bb.6 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.2: + ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]], %bb.1, %13, %bb.5 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[DEF]], %bb.1, %15, %bb.5 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_MOV_B32_]], %bb.1, %17, %bb.5 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI1]], %bb.1, %19, %bb.5 + ; GFX10-NEXT: [[V_CMP_LT_I32_e64_:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[PHI5]], [[COPY1]], implicit $exec + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[DEF2]] + ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI3]], $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32 = S_OR_B32 [[PHI2]], $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32 = SI_IF killed [[V_CMP_LT_I32_e64_]], %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.4 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.3: + ; GFX10-NEXT: SI_END_CF %9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX10-NEXT: S_ENDPGM 0 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.4: + ; GFX10-NEXT: successors: %bb.5(0x80000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[PHI5]], [[S_MOV_B32_1]], 0, implicit $exec + ; GFX10-NEXT: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 [[COPY]], [[V_ADD_U32_e64_]], implicit $exec + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_]], $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32 = COPY [[S_ANDN2_B32_]] + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32 = S_ANDN2_B32 [[S_OR_B32_1]], $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[V_CMP_NE_U32_e64_]], $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32 = S_OR_B32 [[S_ANDN2_B32_1]], [[S_AND_B32_]], implicit-def $scc + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.5: + ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.2(0x7c000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]], %bb.2, [[S_OR_B32_2]], %bb.4 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]], %bb.2, [[COPY4]], %bb.4 + ; GFX10-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.2, [[V_ADD_U32_e64_]], %bb.4 + ; GFX10-NEXT: SI_END_CF [[SI_IF]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX10-NEXT: [[SI_IF_BREAK:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI6]], [[PHI4]], implicit-def dead $scc + ; GFX10-NEXT: SI_LOOP [[SI_IF_BREAK]], %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.6 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: bb.6: + ; GFX10-NEXT: successors: %bb.3(0x04000000), %bb.1(0x7c000000) + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[PHI9:%[0-9]+]]:vgpr_32 = PHI [[PHI8]], %bb.5 + ; GFX10-NEXT: SI_END_CF [[SI_IF_BREAK]], implicit-def 
dead $exec, implicit-def dead $scc, implicit $exec + ; GFX10-NEXT: [[SI_IF_BREAK1:%[0-9]+]]:sreg_32 = SI_IF_BREAK [[PHI7]], [[PHI]], implicit-def dead $scc + ; GFX10-NEXT: SI_LOOP [[SI_IF_BREAK1]], %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec + ; GFX10-NEXT: S_BRANCH %bb.3 bb.0: successors: %bb.1(0x80000000) liveins: $vgpr4, $vgpr5 diff --git a/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir b/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir index 43c286a830b42..f23afe52f97de 100644 --- a/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir +++ b/llvm/test/CodeGen/SystemZ/machinelicm-sunk-kill-flags.mir @@ -25,23 +25,24 @@ body: | ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[LARL:%[0-9]+]]:addr64bit = LARL @b + ; CHECK-NEXT: [[LA:%[0-9]+]]:gr64bit = LA killed [[LARL]], 49, $noreg + ; CHECK-NEXT: [[LGHI:%[0-9]+]]:gr64bit = LGHI 7 + ; CHECK-NEXT: [[DEF:%[0-9]+]]:gr64bit = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:gr64bit = IMPLICIT_DEF ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[LA:%[0-9]+]]:gr64bit = LA [[LARL]], 49, $noreg - ; CHECK-NEXT: [[LGHI:%[0-9]+]]:gr64bit = LGHI 7 - ; CHECK-NEXT: [[DEF:%[0-9]+]]:gr64bit = IMPLICIT_DEF - ; CHECK-NEXT: [[DEF1:%[0-9]+]]:gr64bit = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64bit = COPY [[LA]] ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0 ; CHECK-NEXT: $r2d = COPY [[DEF]] - ; CHECK-NEXT: $r3d = COPY [[LA]] + ; CHECK-NEXT: $r3d = COPY [[COPY]] ; CHECK-NEXT: $r4d = COPY [[LGHI]] ; CHECK-NEXT: CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d ; CHECK-NEXT: ADJCALLSTACKUP 0, 0 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0 ; CHECK-NEXT: $r2d = COPY [[DEF1]] - ; CHECK-NEXT: $r3d = COPY [[LA]] + ; CHECK-NEXT: $r3d = COPY [[COPY]] ; CHECK-NEXT: $r4d = COPY [[LGHI]] ; CHECK-NEXT: CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d ; CHECK-NEXT: ADJCALLSTACKUP 0, 0 @@ -54,19 +55,20 @@ body: | %2:gr64bit = LGHI 7 %3:gr64bit = IMPLICIT_DEF %5:gr64bit = IMPLICIT_DEF + %6:gr64bit = COPY killed %0 bb.1: successors: %bb.1(0x80000000) ADJCALLSTACKDOWN 0, 0 $r2d = COPY %3 - $r3d = COPY %0 + $r3d = COPY %6 $r4d = COPY %2 CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d ADJCALLSTACKUP 0, 0 ADJCALLSTACKDOWN 0, 0 $r2d = COPY %5 - $r3d = COPY %0 + $r3d = COPY %6 $r4d = COPY %2 CallBRASL &memcpy, $r2d, $r3d, $r4d, csr_systemz_elf, implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc, implicit-def $r2d ADJCALLSTACKUP 0, 0