diff --git a/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
new file mode 100644
index 0000000000000..7e46e94d3512a
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineIDFSSAUpdater.h
@@ -0,0 +1,105 @@
+//===- MachineIDFSSAUpdater.h - Unstructured SSA Update Tool ----*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MachineIDFSSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEIDFSSAUPDATER_H
+#define LLVM_CODEGEN_MACHINEIDFSSAUPDATER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include <utility>
+
+namespace llvm {
+
+class MachineDominatorTree;
+class MachineInstrBuilder;
+class TargetInstrInfo;
+
+/// Rewrites a single machine-level value into SSA form, placing PHI
+/// instructions with the iterated dominance frontier (IDF) algorithm.
+///
+/// Usage: register every definition with addAvailableValue() and every using
+/// block with addUseBlock(), call calculate() once, and then query the value
+/// available in a block with getValueInMiddleOfBlock().
+class MachineIDFSSAUpdater {
+  /// Values known at the boundaries of one basic block. A default-constructed
+  /// (invalid) Register means the value has not been determined yet.
+  struct BBValueInfo {
+    Register LiveInValue;
+    Register LiveOutValue;
+  };
+
+  MachineDominatorTree &DT;
+  MachineRegisterInfo &MRI;
+  const TargetInstrInfo &TII;
+  /// Attributes given to every virtual register created by the updater.
+  MachineRegisterInfo::VRegAttrs RegAttrs;
+  /// If true, undefined values are materialized with G_IMPLICIT_DEF instead
+  /// of IMPLICIT_DEF.
+  const bool RunOnGenericRegs;
+
+  /// (block, value) pairs registered through addAvailableValue().
+  SmallVector<std::pair<MachineBasicBlock *, Register>, 4> Defines;
+  /// Blocks registered through addUseBlock().
+  SmallVector<MachineBasicBlock *, 4> UseBlocks;
+  /// Per-block live-in/live-out values, filled in by calculate() and
+  /// computeValue().
+  DenseMap<MachineBasicBlock *, BBValueInfo> BBInfos;
+
+  /// Insert an instruction with opcode \p Opc before \p I in \p BB. The
+  /// instruction defines a fresh virtual register created with RegAttrs.
+  MachineInstrBuilder createInst(unsigned Opc, MachineBasicBlock *BB,
+                                 MachineBasicBlock::iterator I);
+
+  /// Return the value live out of \p BB if \p IsLiveOut is true, otherwise the
+  /// value live into \p BB.
+  Register computeValue(MachineBasicBlock *BB, bool IsLiveOut);
+
+public:
+  MachineIDFSSAUpdater(MachineDominatorTree &DT, MachineFunction &MF,
+                       const MachineRegisterInfo::VRegAttrs &RegAttr,
+                       bool RunOnGenericRegs = false)
+      : DT(DT), MRI(MF.getRegInfo()), TII(*MF.getSubtarget().getInstrInfo()),
+        RegAttrs(RegAttr), RunOnGenericRegs(RunOnGenericRegs) {}
+
+  /// Convenience constructor: new registers get the attributes of \p Reg.
+  MachineIDFSSAUpdater(MachineDominatorTree &DT, MachineFunction &MF,
+                       Register Reg, bool RunOnGenericRegs = false)
+      : MachineIDFSSAUpdater(DT, MF, MF.getRegInfo().getVRegAttrs(Reg),
+                             RunOnGenericRegs) {}
+
+  /// Indicate that a rewritten value is available in the specified block
+  /// with the specified value. Must be called before invoking calculate().
+  void addAvailableValue(MachineBasicBlock *BB, Register V) {
+    Defines.emplace_back(BB, V);
+  }
+
+  /// Record a basic block that uses the value. This method should be called for
+  /// every basic block where the value will be used. Must be called before
+  /// invoking calculate().
+  void addUseBlock(MachineBasicBlock *BB) { UseBlocks.push_back(BB); }
+
+  /// Calculate and insert necessary PHI nodes for SSA form.
+  /// Must be called after registering all definitions and uses.
+  void calculate();
+
+  /// See SSAUpdater::GetValueInMiddleOfBlock description.
+  Register getValueInMiddleOfBlock(MachineBasicBlock *BB);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINEIDFSSAUPDATER_H
diff --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index b6872605e22aa..19e5f1a52a47c 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -126,6 +126,7 @@ add_llvm_component_library(LLVMCodeGen
   MachineFunctionPass.cpp
   MachineFunctionPrinterPass.cpp
   MachineFunctionSplitter.cpp
+  MachineIDFSSAUpdater.cpp
   MachineInstrBundle.cpp
   MachineInstr.cpp
   MachineLateInstrsCleanup.cpp
diff --git a/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp b/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
new file mode 100644
index 0000000000000..b184579cb0740
--- /dev/null
+++ b/llvm/lib/CodeGen/MachineIDFSSAUpdater.cpp
@@ -0,0 +1,208 @@
+//===- MachineIDFSSAUpdater.cpp - Unstructured SSA Update Tool ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineIDFSSAUpdater class, which provides an
+// efficient SSA form maintenance utility for machine-level IR. It uses the
+// iterated dominance frontier (IDF) algorithm via MachineForwardIDFCalculator
+// to compute phi-function placement, offering better performance than the
+// incremental MachineSSAUpdater approach. The updater requires a single call
+// to calculate() after all definitions and uses have been registered.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineIDFSSAUpdater.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
+#include <cassert>
+
+namespace llvm {
+
+/// IDF calculator instantiated for machine basic blocks. \p IsPostDom selects
+/// between the dominator tree (false) and the post-dominator tree (true).
+template <bool IsPostDom>
+class MachineIDFCalculator final
+    : public IDFCalculatorBase<MachineBasicBlock, IsPostDom> {
+public:
+  using IDFCalculatorBase =
+      typename llvm::IDFCalculatorBase<MachineBasicBlock, IsPostDom>;
+  using ChildrenGetterTy = typename IDFCalculatorBase::ChildrenGetterTy;
+
+  MachineIDFCalculator(DominatorTreeBase<MachineBasicBlock, IsPostDom> &DT)
+      : IDFCalculatorBase(DT) {}
+};
+
+using MachineForwardIDFCalculator = MachineIDFCalculator<false>;
+using MachineReverseIDFCalculator = MachineIDFCalculator<true>;
+
+} // namespace llvm
+
+using namespace llvm;
+
+/// Given sets of UsingBlocks and DefBlocks, compute the set of LiveInBlocks.
+/// This is basically a subgraph limited by DefBlocks and UsingBlocks.
+static void
+computeLiveInBlocks(const SmallPtrSetImpl<MachineBasicBlock *> &UsingBlocks,
+                    const SmallPtrSetImpl<MachineBasicBlock *> &DefBlocks,
+                    SmallPtrSetImpl<MachineBasicBlock *> &LiveInBlocks) {
+  // To determine liveness, we must iterate through the predecessors of blocks
+  // where the def is live. Blocks are added to the worklist if we need to
+  // check their predecessors. Start with all the using blocks.
+  SmallVector<MachineBasicBlock *, 64> LiveInBlockWorklist(UsingBlocks.begin(),
+                                                           UsingBlocks.end());
+
+  // Now that we have a set of blocks where the phi is live-in, recursively add
+  // their predecessors until we find the full region the value is live.
+  while (!LiveInBlockWorklist.empty()) {
+    MachineBasicBlock *BB = LiveInBlockWorklist.pop_back_val();
+
+    // The block really is live in here, insert it into the set. If already in
+    // the set, then it has already been processed.
+    if (!LiveInBlocks.insert(BB).second)
+      continue;
+
+    // Since the value is live into BB, it is either defined in a predecessor or
+    // live into it too. Add the preds to the worklist unless they are a
+    // defining block.
+    for (MachineBasicBlock *P : BB->predecessors()) {
+      // The value is not live into a predecessor if it defines the value.
+      if (DefBlocks.count(P))
+        continue;
+
+      // Otherwise it is, add to the worklist.
+      LiveInBlockWorklist.push_back(P);
+    }
+  }
+}
+
+/// Build an instruction with opcode \p Opc before \p I in \p BB that defines a
+/// new virtual register created with RegAttrs. No debug location is attached.
+MachineInstrBuilder
+MachineIDFSSAUpdater::createInst(unsigned Opc, MachineBasicBlock *BB,
+                                 MachineBasicBlock::iterator I) {
+  return BuildMI(*BB, I, DebugLoc(), TII.get(Opc),
+                 MRI.createVirtualRegister(RegAttrs));
+}
+
+/// Return the value live out of \p BB (if \p IsLiveOut) or live into \p BB.
+///
+/// If the block has no recorded value, walk up the dominator tree until a block
+/// with a known value is found. If none exists, an implicit def is created in
+/// the topmost block visited. The result is cached as the live-in value of
+/// every block on the walked path.
+Register MachineIDFSSAUpdater::computeValue(MachineBasicBlock *BB,
+                                            bool IsLiveOut) {
+  const BBValueInfo &Known = BBInfos[BB];
+  if (IsLiveOut && Known.LiveOutValue)
+    return Known.LiveOutValue;
+
+  if (Known.LiveInValue)
+    return Known.LiveInValue;
+
+  // Record blocks, not pointers into BBInfos: the lookups in the loop below
+  // may insert entries and rehash the map, which would invalidate pointers to
+  // its values (e.g. when queried for a block that was never registered).
+  SmallVector<MachineBasicBlock *, 4> DomPath = {BB};
+  MachineBasicBlock *DomBB = BB, *TopDomBB = BB;
+  Register V;
+
+  while (DT.isReachableFromEntry(DomBB)) {
+    // The root of the dominator tree has no immediate dominator.
+    const MachineDomTreeNode *IDom = DT.getNode(DomBB)->getIDom();
+    if (!IDom)
+      break;
+
+    DomBB = IDom->getBlock();
+    const BBValueInfo &DomInfo = BBInfos[DomBB];
+    if (DomInfo.LiveOutValue) {
+      V = DomInfo.LiveOutValue;
+      break;
+    }
+    if (DomInfo.LiveInValue) {
+      V = DomInfo.LiveInValue;
+      break;
+    }
+    TopDomBB = DomBB;
+    DomPath.push_back(DomBB);
+  }
+
+  // No block on the walk had a known value: materialize an undefined value in
+  // the topmost block visited.
+  if (!V) {
+    V = createInst(RunOnGenericRegs ? TargetOpcode::G_IMPLICIT_DEF
+                                    : TargetOpcode::IMPLICIT_DEF,
+                   TopDomBB, TopDomBB->getFirstNonPHI())
+            .getReg(0);
+  }
+
+  for (MachineBasicBlock *PathBB : DomPath)
+    BBInfos[PathBB].LiveInValue = V;
+
+  return V;
+}
+
+/// Insert the PHI instructions required by the registered definitions and uses
+/// and record the live-in/live-out values used to answer later queries.
+void MachineIDFSSAUpdater::calculate() {
+  MachineForwardIDFCalculator IDF(DT);
+
+  SmallPtrSet<MachineBasicBlock *, 2> DefBlocks;
+  for (auto [BB, V] : Defines)
+    DefBlocks.insert(BB);
+  IDF.setDefiningBlocks(DefBlocks);
+
+  SmallPtrSet<MachineBasicBlock *, 2> UsingBlocks(UseBlocks.begin(),
+                                                  UseBlocks.end());
+  SmallVector<MachineBasicBlock *, 32> IDFBlocks;
+  SmallPtrSet<MachineBasicBlock *, 32> LiveInBlocks;
+  computeLiveInBlocks(UsingBlocks, DefBlocks, LiveInBlocks);
+  IDF.setLiveInBlocks(LiveInBlocks);
+  IDF.calculate(IDFBlocks);
+
+  // Pre-size the map for the blocks we expect to touch. This only avoids
+  // rehashing; correctness does not depend on it.
+  BBInfos.reserve(LiveInBlocks.size() + DefBlocks.size());
+
+  for (auto [BB, V] : Defines)
+    BBInfos[BB].LiveOutValue = V;
+
+  // Create all PHIs first so that their results are visible as live-in values
+  // while the incoming operands are computed below.
+  for (MachineBasicBlock *FrontierBB : IDFBlocks) {
+    Register NewVR =
+        createInst(TargetOpcode::PHI, FrontierBB, FrontierBB->begin())
+            .getReg(0);
+    BBInfos[FrontierBB].LiveInValue = NewVR;
+  }
+
+  for (MachineBasicBlock *BB : IDFBlocks) {
+    // The PHI created above was inserted at the start of the block.
+    auto *PHI = &BB->front();
+    assert(PHI->isPHI() && "Expected the PHI inserted at the block start");
+    MachineInstrBuilder MIB(*BB->getParent(), PHI);
+    for (MachineBasicBlock *Pred : BB->predecessors())
+      MIB.addReg(computeValue(Pred, /*IsLiveOut=*/true)).addMBB(Pred);
+  }
+}
+
+/// Return the value live into \p BB; see computeValue().
+Register MachineIDFSSAUpdater::getValueInMiddleOfBlock(MachineBasicBlock *BB) {
+  return computeValue(BB, /*IsLiveOut=*/false);
+}
diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index 96131bd591a17..ba7781443711b 100644
--- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -23,7 +23,7 @@ #include 
"SILowerI1Copies.h" #include "AMDGPU.h" -#include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/CodeGen/MachineIDFSSAUpdater.h" #include "llvm/InitializePasses.h" #define DEBUG_TYPE "si-i1-copies" @@ -275,7 +275,7 @@ class LoopFinder { /// Add undef values dominating the loop and the optionally given additional /// blocks, so that the SSA updater doesn't have to search all the way to the /// function entry. - void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater, + void addLoopEntries(unsigned LoopLevel, MachineIDFSSAUpdater &SSAUpdater, MachineRegisterInfo &MRI, MachineRegisterInfo::VRegAttrs LaneMaskRegAttrs, ArrayRef Incomings = {}) { @@ -286,14 +286,14 @@ class LoopFinder { Dom = DT.findNearestCommonDominator(Dom, Incoming.Block); if (!inLoopLevel(*Dom, LoopLevel, Incomings)) { - SSAUpdater.AddAvailableValue( + SSAUpdater.addAvailableValue( Dom, insertUndefLaneMask(Dom, &MRI, LaneMaskRegAttrs)); } else { // The dominator is part of the loop or the given blocks, so add the // undef value to unreachable predecessors instead. for (MachineBasicBlock *Pred : Dom->predecessors()) { if (!inLoopLevel(*Pred, LoopLevel, Incomings)) - SSAUpdater.AddAvailableValue( + SSAUpdater.addAvailableValue( Pred, insertUndefLaneMask(Pred, &MRI, LaneMaskRegAttrs)); } } @@ -469,7 +469,6 @@ PhiLoweringHelper::PhiLoweringHelper(MachineFunction *MF, } bool PhiLoweringHelper::lowerPhis() { - MachineSSAUpdater SSAUpdater(*MF); LoopFinder LF(*DT, *PDT); PhiIncomingAnalysis PIA(*PDT, TII); SmallVector Vreg1Phis; @@ -524,22 +523,26 @@ bool PhiLoweringHelper::lowerPhis() { // in practice. 
unsigned FoundLoopLevel = LF.findLoop(PostDomBound); - SSAUpdater.Initialize(DstReg); + MachineIDFSSAUpdater SSAUpdater(*DT, *MF, DstReg); + SSAUpdater.addUseBlock(&MBB); if (FoundLoopLevel) { LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs, Incomings); for (auto &Incoming : Incomings) { + SSAUpdater.addUseBlock(Incoming.Block); Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs); - SSAUpdater.AddAvailableValue(Incoming.Block, Incoming.UpdatedReg); + SSAUpdater.addAvailableValue(Incoming.Block, Incoming.UpdatedReg); } + SSAUpdater.calculate(); + for (auto &Incoming : Incomings) { MachineBasicBlock &IMBB = *Incoming.Block; buildMergeLaneMasks( IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg, - SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg); + SSAUpdater.getValueInMiddleOfBlock(&IMBB), Incoming.Reg); } } else { // The phi is not observed from outside a loop. Use a more accurate @@ -547,20 +550,23 @@ bool PhiLoweringHelper::lowerPhis() { PIA.analyze(MBB, Incomings); for (MachineBasicBlock *MBB : PIA.predecessors()) - SSAUpdater.AddAvailableValue( + SSAUpdater.addAvailableValue( MBB, insertUndefLaneMask(MBB, MRI, LaneMaskRegAttrs)); for (auto &Incoming : Incomings) { MachineBasicBlock &IMBB = *Incoming.Block; if (PIA.isSource(IMBB)) { constrainAsLaneMask(Incoming); - SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg); + SSAUpdater.addAvailableValue(&IMBB, Incoming.Reg); } else { + SSAUpdater.addUseBlock(&IMBB); Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs); - SSAUpdater.AddAvailableValue(&IMBB, Incoming.UpdatedReg); + SSAUpdater.addAvailableValue(&IMBB, Incoming.UpdatedReg); } } + SSAUpdater.calculate(); + for (auto &Incoming : Incomings) { if (!Incoming.UpdatedReg.isValid()) continue; @@ -568,11 +574,11 @@ bool PhiLoweringHelper::lowerPhis() { MachineBasicBlock &IMBB = *Incoming.Block; buildMergeLaneMasks( IMBB, getSaluInsertionAtEnd(IMBB), {}, Incoming.UpdatedReg, - 
SSAUpdater.GetValueInMiddleOfBlock(&IMBB), Incoming.Reg); + SSAUpdater.getValueInMiddleOfBlock(&IMBB), Incoming.Reg); } } - Register NewReg = SSAUpdater.GetValueInMiddleOfBlock(&MBB); + Register NewReg = SSAUpdater.getValueInMiddleOfBlock(&MBB); if (NewReg != DstReg) { replaceDstReg(NewReg, DstReg, &MBB); MI->eraseFromParent(); @@ -585,7 +591,6 @@ bool PhiLoweringHelper::lowerPhis() { bool Vreg1LoweringHelper::lowerCopiesToI1() { bool Changed = false; - MachineSSAUpdater SSAUpdater(*MF); LoopFinder LF(*DT, *PDT); SmallVector DeadCopies; @@ -643,12 +648,14 @@ bool Vreg1LoweringHelper::lowerCopiesToI1() { PDT->findNearestCommonDominator(DomBlocks); unsigned FoundLoopLevel = LF.findLoop(PostDomBound); if (FoundLoopLevel) { - SSAUpdater.Initialize(DstReg); - SSAUpdater.AddAvailableValue(&MBB, DstReg); + MachineIDFSSAUpdater SSAUpdater(*DT, *MF, DstReg); + SSAUpdater.addUseBlock(&MBB); + SSAUpdater.addAvailableValue(&MBB, DstReg); LF.addLoopEntries(FoundLoopLevel, SSAUpdater, *MRI, LaneMaskRegAttrs); + SSAUpdater.calculate(); buildMergeLaneMasks(MBB, MI, DL, DstReg, - SSAUpdater.GetValueInMiddleOfBlock(&MBB), SrcReg); + SSAUpdater.getValueInMiddleOfBlock(&MBB), SrcReg); DeadCopies.push_back(&MI); } } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir index e800cb2e24a7a..cfef60c66d6a7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir @@ -101,19 +101,19 @@ body: | ; GFX10-NEXT: successors: %bb.1(0x80000000) ; GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-NEXT: 
[[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF + ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]](s1) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C]](s1) + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C]](s1) ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY]](s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir index b76d421c16172..994640e524fc9 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir @@ -1026,10 +1026,10 @@ body: | ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR1]] ; GFX10-NEXT: [[XOR2:%[0-9]+]]:_(s1) = G_XOR [[OR]], [[C4]] ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[XOR2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %46(s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %47(s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 %53(s1), $exec_lo, implicit-def $scc + ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 %54(s1), $exec_lo, implicit-def $scc ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.1 @@ -1195,7 +1195,7 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %52(s1), %bb.6, %56(s1), %bb.7 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = PHI %52(s1), %bb.6, %57(s1), %bb.7 ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = PHI %41(s1), %bb.6, %40(s1), %bb.7 ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, [[DEF]](s1), %bb.7 ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]](s1) diff --git a/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir b/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir index fb052e28e2c20..9618abe1770bf 100644 
--- a/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir +++ b/llvm/test/CodeGen/AMDGPU/si-i1-copies.mir @@ -10,10 +10,10 @@ body: | ; GCN: bb.0: ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GCN-NEXT: S_BRANCH %bb.1 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.1: - ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY [[DEF]] ; GCN-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, [[COPY]], implicit $exec bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir index ecbd47a9e8d0d..9c27cb3017e95 100644 --- a/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir +++ b/llvm/test/CodeGen/AMDGPU/si-lower-i1-copies-order-of-phi-incomings.mir @@ -20,21 +20,21 @@ body: | ; GCN-NEXT: successors: %bb.1(0x80000000) ; GCN-NEXT: liveins: $vgpr1, $vgpr2, $vgpr3, $vgpr4 ; GCN-NEXT: {{ $}} + ; GCN-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GCN-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GCN-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; GCN-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF - ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 ; GCN-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1 ; GCN-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF + ; GCN-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF ; GCN-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1 ; GCN-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 ; GCN-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = 
S_MOV_B32 -1 ; GCN-NEXT: [[COPY4:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]] ; GCN-NEXT: [[COPY5:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]] - ; GCN-NEXT: [[DEF4:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: [[COPY6:%[0-9]+]]:sreg_32 = COPY $exec_lo ; GCN-NEXT: [[DEF5:%[0-9]+]]:sreg_32 = IMPLICIT_DEF ; GCN-NEXT: {{ $}}