-
Notifications
You must be signed in to change notification settings - Fork 15.4k
Revert "[StructurizeCFG] Hoist and simplify zero-cost incoming else phi values" #148016
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Revert "[StructurizeCFG] Hoist and simplify zero-cost incoming else phi values". This reverts commit 8d3f497.
|
@llvm/pr-subscribers-llvm-transforms Author: Vigneshwar Jayakumar (VigneshwarJ). Changes: Reverts llvm/llvm-project#139605. The patch is 23.90 KiB, truncated to 20.00 KiB below; the full version is at https://github.com/llvm/llvm-project/pull/148016.diff — 4 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 5b2ca8c5915ff..a69d64956d6d9 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -129,7 +128,6 @@ struct PredInfo {
using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
-using Val2BBMap = DenseMap<Value *, BasicBlock *>;
// A traits type that is intended to be used in graph algorithms. The graph
// traits starts at an entry node, and traverses the RegionNodes that are in
@@ -281,7 +279,7 @@ class StructurizeCFG {
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
Value *BoolPoison;
- const TargetTransformInfo *TTI;
+
Function *Func;
Region *ParentRegion;
@@ -303,12 +301,8 @@ class StructurizeCFG {
PredMap LoopPreds;
BranchVector LoopConds;
- Val2BBMap HoistedValues;
-
RegionNode *PrevNode;
- void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
-
void orderNodes();
void analyzeLoops(RegionNode *N);
@@ -338,8 +332,6 @@ class StructurizeCFG {
void simplifyAffectedPhis();
- void simplifyHoistedPhis();
-
DebugLoc killTerminator(BasicBlock *BB);
void changeExit(RegionNode *Node, BasicBlock *NewExit,
@@ -367,7 +359,7 @@ class StructurizeCFG {
public:
void init(Region *R);
- bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
+ bool run(Region *R, DominatorTree *DT);
bool makeUniformRegion(Region *R, UniformityInfo &UA);
};
@@ -393,11 +385,8 @@ class StructurizeCFGLegacyPass : public RegionPass {
if (SCFG.makeUniformRegion(R, UA))
return false;
}
- Function *F = R->getEntry()->getParent();
- const TargetTransformInfo *TTI =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return SCFG.run(R, DT, TTI);
+ return SCFG.run(R, DT);
}
StringRef getPassName() const override { return "Structurize control flow"; }
@@ -405,9 +394,7 @@ class StructurizeCFGLegacyPass : public RegionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
AU.addRequired<UniformityInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
@@ -416,34 +403,6 @@ class StructurizeCFGLegacyPass : public RegionPass {
} // end anonymous namespace
-/// Checks whether an instruction is zero cost instruction and checks if the
-/// operands are from different BB. If so, this instruction can be coalesced
-/// if its hoisted to predecessor block. So, this returns true.
-static bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
- const TargetTransformInfo *TTI) {
- if (I->getParent() != BB)
- return false;
-
- // If the instruction is not a zero cost instruction, return false.
- auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
- InstructionCost::CostType CostVal =
- Cost.isValid()
- ? Cost.getValue()
- : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
- if (CostVal != 0)
- return false;
-
- // Check if any operands are instructions defined in the same block.
- for (auto &Op : I->operands()) {
- if (auto *OpI = dyn_cast<Instruction>(Op)) {
- if (OpI->getParent() == BB)
- return false;
- }
- }
-
- return true;
-}
-
char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
@@ -454,39 +413,6 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
-/// Structurization can introduce unnecessary VGPR copies due to register
-/// coalescing interference. For example, if the Else block has a zero-cost
-/// instruction and the Then block modifies the VGPR value, only one value is
-/// live at a time in merge block before structurization. After structurization,
-/// the coalescer may incorrectly treat the Then value as live in the Else block
-/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
-///
-/// This function examines phi nodes whose incoming values are zero-cost
-/// instructions in the Else block. It identifies such values that can be safely
-/// hoisted and moves them to the nearest common dominator of Then and Else
-/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
-/// value to poison phi nodes along the if→flow edge, aiding register coalescing
-/// and minimizing unnecessary live ranges.
-void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
- BasicBlock *ThenBB) {
-
- BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
- BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
-
- if (!ElseSucc || !CommonDominator)
- return;
- Instruction *Term = CommonDominator->getTerminator();
- for (PHINode &Phi : ElseSucc->phis()) {
- Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
- auto *Inst = dyn_cast<Instruction>(ElseVal);
- if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI))
- continue;
- Inst->removeFromParent();
- Inst->insertInto(CommonDominator, Term->getIterator());
- HoistedValues[Inst] = CommonDominator;
- }
-}
-
/// Build up the general order of nodes, by performing a topological sort of the
/// parent region's nodes, while ensuring that there is no outer cycle node
/// between any two inner cycle nodes.
@@ -609,7 +535,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
BasicBlock *Other = Term->getSuccessor(!i);
if (Visited.count(Other) && !Loops.count(Other) &&
!Pred.count(Other) && !Pred.count(P)) {
- hoistZeroCostElseBlockPhiValues(Succ, Other);
+
Pred[Other] = {BoolFalse, std::nullopt};
Pred[P] = {BoolTrue, std::nullopt};
continue;
@@ -965,44 +891,6 @@ void StructurizeCFG::setPhiValues() {
AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
}
-/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
-/// entries on Flow nodes with the appropriate hoisted values
-void StructurizeCFG::simplifyHoistedPhis() {
- for (WeakVH VH : AffectedPhis) {
- PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
- if (!Phi || Phi->getNumIncomingValues() != 2)
- continue;
-
- for (int i = 0; i < 2; i++) {
- Value *V = Phi->getIncomingValue(i);
- auto BBIt = HoistedValues.find(V);
-
- if (BBIt == HoistedValues.end())
- continue;
-
- Value *OtherV = Phi->getIncomingValue(!i);
- PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
- if (!OtherPhi)
- continue;
-
- int PoisonValBBIdx = -1;
- for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
- if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
- continue;
- PoisonValBBIdx = i;
- break;
- }
- if (PoisonValBBIdx == -1 ||
- !DT->dominates(BBIt->second,
- OtherPhi->getIncomingBlock(PoisonValBBIdx)))
- continue;
-
- OtherPhi->setIncomingValue(PoisonValBBIdx, V);
- Phi->setIncomingValue(i, OtherV);
- }
- }
-}
-
void StructurizeCFG::simplifyAffectedPhis() {
bool Changed;
do {
@@ -1395,13 +1283,12 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
}
/// Run the transformation for each region found
-bool StructurizeCFG::run(Region *R, DominatorTree *DT,
- const TargetTransformInfo *TTI) {
+bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
if (R->isTopLevelRegion())
return false;
this->DT = DT;
- this->TTI = TTI;
+
Func = R->getEntry()->getParent();
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
@@ -1413,7 +1300,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT,
insertConditions(false);
insertConditions(true);
setPhiValues();
- simplifyHoistedPhis();
simplifyConditions();
simplifyAffectedPhis();
rebuildSSA();
@@ -1463,7 +1349,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
bool Changed = false;
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto &RI = AM.getResult<RegionInfoAnalysis>(F);
- TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
+
UniformityInfo *UI = nullptr;
if (SkipUniformRegions)
UI = &AM.getResult<UniformityInfoAnalysis>(F);
@@ -1482,7 +1368,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
continue;
}
- Changed |= SCFG.run(R, DT, TTI);
+ Changed |= SCFG.run(R, DT);
}
if (!Changed)
return PreservedAnalyses::all();
diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
index be020457ce87d..9cc42ac448067 100644
--- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
+++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
@@ -9851,8 +9851,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s6
; CHECK-NEXT: s_cbranch_execz .LBB8_6
; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
-; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
+; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
; CHECK-NEXT: s_movk_i32 s4, 0xf800
; CHECK-NEXT: s_mov_b32 s5, -1
; CHECK-NEXT: .LBB8_5: ; %memmove_bwd_loop
@@ -11167,8 +11167,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; ALIGNED-NEXT: s_andn2_saveexec_b32 s6, s6
; ALIGNED-NEXT: s_cbranch_execz .LBB8_6
; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
-; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
+; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
; ALIGNED-NEXT: s_movk_i32 s4, 0xf800
; ALIGNED-NEXT: s_mov_b32 s5, -1
; ALIGNED-NEXT: .LBB8_5: ; %memmove_bwd_loop
@@ -12381,8 +12381,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2024
; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2020
; UNROLL3-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:2016
-; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1
; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0
+; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1
; UNROLL3-NEXT: s_waitcnt vmcnt(3)
; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2028
; UNROLL3-NEXT: s_waitcnt vmcnt(2)
diff --git a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
deleted file mode 100644
index 42436a1b4c279..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
+++ /dev/null
@@ -1,180 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s
-
-
-%pair = type { i32, i32 }
-
-define void @test_extractvalue_then_else(ptr %ptr, i1 %cond) {
-; GFX900-LABEL: test_extractvalue_then_else:
-; GFX900: ; %bb.0: ; %if
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_load_dword v3, v[0:1]
-; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
-; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GFX900-NEXT: s_cbranch_execz .LBB0_2
-; GFX900-NEXT: ; %bb.1: ; %else
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
-; GFX900-NEXT: .LBB0_2: ; %Flow
-; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_store_dword v[0:1], v3
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-if:
- %load_then = load %pair, ptr %ptr
- br i1 %cond, label %then, label %else
-
-then:
- %a_then = extractvalue %pair %load_then, 0
- br label %merge
-
-else:
- %a_else = extractvalue %pair %load_then, 0
- %sum_else = add i32 %a_else, 1
- br label %merge
-
-merge:
- %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-define void @test_extractvalue_else_then(ptr %ptr, i1 %cond) {
-; GFX900-LABEL: test_extractvalue_else_then:
-; GFX900: ; %bb.0: ; %if
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_load_dword v3, v[0:1]
-; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
-; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; GFX900-NEXT: s_cbranch_execz .LBB1_2
-; GFX900-NEXT: ; %bb.1: ; %else
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
-; GFX900-NEXT: .LBB1_2: ; %merge
-; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_store_dword v[0:1], v3
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-if:
- %load_then = load %pair, ptr %ptr
- br i1 %cond, label %else, label %then
-
-else:
- %a_else = extractvalue %pair %load_then, 0
- %sum_else = add i32 %a_else, 1
- br label %merge
-
-then:
- %a_then = extractvalue %pair %load_then, 0
- br label %merge
-
-merge:
- %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 {
-; GFX900-LABEL: test_loop_with_if:
-; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_load_dword s2, s[4:5], 0x2c
-; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: s_mov_b64 s[4:5], 0
-; GFX900-NEXT: s_movk_i32 s10, 0xfe
-; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: s_bitcmp1_b32 s2, 0
-; GFX900-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX900-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[2:3]
-; GFX900-NEXT: v_mov_b32_e32 v2, s1
-; GFX900-NEXT: s_xor_b64 s[2:3], s[2:3], -1
-; GFX900-NEXT: v_mov_b32_e32 v1, s0
-; GFX900-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v3
-; GFX900-NEXT: s_branch .LBB2_2
-; GFX900-NEXT: .LBB2_1: ; %latch
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_u32_e32 v5, 20, v3
-; GFX900-NEXT: v_cmp_lt_i32_e32 vcc, s10, v5
-; GFX900-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX900-NEXT: flat_store_dword v[1:2], v3
-; GFX900-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GFX900-NEXT: s_cbranch_execz .LBB2_8
-; GFX900-NEXT: .LBB2_2: ; %loop
-; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX900-NEXT: flat_load_dwordx2 v[3:4], v[1:2]
-; GFX900-NEXT: s_and_b64 vcc, exec, s[0:1]
-; GFX900-NEXT: s_mov_b64 s[8:9], s[2:3]
-; GFX900-NEXT: s_mov_b64 s[6:7], 0
-; GFX900-NEXT: s_cbranch_vccnz .LBB2_4
-; GFX900-NEXT: ; %bb.3: ; %if
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: v_cmp_gt_i32_e32 vcc, 11, v5
-; GFX900-NEXT: s_andn2_b64 s[8:9], s[2:3], exec
-; GFX900-NEXT: s_and_b64 s[12:13], vcc, exec
-; GFX900-NEXT: s_mov_b64 s[6:7], -1
-; GFX900-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
-; GFX900-NEXT: .LBB2_4: ; %Flow
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: s_and_saveexec_b64 s[12:13], s[8:9]
-; GFX900-NEXT: s_xor_b64 s[8:9], exec, s[12:13]
-; GFX900-NEXT: s_cbranch_execz .LBB2_6
-; GFX900-NEXT: ; %bb.5: ; %else
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_u32_e32 v3, v3, v4
-; GFX900-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
-; GFX900-NEXT: .LBB2_6: ; %Flow1
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
-; GFX900-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
-; GFX900-NEXT: s_cbranch_execz .LBB2_1
-; GFX900-NEXT: ; %bb.7: ; %then
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: flat_store_dword v[1:2], v0
-; GFX900-NEXT: s_branch .LBB2_1
-; GFX900-NEXT: .LBB2_8: ; %end
-; GFX900-NEXT: s_endpgm
-entry:
- %a = tail call i32 @llvm.amdgcn.workitem.id.x()
- br label %loop
-
-loop:
- %entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ]
- %load = load %pair, ptr %ptr
- br i1 %cond, label %if, label %else
-
-if:
- %cmp = icmp sgt i32 %entry_phi, 10
- br i1 %cmp, label %then, label %else
-
-then:
- %a_then = extractvalue %pair %load, 0
- store i32 %a, ptr %ptr, align 4
- br label %latch
-
-else:
- %a2 = extractvalue %pair %load, 1
- %y = extractvalue %pair %load, 0
- %a_else = add i32 %y, %a2
- br label %latch
-
-latch:
- %a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ]
- store i32 %a_test, ptr %ptr
- %a15 = add nsw i32 %a_test, 20
- %a16 = icmp slt i32 %a15, 255
- br i1 %a16, label %loop, label %end
-
-end:
- ret void
-}
diff --git a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll b/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll
deleted file mode 100644
index 10d4fa2be0a70..0000000000000
--- a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll
+++ /dev/null
@@ -1,161 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=structurizecfg < %s | FileCheck %s
-
-
-%pair = type { i32, i32 }
-define void @test_if_then_else(ptr %ptr, i1 %cond) {
-; CHECK-LABEL: define void @test_if_then_else(
-; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
-; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
-; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
-; CHECK: [[FLOW]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[SUM_ELSE:%.*]], %[[ELSE]] ], [ [[A_THEN]], %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
-; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
-; CHECK: [[THEN]]:
-; CHECK-NEXT: br label %[[MERGE]]
-; CHECK: [[ELSE]]:
-; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
-; CHECK-NEXT: [[SUM_ELSE]] = add i32 [[A_ELSE]], 1
-; CHECK-NEXT: br label %[[FLOW]]
-; CHECK: [[MERGE]]:
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4
-; CHECK-NEXT: ret void
-;
-entry:
- %load_then = load %pair, ptr %ptr
- br i1 %cond, label %then, label %else
-
-then:
- %a_then = extractvalue %pair %load_then, 0
- br label %merge
-
-else:
- %a_else = extractvalue %pair %load_then, 0
- %sum_else = add i32 %a_else, 1
- br label %merge
-
-merge:
- %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-define void @test_if_else_then(ptr %ptr, i1 %cond) {
-; CHECK-LABEL: define void @test_if_else_then(
-; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
-; CHECK-NEXT: br i1 [[COND_INV]], label %[[THEN:.*]], label %[[FLOW:.*]]
-; CHECK: [[THEN]]:
-; CHECK-NEXT: ...
[truncated]
|
|
@llvm/pr-subscribers-backend-amdgpu Author: Vigneshwar Jayakumar (VigneshwarJ). Changes: Reverts llvm/llvm-project#139605. The patch is 23.90 KiB, truncated to 20.00 KiB below; the full version is at https://github.com/llvm/llvm-project/pull/148016.diff — 4 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index 5b2ca8c5915ff..a69d64956d6d9 100644
--- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -19,7 +19,6 @@
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/RegionPass.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/UniformityAnalysis.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
@@ -129,7 +128,6 @@ struct PredInfo {
using BBPredicates = DenseMap<BasicBlock *, PredInfo>;
using PredMap = DenseMap<BasicBlock *, BBPredicates>;
using BB2BBMap = DenseMap<BasicBlock *, BasicBlock *>;
-using Val2BBMap = DenseMap<Value *, BasicBlock *>;
// A traits type that is intended to be used in graph algorithms. The graph
// traits starts at an entry node, and traverses the RegionNodes that are in
@@ -281,7 +279,7 @@ class StructurizeCFG {
ConstantInt *BoolTrue;
ConstantInt *BoolFalse;
Value *BoolPoison;
- const TargetTransformInfo *TTI;
+
Function *Func;
Region *ParentRegion;
@@ -303,12 +301,8 @@ class StructurizeCFG {
PredMap LoopPreds;
BranchVector LoopConds;
- Val2BBMap HoistedValues;
-
RegionNode *PrevNode;
- void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
-
void orderNodes();
void analyzeLoops(RegionNode *N);
@@ -338,8 +332,6 @@ class StructurizeCFG {
void simplifyAffectedPhis();
- void simplifyHoistedPhis();
-
DebugLoc killTerminator(BasicBlock *BB);
void changeExit(RegionNode *Node, BasicBlock *NewExit,
@@ -367,7 +359,7 @@ class StructurizeCFG {
public:
void init(Region *R);
- bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
+ bool run(Region *R, DominatorTree *DT);
bool makeUniformRegion(Region *R, UniformityInfo &UA);
};
@@ -393,11 +385,8 @@ class StructurizeCFGLegacyPass : public RegionPass {
if (SCFG.makeUniformRegion(R, UA))
return false;
}
- Function *F = R->getEntry()->getParent();
- const TargetTransformInfo *TTI =
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- return SCFG.run(R, DT, TTI);
+ return SCFG.run(R, DT);
}
StringRef getPassName() const override { return "Structurize control flow"; }
@@ -405,9 +394,7 @@ class StructurizeCFGLegacyPass : public RegionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
AU.addRequired<UniformityInfoWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
@@ -416,34 +403,6 @@ class StructurizeCFGLegacyPass : public RegionPass {
} // end anonymous namespace
-/// Checks whether an instruction is zero cost instruction and checks if the
-/// operands are from different BB. If so, this instruction can be coalesced
-/// if its hoisted to predecessor block. So, this returns true.
-static bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
- const TargetTransformInfo *TTI) {
- if (I->getParent() != BB)
- return false;
-
- // If the instruction is not a zero cost instruction, return false.
- auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
- InstructionCost::CostType CostVal =
- Cost.isValid()
- ? Cost.getValue()
- : (InstructionCost::CostType)TargetTransformInfo::TCC_Expensive;
- if (CostVal != 0)
- return false;
-
- // Check if any operands are instructions defined in the same block.
- for (auto &Op : I->operands()) {
- if (auto *OpI = dyn_cast<Instruction>(Op)) {
- if (OpI->getParent() == BB)
- return false;
- }
- }
-
- return true;
-}
-
char StructurizeCFGLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
@@ -454,39 +413,6 @@ INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
"Structurize the CFG", false, false)
-/// Structurization can introduce unnecessary VGPR copies due to register
-/// coalescing interference. For example, if the Else block has a zero-cost
-/// instruction and the Then block modifies the VGPR value, only one value is
-/// live at a time in merge block before structurization. After structurization,
-/// the coalescer may incorrectly treat the Then value as live in the Else block
-/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
-///
-/// This function examines phi nodes whose incoming values are zero-cost
-/// instructions in the Else block. It identifies such values that can be safely
-/// hoisted and moves them to the nearest common dominator of Then and Else
-/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
-/// value to poison phi nodes along the if→flow edge, aiding register coalescing
-/// and minimizing unnecessary live ranges.
-void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
- BasicBlock *ThenBB) {
-
- BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
- BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
-
- if (!ElseSucc || !CommonDominator)
- return;
- Instruction *Term = CommonDominator->getTerminator();
- for (PHINode &Phi : ElseSucc->phis()) {
- Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
- auto *Inst = dyn_cast<Instruction>(ElseVal);
- if (!Inst || !isHoistableInstruction(Inst, ElseBB, TTI))
- continue;
- Inst->removeFromParent();
- Inst->insertInto(CommonDominator, Term->getIterator());
- HoistedValues[Inst] = CommonDominator;
- }
-}
-
/// Build up the general order of nodes, by performing a topological sort of the
/// parent region's nodes, while ensuring that there is no outer cycle node
/// between any two inner cycle nodes.
@@ -609,7 +535,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
BasicBlock *Other = Term->getSuccessor(!i);
if (Visited.count(Other) && !Loops.count(Other) &&
!Pred.count(Other) && !Pred.count(P)) {
- hoistZeroCostElseBlockPhiValues(Succ, Other);
+
Pred[Other] = {BoolFalse, std::nullopt};
Pred[P] = {BoolTrue, std::nullopt};
continue;
@@ -965,44 +891,6 @@ void StructurizeCFG::setPhiValues() {
AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
}
-/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
-/// entries on Flow nodes with the appropriate hoisted values
-void StructurizeCFG::simplifyHoistedPhis() {
- for (WeakVH VH : AffectedPhis) {
- PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
- if (!Phi || Phi->getNumIncomingValues() != 2)
- continue;
-
- for (int i = 0; i < 2; i++) {
- Value *V = Phi->getIncomingValue(i);
- auto BBIt = HoistedValues.find(V);
-
- if (BBIt == HoistedValues.end())
- continue;
-
- Value *OtherV = Phi->getIncomingValue(!i);
- PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
- if (!OtherPhi)
- continue;
-
- int PoisonValBBIdx = -1;
- for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
- if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
- continue;
- PoisonValBBIdx = i;
- break;
- }
- if (PoisonValBBIdx == -1 ||
- !DT->dominates(BBIt->second,
- OtherPhi->getIncomingBlock(PoisonValBBIdx)))
- continue;
-
- OtherPhi->setIncomingValue(PoisonValBBIdx, V);
- Phi->setIncomingValue(i, OtherV);
- }
- }
-}
-
void StructurizeCFG::simplifyAffectedPhis() {
bool Changed;
do {
@@ -1395,13 +1283,12 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
}
/// Run the transformation for each region found
-bool StructurizeCFG::run(Region *R, DominatorTree *DT,
- const TargetTransformInfo *TTI) {
+bool StructurizeCFG::run(Region *R, DominatorTree *DT) {
if (R->isTopLevelRegion())
return false;
this->DT = DT;
- this->TTI = TTI;
+
Func = R->getEntry()->getParent();
assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
@@ -1413,7 +1300,6 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT,
insertConditions(false);
insertConditions(true);
setPhiValues();
- simplifyHoistedPhis();
simplifyConditions();
simplifyAffectedPhis();
rebuildSSA();
@@ -1463,7 +1349,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
bool Changed = false;
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto &RI = AM.getResult<RegionInfoAnalysis>(F);
- TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
+
UniformityInfo *UI = nullptr;
if (SkipUniformRegions)
UI = &AM.getResult<UniformityInfoAnalysis>(F);
@@ -1482,7 +1368,7 @@ PreservedAnalyses StructurizeCFGPass::run(Function &F,
continue;
}
- Changed |= SCFG.run(R, DT, TTI);
+ Changed |= SCFG.run(R, DT);
}
if (!Changed)
return PreservedAnalyses::all();
diff --git a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
index be020457ce87d..9cc42ac448067 100644
--- a/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
+++ b/llvm/test/CodeGen/AMDGPU/memintrinsic-unroll.ll
@@ -9851,8 +9851,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; CHECK-NEXT: s_andn2_saveexec_b32 s6, s6
; CHECK-NEXT: s_cbranch_execz .LBB8_6
; CHECK-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
-; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
; CHECK-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
+; CHECK-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
; CHECK-NEXT: s_movk_i32 s4, 0xf800
; CHECK-NEXT: s_mov_b32 s5, -1
; CHECK-NEXT: .LBB8_5: ; %memmove_bwd_loop
@@ -11167,8 +11167,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; ALIGNED-NEXT: s_andn2_saveexec_b32 s6, s6
; ALIGNED-NEXT: s_cbranch_execz .LBB8_6
; ALIGNED-NEXT: ; %bb.4: ; %memmove_bwd_loop.preheader
-; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
; ALIGNED-NEXT: v_add_nc_u32_e32 v0, 0x700, v0
+; ALIGNED-NEXT: v_add_nc_u32_e32 v1, 0x700, v1
; ALIGNED-NEXT: s_movk_i32 s4, 0xf800
; ALIGNED-NEXT: s_mov_b32 s5, -1
; ALIGNED-NEXT: .LBB8_5: ; %memmove_bwd_loop
@@ -12381,8 +12381,8 @@ define void @memmove_p5_p5_sz2048(ptr addrspace(5) align 1 %dst, ptr addrspace(5
; UNROLL3-NEXT: buffer_load_dword v4, v1, s[0:3], 0 offen offset:2024
; UNROLL3-NEXT: buffer_load_dword v5, v1, s[0:3], 0 offen offset:2020
; UNROLL3-NEXT: buffer_load_dword v6, v1, s[0:3], 0 offen offset:2016
-; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1
; UNROLL3-NEXT: v_add_nc_u32_e32 v2, 0x7b0, v0
+; UNROLL3-NEXT: v_add_nc_u32_e32 v1, 0x7b0, v1
; UNROLL3-NEXT: s_waitcnt vmcnt(3)
; UNROLL3-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen offset:2028
; UNROLL3-NEXT: s_waitcnt vmcnt(2)
diff --git a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll b/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
deleted file mode 100644
index 42436a1b4c279..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/structurize-hoist.ll
+++ /dev/null
@@ -1,180 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX900 %s
-
-
-%pair = type { i32, i32 }
-
-define void @test_extractvalue_then_else(ptr %ptr, i1 %cond) {
-; GFX900-LABEL: test_extractvalue_then_else:
-; GFX900: ; %bb.0: ; %if
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_load_dword v3, v[0:1]
-; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
-; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GFX900-NEXT: s_cbranch_execz .LBB0_2
-; GFX900-NEXT: ; %bb.1: ; %else
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
-; GFX900-NEXT: .LBB0_2: ; %Flow
-; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_store_dword v[0:1], v3
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-if:
- %load_then = load %pair, ptr %ptr
- br i1 %cond, label %then, label %else
-
-then:
- %a_then = extractvalue %pair %load_then, 0
- br label %merge
-
-else:
- %a_else = extractvalue %pair %load_then, 0
- %sum_else = add i32 %a_else, 1
- br label %merge
-
-merge:
- %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-define void @test_extractvalue_else_then(ptr %ptr, i1 %cond) {
-; GFX900-LABEL: test_extractvalue_else_then:
-; GFX900: ; %bb.0: ; %if
-; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_load_dword v3, v[0:1]
-; GFX900-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX900-NEXT: v_cmp_ne_u32_e32 vcc, 1, v2
-; GFX900-NEXT: s_and_saveexec_b64 s[4:5], vcc
-; GFX900-NEXT: s_xor_b64 s[4:5], exec, s[4:5]
-; GFX900-NEXT: s_andn2_saveexec_b64 s[4:5], s[4:5]
-; GFX900-NEXT: s_cbranch_execz .LBB1_2
-; GFX900-NEXT: ; %bb.1: ; %else
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_u32_e32 v3, 1, v3
-; GFX900-NEXT: .LBB1_2: ; %merge
-; GFX900-NEXT: s_or_b64 exec, exec, s[4:5]
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: flat_store_dword v[0:1], v3
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: s_setpc_b64 s[30:31]
-if:
- %load_then = load %pair, ptr %ptr
- br i1 %cond, label %else, label %then
-
-else:
- %a_else = extractvalue %pair %load_then, 0
- %sum_else = add i32 %a_else, 1
- br label %merge
-
-then:
- %a_then = extractvalue %pair %load_then, 0
- br label %merge
-
-merge:
- %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-define amdgpu_kernel void @test_loop_with_if( ptr %ptr, i1 %cond) #0 {
-; GFX900-LABEL: test_loop_with_if:
-; GFX900: ; %bb.0: ; %entry
-; GFX900-NEXT: s_load_dword s2, s[4:5], 0x2c
-; GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX900-NEXT: v_mov_b32_e32 v5, 0
-; GFX900-NEXT: s_mov_b64 s[4:5], 0
-; GFX900-NEXT: s_movk_i32 s10, 0xfe
-; GFX900-NEXT: s_waitcnt lgkmcnt(0)
-; GFX900-NEXT: s_bitcmp1_b32 s2, 0
-; GFX900-NEXT: s_cselect_b64 s[2:3], -1, 0
-; GFX900-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[2:3]
-; GFX900-NEXT: v_mov_b32_e32 v2, s1
-; GFX900-NEXT: s_xor_b64 s[2:3], s[2:3], -1
-; GFX900-NEXT: v_mov_b32_e32 v1, s0
-; GFX900-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v3
-; GFX900-NEXT: s_branch .LBB2_2
-; GFX900-NEXT: .LBB2_1: ; %latch
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_u32_e32 v5, 20, v3
-; GFX900-NEXT: v_cmp_lt_i32_e32 vcc, s10, v5
-; GFX900-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
-; GFX900-NEXT: flat_store_dword v[1:2], v3
-; GFX900-NEXT: s_andn2_b64 exec, exec, s[4:5]
-; GFX900-NEXT: s_cbranch_execz .LBB2_8
-; GFX900-NEXT: .LBB2_2: ; %loop
-; GFX900-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX900-NEXT: flat_load_dwordx2 v[3:4], v[1:2]
-; GFX900-NEXT: s_and_b64 vcc, exec, s[0:1]
-; GFX900-NEXT: s_mov_b64 s[8:9], s[2:3]
-; GFX900-NEXT: s_mov_b64 s[6:7], 0
-; GFX900-NEXT: s_cbranch_vccnz .LBB2_4
-; GFX900-NEXT: ; %bb.3: ; %if
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: v_cmp_gt_i32_e32 vcc, 11, v5
-; GFX900-NEXT: s_andn2_b64 s[8:9], s[2:3], exec
-; GFX900-NEXT: s_and_b64 s[12:13], vcc, exec
-; GFX900-NEXT: s_mov_b64 s[6:7], -1
-; GFX900-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13]
-; GFX900-NEXT: .LBB2_4: ; %Flow
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: s_and_saveexec_b64 s[12:13], s[8:9]
-; GFX900-NEXT: s_xor_b64 s[8:9], exec, s[12:13]
-; GFX900-NEXT: s_cbranch_execz .LBB2_6
-; GFX900-NEXT: ; %bb.5: ; %else
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX900-NEXT: v_add_u32_e32 v3, v3, v4
-; GFX900-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
-; GFX900-NEXT: .LBB2_6: ; %Flow1
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: s_or_b64 exec, exec, s[8:9]
-; GFX900-NEXT: s_and_saveexec_b64 s[8:9], s[6:7]
-; GFX900-NEXT: s_cbranch_execz .LBB2_1
-; GFX900-NEXT: ; %bb.7: ; %then
-; GFX900-NEXT: ; in Loop: Header=BB2_2 Depth=1
-; GFX900-NEXT: flat_store_dword v[1:2], v0
-; GFX900-NEXT: s_branch .LBB2_1
-; GFX900-NEXT: .LBB2_8: ; %end
-; GFX900-NEXT: s_endpgm
-entry:
- %a = tail call i32 @llvm.amdgcn.workitem.id.x()
- br label %loop
-
-loop:
- %entry_phi = phi i32 [ 0, %entry ], [ %a15, %latch ]
- %load = load %pair, ptr %ptr
- br i1 %cond, label %if, label %else
-
-if:
- %cmp = icmp sgt i32 %entry_phi, 10
- br i1 %cmp, label %then, label %else
-
-then:
- %a_then = extractvalue %pair %load, 0
- store i32 %a, ptr %ptr, align 4
- br label %latch
-
-else:
- %a2 = extractvalue %pair %load, 1
- %y = extractvalue %pair %load, 0
- %a_else = add i32 %y, %a2
- br label %latch
-
-latch:
- %a_test = phi i32 [ %a_then, %then ], [ %a_else, %else ]
- store i32 %a_test, ptr %ptr
- %a15 = add nsw i32 %a_test, 20
- %a16 = icmp slt i32 %a15, 255
- br i1 %a16, label %loop, label %end
-
-end:
- ret void
-}
diff --git a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll b/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll
deleted file mode 100644
index 10d4fa2be0a70..0000000000000
--- a/llvm/test/Transforms/StructurizeCFG/hoist-zerocost.ll
+++ /dev/null
@@ -1,161 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -passes=structurizecfg < %s | FileCheck %s
-
-
-%pair = type { i32, i32 }
-define void @test_if_then_else(ptr %ptr, i1 %cond) {
-; CHECK-LABEL: define void @test_if_then_else(
-; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
-; CHECK-NEXT: [[A_THEN:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
-; CHECK-NEXT: br i1 [[COND_INV]], label %[[ELSE:.*]], label %[[FLOW:.*]]
-; CHECK: [[FLOW]]:
-; CHECK-NEXT: [[TMP0:%.*]] = phi i32 [ [[SUM_ELSE:%.*]], %[[ELSE]] ], [ [[A_THEN]], %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[ELSE]] ], [ true, %[[ENTRY]] ]
-; CHECK-NEXT: br i1 [[TMP1]], label %[[THEN:.*]], label %[[MERGE:.*]]
-; CHECK: [[THEN]]:
-; CHECK-NEXT: br label %[[MERGE]]
-; CHECK: [[ELSE]]:
-; CHECK-NEXT: [[A_ELSE:%.*]] = extractvalue [[PAIR]] [[LOAD_THEN]], 0
-; CHECK-NEXT: [[SUM_ELSE]] = add i32 [[A_ELSE]], 1
-; CHECK-NEXT: br label %[[FLOW]]
-; CHECK: [[MERGE]]:
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[PTR]], align 4
-; CHECK-NEXT: ret void
-;
-entry:
- %load_then = load %pair, ptr %ptr
- br i1 %cond, label %then, label %else
-
-then:
- %a_then = extractvalue %pair %load_then, 0
- br label %merge
-
-else:
- %a_else = extractvalue %pair %load_then, 0
- %sum_else = add i32 %a_else, 1
- br label %merge
-
-merge:
- %phi = phi i32 [ %a_then, %then ], [ %sum_else, %else ]
- store i32 %phi, ptr %ptr
- ret void
-}
-
-define void @test_if_else_then(ptr %ptr, i1 %cond) {
-; CHECK-LABEL: define void @test_if_else_then(
-; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[COND_INV:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: [[LOAD_THEN:%.*]] = load [[PAIR:%.*]], ptr [[PTR]], align 4
-; CHECK-NEXT: br i1 [[COND_INV]], label %[[THEN:.*]], label %[[FLOW:.*]]
-; CHECK: [[THEN]]:
-; CHECK-NEXT: ...
[truncated]
|
shiltian
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You don't need a PR to revert. Just revert it locally and push directly to main, but please include the reason for the revert in the commit description.
Reverts #139605