Skip to content

Commit 0487b74

Browse files
ppogotov authored and igcbot committed
Split PHI nodes with a structure of vectors into multiple PHI nodes.
Splitting structure PHIs into individual vector PHIs helps the emitter generate more efficient code. This optimization is particularly beneficial in cases where the incoming value is a zeroinitializer. By splitting the PHI node, the emitter can avoid generating unnecessary move instructions for initialization when the PHI node's incoming type is a vector type.
1 parent b1ae5f3 commit 0487b74

File tree

7 files changed

+644
-0
lines changed

7 files changed

+644
-0
lines changed

IGC/Compiler/CISACodeGen/ShaderCodeGen.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ SPDX-License-Identifier: MIT
7171
#include "Compiler/CISACodeGen/FPRoundingModeCoalescing.hpp"
7272

7373
#include "Compiler/CISACodeGen/SLMConstProp.hpp"
74+
#include "Compiler/Optimizer/OpenCLPasses/SplitStructurePhisPass/SplitStructurePhisPass.hpp"
7475
#include "Compiler/Legalizer/AddRequiredMemoryFences.h"
7576
#include "Compiler/Optimizer/OpenCLPasses/GenericAddressResolution/GenericAddressDynamicResolution.hpp"
7677
#include "Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.hpp"
@@ -1593,6 +1594,8 @@ void OptimizeIR(CodeGenContext* const pContext)
15931594
}
15941595
}
15951596

1597+
mpm.add(new SplitStructurePhisPass());
1598+
15961599
if (IGC_IS_FLAG_ENABLED(EnableRemoveLoopDependency))
15971600
{
15981601
mpm.add(new RemoveLoopDependency());

IGC/Compiler/InitializePasses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ void initializeScalarizeFunctionPass(llvm::PassRegistry&);
151151
void initializeSimd32ProfitabilityAnalysisPass(llvm::PassRegistry&);
152152
void initializeSetFastMathFlagsPass(llvm::PassRegistry&);
153153
void initializeSPIRMetaDataTranslationPass(llvm::PassRegistry&);
154+
void initializeSplitStructurePhisPassPass(llvm::PassRegistry&);
154155
void initializeSpv2dBlockIOResolutionPass(llvm::PassRegistry&);
155156
void initializeSpvSubgroupMMAResolutionPass(llvm::PassRegistry&);
156157
void initializeStatelessToStatefulPass(llvm::PassRegistry&);

IGC/Compiler/Optimizer/OpenCLPasses/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ add_subdirectory(Spv2dBlockIOResolution)
5353
add_subdirectory(SpvSubgroupMMAResolution)
5454
add_subdirectory(StackOverflowDetection)
5555
add_subdirectory(StatelessToStateful)
56+
add_subdirectory(SplitStructurePhisPass)
5657
add_subdirectory(SubGroupFuncs)
5758
add_subdirectory(SubGroupReductionPattern)
5859
add_subdirectory(TransformUnmaskedFunctionsPass)
@@ -103,6 +104,7 @@ set(IGC_BUILD__SRC__Optimizer_OpenCLPasses_All
103104
${IGC_BUILD__SRC__OpenCLPasses_RewriteLocalSize}
104105
${IGC_BUILD__SRC__OpenCLPasses_ScalarArgAsPointer}
105106
${IGC_BUILD__SRC__OpenCLPasses_SetFastMathFlags}
107+
${IGC_BUILD__SRC__OpenCLPasses_SplitStructurePhisPass}
106108
${IGC_BUILD__SRC__OpenCLPasses_Spv2dBlockIOResolution}
107109
${IGC_BUILD__SRC__OpenCLPasses_SpvSubgroupMMAResolution}
108110
${IGC_BUILD__SRC__OpenCLPasses_StackOverflowDetection}
@@ -162,6 +164,7 @@ set(IGC_BUILD__HDR__Optimizer_OpenCLPasses_All
162164
${IGC_BUILD__HDR__OpenCLPasses_RewriteLocalSize}
163165
${IGC_BUILD__HDR__OpenCLPasses_ScalarArgAsPointer}
164166
${IGC_BUILD__HDR__OpenCLPasses_SetFastMathFlags}
167+
${IGC_BUILD__HDR__OpenCLPasses_SplitStructurePhisPass}
165168
${IGC_BUILD__HDR__OpenCLPasses_Spv2dBlockIOResolution}
166169
${IGC_BUILD__HDR__OpenCLPasses_SpvSubgroupMMAResolution}
167170
${IGC_BUILD__HDR__OpenCLPasses_StackOverflowDetection}
@@ -222,6 +225,7 @@ set(IGC_BUILD_Compiler_OpenCLPasses_Groups
222225
Compiler__OpenCLPasses_RewriteLocalSize
223226
Compiler__OpenCLPasses_ScalarArgAsPointer
224227
Compiler__OpenCLPasses_SetFastMathFlags
228+
Compiler__OpenCLPasses_SplitStructurePhisPass
225229
Compiler__OpenCLPasses_StackOverflowDetection
226230
Compiler__OpenCLPasses_StatelessToStateful
227231
Compiler__OpenCLPasses_SubGroupFuncs
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#=========================== begin_copyright_notice ============================
2+
#
3+
# Copyright (C) 2025 Intel Corporation
4+
#
5+
# SPDX-License-Identifier: MIT
6+
#
7+
#============================ end_copyright_notice =============================
8+
9+
# Make the headers in this directory reachable by include path.
include_directories("${CMAKE_CURRENT_SOURCE_DIR}")
10+
11+
12+
# Source files of the pass (local list).
set(IGC_BUILD__SRC__SplitStructurePhisPass
13+
"${CMAKE_CURRENT_SOURCE_DIR}/SplitStructurePhisPass.cpp"
14+
)
15+
# Publish the source list to the parent directory scope under the
# IGC_BUILD__SRC__OpenCLPasses_* name that the parent CMakeLists.txt aggregates.
set(IGC_BUILD__SRC__OpenCLPasses_SplitStructurePhisPass ${IGC_BUILD__SRC__SplitStructurePhisPass} PARENT_SCOPE)
16+
17+
# Header files of the pass (local list).
set(IGC_BUILD__HDR__SplitStructurePhisPass
18+
"${CMAKE_CURRENT_SOURCE_DIR}/SplitStructurePhisPass.hpp"
19+
)
20+
# Publish the header list to the parent directory scope, mirroring the source list above.
set(IGC_BUILD__HDR__OpenCLPasses_SplitStructurePhisPass ${IGC_BUILD__HDR__SplitStructurePhisPass} PARENT_SCOPE)
21+
22+
23+
# NOTE(review): igc_sg_register is defined outside this file; presumably it
# registers the listed files as a source group with the given id and display
# name - confirm against its definition.
igc_sg_register(
24+
Compiler__OpenCLPasses_SplitStructurePhisPass
25+
"SplitStructurePhisPass"
26+
FILES
27+
${IGC_BUILD__SRC__SplitStructurePhisPass}
28+
${IGC_BUILD__HDR__SplitStructurePhisPass}
29+
)
Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2025 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
#include "SplitStructurePhisPass.hpp"
10+
11+
using namespace llvm;
12+
using namespace IGC;
13+
14+
// Pass identifier; it is handed to the FunctionPass base class by the constructor.
char SplitStructurePhisPass::ID = 0;
15+
16+
// Arguments forwarded to the IGC_INITIALIZE_PASS_BEGIN/END registration macros below.
#define PASS_FLAG "split-structure-phis"
17+
#define PASS_DESCRIPTION "Split structure phis pass."
18+
#define PASS_CFG_ONLY false
19+
#define PASS_ANALYSIS false
20+
// Sentinel index that getIndices() returns for both tuple slots when neither
// incoming value of a PHI is a zeroinitializer; runOnFunction() skips such PHIs.
#define POISON_SIZE_T 999
21+
22+
23+
// SplitStructurePhisPass is a function pass that replaces a PHI node of structure type (a struct whose fields are
24+
// scalars and vectors, with at least one vector field) by one PHI node per field of the structure.
25+
// A PHI node is split only when it has exactly two incoming values, one of them is a zeroinitializer, the other one is
// built by a chain of single-use insertvalue instructions on top of an undef/poison aggregate, and every user of the PHI
// node is an extractvalue instruction.
26+
// This helps the emitter avoid generating intermediate mov instructions to initialize the structure with zero values.
27+
28+
IGC_INITIALIZE_PASS_BEGIN(SplitStructurePhisPass, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
29+
IGC_INITIALIZE_PASS_END(SplitStructurePhisPass, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG_ONLY, PASS_ANALYSIS)
30+
31+
// Constructs the pass and registers it with the global LLVM pass registry.
SplitStructurePhisPass::SplitStructurePhisPass() : FunctionPass(ID) {
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeSplitStructurePhisPassPass(Registry);
}
34+
35+
// Entry point of the pass: collects every struct PHI node in F that can be
// split and replaces it with one PHI node per struct field.
//
// A PHI node is rewritten only when all of the following hold:
//   * it has exactly two incoming values;
//   * its type is accepted by isStructOfVectorsType();
//   * one incoming value is a zeroinitializer (see getIndices());
//   * every user is a single-index extractvalue and every field of the struct
//     is extracted exactly once;
//   * the other incoming value is an insertvalue chain accepted by
//     checkNonZeroIncValue().
//
// Returns true when at least one PHI node was split, false otherwise.
bool SplitStructurePhisPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  for (auto &BB : F) {
    // PHI nodes are grouped at the top of a basic block, so only that prefix
    // has to be visited.
    for (PHINode &PhiRef : BB.phis()) {
      PHINode *Phi = &PhiRef;

      // Currently, we only support PHI nodes with two incoming values.
      if (Phi->getNumIncomingValues() != 2)
        continue;

      // Skip phi node instruction if its structure type doesn't have vector types.
      if (!isStructOfVectorsType(Phi->getType()))
        continue;

      // Get indices of the zero and the non-zero incoming values.
      IndicesTuple Indices = getIndices(Phi);

      // Skip if phi node doesn't have zero incoming value.
      if (std::get<Zero>(Indices) == POISON_SIZE_T || std::get<NonZero>(Indices) == POISON_SIZE_T)
        continue;

      // Skip phi nodes that are used by other instructions, other than extractvalue.
      ExtractValueMap ExtractValues;
      if (!isPhiNodeParsedByExtrVal(Phi, ExtractValues))
        continue;

      // The rewrite below looks up one extractvalue per struct field and the
      // create*Phi helpers dereference it unconditionally. A PHI node with no
      // users, or with only some of its fields extracted, would therefore
      // produce a null pointer dereference, so such PHI nodes are left alone.
      auto *PhiStructTy = cast<StructType>(Phi->getType());
      if (ExtractValues.size() != PhiStructTy->getNumElements())
        continue;

      // Check that the non-zero incoming value was created by insertvalue instructions.
      IncomingValuesMap InsertValues;
      Value *NonZeroIncVal = Phi->getIncomingValue(std::get<NonZero>(Indices));
      if (!checkNonZeroIncValue(NonZeroIncVal, InsertValues))
        continue;

      PhiNodes[Phi] = std::make_tuple(Indices, ExtractValues, InsertValues);
    }
  }

  const bool Changed = !PhiNodes.empty();

  // Iterate over the collected PHI nodes and
  // 1. create new phis for each vector field
  // 2. create new phis for each scalar field
  // 3. save dead instructions for removal
  // Entries are bound by reference: each one carries two maps, and copying
  // them for every PHI node is unnecessary.
  for (auto &PhiPair : PhiNodes) {
    PHINode *OldPhi = PhiPair.first;
    auto &Indices = std::get<0>(PhiPair.second);
    auto &ExtractValues = std::get<1>(PhiPair.second);
    auto &InsertValues = std::get<2>(PhiPair.second);

    StructType *StTy = cast<StructType>(OldPhi->getType());
    for (unsigned i = 0; i < StTy->getNumElements(); ++i) {
      Type *FieldTy = StTy->getElementType(i);

      if (isa<VectorType>(FieldTy)) {
        createVectorPhi(OldPhi, Indices, ExtractValues[i], InsertValues[i]);
      } else {
        createScalarPhi(OldPhi, FieldTy, Indices, ExtractValues[i], InsertValues[i]);
      }
    }

    // Save old phi to remove it later.
    PhiNodeInstsToRemove.insert(OldPhi);
  }

  // Erase the dead instructions and reset the per-function bookkeeping.
  cleanUp();

  return Changed;
}
112+
113+
void SplitStructurePhisPass::cleanUp() {
114+
for (auto *ExtrValInst : ExtractValueInstsToRemove)
115+
ExtrValInst->eraseFromParent();
116+
117+
for (auto *Phi : PhiNodeInstsToRemove)
118+
Phi->eraseFromParent();
119+
120+
for (auto *InsValInst : InsertValueInstsToRemove) {
121+
while (InsValInst) {
122+
InsertValueInst *InstToRemov = InsValInst;
123+
InsValInst = dyn_cast<InsertValueInst>(InsValInst->getAggregateOperand());
124+
InstToRemov->eraseFromParent();
125+
}
126+
}
127+
128+
// Clear the maps and sets after work on function.
129+
PhiNodes.clear();
130+
InsertValueInstsToRemove.clear();
131+
ExtractValueInstsToRemove.clear();
132+
PhiNodeInstsToRemove.clear();
133+
}
134+
135+
// Determines which of the first two incoming values of Phi is a
// zeroinitializer.
//
// Returns a tuple (index of the zeroinitializer, index of the other incoming
// value). Incoming value 0 is checked first. When neither incoming value is a
// zeroinitializer, both tuple slots hold POISON_SIZE_T.
IndicesTuple SplitStructurePhisPass::getIndices(PHINode *Phi) {
  if (isa<ConstantAggregateZero>(Phi->getIncomingValue(0)))
    return std::make_tuple(size_t{0}, size_t{1});

  if (isa<ConstantAggregateZero>(Phi->getIncomingValue(1)))
    return std::make_tuple(size_t{1}, size_t{0});

  return std::make_tuple(POISON_SIZE_T, POISON_SIZE_T);
}
151+
152+
// Creates the replacement PHI node for one scalar field of OldPhi.
//
// OldPhi           - struct PHI node that is being split.
// NewScalarType    - type of the field, used as the type of the new PHI node.
// Indices          - positions of the zero / non-zero incoming values of OldPhi.
// OldExtractInst   - extractvalue that reads the field from OldPhi; all its
//                    uses are redirected to the new PHI node and it is
//                    recorded for removal.
// OldInsertValInst - insertvalue that wrote the field into the non-zero
//                    incoming value; its inserted operand becomes the non-zero
//                    incoming value of the new PHI node.
void SplitStructurePhisPass::createScalarPhi(PHINode *OldPhi, Type *NewScalarType, const IndicesTuple &Indices, ExtractValueInst *OldExtractInst, InsertValueInst *OldInsertValInst) {
  const size_t ZeroSlot = std::get<Zero>(Indices);
  const size_t NonZeroSlot = std::get<NonZero>(Indices);

  // The new PHI node is inserted right before the PHI node it replaces.
  IRBuilder<> Builder(OldPhi);
  PHINode *FieldPhi = Builder.CreatePHI(NewScalarType, 2, "splitted_phi");
  FieldPhi->addIncoming(Constant::getNullValue(NewScalarType), OldPhi->getIncomingBlock(ZeroSlot));
  FieldPhi->addIncoming(OldInsertValInst->getInsertedValueOperand(), OldPhi->getIncomingBlock(NonZeroSlot));

  OldExtractInst->replaceAllUsesWith(FieldPhi);
  ExtractValueInstsToRemove.insert(OldExtractInst);

  // Only the insertvalue that feeds OldPhi is recorded; cleanUp() walks the
  // rest of the chain starting from it.
  if (isLastInsertValueInst(OldInsertValInst, OldPhi))
    InsertValueInstsToRemove.insert(OldInsertValInst);
}
167+
168+
// Creates the replacement PHI node for one vector field of OldPhi.
//
// OldPhi        - struct PHI node that is being split.
// Indices       - positions of the zero / non-zero incoming values of OldPhi.
// ExtractInst   - extractvalue that reads the field from OldPhi; all its uses
//                 are redirected to the new PHI node and it is recorded for
//                 removal.
// InsertValInst - insertvalue that wrote the field into the non-zero incoming
//                 value; its inserted operand supplies both the type of the new
//                 PHI node and its non-zero incoming value.
void SplitStructurePhisPass::createVectorPhi(PHINode *OldPhi, const IndicesTuple &Indices, ExtractValueInst *ExtractInst, InsertValueInst *InsertValInst) {
  const size_t ZeroSlot = std::get<Zero>(Indices);
  const size_t NonZeroSlot = std::get<NonZero>(Indices);

  Value *FieldValue = InsertValInst->getInsertedValueOperand();
  Type *FieldTy = FieldValue->getType();

  // The new PHI node is inserted right before the PHI node it replaces.
  IRBuilder<> Builder(OldPhi);
  PHINode *FieldPhi = Builder.CreatePHI(FieldTy, 2, "splitted_phi");
  FieldPhi->addIncoming(ConstantAggregateZero::get(FieldTy), OldPhi->getIncomingBlock(ZeroSlot));
  FieldPhi->addIncoming(FieldValue, OldPhi->getIncomingBlock(NonZeroSlot));

  ExtractInst->replaceAllUsesWith(FieldPhi);
  ExtractValueInstsToRemove.insert(ExtractInst);

  // Save only the last insert value instruction for safe removal; cleanUp()
  // walks the rest of the chain starting from it.
  if (isLastInsertValueInst(InsertValInst, OldPhi))
    InsertValueInstsToRemove.insert(InsertValInst);
}
188+
189+
// Returns true when the first user of InsertValInst is OldPhi, i.e. when the
// instruction is the insertvalue whose result feeds the PHI node.
// Only the first entry of the user list is inspected.
bool SplitStructurePhisPass::isLastInsertValueInst(InsertValueInst *InsertValInst, PHINode *OldPhi) {
  return *InsertValInst->user_begin() == OldPhi;
}
196+
197+
// Check if non-zero increment value was created by insertvalue instructions.
198+
bool SplitStructurePhisPass::checkNonZeroIncValue(Value *IncVal, IncomingValuesMap &InsertValues) {
199+
StructType *StTy = cast<StructType>(IncVal->getType());
200+
201+
Value *InsertVal = IncVal;
202+
for (unsigned i = 0; i < StTy->getNumElements(); ++i) {
203+
InsertValueInst *InsertInst = dyn_cast<InsertValueInst>(InsertVal);
204+
205+
if (!InsertInst)
206+
return false;
207+
208+
if (!InsertInst->hasOneUse())
209+
return false;
210+
211+
if (InsertInst->getNumIndices() != 1)
212+
return false;
213+
214+
size_t ValueIndexInStruct = InsertInst->getIndices()[0];
215+
if (InsertValues.find(ValueIndexInStruct) != InsertValues.end())
216+
return false;
217+
218+
InsertValues[ValueIndexInStruct] = InsertInst;
219+
InsertVal = InsertInst->getAggregateOperand();
220+
}
221+
222+
if (!isa<PoisonValue>(InsertVal) && !isa<UndefValue>(InsertVal))
223+
return false;
224+
225+
return true;
226+
}
227+
228+
// Checks that Phi is consumed exclusively by extractvalue instructions and
// records the extractvalue that reads each field.
//
// Every user must be an extractvalue with exactly one index, and no field may
// be extracted by more than one instruction.
//
// Phi           - PHI node whose users are inspected.
// ExtractValues - output map: field index -> extractvalue reading that field.
//                 It may be partially filled when the function returns false.
//
// Returns true when all users match; a PHI node without users also yields
// true, with ExtractValues left untouched.
bool SplitStructurePhisPass::isPhiNodeParsedByExtrVal(PHINode *Phi, ExtractValueMap &ExtractValues) {
  for (auto *PhiUser : Phi->users()) {
    auto *Extract = dyn_cast<ExtractValueInst>(PhiUser);
    if (!Extract || Extract->getNumIndices() != 1)
      return false;

    const size_t FieldIdx = Extract->getIndices()[0];
    if (ExtractValues.count(FieldIdx) != 0)
      return false;

    ExtractValues[FieldIdx] = Extract;
  }

  return true;
}
246+
247+
// Returns true when Ty is a struct type that the pass is able to split:
//   * every field is either an integer / floating point / pointer scalar or a
//     vector whose element type is one of those scalars, and
//   * at least one field is such a vector.
// Any other field type (nested struct, array, ...) or a non-struct Ty yields
// false.
bool SplitStructurePhisPass::isStructOfVectorsType(Type *Ty) {
  auto *StructTy = dyn_cast<StructType>(Ty);
  if (!StructTy)
    return false;

  auto IsPlainScalar = [](Type *T) {
    return T->isIntegerTy() || T->isFloatingPointTy() || T->isPointerTy();
  };

  bool SawVector = false;
  for (unsigned FieldIdx = 0; FieldIdx < StructTy->getNumElements(); ++FieldIdx) {
    Type *FieldTy = StructTy->getElementType(FieldIdx);

    // Scalar fields are allowed but do not make the struct interesting.
    if (IsPlainScalar(FieldTy))
      continue;

    // Anything that is neither a scalar nor a vector of scalars is rejected.
    auto *FieldVecTy = dyn_cast<VectorType>(FieldTy);
    if (!FieldVecTy || !IsPlainScalar(FieldVecTy->getElementType()))
      return false;

    SawVector = true;
  }

  return SawVector;
}

0 commit comments

Comments
 (0)