@@ -69,12 +69,12 @@ IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG
 
 char ScalarizeFunction::ID = 0;
 
-ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
+ScalarizeFunction::ScalarizeFunction(bool scalarizingVectorLDSTType) : FunctionPass(ID)
 {
     initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());
 
     for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
-    m_SelectiveScalarization = selectiveScalarization;
+    m_ScalarizingVectorLDSTType = scalarizingVectorLDSTType;
 
     // Initialize SCM buffers and allocation
     m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
@@ -121,13 +121,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     m_SCM.clear();
     releaseAllSCMEntries();
     m_DRL.clear();
-    m_Excludes.clear();
-
-    // collecting instructions that we want to avoid scalarization
-    if (m_SelectiveScalarization)
-    {
-        buildExclusiveSet();
-    }
 
     // Scalarization. Iterate over all the instructions
     // Always hold the iterator at the instruction following the one being scalarized (so the
@@ -139,14 +132,7 @@ bool ScalarizeFunction::runOnFunction(Function& F)
         Instruction* currInst = &*sI;
         // Move iterator to next instruction BEFORE scalarizing current instruction
         ++sI;
-        if (m_Excludes.count(currInst))
-        {
-            recoverNonScalarizableInst(currInst);
-        }
-        else
-        {
-            dispatchInstructionToScalarize(currInst);
-        }
+        dispatchInstructionToScalarize(currInst);
     }
 
     resolveVectorValues();
@@ -175,111 +161,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     return true;
 }
 
-void ScalarizeFunction::buildExclusiveSet()
-{
-    inst_iterator sI = inst_begin(m_currFunc);
-    inst_iterator sE = inst_end(m_currFunc);
-    std::vector<llvm::Value*> workset;
-    while (sI != sE)
-    {
-        Instruction* currInst = &*sI;
-        ++sI;
-        if (CallInst* CI = dyn_cast<CallInst>(currInst))
-        {
-            unsigned numOperands = CI->getNumArgOperands();
-            for (unsigned i = 0; i < numOperands; i++)
-            {
-                Value* operand = CI->getArgOperand(i);
-                if (isa<VectorType>(operand->getType()))
-                {
-                    workset.push_back(operand);
-                }
-            }
-        }
-        else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
-        {
-            Value* scalarIndexVal = IEI->getOperand(2);
-            // If the index is not a constant - we cannot statically remove this inst
-            if (!isa<ConstantInt>(scalarIndexVal)) {
-                workset.push_back(IEI);
-            }
-        }
-        else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
-        {
-            Value* scalarIndexVal = EEI->getOperand(1);
-            // If the index is not a constant - we cannot statically remove this inst
-            if (!isa<ConstantInt>(scalarIndexVal)) {
-                workset.push_back(EEI->getOperand(0));
-            }
-        }
-    }
-    while (!workset.empty())
-    {
-        auto Def = workset.back();
-        workset.pop_back();
-        if (m_Excludes.count(Def))
-        {
-            continue;
-        }
-        if (auto IEI = dyn_cast<InsertElementInst>(Def))
-        {
-            m_Excludes.insert(IEI);
-            if (!m_Excludes.count(IEI->getOperand(0)) &&
-                (isa<PHINode>(IEI->getOperand(0)) ||
-                 isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
-                 isa<InsertElementInst>(IEI->getOperand(0))))
-            {
-                workset.push_back(IEI->getOperand(0));
-            }
-        }
-        else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
-        {
-            m_Excludes.insert(SVI);
-            if (!m_Excludes.count(SVI->getOperand(0)) &&
-                (isa<PHINode>(SVI->getOperand(0)) ||
-                 isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
-                 isa<InsertElementInst>(SVI->getOperand(0))))
-            {
-                workset.push_back(SVI->getOperand(0));
-            }
-            if (!m_Excludes.count(SVI->getOperand(1)) &&
-                (isa<PHINode>(SVI->getOperand(1)) ||
-                 isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
-                 isa<InsertElementInst>(SVI->getOperand(1))))
-            {
-                workset.push_back(SVI->getOperand(1));
-            }
-        }
-        else if (auto PHI = dyn_cast<PHINode>(Def))
-        {
-            m_Excludes.insert(PHI);
-            for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
-                if (!m_Excludes.count(PHI->getOperand(i)) &&
-                    (isa<PHINode>(PHI->getOperand(i)) ||
-                     isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
-                     isa<InsertElementInst>(PHI->getOperand(i))))
-                {
-                    workset.push_back(PHI->getOperand(i));
-                }
-        }
-        else
-        {
-            continue;
-        }
-        // check use
-        for (auto U : Def->users())
-        {
-            if (!m_Excludes.count(U) &&
-                (isa<PHINode>(U) ||
-                 isa<ShuffleVectorInst>(U) ||
-                 isa<InsertElementInst>(U)))
-            {
-                workset.push_back(U);
-            }
-        }
-    }
-}
-
 void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
 {
     V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
@@ -354,6 +235,13 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
     case Instruction::GetElementPtr:
        scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
        break;
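+    // Vector loads and stores are split into per-lane accesses only when
+    // isScalarizableLoadStoreType() allows it; otherwise they are kept as-is.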
+    case Instruction::Load:
+        scalarizeInstruction(dyn_cast<LoadInst>(I));
+        break;
+    case Instruction::Store:
+        scalarizeInstruction(dyn_cast<StoreInst>(I));
+        break;
+
         // The remaining instructions are not supported for scalarization. Keep "as is"
     default:
         recoverNonScalarizableInst(I);
@@ -1004,6 +892,149 @@ void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
     m_removedInsts.insert(GI);
 }
 
+void ScalarizeFunction::scalarizeInstruction(LoadInst* LI)
+{
+    V_PRINT(scalarizer, "\t\tLoad instruction\n");
+    IGC_ASSERT_MESSAGE(LI, "instruction type dynamic cast failed");
+
+    VectorType* dataType = dyn_cast<VectorType>(LI->getType());
+    if (isScalarizableLoadStoreType(dataType) && m_pDL)
+    {
+        // Prepare empty SCM entry for the instruction
+        SCMEntry* newEntry = getSCMEntry(LI);
+
+        // Get additional info from instruction
+        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
+        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
+        IGC_ASSERT(elementSize);
+        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
+        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
+        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
+
+        // Select the scalarization strategy:
+        // 1 - allow scalarizing a Load through any pointer type
+        // 0 - limit scalarizing to the special case where the packetizer benefits from it
+#if 1
+        // Bit-cast the vector pointer to a pointer to its scalar element type,
+        // then load each lane through its own GEP.
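+        // Roughly, a load of <2 x float> from %p becomes (illustrative sketch):
+        //   %base  = bitcast <2 x float>* %p to float*
+        //   %lane0 = getelementptr float, float* %base, i32 0
+        //   %x0    = load float, float* %lane0
+        //   %lane1 = getelementptr float, float* %base, i32 1
+        //   %x1    = load float, float* %lane1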
+        Value* GepPtr = LI->getOperand(0);
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
+        Type* indexType = Type::getInt32Ty(*m_moduleContext);
+        // Generate new (scalar) instructions
+        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> newScalarizedInsts;
+        newScalarizedInsts.resize(numDupElements);
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", LI);
+            newScalarizedInsts[dup] = new LoadInst(pGEP->getType()->getPointerElementType(), pGEP, LI->getName(), LI);
+        }
+#else
+        GetElementPtrInst* operand = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+        if (!operand || operand->getNumIndices() != 1)
+        {
+            return recoverNonScalarizableInst(LI);
+        }
+        // Apply the bit-cast on the GEP base and add the base offset, then fix the index by multiplying it by numElements (assuming a single index).
+        Value* GepPtr = operand->getPointerOperand();
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
+        Type* indexType = operand->getOperand(1)->getType();
+        // Generate new (scalar) instructions
+        Value* newScalarizedInsts[MAX_INPUT_VECTOR_WIDTH];
+        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", LI);
+            Value* pIndex = BinaryOperator::CreateMul(operand->getOperand(1), elementNumVal, "GEPIndex_s", LI);
+            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", LI);
+            newScalarizedInsts[dup] = new LoadInst(pGEP, LI->getName(), LI);
+        }
+#endif
+        // Add new value/s to SCM
+        updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), LI, true);
+
+        // Remove original instruction
+        m_removedInsts.insert(LI);
+        return;
+    }
+    return recoverNonScalarizableInst(LI);
+}
+
+void ScalarizeFunction::scalarizeInstruction(StoreInst* SI)
+{
+    V_PRINT(scalarizer, "\t\tStore instruction\n");
+    IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
+
+    int indexPtr = SI->getPointerOperandIndex();
+    int indexData = 1 - indexPtr;
+    VectorType* dataType = dyn_cast<VectorType>(SI->getOperand(indexData)->getType());
+    if (isScalarizableLoadStoreType(dataType) && m_pDL)
+    {
+        // Get additional info from instruction
+        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
+        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
+        IGC_ASSERT(elementSize);
+        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
+        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
+
+        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
+
+        // Obtain scalarized arguments
+        // 1 - allow scalarizing a Store through any pointer type
+        // 0 - limit scalarizing to the special case where the packetizer benefits from it
+#if 1
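+        // Break the stored vector value into its per-lane scalar values first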
+        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> operand0;
+
+        bool opIsConst;
+        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
+
+        // Bit-cast the vector pointer to a pointer to its scalar element type,
+        // then store each lane through its own GEP.
+        Value* GepPtr = SI->getOperand(indexPtr);
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
+        Type* indexType = Type::getInt32Ty(*m_moduleContext);
+        // Generate new (scalar) instructions
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", SI);
+            new StoreInst(operand0[dup], pGEP, SI);
+        }
+#else
+        GetElementPtrInst* operand1 = dyn_cast<GetElementPtrInst>(SI->getOperand(indexPtr));
+        if (!operand1 || operand1->getNumIndices() != 1)
+        {
+            return recoverNonScalarizableInst(SI);
+        }
+        Value* operand0[MAX_INPUT_VECTOR_WIDTH];
+        bool opIsConst;
+        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
+
+        // Apply the bit-cast on the GEP base and add the base offset, then fix the index by multiplying it by numElements (assuming a single index).
+        Value* GepPtr = operand1->getPointerOperand();
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
+        Type* indexType = operand1->getOperand(1)->getType();
+        // Generate new (scalar) instructions
+        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", SI);
+            Value* pIndex = BinaryOperator::CreateMul(operand1->getOperand(1), elementNumVal, "GEPIndex_s", SI);
+            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", SI);
+            new StoreInst(operand0[dup], pGEP, SI);
+        }
+#endif
+        // Remove original instruction
+        m_removedInsts.insert(SI);
+        return;
+    }
+    return recoverNonScalarizableInst(SI);
+}
+
 void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
     Value* origValue, Instruction* origInst, int destIdx)
 {
@@ -1380,9 +1411,17 @@ void ScalarizeFunction::resolveDeferredInstructions()
     m_DRL.clear();
 }
 
-extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
+bool ScalarizeFunction::isScalarizableLoadStoreType(VectorType* type)
+{
+    // Scalarizing a Load/Store is worth doing only if:
+    // 1. Gather/Scatter are supported
+    // 2. the Load/Store type is a vector
+    return (m_ScalarizingVectorLDSTType && (NULL != type));
+}
+
+extern "C" FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType)
 {
-    return new ScalarizeFunction(selectiveScalarization);
+    return new ScalarizeFunction(scalarizingVectorLDSTType);
 }
 