@@ -69,12 +69,12 @@ IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG

 char ScalarizeFunction::ID = 0;

-ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
+ScalarizeFunction::ScalarizeFunction(bool scalarizingVectorLDSTType) : FunctionPass(ID)
 {
     initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());

     for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
-    m_SelectiveScalarization = selectiveScalarization;
+    m_ScalarizingVectorLDSTType = scalarizingVectorLDSTType;

     // Initialize SCM buffers and allocation
     m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
@@ -121,13 +121,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     m_SCM.clear();
     releaseAllSCMEntries();
     m_DRL.clear();
-    m_Excludes.clear();
-
-    // collecting instructions that we want to avoid scalarization
-    if (m_SelectiveScalarization)
-    {
-        buildExclusiveSet();
-    }

     // Scalarization. Iterate over all the instructions
     // Always hold the iterator at the instruction following the one being scalarized (so the
@@ -139,14 +132,7 @@ bool ScalarizeFunction::runOnFunction(Function& F)
         Instruction* currInst = &*sI;
         // Move iterator to next instruction BEFORE scalarizing current instruction
         ++sI;
-        if (m_Excludes.count(currInst))
-        {
-            recoverNonScalarizableInst(currInst);
-        }
-        else
-        {
-            dispatchInstructionToScalarize(currInst);
-        }
+        dispatchInstructionToScalarize(currInst);
     }

     resolveVectorValues();
@@ -175,119 +161,6 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     return true;
 }

-void ScalarizeFunction::buildExclusiveSet()
-{
-    inst_iterator sI = inst_begin(m_currFunc);
-    inst_iterator sE = inst_end(m_currFunc);
-    std::vector<llvm::Value*> workset;
-    while (sI != sE)
-    {
-        Instruction* currInst = &*sI;
-        ++sI;
-        if (CallInst* CI = dyn_cast<CallInst>(currInst))
-        {
-            unsigned numOperands = CI->getNumArgOperands();
-            for (unsigned i = 0; i < numOperands; i++)
-            {
-                Value* operand = CI->getArgOperand(i);
-                if (isa<VectorType>(operand->getType()))
-                {
-                    workset.push_back(operand);
-                }
-            }
-        }
-        else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
-        {
-            Value* scalarIndexVal = IEI->getOperand(2);
-            // If the index is not a constant - we cannot statically remove this inst
-            if (!isa<ConstantInt>(scalarIndexVal)) {
-                workset.push_back(IEI);
-            }
-        }
-        else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
-        {
-            Value* scalarIndexVal = EEI->getOperand(1);
-            // If the index is not a constant - we cannot statically remove this inst
-            if (!isa<ConstantInt>(scalarIndexVal)) {
-                workset.push_back(EEI->getOperand(0));
-            }
-        }
-        else if (auto STI = dyn_cast<StoreInst>(currInst))
-        {
-            auto V = STI->getValueOperand();
-            if (V->getType()->isVectorTy())
-            {
-                workset.push_back(V);
-            }
-        }
-    }
-    while (!workset.empty())
-    {
-        auto Def = workset.back();
-        workset.pop_back();
-        if (m_Excludes.count(Def))
-        {
-            continue;
-        }
-        if (auto IEI = dyn_cast<InsertElementInst>(Def))
-        {
-            m_Excludes.insert(IEI);
-            if (!m_Excludes.count(IEI->getOperand(0)) &&
-                (isa<PHINode>(IEI->getOperand(0)) ||
-                 isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
-                 isa<InsertElementInst>(IEI->getOperand(0))))
-            {
-                workset.push_back(IEI->getOperand(0));
-            }
-        }
-        else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
-        {
-            m_Excludes.insert(SVI);
-            if (!m_Excludes.count(SVI->getOperand(0)) &&
-                (isa<PHINode>(SVI->getOperand(0)) ||
-                 isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
-                 isa<InsertElementInst>(SVI->getOperand(0))))
-            {
-                workset.push_back(SVI->getOperand(0));
-            }
-            if (!m_Excludes.count(SVI->getOperand(1)) &&
-                (isa<PHINode>(SVI->getOperand(1)) ||
-                 isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
-                 isa<InsertElementInst>(SVI->getOperand(1))))
-            {
-                workset.push_back(SVI->getOperand(1));
-            }
-        }
-        else if (auto PHI = dyn_cast<PHINode>(Def))
-        {
-            m_Excludes.insert(PHI);
-            for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
-                if (!m_Excludes.count(PHI->getOperand(i)) &&
-                    (isa<PHINode>(PHI->getOperand(i)) ||
-                     isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
-                     isa<InsertElementInst>(PHI->getOperand(i))))
-                {
-                    workset.push_back(PHI->getOperand(i));
-                }
-        }
-        else
-        {
-            continue;
-        }
-        // check use
-        for (auto U : Def->users())
-        {
-            if (!m_Excludes.count(U) &&
-                (isa<PHINode>(U) ||
-                 isa<ShuffleVectorInst>(U) ||
-                 isa<InsertElementInst>(U)))
-            {
-                workset.push_back(U);
-            }
-        }
-    }
-}
-
 void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
 {
     V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
@@ -362,6 +235,13 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
     case Instruction::GetElementPtr:
         scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
         break;
+    case Instruction::Load:
+        scalarizeInstruction(dyn_cast<LoadInst>(I));
+        break;
+    case Instruction::Store:
+        scalarizeInstruction(dyn_cast<StoreInst>(I));
+        break;
+
     // The remaining instructions are not supported for scalarization. Keep "as is"
     default:
         recoverNonScalarizableInst(I);
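
For concreteness, here is the IR-level effect the two new cases aim at (a hedged illustration, not taken from the patch; lane naming is schematic, and typed pointers are assumed, matching the LLVM API level used in this file):

    %v = load <4 x float>, <4 x float> addrspace(1)* %p

becomes

    %ptrVec2ptrScl = bitcast <4 x float> addrspace(1)* %p to float addrspace(1)*
    %GEP_lane0 = getelementptr float, float addrspace(1)* %ptrVec2ptrScl, i32 0
    %v0 = load float, float addrspace(1)* %GEP_lane0
    ; ...and likewise for lanes 1..3

Vector stores are split the same way, with one scalar store per lane.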
@@ -1012,6 +892,149 @@ void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
     m_removedInsts.insert(GI);
 }

+void ScalarizeFunction::scalarizeInstruction(LoadInst* LI)
+{
+    V_PRINT(scalarizer, "\t\tLoad instruction\n");
+    IGC_ASSERT_MESSAGE(LI, "instruction type dynamic cast failed");
+
+    VectorType* dataType = dyn_cast<VectorType>(LI->getType());
+    if (isScalarizableLoadStoreType(dataType) && m_pDL)
+    {
+        // Prepare empty SCM entry for the instruction
+        SCMEntry* newEntry = getSCMEntry(LI);
+
+        // Get additional info from instruction
+        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
+        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
+        IGC_ASSERT(elementSize);
+        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
+        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
+        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
+
+        // Obtain scalarized arguments
+        // 1 - allow scalarizing loads with any pointer type
+        // 0 - limit scalarizing to the special case where the packetizer benefits from the scalarization
+#if 1
+        // Bit-cast the pointer operand to the scalar element type, then address each lane with a constant-index GEP.
+        Value* GepPtr = LI->getOperand(0);
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
+        Type* indexType = Type::getInt32Ty(*m_moduleContext);
+        // Generate new (scalar) instructions
+        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> newScalarizedInsts;
+        newScalarizedInsts.resize(numDupElements);
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", LI);
+            newScalarizedInsts[dup] = new LoadInst(pGEP->getType()->getPointerElementType(), pGEP, LI->getName(), LI);
+        }
+#else
+        GetElementPtrInst* operand = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
+        if (!operand || operand->getNumIndices() != 1)
+        {
+            return recoverNonScalarizableInst(LI);
+        }
+        // Apply the bit-cast on the GEP base and add the base offset, then fix the index by multiplying it by numElements (assuming a single index).
+        Value* GepPtr = operand->getPointerOperand();
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
+        Type* indexType = operand->getOperand(1)->getType();
+        // Generate new (scalar) instructions
+        Value* newScalarizedInsts[MAX_INPUT_VECTOR_WIDTH];
+        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", LI);
+            Value* pIndex = BinaryOperator::CreateMul(operand->getOperand(1), elementNumVal, "GEPIndex_s", LI);
+            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", LI);
+            newScalarizedInsts[dup] = new LoadInst(pGEP, LI->getName(), LI);
+        }
+#endif
+        // Add new value/s to SCM
+        updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), LI, true);
+
+        // Remove original instruction
+        m_removedInsts.insert(LI);
+        return;
+    }
+    return recoverNonScalarizableInst(LI);
+}
+
+void ScalarizeFunction::scalarizeInstruction(StoreInst* SI)
+{
+    V_PRINT(scalarizer, "\t\tStore instruction\n");
+    IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
+
+    int indexPtr = SI->getPointerOperandIndex();
+    int indexData = 1 - indexPtr;
+    VectorType* dataType = dyn_cast<VectorType>(SI->getOperand(indexData)->getType());
+    if (isScalarizableLoadStoreType(dataType) && m_pDL)
+    {
+        // Get additional info from instruction
+        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
+        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
+        IGC_ASSERT(elementSize);
+        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiple of element size");
+        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiple of element size");
+
+        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
+
+        // Obtain scalarized arguments
+        // 1 - allow scalarizing stores with any pointer type
+        // 0 - limit scalarizing to the special case where the packetizer benefits from the scalarization
+#if 1
+        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> operand0;
+
+        bool opIsConst;
+        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
+
+        // Bit-cast the pointer operand to the scalar element type, then address each lane with a constant-index GEP.
+        Value* GepPtr = SI->getOperand(indexPtr);
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
+        Type* indexType = Type::getInt32Ty(*m_moduleContext);
+        // Generate new (scalar) instructions
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", SI);
+            new StoreInst(operand0[dup], pGEP, SI);
+        }
+#else
+        GetElementPtrInst* operand1 = dyn_cast<GetElementPtrInst>(SI->getOperand(indexPtr));
+        if (!operand1 || operand1->getNumIndices() != 1)
+        {
+            return recoverNonScalarizableInst(SI);
+        }
+        Value* operand0[MAX_INPUT_VECTOR_WIDTH];
+        bool opIsConst;
+        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
+
+        // Apply the bit-cast on the GEP base and add the base offset, then fix the index by multiplying it by numElements (assuming a single index).
+        Value* GepPtr = operand1->getPointerOperand();
+        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
+        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
+        Type* indexType = operand1->getOperand(1)->getType();
+        // Generate new (scalar) instructions
+        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
+        for (unsigned dup = 0; dup < numDupElements; dup++)
+        {
+            Constant* laneVal = ConstantInt::get(indexType, dup);
+            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", SI);
+            Value* pIndex = BinaryOperator::CreateMul(operand1->getOperand(1), elementNumVal, "GEPIndex_s", SI);
+            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", SI);
+            new StoreInst(operand0[dup], pGEP, SI);
+        }
+#endif
+        // Remove original instruction
+        m_removedInsts.insert(SI);
+        return;
+    }
+    return recoverNonScalarizableInst(SI);
+}
+
 void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
     Value* origValue, Instruction* origInst, int destIdx)
 {
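
The per-lane expansion in scalarizeInstruction(LoadInst*) above can be summarized with IRBuilder. This is a minimal sketch, not part of the patch: the helper name is hypothetical, and it assumes the same typed-pointer LLVM API level as the surrounding file (getPointerTo, getNumElements on VectorType):

    #include "llvm/IR/IRBuilder.h"

    // Hypothetical helper: split a vector load into one scalar load per lane.
    static void scalarizeVectorLoad(llvm::LoadInst* LI, llvm::VectorType* dataType,
                                    llvm::SmallVectorImpl<llvm::Value*>& lanes)
    {
        llvm::IRBuilder<> B(LI); // insert the new instructions before the original load
        llvm::Type* elemTy = dataType->getScalarType();
        // Reinterpret the vector pointer as a pointer to its element type
        llvm::Value* base = B.CreatePointerCast(
            LI->getPointerOperand(),
            elemTy->getPointerTo(LI->getPointerAddressSpace()), "ptrVec2ptrScl");
        for (unsigned lane = 0, n = dataType->getNumElements(); lane < n; ++lane)
        {
            // Address lane `lane` with a constant-index GEP, then load it
            llvm::Value* lanePtr = B.CreateConstGEP1_32(elemTy, base, lane, "GEP_lane");
            lanes.push_back(B.CreateLoad(elemTy, lanePtr, LI->getName()));
        }
    }

The store path is symmetric, except that the lane values are first gathered through obtainScalarizedValues and then written back with one StoreInst per lane.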
@@ -1388,9 +1411,17 @@ void ScalarizeFunction::resolveDeferredInstructions()
     m_DRL.clear();
 }

-extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
+bool ScalarizeFunction::isScalarizableLoadStoreType(VectorType* type)
+{
+    // Scalarizing a load/store is only worth doing if:
+    // 1. gather/scatter is supported, and
+    // 2. the load/store type is a vector
+    return (m_ScalarizingVectorLDSTType && (NULL != type));
+}
+
+extern "C" FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType)
 {
-    return new ScalarizeFunction(selectiveScalarization);
+    return new ScalarizeFunction(scalarizingVectorLDSTType);
 }

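Finally, a minimal usage sketch (hypothetical driver code, assuming the LLVM legacy pass manager that this FunctionPass is written for; only createScalarizerPass comes from this file):

    #include "llvm/IR/LegacyPassManager.h"

    void runVectorLDSTScalarizer(llvm::Module& M)
    {
        llvm::legacy::PassManager PM;
        // true  => also scalarize vector load/store instructions (the new behavior)
        // false => leave vector loads/stores untouched
        PM.add(createScalarizerPass(/*scalarizingVectorLDSTType=*/true));
        PM.run(M);
    }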