@@ -69,12 +69,12 @@ IGC_INITIALIZE_PASS_END(ScalarizeFunction, PASS_FLAG, PASS_DESCRIPTION, PASS_CFG
 
 char ScalarizeFunction::ID = 0;
 
-ScalarizeFunction::ScalarizeFunction(bool scalarizingVectorLDSTType) : FunctionPass(ID)
+ScalarizeFunction::ScalarizeFunction(bool selectiveScalarization) : FunctionPass(ID)
 {
     initializeScalarizeFunctionPass(*PassRegistry::getPassRegistry());
 
     for (int i = 0; i < Instruction::OtherOpsEnd; i++) m_transposeCtr[i] = 0;
-    m_ScalarizingVectorLDSTType = scalarizingVectorLDSTType;
+    m_SelectiveScalarization = selectiveScalarization;
 
     // Initialize SCM buffers and allocation
     m_SCMAllocationArray = new SCMEntry[ESTIMATED_INST_NUM];
@@ -121,6 +121,13 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     m_SCM.clear();
     releaseAllSCMEntries();
     m_DRL.clear();
+    m_Excludes.clear();
+
+    // Collect the instructions that we want to exclude from scalarization
+    if (m_SelectiveScalarization)
+    {
+        buildExclusiveSet();
+    }
 
     // Scalarization. Iterate over all the instructions
     // Always hold the iterator at the instruction following the one being scalarized (so the
@@ -132,7 +139,14 @@ bool ScalarizeFunction::runOnFunction(Function& F)
         Instruction* currInst = &*sI;
         // Move iterator to next instruction BEFORE scalarizing current instruction
         ++sI;
-        dispatchInstructionToScalarize(currInst);
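+        // Instructions in the exclusion set must keep their vector form, so
+        // they are recovered as-is rather than dispatched for scalarization.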
+        if (m_Excludes.count(currInst))
+        {
+            recoverNonScalarizableInst(currInst);
+        }
+        else
+        {
+            dispatchInstructionToScalarize(currInst);
+        }
     }
 
     resolveVectorValues();
@@ -161,6 +175,111 @@ bool ScalarizeFunction::runOnFunction(Function& F)
     return true;
 }
 
+void ScalarizeFunction::buildExclusiveSet()
+{
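+    // Seed the worklist with values that have to stay in vector form: vector
+    // arguments of calls, and insertelement/extractelement instructions whose
+    // lane index is not a compile-time constant, e.g.
+    //     %v = insertelement <4 x float> %acc, float %x, i32 %n
+    // where %n is only known at run time. The second loop below then grows
+    // m_Excludes transitively through the phi/shuffle/insertelement chains
+    // that produce and consume these values.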
+    inst_iterator sI = inst_begin(m_currFunc);
+    inst_iterator sE = inst_end(m_currFunc);
+    std::vector<llvm::Value*> workset;
+    while (sI != sE)
+    {
+        Instruction* currInst = &*sI;
+        ++sI;
+        if (CallInst* CI = dyn_cast<CallInst>(currInst))
+        {
+            unsigned numOperands = CI->getNumArgOperands();
+            for (unsigned i = 0; i < numOperands; i++)
+            {
+                Value* operand = CI->getArgOperand(i);
+                if (isa<VectorType>(operand->getType()))
+                {
+                    workset.push_back(operand);
+                }
+            }
+        }
+        else if (auto IEI = dyn_cast<InsertElementInst>(currInst))
+        {
+            Value* scalarIndexVal = IEI->getOperand(2);
+            // If the index is not a constant - we cannot statically remove this inst
+            if (!isa<ConstantInt>(scalarIndexVal)) {
+                workset.push_back(IEI);
+            }
+        }
+        else if (auto EEI = dyn_cast<ExtractElementInst>(currInst))
+        {
+            Value* scalarIndexVal = EEI->getOperand(1);
+            // If the index is not a constant - we cannot statically remove this inst
+            if (!isa<ConstantInt>(scalarIndexVal)) {
+                workset.push_back(EEI->getOperand(0));
+            }
+        }
+    }
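+    // Worklist iteration: transitively close the set. Every insertelement,
+    // shufflevector, or phi that defines or uses an excluded value is
+    // excluded as well, so whole vector def-use webs stay unscalarized.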
+    while (!workset.empty())
+    {
+        auto Def = workset.back();
+        workset.pop_back();
+        if (m_Excludes.count(Def))
+        {
+            continue;
+        }
+        if (auto IEI = dyn_cast<InsertElementInst>(Def))
+        {
+            m_Excludes.insert(IEI);
+            if (!m_Excludes.count(IEI->getOperand(0)) &&
+                (isa<PHINode>(IEI->getOperand(0)) ||
+                    isa<ShuffleVectorInst>(IEI->getOperand(0)) ||
+                    isa<InsertElementInst>(IEI->getOperand(0))))
+            {
+                workset.push_back(IEI->getOperand(0));
+            }
+        }
+        else if (auto SVI = dyn_cast<ShuffleVectorInst>(Def))
+        {
+            m_Excludes.insert(SVI);
+            if (!m_Excludes.count(SVI->getOperand(0)) &&
+                (isa<PHINode>(SVI->getOperand(0)) ||
+                    isa<ShuffleVectorInst>(SVI->getOperand(0)) ||
+                    isa<InsertElementInst>(SVI->getOperand(0))))
+            {
+                workset.push_back(SVI->getOperand(0));
+            }
+            if (!m_Excludes.count(SVI->getOperand(1)) &&
+                (isa<PHINode>(SVI->getOperand(1)) ||
+                    isa<ShuffleVectorInst>(SVI->getOperand(1)) ||
+                    isa<InsertElementInst>(SVI->getOperand(1))))
+            {
+                workset.push_back(SVI->getOperand(1));
+            }
+        }
+        else if (auto PHI = dyn_cast<PHINode>(Def))
+        {
+            m_Excludes.insert(PHI);
+            for (int i = 0, n = PHI->getNumOperands(); i < n; ++i)
+                if (!m_Excludes.count(PHI->getOperand(i)) &&
+                    (isa<PHINode>(PHI->getOperand(i)) ||
+                        isa<ShuffleVectorInst>(PHI->getOperand(i)) ||
+                        isa<InsertElementInst>(PHI->getOperand(i))))
+                {
+                    workset.push_back(PHI->getOperand(i));
+                }
+        }
+        else
+        {
+            continue;
+        }
+        // Check the users too: insert/shuffle/phi users join the exclusion set
+        for (auto U : Def->users())
+        {
+            if (!m_Excludes.count(U) &&
+                (isa<PHINode>(U) ||
+                    isa<ShuffleVectorInst>(U) ||
+                    isa<InsertElementInst>(U)))
+            {
+                workset.push_back(U);
+            }
+        }
+    }
+}
+
 void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
 {
     V_PRINT(scalarizer, "\tScalarizing Instruction: " << *I << "\n");
@@ -235,13 +354,6 @@ void ScalarizeFunction::dispatchInstructionToScalarize(Instruction* I)
     case Instruction::GetElementPtr:
         scalarizeInstruction(dyn_cast<GetElementPtrInst>(I));
         break;
-    case Instruction::Load:
-        scalarizeInstruction(dyn_cast<LoadInst>(I));
-        break;
-    case Instruction::Store:
-        scalarizeInstruction(dyn_cast<StoreInst>(I));
-        break;
-
     // The remaining instructions are not supported for scalarization. Keep "as is"
     default:
         recoverNonScalarizableInst(I);
@@ -892,149 +1004,6 @@ void ScalarizeFunction::scalarizeInstruction(GetElementPtrInst* GI)
     m_removedInsts.insert(GI);
 }
 
-void ScalarizeFunction::scalarizeInstruction(LoadInst* LI)
-{
-    V_PRINT(scalarizer, "\t\tLoad instruction\n");
-    IGC_ASSERT_MESSAGE(LI, "instruction type dynamic cast failed");
-
-    VectorType* dataType = dyn_cast<VectorType>(LI->getType());
-    if (isScalarizableLoadStoreType(dataType) && m_pDL)
-    {
-        // Prepare empty SCM entry for the instruction
-        SCMEntry* newEntry = getSCMEntry(LI);
-
-        // Get additional info from instruction
-        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
-        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
-        IGC_ASSERT(elementSize);
-        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiply of element size");
-        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiply of element size");
-        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
-
-        // Obtain scalarized arguments
-        // 1 - to allow scalarizing Load with any pointer type
-        // 0 - to limit scalarizing to special case where packetizer benifit from the scalarizing
-#if 1
-        // Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
-        Value* GepPtr = LI->getOperand(0);
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
-        Type* indexType = Type::getInt32Ty(*m_moduleContext);
-        // Generate new (scalar) instructions
-        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> newScalarizedInsts;
-        newScalarizedInsts.resize(numDupElements);
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", LI);
-            newScalarizedInsts[dup] = new LoadInst(pGEP->getType()->getPointerElementType(), pGEP, LI->getName(), LI);
-        }
-#else
-        GetElementPtrInst* operand = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
-        if (!operand || operand->getNumIndices() != 1)
-        {
-            return recoverNonScalarizableInst(LI);
-        }
-        // Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
-        Value* GepPtr = operand->getPointerOperand();
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", LI);
-        Type* indexType = operand->getOperand(1)->getType();
-        // Generate new (scalar) instructions
-        Value* newScalarizedInsts[MAX_INPUT_VECTOR_WIDTH];
-        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", LI);
-            Value* pIndex = BinaryOperator::CreateMul(operand->getOperand(1), elementNumVal, "GEPIndex_s", LI);
-            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", LI);
-            newScalarizedInsts[dup] = new LoadInst(pGEP, LI->getName(), LI);
-        }
-#endif
-        // Add new value/s to SCM
-        updateSCMEntryWithValues(newEntry, &(newScalarizedInsts[0]), LI, true);
-
-        // Remove original instruction
-        m_removedInsts.insert(LI);
-        return;
-    }
-    return recoverNonScalarizableInst(LI);
-}
-
-void ScalarizeFunction::scalarizeInstruction(StoreInst* SI)
-{
-    V_PRINT(scalarizer, "\t\tStore instruction\n");
-    IGC_ASSERT_MESSAGE(SI, "instruction type dynamic cast failed");
-
-    int indexPtr = SI->getPointerOperandIndex();
-    int indexData = 1 - indexPtr;
-    VectorType* dataType = dyn_cast<VectorType>(SI->getOperand(indexData)->getType());
-    if (isScalarizableLoadStoreType(dataType) && m_pDL)
-    {
-        // Get additional info from instruction
-        unsigned int vectorSize = int_cast<unsigned int>(m_pDL->getTypeAllocSize(dataType));
-        unsigned int elementSize = int_cast<unsigned int>(m_pDL->getTypeSizeInBits(dataType->getElementType()) / 8);
-        IGC_ASSERT(elementSize);
-        IGC_ASSERT_MESSAGE((vectorSize / elementSize > 0), "vector size should be a multiply of element size");
-        IGC_ASSERT_MESSAGE((vectorSize % elementSize == 0), "vector size should be a multiply of element size");
-
-        unsigned numDupElements = int_cast<unsigned>(dataType->getNumElements());
-
-        // Obtain scalarized arguments
-        // 1 - to allow scalarizing Load with any pointer type
-        // 0 - to limit scalarizing to special case where packetizer benifit from the scalarizing
-#if 1
-        SmallVector<Value*, MAX_INPUT_VECTOR_WIDTH> operand0;
-
-        bool opIsConst;
-        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
-
-        // Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
-        Value* GepPtr = SI->getOperand(indexPtr);
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
-        Type* indexType = Type::getInt32Ty(*m_moduleContext);
-        // Generate new (scalar) instructions
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(nullptr, operandBase, laneVal, "GEP_lane", SI);
-            new StoreInst(operand0[dup], pGEP, SI);
-        }
-#else
-        GetElementPtrInst* operand1 = dyn_cast<GetElementPtrInst>(SI->getOperand(indexPtr));
-        if (!operand1 || operand1->getNumIndices() != 1)
-        {
-            return recoverNonScalarizableInst(SI);
-        }
-        Value* operand0[MAX_INPUT_VECTOR_WIDTH];
-        bool opIsConst;
-        obtainScalarizedValues(operand0, &opIsConst, SI->getOperand(indexData), SI);
-
-        // Apply the bit-cast on the GEP base and add base-offset then fix the index by multiply it with numElements. (assuming one index only).
-        Value* GepPtr = operand1->getPointerOperand();
-        PointerType* GepPtrType = cast<PointerType>(GepPtr->getType());
-        Value* operandBase = BitCastInst::CreatePointerCast(GepPtr, dataType->getScalarType()->getPointerTo(GepPtrType->getAddressSpace()), "ptrVec2ptrScl", SI);
-        Type* indexType = operand1->getOperand(1)->getType();
-        // Generate new (scalar) instructions
-        Constant* elementNumVal = ConstantInt::get(indexType, numElements);
-        for (unsigned dup = 0; dup < numDupElements; dup++)
-        {
-            Constant* laneVal = ConstantInt::get(indexType, dup);
-            Value* pGEP = GetElementPtrInst::Create(operandBase, laneVal, "GEP_lane", SI);
-            Value* pIndex = BinaryOperator::CreateMul(operand1->getOperand(1), elementNumVal, "GEPIndex_s", SI);
-            pGEP = GetElementPtrInst::Create(pGEP, pIndex, "GEP_s", SI);
-            new StoreInst(operand0[dup], pGEP, SI);
-        }
-#endif
-        // Remove original instruction
-        m_removedInsts.insert(SI);
-        return;
-    }
-    return recoverNonScalarizableInst(SI);
-}
-
 void ScalarizeFunction::obtainScalarizedValues(SmallVectorImpl<Value*>& retValues, bool* retIsConstant,
     Value* origValue, Instruction* origInst, int destIdx)
 {
@@ -1411,17 +1380,9 @@ void ScalarizeFunction::resolveDeferredInstructions()
     m_DRL.clear();
 }
 
-bool ScalarizeFunction::isScalarizableLoadStoreType(VectorType* type)
-{
-    // Scalarize Load/Store worth doing only if:
-    // 1. Gather/Scatter are supported
-    // 2. Load/Store type is a vector
-    return (m_ScalarizingVectorLDSTType && (NULL != type));
-}
-
-extern "C" FunctionPass* createScalarizerPass(bool scalarizingVectorLDSTType)
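+// Note: the boolean parameter now enables selective scalarization (see
+// buildExclusiveSet) instead of vector load/store scalarization, which is
+// removed by this change.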
+extern "C" FunctionPass* createScalarizerPass(bool selectiveScalarization)
 {
-    return new ScalarizeFunction(scalarizingVectorLDSTType);
+    return new ScalarizeFunction(selectiveScalarization);
 }
 
 