@@ -1395,7 +1395,7 @@ class BoUpSLP {
13951395
13961396 /// \returns the cost incurred by unwanted spills and fills, caused by
13971397 /// holding live values over call sites.
1398- InstructionCost getSpillCost() const ;
1398+ InstructionCost getSpillCost();
13991399
14001400 /// \returns the vectorization cost of the subtree that starts at \p VL.
14011401 /// A negative number means that this is profitable.
@@ -2958,7 +2958,7 @@ class BoUpSLP {
29582958 }
29592959
29602960 /// Check if the value is vectorized in the tree.
2961- bool isVectorized(Value *V) const {
2961+ bool isVectorized(const Value *V) const {
29622962 assert(V && "V cannot be nullptr.");
29632963 return ScalarToTreeEntries.contains(V);
29642964 }
@@ -12160,78 +12160,80 @@ bool BoUpSLP::isTreeNotExtendable() const {
1216012160 return Res;
1216112161}
1216212162
12163- InstructionCost BoUpSLP::getSpillCost() const {
12163+ InstructionCost BoUpSLP::getSpillCost() {
1216412164 // Walk from the bottom of the tree to the top, tracking which values are
1216512165 // live. When we see a call instruction that is not part of our tree,
1216612166 // query TTI to see if there is a cost to keeping values live over it
1216712167 // (for example, if spills and fills are required).
12168- unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
1216912168 InstructionCost Cost = 0;
1217012169
12171- SmallPtrSet<Instruction *, 4> LiveValues ;
12172- Instruction *PrevInst = nullptr;
12170+ SmallPtrSet<const TreeEntry *, 4> LiveEntries ;
12171+ const TreeEntry *Prev = nullptr;
1217312172
1217412173 // The entries in VectorizableTree are not necessarily ordered by their
1217512174 // position in basic blocks. Collect them and order them by dominance so later
1217612175 // instructions are guaranteed to be visited first. For instructions in
1217712176 // different basic blocks, we only scan to the beginning of the block, so
1217812177 // their order does not matter, as long as all instructions in a basic block
1217912178 // are grouped together. Using dominance ensures a deterministic order.
12180- SmallVector<Instruction *, 16> OrderedScalars ;
12179+ SmallVector<TreeEntry *, 16> OrderedEntries ;
1218112180 for (const auto &TEPtr : VectorizableTree) {
12182- if (TEPtr->State != TreeEntry::Vectorize )
12181+ if (TEPtr->isGather() )
1218312182 continue;
12184- Instruction *Inst = dyn_cast<Instruction> (TEPtr->Scalars[0] );
12185- if (!Inst)
12186- continue;
12187- OrderedScalars.push_back(Inst);
12188- }
12189- llvm::sort(OrderedScalars, [&](Instruction *A, Instruction *B) {
12190- auto *NodeA = DT->getNode(A-> getParent());
12191- auto *NodeB = DT->getNode(B-> getParent());
12183+ OrderedEntries.push_back (TEPtr.get() );
12184+ }
12185+ llvm::stable_sort(OrderedEntries, [&](const TreeEntry *TA,
12186+ const TreeEntry *TB) {
12187+ Instruction &A = getLastInstructionInBundle(TA);
12188+ Instruction &B = getLastInstructionInBundle(TB);
12189+ auto *NodeA = DT->getNode(A. getParent());
12190+ auto *NodeB = DT->getNode(B. getParent());
1219212191 assert(NodeA && "Should only process reachable instructions");
1219312192 assert(NodeB && "Should only process reachable instructions");
1219412193 assert((NodeA == NodeB) == (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
1219512194 "Different nodes should have different DFS numbers");
1219612195 if (NodeA != NodeB)
1219712196 return NodeA->getDFSNumIn() > NodeB->getDFSNumIn();
12198- return B-> comesBefore(A);
12197+ return B. comesBefore(& A);
1219912198 });
1220012199
12201- for (Instruction *Inst : OrderedScalars ) {
12202- if (!PrevInst ) {
12203- PrevInst = Inst ;
12200+ for (const TreeEntry *TE : OrderedEntries ) {
12201+ if (!Prev ) {
12202+ Prev = TE ;
1220412203 continue;
1220512204 }
1220612205
12207- // Update LiveValues.
12208- LiveValues.erase(PrevInst);
12209- for (auto &J : PrevInst->operands()) {
12210- if (isa<Instruction>(&*J) && isVectorized(&*J))
12211- LiveValues.insert(cast<Instruction>(&*J));
12206+ LiveEntries.erase(Prev);
12207+ for (unsigned I : seq<unsigned>(Prev->getNumOperands())) {
12208+ const TreeEntry *Op = getVectorizedOperand(Prev, I);
12209+ if (!Op)
12210+ continue;
12211+ assert(!Op->isGather() && "Expected vectorized operand.");
12212+ LiveEntries.insert(Op);
1221212213 }
1221312214
1221412215 LLVM_DEBUG({
12215- dbgs() << "SLP: #LV: " << LiveValues .size();
12216- for (auto *X : LiveValues )
12217- dbgs() << " " << X->getName ();
12216+ dbgs() << "SLP: #LV: " << LiveEntries .size();
12217+ for (auto *X : LiveEntries )
12218+ X->dump ();
1221812219 dbgs() << ", Looking at ";
12219- Inst ->dump();
12220+ TE ->dump();
1222012221 });
1222112222
1222212223 // Now find the sequence of instructions between PrevInst and Inst.
1222312224 unsigned NumCalls = 0;
12224- BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
12225- PrevInstIt =
12226- PrevInst->getIterator().getReverse();
12225+ const Instruction *PrevInst = &getLastInstructionInBundle(Prev);
12226+ BasicBlock::const_reverse_iterator
12227+ InstIt = ++getLastInstructionInBundle(TE).getIterator().getReverse(),
12228+ PrevInstIt = PrevInst->getIterator().getReverse();
1222712229 while (InstIt != PrevInstIt) {
1222812230 if (PrevInstIt == PrevInst->getParent()->rend()) {
12229- PrevInstIt = Inst-> getParent()->rbegin();
12231+ PrevInstIt = getLastInstructionInBundle(TE). getParent()->rbegin();
1223012232 continue;
1223112233 }
1223212234
12233- auto NoCallIntrinsic = [this](Instruction *I) {
12234- auto *II = dyn_cast<IntrinsicInst>(I);
12235+ auto NoCallIntrinsic = [this](const Instruction *I) {
12236+ const auto *II = dyn_cast<IntrinsicInst>(I);
1223512237 if (!II)
1223612238 return false;
1223712239 if (II->isAssumeLikeIntrinsic())
@@ -12252,25 +12254,28 @@ InstructionCost BoUpSLP::getSpillCost() const {
1225212254 };
1225312255
1225412256 // Debug information does not impact spill cost.
12255- if (isa<CallBase>(&*PrevInstIt) && !NoCallIntrinsic(&*PrevInstIt) &&
12256- &*PrevInstIt != PrevInst)
12257+ // Vectorized calls, represented as vector intrinsics, do not impact spill
12258+ // cost.
12259+ if (const auto *CB = dyn_cast<CallBase>(&*PrevInstIt);
12260+ CB && !NoCallIntrinsic(CB) && !isVectorized(CB))
1225712261 NumCalls++;
1225812262
1225912263 ++PrevInstIt;
1226012264 }
1226112265
1226212266 if (NumCalls) {
12263- SmallVector<Type *, 4> V;
12264- for (auto *II : LiveValues) {
12265- auto *ScalarTy = II->getType();
12266- if (auto *VectorTy = dyn_cast<FixedVectorType>(ScalarTy))
12267- ScalarTy = VectorTy->getElementType();
12268- V.push_back(getWidenedType(ScalarTy, BundleWidth));
12267+ SmallVector<Type *, 4> EntriesTypes;
12268+ for (const TreeEntry *TE : LiveEntries) {
12269+ auto *ScalarTy = TE->getMainOp()->getType();
12270+ auto It = MinBWs.find(TE);
12271+ if (It != MinBWs.end())
12272+ ScalarTy = IntegerType::get(ScalarTy->getContext(), It->second.first);
12273+ EntriesTypes.push_back(getWidenedType(ScalarTy, TE->getVectorFactor()));
1226912274 }
12270- Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V );
12275+ Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(EntriesTypes );
1227112276 }
1227212277
12273- PrevInst = Inst ;
12278+ Prev = TE ;
1227412279 }
1227512280
1227612281 return Cost;
0 commit comments