@@ -2283,14 +2283,6 @@ class BoUpSLP {
22832283 ~BoUpSLP();
22842284
22852285private:
2286- /// Determine if a vectorized value \p V in can be demoted to
2287- /// a smaller type with a truncation. We collect the values that will be
2288- /// demoted in ToDemote and additional roots that require investigating in
2289- /// Roots.
2290- bool collectValuesToDemote(Value *V, SmallVectorImpl<Value *> &ToDemote,
2291- SmallVectorImpl<Value *> &Roots,
2292- DenseSet<Value *> &Visited) const;
2293-
22942286 /// Check if the operands on the edges \p Edges of the \p UserTE allows
22952287 /// reordering (i.e. the operands can be reordered because they have only one
22962288 /// user and reordarable).
@@ -9052,7 +9044,8 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef<Value *> VectorizedVals) {
90529044 // for the extract and the added cost of the sign extend if needed.
90539045 auto *VecTy = FixedVectorType::get(EU.Scalar->getType(), BundleWidth);
90549046 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
9055- auto It = MinBWs.find(EU.Scalar);
9047+ auto *ScalarRoot = VectorizableTree[0]->Scalars[0];
9048+ auto It = MinBWs.find(ScalarRoot);
90569049 if (It != MinBWs.end()) {
90579050 auto *MinTy = IntegerType::get(F->getContext(), It->second.first);
90589051 unsigned Extend =
@@ -13081,20 +13074,19 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
1308113074// Determine if a value V in a vectorizable expression Expr can be demoted to a
1308213075// smaller type with a truncation. We collect the values that will be demoted
1308313076// in ToDemote and additional roots that require investigating in Roots.
13084- bool BoUpSLP::collectValuesToDemote(Value *V,
13085- SmallVectorImpl<Value *> &ToDemote,
13086- SmallVectorImpl<Value *> &Roots,
13087- DenseSet<Value *> &Visited) const {
13077+ static bool collectValuesToDemote(Value *V, SmallPtrSetImpl<Value *> &Expr,
13078+ SmallVectorImpl<Value *> &ToDemote,
13079+ SmallVectorImpl<Value *> &Roots) {
1308813080 // We can always demote constants.
1308913081 if (isa<Constant>(V)) {
1309013082 ToDemote.push_back(V);
1309113083 return true;
1309213084 }
1309313085
13094- // If the value is not a vectorized instruction in the expression with only
13095- // one use, it cannot be demoted.
13086+ // If the value is not an instruction in the expression with only one use, it
13087+ // cannot be demoted.
1309613088 auto *I = dyn_cast<Instruction>(V);
13097- if (!I || !I->hasOneUse() || !getTreeEntry(I) || !Visited.insert (I).second )
13089+ if (!I || !I->hasOneUse() || !Expr.count (I))
1309813090 return false;
1309913091
1310013092 switch (I->getOpcode()) {
@@ -13118,16 +13110,16 @@ bool BoUpSLP::collectValuesToDemote(Value *V,
1311813110 case Instruction::And:
1311913111 case Instruction::Or:
1312013112 case Instruction::Xor:
13121- if (!collectValuesToDemote(I->getOperand(0), ToDemote, Roots, Visited ) ||
13122- !collectValuesToDemote(I->getOperand(1), ToDemote, Roots, Visited ))
13113+ if (!collectValuesToDemote(I->getOperand(0), Expr, ToDemote, Roots ) ||
13114+ !collectValuesToDemote(I->getOperand(1), Expr, ToDemote, Roots ))
1312313115 return false;
1312413116 break;
1312513117
1312613118 // We can demote selects if we can demote their true and false values.
1312713119 case Instruction::Select: {
1312813120 SelectInst *SI = cast<SelectInst>(I);
13129- if (!collectValuesToDemote(SI->getTrueValue(), ToDemote, Roots, Visited ) ||
13130- !collectValuesToDemote(SI->getFalseValue(), ToDemote, Roots, Visited ))
13121+ if (!collectValuesToDemote(SI->getTrueValue(), Expr, ToDemote, Roots ) ||
13122+ !collectValuesToDemote(SI->getFalseValue(), Expr, ToDemote, Roots ))
1313113123 return false;
1313213124 break;
1313313125 }
@@ -13137,7 +13129,7 @@ bool BoUpSLP::collectValuesToDemote(Value *V,
1313713129 case Instruction::PHI: {
1313813130 PHINode *PN = cast<PHINode>(I);
1313913131 for (Value *IncValue : PN->incoming_values())
13140- if (!collectValuesToDemote(IncValue, ToDemote, Roots, Visited ))
13132+ if (!collectValuesToDemote(IncValue, Expr, ToDemote, Roots ))
1314113133 return false;
1314213134 break;
1314313135 }
@@ -13164,16 +13156,36 @@ void BoUpSLP::computeMinimumValueSizes() {
1316413156 if (!TreeRootIT)
1316513157 return;
1316613158
13159+ // If the expression is not rooted by a store, these roots should have
13160+ // external uses.
13161+ // TODO: investigate if this can be relaxed.
13162+ SmallPtrSet<Value *, 32> Expr(TreeRoot.begin(), TreeRoot.end());
13163+ for (auto &EU : ExternalUses)
13164+ if (!Expr.erase(EU.Scalar))
13165+ return;
13166+ if (!Expr.empty())
13167+ return;
13168+
13169+ // Collect the scalar values of the vectorizable expression. We will use this
13170+ // context to determine which values can be demoted. If we see a truncation,
13171+ // we mark it as seeding another demotion.
13172+ for (auto &EntryPtr : VectorizableTree)
13173+ Expr.insert(EntryPtr->Scalars.begin(), EntryPtr->Scalars.end());
13174+
13175+ // Ensure the roots of the vectorizable tree don't form a cycle. They must
13176+ // have a single external user that is not in the vectorizable tree.
13177+ for (auto *Root : TreeRoot)
13178+ if (!Root->hasOneUse() || Expr.count(*Root->user_begin()))
13179+ return;
13180+
1316713181 // Conservatively determine if we can actually truncate the roots of the
1316813182 // expression. Collect the values that can be demoted in ToDemote and
1316913183 // additional roots that require investigating in Roots.
1317013184 SmallVector<Value *, 32> ToDemote;
1317113185 SmallVector<Value *, 4> Roots;
13172- for (auto *Root : TreeRoot) {
13173- DenseSet<Value *> Visited;
13174- if (!collectValuesToDemote(Root, ToDemote, Roots, Visited))
13186+ for (auto *Root : TreeRoot)
13187+ if (!collectValuesToDemote(Root, Expr, ToDemote, Roots))
1317513188 return;
13176- }
1317713189
1317813190 // The maximum bit width required to represent all the values that can be
1317913191 // demoted without loss of precision. It would be safe to truncate the roots
@@ -13203,9 +13215,9 @@ void BoUpSLP::computeMinimumValueSizes() {
1320313215 // maximum bit width required to store the scalar by using ValueTracking to
1320413216 // compute the number of high-order bits we can truncate.
1320513217 if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType()) &&
13206- all_of(TreeRoot, [](Value *V ) {
13207- return all_of(V->users(),
13208- [](User *U) { return isa<GetElementPtrInst>(U); } );
13218+ llvm:: all_of(TreeRoot, [](Value *R ) {
13219+ assert(R->hasOneUse() && "Root should have only one use!");
13220+ return isa<GetElementPtrInst>(R->user_back() );
1320913221 })) {
1321013222 MaxBitWidth = 8u;
1321113223
@@ -13254,10 +13266,8 @@ void BoUpSLP::computeMinimumValueSizes() {
1325413266 // If we can truncate the root, we must collect additional values that might
1325513267 // be demoted as a result. That is, those seeded by truncations we will
1325613268 // modify.
13257- while (!Roots.empty()) {
13258- DenseSet<Value *> Visited;
13259- collectValuesToDemote(Roots.pop_back_val(), ToDemote, Roots, Visited);
13260- }
13269+ while (!Roots.empty())
13270+ collectValuesToDemote(Roots.pop_back_val(), Expr, ToDemote, Roots);
1326113271
1326213272 // Finally, map the values we can demote to the maximum bit with we computed.
1326313273 DenseMap<const TreeEntry *, bool> Signendness;
0 commit comments