@@ -13988,6 +13988,15 @@ bool BoUpSLP::collectValuesToDemote(
1398813988 // If the value is not a vectorized instruction in the expression and not used
1398913989 // by the insertelement instruction and not used in multiple vector nodes, it
1399013990 // cannot be demoted.
13991+ // TODO: improve handling of gathered values and others.
13992+ auto *I = dyn_cast<Instruction>(V);
13993+ const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr;
13994+ if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) ||
13995+ all_of(I->users(), [&](User *U) {
13996+ return isa<InsertElementInst>(U) && !getTreeEntry(U);
13997+ }))
13998+ return false;
13999+
1399114000 auto IsPotentiallyTruncated = [&](Value *V, unsigned &BitWidth) -> bool {
1399214001 if (MultiNodeScalars.contains(V))
1399314002 return false;
@@ -14002,44 +14011,8 @@ bool BoUpSLP::collectValuesToDemote(
1400214011 BitWidth = std::max(BitWidth, BitWidth1);
1400314012 return BitWidth > 0 && OrigBitWidth >= (BitWidth * 2);
1400414013 };
14005- auto FinalAnalysis = [&](const TreeEntry *ITE = nullptr) {
14006- if (!IsProfitableToDemote)
14007- return false;
14008- return (ITE && ITE->UserTreeIndices.size() > 1) ||
14009- IsPotentiallyTruncated(V, BitWidth);
14010- };
14011- // TODO: improve handling of gathered values and others.
14012- auto *I = dyn_cast<Instruction>(V);
14013- const TreeEntry *ITE = I ? getTreeEntry(I) : nullptr;
14014- if (!ITE || !Visited.insert(I).second || MultiNodeScalars.contains(I) ||
14015- all_of(I->users(), [&](User *U) {
14016- return isa<InsertElementInst>(U) && !getTreeEntry(U);
14017- }))
14018- return FinalAnalysis();
14019-
1402014014 unsigned Start = 0;
1402114015 unsigned End = I->getNumOperands();
14022-
14023- auto ProcessOperands = [&](ArrayRef<Value *> Operands, bool &NeedToExit) {
14024- NeedToExit = false;
14025- unsigned InitLevel = MaxDepthLevel;
14026- for (Value *IncValue : Operands) {
14027- unsigned Level = InitLevel;
14028- if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14029- ToDemote, DemotedConsts, Visited, Level,
14030- IsProfitableToDemote, IsTruncRoot)) {
14031- if (!IsProfitableToDemote)
14032- return false;
14033- NeedToExit = true;
14034- if (!FinalAnalysis(ITE))
14035- return false;
14036- continue;
14037- }
14038- MaxDepthLevel = std::max(MaxDepthLevel, Level);
14039- }
14040- return true;
14041- };
14042- bool NeedToExit = false;
1404314016 switch (I->getOpcode()) {
1404414017
1404514018 // We can always demote truncations and extensions. Since truncations can
@@ -14065,21 +14038,35 @@ bool BoUpSLP::collectValuesToDemote(
1406514038 case Instruction::And:
1406614039 case Instruction::Or:
1406714040 case Instruction::Xor: {
14068- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14069- return false;
14070- if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit))
14041+ unsigned Level1 = MaxDepthLevel, Level2 = MaxDepthLevel;
14042+ if ((ITE->UserTreeIndices.size() > 1 &&
14043+ !IsPotentiallyTruncated(I, BitWidth)) ||
14044+ !collectValuesToDemote(I->getOperand(0), IsProfitableToDemoteRoot,
14045+ BitWidth, ToDemote, DemotedConsts, Visited,
14046+ Level1, IsProfitableToDemote, IsTruncRoot) ||
14047+ !collectValuesToDemote(I->getOperand(1), IsProfitableToDemoteRoot,
14048+ BitWidth, ToDemote, DemotedConsts, Visited,
14049+ Level2, IsProfitableToDemote, IsTruncRoot))
1407114050 return false;
14051+ MaxDepthLevel = std::max(Level1, Level2);
1407214052 break;
1407314053 }
1407414054
1407514055 // We can demote selects if we can demote their true and false values.
1407614056 case Instruction::Select: {
14077- if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
14078- return false;
1407914057 Start = 1;
14080- auto *SI = cast<SelectInst>(I);
14081- if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit))
14058+ unsigned Level1 = MaxDepthLevel, Level2 = MaxDepthLevel;
14059+ SelectInst *SI = cast<SelectInst>(I);
14060+ if ((ITE->UserTreeIndices.size() > 1 &&
14061+ !IsPotentiallyTruncated(I, BitWidth)) ||
14062+ !collectValuesToDemote(SI->getTrueValue(), IsProfitableToDemoteRoot,
14063+ BitWidth, ToDemote, DemotedConsts, Visited,
14064+ Level1, IsProfitableToDemote, IsTruncRoot) ||
14065+ !collectValuesToDemote(SI->getFalseValue(), IsProfitableToDemoteRoot,
14066+ BitWidth, ToDemote, DemotedConsts, Visited,
14067+ Level2, IsProfitableToDemote, IsTruncRoot))
1408214068 return false;
14069+ MaxDepthLevel = std::max(Level1, Level2);
1408314070 break;
1408414071 }
1408514072
@@ -14089,20 +14076,23 @@ bool BoUpSLP::collectValuesToDemote(
1408914076 PHINode *PN = cast<PHINode>(I);
1409014077 if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth))
1409114078 return false;
14092- SmallVector<Value *> Ops(PN->incoming_values().begin(),
14093- PN->incoming_values().end());
14094- if (!ProcessOperands(Ops, NeedToExit))
14095- return false;
14079+ unsigned InitLevel = MaxDepthLevel;
14080+ for (Value *IncValue : PN->incoming_values()) {
14081+ unsigned Level = InitLevel;
14082+ if (!collectValuesToDemote(IncValue, IsProfitableToDemoteRoot, BitWidth,
14083+ ToDemote, DemotedConsts, Visited, Level,
14084+ IsProfitableToDemote, IsTruncRoot))
14085+ return false;
14086+ MaxDepthLevel = std::max(MaxDepthLevel, Level);
14087+ }
1409614088 break;
1409714089 }
1409814090
1409914091 // Otherwise, conservatively give up.
1410014092 default:
1410114093 MaxDepthLevel = 1;
14102- return FinalAnalysis( );
14094+ return IsProfitableToDemote && IsPotentiallyTruncated(I, BitWidth );
1410314095 }
14104- if (NeedToExit)
14105- return true;
1410614096
1410714097 ++MaxDepthLevel;
1410814098 // Gather demoted constant operands.
@@ -14141,17 +14131,15 @@ void BoUpSLP::computeMinimumValueSizes() {
1414114131
1414214132 // The first value node for store/insertelement is sext/zext/trunc? Skip it,
1414314133 // resize to the final type.
14144- bool IsTruncRoot = false;
1414514134 bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
1414614135 if (NodeIdx != 0 &&
1414714136 VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1414814137 (VectorizableTree[NodeIdx]->getOpcode() == Instruction::ZExt ||
1414914138 VectorizableTree[NodeIdx]->getOpcode() == Instruction::SExt ||
1415014139 VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc)) {
1415114140 assert(IsStoreOrInsertElt && "Expected store/insertelement seeded graph.");
14152- IsTruncRoot = VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc;
14153- IsProfitableToDemoteRoot = true;
1415414141 ++NodeIdx;
14142+ IsProfitableToDemoteRoot = true;
1415514143 }
1415614144
1415714145 // Analyzed in reduction already and not profitable - exit.
@@ -14283,6 +14271,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1428314271 ReductionBitWidth = bit_ceil(ReductionBitWidth);
1428414272 }
1428514273 bool IsTopRoot = NodeIdx == 0;
14274+ bool IsTruncRoot = false;
1428614275 while (NodeIdx < VectorizableTree.size() &&
1428714276 VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1428814277 VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
0 commit comments