@@ -1107,7 +1107,7 @@ class BoUpSLP {
11071107 MinBWs.clear();
11081108 ReductionBitWidth = 0;
11091109 CastMaxMinBWSizes.reset();
1110- TruncNodes .clear();
1110+ ExtraBitWidthNodes .clear();
11111111 InstrElementSize.clear();
11121112 UserIgnoreList = nullptr;
11131113 PostponedGathers.clear();
@@ -3683,8 +3683,9 @@ class BoUpSLP {
36833683 /// type sizes, used in the tree.
36843684 std::optional<std::pair<unsigned, unsigned>> CastMaxMinBWSizes;
36853685
3686- /// Indices of the vectorized trunc nodes.
3687- DenseSet<unsigned> TruncNodes;
3686+ /// Indices of the vectorized nodes, which are supposed to be the roots of the
3687+ /// new bitwidth analysis attempt, like trunc, IToFP or ICmp.
3688+ DenseSet<unsigned> ExtraBitWidthNodes;
36883689};
36893690
36903691} // end namespace slpvectorizer
@@ -6612,7 +6613,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
66126613 PrevMaxBW),
66136614 std::min<unsigned>(DL->getTypeSizeInBits(VL0->getType()),
66146615 PrevMinBW));
6615- TruncNodes.insert(VectorizableTree.size());
6616+ ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
6617+ } else if (ShuffleOrOp == Instruction::SIToFP ||
6618+ ShuffleOrOp == Instruction::UIToFP) {
6619+ unsigned NumSignBits =
6620+ ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT);
6621+ if (auto *OpI = dyn_cast<Instruction>(VL0->getOperand(0))) {
6622+ APInt Mask = DB->getDemandedBits(OpI);
6623+ NumSignBits = std::max(NumSignBits, Mask.countl_zero());
6624+ }
6625+ if (NumSignBits * 2 >=
6626+ DL->getTypeSizeInBits(VL0->getOperand(0)->getType()))
6627+ ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
66166628 }
66176629 TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
66186630 ReuseShuffleIndicies);
@@ -6660,6 +6672,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
66606672 TE->setOperand(1, Right);
66616673 buildTree_rec(Left, Depth + 1, {TE, 0});
66626674 buildTree_rec(Right, Depth + 1, {TE, 1});
6675+ if (ShuffleOrOp == Instruction::ICmp) {
6676+ unsigned NumSignBits0 =
6677+ ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT);
6678+ if (NumSignBits0 * 2 >=
6679+ DL->getTypeSizeInBits(VL0->getOperand(0)->getType()))
6680+ ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
6681+ unsigned NumSignBits1 =
6682+ ComputeNumSignBits(VL0->getOperand(1), *DL, 0, AC, nullptr, DT);
6683+ if (NumSignBits1 * 2 >=
6684+ DL->getTypeSizeInBits(VL0->getOperand(1)->getType()))
6685+ ExtraBitWidthNodes.insert(getOperandEntry(TE, 1)->Idx);
6686+ }
66636687 return;
66646688 }
66656689 case Instruction::Select:
@@ -14302,7 +14326,8 @@ void BoUpSLP::computeMinimumValueSizes() {
1430214326 bool IsStoreOrInsertElt =
1430314327 VectorizableTree.front()->getOpcode() == Instruction::Store ||
1430414328 VectorizableTree.front()->getOpcode() == Instruction::InsertElement;
14305- if ((IsStoreOrInsertElt || UserIgnoreList) && TruncNodes.size() <= 1 &&
14329+ if ((IsStoreOrInsertElt || UserIgnoreList) &&
14330+ ExtraBitWidthNodes.size() <= 1 &&
1430614331 (!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 ||
1430714332 CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2))
1430814333 return;
@@ -14506,16 +14531,23 @@ void BoUpSLP::computeMinimumValueSizes() {
1450614531 IsTopRoot = false;
1450714532 IsProfitableToDemoteRoot = true;
1450814533
14509- if (TruncNodes .empty()) {
14534+ if (ExtraBitWidthNodes .empty()) {
1451014535 NodeIdx = VectorizableTree.size();
1451114536 } else {
1451214537 unsigned NewIdx = 0;
1451314538 do {
14514- NewIdx = *TruncNodes .begin() + 1 ;
14515- TruncNodes .erase(TruncNodes .begin());
14516- } while (NewIdx <= NodeIdx && !TruncNodes .empty());
14539+ NewIdx = *ExtraBitWidthNodes .begin();
14540+ ExtraBitWidthNodes .erase(ExtraBitWidthNodes .begin());
14541+ } while (NewIdx <= NodeIdx && !ExtraBitWidthNodes .empty());
1451714542 NodeIdx = NewIdx;
14518- IsTruncRoot = true;
14543+ IsTruncRoot =
14544+ NodeIdx < VectorizableTree.size() &&
14545+ any_of(VectorizableTree[NodeIdx]->UserTreeIndices,
14546+ [](const EdgeInfo &EI) {
14547+ return EI.EdgeIdx == 0 &&
14548+ EI.UserTE->getOpcode() == Instruction::Trunc &&
14549+ !EI.UserTE->isAltShuffle();
14550+ });
1451914551 }
1452014552
1452114553 // If the maximum bit width we compute is less than the width of the roots'
0 commit comments