@@ -9531,20 +9531,25 @@ getMainAltOpsNoStateVL(ArrayRef<Value *> VL) {
95319531}
95329532
95339533/// Checks that every instruction appears once in the list and if not, packs
9534- /// them, building \p ReuseShuffleIndices mask. The list of unique scalars is
9535- /// extended by poison values to the whole register size.
9534+ /// them, building \p ReuseShuffleIndices mask and mutating \p VL. The list of
9535+ /// unique scalars is extended by poison values to the whole register size.
9536+ ///
9537+ /// \returns false if \p VL could not be uniquified, in which case \p VL is
9538+ /// unchanged and \p ReuseShuffleIndices is empty.
95369539static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
95379540 SmallVectorImpl<int> &ReuseShuffleIndices,
95389541 const TargetTransformInfo &TTI,
95399542 const TargetLibraryInfo &TLI,
95409543 const InstructionsState &S,
95419544 const BoUpSLP::EdgeInfo &UserTreeIdx,
9542- bool TryPad) {
9545+ bool TryPad = false ) {
95439546 // Check that every instruction appears once in this bundle.
95449547 SmallVector<Value *> UniqueValues;
95459548 SmallDenseMap<Value *, unsigned, 16> UniquePositions(VL.size());
95469549 for (Value *V : VL) {
95479550 if (isConstant(V)) {
9551+ // Constants are always considered distinct, even if the same constant
9552+ // appears multiple times in VL.
95489553 ReuseShuffleIndices.emplace_back(
95499554 isa<PoisonValue>(V) ? PoisonMaskElem : UniqueValues.size());
95509555 UniqueValues.emplace_back(V);
@@ -9555,6 +9560,8 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
95559560 if (Res.second)
95569561 UniqueValues.emplace_back(V);
95579562 }
9563+
9564+ // Easy case: VL has unique values and a "natural" size.
95589565 size_t NumUniqueScalarValues = UniqueValues.size();
95599566 bool IsFullVectors = hasFullVectorsOrPowerOf2(
95609567 TTI, getValueType(UniqueValues.front()), NumUniqueScalarValues);
@@ -9570,8 +9577,10 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
95709577 !hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) {
95719578 LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
95729579 "for nodes with padding.\n");
9580+ ReuseShuffleIndices.clear();
95739581 return false;
95749582 }
9583+
95759584 LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
95769585 if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
95779586 (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
@@ -9600,16 +9609,18 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
96009609 // vectorization (div/rem are not allowed).
96019610 if (!getSameOpcode(PaddedUniqueValues, TLI).valid()) {
96029611 LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9612+ ReuseShuffleIndices.clear();
96039613 return false;
96049614 }
96059615 VL = std::move(PaddedUniqueValues);
96069616 }
96079617 return true;
96089618 }
96099619 LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9620+ ReuseShuffleIndices.clear();
96109621 return false;
96119622 }
9612- VL = UniqueValues;
9623+ VL = std::move( UniqueValues) ;
96139624 return true;
96149625}
96159626
@@ -10010,24 +10021,13 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
1001010021 return true;
1001110022}
1001210023
10013- void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL , unsigned Depth,
10024+ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef , unsigned Depth,
1001410025 const EdgeInfo &UserTreeIdx,
1001510026 unsigned InterleaveFactor) {
10016- assert((allConstant(VL ) || allSameType(VL )) && "Invalid types!");
10027+ assert((allConstant(VLRef ) || allSameType(VLRef )) && "Invalid types!");
1001710028
1001810029 SmallVector<int> ReuseShuffleIndices;
10019- SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
10020- auto TryToFindDuplicates = [&](const InstructionsState &S,
10021- bool TryPad = false) {
10022- if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
10023- S, UserTreeIdx, TryPad)) {
10024- VL = NonUniqueValueVL;
10025- return true;
10026- }
10027- auto Invalid = ScheduleBundle::invalid();
10028- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
10029- return false;
10030- };
10030+ SmallVector<Value *> VL(VLRef.begin(), VLRef.end());
1003110031
1003210032 InstructionsState S = InstructionsState::invalid();
1003310033 // Tries to build split node.
@@ -10073,11 +10073,12 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
1007310073 if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
1007410074 return;
1007510075 }
10076- if (!TryToPackDuplicates || TryToFindDuplicates(S)) {
10077- auto Invalid = ScheduleBundle::invalid();
10078- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10079- ReuseShuffleIndices);
10080- }
10076+ if (TryToPackDuplicates)
10077+ tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx);
10078+
10079+ auto Invalid = ScheduleBundle::invalid();
10080+ newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10081+ ReuseShuffleIndices);
1008110082 return;
1008210083 }
1008310084
@@ -10086,8 +10087,13 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
1008610087 return;
1008710088
1008810089 // Check that every instruction appears once in this bundle.
10089- if (!TryToFindDuplicates(S, /*TryPad=*/true))
10090+ if (!tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx,
10091+ /*TryPad=*/true)) {
10092+ auto Invalid = ScheduleBundle::invalid();
10093+ newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10094+ ReuseShuffleIndices);
1009010095 return;
10096+ }
1009110097
1009210098 // Perform specific checks for each particular instruction kind.
1009310099 bool IsScatterVectorizeUserTE =
@@ -10130,7 +10136,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
1013010136 NonScheduledFirst.insert(VL.front());
1013110137 if (S.getOpcode() == Instruction::Load &&
1013210138 BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit)
10133- registerNonVectorizableLoads(VL );
10139+ registerNonVectorizableLoads(ArrayRef(VL) );
1013410140 return;
1013510141 }
1013610142 ScheduleBundle Empty;
0 commit comments