@@ -9531,21 +9531,25 @@ getMainAltOpsNoStateVL(ArrayRef<Value *> VL) {
95319531}
95329532
95339533/// Checks that every instruction appears once in the list and if not, packs
9534- /// them, building \p ReuseShuffleIndices mask. The list of unique scalars is
9535- /// extended by poison values to the whole register size.
9534+ /// them, building \p ReuseShuffleIndices mask and mutating \p VL. The list of
9535+ /// unique scalars is extended by poison values to the whole register size.
9536+ ///
9537+ /// \returns false if \p VL could not be uniquified, in which case \p VL is
9538+ /// unchanged and \p ReuseShuffleIndices is empty.
95369539static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
95379540 SmallVectorImpl<int> &ReuseShuffleIndices,
95389541 const TargetTransformInfo &TTI,
95399542 const TargetLibraryInfo &TLI,
95409543 const InstructionsState &S,
95419544 const BoUpSLP::EdgeInfo &UserTreeIdx,
9542- bool DoNotFail ) {
9545+ bool TryPad = false ) {
95439546 // Check that every instruction appears once in this bundle.
95449547 SmallVector<Value *> UniqueValues;
9545- SmallVector<Value *> NonUniqueValueVL;
95469548 SmallDenseMap<Value *, unsigned, 16> UniquePositions(VL.size());
95479549 for (Value *V : VL) {
95489550 if (isConstant(V)) {
9551+ // Constants are always considered distinct, even if the same constant
9552+ // appears multiple times in VL.
95499553 ReuseShuffleIndices.emplace_back(
95509554 isa<PoisonValue>(V) ? PoisonMaskElem : UniqueValues.size());
95519555 UniqueValues.emplace_back(V);
@@ -9556,55 +9560,67 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
95569560 if (Res.second)
95579561 UniqueValues.emplace_back(V);
95589562 }
9563+
9564+ // Easy case: VL has unique values and a "natural" size
95599565 size_t NumUniqueScalarValues = UniqueValues.size();
95609566 bool IsFullVectors = hasFullVectorsOrPowerOf2(
95619567 TTI, getValueType(UniqueValues.front()), NumUniqueScalarValues);
95629568 if (NumUniqueScalarValues == VL.size() &&
95639569 (VectorizeNonPowerOf2 || IsFullVectors)) {
95649570 ReuseShuffleIndices.clear();
9565- } else {
9566- // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
9567- if ((UserTreeIdx.UserTE &&
9568- UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(TTI)) ||
9569- !hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) {
9570- LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
9571- "for nodes with padding.\n");
9572- return false;
9573- }
9574- LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
9575- if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
9576- (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
9577- return isa<UndefValue>(V) || !isConstant(V);
9578- }))) {
9579- if (DoNotFail && UniquePositions.size() > 1 &&
9580- NumUniqueScalarValues > 1 && S.getMainOp()->isSafeToRemove() &&
9581- all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
9582- // Find the number of elements, which forms full vectors.
9583- unsigned PWSz = getFullVectorNumberOfElements(
9584- TTI, UniqueValues.front()->getType(), UniqueValues.size());
9585- PWSz = std::min<unsigned>(PWSz, VL.size());
9586- if (PWSz == VL.size()) {
9571+ return true;
9572+ }
9573+
9574+ // FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
9575+ if ((UserTreeIdx.UserTE &&
9576+ UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(TTI)) ||
9577+ !hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) {
9578+ LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
9579+ "for nodes with padding.\n");
9580+ ReuseShuffleIndices.clear();
9581+ return false;
9582+ }
9583+
9584+ LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
9585+ if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
9586+ (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
9587+ return isa<UndefValue>(V) || !isConstant(V);
9588+ }))) {
9589+ if (TryPad && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 &&
9590+ S.getMainOp()->isSafeToRemove() &&
9591+ all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
9592+ // Find the number of elements, which forms full vectors.
9593+ unsigned PWSz = getFullVectorNumberOfElements(
9594+ TTI, UniqueValues.front()->getType(), UniqueValues.size());
9595+ PWSz = std::min<unsigned>(PWSz, VL.size());
9596+ if (PWSz == VL.size()) {
9597+ // We ended up with the same size after removing duplicates and
9598+ // upgrading the resulting vector size to a "nice size". Just keep
9599+ // the initial VL then.
9600+ ReuseShuffleIndices.clear();
9601+ } else {
9602+ // Pad unique values with poison to grow the vector to a "nice" size
9603+ SmallVector<Value *> PaddedUniqueValues(UniqueValues.begin(),
9604+ UniqueValues.end());
9605+ PaddedUniqueValues.append(
9606+ PWSz - UniqueValues.size(),
9607+ PoisonValue::get(UniqueValues.front()->getType()));
9608+ // Check that the operations, once extended with poisons, are still valid
9609+ // for vectorization (div/rem are not allowed).
9610+ if (!getSameOpcode(PaddedUniqueValues, TLI).valid()) {
9611+ LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
95879612 ReuseShuffleIndices.clear();
9588- } else {
9589- NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
9590- NonUniqueValueVL.append(
9591- PWSz - UniqueValues.size(),
9592- PoisonValue::get(UniqueValues.front()->getType()));
9593- // Check that extended with poisons operations are still valid for
9594- // vectorization (div/rem are not allowed).
9595- if (!getSameOpcode(NonUniqueValueVL, TLI).valid()) {
9596- LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9597- return false;
9598- }
9599- VL = NonUniqueValueVL;
9613+ return false;
96009614 }
9601- return true ;
9615+ VL = std::move(PaddedUniqueValues) ;
96029616 }
9603- LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9604- return false;
9617+ return true;
96059618 }
9606- VL = UniqueValues;
9619+ LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9620+ ReuseShuffleIndices.clear();
9621+ return false;
96079622 }
9623+ VL = std::move(UniqueValues);
96089624 return true;
96099625}
96109626
@@ -10005,24 +10021,13 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef<Value *> VL, unsigned Depth,
1000510021 return true;
1000610022}
1000710023
10008- void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL , unsigned Depth,
10024+ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef , unsigned Depth,
1000910025 const EdgeInfo &UserTreeIdx,
1001010026 unsigned InterleaveFactor) {
10011- assert((allConstant(VL ) || allSameType(VL )) && "Invalid types!");
10027+ assert((allConstant(VLRef ) || allSameType(VLRef )) && "Invalid types!");
1001210028
1001310029 SmallVector<int> ReuseShuffleIndices;
10014- SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
10015- auto TryToFindDuplicates = [&](const InstructionsState &S,
10016- bool DoNotFail = false) {
10017- if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
10018- S, UserTreeIdx, DoNotFail)) {
10019- VL = NonUniqueValueVL;
10020- return true;
10021- }
10022- auto Invalid = ScheduleBundle::invalid();
10023- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
10024- return false;
10025- };
10030+ SmallVector<Value *> VL(VLRef.begin(), VLRef.end());
1002610031
1002710032 InstructionsState S = InstructionsState::invalid();
1002810033 // Tries to build split node.
@@ -10068,11 +10073,12 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
1006810073 if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
1006910074 return;
1007010075 }
10071- if (!TryToPackDuplicates || TryToFindDuplicates(S)) {
10072- auto Invalid = ScheduleBundle::invalid();
10073- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10074- ReuseShuffleIndices);
10075- }
10076+ if (TryToPackDuplicates)
10077+ tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx);
10078+
10079+ auto Invalid = ScheduleBundle::invalid();
10080+ newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10081+ ReuseShuffleIndices);
1007610082 return;
1007710083 }
1007810084
@@ -10081,8 +10087,13 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
1008110087 return;
1008210088
1008310089 // Check that every instruction appears once in this bundle.
10084- if (!TryToFindDuplicates(S, /*DoNotFail=*/true))
10090+ if (!tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx,
10091+ /*TryPad=*/true)) {
10092+ auto Invalid = ScheduleBundle::invalid();
10093+ newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx,
10094+ ReuseShuffleIndices);
1008510095 return;
10096+ }
1008610097
1008710098 // Perform specific checks for each particular instruction kind.
1008810099 bool IsScatterVectorizeUserTE =
@@ -10125,7 +10136,7 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VL, unsigned Depth,
1012510136 NonScheduledFirst.insert(VL.front());
1012610137 if (S.getOpcode() == Instruction::Load &&
1012710138 BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit)
10128- registerNonVectorizableLoads(VL );
10139+ registerNonVectorizableLoads(ArrayRef(VL) );
1012910140 return;
1013010141 }
1013110142 ScheduleBundle Empty;
0 commit comments