@@ -9062,87 +9062,101 @@ getMainAltOpsNoStateVL(ArrayRef<Value *> VL) {
90629062 return std::make_pair(MainOp, AltOp);
90639063}
90649064
9065+ /// Packs the scalars of \p VL so that every non-constant value appears once, recording in \p ReuseShuffleIndices, for each original lane, the position of its value in the packed list (PoisonMaskElem for poison lanes).
9066+ /// Returns true on success: \p ReuseShuffleIndices is cleared when \p VL is kept as is, otherwise \p VL is replaced by the packed list, which with \p DoNotFail may be
9067+ /// extended by poison values to the whole register size. Returns false if the bundle cannot be packed; the mask is only appended to here and is not reset on failure.
9068+ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
9069+ SmallVectorImpl<int> &ReuseShuffleIndices,
9070+ const TargetTransformInfo &TTI,
9071+ const TargetLibraryInfo &TLI,
9072+ const InstructionsState &S,
9073+ const BoUpSLP::EdgeInfo &UserTreeIdx,
9074+ bool DoNotFail) {
9075+ // Collect the unique scalars of this bundle and build the lane -> unique-index mask.
9076+ SmallVector<Value *> UniqueValues;
9077+ SmallVector<Value *> NonUniqueValueVL; // Despite the name, receives the unique scalars plus poison padding (see below).
9078+ SmallDenseMap<Value *, unsigned, 16> UniquePositions(VL.size());
9079+ for (Value *V : VL) {
9080+ if (isConstant(V)) { // Constants are never deduplicated: each occurrence gets its own slot.
9081+ ReuseShuffleIndices.emplace_back(
9082+ isa<PoisonValue>(V) ? PoisonMaskElem : UniqueValues.size());
9083+ UniqueValues.emplace_back(V);
9084+ continue;
9085+ }
9086+ auto Res = UniquePositions.try_emplace(V, UniqueValues.size()); // Res.second is true only for a value not seen before.
9087+ ReuseShuffleIndices.emplace_back(Res.first->second);
9088+ if (Res.second)
9089+ UniqueValues.emplace_back(V);
9090+ }
9091+ size_t NumUniqueScalarValues = UniqueValues.size();
9092+ bool IsFullVectors = hasFullVectorsOrPowerOf2(
9093+ TTI, getValueType(UniqueValues.front()), NumUniqueScalarValues);
9094+ if (NumUniqueScalarValues == VL.size() && // Nothing was deduplicated.
9095+ (VectorizeNonPowerOf2 || IsFullVectors)) {
9096+ ReuseShuffleIndices.clear(); // No reshuffle is needed; VL is left untouched.
9097+ } else {
9098+ // FIXME: Reshuffling scalars is not supported yet for non-power-of-2 ops.
9099+ if ((UserTreeIdx.UserTE &&
9100+ UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(TTI)) ||
9101+ !hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) {
9102+ LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
9103+ "for nodes with padding.\n");
9104+ return false;
9105+ }
9106+ LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
9107+ if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
9108+ (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
9109+ return isa<UndefValue>(V) || !isConstant(V);
9110+ }))) {
9111+ if (DoNotFail && UniquePositions.size() > 1 &&
9112+ NumUniqueScalarValues > 1 && S.getMainOp()->isSafeToRemove() &&
9113+ all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
9114+ // Find the number of elements that forms full vectors, capped at the original bundle size.
9115+ unsigned PWSz = getFullVectorNumberOfElements(
9116+ TTI, UniqueValues.front()->getType(), UniqueValues.size());
9117+ PWSz = std::min<unsigned>(PWSz, VL.size());
9118+ if (PWSz == VL.size()) {
9119+ ReuseShuffleIndices.clear(); // Padding would reach the original size, so VL is kept unchanged.
9120+ } else {
9121+ NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
9122+ NonUniqueValueVL.append(
9123+ PWSz - UniqueValues.size(),
9124+ PoisonValue::get(UniqueValues.front()->getType()));
9125+ // Check that the operations extended with poisons are still valid for
9126+ // vectorization (div/rem are not allowed).
9127+ if (!getSameOpcode(NonUniqueValueVL, TLI).valid()) {
9128+ LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9129+ return false;
9130+ }
9131+ VL = NonUniqueValueVL; // Unique scalars followed by the poison padding.
9132+ }
9133+ return true;
9134+ }
9135+ LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9136+ return false;
9137+ }
9138+ VL = UniqueValues; // Packed list; ReuseShuffleIndices maps the original lanes onto it.
9139+ }
9140+ return true;
9141+ }
9142+
90659143void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
90669144 const EdgeInfo &UserTreeIdx,
90679145 unsigned InterleaveFactor) {
90689146 assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
90699147
90709148 SmallVector<int> ReuseShuffleIndices;
9071- SmallVector<Value *> UniqueValues;
9072- SmallVector<Value *> NonUniqueValueVL;
9149+ SmallVector<Value *> NonUniqueValueVL(VL.begin(), VL.end());
90739150 auto TryToFindDuplicates = [&](const InstructionsState &S,
90749151 bool DoNotFail = false) {
9075- // Check that every instruction appears once in this bundle.
9076- SmallDenseMap<Value *, unsigned, 16> UniquePositions(VL.size());
9077- for (Value *V : VL) {
9078- if (isConstant(V)) {
9079- ReuseShuffleIndices.emplace_back(
9080- isa<PoisonValue>(V) ? PoisonMaskElem : UniqueValues.size());
9081- UniqueValues.emplace_back(V);
9082- continue;
9083- }
9084- auto Res = UniquePositions.try_emplace(V, UniqueValues.size());
9085- ReuseShuffleIndices.emplace_back(Res.first->second);
9086- if (Res.second)
9087- UniqueValues.emplace_back(V);
9088- }
9089- size_t NumUniqueScalarValues = UniqueValues.size();
9090- bool IsFullVectors = hasFullVectorsOrPowerOf2(
9091- *TTI, getValueType(UniqueValues.front()), NumUniqueScalarValues);
9092- if (NumUniqueScalarValues == VL.size() &&
9093- (VectorizeNonPowerOf2 || IsFullVectors)) {
9094- ReuseShuffleIndices.clear();
9095- } else {
9096- // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops.
9097- if ((UserTreeIdx.UserTE &&
9098- UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(*TTI)) ||
9099- !hasFullVectorsOrPowerOf2(*TTI, getValueType(VL.front()),
9100- VL.size())) {
9101- LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
9102- "for nodes with padding.\n");
9103- auto Invalid = ScheduleBundle::invalid();
9104- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9105- return false;
9106- }
9107- LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
9108- if (NumUniqueScalarValues <= 1 || !IsFullVectors ||
9109- (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) {
9110- return isa<UndefValue>(V) || !isConstant(V);
9111- }))) {
9112- if (DoNotFail && UniquePositions.size() > 1 &&
9113- NumUniqueScalarValues > 1 && S.getMainOp()->isSafeToRemove() &&
9114- all_of(UniqueValues, IsaPred<Instruction, PoisonValue>)) {
9115- // Find the number of elements, which forms full vectors.
9116- unsigned PWSz = getFullVectorNumberOfElements(
9117- *TTI, UniqueValues.front()->getType(), UniqueValues.size());
9118- PWSz = std::min<unsigned>(PWSz, VL.size());
9119- if (PWSz == VL.size()) {
9120- ReuseShuffleIndices.clear();
9121- } else {
9122- NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end());
9123- NonUniqueValueVL.append(
9124- PWSz - UniqueValues.size(),
9125- PoisonValue::get(UniqueValues.front()->getType()));
9126- // Check that extended with poisons operations are still valid for
9127- // vectorization (div/rem are not allowed).
9128- if (!getSameOpcode(NonUniqueValueVL, *TLI).valid()) {
9129- LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9130- auto Invalid = ScheduleBundle::invalid();
9131- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9132- return false;
9133- }
9134- VL = NonUniqueValueVL;
9135- }
9136- return true;
9137- }
9138- LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
9139- auto Invalid = ScheduleBundle::invalid();
9140- newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9141- return false;
9142- }
9143- VL = UniqueValues;
9152+ if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI,
9153+ S, UserTreeIdx, DoNotFail)) {
9154+ VL = NonUniqueValueVL;
9155+ return true;
91449156 }
9145- return true;
9157+ auto Invalid = ScheduleBundle::invalid();
9158+ newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx);
9159+ return false;
91469160 };
91479161
91489162 InstructionsState S = getSameOpcode(VL, *TLI);
@@ -9610,8 +9624,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
96109624
96119625 BlockScheduling &BS = *BSRef;
96129626
9627+ SetVector<Value *> UniqueValues(VL.begin(), VL.end());
96139628 std::optional<ScheduleBundle *> BundlePtr =
9614- BS.tryScheduleBundle(UniqueValues, this, S);
9629+ BS.tryScheduleBundle(UniqueValues.getArrayRef() , this, S);
96159630#ifdef EXPENSIVE_CHECKS
96169631 // Make sure we didn't break any internal invariants
96179632 BS.verify();
0 commit comments