From b0a0e077bbe1833cc7e8ac6404edb58a46078a82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Fri, 11 Apr 2025 10:50:11 +0000 Subject: [PATCH 1/3] tryToFindDuplicates(): Limit indentation --- .../Transforms/Vectorize/SLPVectorizer.cpp | 79 ++++++++++--------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 53da78ee599b7..2ad82165d3015 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9562,49 +9562,50 @@ static bool tryToFindDuplicates(SmallVectorImpl &VL, if (NumUniqueScalarValues == VL.size() && (VectorizeNonPowerOf2 || IsFullVectors)) { ReuseShuffleIndices.clear(); - } else { - // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops. - if ((UserTreeIdx.UserTE && - UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(TTI)) || - !hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) { - LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported " - "for nodes with padding.\n"); - return false; - } - LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); - if (NumUniqueScalarValues <= 1 || !IsFullVectors || - (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) { - return isa(V) || !isConstant(V); - }))) { - if (DoNotFail && UniquePositions.size() > 1 && - NumUniqueScalarValues > 1 && S.getMainOp()->isSafeToRemove() && - all_of(UniqueValues, IsaPred)) { - // Find the number of elements, which forms full vectors. - unsigned PWSz = getFullVectorNumberOfElements( - TTI, UniqueValues.front()->getType(), UniqueValues.size()); - PWSz = std::min(PWSz, VL.size()); - if (PWSz == VL.size()) { - ReuseShuffleIndices.clear(); - } else { - NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end()); - NonUniqueValueVL.append( - PWSz - UniqueValues.size(), - PoisonValue::get(UniqueValues.front()->getType())); - // Check that extended with poisons operations are still valid for - // vectorization (div/rem are not allowed). - if (!getSameOpcode(NonUniqueValueVL, TLI).valid()) { - LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); - return false; - } - VL = NonUniqueValueVL; + return true; + } + + // FIXME: Reshuffing scalars is not supported yet for non-power-of-2 ops. + if ((UserTreeIdx.UserTE && + UserTreeIdx.UserTE->hasNonWholeRegisterOrNonPowerOf2Vec(TTI)) || + !hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) { + LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported " + "for nodes with padding.\n"); + return false; + } + LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); + if (NumUniqueScalarValues <= 1 || !IsFullVectors || + (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) { + return isa(V) || !isConstant(V); + }))) { + if (DoNotFail && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 && + S.getMainOp()->isSafeToRemove() && + all_of(UniqueValues, IsaPred)) { + // Find the number of elements, which forms full vectors. + unsigned PWSz = getFullVectorNumberOfElements( + TTI, UniqueValues.front()->getType(), UniqueValues.size()); + PWSz = std::min(PWSz, VL.size()); + if (PWSz == VL.size()) { + ReuseShuffleIndices.clear(); + } else { + NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end()); + NonUniqueValueVL.append( + PWSz - UniqueValues.size(), + PoisonValue::get(UniqueValues.front()->getType())); + // Check that extended with poisons operations are still valid for + // vectorization (div/rem are not allowed). + if (!getSameOpcode(NonUniqueValueVL, TLI).valid()) { + LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); + return false; } - return true; + VL = NonUniqueValueVL; } - LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); - return false; + return true; } - VL = UniqueValues; + LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); + return false; } + VL = UniqueValues; return true; } From ed19c40818c7b267b2df2d500ee54bb9a29acfd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Fri, 11 Apr 2025 11:43:51 +0000 Subject: [PATCH 2/3] tryToFindDuplicates(): more descriptive naming --- .../Transforms/Vectorize/SLPVectorizer.cpp | 24 +++++++++++-------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 2ad82165d3015..930b3efe52809 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9539,10 +9539,9 @@ static bool tryToFindDuplicates(SmallVectorImpl &VL, const TargetLibraryInfo &TLI, const InstructionsState &S, const BoUpSLP::EdgeInfo &UserTreeIdx, - bool DoNotFail) { + bool TryPad) { // Check that every instruction appears once in this bundle. SmallVector UniqueValues; - SmallVector NonUniqueValueVL; SmallDenseMap UniquePositions(VL.size()); for (Value *V : VL) { if (isConstant(V)) { @@ -9578,7 +9577,7 @@ static bool tryToFindDuplicates(SmallVectorImpl &VL, (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) { return isa(V) || !isConstant(V); }))) { - if (DoNotFail && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 && + if (TryPad && UniquePositions.size() > 1 && NumUniqueScalarValues > 1 && S.getMainOp()->isSafeToRemove() && all_of(UniqueValues, IsaPred)) { // Find the number of elements, which forms full vectors. @@ -9586,19 +9585,24 @@ static bool tryToFindDuplicates(SmallVectorImpl &VL, TTI, UniqueValues.front()->getType(), UniqueValues.size()); PWSz = std::min(PWSz, VL.size()); if (PWSz == VL.size()) { + // We ended up with the same size after removing duplicates and + // upgrading the resulting vector size to a "nice size". Just keep + // the initial VL then. ReuseShuffleIndices.clear(); } else { - NonUniqueValueVL.assign(UniqueValues.begin(), UniqueValues.end()); - NonUniqueValueVL.append( + // Pad unique values with poison to grow the vector to a "nice" size + SmallVector PaddedUniqueValues(UniqueValues.begin(), + UniqueValues.end()); + PaddedUniqueValues.append( PWSz - UniqueValues.size(), PoisonValue::get(UniqueValues.front()->getType())); // Check that extended with poisons operations are still valid for // vectorization (div/rem are not allowed). - if (!getSameOpcode(NonUniqueValueVL, TLI).valid()) { + if (!getSameOpcode(PaddedUniqueValues, TLI).valid()) { LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); return false; } - VL = NonUniqueValueVL; + VL = std::move(PaddedUniqueValues); } return true; } @@ -10014,9 +10018,9 @@ void BoUpSLP::buildTreeRec(ArrayRef VL, unsigned Depth, SmallVector ReuseShuffleIndices; SmallVector NonUniqueValueVL(VL.begin(), VL.end()); auto TryToFindDuplicates = [&](const InstructionsState &S, - bool DoNotFail = false) { + bool TryPad = false) { if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI, - S, UserTreeIdx, DoNotFail)) { + S, UserTreeIdx, TryPad)) { VL = NonUniqueValueVL; return true; } @@ -10082,7 +10086,7 @@ void BoUpSLP::buildTreeRec(ArrayRef VL, unsigned Depth, return; // Check that every instruction appears once in this bundle. - if (!TryToFindDuplicates(S, /*DoNotFail=*/true)) + if (!TryToFindDuplicates(S, /*TryPad=*/true)) return; // Perform specific checks for each particular instruction kind. From 6df22ca80daf3f58879c3df943a4c7f79cbfb335 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABtan=20Bossu?= Date: Fri, 11 Apr 2025 12:20:59 +0000 Subject: [PATCH 3/3] tryToFindDuplicates(): more consistent outputs In particular, ReuseShuffleIndices is always in a consistent state, which allows to call newTreeEntry with the same parameters regardless of the result of tryToFindDuplicates(). --- .../Transforms/Vectorize/SLPVectorizer.cpp | 56 ++++++++++--------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 930b3efe52809..7b3db42973082 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9531,20 +9531,25 @@ getMainAltOpsNoStateVL(ArrayRef VL) { } /// Checks that every instruction appears once in the list and if not, packs -/// them, building \p ReuseShuffleIndices mask. The list of unique scalars is -/// extended by poison values to the whole register size. +/// them, building \p ReuseShuffleIndices mask and mutating \p VL. The list of +/// unique scalars is extended by poison values to the whole register size. +/// +/// \returns false if \p VL could not be uniquified, in which case \p VL is +/// unchanged and \p ReuseShuffleIndices is empty. static bool tryToFindDuplicates(SmallVectorImpl &VL, SmallVectorImpl &ReuseShuffleIndices, const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, const InstructionsState &S, const BoUpSLP::EdgeInfo &UserTreeIdx, - bool TryPad) { + bool TryPad = false) { // Check that every instruction appears once in this bundle. SmallVector UniqueValues; SmallDenseMap UniquePositions(VL.size()); for (Value *V : VL) { if (isConstant(V)) { + // Constants are always considered distinct, even if the same constant + // appears multiple times in VL. ReuseShuffleIndices.emplace_back( isa(V) ? PoisonMaskElem : UniqueValues.size()); UniqueValues.emplace_back(V); @@ -9555,6 +9560,8 @@ static bool tryToFindDuplicates(SmallVectorImpl &VL, if (Res.second) UniqueValues.emplace_back(V); } + + // Easy case: VL has unique values and a "natural" size size_t NumUniqueScalarValues = UniqueValues.size(); bool IsFullVectors = hasFullVectorsOrPowerOf2( TTI, getValueType(UniqueValues.front()), NumUniqueScalarValues); @@ -9570,8 +9577,10 @@ static bool tryToFindDuplicates(SmallVectorImpl &VL, !hasFullVectorsOrPowerOf2(TTI, getValueType(VL.front()), VL.size())) { LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported " "for nodes with padding.\n"); + ReuseShuffleIndices.clear(); return false; } + LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); if (NumUniqueScalarValues <= 1 || !IsFullVectors || (UniquePositions.size() == 1 && all_of(UniqueValues, [](Value *V) { @@ -9600,6 +9609,7 @@ static bool tryToFindDuplicates(SmallVectorImpl &VL, // vectorization (div/rem are not allowed). if (!getSameOpcode(PaddedUniqueValues, TLI).valid()) { LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); + ReuseShuffleIndices.clear(); return false; } VL = std::move(PaddedUniqueValues); @@ -9607,9 +9617,10 @@ static bool tryToFindDuplicates(SmallVectorImpl &VL, return true; } LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); + ReuseShuffleIndices.clear(); return false; } - VL = UniqueValues; + VL = std::move(UniqueValues); return true; } @@ -10010,24 +10021,13 @@ bool BoUpSLP::isLegalToVectorizeScalars(ArrayRef VL, unsigned Depth, return true; } -void BoUpSLP::buildTreeRec(ArrayRef VL, unsigned Depth, +void BoUpSLP::buildTreeRec(ArrayRef VLRef, unsigned Depth, const EdgeInfo &UserTreeIdx, unsigned InterleaveFactor) { - assert((allConstant(VL) || allSameType(VL)) && "Invalid types!"); + assert((allConstant(VLRef) || allSameType(VLRef)) && "Invalid types!"); SmallVector ReuseShuffleIndices; - SmallVector NonUniqueValueVL(VL.begin(), VL.end()); - auto TryToFindDuplicates = [&](const InstructionsState &S, - bool TryPad = false) { - if (tryToFindDuplicates(NonUniqueValueVL, ReuseShuffleIndices, *TTI, *TLI, - S, UserTreeIdx, TryPad)) { - VL = NonUniqueValueVL; - return true; - } - auto Invalid = ScheduleBundle::invalid(); - newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx); - return false; - }; + SmallVector VL(VLRef.begin(), VLRef.end()); InstructionsState S = InstructionsState::invalid(); // Tries to build split node. @@ -10073,11 +10073,12 @@ void BoUpSLP::buildTreeRec(ArrayRef VL, unsigned Depth, if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp))) return; } - if (!TryToPackDuplicates || TryToFindDuplicates(S)) { - auto Invalid = ScheduleBundle::invalid(); - newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndices); - } + if (TryToPackDuplicates) + tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx); + + auto Invalid = ScheduleBundle::invalid(); + newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndices); return; } @@ -10086,8 +10087,13 @@ void BoUpSLP::buildTreeRec(ArrayRef VL, unsigned Depth, return; // Check that every instruction appears once in this bundle. - if (!TryToFindDuplicates(S, /*TryPad=*/true)) + if (!tryToFindDuplicates(VL, ReuseShuffleIndices, *TTI, *TLI, S, UserTreeIdx, + /*TryPad=*/true)) { + auto Invalid = ScheduleBundle::invalid(); + newTreeEntry(VL, Invalid /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndices); return; + } // Perform specific checks for each particular instruction kind. bool IsScatterVectorizeUserTE = @@ -10130,7 +10136,7 @@ void BoUpSLP::buildTreeRec(ArrayRef VL, unsigned Depth, NonScheduledFirst.insert(VL.front()); if (S.getOpcode() == Instruction::Load && BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit) - registerNonVectorizableLoads(VL); + registerNonVectorizableLoads(ArrayRef(VL)); return; } ScheduleBundle Empty;