From 26e42265916fdfca45a5d5bd7d81d9680300159a Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Tue, 17 Dec 2024 00:08:42 -0800 Subject: [PATCH 01/14] [SLP] NFC. Replace MainOp and AltOp in TreeEntry with InstructionsState. InstructionsState will have default constructor. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 69 ++++++++----------- 1 file changed, 27 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d967813075bb9..2fd90137bd443 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -836,7 +836,7 @@ class InstructionsState { return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode; } - InstructionsState() = delete; + InstructionsState() = default; InstructionsState(Instruction *MainOp, Instruction *AltOp) : MainOp(MainOp), AltOp(AltOp) {} static InstructionsState invalid() { return {nullptr, nullptr}; } @@ -2407,15 +2407,16 @@ class BoUpSLP { } /// Go through the instructions in VL and append their operands. - void appendOperandsOfVL(ArrayRef VL, Instruction *VL0) { + void appendOperandsOfVL(ArrayRef VL, const InstructionsState &S) { assert(!VL.empty() && "Bad VL"); assert((empty() || VL.size() == getNumLanes()) && "Expected same number of lanes"); // IntrinsicInst::isCommutative returns true if swapping the first "two" // arguments to the intrinsic produces the same result. constexpr unsigned IntrinsicNumOperands = 2; - unsigned NumOperands = VL0->getNumOperands(); - ArgSize = isa(VL0) ? IntrinsicNumOperands : NumOperands; + unsigned NumOperands = S.getMainOp()->getNumOperands(); + ArgSize = isa(S.getMainOp()) ? IntrinsicNumOperands + : NumOperands; OpsVec.resize(NumOperands); unsigned NumLanes = VL.size(); for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { @@ -2435,8 +2436,8 @@ class BoUpSLP { // tell the inverse operations by checking commutativity. if (isa(VL[Lane])) { OpsVec[OpIdx][Lane] = { - PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true, - false}; + PoisonValue::get(S.getMainOp()->getOperand(OpIdx)->getType()), + true, false}; continue; } bool IsInverseOperation = !isCommutative(cast(VL[Lane])); @@ -2549,11 +2550,12 @@ class BoUpSLP { public: /// Initialize with all the operands of the instruction vector \p RootVL. - VLOperands(ArrayRef RootVL, Instruction *VL0, const BoUpSLP &R) + VLOperands(ArrayRef RootVL, const InstructionsState &S, + const BoUpSLP &R) : TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R), - L(R.LI->getLoopFor((VL0->getParent()))) { + L(R.LI->getLoopFor(S.getMainOp()->getParent())) { // Append all the operands of RootVL. - appendOperandsOfVL(RootVL, VL0); + appendOperandsOfVL(RootVL, S); } /// \Returns a value vector with the operands across all lanes for the @@ -3317,9 +3319,7 @@ class BoUpSLP { /// reordering of operands during buildTree_rec() and vectorizeTree(). SmallVector Operands; - /// The main/alternate instruction. - Instruction *MainOp = nullptr; - Instruction *AltOp = nullptr; + InstructionsState S; /// Interleaving factor for interleaved loads Vectorize nodes. unsigned InterleaveFactor = 0; @@ -3343,10 +3343,10 @@ class BoUpSLP { /// Set this bundle's operand from Scalars. void setOperand(const BoUpSLP &R, bool RequireReorder = false) { - VLOperands Ops(Scalars, MainOp, R); + VLOperands Ops(Scalars, S, R); if (RequireReorder) Ops.reorder(); - for (unsigned I : seq(MainOp->getNumOperands())) + for (unsigned I : seq(S.getMainOp()->getNumOperands())) setOperand(I, Ops.getVL(I)); } @@ -3379,13 +3379,9 @@ class BoUpSLP { } /// Some of the instructions in the list have alternate opcodes. - bool isAltShuffle() const { return MainOp != AltOp; } + bool isAltShuffle() const { return S.isAltShuffle(); } - bool isOpcodeOrAlt(Instruction *I) const { - unsigned CheckedOpcode = I->getOpcode(); - return (getOpcode() == CheckedOpcode || - getAltOpcode() == CheckedOpcode); - } + bool isOpcodeOrAlt(Instruction *I) const { return S.isOpcodeOrAlt(I); } /// Chooses the correct key for scheduling data. If \p Op has the same (or /// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is @@ -3394,30 +3390,19 @@ class BoUpSLP { auto *I = dyn_cast(Op); if (I && isOpcodeOrAlt(I)) return Op; - return MainOp; + return S.getMainOp(); } - void setOperations(const InstructionsState &S) { - MainOp = S.getMainOp(); - AltOp = S.getAltOp(); - } + void setOperations(const InstructionsState &S) { this->S = S; } - Instruction *getMainOp() const { - return MainOp; - } + Instruction *getMainOp() const { return S.getMainOp(); } - Instruction *getAltOp() const { - return AltOp; - } + Instruction *getAltOp() const { return S.getAltOp(); } /// The main/alternate opcodes for the list of instructions. - unsigned getOpcode() const { - return MainOp ? MainOp->getOpcode() : 0; - } + unsigned getOpcode() const { return S.getOpcode(); } - unsigned getAltOpcode() const { - return AltOp ? AltOp->getOpcode() : 0; - } + unsigned getAltOpcode() const { return S.getAltOpcode(); } /// When ReuseReorderShuffleIndices is empty it just returns position of \p /// V within vector of Scalars. Otherwise, try to remap on its reuse index. @@ -3514,13 +3499,13 @@ class BoUpSLP { break; } dbgs() << "MainOp: "; - if (MainOp) - dbgs() << *MainOp << "\n"; + if (S.getMainOp()) + dbgs() << *S.getMainOp() << "\n"; else dbgs() << "NULL\n"; dbgs() << "AltOp: "; - if (AltOp) - dbgs() << *AltOp << "\n"; + if (S.getAltOp()) + dbgs() << *S.getAltOp() << "\n"; else dbgs() << "NULL\n"; dbgs() << "VectorizedValue: "; @@ -8561,7 +8546,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n"); ValueList Left, Right; - VLOperands Ops(VL, VL0, *this); + VLOperands Ops(VL, S, *this); if (cast(VL0)->isCommutative()) { // Commutative predicate - collect + sort operands of the instructions // so that each side is more likely to have the same opcode. From a082a792737e14c01c5a9b4859b4eb8c6c0cff2a Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 8 Jan 2025 00:36:01 -0800 Subject: [PATCH 02/14] fix conflict --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9121b608bf23e..ab34348dce165 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -839,16 +839,12 @@ class InstructionsState { return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode; } -<<<<<<< HEAD - InstructionsState() = default; -======= /// Checks if the current state is valid, i.e. has non-null MainOp bool valid() const { return MainOp && AltOp; } explicit operator bool() const { return valid(); } InstructionsState() = delete; ->>>>>>> upstream/main InstructionsState(Instruction *MainOp, Instruction *AltOp) : MainOp(MainOp), AltOp(AltOp) {} static InstructionsState invalid() { return {nullptr, nullptr}; } @@ -3331,7 +3327,7 @@ class BoUpSLP { /// reordering of operands during buildTree_rec() and vectorizeTree(). SmallVector Operands; - InstructionsState S; + InstructionsState S = InstructionsState::invalid(); /// Interleaving factor for interleaved loads Vectorize nodes. unsigned InterleaveFactor = 0; @@ -3405,15 +3401,10 @@ class BoUpSLP { return S.getMainOp(); } -<<<<<<< HEAD - void setOperations(const InstructionsState &S) { this->S = S; } -======= void setOperations(const InstructionsState &S) { assert(S && "InstructionsState is invalid."); - MainOp = S.getMainOp(); - AltOp = S.getAltOp(); + this->S = S; } ->>>>>>> upstream/main Instruction *getMainOp() const { return S.getMainOp(); } From 78a78ad7b82a06a39380b47bcc927fe2bd6c7b8d Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 8 Jan 2025 00:34:54 -0800 Subject: [PATCH 03/14] fix assert which is occurred by InstructionsState getMainOp() -> isInstructionsStateValid() getMainOp() -> isInstructionsStateValid()? getMainOp(): nullptr getOpcode() -> isInstructionsStateValid() getOpcode() -> isInstructionsStateValid()? getOpcode(): 0 getOpcode() -> isInstructionsStateValid() && getOpcode() isAltShuffle() -> isInstructionsStateValid() && isAltShuffle() !isAltShuffle() -> !isInstructionsStateValid() || !isAltShuffle() --- .../Transforms/Vectorize/SLPVectorizer.cpp | 109 +++++++++++------- 1 file changed, 65 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ab34348dce165..7a44873f33d0d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3415,6 +3415,8 @@ class BoUpSLP { unsigned getAltOpcode() const { return S.getAltOpcode(); } + bool isInstructionsStateValid() const { return S.valid(); } + /// When ReuseReorderShuffleIndices is empty it just returns position of \p /// V within vector of Scalars. Otherwise, try to remap on its reuse index. int findLaneForValue(Value *V) const { @@ -3509,16 +3511,13 @@ class BoUpSLP { dbgs() << "CombinedVectorize\n"; break; } - dbgs() << "MainOp: "; - if (S.getMainOp()) - dbgs() << *S.getMainOp() << "\n"; - else - dbgs() << "NULL\n"; - dbgs() << "AltOp: "; - if (S.getAltOp()) - dbgs() << *S.getAltOp() << "\n"; - else - dbgs() << "NULL\n"; + if (S) { + dbgs() << "MainOp: " << *S.getMainOp() << "\n"; + dbgs() << "AltOp: " << *S.getAltOp() << "\n"; + } else { + dbgs() << "MainOp: NULL\n"; + dbgs() << "AltOp: NULL\n"; + } dbgs() << "VectorizedValue: "; if (VectorizedValue) dbgs() << *VectorizedValue << "\n"; @@ -5546,7 +5545,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // Try build correct order for extractelement instructions. SmallVector ReusedMask(TE.ReuseShuffleIndices.begin(), TE.ReuseShuffleIndices.end()); - if (TE.getOpcode() == Instruction::ExtractElement && + if (TE.isInstructionsStateValid() && + TE.getOpcode() == Instruction::ExtractElement && all_of(TE.Scalars, [Sz](Value *V) { if (isa(V)) return true; @@ -5708,10 +5708,12 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { return std::nullopt; // No need to reorder. return std::move(Phis); } - if (TE.isGather() && !TE.isAltShuffle() && allSameType(TE.Scalars)) { + if (TE.isGather() && (!TE.isInstructionsStateValid() || !TE.isAltShuffle()) && + allSameType(TE.Scalars)) { // TODO: add analysis of other gather nodes with extractelement // instructions and other values/instructions, not only undefs. - if ((TE.getOpcode() == Instruction::ExtractElement || + if (((TE.isInstructionsStateValid() && + TE.getOpcode() == Instruction::ExtractElement) || (all_of(TE.Scalars, IsaPred) && any_of(TE.Scalars, IsaPred))) && all_of(TE.Scalars, [](Value *V) { @@ -5721,8 +5723,10 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // Check that gather of extractelements can be represented as // just a shuffle of a single vector. OrdersType CurrentOrder; - bool Reuse = canReuseExtract(TE.Scalars, TE.getMainOp(), CurrentOrder, - /*ResizeAllowed=*/true); + bool Reuse = canReuseExtract( + TE.Scalars, TE.isInstructionsStateValid() ? TE.getMainOp() : nullptr, + CurrentOrder, + /*ResizeAllowed=*/true); if (Reuse || !CurrentOrder.empty()) return std::move(CurrentOrder); } @@ -5771,7 +5775,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { return Order; // Check if can include the order of vectorized loads. For masked gathers do // extra analysis later, so include such nodes into a special list. - if (TE.isGather() && TE.getOpcode() == Instruction::Load) { + if (TE.isInstructionsStateValid() && TE.getOpcode() == Instruction::Load) { SmallVector PointerOps; OrdersType CurrentOrder; LoadsState Res = canVectorizeLoads(TE.Scalars, TE.Scalars.front(), @@ -5886,7 +5890,7 @@ void BoUpSLP::reorderTopToBottom() { // Patterns like [fadd,fsub] can be combined into a single instruction in // x86. Reordering them into [fsub,fadd] blocks this pattern. So we need // to take into account their order when looking for the most used order. - if (TE->isAltShuffle()) { + if (TE->isInstructionsStateValid() && TE->isAltShuffle()) { VectorType *VecTy = getWidenedType(TE->Scalars[0]->getType(), TE->Scalars.size()); unsigned Opcode0 = TE->getOpcode(); @@ -9501,7 +9505,8 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) { // Do not reorder nodes if it small (just 2 elements), all-constant or all // instructions have same opcode already. - if (TE.Scalars.size() == 2 || (TE.getOpcode() && !TE.isAltShuffle()) || + if (TE.Scalars.size() == 2 || + (TE.isInstructionsStateValid() && !TE.isAltShuffle()) || all_of(TE.Scalars, isConstant)) return; @@ -9720,8 +9725,9 @@ void BoUpSLP::transformNodes() { // Do not try partial vectorization for small nodes (<= 2), nodes with the // same opcode and same parent block or all constants. if (VL.size() <= 2 || LoadEntriesToVectorize.contains(Idx) || - !(!E.getOpcode() || E.getOpcode() == Instruction::Load || - E.isAltShuffle() || !allSameBlock(VL)) || + !(!E.isInstructionsStateValid() || + E.getOpcode() == Instruction::Load || E.isAltShuffle() || + !allSameBlock(VL)) || allConstant(VL) || isSplat(VL)) continue; // Try to find vectorizable sequences and transform them into a series of @@ -9863,6 +9869,8 @@ void BoUpSLP::transformNodes() { E.ReorderIndices.clear(); } } + if (!E.isInstructionsStateValid()) + continue; switch (E.getOpcode()) { case Instruction::Load: { // No need to reorder masked gather loads, just reorder the scalar @@ -9982,7 +9990,7 @@ void BoUpSLP::transformNodes() { getCanonicalGraphSize() <= SmallTree && count_if(ArrayRef(VectorizableTree).drop_front(getCanonicalGraphSize()), [](const std::unique_ptr &TE) { - return TE->isGather() && + return TE->isGather() && TE->isInstructionsStateValid() && TE->getOpcode() == Instruction::Load && !allSameBlock(TE->Scalars); }) == 1) @@ -9998,13 +10006,13 @@ void BoUpSLP::transformNodes() { for (std::unique_ptr &TE : VectorizableTree) { TreeEntry &E = *TE; if (E.isGather() && - (E.getOpcode() == Instruction::Load || - (!E.getOpcode() && any_of(E.Scalars, - [&](Value *V) { - return isa(V) && - !isVectorized(V) && - !isDeleted(cast(V)); - }))) && + ((E.isInstructionsStateValid() && E.getOpcode() == Instruction::Load) || + (!E.isInstructionsStateValid() && + any_of(E.Scalars, + [&](Value *V) { + return isa(V) && !isVectorized(V) && + !isDeleted(cast(V)); + }))) && !isSplat(E.Scalars)) { for (Value *V : E.Scalars) { auto *LI = dyn_cast(V); @@ -10598,8 +10606,9 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { bool PrevNodeFound = any_of( ArrayRef(R.VectorizableTree).take_front(E->Idx), [&](const std::unique_ptr &TE) { - return ((!TE->isAltShuffle() && - TE->getOpcode() == Instruction::ExtractElement) || + return (((!TE->isInstructionsStateValid() || !TE->isAltShuffle()) && + (TE->isInstructionsStateValid() && + TE->getOpcode() == Instruction::ExtractElement)) || TE->isGather()) && all_of(enumerate(TE->Scalars), [&](auto &&Data) { return VL.size() > Data.index() && @@ -11712,7 +11721,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, for (const std::unique_ptr &TE : VectorizableTree) { if (TE.get() == E) break; - if (TE->isAltShuffle() && + if (TE->isInstructionsStateValid() && TE->isAltShuffle() && ((TE->getOpcode() == E->getOpcode() && TE->getAltOpcode() == E->getAltOpcode()) || (TE->getOpcode() == E->getAltOpcode() && @@ -11874,10 +11883,13 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const { [this](Value *V) { return EphValues.contains(V); }) && (allConstant(TE->Scalars) || isSplat(TE->Scalars) || TE->Scalars.size() < Limit || - ((TE->getOpcode() == Instruction::ExtractElement || + (((TE->isInstructionsStateValid() && + TE->getOpcode() == Instruction::ExtractElement) || all_of(TE->Scalars, IsaPred)) && isFixedVectorShuffle(TE->Scalars, Mask, AC)) || - (TE->getOpcode() == Instruction::Load && !TE->isAltShuffle()) || + ((TE->isInstructionsStateValid() && + TE->getOpcode() == Instruction::Load) && + (!TE->isInstructionsStateValid() || !TE->isAltShuffle())) || any_of(TE->Scalars, IsaPred)); }; @@ -12006,9 +12018,11 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { !VectorizableTree.empty() && all_of(VectorizableTree, [&](const std::unique_ptr &TE) { return (TE->isGather() && - TE->getOpcode() != Instruction::ExtractElement && + (!TE->isInstructionsStateValid() || + TE->getOpcode() != Instruction::ExtractElement) && count_if(TE->Scalars, IsaPred) <= Limit) || - TE->getOpcode() == Instruction::PHI; + (TE->isInstructionsStateValid() && + TE->getOpcode() == Instruction::PHI); })) return true; @@ -12042,6 +12056,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { return false; if (VectorizableTree.back()->isGather() && + VectorizableTree.back()->isInstructionsStateValid() && VectorizableTree.back()->isAltShuffle() && VectorizableTree.back()->getVectorFactor() > 2 && allSameBlock(VectorizableTree.back()->Scalars) && @@ -12066,7 +12081,7 @@ bool BoUpSLP::isTreeNotExtendable() const { getCanonicalGraphSize() <= SmallTree && count_if(ArrayRef(VectorizableTree).drop_front(getCanonicalGraphSize()), [](const std::unique_ptr &TE) { - return TE->isGather() && + return TE->isGather() && TE->isInstructionsStateValid() && TE->getOpcode() == Instruction::Load && !allSameBlock(TE->Scalars); }) == 1) @@ -12078,7 +12093,7 @@ bool BoUpSLP::isTreeNotExtendable() const { TreeEntry &E = *VectorizableTree[Idx]; if (!E.isGather()) continue; - if (E.getOpcode() && E.getOpcode() != Instruction::Load) + if (E.isInstructionsStateValid() && E.getOpcode() != Instruction::Load) return false; if (isSplat(E.Scalars) || allConstant(E.Scalars)) continue; @@ -12388,7 +12403,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { TE.dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n"); continue; } - if (TE.isGather()) { + if (TE.isGather() && TE.isInstructionsStateValid()) { if (const TreeEntry *E = getTreeEntry(TE.getMainOp()); E && E->getVectorFactor() == TE.getVectorFactor() && E->isSame(TE.Scalars)) { @@ -14833,14 +14848,17 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, } } // Gather extracts after we check for full matched gathers only. - if (!ExtractShuffles.empty() || E->getOpcode() != Instruction::Load || - ((E->getOpcode() == Instruction::Load || + if (!ExtractShuffles.empty() || + (!E->isInstructionsStateValid() || + E->getOpcode() != Instruction::Load) || + (((E->isInstructionsStateValid() && + E->getOpcode() == Instruction::Load) || any_of(E->Scalars, IsaPred)) && any_of(E->Scalars, [this](Value *V) { return isa(V) && getTreeEntry(V); })) || - E->isAltShuffle() || + (E->isInstructionsStateValid() && E->isAltShuffle()) || all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || isSplat(E->Scalars) || (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) { @@ -15220,7 +15238,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size()); if (E->isGather()) { // Set insert point for non-reduction initial nodes. - if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList) + if (E->isInstructionsStateValid() && E->Idx == 0 && !UserIgnoreList) setInsertPointAfterBundle(E); Value *Vec = createBuildVector(E, ScalarTy, PostponedPHIs); E->VectorizedValue = Vec; @@ -18309,7 +18327,9 @@ void BoUpSLP::computeMinimumValueSizes() { while (NodeIdx < VectorizableTree.size()) { ArrayRef TreeRoot = VectorizableTree[NodeIdx]->Scalars; unsigned Limit = 2; - unsigned Opcode = VectorizableTree[NodeIdx]->getOpcode(); + unsigned Opcode = VectorizableTree[NodeIdx]->isInstructionsStateValid() + ? VectorizableTree[NodeIdx]->getOpcode() + : 0; if (IsTopRoot && ReductionBitWidth == DL->getTypeSizeInBits( @@ -18362,7 +18382,8 @@ void BoUpSLP::computeMinimumValueSizes() { NodeIdx < VectorizableTree.size() && any_of(VectorizableTree[NodeIdx]->UserTreeIndices, [&](const EdgeInfo &EI) { - return EI.UserTE->getOpcode() == Instruction::ICmp && + return (EI.UserTE->isInstructionsStateValid() && + EI.UserTE->getOpcode() == Instruction::ICmp) && any_of(EI.UserTE->Scalars, [&](Value *V) { auto *IC = dyn_cast(V); return IC && From dc2f0d1154b05a3dc714843e99a0c95dc0ce6508 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 8 Jan 2025 00:39:29 -0800 Subject: [PATCH 04/14] add assert for getTreeEntry --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7a44873f33d0d..5af8579c1447d 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3692,9 +3692,13 @@ class BoUpSLP { } #endif - TreeEntry *getTreeEntry(Value *V) { return ScalarToTreeEntry.lookup(V); } + TreeEntry *getTreeEntry(Value *V) { + assert(V && "V cannot be nullptr."); + return ScalarToTreeEntry.lookup(V); + } const TreeEntry *getTreeEntry(Value *V) const { + assert(V && "V cannot be nullptr."); return ScalarToTreeEntry.lookup(V); } From e9897c3d365f253997eede1fea50f24e18cb2fd2 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 8 Jan 2025 00:40:36 -0800 Subject: [PATCH 05/14] OpValue is not used in canReuseExtract --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 5af8579c1447d..00840514a89bd 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3036,7 +3036,7 @@ class BoUpSLP { /// non-identity permutation that allows to reuse extract instructions. /// \param ResizeAllowed indicates whether it is allowed to handle subvector /// extract order. - bool canReuseExtract(ArrayRef VL, Value *OpValue, + bool canReuseExtract(ArrayRef VL, SmallVectorImpl &CurrentOrder, bool ResizeAllowed = false) const; @@ -5727,10 +5727,8 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // Check that gather of extractelements can be represented as // just a shuffle of a single vector. OrdersType CurrentOrder; - bool Reuse = canReuseExtract( - TE.Scalars, TE.isInstructionsStateValid() ? TE.getMainOp() : nullptr, - CurrentOrder, - /*ResizeAllowed=*/true); + bool Reuse = + canReuseExtract(TE.Scalars, CurrentOrder, /*ResizeAllowed=*/true); if (Reuse || !CurrentOrder.empty()) return std::move(CurrentOrder); } @@ -7574,7 +7572,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState( } case Instruction::ExtractValue: case Instruction::ExtractElement: { - bool Reuse = canReuseExtract(VL, VL0, CurrentOrder); + bool Reuse = canReuseExtract(VL, CurrentOrder); // FIXME: Vectorizing is not supported yet for non-power-of-2 ops. if (!has_single_bit(VL.size())) return TreeEntry::NeedToGather; @@ -8855,7 +8853,7 @@ unsigned BoUpSLP::canMapToVector(Type *T) const { return N; } -bool BoUpSLP::canReuseExtract(ArrayRef VL, Value *OpValue, +bool BoUpSLP::canReuseExtract(ArrayRef VL, SmallVectorImpl &CurrentOrder, bool ResizeAllowed) const { const auto *It = find_if(VL, IsaPred); From ede73b62257959427828171dc7efcab6d08c43ae Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 8 Jan 2025 01:26:38 -0800 Subject: [PATCH 06/14] get opcode from TreeEntry instead of the parameter --- .../Transforms/Vectorize/SLPVectorizer.cpp | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 00840514a89bd..b34beb1dbdb66 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -18129,10 +18129,9 @@ void BoUpSLP::computeMinimumValueSizes() { return; SmallVector ToDemote; - auto ComputeMaxBitWidth = [&](const TreeEntry &E, bool IsTopRoot, - bool IsProfitableToDemoteRoot, unsigned Opcode, - unsigned Limit, bool IsTruncRoot, - bool IsSignedCmp) -> unsigned { + auto ComputeMaxBitWidth = + [&](const TreeEntry &E, bool IsTopRoot, bool IsProfitableToDemoteRoot, + unsigned Limit, bool IsTruncRoot, bool IsSignedCmp) -> unsigned { ToDemote.clear(); // Check if the root is trunc and the next node is gather/buildvector, then // keep trunc in scalars, which is free in most cases. @@ -18173,11 +18172,14 @@ void BoUpSLP::computeMinimumValueSizes() { return MaxBitWidth; } + if (!E.isInstructionsStateValid()) + return 0u; + unsigned VF = E.getVectorFactor(); Type *ScalarTy = E.Scalars.front()->getType(); unsigned ScalarTyNumElements = getNumElements(ScalarTy); auto *TreeRootIT = dyn_cast(ScalarTy->getScalarType()); - if (!TreeRootIT || !Opcode) + if (!TreeRootIT) return 0u; if (any_of(E.Scalars, @@ -18249,6 +18251,7 @@ void BoUpSLP::computeMinimumValueSizes() { IntegerType::get(F->getContext(), bit_ceil(MaxBitWidth)), VF))) return 0u; + unsigned Opcode = E.getOpcode(); bool IsProfitableToDemote = Opcode == Instruction::Trunc || Opcode == Instruction::SExt || Opcode == Instruction::ZExt || NumParts > 1; @@ -18329,17 +18332,14 @@ void BoUpSLP::computeMinimumValueSizes() { while (NodeIdx < VectorizableTree.size()) { ArrayRef TreeRoot = VectorizableTree[NodeIdx]->Scalars; unsigned Limit = 2; - unsigned Opcode = VectorizableTree[NodeIdx]->isInstructionsStateValid() - ? VectorizableTree[NodeIdx]->getOpcode() - : 0; if (IsTopRoot && ReductionBitWidth == DL->getTypeSizeInBits( VectorizableTree.front()->Scalars.front()->getType())) Limit = 3; unsigned MaxBitWidth = ComputeMaxBitWidth( - *VectorizableTree[NodeIdx], IsTopRoot, IsProfitableToDemoteRoot, Opcode, - Limit, IsTruncRoot, IsSignedCmp); + *VectorizableTree[NodeIdx], IsTopRoot, IsProfitableToDemoteRoot, Limit, + IsTruncRoot, IsSignedCmp); if (ReductionBitWidth != 0 && (IsTopRoot || !RootDemotes.empty())) { if (MaxBitWidth != 0 && ReductionBitWidth < MaxBitWidth) ReductionBitWidth = bit_ceil(MaxBitWidth); From 719d126506806a258a2aac1149209a3afa4d1a88 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 8 Jan 2025 00:49:45 -0800 Subject: [PATCH 07/14] add assert for isInstructionsStateValid --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index b34beb1dbdb66..1c1a4d6f36f09 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3415,7 +3415,11 @@ class BoUpSLP { unsigned getAltOpcode() const { return S.getAltOpcode(); } - bool isInstructionsStateValid() const { return S.valid(); } + bool isInstructionsStateValid() const { + assert((S.valid() || isGather()) && + "Invalid InstructionsState must be gathered."); + return S.valid(); + } /// When ReuseReorderShuffleIndices is empty it just returns position of \p /// V within vector of Scalars. Otherwise, try to remap on its reuse index. From bb3dc5eedac49bcc05f2435070b432df63bcd070 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 8 Jan 2025 01:48:17 -0800 Subject: [PATCH 08/14] merge isInstructionsStateValid --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 1c1a4d6f36f09..6c7587488f13e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -10612,9 +10612,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { bool PrevNodeFound = any_of( ArrayRef(R.VectorizableTree).take_front(E->Idx), [&](const std::unique_ptr &TE) { - return (((!TE->isInstructionsStateValid() || !TE->isAltShuffle()) && - (TE->isInstructionsStateValid() && - TE->getOpcode() == Instruction::ExtractElement)) || + return ((TE->isInstructionsStateValid() && !TE->isAltShuffle() && + TE->getOpcode() == Instruction::ExtractElement) || TE->isGather()) && all_of(enumerate(TE->Scalars), [&](auto &&Data) { return VL.size() > Data.index() && From 390e15b5595c49327b1de9fd77b8e16450b8361a Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 8 Jan 2025 01:59:26 -0800 Subject: [PATCH 09/14] clang-format --- .../Transforms/Vectorize/SLPVectorizer.cpp | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 6c7587488f13e..fd68f08df2c21 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -18385,20 +18385,21 @@ void BoUpSLP::computeMinimumValueSizes() { }); IsSignedCmp = NodeIdx < VectorizableTree.size() && - any_of(VectorizableTree[NodeIdx]->UserTreeIndices, - [&](const EdgeInfo &EI) { - return (EI.UserTE->isInstructionsStateValid() && - EI.UserTE->getOpcode() == Instruction::ICmp) && - any_of(EI.UserTE->Scalars, [&](Value *V) { - auto *IC = dyn_cast(V); - return IC && - (IC->isSigned() || - !isKnownNonNegative(IC->getOperand(0), - SimplifyQuery(*DL)) || - !isKnownNonNegative(IC->getOperand(1), - SimplifyQuery(*DL))); - }); - }); + any_of( + VectorizableTree[NodeIdx]->UserTreeIndices, + [&](const EdgeInfo &EI) { + return (EI.UserTE->isInstructionsStateValid() && + EI.UserTE->getOpcode() == Instruction::ICmp) && + any_of(EI.UserTE->Scalars, [&](Value *V) { + auto *IC = dyn_cast(V); + return IC && + (IC->isSigned() || + !isKnownNonNegative(IC->getOperand(0), + SimplifyQuery(*DL)) || + !isKnownNonNegative(IC->getOperand(1), + SimplifyQuery(*DL))); + }); + }); } // If the maximum bit width we compute is less than the width of the roots' From 2590b2aa3483d6a74f2ca22dcf1f9150d7a55b97 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 8 Jan 2025 04:00:40 -0800 Subject: [PATCH 10/14] add assert for isGather --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fd68f08df2c21..9064c2917f6b2 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3263,7 +3263,12 @@ class BoUpSLP { }; /// Checks if the current node is a gather node. - bool isGather() const {return State == NeedToGather; } + bool isGather() const { + assert( + (State == NeedToGather || S.valid()) && + "InstructionsState must be valid if the TreeEntry is not gathered."); + return State == NeedToGather; + } /// A vector of scalars. ValueList Scalars; From 918dc7d6432a8de53373b01296579f4b776a5f93 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 8 Jan 2025 04:50:07 -0800 Subject: [PATCH 11/14] fix isAltShuffle --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9064c2917f6b2..4d8185fa79a96 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -5980,7 +5980,7 @@ void BoUpSLP::reorderTopToBottom() { if (It != GathersToOrders.end()) return It->second; } - if (OpTE->isAltShuffle()) { + if (OpTE->isInstructionsStateValid() && OpTE->isAltShuffle()) { auto It = AltShufflesToOrders.find(OpTE); if (It != AltShufflesToOrders.end()) return It->second; From 3bba2ed586deeaecc8a0caaa58a9874da7196cf3 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Thu, 9 Jan 2025 06:36:17 -0800 Subject: [PATCH 12/14] rename isInstructionsStateValid to hasState --- .../Transforms/Vectorize/SLPVectorizer.cpp | 80 +++++++++---------- 1 file changed, 36 insertions(+), 44 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4d8185fa79a96..7444860495050 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3420,7 +3420,7 @@ class BoUpSLP { unsigned getAltOpcode() const { return S.getAltOpcode(); } - bool isInstructionsStateValid() const { + bool hasState() const { assert((S.valid() || isGather()) && "Invalid InstructionsState must be gathered."); return S.valid(); @@ -5558,8 +5558,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { // Try build correct order for extractelement instructions. SmallVector ReusedMask(TE.ReuseShuffleIndices.begin(), TE.ReuseShuffleIndices.end()); - if (TE.isInstructionsStateValid() && - TE.getOpcode() == Instruction::ExtractElement && + if (TE.hasState() && TE.getOpcode() == Instruction::ExtractElement && all_of(TE.Scalars, [Sz](Value *V) { if (isa(V)) return true; @@ -5721,12 +5720,11 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { return std::nullopt; // No need to reorder. return std::move(Phis); } - if (TE.isGather() && (!TE.isInstructionsStateValid() || !TE.isAltShuffle()) && + if (TE.isGather() && (!TE.hasState() || !TE.isAltShuffle()) && allSameType(TE.Scalars)) { // TODO: add analysis of other gather nodes with extractelement // instructions and other values/instructions, not only undefs. - if (((TE.isInstructionsStateValid() && - TE.getOpcode() == Instruction::ExtractElement) || + if (((TE.hasState() && TE.getOpcode() == Instruction::ExtractElement) || (all_of(TE.Scalars, IsaPred) && any_of(TE.Scalars, IsaPred))) && all_of(TE.Scalars, [](Value *V) { @@ -5786,7 +5784,7 @@ BoUpSLP::getReorderingData(const TreeEntry &TE, bool TopToBottom) { return Order; // Check if can include the order of vectorized loads. For masked gathers do // extra analysis later, so include such nodes into a special list. - if (TE.isInstructionsStateValid() && TE.getOpcode() == Instruction::Load) { + if (TE.hasState() && TE.getOpcode() == Instruction::Load) { SmallVector PointerOps; OrdersType CurrentOrder; LoadsState Res = canVectorizeLoads(TE.Scalars, TE.Scalars.front(), @@ -5901,7 +5899,7 @@ void BoUpSLP::reorderTopToBottom() { // Patterns like [fadd,fsub] can be combined into a single instruction in // x86. Reordering them into [fsub,fadd] blocks this pattern. So we need // to take into account their order when looking for the most used order. - if (TE->isInstructionsStateValid() && TE->isAltShuffle()) { + if (TE->hasState() && TE->isAltShuffle()) { VectorType *VecTy = getWidenedType(TE->Scalars[0]->getType(), TE->Scalars.size()); unsigned Opcode0 = TE->getOpcode(); @@ -5980,7 +5978,7 @@ void BoUpSLP::reorderTopToBottom() { if (It != GathersToOrders.end()) return It->second; } - if (OpTE->isInstructionsStateValid() && OpTE->isAltShuffle()) { + if (OpTE->hasState() && OpTE->isAltShuffle()) { auto It = AltShufflesToOrders.find(OpTE); if (It != AltShufflesToOrders.end()) return It->second; @@ -9516,8 +9514,7 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) { // Do not reorder nodes if it small (just 2 elements), all-constant or all // instructions have same opcode already. - if (TE.Scalars.size() == 2 || - (TE.isInstructionsStateValid() && !TE.isAltShuffle()) || + if (TE.Scalars.size() == 2 || (TE.hasState() && !TE.isAltShuffle()) || all_of(TE.Scalars, isConstant)) return; @@ -9736,9 +9733,8 @@ void BoUpSLP::transformNodes() { // Do not try partial vectorization for small nodes (<= 2), nodes with the // same opcode and same parent block or all constants. if (VL.size() <= 2 || LoadEntriesToVectorize.contains(Idx) || - !(!E.isInstructionsStateValid() || - E.getOpcode() == Instruction::Load || E.isAltShuffle() || - !allSameBlock(VL)) || + !(!E.hasState() || E.getOpcode() == Instruction::Load || + E.isAltShuffle() || !allSameBlock(VL)) || allConstant(VL) || isSplat(VL)) continue; // Try to find vectorizable sequences and transform them into a series of @@ -9880,7 +9876,7 @@ void BoUpSLP::transformNodes() { E.ReorderIndices.clear(); } } - if (!E.isInstructionsStateValid()) + if (!E.hasState()) continue; switch (E.getOpcode()) { case Instruction::Load: { @@ -10001,7 +9997,7 @@ void BoUpSLP::transformNodes() { getCanonicalGraphSize() <= SmallTree && count_if(ArrayRef(VectorizableTree).drop_front(getCanonicalGraphSize()), [](const std::unique_ptr &TE) { - return TE->isGather() && TE->isInstructionsStateValid() && + return TE->isGather() && TE->hasState() && TE->getOpcode() == Instruction::Load && !allSameBlock(TE->Scalars); }) == 1) @@ -10017,13 +10013,13 @@ void BoUpSLP::transformNodes() { for (std::unique_ptr &TE : VectorizableTree) { TreeEntry &E = *TE; if (E.isGather() && - ((E.isInstructionsStateValid() && E.getOpcode() == Instruction::Load) || - (!E.isInstructionsStateValid() && - any_of(E.Scalars, - [&](Value *V) { - return isa(V) && !isVectorized(V) && - !isDeleted(cast(V)); - }))) && + ((E.hasState() && E.getOpcode() == Instruction::Load) || + (!E.hasState() && any_of(E.Scalars, + [&](Value *V) { + return isa(V) && + !isVectorized(V) && + !isDeleted(cast(V)); + }))) && !isSplat(E.Scalars)) { for (Value *V : E.Scalars) { auto *LI = dyn_cast(V); @@ -10617,7 +10613,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis { bool PrevNodeFound = any_of( ArrayRef(R.VectorizableTree).take_front(E->Idx), [&](const std::unique_ptr &TE) { - return ((TE->isInstructionsStateValid() && !TE->isAltShuffle() && + return ((TE->hasState() && !TE->isAltShuffle() && TE->getOpcode() == Instruction::ExtractElement) || TE->isGather()) && all_of(enumerate(TE->Scalars), [&](auto &&Data) { @@ -11731,7 +11727,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, for (const std::unique_ptr &TE : VectorizableTree) { if (TE.get() == E) break; - if (TE->isInstructionsStateValid() && TE->isAltShuffle() && + if (TE->hasState() && TE->isAltShuffle() && ((TE->getOpcode() == E->getOpcode() && TE->getAltOpcode() == E->getAltOpcode()) || (TE->getOpcode() == E->getAltOpcode() && @@ -11893,13 +11889,12 @@ bool BoUpSLP::isFullyVectorizableTinyTree(bool ForReduction) const { [this](Value *V) { return EphValues.contains(V); }) && (allConstant(TE->Scalars) || isSplat(TE->Scalars) || TE->Scalars.size() < Limit || - (((TE->isInstructionsStateValid() && + (((TE->hasState() && TE->getOpcode() == Instruction::ExtractElement) || all_of(TE->Scalars, IsaPred)) && isFixedVectorShuffle(TE->Scalars, Mask, AC)) || - ((TE->isInstructionsStateValid() && - TE->getOpcode() == Instruction::Load) && - (!TE->isInstructionsStateValid() || !TE->isAltShuffle())) || + ((TE->hasState() && TE->getOpcode() == Instruction::Load) && + (!TE->hasState() || !TE->isAltShuffle())) || any_of(TE->Scalars, IsaPred)); }; @@ -12028,11 +12023,10 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { !VectorizableTree.empty() && all_of(VectorizableTree, [&](const std::unique_ptr &TE) { return (TE->isGather() && - (!TE->isInstructionsStateValid() || + (!TE->hasState() || TE->getOpcode() != Instruction::ExtractElement) && count_if(TE->Scalars, IsaPred) <= Limit) || - (TE->isInstructionsStateValid() && - TE->getOpcode() == Instruction::PHI); + (TE->hasState() && TE->getOpcode() == Instruction::PHI); })) return true; @@ -12066,7 +12060,7 @@ bool BoUpSLP::isTreeTinyAndNotFullyVectorizable(bool ForReduction) const { return false; if (VectorizableTree.back()->isGather() && - VectorizableTree.back()->isInstructionsStateValid() && + VectorizableTree.back()->hasState() && VectorizableTree.back()->isAltShuffle() && VectorizableTree.back()->getVectorFactor() > 2 && allSameBlock(VectorizableTree.back()->Scalars) && @@ -12091,7 +12085,7 @@ bool BoUpSLP::isTreeNotExtendable() const { getCanonicalGraphSize() <= SmallTree && count_if(ArrayRef(VectorizableTree).drop_front(getCanonicalGraphSize()), [](const std::unique_ptr &TE) { - return TE->isGather() && TE->isInstructionsStateValid() && + return TE->isGather() && TE->hasState() && TE->getOpcode() == Instruction::Load && !allSameBlock(TE->Scalars); }) == 1) @@ -12103,7 +12097,7 @@ bool BoUpSLP::isTreeNotExtendable() const { TreeEntry &E = *VectorizableTree[Idx]; if (!E.isGather()) continue; - if (E.isInstructionsStateValid() && E.getOpcode() != Instruction::Load) + if (E.hasState() && E.getOpcode() != Instruction::Load) return false; if (isSplat(E.Scalars) || allConstant(E.Scalars)) continue; @@ -12413,7 +12407,7 @@ InstructionCost BoUpSLP::getTreeCost(ArrayRef VectorizedVals) { TE.dump(); dbgs() << "SLP: Current total cost = " << Cost << "\n"); continue; } - if (TE.isGather() && TE.isInstructionsStateValid()) { + if (TE.isGather() && TE.hasState()) { if (const TreeEntry *E = getTreeEntry(TE.getMainOp()); E && E->getVectorFactor() == TE.getVectorFactor() && E->isSame(TE.Scalars)) { @@ -14859,16 +14853,14 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, } // Gather extracts after we check for full matched gathers only. if (!ExtractShuffles.empty() || - (!E->isInstructionsStateValid() || - E->getOpcode() != Instruction::Load) || - (((E->isInstructionsStateValid() && - E->getOpcode() == Instruction::Load) || + (!E->hasState() || E->getOpcode() != Instruction::Load) || + (((E->hasState() && E->getOpcode() == Instruction::Load) || any_of(E->Scalars, IsaPred)) && any_of(E->Scalars, [this](Value *V) { return isa(V) && getTreeEntry(V); })) || - (E->isInstructionsStateValid() && E->isAltShuffle()) || + (E->hasState() && E->isAltShuffle()) || all_of(E->Scalars, [this](Value *V) { return getTreeEntry(V); }) || isSplat(E->Scalars) || (E->Scalars != GatheredScalars && GatheredScalars.size() <= 2)) { @@ -15248,7 +15240,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size()); if (E->isGather()) { // Set insert point for non-reduction initial nodes. - if (E->isInstructionsStateValid() && E->Idx == 0 && !UserIgnoreList) + if (E->hasState() && E->Idx == 0 && !UserIgnoreList) setInsertPointAfterBundle(E); Value *Vec = createBuildVector(E, ScalarTy, PostponedPHIs); E->VectorizedValue = Vec; @@ -18180,7 +18172,7 @@ void BoUpSLP::computeMinimumValueSizes() { return MaxBitWidth; } - if (!E.isInstructionsStateValid()) + if (!E.hasState()) return 0u; unsigned VF = E.getVectorFactor(); @@ -18393,7 +18385,7 @@ void BoUpSLP::computeMinimumValueSizes() { any_of( VectorizableTree[NodeIdx]->UserTreeIndices, [&](const EdgeInfo &EI) { - return (EI.UserTE->isInstructionsStateValid() && + return (EI.UserTE->hasState() && EI.UserTE->getOpcode() == Instruction::ICmp) && any_of(EI.UserTE->Scalars, [&](Value *V) { auto *IC = dyn_cast(V); From f5c0acd4d27866613372dc02b9e19fec3c433c14 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Thu, 9 Jan 2025 06:50:29 -0800 Subject: [PATCH 13/14] remove assert in isGather and hasState --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7444860495050..a602cfbac3ceb 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3263,12 +3263,7 @@ class BoUpSLP { }; /// Checks if the current node is a gather node. - bool isGather() const { - assert( - (State == NeedToGather || S.valid()) && - "InstructionsState must be valid if the TreeEntry is not gathered."); - return State == NeedToGather; - } + bool isGather() const { return State == NeedToGather; } /// A vector of scalars. ValueList Scalars; @@ -3420,11 +3415,7 @@ class BoUpSLP { unsigned getAltOpcode() const { return S.getAltOpcode(); } - bool hasState() const { - assert((S.valid() || isGather()) && - "Invalid InstructionsState must be gathered."); - return S.valid(); - } + bool hasState() const { return S.valid(); } /// When ReuseReorderShuffleIndices is empty it just returns position of \p /// V within vector of Scalars. Otherwise, try to remap on its reuse index. From 0367ff67475c675728206ad8f5b5fa90c3a128d3 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Thu, 9 Jan 2025 08:09:11 -0800 Subject: [PATCH 14/14] apply comment --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index a602cfbac3ceb..781ef29cb5a41 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -3327,6 +3327,8 @@ class BoUpSLP { /// reordering of operands during buildTree_rec() and vectorizeTree(). SmallVector Operands; + /// MainOp and AltOp are recorded inside. S should be obtained from + /// newTreeEntry. InstructionsState S = InstructionsState::invalid(); /// Interleaving factor for interleaved loads Vectorize nodes. @@ -14843,8 +14845,8 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Type *ScalarTy, } } // Gather extracts after we check for full matched gathers only. - if (!ExtractShuffles.empty() || - (!E->hasState() || E->getOpcode() != Instruction::Load) || + if (!ExtractShuffles.empty() || !E->hasState() || + E->getOpcode() != Instruction::Load || (((E->hasState() && E->getOpcode() == Instruction::Load) || any_of(E->Scalars, IsaPred)) && any_of(E->Scalars,