From 5624823cb9b7e7a14c84f3991eeeb24f0f97ebe6 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Tue, 17 Dec 2024 01:37:04 -0800 Subject: [PATCH 1/4] [SLP] NFC. Use getMainOp if users just want to know whether VL has same opcode. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 27 +++++++++---------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index d967813075bb9..8a6958c3541bd 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -868,8 +868,8 @@ static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0, (!isa(BaseOp0) && !isa(Op0) && !isa(BaseOp1) && !isa(Op1)) || BaseOp0 == Op0 || BaseOp1 == Op1 || - getSameOpcode({BaseOp0, Op0}, TLI).getOpcode() || - getSameOpcode({BaseOp1, Op1}, TLI).getOpcode(); + getSameOpcode({BaseOp0, Op0}, TLI).getMainOp() || + getSameOpcode({BaseOp1, Op1}, TLI).getMainOp(); } /// \returns true if a compare instruction \p CI has similar "look" and @@ -2380,7 +2380,7 @@ class BoUpSLP { // Use Boyer-Moore majority voting for finding the majority opcode and // the number of times it occurs. if (auto *I = dyn_cast(OpData.V)) { - if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).getOpcode() || + if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).getMainOp() || I->getParent() != Parent) { if (NumOpsWithSameOpcodeParent == 0) { NumOpsWithSameOpcodeParent = 1; @@ -2500,7 +2500,7 @@ class BoUpSLP { // next lane does not build same opcode sequence. (Lns == 2 && !getSameOpcode({Op, getValue((OpI + 1) % OpE, Ln)}, TLI) - .getOpcode() && + .getMainOp() && isa(Data.V)))) || // 3. The operand in the current lane is loop invariant (can be // hoisted out) and another operand is also a loop invariant @@ -2509,7 +2509,7 @@ class BoUpSLP { // FIXME: need to teach the cost model about this case for better // estimation. (IsInvariant && !isa(Data.V) && - !getSameOpcode({Op, Data.V}, TLI).getOpcode() && + !getSameOpcode({Op, Data.V}, TLI).getMainOp() && L->isLoopInvariant(Data.V))) { FoundCandidate = true; Data.IsUsed = Data.V == Op; @@ -2539,7 +2539,7 @@ class BoUpSLP { return true; Value *OpILn = getValue(OpI, Ln); return (L && L->isLoopInvariant(OpILn)) || - (getSameOpcode({Op, OpILn}, TLI).getOpcode() && + (getSameOpcode({Op, OpILn}, TLI).getMainOp() && allSameBlock({Op, OpILn})); })) return true; @@ -4766,7 +4766,7 @@ static bool arePointersCompatible(Value *Ptr1, Value *Ptr2, !CompareOpcodes || (GEP1 && GEP2 && getSameOpcode({GEP1->getOperand(1), GEP2->getOperand(1)}, TLI) - .getOpcode())); + .getMainOp())); } /// Calculates minimal alignment as a common alignment. @@ -13223,7 +13223,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( Value *In1 = PHI1->getIncomingValue(I); if (isConstant(In) && isConstant(In1)) continue; - if (!getSameOpcode({In, In1}, *TLI).getOpcode()) + if (!getSameOpcode({In, In1}, *TLI).getMainOp()) return false; if (cast(In)->getParent() != cast(In1)->getParent()) @@ -13251,7 +13251,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( if (It != UsedValuesEntry.end()) UsedInSameVTE = It->second == UsedValuesEntry.find(V)->second; return V != V1 && MightBeIgnored(V1) && !UsedInSameVTE && - getSameOpcode({V, V1}, *TLI).getOpcode() && + getSameOpcode({V, V1}, *TLI).getMainOp() && cast(V)->getParent() == cast(V1)->getParent() && (!isa(V1) || AreCompatiblePHIs(V, V1)); @@ -21346,8 +21346,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { return false; if (I1->getParent() != I2->getParent()) return false; - InstructionsState S = getSameOpcode({I1, I2}, *TLI); - if (S.getOpcode()) + if (getSameOpcode({I1, I2}, *TLI).getMainOp()) continue; return false; } @@ -21701,8 +21700,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) { "Different nodes should have different DFS numbers"); if (NodeI1 != NodeI2) return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn(); - InstructionsState S = getSameOpcode({I1, I2}, *TLI); - if (S.getOpcode()) + if (getSameOpcode({I1, I2}, *TLI).getMainOp()) return false; return I1->getOpcode() < I2->getOpcode(); } @@ -21728,8 +21726,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) { if (auto *I2 = dyn_cast(V2->getValueOperand())) { if (I1->getParent() != I2->getParent()) return false; - InstructionsState S = getSameOpcode({I1, I2}, *TLI); - return S.getOpcode() > 0; + return getSameOpcode({I1, I2}, *TLI).getMainOp() != nullptr; } if (isa(V1->getValueOperand()) && isa(V2->getValueOperand())) From 3b885f24b0287a0e08642fc9502ec3e438b24faf Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Tue, 17 Dec 2024 22:30:03 -0800 Subject: [PATCH 2/4] add InstructionsState::valid --- .../Transforms/Vectorize/SLPVectorizer.cpp | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 8a6958c3541bd..3c8b7dca86b62 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -836,6 +836,8 @@ class InstructionsState { return getOpcode() == CheckedOpcode || getAltOpcode() == CheckedOpcode; } + bool valid() const { return MainOp != nullptr; } + InstructionsState() = delete; InstructionsState(Instruction *MainOp, Instruction *AltOp) : MainOp(MainOp), AltOp(AltOp) {} @@ -868,8 +870,8 @@ static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0, (!isa(BaseOp0) && !isa(Op0) && !isa(BaseOp1) && !isa(Op1)) || BaseOp0 == Op0 || BaseOp1 == Op1 || - getSameOpcode({BaseOp0, Op0}, TLI).getMainOp() || - getSameOpcode({BaseOp1, Op1}, TLI).getMainOp(); + getSameOpcode({BaseOp0, Op0}, TLI).valid() || + getSameOpcode({BaseOp1, Op1}, TLI).valid(); } /// \returns true if a compare instruction \p CI has similar "look" and @@ -2380,7 +2382,7 @@ class BoUpSLP { // Use Boyer-Moore majority voting for finding the majority opcode and // the number of times it occurs. if (auto *I = dyn_cast(OpData.V)) { - if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).getMainOp() || + if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).valid() || I->getParent() != Parent) { if (NumOpsWithSameOpcodeParent == 0) { NumOpsWithSameOpcodeParent = 1; @@ -2500,7 +2502,7 @@ class BoUpSLP { // next lane does not build same opcode sequence. (Lns == 2 && !getSameOpcode({Op, getValue((OpI + 1) % OpE, Ln)}, TLI) - .getMainOp() && + .valid() && isa(Data.V)))) || // 3. The operand in the current lane is loop invariant (can be // hoisted out) and another operand is also a loop invariant @@ -2509,7 +2511,7 @@ class BoUpSLP { // FIXME: need to teach the cost model about this case for better // estimation. (IsInvariant && !isa(Data.V) && - !getSameOpcode({Op, Data.V}, TLI).getMainOp() && + !getSameOpcode({Op, Data.V}, TLI).valid() && L->isLoopInvariant(Data.V))) { FoundCandidate = true; Data.IsUsed = Data.V == Op; @@ -2539,7 +2541,7 @@ class BoUpSLP { return true; Value *OpILn = getValue(OpI, Ln); return (L && L->isLoopInvariant(OpILn)) || - (getSameOpcode({Op, OpILn}, TLI).getMainOp() && + (getSameOpcode({Op, OpILn}, TLI).valid() && allSameBlock({Op, OpILn})); })) return true; @@ -4766,7 +4768,7 @@ static bool arePointersCompatible(Value *Ptr1, Value *Ptr2, !CompareOpcodes || (GEP1 && GEP2 && getSameOpcode({GEP1->getOperand(1), GEP2->getOperand(1)}, TLI) - .getMainOp())); + .valid())); } /// Calculates minimal alignment as a common alignment. @@ -7488,7 +7490,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S, [&](ArrayRef Op) { if (allConstant(Op) || (!isSplat(Op) && allSameBlock(Op) && allSameType(Op) && - getSameOpcode(Op, *TLI).getMainOp())) + getSameOpcode(Op, *TLI).valid())) return false; DenseMap Uniques; for (Value *V : Op) { @@ -13223,7 +13225,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( Value *In1 = PHI1->getIncomingValue(I); if (isConstant(In) && isConstant(In1)) continue; - if (!getSameOpcode({In, In1}, *TLI).getMainOp()) + if (!getSameOpcode({In, In1}, *TLI).valid()) return false; if (cast(In)->getParent() != cast(In1)->getParent()) @@ -13251,7 +13253,7 @@ BoUpSLP::isGatherShuffledSingleRegisterEntry( if (It != UsedValuesEntry.end()) UsedInSameVTE = It->second == UsedValuesEntry.find(V)->second; return V != V1 && MightBeIgnored(V1) && !UsedInSameVTE && - getSameOpcode({V, V1}, *TLI).getMainOp() && + getSameOpcode({V, V1}, *TLI).valid() && cast(V)->getParent() == cast(V1)->getParent() && (!isa(V1) || AreCompatiblePHIs(V, V1)); @@ -21346,7 +21348,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { return false; if (I1->getParent() != I2->getParent()) return false; - if (getSameOpcode({I1, I2}, *TLI).getMainOp()) + if (getSameOpcode({I1, I2}, *TLI).valid()) continue; return false; } @@ -21700,7 +21702,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) { "Different nodes should have different DFS numbers"); if (NodeI1 != NodeI2) return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn(); - if (getSameOpcode({I1, I2}, *TLI).getMainOp()) + if (getSameOpcode({I1, I2}, *TLI).valid()) return false; return I1->getOpcode() < I2->getOpcode(); } @@ -21726,7 +21728,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) { if (auto *I2 = dyn_cast(V2->getValueOperand())) { if (I1->getParent() != I2->getParent()) return false; - return getSameOpcode({I1, I2}, *TLI).getMainOp() != nullptr; + return getSameOpcode({I1, I2}, *TLI).valid(); } if (isa(V1->getValueOperand()) && isa(V2->getValueOperand())) From c9ccb5577ac8c9994199bd761231ab1b1fd41321 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Tue, 17 Dec 2024 02:15:42 -0800 Subject: [PATCH 3/4] [SLP] NFC. Use InstructionsState::getOpcode only when necessary. Use isa, isa_and_present and dyn_cast_if_present instead of getOpcode. --- .../Transforms/Vectorize/SLPVectorizer.cpp | 89 +++++++++---------- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 3c8b7dca86b62..98d8fd638ca60 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -821,7 +821,8 @@ class InstructionsState { /// The main/alternate opcodes for the list of instructions. unsigned getOpcode() const { - return MainOp ? MainOp->getOpcode() : 0; + assert(MainOp && "InstructionsState is invalid."); + return MainOp->getOpcode(); } unsigned getAltOpcode() const { @@ -1847,7 +1848,7 @@ class BoUpSLP { InstructionsState S = getSameOpcode(Ops, TLI); // Note: Only consider instructions with <= 2 operands to avoid // complexity explosion. - if (S.getOpcode() && + if (S.getMainOp() && (S.getMainOp()->getNumOperands() <= 2 || !MainAltOps.empty() || !S.isAltShuffle()) && all_of(Ops, [&S](Value *V) { @@ -2698,7 +2699,7 @@ class BoUpSLP { OperandData &AltOp = getData(OpIdx, Lane); InstructionsState OpS = getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V}, TLI); - if (OpS.getOpcode() && OpS.isAltShuffle()) + if (OpS.getMainOp() && OpS.isAltShuffle()) MainAltOps[OpIdx].push_back(AltOp.V); } } @@ -3594,8 +3595,8 @@ class BoUpSLP { // Gathered loads still gathered? Do not create entry, use the original one. if (GatheredLoadsEntriesFirst.has_value() && EntryState == TreeEntry::NeedToGather && - S.getOpcode() == Instruction::Load && UserTreeIdx.EdgeIdx == UINT_MAX && - !UserTreeIdx.UserTE) + isa_and_present(S.getMainOp()) && + UserTreeIdx.EdgeIdx == UINT_MAX && !UserTreeIdx.UserTE) return nullptr; VectorizableTree.push_back(std::make_unique(VectorizableTree)); TreeEntry *Last = VectorizableTree.back().get(); @@ -8069,7 +8070,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } // Check if this is a duplicate of another entry. - if (S.getOpcode()) { + if (S.getMainOp()) { if (TreeEntry *E = getTreeEntry(S.getMainOp())) { LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n"); @@ -8135,8 +8136,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, all_of(VL, [&S](const Value *I) { return match(I, m_OneUse(m_ZExtOrSExt(m_OneUse(m_Load(m_Value()))))) && - cast(I)->getOpcode() == - S.getMainOp()->getOpcode(); + cast(I)->getOpcode() == S.getOpcode(); })))) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); if (TryToFindDuplicates(S)) @@ -8146,15 +8146,14 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } // Don't handle scalable vectors - if (S.getOpcode() == Instruction::ExtractElement && - isa( - cast(S.getMainOp())->getVectorOperandType())) { - LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n"); - if (TryToFindDuplicates(S)) - newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, - ReuseShuffleIndices); - return; - } + if (auto *EE = dyn_cast_if_present(S.getMainOp())) + if (isa(EE->getVectorOperandType())) { + LLVM_DEBUG(dbgs() << "SLP: Gathering due to scalable vector type.\n"); + if (TryToFindDuplicates(S)) + newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, + ReuseShuffleIndices); + return; + } // Don't handle vectors. if (!SLPReVec && getValueType(VL.front())->isVectorTy()) { @@ -8170,7 +8169,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // vectorize. auto &&NotProfitableForVectorization = [&S, this, Depth](ArrayRef VL) { - if (!S.getOpcode() || !S.isAltShuffle() || VL.size() > 2) + if (!S.getMainOp() || !S.isAltShuffle() || VL.size() > 2) return false; if (VectorizableTree.size() < MinTreeSize) return false; @@ -8225,7 +8224,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, bool IsScatterVectorizeUserTE = UserTreeIdx.UserTE && UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize; - bool AreAllSameBlock = S.getOpcode() && allSameBlock(VL); + bool AreAllSameBlock = S.getMainOp() && allSameBlock(VL); bool AreScatterAllGEPSameBlock = (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() && VL.size() > 2 && @@ -8242,7 +8241,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE, SortedIndices)); bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock; - if (!AreAllSameInsts || (!S.getOpcode() && allConstant(VL)) || isSplat(VL) || + if (!AreAllSameInsts || (!S.getMainOp() && allConstant(VL)) || isSplat(VL) || (isa_and_present( S.getMainOp()) && !all_of(VL, isVectorLikeInstWithConstOps)) || @@ -8255,7 +8254,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } // Don't vectorize ephemeral values. - if (S.getOpcode() && !EphValues.empty()) { + if (S.getMainOp() && !EphValues.empty()) { for (Value *V : VL) { if (EphValues.count(V)) { LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V @@ -8361,15 +8360,15 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndices); NonScheduledFirst.insert(VL.front()); - if (S.getOpcode() == Instruction::Load && + if (isa(S.getMainOp()) && BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit) registerNonVectorizableLoads(VL); return; } LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); - unsigned ShuffleOrOp = S.isAltShuffle() ? - (unsigned) Instruction::ShuffleVector : S.getOpcode(); + unsigned ShuffleOrOp = + S.isAltShuffle() ? (unsigned)Instruction::ShuffleVector : S.getOpcode(); auto CreateOperandNodes = [&](TreeEntry *TE, const auto &Operands) { // Postpone PHI nodes creation SmallVector PHIOps; @@ -8378,7 +8377,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (Op.empty()) continue; InstructionsState S = getSameOpcode(Op, *TLI); - if (S.getOpcode() != Instruction::PHI || S.isAltShuffle()) + if (!isa_and_present(S.getMainOp()) || S.isAltShuffle()) buildTree_rec(Op, Depth + 1, {TE, I}); else PHIOps.push_back(I); @@ -9732,10 +9731,10 @@ void BoUpSLP::transformNodes() { if (IsSplat) continue; InstructionsState S = getSameOpcode(Slice, *TLI); - if (!S.getOpcode() || S.isAltShuffle() || !allSameBlock(Slice) || - (S.getOpcode() == Instruction::Load && + if (!S.getMainOp() || S.isAltShuffle() || !allSameBlock(Slice) || + (isa(S.getMainOp()) && areKnownNonVectorizableLoads(Slice)) || - (S.getOpcode() != Instruction::Load && !has_single_bit(VF))) + (!isa(S.getMainOp()) && !has_single_bit(VF))) continue; if (VF == 2) { // Try to vectorize reduced values or if all users are vectorized. @@ -9750,7 +9749,7 @@ void BoUpSLP::transformNodes() { UserIgnoreList); })) continue; - if (S.getOpcode() == Instruction::Load) { + if (isa(S.getMainOp())) { OrdersType Order; SmallVector PointerOps; LoadsState Res = @@ -9767,7 +9766,7 @@ void BoUpSLP::transformNodes() { } continue; } - } else if (S.getOpcode() == Instruction::ExtractElement || + } else if (isa(S.getMainOp()) || (TTI->getInstructionCost(S.getMainOp(), CostKind) < TTI::TCC_Expensive && !CheckOperandsProfitability( @@ -11049,7 +11048,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, if (const TreeEntry *OpTE = getTreeEntry(V)) return getCastContextHint(*OpTE); InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI); - if (SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle()) + if (isa_and_present(SrcState.getMainOp()) && + !SrcState.isAltShuffle()) return TTI::CastContextHint::GatherScatter; return TTI::CastContextHint::None; }; @@ -14396,12 +14396,12 @@ BoUpSLP::TreeEntry *BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, ArrayRef VL = E->getOperand(NodeIdx); InstructionsState S = getSameOpcode(VL, *TLI); // Special processing for GEPs bundle, which may include non-gep values. - if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) { + if (!S.getMainOp() && VL.front()->getType()->isPointerTy()) { const auto *It = find_if(VL, IsaPred); if (It != VL.end()) S = getSameOpcode(*It, *TLI); } - if (!S.getOpcode()) + if (!S.getMainOp()) return nullptr; auto CheckSameVE = [&](const TreeEntry *VE) { return VE->isSame(VL) && @@ -18378,8 +18378,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, hasFullVectorsOrPowerOf2(*TTI, ValOps.front()->getType(), ValOps.size()) || (VectorizeNonPowerOf2 && has_single_bit(ValOps.size() + 1)); - if ((!IsAllowedSize && S.getOpcode() && - S.getOpcode() != Instruction::Load && + if ((!IsAllowedSize && S.getMainOp() && !isa(S.getMainOp()) && (!S.getMainOp()->isSafeToRemove() || any_of(ValOps.getArrayRef(), [&](Value *V) { @@ -18389,8 +18388,8 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, return !Stores.contains(U); })); }))) || - (ValOps.size() > Chain.size() / 2 && !S.getOpcode())) { - Size = (!IsAllowedSize && S.getOpcode()) ? 1 : 2; + (ValOps.size() > Chain.size() / 2 && !S.getMainOp())) { + Size = (!IsAllowedSize && S.getMainOp()) ? 1 : 2; return false; } } @@ -18413,7 +18412,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, R.computeMinimumValueSizes(); Size = R.getCanonicalGraphSize(); - if (S.getOpcode() == Instruction::Load) + if (isa_and_present(S.getMainOp())) Size = 2; // cut off masked gather small trees InstructionCost Cost = R.getTreeCost(); @@ -18914,7 +18913,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, // Check that all of the parts are instructions of the same type, // we permit an alternate opcode via InstructionsState. InstructionsState S = getSameOpcode(VL, *TLI); - if (!S.getOpcode()) + if (!S.getMainOp()) return false; Instruction *I0 = S.getMainOp(); @@ -19726,16 +19725,16 @@ class HorizontalReduction { // Also check if the instruction was folded to constant/other value. auto *Inst = dyn_cast(RdxVal); if ((Inst && isVectorLikeInstWithConstOps(Inst) && - (!S.getOpcode() || !S.isOpcodeOrAlt(Inst))) || - (S.getOpcode() && !Inst)) + (!S.getMainOp() || !S.isOpcodeOrAlt(Inst))) || + (S.getMainOp() && !Inst)) continue; Candidates.push_back(RdxVal); TrackedToOrig.try_emplace(RdxVal, OrigReducedVals[Cnt]); } bool ShuffledExtracts = false; // Try to handle shuffled extractelements. - if (S.getOpcode() == Instruction::ExtractElement && !S.isAltShuffle() && - I + 1 < E) { + if (isa_and_present(S.getMainOp()) && + !S.isAltShuffle() && I + 1 < E) { SmallVector CommonCandidates(Candidates); for (Value *RV : ReducedVals[I + 1]) { Value *RdxVal = TrackedVals.at(RV); @@ -21130,7 +21129,7 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI, return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn(); } InstructionsState S = getSameOpcode({I1, I2}, TLI); - if (S.getOpcode() && (IsCompatibility || !S.isAltShuffle())) + if (S.getMainOp() && (IsCompatibility || !S.isAltShuffle())) continue; if (IsCompatibility) return false; @@ -21285,7 +21284,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (NodeI1 != NodeI2) return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn(); InstructionsState S = getSameOpcode({I1, I2}, *TLI); - if (S.getOpcode() && !S.isAltShuffle()) + if (S.getMainOp() && !S.isAltShuffle()) continue; return I1->getOpcode() < I2->getOpcode(); } From eae78c045f6bcfd769b8d15fba040a2a1e2dfdc9 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Tue, 17 Dec 2024 22:47:23 -0800 Subject: [PATCH 4/4] use InstructionsState::valid --- .../Transforms/Vectorize/SLPVectorizer.cpp | 68 +++++++++---------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 98d8fd638ca60..b8cd2c1cfc049 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1848,7 +1848,7 @@ class BoUpSLP { InstructionsState S = getSameOpcode(Ops, TLI); // Note: Only consider instructions with <= 2 operands to avoid // complexity explosion. - if (S.getMainOp() && + if (S.valid() && (S.getMainOp()->getNumOperands() <= 2 || !MainAltOps.empty() || !S.isAltShuffle()) && all_of(Ops, [&S](Value *V) { @@ -2699,7 +2699,7 @@ class BoUpSLP { OperandData &AltOp = getData(OpIdx, Lane); InstructionsState OpS = getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V}, TLI); - if (OpS.getMainOp() && OpS.isAltShuffle()) + if (OpS.valid() && OpS.isAltShuffle()) MainAltOps[OpIdx].push_back(AltOp.V); } } @@ -3594,9 +3594,9 @@ class BoUpSLP { "Need to vectorize gather entry?"); // Gathered loads still gathered? Do not create entry, use the original one. if (GatheredLoadsEntriesFirst.has_value() && - EntryState == TreeEntry::NeedToGather && - isa_and_present(S.getMainOp()) && - UserTreeIdx.EdgeIdx == UINT_MAX && !UserTreeIdx.UserTE) + EntryState == TreeEntry::NeedToGather && S.valid() && + S.getOpcode() == Instruction::Load && UserTreeIdx.EdgeIdx == UINT_MAX && + !UserTreeIdx.UserTE) return nullptr; VectorizableTree.push_back(std::make_unique(VectorizableTree)); TreeEntry *Last = VectorizableTree.back().get(); @@ -8062,7 +8062,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // Don't go into catchswitch blocks, which can happen with PHIs. // Such blocks can only have PHIs and the catchswitch. There is no // place to insert a shuffle if we need to, so just avoid that issue. - if (S.getMainOp() && + if (S.valid() && isa(S.getMainOp()->getParent()->getTerminator())) { LLVM_DEBUG(dbgs() << "SLP: bundle in catchswitch block.\n"); newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx); @@ -8070,7 +8070,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } // Check if this is a duplicate of another entry. - if (S.getMainOp()) { + if (S.valid()) { if (TreeEntry *E = getTreeEntry(S.getMainOp())) { LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.getMainOp() << ".\n"); @@ -8131,7 +8131,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // a load), in which case peek through to include it in the tree, without // ballooning over-budget. if (Depth >= RecursionMaxDepth && - !(S.getMainOp() && !S.isAltShuffle() && VL.size() >= 4 && + !(S.valid() && !S.isAltShuffle() && VL.size() >= 4 && (match(S.getMainOp(), m_Load(m_Value())) || all_of(VL, [&S](const Value *I) { return match(I, @@ -8169,7 +8169,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, // vectorize. auto &&NotProfitableForVectorization = [&S, this, Depth](ArrayRef VL) { - if (!S.getMainOp() || !S.isAltShuffle() || VL.size() > 2) + if (!S.valid() || !S.isAltShuffle() || VL.size() > 2) return false; if (VectorizableTree.size() < MinTreeSize) return false; @@ -8224,7 +8224,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, bool IsScatterVectorizeUserTE = UserTreeIdx.UserTE && UserTreeIdx.UserTE->State == TreeEntry::ScatterVectorize; - bool AreAllSameBlock = S.getMainOp() && allSameBlock(VL); + bool AreAllSameBlock = S.valid() && allSameBlock(VL); bool AreScatterAllGEPSameBlock = (IsScatterVectorizeUserTE && VL.front()->getType()->isPointerTy() && VL.size() > 2 && @@ -8241,7 +8241,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, sortPtrAccesses(VL, UserTreeIdx.UserTE->getMainOp()->getType(), *DL, *SE, SortedIndices)); bool AreAllSameInsts = AreAllSameBlock || AreScatterAllGEPSameBlock; - if (!AreAllSameInsts || (!S.getMainOp() && allConstant(VL)) || isSplat(VL) || + if (!AreAllSameInsts || (!S.valid() && allConstant(VL)) || isSplat(VL) || (isa_and_present( S.getMainOp()) && !all_of(VL, isVectorLikeInstWithConstOps)) || @@ -8254,7 +8254,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } // Don't vectorize ephemeral values. - if (S.getMainOp() && !EphValues.empty()) { + if (S.valid() && !EphValues.empty()) { for (Value *V : VL) { if (EphValues.count(V)) { LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *V @@ -8313,7 +8313,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, Instruction *VL0 = S.getMainOp(); BB = VL0->getParent(); - if (S.getMainOp() && + if (S.valid() && (BB->isEHPad() || isa_and_nonnull(BB->getTerminator()) || !DT->isReachableFromEntry(BB))) { // Don't go into unreachable blocks. They may contain instructions with @@ -8360,7 +8360,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx, ReuseShuffleIndices); NonScheduledFirst.insert(VL.front()); - if (isa(S.getMainOp()) && + if (S.getOpcode() == Instruction::Load && BS.ScheduleRegionSize < BS.ScheduleRegionSizeLimit) registerNonVectorizableLoads(VL); return; @@ -8377,7 +8377,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, if (Op.empty()) continue; InstructionsState S = getSameOpcode(Op, *TLI); - if (!isa_and_present(S.getMainOp()) || S.isAltShuffle()) + if ((!S.valid() || S.getOpcode() != Instruction::PHI) || S.isAltShuffle()) buildTree_rec(Op, Depth + 1, {TE, I}); else PHIOps.push_back(I); @@ -9731,10 +9731,10 @@ void BoUpSLP::transformNodes() { if (IsSplat) continue; InstructionsState S = getSameOpcode(Slice, *TLI); - if (!S.getMainOp() || S.isAltShuffle() || !allSameBlock(Slice) || - (isa(S.getMainOp()) && + if (!S.valid() || S.isAltShuffle() || !allSameBlock(Slice) || + (S.getOpcode() == Instruction::Load && areKnownNonVectorizableLoads(Slice)) || - (!isa(S.getMainOp()) && !has_single_bit(VF))) + (S.getOpcode() != Instruction::Load && !has_single_bit(VF))) continue; if (VF == 2) { // Try to vectorize reduced values or if all users are vectorized. @@ -9749,7 +9749,7 @@ void BoUpSLP::transformNodes() { UserIgnoreList); })) continue; - if (isa(S.getMainOp())) { + if (S.getOpcode() == Instruction::Load) { OrdersType Order; SmallVector PointerOps; LoadsState Res = @@ -9766,7 +9766,7 @@ void BoUpSLP::transformNodes() { } continue; } - } else if (isa(S.getMainOp()) || + } else if (S.getOpcode() == Instruction::ExtractElement || (TTI->getInstructionCost(S.getMainOp(), CostKind) < TTI::TCC_Expensive && !CheckOperandsProfitability( @@ -11048,7 +11048,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, if (const TreeEntry *OpTE = getTreeEntry(V)) return getCastContextHint(*OpTE); InstructionsState SrcState = getSameOpcode(E->getOperand(0), *TLI); - if (isa_and_present(SrcState.getMainOp()) && + if (SrcState.valid() && SrcState.getOpcode() == Instruction::Load && !SrcState.isAltShuffle()) return TTI::CastContextHint::GatherScatter; return TTI::CastContextHint::None; @@ -14396,12 +14396,12 @@ BoUpSLP::TreeEntry *BoUpSLP::getMatchedVectorizedOperand(const TreeEntry *E, ArrayRef VL = E->getOperand(NodeIdx); InstructionsState S = getSameOpcode(VL, *TLI); // Special processing for GEPs bundle, which may include non-gep values. - if (!S.getMainOp() && VL.front()->getType()->isPointerTy()) { + if (!S.valid() && VL.front()->getType()->isPointerTy()) { const auto *It = find_if(VL, IsaPred); if (It != VL.end()) S = getSameOpcode(*It, *TLI); } - if (!S.getMainOp()) + if (!S.valid()) return nullptr; auto CheckSameVE = [&](const TreeEntry *VE) { return VE->isSame(VL) && @@ -15061,7 +15061,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { auto *VecTy = getWidenedType(ScalarTy, E->Scalars.size()); if (E->isGather()) { // Set insert point for non-reduction initial nodes. - if (E->getMainOp() && E->Idx == 0 && !UserIgnoreList) + if (E->getMainOp() != nullptr && E->Idx == 0 && !UserIgnoreList) setInsertPointAfterBundle(E); Value *Vec = createBuildVector(E, ScalarTy, PostponedPHIs); E->VectorizedValue = Vec; @@ -18378,7 +18378,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, hasFullVectorsOrPowerOf2(*TTI, ValOps.front()->getType(), ValOps.size()) || (VectorizeNonPowerOf2 && has_single_bit(ValOps.size() + 1)); - if ((!IsAllowedSize && S.getMainOp() && !isa(S.getMainOp()) && + if ((!IsAllowedSize && S.valid() && S.getOpcode() != Instruction::Load && (!S.getMainOp()->isSafeToRemove() || any_of(ValOps.getArrayRef(), [&](Value *V) { @@ -18388,8 +18388,8 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, return !Stores.contains(U); })); }))) || - (ValOps.size() > Chain.size() / 2 && !S.getMainOp())) { - Size = (!IsAllowedSize && S.getMainOp()) ? 1 : 2; + (ValOps.size() > Chain.size() / 2 && !S.valid())) { + Size = (!IsAllowedSize && S.valid()) ? 1 : 2; return false; } } @@ -18412,7 +18412,7 @@ SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, R.computeMinimumValueSizes(); Size = R.getCanonicalGraphSize(); - if (isa_and_present(S.getMainOp())) + if (S.valid() && S.getOpcode() == Instruction::Load) Size = 2; // cut off masked gather small trees InstructionCost Cost = R.getTreeCost(); @@ -18913,7 +18913,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef VL, BoUpSLP &R, // Check that all of the parts are instructions of the same type, // we permit an alternate opcode via InstructionsState. InstructionsState S = getSameOpcode(VL, *TLI); - if (!S.getMainOp()) + if (!S.valid()) return false; Instruction *I0 = S.getMainOp(); @@ -19725,15 +19725,15 @@ class HorizontalReduction { // Also check if the instruction was folded to constant/other value. auto *Inst = dyn_cast(RdxVal); if ((Inst && isVectorLikeInstWithConstOps(Inst) && - (!S.getMainOp() || !S.isOpcodeOrAlt(Inst))) || - (S.getMainOp() && !Inst)) + (!S.valid() || !S.isOpcodeOrAlt(Inst))) || + (S.valid() && !Inst)) continue; Candidates.push_back(RdxVal); TrackedToOrig.try_emplace(RdxVal, OrigReducedVals[Cnt]); } bool ShuffledExtracts = false; // Try to handle shuffled extractelements. - if (isa_and_present(S.getMainOp()) && + if (S.valid() && S.getOpcode() == Instruction::ExtractElement && !S.isAltShuffle() && I + 1 < E) { SmallVector CommonCandidates(Candidates); for (Value *RV : ReducedVals[I + 1]) { @@ -21129,7 +21129,7 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI, return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn(); } InstructionsState S = getSameOpcode({I1, I2}, TLI); - if (S.getMainOp() && (IsCompatibility || !S.isAltShuffle())) + if (S.valid() && (IsCompatibility || !S.isAltShuffle())) continue; if (IsCompatibility) return false; @@ -21284,7 +21284,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { if (NodeI1 != NodeI2) return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn(); InstructionsState S = getSameOpcode({I1, I2}, *TLI); - if (S.getMainOp() && !S.isAltShuffle()) + if (S.valid() && !S.isAltShuffle()) continue; return I1->getOpcode() < I2->getOpcode(); }