From ce7152a96cd491d1add4b83971aabea626cbd1fc Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 4 Dec 2024 14:59:51 +0000
Subject: [PATCH 1/2] [VectorCombine] Pull out TargetCostKind argument to
 allow globally set cost kind value

Some prep work to allow a future patch to potentially use VectorCombine to
target code size for -Os/-Oz builds (setting TCK_CodeSize instead of
TCK_RecipThroughput).

There's still more cleanup to do, as many get*Cost calls still rely on the
default TargetCostKind value (usually TCK_RecipThroughput, but not always).
---
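[Editor's note, not part of the commit: a minimal sketch of the pattern this
patch applies throughout the file. The helper names below are hypothetical,
invented for illustration; only the TTI types and calls are real. Instead of
hard-coding TCK_RecipThroughput at each cost query, every fold helper now
receives the cost kind from its caller, so a later patch can select
TCK_CodeSize in a single place.]

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Before: each helper picked the cost kind locally.
static InstructionCost loadCostBefore(const TargetTransformInfo &TTI,
                                      Type *LoadTy, Align Alignment,
                                      unsigned AddrSpace) {
  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; // hard-coded
  return TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AddrSpace,
                             CostKind);
}

// After: the caller chooses the cost kind once and threads it through.
static InstructionCost loadCostAfter(const TargetTransformInfo &TTI,
                                     Type *LoadTy, Align Alignment,
                                     unsigned AddrSpace,
                                     TTI::TargetCostKind CostKind) {
  return TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AddrSpace,
                             CostKind);
}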
 .../Transforms/Vectorize/VectorCombine.cpp    | 199 +++++++++---------
 1 file changed, 100 insertions(+), 99 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index b9caf8c0df9be..e655e73fa9e14 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -91,38 +91,40 @@ class VectorCombine {
   // TODO: Direct calls from the top-level "run" loop use a plain "Instruction"
   //       parameter. That should be updated to specific sub-classes because the
   //       run loop was changed to dispatch on opcode.
-  bool vectorizeLoadInsert(Instruction &I);
-  bool widenSubvectorLoad(Instruction &I);
+  bool vectorizeLoadInsert(Instruction &I, TTI::TargetCostKind CostKind);
+  bool widenSubvectorLoad(Instruction &I, TTI::TargetCostKind CostKind);
   ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                         ExtractElementInst *Ext1,
+                                        TTI::TargetCostKind CostKind,
                                         unsigned PreferredExtractIndex) const;
   bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                              const Instruction &I,
                              ExtractElementInst *&ConvertToShuffle,
+                             TTI::TargetCostKind CostKind,
                              unsigned PreferredExtractIndex);
   void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                      Instruction &I);
   void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                        Instruction &I);
-  bool foldExtractExtract(Instruction &I);
-  bool foldInsExtFNeg(Instruction &I);
-  bool foldInsExtVectorToShuffle(Instruction &I);
-  bool foldBitcastShuffle(Instruction &I);
-  bool scalarizeBinopOrCmp(Instruction &I);
-  bool scalarizeVPIntrinsic(Instruction &I);
-  bool foldExtractedCmps(Instruction &I);
+  bool foldExtractExtract(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldInsExtFNeg(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldInsExtVectorToShuffle(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldBitcastShuffle(Instruction &I, TTI::TargetCostKind CostKind);
+  bool scalarizeBinopOrCmp(Instruction &I, TTI::TargetCostKind CostKind);
+  bool scalarizeVPIntrinsic(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldExtractedCmps(Instruction &I, TTI::TargetCostKind CostKind);
   bool foldSingleElementStore(Instruction &I);
-  bool scalarizeLoadExtract(Instruction &I);
-  bool foldPermuteOfBinops(Instruction &I);
-  bool foldShuffleOfBinops(Instruction &I);
-  bool foldShuffleOfCastops(Instruction &I);
-  bool foldShuffleOfShuffles(Instruction &I);
-  bool foldShuffleOfIntrinsics(Instruction &I);
-  bool foldShuffleToIdentity(Instruction &I);
+  bool scalarizeLoadExtract(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldPermuteOfBinops(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldShuffleOfBinops(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldShuffleOfCastops(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldShuffleOfShuffles(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldShuffleOfIntrinsics(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldShuffleToIdentity(Instruction &I, TTI::TargetCostKind CostKind);
   bool foldShuffleFromReductions(Instruction &I);
-  bool foldCastFromReductions(Instruction &I);
+  bool foldCastFromReductions(Instruction &I, TTI::TargetCostKind CostKind);
   bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
-  bool shrinkType(Instruction &I);
+  bool shrinkType(Instruction &I, TTI::TargetCostKind CostKind);
 
   void replaceValue(Value &Old, Value &New) {
     Old.replaceAllUsesWith(&New);
@@ -172,7 +174,8 @@ static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
   return true;
 }
 
-bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
+bool VectorCombine::vectorizeLoadInsert(Instruction &I,
+                                        TTI::TargetCostKind CostKind) {
   // Match insert into fixed vector of scalar value.
   // TODO: Handle non-zero insert index.
   Value *Scalar;
@@ -249,7 +252,6 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   InstructionCost OldCost =
       TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
   APInt DemandedElts = APInt::getOneBitSet(MinVecNumElts, 0);
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   OldCost +=
       TTI.getScalarizationOverhead(MinVecTy, DemandedElts,
                                    /* Insert */ true, HasExtract, CostKind);
@@ -293,7 +295,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
 /// If we are loading a vector and then inserting it into a larger vector with
 /// undefined elements, try to load the larger vector and eliminate the insert.
 /// This removes a shuffle in IR and may allow combining of other loaded values.
-bool VectorCombine::widenSubvectorLoad(Instruction &I) {
+bool VectorCombine::widenSubvectorLoad(Instruction &I,
+                                       TTI::TargetCostKind CostKind) {
   // Match subvector insert of fixed vector.
   auto *Shuf = cast<ShuffleVectorInst>(&I);
   if (!Shuf->isIdentityWithPadding())
@@ -329,11 +332,11 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I) {
   // undef value is 0. We could add that cost if the cost model accurately
   // reflects the real cost of that operation.
   InstructionCost OldCost =
-      TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS);
+      TTI.getMemoryOpCost(Instruction::Load, LoadTy, Alignment, AS, CostKind);
 
   // New pattern: load PtrOp
   InstructionCost NewCost =
-      TTI.getMemoryOpCost(Instruction::Load, Ty, Alignment, AS);
+      TTI.getMemoryOpCost(Instruction::Load, Ty, Alignment, AS, CostKind);
 
   // We can aggressively convert to the vector form because the backend can
   // invert this transform if it does not result in a performance win.
@@ -353,6 +356,7 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I) {
 /// followed by extract from a different index.
 ExtractElementInst *VectorCombine::getShuffleExtract(
     ExtractElementInst *Ext0, ExtractElementInst *Ext1,
+    TTI::TargetCostKind CostKind,
     unsigned PreferredExtractIndex = InvalidIndex) const {
   auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
   auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
@@ -366,7 +370,6 @@ ExtractElementInst *VectorCombine::getShuffleExtract(
     return nullptr;
 
   Type *VecTy = Ext0->getVectorOperand()->getType();
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   assert(VecTy == Ext1->getVectorOperand()->getType() && "Need matching types");
   InstructionCost Cost0 =
       TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0);
@@ -405,6 +408,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                           ExtractElementInst *Ext1,
                                           const Instruction &I,
                                           ExtractElementInst *&ConvertToShuffle,
+                                          TTI::TargetCostKind CostKind,
                                           unsigned PreferredExtractIndex) {
   auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
   auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
@@ -436,7 +440,6 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
   // both sequences.
   unsigned Ext0Index = Ext0IndexC->getZExtValue();
   unsigned Ext1Index = Ext1IndexC->getZExtValue();
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
 
   InstructionCost Extract0Cost =
       TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Ext0Index);
@@ -475,7 +478,8 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                            !Ext1->hasOneUse() * Extract1Cost;
   }
 
-  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
+  ConvertToShuffle =
+      getShuffleExtract(Ext0, Ext1, CostKind, PreferredExtractIndex);
   if (ConvertToShuffle) {
     if (IsBinOp && DisableBinopExtractShuffle)
       return true;
@@ -589,7 +593,8 @@ void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
 }
 
 /// Match an instruction with extracted vector operands.
-bool VectorCombine::foldExtractExtract(Instruction &I) {
+bool VectorCombine::foldExtractExtract(Instruction &I,
+                                       TTI::TargetCostKind CostKind) {
   // It is not safe to transform things like div, urem, etc. because we may
   // create undefined behavior when executing those on unknown vector elements.
   if (!isSafeToSpeculativelyExecute(&I))
@@ -621,7 +626,8 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
           m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
 
   ExtractElementInst *ExtractToChange;
-  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
+  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, CostKind,
+                            InsertIndex))
     return false;
 
   if (ExtractToChange) {
@@ -648,7 +654,8 @@ bool VectorCombine::foldExtractExtract(Instruction &I) {
 
 /// Try to replace an extract + scalar fneg + insert with a vector fneg +
 /// shuffle.
-bool VectorCombine::foldInsExtFNeg(Instruction &I) {
+bool VectorCombine::foldInsExtFNeg(Instruction &I,
+                                   TTI::TargetCostKind CostKind) {
   // Match an insert (op (extract)) pattern.
   Value *DestVec;
   uint64_t Index;
@@ -683,7 +690,6 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
   Mask[Index] = Index + NumElts;
 
   Type *ScalarTy = VecTy->getScalarType();
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost OldCost =
       TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy) +
      TTI.getVectorInstrCost(I, VecTy, CostKind, Index);
@@ -712,7 +718,8 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I) {
 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
 /// destination type followed by shuffle. This can enable further transforms by
 /// moving bitcasts or shuffles together.
-bool VectorCombine::foldBitcastShuffle(Instruction &I) {
+bool VectorCombine::foldBitcastShuffle(Instruction &I,
+                                       TTI::TargetCostKind CostKind) {
   Value *V0, *V1;
   ArrayRef<int> Mask;
   if (!match(&I, m_BitCast(m_OneUse(
@@ -772,21 +779,20 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) {
   unsigned NumOps = IsUnary ? 1 : 2;
 
   // The new shuffle must not cost more than the old shuffle.
-  TargetTransformInfo::TargetCostKind CK =
-      TargetTransformInfo::TCK_RecipThroughput;
   TargetTransformInfo::ShuffleKind SK =
       IsUnary ? TargetTransformInfo::SK_PermuteSingleSrc
               : TargetTransformInfo::SK_PermuteTwoSrc;
 
   InstructionCost DestCost =
-      TTI.getShuffleCost(SK, NewShuffleTy, NewMask, CK) +
+      TTI.getShuffleCost(SK, NewShuffleTy, NewMask, CostKind) +
       (NumOps * TTI.getCastInstrCost(Instruction::BitCast, NewShuffleTy, SrcTy,
                                      TargetTransformInfo::CastContextHint::None,
-                                     CK));
+                                     CostKind));
   InstructionCost SrcCost =
-      TTI.getShuffleCost(SK, SrcTy, Mask, CK) +
+      TTI.getShuffleCost(SK, SrcTy, Mask, CostKind) +
       TTI.getCastInstrCost(Instruction::BitCast, DestTy, OldShuffleTy,
-                           TargetTransformInfo::CastContextHint::None, CK);
+                           TargetTransformInfo::CastContextHint::None,
+                           CostKind);
   if (DestCost > SrcCost || !DestCost.isValid())
     return false;
@@ -802,7 +808,8 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I) {
 /// VP Intrinsics whose vector operands are both splat values may be simplified
 /// into the scalar version of the operation and the result splatted. This
 /// can lead to scalarization down the line.
-bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
+bool VectorCombine::scalarizeVPIntrinsic(Instruction &I,
+                                         TTI::TargetCostKind CostKind) {
   if (!isa<VPIntrinsic>(I))
     return false;
   VPIntrinsic &VPI = cast<VPIntrinsic>(I);
@@ -841,7 +848,6 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
   // Calculate cost of splatting both operands into vectors and the vector
   // intrinsic
   VectorType *VecTy = cast<VectorType>(VPI.getType());
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   SmallVector<int> Mask;
   if (auto *FVTy = dyn_cast<FixedVectorType>(VecTy))
     Mask.resize(FVTy->getNumElements(), 0);
@@ -923,7 +929,8 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
 
 /// Match a vector binop or compare instruction with at least one inserted
 /// scalar operand and convert to scalar binop/cmp followed by insertelement.
-bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
+bool VectorCombine::scalarizeBinopOrCmp(Instruction &I,
+                                        TTI::TargetCostKind CostKind) {
   CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
   Value *Ins0, *Ins1;
   if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
@@ -1003,7 +1010,6 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
 
   // Get cost estimate for the insert element. This cost will factor into
   // both sequences.
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost InsertCost = TTI.getVectorInstrCost(
       Instruction::InsertElement, VecTy, CostKind, Index);
   InstructionCost OldCost =
@@ -1052,7 +1058,8 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
 
 /// Try to combine a scalar binop + 2 scalar compares of extracted elements of
 /// a vector into vector operations followed by extract. Note: The SLP pass
 /// may miss this pattern because of implementation problems.
-bool VectorCombine::foldExtractedCmps(Instruction &I) {
+bool VectorCombine::foldExtractedCmps(Instruction &I,
+                                      TTI::TargetCostKind CostKind) {
   auto *BI = dyn_cast<BinaryOperator>(&I);
 
   // We are looking for a scalar binop of booleans.
@@ -1080,7 +1087,7 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
   auto *Ext0 = cast<ExtractElementInst>(I0);
   auto *Ext1 = cast<ExtractElementInst>(I1);
-  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1);
+  ExtractElementInst *ConvertToShuf = getShuffleExtract(Ext0, Ext1, CostKind);
   if (!ConvertToShuf)
     return false;
   assert((ConvertToShuf == Ext0 || ConvertToShuf == Ext1) &&
@@ -1089,13 +1096,12 @@ bool VectorCombine::foldExtractedCmps(Instruction &I) {
   // The original scalar pattern is:
   // binop i1 (cmp Pred (ext X, Index0), C0), (cmp Pred (ext X, Index1), C1)
   CmpInst::Predicate Pred = P0;
-  unsigned CmpOpcode = CmpInst::isFPPredicate(Pred) ? Instruction::FCmp
-                                                    : Instruction::ICmp;
+  unsigned CmpOpcode =
+      CmpInst::isFPPredicate(Pred) ? Instruction::FCmp : Instruction::ICmp;
   auto *VecTy = dyn_cast<FixedVectorType>(X->getType());
   if (!VecTy)
     return false;
 
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost Ext0Cost =
                       TTI.getVectorInstrCost(*Ext0, VecTy, CostKind, Index0),
                   Ext1Cost =
@@ -1331,7 +1337,8 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
 }
 
 /// Try to scalarize vector loads feeding extractelement instructions.
-bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
+bool VectorCombine::scalarizeLoadExtract(Instruction &I,
+                                         TTI::TargetCostKind CostKind) {
   Value *Ptr;
   if (!match(&I, m_Load(m_Value(Ptr))))
     return false;
@@ -1386,7 +1393,6 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
     }
 
     auto *Index = dyn_cast<ConstantInt>(UI->getOperand(1));
-    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
     OriginalCost +=
         TTI.getVectorInstrCost(Instruction::ExtractElement, VecTy, CostKind,
                                Index ? Index->getZExtValue() : -1);
@@ -1428,7 +1434,8 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
 
 /// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
 ///           -->  "binop (shuffle), (shuffle)".
-bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
+bool VectorCombine::foldPermuteOfBinops(Instruction &I,
+                                        TTI::TargetCostKind CostKind) {
   BinaryOperator *BinOp;
   ArrayRef<int> OuterMask;
   if (!match(&I,
@@ -1480,8 +1487,6 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
   }
 
   // Try to merge shuffles across the binop if the new shuffles are not costly.
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
   InstructionCost OldCost =
       TTI.getArithmeticInstrCost(Opcode, BinOpTy, CostKind) +
       TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, BinOpTy,
@@ -1523,7 +1528,8 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
 }
 
 /// Try to convert "shuffle (binop), (binop)" into "binop (shuffle), (shuffle)".
-bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
+bool VectorCombine::foldShuffleOfBinops(Instruction &I,
+                                        TTI::TargetCostKind CostKind) {
   BinaryOperator *B0, *B1;
   ArrayRef<int> OldMask;
   if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
@@ -1575,8 +1581,6 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   }
 
   // Try to replace a binop with a shuffle if the shuffle is not costly.
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
   InstructionCost OldCost =
       TTI.getArithmeticInstrCost(B0->getOpcode(), BinOpTy, CostKind) +
       TTI.getArithmeticInstrCost(B1->getOpcode(), BinOpTy, CostKind) +
@@ -1612,7 +1616,8 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
 
 /// Try to convert "shuffle (castop), (castop)" with a shared castop operand
 /// into "castop (shuffle)".
-bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
+bool VectorCombine::foldShuffleOfCastops(Instruction &I,
+                                         TTI::TargetCostKind CostKind) {
   Value *V0, *V1;
   ArrayRef<int> OldMask;
   if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
@@ -1672,8 +1677,6 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
       FixedVectorType::get(CastSrcTy->getScalarType(), NewMask.size());
 
   // Try to replace a castop with a shuffle if the shuffle is not costly.
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
   InstructionCost CostC0 =
       TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy,
                            TTI::CastContextHint::None, CostKind);
@@ -1717,7 +1720,8 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
 
 /// Try to convert "shuffle (shuffle x, undef), (shuffle y, undef)"
 /// into "shuffle x, y".
-bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
+bool VectorCombine::foldShuffleOfShuffles(Instruction &I,
+                                          TTI::TargetCostKind CostKind) {
   Value *V0, *V1;
   UndefValue *U0, *U1;
   ArrayRef<int> OuterMask, InnerMask0, InnerMask1;
@@ -1767,8 +1771,6 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
   }
 
   // Try to merge the shuffles if the new shuffle is not costly.
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
-
   InstructionCost InnerCost0 =
       TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
                          ShuffleSrcTy, InnerMask0, CostKind, 0, nullptr,
                          {V0, U0}, ShufI0);
@@ -1807,7 +1809,8 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
 
 /// Try to convert
 /// "shuffle (intrinsic), (intrinsic)" into "intrinsic (shuffle), (shuffle)".
-bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
+bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I,
+                                            TTI::TargetCostKind CostKind) {
   Value *V0, *V1;
   ArrayRef<int> OldMask;
   if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)),
@@ -1837,12 +1840,10 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
       return false;
 
   InstructionCost OldCost =
-      TTI.getIntrinsicInstrCost(IntrinsicCostAttributes(IID, *II0),
-                                TTI::TCK_RecipThroughput) +
-      TTI.getIntrinsicInstrCost(IntrinsicCostAttributes(IID, *II1),
-                                TTI::TCK_RecipThroughput) +
+      TTI.getIntrinsicInstrCost(IntrinsicCostAttributes(IID, *II0), CostKind) +
+      TTI.getIntrinsicInstrCost(IntrinsicCostAttributes(IID, *II1), CostKind) +
       TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, II0Ty, OldMask,
-                         TTI::TCK_RecipThroughput, 0, nullptr, {II0, II1}, &I);
+                         CostKind, 0, nullptr, {II0, II1}, &I);
 
   SmallVector<Type *> NewArgsTy;
   InstructionCost NewCost = 0;
@@ -1854,10 +1855,10 @@ bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
       NewArgsTy.push_back(FixedVectorType::get(VecTy->getElementType(),
                                                VecTy->getNumElements() * 2));
       NewCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
-                                    VecTy, OldMask, TTI::TCK_RecipThroughput);
+                                    VecTy, OldMask, CostKind);
     }
   IntrinsicCostAttributes NewAttr(IID, ShuffleDstTy, NewArgsTy);
-  NewCost += TTI.getIntrinsicInstrCost(NewAttr, TTI::TCK_RecipThroughput);
+  NewCost += TTI.getIntrinsicInstrCost(NewAttr, CostKind);
 
   LLVM_DEBUG(dbgs() << "Found a shuffle feeding two intrinsics: " << I
                     << "\n  OldCost: " << OldCost << " vs NewCost: " << NewCost
@@ -1923,7 +1924,7 @@ generateInstLaneVectorFromOperand(ArrayRef<InstLane> Item, int Op) {
 }
 
 /// Detect concat of multiple values into a vector
-static bool isFreeConcat(ArrayRef<InstLane> Item,
+static bool isFreeConcat(ArrayRef<InstLane> Item, TTI::TargetCostKind CostKind,
                          const TargetTransformInfo &TTI) {
   auto *Ty = cast<FixedVectorType>(Item.front().first->get()->getType());
   unsigned NumElts = Ty->getNumElements();
@@ -1934,8 +1935,7 @@ static bool isFreeConcat(ArrayRef<InstLane> Item,
   // during legalization.
   SmallVector<int> ConcatMask(NumElts * 2);
   std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
-  if (TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, ConcatMask,
-                         TTI::TCK_RecipThroughput) != 0)
+  if (TTI.getShuffleCost(TTI::SK_PermuteTwoSrc, Ty, ConcatMask, CostKind) != 0)
     return false;
 
   unsigned NumSlices = Item.size() / NumElts;
@@ -2048,7 +2048,8 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
 // Starting from a shuffle, look up through operands tracking the shuffled index
 // of each lane. If we can simplify away the shuffles to identities then
 // do so.
-bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
+bool VectorCombine::foldShuffleToIdentity(Instruction &I,
+                                          TTI::TargetCostKind CostKind) {
   auto *Ty = dyn_cast<FixedVectorType>(I.getType());
   if (!Ty || I.use_empty())
     return false;
@@ -2189,7 +2190,7 @@ bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
       }
     }
 
-    if (isFreeConcat(Item, TTI)) {
+    if (isFreeConcat(Item, CostKind, TTI)) {
       ConcatLeafs.insert(FrontU);
       continue;
     }
@@ -2332,7 +2333,8 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
 /// reduce(trunc(x)) -> trunc(reduce(x)).
 /// reduce(sext(x)) -> sext(reduce(x)).
 /// reduce(zext(x)) -> zext(reduce(x)).
-bool VectorCombine::foldCastFromReductions(Instruction &I) {
+bool VectorCombine::foldCastFromReductions(Instruction &I,
+                                           TTI::TargetCostKind CostKind) {
   auto *II = dyn_cast<IntrinsicInst>(&I);
   if (!II)
     return false;
@@ -2367,7 +2369,6 @@ bool VectorCombine::foldCastFromReductions(Instruction &I) {
   auto *ReductionSrcTy = cast<VectorType>(ReductionSrc->getType());
   Type *ResultTy = I.getType();
 
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost OldCost = TTI.getArithmeticReductionCost(
       ReductionOpc, ReductionSrcTy, std::nullopt, CostKind);
   OldCost += TTI.getCastInstrCost(CastOpc, ReductionSrcTy, SrcTy,
@@ -2717,7 +2718,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
 /// lshr((zext(x),y) -> zext(lshr(x,trunc(y)))
 /// Cost model calculations takes into account if zext(x) has other users and
 /// whether it can be propagated through them too.
-bool VectorCombine::shrinkType(llvm::Instruction &I) {
+bool VectorCombine::shrinkType(Instruction &I, TTI::TargetCostKind CostKind) {
   Value *ZExted, *OtherOperand;
   if (!match(&I, m_c_BitwiseLogic(m_ZExt(m_Value(ZExted)),
                                   m_Value(OtherOperand))) &&
@@ -2746,7 +2747,6 @@ bool VectorCombine::shrinkType(llvm::Instruction &I) {
 
   // Calculate costs of leaving current IR as it is and moving ZExt operation
   // later, along with adding truncates if needed
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost ZExtCost = TTI.getCastInstrCost(
       Instruction::ZExt, BigTy, SmallTy,
       TargetTransformInfo::CastContextHint::None, CostKind);
@@ -2809,7 +2809,8 @@ bool VectorCombine::shrinkType(llvm::Instruction &I) {
 
 /// insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) -->
 /// shuffle (DstVec, SrcVec, Mask)
-bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
+bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I,
+                                              TTI::TargetCostKind CostKind) {
   Value *DstVec, *SrcVec;
   uint64_t ExtIdx, InsIdx;
   if (!match(&I,
@@ -2833,7 +2834,6 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
 
   auto *Ins = cast<InsertElementInst>(&I);
   auto *Ext = cast<ExtractElementInst>(I.getOperand(1));
-  TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
   InstructionCost OldCost =
       TTI.getVectorInstrCost(*Ext, VecTy, CostKind, ExtIdx) +
       TTI.getVectorInstrCost(*Ins, VecTy, CostKind, InsIdx);
@@ -2865,6 +2865,7 @@ bool VectorCombine::run() {
   bool MadeChange = false;
   auto FoldInst = [this, &MadeChange](Instruction &I) {
     Builder.SetInsertPoint(&I);
+    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
     bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
     auto Opcode = I.getOpcode();
@@ -2875,10 +2876,10 @@ bool VectorCombine::run() {
     if (IsFixedVectorType) {
       switch (Opcode) {
       case Instruction::InsertElement:
-        MadeChange |= vectorizeLoadInsert(I);
+        MadeChange |= vectorizeLoadInsert(I, CostKind);
         break;
       case Instruction::ShuffleVector:
-        MadeChange |= widenSubvectorLoad(I);
+        MadeChange |= widenSubvectorLoad(I, CostKind);
         break;
       default:
         break;
@@ -2888,9 +2889,9 @@ bool VectorCombine::run() {
     // This transform works with scalable and fixed vectors
     // TODO: Identify and allow other scalable transforms
     if (isa<VectorType>(I.getType())) {
-      MadeChange |= scalarizeBinopOrCmp(I);
-      MadeChange |= scalarizeLoadExtract(I);
-      MadeChange |= scalarizeVPIntrinsic(I);
+      MadeChange |= scalarizeBinopOrCmp(I, CostKind);
+      MadeChange |= scalarizeLoadExtract(I, CostKind);
+      MadeChange |= scalarizeVPIntrinsic(I, CostKind);
     }
 
     if (Opcode == Instruction::Store)
@@ -2907,39 +2908,39 @@ bool VectorCombine::run() {
     if (IsFixedVectorType) {
       switch (Opcode) {
       case Instruction::InsertElement:
-        MadeChange |= foldInsExtFNeg(I);
-        MadeChange |= foldInsExtVectorToShuffle(I);
+        MadeChange |= foldInsExtFNeg(I, CostKind);
+        MadeChange |= foldInsExtVectorToShuffle(I, CostKind);
         break;
       case Instruction::ShuffleVector:
-        MadeChange |= foldPermuteOfBinops(I);
-        MadeChange |= foldShuffleOfBinops(I);
-        MadeChange |= foldShuffleOfCastops(I);
-        MadeChange |= foldShuffleOfShuffles(I);
-        MadeChange |= foldShuffleOfIntrinsics(I);
-        MadeChange |= foldSelectShuffle(I);
-        MadeChange |= foldShuffleToIdentity(I);
+        MadeChange |= foldPermuteOfBinops(I, CostKind);
+        MadeChange |= foldShuffleOfBinops(I, CostKind);
+        MadeChange |= foldShuffleOfCastops(I, CostKind);
+        MadeChange |= foldShuffleOfShuffles(I, CostKind);
+        MadeChange |= foldShuffleOfIntrinsics(I, CostKind);
+        MadeChange |= foldSelectShuffle(I, CostKind);
+        MadeChange |= foldShuffleToIdentity(I, CostKind);
         break;
       case Instruction::BitCast:
-        MadeChange |= foldBitcastShuffle(I);
+        MadeChange |= foldBitcastShuffle(I, CostKind);
         break;
       default:
-        MadeChange |= shrinkType(I);
+        MadeChange |= shrinkType(I, CostKind);
         break;
       }
     } else {
       switch (Opcode) {
      case Instruction::Call:
         MadeChange |= foldShuffleFromReductions(I);
-        MadeChange |= foldCastFromReductions(I);
+        MadeChange |= foldCastFromReductions(I, CostKind);
         break;
       case Instruction::ICmp:
       case Instruction::FCmp:
-        MadeChange |= foldExtractExtract(I);
+        MadeChange |= foldExtractExtract(I, CostKind);
         break;
       default:
         if (Instruction::isBinaryOp(Opcode)) {
-          MadeChange |= foldExtractExtract(I);
-          MadeChange |= foldExtractedCmps(I);
+          MadeChange |= foldExtractExtract(I, CostKind);
+          MadeChange |= foldExtractedCmps(I, CostKind);
         }
         break;
       }

From ee80ee8ae7e664413a4d208b7758c55576b829e7 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 9 Dec 2024 11:05:42 +0000
Subject: [PATCH 2/2] [VectorCombine] Replace TargetCostKind function argument
 with common class member
---
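[Editor's note, not part of the commit: a minimal sketch of the end state,
with hypothetical names; only the CostKind member and the TCK_* values come
from this patch. The cost kind becomes immutable per-pass state, fixed at
construction, so the choice is made exactly once where the pass object is
created. The selectCostKind helper is an assumption about a possible
follow-up; this patch still passes TCK_RecipThroughput unconditionally.]

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Hypothetical follow-up policy: prefer size costs for -Os/-Oz bodies.
static TTI::TargetCostKind selectCostKind(const Function &F) {
  return F.hasOptSize() ? TTI::TCK_CodeSize : TTI::TCK_RecipThroughput;
}

// Simplified stand-in for the real class: every get*Cost call reads the
// shared member instead of declaring a local TCK_RecipThroughput.
class VectorCombineSketch {
  const TargetTransformInfo &TTI;
  TTI::TargetCostKind CostKind;

public:
  VectorCombineSketch(const Function &F, const TargetTransformInfo &TTI)
      : TTI(TTI), CostKind(selectCostKind(F)) {}

  InstructionCost scalarFNegCost(Type *ScalarTy) const {
    return TTI.getArithmeticInstrCost(Instruction::FNeg, ScalarTy, CostKind);
  }
};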
 .../Transforms/Vectorize/VectorCombine.cpp    | 149 ++++++++----------
 1 file changed, 64 insertions(+), 85 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index e655e73fa9e14..9635b106c6158 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -67,9 +67,10 @@ class VectorCombine {
 public:
   VectorCombine(Function &F, const TargetTransformInfo &TTI,
                 const DominatorTree &DT, AAResults &AA, AssumptionCache &AC,
-                const DataLayout *DL, bool TryEarlyFoldsOnly)
+                const DataLayout *DL, TTI::TargetCostKind CostKind,
+                bool TryEarlyFoldsOnly)
       : F(F), Builder(F.getContext()), TTI(TTI), DT(DT), AA(AA), AC(AC), DL(DL),
-        TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
+        CostKind(CostKind), TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
 
   bool run();
 
@@ -81,6 +82,7 @@ class VectorCombine {
   AAResults &AA;
   AssumptionCache &AC;
   const DataLayout *DL;
+  TTI::TargetCostKind CostKind;
 
   /// If true, only perform beneficial early IR transforms. Do not introduce new
   /// vector operations.
@@ -91,40 +93,38 @@ class VectorCombine {
   // TODO: Direct calls from the top-level "run" loop use a plain "Instruction"
   //       parameter. That should be updated to specific sub-classes because the
   //       run loop was changed to dispatch on opcode.
-  bool vectorizeLoadInsert(Instruction &I, TTI::TargetCostKind CostKind);
-  bool widenSubvectorLoad(Instruction &I, TTI::TargetCostKind CostKind);
+  bool vectorizeLoadInsert(Instruction &I);
+  bool widenSubvectorLoad(Instruction &I);
   ExtractElementInst *getShuffleExtract(ExtractElementInst *Ext0,
                                         ExtractElementInst *Ext1,
-                                        TTI::TargetCostKind CostKind,
                                         unsigned PreferredExtractIndex) const;
   bool isExtractExtractCheap(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                              const Instruction &I,
                              ExtractElementInst *&ConvertToShuffle,
-                             TTI::TargetCostKind CostKind,
                              unsigned PreferredExtractIndex);
   void foldExtExtCmp(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                      Instruction &I);
   void foldExtExtBinop(ExtractElementInst *Ext0, ExtractElementInst *Ext1,
                        Instruction &I);
-  bool foldExtractExtract(Instruction &I, TTI::TargetCostKind CostKind);
-  bool foldInsExtFNeg(Instruction &I, TTI::TargetCostKind CostKind);
-  bool foldInsExtVectorToShuffle(Instruction &I, TTI::TargetCostKind CostKind);
-  bool foldBitcastShuffle(Instruction &I, TTI::TargetCostKind CostKind);
-  bool scalarizeBinopOrCmp(Instruction &I, TTI::TargetCostKind CostKind);
-  bool scalarizeVPIntrinsic(Instruction &I, TTI::TargetCostKind CostKind);
-  bool foldExtractedCmps(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldExtractExtract(Instruction &I);
+  bool foldInsExtFNeg(Instruction &I);
+  bool foldInsExtVectorToShuffle(Instruction &I);
+  bool foldBitcastShuffle(Instruction &I);
+  bool scalarizeBinopOrCmp(Instruction &I);
+  bool scalarizeVPIntrinsic(Instruction &I);
+  bool foldExtractedCmps(Instruction &I);
   bool foldSingleElementStore(Instruction &I);
-  bool scalarizeLoadExtract(Instruction &I, TTI::TargetCostKind CostKind);
-  bool foldPermuteOfBinops(Instruction &I, TTI::TargetCostKind CostKind);
-  bool foldShuffleOfBinops(Instruction &I, TTI::TargetCostKind CostKind);
-  bool foldShuffleOfCastops(Instruction &I, TTI::TargetCostKind CostKind);
-  bool foldShuffleOfShuffles(Instruction &I, TTI::TargetCostKind CostKind);
-  bool foldShuffleOfIntrinsics(Instruction &I, TTI::TargetCostKind CostKind);
-  bool foldShuffleToIdentity(Instruction &I, TTI::TargetCostKind CostKind);
+  bool scalarizeLoadExtract(Instruction &I);
+  bool foldPermuteOfBinops(Instruction &I);
+  bool foldShuffleOfBinops(Instruction &I);
+  bool foldShuffleOfCastops(Instruction &I);
+  bool foldShuffleOfShuffles(Instruction &I);
+  bool foldShuffleOfIntrinsics(Instruction &I);
+  bool foldShuffleToIdentity(Instruction &I);
   bool foldShuffleFromReductions(Instruction &I);
-  bool foldCastFromReductions(Instruction &I, TTI::TargetCostKind CostKind);
+  bool foldCastFromReductions(Instruction &I);
   bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
-  bool shrinkType(Instruction &I, TTI::TargetCostKind CostKind);
+  bool shrinkType(Instruction &I);
 
   void replaceValue(Value &Old, Value &New) {
     Old.replaceAllUsesWith(&New);
@@ -174,8 +174,7 @@ static bool canWidenLoad(LoadInst *Load, const TargetTransformInfo &TTI) {
   return true;
 }
 
-bool VectorCombine::vectorizeLoadInsert(Instruction &I,
-                                        TTI::TargetCostKind CostKind) {
+bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
   // Match insert into fixed vector of scalar value.
   // TODO: Handle non-zero insert index.
   Value *Scalar;
@@ -295,8 +294,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I,
 /// If we are loading a vector and then inserting it into a larger vector with
 /// undefined elements, try to load the larger vector and eliminate the insert.
 /// This removes a shuffle in IR and may allow combining of other loaded values.
-bool VectorCombine::widenSubvectorLoad(Instruction &I,
-                                       TTI::TargetCostKind CostKind) {
+bool VectorCombine::widenSubvectorLoad(Instruction &I) {
   // Match subvector insert of fixed vector.
   auto *Shuf = cast<ShuffleVectorInst>(&I);
   if (!Shuf->isIdentityWithPadding())
@@ -356,7 +354,6 @@ bool VectorCombine::widenSubvectorLoad(Instruction &I,
 /// followed by extract from a different index.
 ExtractElementInst *VectorCombine::getShuffleExtract(
     ExtractElementInst *Ext0, ExtractElementInst *Ext1,
-    TTI::TargetCostKind CostKind,
     unsigned PreferredExtractIndex = InvalidIndex) const {
   auto *Index0C = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
   auto *Index1C = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
@@ -408,7 +405,6 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                                           ExtractElementInst *Ext1,
                                           const Instruction &I,
                                           ExtractElementInst *&ConvertToShuffle,
-                                          TTI::TargetCostKind CostKind,
                                           unsigned PreferredExtractIndex) {
   auto *Ext0IndexC = dyn_cast<ConstantInt>(Ext0->getIndexOperand());
   auto *Ext1IndexC = dyn_cast<ConstantInt>(Ext1->getIndexOperand());
@@ -478,8 +474,7 @@ bool VectorCombine::isExtractExtractCheap(ExtractElementInst *Ext0,
                            !Ext1->hasOneUse() * Extract1Cost;
   }
 
-  ConvertToShuffle =
-      getShuffleExtract(Ext0, Ext1, CostKind, PreferredExtractIndex);
+  ConvertToShuffle = getShuffleExtract(Ext0, Ext1, PreferredExtractIndex);
   if (ConvertToShuffle) {
     if (IsBinOp && DisableBinopExtractShuffle)
       return true;
@@ -593,8 +588,7 @@ void VectorCombine::foldExtExtBinop(ExtractElementInst *Ext0,
 }
 
 /// Match an instruction with extracted vector operands.
-bool VectorCombine::foldExtractExtract(Instruction &I,
-                                       TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldExtractExtract(Instruction &I) {
   // It is not safe to transform things like div, urem, etc. because we may
   // create undefined behavior when executing those on unknown vector elements.
   if (!isSafeToSpeculativelyExecute(&I))
@@ -626,8 +620,7 @@ bool VectorCombine::foldExtractExtract(Instruction &I,
           m_InsertElt(m_Value(), m_Value(), m_ConstantInt(InsertIndex)));
 
   ExtractElementInst *ExtractToChange;
-  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, CostKind,
-                            InsertIndex))
+  if (isExtractExtractCheap(Ext0, Ext1, I, ExtractToChange, InsertIndex))
     return false;
 
   if (ExtractToChange) {
@@ -654,8 +647,7 @@ bool VectorCombine::foldExtractExtract(Instruction &I,
 
 /// Try to replace an extract + scalar fneg + insert with a vector fneg +
 /// shuffle.
-bool VectorCombine::foldInsExtFNeg(Instruction &I,
-                                   TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldInsExtFNeg(Instruction &I) {
   // Match an insert (op (extract)) pattern.
   Value *DestVec;
   uint64_t Index;
@@ -718,8 +710,7 @@ bool VectorCombine::foldInsExtFNeg(Instruction &I,
 /// If this is a bitcast of a shuffle, try to bitcast the source vector to the
 /// destination type followed by shuffle. This can enable further transforms by
 /// moving bitcasts or shuffles together.
-bool VectorCombine::foldBitcastShuffle(Instruction &I,
-                                       TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldBitcastShuffle(Instruction &I) {
   Value *V0, *V1;
   ArrayRef<int> Mask;
   if (!match(&I, m_BitCast(m_OneUse(
@@ -808,8 +799,7 @@ bool VectorCombine::foldBitcastShuffle(Instruction &I,
 /// VP Intrinsics whose vector operands are both splat values may be simplified
 /// into the scalar version of the operation and the result splatted. This
 /// can lead to scalarization down the line.
-bool VectorCombine::scalarizeVPIntrinsic(Instruction &I,
-                                         TTI::TargetCostKind CostKind) {
+bool VectorCombine::scalarizeVPIntrinsic(Instruction &I) {
   if (!isa<VPIntrinsic>(I))
     return false;
   VPIntrinsic &VPI = cast<VPIntrinsic>(I);
@@ -929,8 +919,7 @@ bool VectorCombine::scalarizeVPIntrinsic(Instruction &I,
 
 /// Match a vector binop or compare instruction with at least one inserted
 /// scalar operand and convert to scalar binop/cmp followed by insertelement.
-bool VectorCombine::scalarizeBinopOrCmp(Instruction &I,
-                                        TTI::TargetCostKind CostKind) {
+bool VectorCombine::scalarizeBinopOrCmp(Instruction &I) {
   CmpInst::Predicate Pred = CmpInst::BAD_ICMP_PREDICATE;
   Value *Ins0, *Ins1;
   if (!match(&I, m_BinOp(m_Value(Ins0), m_Value(Ins1))) &&
@@ -1058,8 +1047,7 @@ bool VectorCombine::scalarizeBinopOrCmp(Instruction &I,
 /// Try to combine a scalar binop + 2 scalar compares of extracted elements of
 /// a vector into vector operations followed by extract. Note: The SLP pass
 /// may miss this pattern because of implementation problems.
-bool VectorCombine::foldExtractedCmps(Instruction &I,
-                                      TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldExtractedCmps(Instruction &I) {
   auto *BI = dyn_cast<BinaryOperator>(&I);
 
   // We are looking for a scalar binop of booleans.
@@ -1337,8 +1325,7 @@ bool VectorCombine::foldSingleElementStore(Instruction &I) {
 }
 
 /// Try to scalarize vector loads feeding extractelement instructions.
-bool VectorCombine::scalarizeLoadExtract(Instruction &I,
-                                         TTI::TargetCostKind CostKind) {
+bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
   Value *Ptr;
   if (!match(&I, m_Load(m_Value(Ptr))))
     return false;
@@ -1434,8 +1421,7 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I,
 
 /// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
 ///           -->  "binop (shuffle), (shuffle)".
-bool VectorCombine::foldPermuteOfBinops(Instruction &I,
-                                        TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
   BinaryOperator *BinOp;
   ArrayRef<int> OuterMask;
   if (!match(&I,
@@ -1528,8 +1514,7 @@ bool VectorCombine::foldPermuteOfBinops(Instruction &I,
 }
 
 /// Try to convert "shuffle (binop), (binop)" into "binop (shuffle), (shuffle)".
-bool VectorCombine::foldShuffleOfBinops(Instruction &I,
-                                        TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
   BinaryOperator *B0, *B1;
   ArrayRef<int> OldMask;
   if (!match(&I, m_Shuffle(m_OneUse(m_BinOp(B0)), m_OneUse(m_BinOp(B1)),
@@ -1616,8 +1601,7 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I,
 
 /// Try to convert "shuffle (castop), (castop)" with a shared castop operand
 /// into "castop (shuffle)".
-bool VectorCombine::foldShuffleOfCastops(Instruction &I,
-                                         TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
   Value *V0, *V1;
   ArrayRef<int> OldMask;
   if (!match(&I, m_Shuffle(m_Value(V0), m_Value(V1), m_Mask(OldMask))))
@@ -1720,8 +1704,7 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I,
 
 /// Try to convert "shuffle (shuffle x, undef), (shuffle y, undef)"
 /// into "shuffle x, y".
-bool VectorCombine::foldShuffleOfShuffles(Instruction &I,
-                                          TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldShuffleOfShuffles(Instruction &I) {
   Value *V0, *V1;
   UndefValue *U0, *U1;
   ArrayRef<int> OuterMask, InnerMask0, InnerMask1;
@@ -1809,8 +1792,7 @@ bool VectorCombine::foldShuffleOfShuffles(Instruction &I,
 
 /// Try to convert
 /// "shuffle (intrinsic), (intrinsic)" into "intrinsic (shuffle), (shuffle)".
-bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I,
-                                            TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldShuffleOfIntrinsics(Instruction &I) {
   Value *V0, *V1;
   ArrayRef<int> OldMask;
   if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)),
@@ -2048,8 +2030,7 @@ static Value *generateNewInstTree(ArrayRef<InstLane> Item, FixedVectorType *Ty,
 // Starting from a shuffle, look up through operands tracking the shuffled index
 // of each lane. If we can simplify away the shuffles to identities then
 // do so.
-bool VectorCombine::foldShuffleToIdentity(Instruction &I,
-                                          TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldShuffleToIdentity(Instruction &I) {
   auto *Ty = dyn_cast<FixedVectorType>(I.getType());
   if (!Ty || I.use_empty())
     return false;
@@ -2333,8 +2314,7 @@ bool VectorCombine::foldShuffleFromReductions(Instruction &I) {
 /// reduce(trunc(x)) -> trunc(reduce(x)).
 /// reduce(sext(x)) -> sext(reduce(x)).
 /// reduce(zext(x)) -> zext(reduce(x)).
-bool VectorCombine::foldCastFromReductions(Instruction &I,
-                                           TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldCastFromReductions(Instruction &I) {
   auto *II = dyn_cast<IntrinsicInst>(&I);
   if (!II)
     return false;
@@ -2718,7 +2698,7 @@ bool VectorCombine::foldSelectShuffle(Instruction &I, bool FromReduction) {
 /// lshr((zext(x),y) -> zext(lshr(x,trunc(y)))
 /// Cost model calculations takes into account if zext(x) has other users and
 /// whether it can be propagated through them too.
-bool VectorCombine::shrinkType(Instruction &I, TTI::TargetCostKind CostKind) {
+bool VectorCombine::shrinkType(Instruction &I) {
   Value *ZExted, *OtherOperand;
   if (!match(&I, m_c_BitwiseLogic(m_ZExt(m_Value(ZExted)),
                                   m_Value(OtherOperand))) &&
@@ -2809,8 +2789,7 @@ bool VectorCombine::shrinkType(Instruction &I, TTI::TargetCostKind CostKind) {
 
 /// insert (DstVec, (extract SrcVec, ExtIdx), InsIdx) -->
 /// shuffle (DstVec, SrcVec, Mask)
-bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I,
-                                              TTI::TargetCostKind CostKind) {
+bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
   Value *DstVec, *SrcVec;
   uint64_t ExtIdx, InsIdx;
   if (!match(&I,
@@ -2865,7 +2844,6 @@ bool VectorCombine::run() {
   bool MadeChange = false;
   auto FoldInst = [this, &MadeChange](Instruction &I) {
     Builder.SetInsertPoint(&I);
-    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
     bool IsFixedVectorType = isa<FixedVectorType>(I.getType());
     auto Opcode = I.getOpcode();
@@ -2876,10 +2854,10 @@ bool VectorCombine::run() {
     if (IsFixedVectorType) {
       switch (Opcode) {
       case Instruction::InsertElement:
-        MadeChange |= vectorizeLoadInsert(I, CostKind);
+        MadeChange |= vectorizeLoadInsert(I);
         break;
       case Instruction::ShuffleVector:
-        MadeChange |= widenSubvectorLoad(I, CostKind);
+        MadeChange |= widenSubvectorLoad(I);
         break;
       default:
         break;
@@ -2889,9 +2867,9 @@ bool VectorCombine::run() {
     // This transform works with scalable and fixed vectors
     // TODO: Identify and allow other scalable transforms
     if (isa<VectorType>(I.getType())) {
-      MadeChange |= scalarizeBinopOrCmp(I, CostKind);
-      MadeChange |= scalarizeLoadExtract(I, CostKind);
-      MadeChange |= scalarizeVPIntrinsic(I, CostKind);
+      MadeChange |= scalarizeBinopOrCmp(I);
+      MadeChange |= scalarizeLoadExtract(I);
+      MadeChange |= scalarizeVPIntrinsic(I);
     }
 
     if (Opcode == Instruction::Store)
@@ -2908,39 +2886,39 @@ bool VectorCombine::run() {
     if (IsFixedVectorType) {
       switch (Opcode) {
       case Instruction::InsertElement:
-        MadeChange |= foldInsExtFNeg(I, CostKind);
-        MadeChange |= foldInsExtVectorToShuffle(I, CostKind);
+        MadeChange |= foldInsExtFNeg(I);
+        MadeChange |= foldInsExtVectorToShuffle(I);
         break;
       case Instruction::ShuffleVector:
-        MadeChange |= foldPermuteOfBinops(I, CostKind);
-        MadeChange |= foldShuffleOfBinops(I, CostKind);
-        MadeChange |= foldShuffleOfCastops(I, CostKind);
-        MadeChange |= foldShuffleOfShuffles(I, CostKind);
-        MadeChange |= foldShuffleOfIntrinsics(I, CostKind);
-        MadeChange |= foldSelectShuffle(I, CostKind);
-        MadeChange |= foldShuffleToIdentity(I, CostKind);
+        MadeChange |= foldPermuteOfBinops(I);
+        MadeChange |= foldShuffleOfBinops(I);
+        MadeChange |= foldShuffleOfCastops(I);
+        MadeChange |= foldShuffleOfShuffles(I);
+        MadeChange |= foldShuffleOfIntrinsics(I);
+        MadeChange |= foldSelectShuffle(I);
+        MadeChange |= foldShuffleToIdentity(I);
         break;
       case Instruction::BitCast:
-        MadeChange |= foldBitcastShuffle(I, CostKind);
+        MadeChange |= foldBitcastShuffle(I);
         break;
       default:
-        MadeChange |= shrinkType(I, CostKind);
+        MadeChange |= shrinkType(I);
         break;
       }
     } else {
       switch (Opcode) {
       case Instruction::Call:
         MadeChange |= foldShuffleFromReductions(I);
-        MadeChange |= foldCastFromReductions(I, CostKind);
+        MadeChange |= foldCastFromReductions(I);
         break;
       case Instruction::ICmp:
       case Instruction::FCmp:
-        MadeChange |= foldExtractExtract(I, CostKind);
+        MadeChange |= foldExtractExtract(I);
         break;
       default:
         if (Instruction::isBinaryOp(Opcode)) {
-          MadeChange |= foldExtractExtract(I, CostKind);
-          MadeChange |= foldExtractedCmps(I, CostKind);
+          MadeChange |= foldExtractExtract(I);
+          MadeChange |= foldExtractedCmps(I);
         }
         break;
       }
@@ -2982,7 +2960,8 @@ PreservedAnalyses VectorCombinePass::run(Function &F,
   DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F);
   AAResults &AA = FAM.getResult<AAManager>(F);
   const DataLayout *DL = &F.getDataLayout();
-  VectorCombine Combiner(F, TTI, DT, AA, AC, DL, TryEarlyFoldsOnly);
+  VectorCombine Combiner(F, TTI, DT, AA, AC, DL, TTI::TCK_RecipThroughput,
+                         TryEarlyFoldsOnly);
   if (!Combiner.run())
     return PreservedAnalyses::all();
   PreservedAnalyses PA;