@@ -3883,7 +3883,6 @@ class BoUpSLP {
38833883 enum CombinedOpcode {
38843884 NotCombinedOp = -1,
38853885 MinMax = Instruction::OtherOpsEnd + 1,
3886- FMulAdd,
38873886 };
38883887 CombinedOpcode CombinedOp = NotCombinedOp;
38893888
@@ -4034,9 +4033,6 @@ class BoUpSLP {
40344033 /// Returns true if any scalar in the list is a copyable element.
40354034 bool hasCopyableElements() const { return !CopyableElements.empty(); }
40364035
4037- /// Returns the state of the operations.
4038- const InstructionsState &getOperations() const { return S; }
4039-
40404036 /// When ReuseReorderShuffleIndices is empty it just returns position of \p
40414037 /// V within vector of Scalars. Otherwise, try to remap on its reuse index.
40424038 unsigned findLaneForValue(Value *V) const {
@@ -11991,81 +11987,6 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) {
1199111987 }
1199211988}
1199311989
11994- static InstructionCost canConvertToFMA(ArrayRef<Value *> VL,
11995- const InstructionsState &S,
11996- DominatorTree &DT, const DataLayout &DL,
11997- TargetTransformInfo &TTI,
11998- const TargetLibraryInfo &TLI) {
11999- assert(all_of(VL,
12000- [](Value *V) {
12001- return V->getType()->getScalarType()->isFloatingPointTy();
12002- }) &&
12003- "Can only convert to FMA for floating point types");
12004- assert(S.isAddSubLikeOp() && "Can only convert to FMA for add/sub");
12005-
12006- auto CheckForContractable = [&](ArrayRef<Value *> VL) {
12007- FastMathFlags FMF;
12008- FMF.set();
12009- for (Value *V : VL) {
12010- auto *I = dyn_cast<Instruction>(V);
12011- if (!I)
12012- continue;
12013- // TODO: support for copyable elements.
12014- Instruction *MatchingI = S.getMatchingMainOpOrAltOp(I);
12015- if (S.getMainOp() != MatchingI && S.getAltOp() != MatchingI)
12016- continue;
12017- if (auto *FPCI = dyn_cast<FPMathOperator>(I))
12018- FMF &= FPCI->getFastMathFlags();
12019- }
12020- return FMF.allowContract();
12021- };
12022- if (!CheckForContractable(VL))
12023- return InstructionCost::getInvalid();
12024- // fmul also should be contractable
12025- InstructionsCompatibilityAnalysis Analysis(DT, DL, TTI, TLI);
12026- SmallVector<BoUpSLP::ValueList> Operands = Analysis.buildOperands(S, VL);
12027-
12028- InstructionsState OpS = getSameOpcode(Operands.front(), TLI);
12029- if (!OpS.valid())
12030- return InstructionCost::getInvalid();
12031- if (OpS.isAltShuffle() || OpS.getOpcode() != Instruction::FMul)
12032- return InstructionCost::getInvalid();
12033- if (!CheckForContractable(Operands.front()))
12034- return InstructionCost::getInvalid();
12035- // Compare the costs.
12036- InstructionCost FMulPlusFAddCost = 0;
12037- InstructionCost FMACost = 0;
12038- constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
12039- FastMathFlags FMF;
12040- FMF.set();
12041- for (Value *V : VL) {
12042- auto *I = dyn_cast<Instruction>(V);
12043- if (!I)
12044- continue;
12045- if (auto *FPCI = dyn_cast<FPMathOperator>(I))
12046- FMF &= FPCI->getFastMathFlags();
12047- FMulPlusFAddCost += TTI.getInstructionCost(I, CostKind);
12048- }
12049- unsigned NumOps = 0;
12050- for (auto [V, Op] : zip(VL, Operands.front())) {
12051- auto *I = dyn_cast<Instruction>(Op);
12052- if (!I || !I->hasOneUse()) {
12053- FMACost += TTI.getInstructionCost(cast<Instruction>(V), CostKind);
12054- if (I)
12055- FMACost += TTI.getInstructionCost(I, CostKind);
12056- continue;
12057- }
12058- ++NumOps;
12059- if (auto *FPCI = dyn_cast<FPMathOperator>(I))
12060- FMF &= FPCI->getFastMathFlags();
12061- FMulPlusFAddCost += TTI.getInstructionCost(I, CostKind);
12062- }
12063- Type *Ty = VL.front()->getType();
12064- IntrinsicCostAttributes ICA(Intrinsic::fmuladd, Ty, {Ty, Ty, Ty}, FMF);
12065- FMACost += NumOps * TTI.getIntrinsicInstrCost(ICA, CostKind);
12066- return FMACost < FMulPlusFAddCost ? FMACost : InstructionCost::getInvalid();
12067- }
12068-
1206911990void BoUpSLP::transformNodes() {
1207011991 constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1207111992 BaseGraphSize = VectorizableTree.size();
@@ -12434,25 +12355,6 @@ void BoUpSLP::transformNodes() {
1243412355 }
1243512356 break;
1243612357 }
12437- case Instruction::FSub:
12438- case Instruction::FAdd: {
12439- // Check if possible to convert (a*b)+c to fma.
12440- if (E.State != TreeEntry::Vectorize ||
12441- !E.getOperations().isAddSubLikeOp())
12442- break;
12443- if (!canConvertToFMA(E.Scalars, E.getOperations(), *DT, *DL, *TTI, *TLI)
12444- .isValid())
12445- break;
12446- // This node is a fmuladd node.
12447- E.CombinedOp = TreeEntry::FMulAdd;
12448- TreeEntry *FMulEntry = getOperandEntry(&E, 0);
12449- if (FMulEntry->UserTreeIndex &&
12450- FMulEntry->State == TreeEntry::Vectorize) {
12451- // The FMul node is part of the combined fmuladd node.
12452- FMulEntry->State = TreeEntry::CombinedVectorize;
12453- }
12454- break;
12455- }
1245612358 default:
1245712359 break;
1245812360 }
@@ -13685,11 +13587,6 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1368513587 }
1368613588 return IntrinsicCost;
1368713589 };
13688- auto GetFMulAddCost = [&, &TTI = *TTI](const InstructionsState &S,
13689- Instruction *VI) {
13690- InstructionCost Cost = canConvertToFMA(VI, S, *DT, *DL, TTI, *TLI);
13691- return Cost;
13692- };
1369313590 switch (ShuffleOrOp) {
1369413591 case Instruction::PHI: {
1369513592 // Count reused scalars.
@@ -14030,30 +13927,6 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1403013927 };
1403113928 return GetCostDiff(GetScalarCost, GetVectorCost);
1403213929 }
14033- case TreeEntry::FMulAdd: {
14034- auto GetScalarCost = [&](unsigned Idx) {
14035- if (isa<PoisonValue>(UniqueValues[Idx]))
14036- return InstructionCost(TTI::TCC_Free);
14037- return GetFMulAddCost(E->getOperations(),
14038- cast<Instruction>(UniqueValues[Idx]));
14039- };
14040- auto GetVectorCost = [&, &TTI = *TTI](InstructionCost CommonCost) {
14041- FastMathFlags FMF;
14042- FMF.set();
14043- for (Value *V : E->Scalars) {
14044- if (auto *FPCI = dyn_cast<FPMathOperator>(V)) {
14045- FMF &= FPCI->getFastMathFlags();
14046- if (auto *FPCIOp = dyn_cast<FPMathOperator>(FPCI->getOperand(0)))
14047- FMF &= FPCIOp->getFastMathFlags();
14048- }
14049- }
14050- IntrinsicCostAttributes ICA(Intrinsic::fmuladd, VecTy,
14051- {VecTy, VecTy, VecTy}, FMF);
14052- InstructionCost VecCost = TTI.getIntrinsicInstrCost(ICA, CostKind);
14053- return VecCost + CommonCost;
14054- };
14055- return GetCostDiff(GetScalarCost, GetVectorCost);
14056- }
1405713930 case Instruction::FNeg:
1405813931 case Instruction::Add:
1405913932 case Instruction::FAdd:
@@ -14091,16 +13964,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1409113964 }
1409213965 TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Op1);
1409313966 TTI::OperandValueInfo Op2Info = TTI::getOperandInfo(Op2);
14094- InstructionCost ScalarCost = TTI->getArithmeticInstrCost(
14095- ShuffleOrOp, OrigScalarTy, CostKind, Op1Info, Op2Info, Operands);
14096- if (auto *I = dyn_cast<Instruction>(UniqueValues[Idx]);
14097- I && (ShuffleOrOp == Instruction::FAdd ||
14098- ShuffleOrOp == Instruction::FSub)) {
14099- InstructionCost IntrinsicCost = GetFMulAddCost(E->getOperations(), I);
14100- if (IntrinsicCost.isValid())
14101- ScalarCost = IntrinsicCost;
14102- }
14103- return ScalarCost;
13967+ return TTI->getArithmeticInstrCost(ShuffleOrOp, OrigScalarTy, CostKind,
13968+ Op1Info, Op2Info, Operands);
1410413969 };
1410513970 auto GetVectorCost = [=](InstructionCost CommonCost) {
1410613971 if (ShuffleOrOp == Instruction::And && It != MinBWs.end()) {
@@ -22729,21 +22594,11 @@ class HorizontalReduction {
2272922594 /// Try to find a reduction tree.
2273022595 bool matchAssociativeReduction(BoUpSLP &R, Instruction *Root,
2273122596 ScalarEvolution &SE, const DataLayout &DL,
22732- const TargetLibraryInfo &TLI,
22733- DominatorTree &DT, TargetTransformInfo &TTI) {
22597+ const TargetLibraryInfo &TLI) {
2273422598 RdxKind = HorizontalReduction::getRdxKind(Root);
2273522599 if (!isVectorizable(RdxKind, Root))
2273622600 return false;
2273722601
22738- // FMA reduction root - skip.
22739- auto CheckForFMA = [&](Instruction *I) {
22740- return RdxKind == RecurKind::FAdd &&
22741- canConvertToFMA(I, getSameOpcode(I, TLI), DT, DL, TTI, TLI)
22742- .isValid();
22743- };
22744- if (CheckForFMA(Root))
22745- return false;
22746-
2274722602 // Analyze "regular" integer/FP types for reductions - no target-specific
2274822603 // types or pointers.
2274922604 Type *Ty = Root->getType();
@@ -22781,7 +22636,7 @@ class HorizontalReduction {
2278122636 // Also, do not try to reduce const values, if the operation is not
2278222637 // foldable.
2278322638 if (!EdgeInst || Level > RecursionMaxDepth ||
22784- getRdxKind(EdgeInst) != RdxKind || CheckForFMA(EdgeInst) ||
22639+ getRdxKind(EdgeInst) != RdxKind ||
2278522640 IsCmpSelMinMax != isCmpSelMinMax(EdgeInst) ||
2278622641 !hasRequiredNumberOfUses(IsCmpSelMinMax, EdgeInst) ||
2278722642 !isVectorizable(RdxKind, EdgeInst) ||
@@ -24350,13 +24205,13 @@ bool SLPVectorizerPass::vectorizeHorReduction(
2435024205 Stack.emplace(SelectRoot(), 0);
2435124206 SmallPtrSet<Value *, 8> VisitedInstrs;
2435224207 bool Res = false;
24353- auto TryToReduce = [this, &R, TTI = TTI ](Instruction *Inst) -> Value * {
24208+ auto && TryToReduce = [this, &R](Instruction *Inst) -> Value * {
2435424209 if (R.isAnalyzedReductionRoot(Inst))
2435524210 return nullptr;
2435624211 if (!isReductionCandidate(Inst))
2435724212 return nullptr;
2435824213 HorizontalReduction HorRdx;
24359- if (!HorRdx.matchAssociativeReduction(R, Inst, *SE, *DL, *TLI, *DT, *TTI ))
24214+ if (!HorRdx.matchAssociativeReduction(R, Inst, *SE, *DL, *TLI))
2436024215 return nullptr;
2436124216 return HorRdx.tryToReduce(R, *DL, TTI, *TLI, AC);
2436224217 };
@@ -24422,12 +24277,6 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
2442224277
2442324278 if (!isa<BinaryOperator, CmpInst>(I) || isa<VectorType>(I->getType()))
2442424279 return false;
24425- // Skip potential FMA candidates.
24426- if ((I->getOpcode() == Instruction::FAdd ||
24427- I->getOpcode() == Instruction::FSub) &&
24428- canConvertToFMA(I, getSameOpcode(I, *TLI), *DT, *DL, *TTI, *TLI)
24429- .isValid())
24430- return false;
2443124280
2443224281 Value *P = I->getParent();
2443324282
0 commit comments