@@ -3883,7 +3883,6 @@ class BoUpSLP {
38833883 enum CombinedOpcode {
38843884 NotCombinedOp = -1,
38853885 MinMax = Instruction::OtherOpsEnd + 1,
3886- FMulAdd,
38873886 };
38883887 CombinedOpcode CombinedOp = NotCombinedOp;
38893888
@@ -4034,9 +4033,6 @@ class BoUpSLP {
40344033 /// Returns true if any scalar in the list is a copyable element.
40354034 bool hasCopyableElements() const { return !CopyableElements.empty(); }
40364035
4037- /// Returns the state of the operations.
4038- const InstructionsState &getOperations() const { return S; }
4039-
40404036 /// When ReuseReorderShuffleIndices is empty it just returns position of \p
40414037 /// V within vector of Scalars. Otherwise, try to remap on its reuse index.
40424038 unsigned findLaneForValue(Value *V) const {
@@ -11991,82 +11987,6 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) {
1199111987 }
1199211988}
1199311989
11994- static InstructionCost canConvertToFMA(ArrayRef<Value *> VL,
11995- const InstructionsState &S,
11996- DominatorTree &DT, const DataLayout &DL,
11997- TargetTransformInfo &TTI,
11998- const TargetLibraryInfo &TLI) {
11999- assert(all_of(VL,
12000- [](Value *V) {
12001- return V->getType()->getScalarType()->isFloatingPointTy();
12002- }) &&
12003- "Can only convert to FMA for floating point types");
12004- assert(S.isAddSubLikeOp() && "Can only convert to FMA for add/sub");
12005-
12006- auto CheckForContractable = [&](ArrayRef<Value *> VL) {
12007- FastMathFlags FMF;
12008- FMF.set();
12009- for (Value *V : VL) {
12010- auto *I = dyn_cast<Instruction>(V);
12011- if (!I)
12012- continue;
12013- // TODO: support for copyable elements.
12014- Instruction *MatchingI = S.getMatchingMainOpOrAltOp(I);
12015- if (S.getMainOp() != MatchingI && S.getAltOp() != MatchingI)
12016- continue;
12017- if (auto *FPCI = dyn_cast<FPMathOperator>(I))
12018- FMF &= FPCI->getFastMathFlags();
12019- }
12020- return FMF.allowContract();
12021- };
12022- if (!CheckForContractable(VL))
12023- return InstructionCost::getInvalid();
12024- // fmul also should be contractable
12025- InstructionsCompatibilityAnalysis Analysis(DT, DL, TTI, TLI);
12026- SmallVector<BoUpSLP::ValueList> Operands = Analysis.buildOperands(S, VL);
12027-
12028- InstructionsState OpS = getSameOpcode(Operands.front(), TLI);
12029- if (!OpS.valid())
12030- return InstructionCost::getInvalid();
12031- if (OpS.isAltShuffle() || OpS.getOpcode() != Instruction::FMul)
12032- return InstructionCost::getInvalid();
12033- if (!CheckForContractable(Operands.front()))
12034- return InstructionCost::getInvalid();
12035- // Compare the costs.
12036- InstructionCost FMulPlusFAddCost = 0;
12037- InstructionCost FMACost = 0;
12038- constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
12039- FastMathFlags FMF;
12040- FMF.set();
12041- for (Value *V : VL) {
12042- auto *I = dyn_cast<Instruction>(V);
12043- if (!I)
12044- continue;
12045- if (auto *FPCI = dyn_cast<FPMathOperator>(I))
12046- FMF &= FPCI->getFastMathFlags();
12047- FMulPlusFAddCost += TTI.getInstructionCost(I, CostKind);
12048- }
12049- unsigned NumOps = 0;
12050- for (auto [V, Op] : zip(VL, Operands.front())) {
12051- auto *I = dyn_cast<Instruction>(Op);
12052- if (!I || !I->hasOneUse()) {
12053- if (auto *OpI = dyn_cast<Instruction>(V))
12054- FMACost += TTI.getInstructionCost(OpI, CostKind);
12055- if (I)
12056- FMACost += TTI.getInstructionCost(I, CostKind);
12057- continue;
12058- }
12059- ++NumOps;
12060- if (auto *FPCI = dyn_cast<FPMathOperator>(I))
12061- FMF &= FPCI->getFastMathFlags();
12062- FMulPlusFAddCost += TTI.getInstructionCost(I, CostKind);
12063- }
12064- Type *Ty = VL.front()->getType();
12065- IntrinsicCostAttributes ICA(Intrinsic::fmuladd, Ty, {Ty, Ty, Ty}, FMF);
12066- FMACost += NumOps * TTI.getIntrinsicInstrCost(ICA, CostKind);
12067- return FMACost < FMulPlusFAddCost ? FMACost : InstructionCost::getInvalid();
12068- }
12069-
1207011990void BoUpSLP::transformNodes() {
1207111991 constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1207211992 BaseGraphSize = VectorizableTree.size();
@@ -12435,25 +12355,6 @@ void BoUpSLP::transformNodes() {
1243512355 }
1243612356 break;
1243712357 }
12438- case Instruction::FSub:
12439- case Instruction::FAdd: {
12440- // Check if possible to convert (a*b)+c to fma.
12441- if (E.State != TreeEntry::Vectorize ||
12442- !E.getOperations().isAddSubLikeOp())
12443- break;
12444- if (!canConvertToFMA(E.Scalars, E.getOperations(), *DT, *DL, *TTI, *TLI)
12445- .isValid())
12446- break;
12447- // This node is a fmuladd node.
12448- E.CombinedOp = TreeEntry::FMulAdd;
12449- TreeEntry *FMulEntry = getOperandEntry(&E, 0);
12450- if (FMulEntry->UserTreeIndex &&
12451- FMulEntry->State == TreeEntry::Vectorize) {
12452- // The FMul node is part of the combined fmuladd node.
12453- FMulEntry->State = TreeEntry::CombinedVectorize;
12454- }
12455- break;
12456- }
1245712358 default:
1245812359 break;
1245912360 }
@@ -13686,11 +13587,6 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1368613587 }
1368713588 return IntrinsicCost;
1368813589 };
13689- auto GetFMulAddCost = [&, &TTI = *TTI](const InstructionsState &S,
13690- Instruction *VI) {
13691- InstructionCost Cost = canConvertToFMA(VI, S, *DT, *DL, TTI, *TLI);
13692- return Cost;
13693- };
1369413590 switch (ShuffleOrOp) {
1369513591 case Instruction::PHI: {
1369613592 // Count reused scalars.
@@ -14031,30 +13927,6 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1403113927 };
1403213928 return GetCostDiff(GetScalarCost, GetVectorCost);
1403313929 }
14034- case TreeEntry::FMulAdd: {
14035- auto GetScalarCost = [&](unsigned Idx) {
14036- if (isa<PoisonValue>(UniqueValues[Idx]))
14037- return InstructionCost(TTI::TCC_Free);
14038- return GetFMulAddCost(E->getOperations(),
14039- cast<Instruction>(UniqueValues[Idx]));
14040- };
14041- auto GetVectorCost = [&, &TTI = *TTI](InstructionCost CommonCost) {
14042- FastMathFlags FMF;
14043- FMF.set();
14044- for (Value *V : E->Scalars) {
14045- if (auto *FPCI = dyn_cast<FPMathOperator>(V)) {
14046- FMF &= FPCI->getFastMathFlags();
14047- if (auto *FPCIOp = dyn_cast<FPMathOperator>(FPCI->getOperand(0)))
14048- FMF &= FPCIOp->getFastMathFlags();
14049- }
14050- }
14051- IntrinsicCostAttributes ICA(Intrinsic::fmuladd, VecTy,
14052- {VecTy, VecTy, VecTy}, FMF);
14053- InstructionCost VecCost = TTI.getIntrinsicInstrCost(ICA, CostKind);
14054- return VecCost + CommonCost;
14055- };
14056- return GetCostDiff(GetScalarCost, GetVectorCost);
14057- }
1405813930 case Instruction::FNeg:
1405913931 case Instruction::Add:
1406013932 case Instruction::FAdd:
@@ -14092,16 +13964,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1409213964 }
1409313965 TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Op1);
1409413966 TTI::OperandValueInfo Op2Info = TTI::getOperandInfo(Op2);
14095- InstructionCost ScalarCost = TTI->getArithmeticInstrCost(
14096- ShuffleOrOp, OrigScalarTy, CostKind, Op1Info, Op2Info, Operands);
14097- if (auto *I = dyn_cast<Instruction>(UniqueValues[Idx]);
14098- I && (ShuffleOrOp == Instruction::FAdd ||
14099- ShuffleOrOp == Instruction::FSub)) {
14100- InstructionCost IntrinsicCost = GetFMulAddCost(E->getOperations(), I);
14101- if (IntrinsicCost.isValid())
14102- ScalarCost = IntrinsicCost;
14103- }
14104- return ScalarCost;
13967+ return TTI->getArithmeticInstrCost(ShuffleOrOp, OrigScalarTy, CostKind,
13968+ Op1Info, Op2Info, Operands);
1410513969 };
1410613970 auto GetVectorCost = [=](InstructionCost CommonCost) {
1410713971 if (ShuffleOrOp == Instruction::And && It != MinBWs.end()) {
@@ -22730,21 +22594,11 @@ class HorizontalReduction {
2273022594 /// Try to find a reduction tree.
2273122595 bool matchAssociativeReduction(BoUpSLP &R, Instruction *Root,
2273222596 ScalarEvolution &SE, const DataLayout &DL,
22733- const TargetLibraryInfo &TLI,
22734- DominatorTree &DT, TargetTransformInfo &TTI) {
22597+ const TargetLibraryInfo &TLI) {
2273522598 RdxKind = HorizontalReduction::getRdxKind(Root);
2273622599 if (!isVectorizable(RdxKind, Root))
2273722600 return false;
2273822601
22739- // FMA reduction root - skip.
22740- auto CheckForFMA = [&](Instruction *I) {
22741- return RdxKind == RecurKind::FAdd &&
22742- canConvertToFMA(I, getSameOpcode(I, TLI), DT, DL, TTI, TLI)
22743- .isValid();
22744- };
22745- if (CheckForFMA(Root))
22746- return false;
22747-
2274822602 // Analyze "regular" integer/FP types for reductions - no target-specific
2274922603 // types or pointers.
2275022604 Type *Ty = Root->getType();
@@ -22782,7 +22636,7 @@ class HorizontalReduction {
2278222636 // Also, do not try to reduce const values, if the operation is not
2278322637 // foldable.
2278422638 if (!EdgeInst || Level > RecursionMaxDepth ||
22785- getRdxKind(EdgeInst) != RdxKind || CheckForFMA(EdgeInst) ||
22639+ getRdxKind(EdgeInst) != RdxKind ||
2278622640 IsCmpSelMinMax != isCmpSelMinMax(EdgeInst) ||
2278722641 !hasRequiredNumberOfUses(IsCmpSelMinMax, EdgeInst) ||
2278822642 !isVectorizable(RdxKind, EdgeInst) ||
@@ -24351,13 +24205,13 @@ bool SLPVectorizerPass::vectorizeHorReduction(
2435124205 Stack.emplace(SelectRoot(), 0);
2435224206 SmallPtrSet<Value *, 8> VisitedInstrs;
2435324207 bool Res = false;
24354- auto TryToReduce = [this, &R, TTI = TTI ](Instruction *Inst) -> Value * {
24208+ auto && TryToReduce = [this, &R](Instruction *Inst) -> Value * {
2435524209 if (R.isAnalyzedReductionRoot(Inst))
2435624210 return nullptr;
2435724211 if (!isReductionCandidate(Inst))
2435824212 return nullptr;
2435924213 HorizontalReduction HorRdx;
24360- if (!HorRdx.matchAssociativeReduction(R, Inst, *SE, *DL, *TLI, *DT, *TTI ))
24214+ if (!HorRdx.matchAssociativeReduction(R, Inst, *SE, *DL, *TLI))
2436124215 return nullptr;
2436224216 return HorRdx.tryToReduce(R, *DL, TTI, *TLI, AC);
2436324217 };
@@ -24423,12 +24277,6 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
2442324277
2442424278 if (!isa<BinaryOperator, CmpInst>(I) || isa<VectorType>(I->getType()))
2442524279 return false;
24426- // Skip potential FMA candidates.
24427- if ((I->getOpcode() == Instruction::FAdd ||
24428- I->getOpcode() == Instruction::FSub) &&
24429- canConvertToFMA(I, getSameOpcode(I, *TLI), *DT, *DL, *TTI, *TLI)
24430- .isValid())
24431- return false;
2443224280
2443324281 Value *P = I->getParent();
2443424282
0 commit comments