@@ -3883,7 +3883,6 @@ class BoUpSLP {
3883
3883
enum CombinedOpcode {
3884
3884
NotCombinedOp = -1,
3885
3885
MinMax = Instruction::OtherOpsEnd + 1,
3886
- FMulAdd,
3887
3886
};
3888
3887
CombinedOpcode CombinedOp = NotCombinedOp;
3889
3888
@@ -4034,9 +4033,6 @@ class BoUpSLP {
4034
4033
/// Returns true if any scalar in the list is a copyable element.
4035
4034
bool hasCopyableElements() const { return !CopyableElements.empty(); }
4036
4035
4037
- /// Returns the state of the operations.
4038
- const InstructionsState &getOperations() const { return S; }
4039
-
4040
4036
/// When ReuseReorderShuffleIndices is empty it just returns position of \p
4041
4037
/// V within vector of Scalars. Otherwise, try to remap on its reuse index.
4042
4038
unsigned findLaneForValue(Value *V) const {
@@ -11991,81 +11987,6 @@ void BoUpSLP::reorderGatherNode(TreeEntry &TE) {
11991
11987
}
11992
11988
}
11993
11989
11994
- static InstructionCost canConvertToFMA(ArrayRef<Value *> VL,
11995
- const InstructionsState &S,
11996
- DominatorTree &DT, const DataLayout &DL,
11997
- TargetTransformInfo &TTI,
11998
- const TargetLibraryInfo &TLI) {
11999
- assert(all_of(VL,
12000
- [](Value *V) {
12001
- return V->getType()->getScalarType()->isFloatingPointTy();
12002
- }) &&
12003
- "Can only convert to FMA for floating point types");
12004
- assert(S.isAddSubLikeOp() && "Can only convert to FMA for add/sub");
12005
-
12006
- auto CheckForContractable = [&](ArrayRef<Value *> VL) {
12007
- FastMathFlags FMF;
12008
- FMF.set();
12009
- for (Value *V : VL) {
12010
- auto *I = dyn_cast<Instruction>(V);
12011
- if (!I)
12012
- continue;
12013
- // TODO: support for copyable elements.
12014
- Instruction *MatchingI = S.getMatchingMainOpOrAltOp(I);
12015
- if (S.getMainOp() != MatchingI && S.getAltOp() != MatchingI)
12016
- continue;
12017
- if (auto *FPCI = dyn_cast<FPMathOperator>(I))
12018
- FMF &= FPCI->getFastMathFlags();
12019
- }
12020
- return FMF.allowContract();
12021
- };
12022
- if (!CheckForContractable(VL))
12023
- return InstructionCost::getInvalid();
12024
- // fmul also should be contractable
12025
- InstructionsCompatibilityAnalysis Analysis(DT, DL, TTI, TLI);
12026
- SmallVector<BoUpSLP::ValueList> Operands = Analysis.buildOperands(S, VL);
12027
-
12028
- InstructionsState OpS = getSameOpcode(Operands.front(), TLI);
12029
- if (!OpS.valid())
12030
- return InstructionCost::getInvalid();
12031
- if (OpS.isAltShuffle() || OpS.getOpcode() != Instruction::FMul)
12032
- return InstructionCost::getInvalid();
12033
- if (!CheckForContractable(Operands.front()))
12034
- return InstructionCost::getInvalid();
12035
- // Compare the costs.
12036
- InstructionCost FMulPlusFAddCost = 0;
12037
- InstructionCost FMACost = 0;
12038
- constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
12039
- FastMathFlags FMF;
12040
- FMF.set();
12041
- for (Value *V : VL) {
12042
- auto *I = dyn_cast<Instruction>(V);
12043
- if (!I)
12044
- continue;
12045
- if (auto *FPCI = dyn_cast<FPMathOperator>(I))
12046
- FMF &= FPCI->getFastMathFlags();
12047
- FMulPlusFAddCost += TTI.getInstructionCost(I, CostKind);
12048
- }
12049
- unsigned NumOps = 0;
12050
- for (auto [V, Op] : zip(VL, Operands.front())) {
12051
- auto *I = dyn_cast<Instruction>(Op);
12052
- if (!I || !I->hasOneUse()) {
12053
- FMACost += TTI.getInstructionCost(cast<Instruction>(V), CostKind);
12054
- if (I)
12055
- FMACost += TTI.getInstructionCost(I, CostKind);
12056
- continue;
12057
- }
12058
- ++NumOps;
12059
- if (auto *FPCI = dyn_cast<FPMathOperator>(I))
12060
- FMF &= FPCI->getFastMathFlags();
12061
- FMulPlusFAddCost += TTI.getInstructionCost(I, CostKind);
12062
- }
12063
- Type *Ty = VL.front()->getType();
12064
- IntrinsicCostAttributes ICA(Intrinsic::fmuladd, Ty, {Ty, Ty, Ty}, FMF);
12065
- FMACost += NumOps * TTI.getIntrinsicInstrCost(ICA, CostKind);
12066
- return FMACost < FMulPlusFAddCost ? FMACost : InstructionCost::getInvalid();
12067
- }
12068
-
12069
11990
void BoUpSLP::transformNodes() {
12070
11991
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
12071
11992
BaseGraphSize = VectorizableTree.size();
@@ -12434,25 +12355,6 @@ void BoUpSLP::transformNodes() {
12434
12355
}
12435
12356
break;
12436
12357
}
12437
- case Instruction::FSub:
12438
- case Instruction::FAdd: {
12439
- // Check if possible to convert (a*b)+c to fma.
12440
- if (E.State != TreeEntry::Vectorize ||
12441
- !E.getOperations().isAddSubLikeOp())
12442
- break;
12443
- if (!canConvertToFMA(E.Scalars, E.getOperations(), *DT, *DL, *TTI, *TLI)
12444
- .isValid())
12445
- break;
12446
- // This node is a fmuladd node.
12447
- E.CombinedOp = TreeEntry::FMulAdd;
12448
- TreeEntry *FMulEntry = getOperandEntry(&E, 0);
12449
- if (FMulEntry->UserTreeIndex &&
12450
- FMulEntry->State == TreeEntry::Vectorize) {
12451
- // The FMul node is part of the combined fmuladd node.
12452
- FMulEntry->State = TreeEntry::CombinedVectorize;
12453
- }
12454
- break;
12455
- }
12456
12358
default:
12457
12359
break;
12458
12360
}
@@ -13685,11 +13587,6 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
13685
13587
}
13686
13588
return IntrinsicCost;
13687
13589
};
13688
- auto GetFMulAddCost = [&, &TTI = *TTI](const InstructionsState &S,
13689
- Instruction *VI) {
13690
- InstructionCost Cost = canConvertToFMA(VI, S, *DT, *DL, TTI, *TLI);
13691
- return Cost;
13692
- };
13693
13590
switch (ShuffleOrOp) {
13694
13591
case Instruction::PHI: {
13695
13592
// Count reused scalars.
@@ -14030,30 +13927,6 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
14030
13927
};
14031
13928
return GetCostDiff(GetScalarCost, GetVectorCost);
14032
13929
}
14033
- case TreeEntry::FMulAdd: {
14034
- auto GetScalarCost = [&](unsigned Idx) {
14035
- if (isa<PoisonValue>(UniqueValues[Idx]))
14036
- return InstructionCost(TTI::TCC_Free);
14037
- return GetFMulAddCost(E->getOperations(),
14038
- cast<Instruction>(UniqueValues[Idx]));
14039
- };
14040
- auto GetVectorCost = [&, &TTI = *TTI](InstructionCost CommonCost) {
14041
- FastMathFlags FMF;
14042
- FMF.set();
14043
- for (Value *V : E->Scalars) {
14044
- if (auto *FPCI = dyn_cast<FPMathOperator>(V)) {
14045
- FMF &= FPCI->getFastMathFlags();
14046
- if (auto *FPCIOp = dyn_cast<FPMathOperator>(FPCI->getOperand(0)))
14047
- FMF &= FPCIOp->getFastMathFlags();
14048
- }
14049
- }
14050
- IntrinsicCostAttributes ICA(Intrinsic::fmuladd, VecTy,
14051
- {VecTy, VecTy, VecTy}, FMF);
14052
- InstructionCost VecCost = TTI.getIntrinsicInstrCost(ICA, CostKind);
14053
- return VecCost + CommonCost;
14054
- };
14055
- return GetCostDiff(GetScalarCost, GetVectorCost);
14056
- }
14057
13930
case Instruction::FNeg:
14058
13931
case Instruction::Add:
14059
13932
case Instruction::FAdd:
@@ -14091,16 +13964,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
14091
13964
}
14092
13965
TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Op1);
14093
13966
TTI::OperandValueInfo Op2Info = TTI::getOperandInfo(Op2);
14094
- InstructionCost ScalarCost = TTI->getArithmeticInstrCost(
14095
- ShuffleOrOp, OrigScalarTy, CostKind, Op1Info, Op2Info, Operands);
14096
- if (auto *I = dyn_cast<Instruction>(UniqueValues[Idx]);
14097
- I && (ShuffleOrOp == Instruction::FAdd ||
14098
- ShuffleOrOp == Instruction::FSub)) {
14099
- InstructionCost IntrinsicCost = GetFMulAddCost(E->getOperations(), I);
14100
- if (IntrinsicCost.isValid())
14101
- ScalarCost = IntrinsicCost;
14102
- }
14103
- return ScalarCost;
13967
+ return TTI->getArithmeticInstrCost(ShuffleOrOp, OrigScalarTy, CostKind,
13968
+ Op1Info, Op2Info, Operands);
14104
13969
};
14105
13970
auto GetVectorCost = [=](InstructionCost CommonCost) {
14106
13971
if (ShuffleOrOp == Instruction::And && It != MinBWs.end()) {
@@ -22729,21 +22594,11 @@ class HorizontalReduction {
22729
22594
/// Try to find a reduction tree.
22730
22595
bool matchAssociativeReduction(BoUpSLP &R, Instruction *Root,
22731
22596
ScalarEvolution &SE, const DataLayout &DL,
22732
- const TargetLibraryInfo &TLI,
22733
- DominatorTree &DT, TargetTransformInfo &TTI) {
22597
+ const TargetLibraryInfo &TLI) {
22734
22598
RdxKind = HorizontalReduction::getRdxKind(Root);
22735
22599
if (!isVectorizable(RdxKind, Root))
22736
22600
return false;
22737
22601
22738
- // FMA reduction root - skip.
22739
- auto CheckForFMA = [&](Instruction *I) {
22740
- return RdxKind == RecurKind::FAdd &&
22741
- canConvertToFMA(I, getSameOpcode(I, TLI), DT, DL, TTI, TLI)
22742
- .isValid();
22743
- };
22744
- if (CheckForFMA(Root))
22745
- return false;
22746
-
22747
22602
// Analyze "regular" integer/FP types for reductions - no target-specific
22748
22603
// types or pointers.
22749
22604
Type *Ty = Root->getType();
@@ -22781,7 +22636,7 @@ class HorizontalReduction {
22781
22636
// Also, do not try to reduce const values, if the operation is not
22782
22637
// foldable.
22783
22638
if (!EdgeInst || Level > RecursionMaxDepth ||
22784
- getRdxKind(EdgeInst) != RdxKind || CheckForFMA(EdgeInst) ||
22639
+ getRdxKind(EdgeInst) != RdxKind ||
22785
22640
IsCmpSelMinMax != isCmpSelMinMax(EdgeInst) ||
22786
22641
!hasRequiredNumberOfUses(IsCmpSelMinMax, EdgeInst) ||
22787
22642
!isVectorizable(RdxKind, EdgeInst) ||
@@ -24350,13 +24205,13 @@ bool SLPVectorizerPass::vectorizeHorReduction(
24350
24205
Stack.emplace(SelectRoot(), 0);
24351
24206
SmallPtrSet<Value *, 8> VisitedInstrs;
24352
24207
bool Res = false;
24353
- auto TryToReduce = [this, &R, TTI = TTI ](Instruction *Inst) -> Value * {
24208
+ auto && TryToReduce = [this, &R](Instruction *Inst) -> Value * {
24354
24209
if (R.isAnalyzedReductionRoot(Inst))
24355
24210
return nullptr;
24356
24211
if (!isReductionCandidate(Inst))
24357
24212
return nullptr;
24358
24213
HorizontalReduction HorRdx;
24359
- if (!HorRdx.matchAssociativeReduction(R, Inst, *SE, *DL, *TLI, *DT, *TTI ))
24214
+ if (!HorRdx.matchAssociativeReduction(R, Inst, *SE, *DL, *TLI))
24360
24215
return nullptr;
24361
24216
return HorRdx.tryToReduce(R, *DL, TTI, *TLI, AC);
24362
24217
};
@@ -24422,12 +24277,6 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) {
24422
24277
24423
24278
if (!isa<BinaryOperator, CmpInst>(I) || isa<VectorType>(I->getType()))
24424
24279
return false;
24425
- // Skip potential FMA candidates.
24426
- if ((I->getOpcode() == Instruction::FAdd ||
24427
- I->getOpcode() == Instruction::FSub) &&
24428
- canConvertToFMA(I, getSameOpcode(I, *TLI), *DT, *DL, *TTI, *TLI)
24429
- .isValid())
24430
- return false;
24431
24280
24432
24281
Value *P = I->getParent();
24433
24282
0 commit comments