@@ -987,7 +987,7 @@ class LoopVectorizationCostModel {
987
987
InterleavedAccessInfo &IAI)
988
988
: ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal),
989
989
TTI (TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F),
990
- Hints(Hints), InterleaveInfo(IAI) {}
990
+ Hints(Hints), InterleaveInfo(IAI), CostKind(TTI::TCK_RecipThroughput) {}
991
991
992
992
/// \return An upper bound for the vectorization factors (both fixed and
993
993
/// scalable). If the factors are 0, vectorization and interleaving should be
@@ -1555,9 +1555,9 @@ class LoopVectorizationCostModel {
1555
1555
1556
1556
/// Return the cost of instructions in an inloop reduction pattern, if I is
1557
1557
/// part of that pattern.
1558
- std::optional<InstructionCost>
1559
- getReductionPatternCost (Instruction *I, ElementCount VF, Type *VectorTy ,
1560
- TTI::TargetCostKind CostKind ) const ;
1558
+ std::optional<InstructionCost> getReductionPatternCost (Instruction *I,
1559
+ ElementCount VF ,
1560
+ Type *VectorTy ) const ;
1561
1561
1562
1562
/// Returns true if \p Op should be considered invariant and if it is
1563
1563
/// trivially hoistable.
@@ -1616,8 +1616,8 @@ class LoopVectorizationCostModel {
1616
1616
1617
1617
/// Estimate the overhead of scalarizing an instruction. This is a
1618
1618
/// convenience wrapper for the type-based getScalarizationOverhead API.
1619
- InstructionCost getScalarizationOverhead (Instruction *I, ElementCount VF,
1620
- TTI::TargetCostKind CostKind ) const ;
1619
+ InstructionCost getScalarizationOverhead (Instruction *I,
1620
+ ElementCount VF ) const ;
1621
1621
1622
1622
/// Returns true if an artificially high cost for emulated masked memrefs
1623
1623
/// should be used.
@@ -1798,6 +1798,9 @@ class LoopVectorizationCostModel {
1798
1798
1799
1799
/// All element types found in the loop.
1800
1800
SmallPtrSet<Type *, 16 > ElementTypesInLoop;
1801
+
1802
+ /// The target cost kind passed to the TTI cost queries made by this cost model.
1803
+ TTI::TargetCostKind CostKind;
1801
1804
};
1802
1805
} // end namespace llvm
1803
1806
@@ -1838,13 +1841,17 @@ class GeneratedRTChecks {
1838
1841
1839
1842
PredicatedScalarEvolution &PSE;
1840
1843
1844
+ /// The target cost kind used when costing the generated runtime-check instructions.
1845
+ TTI::TargetCostKind CostKind;
1846
+
1841
1847
public:
1842
1848
GeneratedRTChecks (PredicatedScalarEvolution &PSE, DominatorTree *DT,
1843
1849
LoopInfo *LI, TargetTransformInfo *TTI,
1844
- const DataLayout &DL, bool AddBranchWeights)
1850
+ const DataLayout &DL, bool AddBranchWeights,
1851
+ TTI::TargetCostKind CostKind)
1845
1852
: DT(DT), LI(LI), TTI(TTI), SCEVExp(*PSE.getSE(), DL, " scev.check" ),
1846
1853
MemCheckExp (*PSE.getSE(), DL, "scev.check"),
1847
- AddBranchWeights(AddBranchWeights), PSE(PSE) {}
1854
+ AddBranchWeights(AddBranchWeights), PSE(PSE), CostKind(CostKind) {}
1848
1855
1849
1856
/// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can
1850
1857
/// accurately estimate the cost of the runtime checks. The blocks are
@@ -1956,8 +1963,7 @@ class GeneratedRTChecks {
1956
1963
for (Instruction &I : *SCEVCheckBlock) {
1957
1964
if (SCEVCheckBlock->getTerminator () == &I)
1958
1965
continue ;
1959
- InstructionCost C =
1960
- TTI->getInstructionCost (&I, TTI::TCK_RecipThroughput);
1966
+ InstructionCost C = TTI->getInstructionCost (&I, CostKind);
1961
1967
LLVM_DEBUG (dbgs () << " " << C << " for " << I << " \n " );
1962
1968
RTCheckCost += C;
1963
1969
}
@@ -1966,8 +1972,7 @@ class GeneratedRTChecks {
1966
1972
for (Instruction &I : *MemCheckBlock) {
1967
1973
if (MemCheckBlock->getTerminator () == &I)
1968
1974
continue ;
1969
- InstructionCost C =
1970
- TTI->getInstructionCost (&I, TTI::TCK_RecipThroughput);
1975
+ InstructionCost C = TTI->getInstructionCost (&I, CostKind);
1971
1976
LLVM_DEBUG (dbgs () << " " << C << " for " << I << " \n " );
1972
1977
MemCheckCost += C;
1973
1978
}
@@ -2928,10 +2933,9 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI,
2928
2933
if (!VF.isScalar ())
2929
2934
return CallWideningDecisions.at (std::make_pair (CI, VF)).Cost ;
2930
2935
2931
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
2932
2936
Type *RetTy = CI->getType ();
2933
2937
if (RecurrenceDescriptor::isFMulAddIntrinsic (CI))
2934
- if (auto RedCost = getReductionPatternCost (CI, VF, RetTy, CostKind ))
2938
+ if (auto RedCost = getReductionPatternCost (CI, VF, RetTy))
2935
2939
return *RedCost;
2936
2940
2937
2941
SmallVector<Type *, 4 > Tys;
@@ -2974,8 +2978,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
2974
2978
2975
2979
IntrinsicCostAttributes CostAttrs (ID, RetTy, Arguments, ParamTys, FMF,
2976
2980
dyn_cast<IntrinsicInst>(CI));
2977
- return TTI.getIntrinsicInstrCost (CostAttrs,
2978
- TargetTransformInfo::TCK_RecipThroughput);
2981
+ return TTI.getIntrinsicInstrCost (CostAttrs, CostKind);
2979
2982
}
2980
2983
2981
2984
void InnerLoopVectorizer::fixVectorizedLoop (VPTransformState &State) {
@@ -3432,8 +3435,6 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
3432
3435
I->getOpcode () == Instruction::URem);
3433
3436
assert (!isSafeToSpeculativelyExecute (I));
3434
3437
3435
- const TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
3436
-
3437
3438
// Scalarization isn't legal for scalable vector types
3438
3439
InstructionCost ScalarizationCost = InstructionCost::getInvalid ();
3439
3440
if (!VF.isScalable ()) {
@@ -3455,7 +3456,7 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
3455
3456
3456
3457
// The cost of insertelement and extractelement instructions needed for
3457
3458
// scalarization.
3458
- ScalarizationCost += getScalarizationOverhead (I, VF, CostKind );
3459
+ ScalarizationCost += getScalarizationOverhead (I, VF);
3459
3460
3460
3461
// Scale the cost by the probability of executing the predicated blocks.
3461
3462
// This assumes the predicated block for each vector lane is equally
@@ -4445,7 +4446,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
4445
4446
for (const auto &Plan : VPlans) {
4446
4447
for (ElementCount VF : Plan->vectorFactors ()) {
4447
4448
VPCostContext CostCtx (CM.TTI , *CM.TLI , Legal->getWidestInductionType (),
4448
- CM);
4449
+ CM, CM. CostKind );
4449
4450
precomputeCosts (*Plan, VF, CostCtx);
4450
4451
auto Iter = vp_depth_first_deep (Plan->getVectorLoopRegion ()->getEntry ());
4451
4452
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
@@ -5595,7 +5596,6 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
5595
5596
5596
5597
// Compute the scalarization overhead of needed insertelement instructions
5597
5598
// and phi nodes.
5598
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
5599
5599
if (isScalarWithPredication (I, VF) && !I->getType ()->isVoidTy ()) {
5600
5600
ScalarCost += TTI.getScalarizationOverhead (
5601
5601
cast<VectorType>(toVectorTy (I->getType (), VF)),
@@ -5742,15 +5742,14 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
5742
5742
5743
5743
// Don't pass *I here, since it is scalar but will actually be part of a
5744
5744
// vectorized loop where the user of it is a vectorized instruction.
5745
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
5746
5745
const Align Alignment = getLoadStoreAlignment (I);
5747
5746
Cost += VF.getKnownMinValue () * TTI.getMemoryOpCost (I->getOpcode (),
5748
5747
ValTy->getScalarType (),
5749
5748
Alignment, AS, CostKind);
5750
5749
5751
5750
// Get the overhead of the extractelement and insertelement instructions
5752
5751
// we might create due to scalarization.
5753
- Cost += getScalarizationOverhead (I, VF, CostKind );
5752
+ Cost += getScalarizationOverhead (I, VF);
5754
5753
5755
5754
// If we have a predicated load/store, it will need extra i1 extracts and
5756
5755
// conditional branches, but may not be executed for each vector lane. Scale
@@ -5783,7 +5782,6 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
5783
5782
Value *Ptr = getLoadStorePointerOperand (I);
5784
5783
unsigned AS = getLoadStoreAddressSpace (I);
5785
5784
int ConsecutiveStride = Legal->isConsecutivePtr (ValTy, Ptr);
5786
- enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
5787
5785
5788
5786
assert ((ConsecutiveStride == 1 || ConsecutiveStride == -1 ) &&
5789
5787
" Stride should be 1 or -1 for consecutive memory access" );
@@ -5814,12 +5812,12 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
5814
5812
auto *VectorTy = cast<VectorType>(toVectorTy (ValTy, VF));
5815
5813
const Align Alignment = getLoadStoreAlignment (I);
5816
5814
unsigned AS = getLoadStoreAddressSpace (I);
5817
- enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
5818
5815
if (isa<LoadInst>(I)) {
5819
5816
return TTI.getAddressComputationCost (ValTy) +
5820
5817
TTI.getMemoryOpCost (Instruction::Load, ValTy, Alignment, AS,
5821
5818
CostKind) +
5822
- TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, VectorTy);
5819
+ TTI.getShuffleCost (TargetTransformInfo::SK_Broadcast, VectorTy, {},
5820
+ CostKind);
5823
5821
}
5824
5822
StoreInst *SI = cast<StoreInst>(I);
5825
5823
@@ -5842,9 +5840,9 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
5842
5840
const Value *Ptr = getLoadStorePointerOperand (I);
5843
5841
5844
5842
return TTI.getAddressComputationCost (VectorTy) +
5845
- TTI.getGatherScatterOpCost (
5846
- I-> getOpcode (), VectorTy, Ptr, Legal->isMaskRequired (I), Alignment,
5847
- TargetTransformInfo::TCK_RecipThroughput , I);
5843
+ TTI.getGatherScatterOpCost (I-> getOpcode (), VectorTy, Ptr,
5844
+ Legal->isMaskRequired (I), Alignment,
5845
+ CostKind , I);
5848
5846
}
5849
5847
5850
5848
InstructionCost
@@ -5857,7 +5855,6 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
5857
5855
Type *ValTy = getLoadStoreType (InsertPos);
5858
5856
auto *VectorTy = cast<VectorType>(toVectorTy (ValTy, VF));
5859
5857
unsigned AS = getLoadStoreAddressSpace (InsertPos);
5860
- enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
5861
5858
5862
5859
unsigned InterleaveFactor = Group->getFactor ();
5863
5860
auto *WideVecTy = VectorType::get (ValTy, VF * InterleaveFactor);
@@ -5889,9 +5886,9 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
5889
5886
}
5890
5887
5891
5888
std::optional<InstructionCost>
5892
- LoopVectorizationCostModel::getReductionPatternCost (
5893
- Instruction *I, ElementCount VF, Type *Ty ,
5894
- TTI::TargetCostKind CostKind ) const {
5889
+ LoopVectorizationCostModel::getReductionPatternCost (Instruction *I,
5890
+ ElementCount VF ,
5891
+ Type *Ty ) const {
5895
5892
using namespace llvm ::PatternMatch;
5896
5893
// Early exit for no inloop reductions
5897
5894
if (InLoopReductions.empty () || VF.isScalar () || !isa<VectorType>(Ty))
@@ -6082,14 +6079,15 @@ LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
6082
6079
6083
6080
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo (I->getOperand (0 ));
6084
6081
return TTI.getAddressComputationCost (ValTy) +
6085
- TTI.getMemoryOpCost (I->getOpcode (), ValTy, Alignment, AS,
6086
- TTI::TCK_RecipThroughput, OpInfo, I);
6082
+ TTI.getMemoryOpCost (I->getOpcode (), ValTy, Alignment, AS, CostKind,
6083
+ OpInfo, I);
6087
6084
}
6088
6085
return getWideningCost (I, VF);
6089
6086
}
6090
6087
6091
- InstructionCost LoopVectorizationCostModel::getScalarizationOverhead (
6092
- Instruction *I, ElementCount VF, TTI::TargetCostKind CostKind) const {
6088
+ InstructionCost
6089
+ LoopVectorizationCostModel::getScalarizationOverhead (Instruction *I,
6090
+ ElementCount VF) const {
6093
6091
6094
6092
// There is no mechanism yet to create a scalable scalarization loop,
6095
6093
// so this is currently Invalid.
@@ -6332,7 +6330,6 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
6332
6330
InstructionCost ScalarCost = InstructionCost::getInvalid ();
6333
6331
InstructionCost VectorCost = InstructionCost::getInvalid ();
6334
6332
InstructionCost IntrinsicCost = InstructionCost::getInvalid ();
6335
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
6336
6333
Function *ScalarFunc = CI->getCalledFunction ();
6337
6334
Type *ScalarRetTy = CI->getType ();
6338
6335
SmallVector<Type *, 4 > Tys, ScalarTys;
@@ -6348,8 +6345,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
6348
6345
6349
6346
// Compute costs of unpacking argument values for the scalar calls and
6350
6347
// packing the return values to a vector.
6351
- InstructionCost ScalarizationCost =
6352
- getScalarizationOverhead (CI, VF, CostKind);
6348
+ InstructionCost ScalarizationCost = getScalarizationOverhead (CI, VF);
6353
6349
6354
6350
ScalarCost = ScalarCallCost * VF.getKnownMinValue () + ScalarizationCost;
6355
6351
// Honor ForcedScalars and UniformAfterVectorization decisions.
@@ -6373,7 +6369,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
6373
6369
// An in-loop reduction using an fmuladd intrinsic is a special case;
6374
6370
// we don't want the normal cost for that intrinsic.
6375
6371
if (RecurrenceDescriptor::isFMulAddIntrinsic (CI))
6376
- if (auto RedCost = getReductionPatternCost (CI, VF, RetTy, CostKind )) {
6372
+ if (auto RedCost = getReductionPatternCost (CI, VF, RetTy)) {
6377
6373
setCallWideningDecision (CI, VF, CM_IntrinsicCall, nullptr ,
6378
6374
getVectorIntrinsicIDForCall (CI, TLI),
6379
6375
std::nullopt, *RedCost);
@@ -6458,7 +6454,8 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
6458
6454
TargetTransformInfo::SK_Broadcast,
6459
6455
VectorType::get (IntegerType::getInt1Ty (
6460
6456
VecFunc->getFunctionType ()->getContext ()),
6461
- VF));
6457
+ VF),
6458
+ {}, CostKind);
6462
6459
6463
6460
if (TLI && VecFunc && !CI->isNoBuiltin ())
6464
6461
VectorCost =
@@ -6526,7 +6523,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
6526
6523
if (canTruncateToMinimalBitwidth (I, VF))
6527
6524
RetTy = IntegerType::get (RetTy->getContext (), MinBWs[I]);
6528
6525
auto *SE = PSE.getSE ();
6529
- TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
6530
6526
6531
6527
auto HasSingleCopyAfterVectorization = [this ](Instruction *I,
6532
6528
ElementCount VF) -> bool {
@@ -6702,7 +6698,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
6702
6698
InstructionCost MulCost = TTI::TCC_Free;
6703
6699
ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand (1 ));
6704
6700
if (!RHS || RHS->getZExtValue () != 1 )
6705
- MulCost = TTI.getArithmeticInstrCost (Instruction::Mul, VectorTy);
6701
+ MulCost =
6702
+ TTI.getArithmeticInstrCost (Instruction::Mul, VectorTy, CostKind);
6706
6703
6707
6704
// Find the cost of the histogram operation itself.
6708
6705
Type *PtrTy = VectorType::get (HGram->Load ->getPointerOperandType (), VF);
@@ -6713,9 +6710,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
6713
6710
{PtrTy, ScalarTy, MaskTy});
6714
6711
6715
6712
// Add the costs together with the add/sub operation.
6716
- return TTI.getIntrinsicInstrCost (
6717
- ICA, TargetTransformInfo::TCK_RecipThroughput) +
6718
- MulCost + TTI.getArithmeticInstrCost (I->getOpcode (), VectorTy);
6713
+ return TTI.getIntrinsicInstrCost (ICA, CostKind) + MulCost +
6714
+ TTI.getArithmeticInstrCost (I->getOpcode (), VectorTy, CostKind);
6719
6715
}
6720
6716
[[fallthrough]];
6721
6717
}
@@ -6740,7 +6736,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
6740
6736
return 0 ;
6741
6737
6742
6738
// Detect reduction patterns
6743
- if (auto RedCost = getReductionPatternCost (I, VF, VectorTy, CostKind ))
6739
+ if (auto RedCost = getReductionPatternCost (I, VF, VectorTy))
6744
6740
return *RedCost;
6745
6741
6746
6742
// Certain instructions can be cheaper to vectorize if they have a constant
@@ -6905,7 +6901,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
6905
6901
}
6906
6902
6907
6903
// Detect reduction patterns
6908
- if (auto RedCost = getReductionPatternCost (I, VF, VectorTy, CostKind ))
6904
+ if (auto RedCost = getReductionPatternCost (I, VF, VectorTy))
6909
6905
return *RedCost;
6910
6906
6911
6907
Type *SrcScalarTy = I->getOperand (0 )->getType ();
@@ -6930,7 +6926,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
6930
6926
case Instruction::Call:
6931
6927
return getVectorCallCost (cast<CallInst>(I), VF);
6932
6928
case Instruction::ExtractValue:
6933
- return TTI.getInstructionCost (I, TTI::TCK_RecipThroughput );
6929
+ return TTI.getInstructionCost (I, CostKind );
6934
6930
case Instruction::Alloca:
6935
6931
// We cannot easily widen alloca to a scalable alloca, as
6936
6932
// the result would need to be a vector of pointers.
@@ -7442,8 +7438,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
7442
7438
7443
7439
// Pre-compute the cost for I, if it has a reduction pattern cost.
7444
7440
for (Instruction *I : ChainOpsAndOperands) {
7445
- auto ReductionCost = CM. getReductionPatternCost (
7446
- I, VF, toVectorTy (I->getType (), VF), TTI::TCK_RecipThroughput );
7441
+ auto ReductionCost =
7442
+ CM. getReductionPatternCost ( I, VF, toVectorTy (I->getType (), VF));
7447
7443
if (!ReductionCost)
7448
7444
continue ;
7449
7445
@@ -7501,7 +7497,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
7501
7497
7502
7498
InstructionCost LoopVectorizationPlanner::cost (VPlan &Plan,
7503
7499
ElementCount VF) const {
7504
- VPCostContext CostCtx (CM.TTI , *CM.TLI , Legal->getWidestInductionType (), CM);
7500
+ VPCostContext CostCtx (CM.TTI , *CM.TLI , Legal->getWidestInductionType (), CM,
7501
+ CM.CostKind );
7505
7502
InstructionCost Cost = precomputeCosts (Plan, VF, CostCtx);
7506
7503
7507
7504
// Now compute and add the VPlan-based cost.
@@ -7581,6 +7578,16 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7581
7578
if (VPlans.size () == 1 && size (FirstPlan.vectorFactors ()) == 1 )
7582
7579
return {*FirstPlan.vectorFactors ().begin (), 0 , 0 };
7583
7580
7581
+ LLVM_DEBUG (dbgs () << " LV: Computing best VF using cost kind: "
7582
+ << (CM.CostKind == TTI::TCK_RecipThroughput
7583
+ ? " Reciprocal Throughput\n "
7584
+ : CM.CostKind == TTI::TCK_Latency
7585
+ ? " Instruction Latency\n "
7586
+ : CM.CostKind == TTI::TCK_CodeSize ? " Code Size\n "
7587
+ : CM.CostKind == TTI::TCK_SizeAndLatency
7588
+ ? " Code Size and Latency\n "
7589
+ : " Unknown\n " ));
7590
+
7584
7591
ElementCount ScalarVF = ElementCount::getFixed (1 );
7585
7592
assert (hasPlanWithVF (ScalarVF) &&
7586
7593
" More than a single plan/VF w/o any plan having scalar VF" );
@@ -7634,7 +7641,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
7634
7641
// simplifications not accounted for in the legacy cost model. If that's the
7635
7642
// case, don't trigger the assertion, as the extra simplifications may cause a
7636
7643
// different VF to be picked by the VPlan-based cost model.
7637
- VPCostContext CostCtx (CM.TTI , *CM.TLI , Legal->getWidestInductionType (), CM);
7644
+ VPCostContext CostCtx (CM.TTI , *CM.TLI , Legal->getWidestInductionType (), CM,
7645
+ CM.CostKind );
7638
7646
precomputeCosts (BestPlan, BestFactor.Width , CostCtx);
7639
7647
assert ((BestFactor.Width == LegacyVF.Width ||
7640
7648
planContainsAdditionalSimplifications (getPlanFor (BestFactor.Width ),
@@ -10155,7 +10163,7 @@ static bool processLoopInVPlanNativePath(
10155
10163
bool AddBranchWeights =
10156
10164
hasBranchWeightMD (*L->getLoopLatch ()->getTerminator ());
10157
10165
GeneratedRTChecks Checks (PSE, DT, LI, TTI, F->getDataLayout (),
10158
- AddBranchWeights);
10166
+ AddBranchWeights, CM. CostKind );
10159
10167
InnerLoopVectorizer LB (L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width ,
10160
10168
VF.Width , 1 , LVL, &CM, BFI, PSI, Checks, BestPlan);
10161
10169
LLVM_DEBUG (dbgs () << " Vectorizing outer loop in \" "
@@ -10692,7 +10700,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
10692
10700
bool AddBranchWeights =
10693
10701
hasBranchWeightMD (*L->getLoopLatch ()->getTerminator ());
10694
10702
GeneratedRTChecks Checks (PSE, DT, LI, TTI, F->getDataLayout (),
10695
- AddBranchWeights);
10703
+ AddBranchWeights, CM. CostKind );
10696
10704
if (LVP.hasPlanWithVF (VF.Width )) {
10697
10705
// Select the interleave count.
10698
10706
IC = CM.selectInterleaveCount (VF.Width , VF.Cost );
0 commit comments