-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LoopVectorize][NFC] Centralize the setting of CostKind #121937
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -987,7 +987,7 @@ class LoopVectorizationCostModel { | |
| InterleavedAccessInfo &IAI) | ||
| : ScalarEpilogueStatus(SEL), TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), | ||
| TTI(TTI), TLI(TLI), DB(DB), AC(AC), ORE(ORE), TheFunction(F), | ||
| Hints(Hints), InterleaveInfo(IAI) {} | ||
| Hints(Hints), InterleaveInfo(IAI), CostKind(TTI::TCK_RecipThroughput) {} | ||
|
|
||
| /// \return An upper bound for the vectorization factors (both fixed and | ||
| /// scalable). If the factors are 0, vectorization and interleaving should be | ||
|
|
@@ -1553,9 +1553,9 @@ class LoopVectorizationCostModel { | |
|
|
||
| /// Return the cost of instructions in an inloop reduction pattern, if I is | ||
| /// part of that pattern. | ||
| std::optional<InstructionCost> | ||
| getReductionPatternCost(Instruction *I, ElementCount VF, Type *VectorTy, | ||
| TTI::TargetCostKind CostKind) const; | ||
| std::optional<InstructionCost> getReductionPatternCost(Instruction *I, | ||
| ElementCount VF, | ||
| Type *VectorTy) const; | ||
|
|
||
| /// Returns true if \p Op should be considered invariant and if it is | ||
| /// trivially hoistable. | ||
|
|
@@ -1614,8 +1614,8 @@ class LoopVectorizationCostModel { | |
|
|
||
| /// Estimate the overhead of scalarizing an instruction. This is a | ||
| /// convenience wrapper for the type-based getScalarizationOverhead API. | ||
| InstructionCost getScalarizationOverhead(Instruction *I, ElementCount VF, | ||
| TTI::TargetCostKind CostKind) const; | ||
| InstructionCost getScalarizationOverhead(Instruction *I, | ||
| ElementCount VF) const; | ||
|
|
||
| /// Returns true if an artificially high cost for emulated masked memrefs | ||
| /// should be used. | ||
|
|
@@ -1796,6 +1796,9 @@ class LoopVectorizationCostModel { | |
|
|
||
| /// All element types found in the loop. | ||
| SmallPtrSet<Type *, 16> ElementTypesInLoop; | ||
|
|
||
| /// The kind of cost that we are calculating | ||
| TTI::TargetCostKind CostKind; | ||
| }; | ||
| } // end namespace llvm | ||
|
|
||
|
|
@@ -1836,13 +1839,17 @@ class GeneratedRTChecks { | |
|
|
||
| PredicatedScalarEvolution &PSE; | ||
|
|
||
| /// The kind of cost that we are calculating | ||
| TTI::TargetCostKind CostKind; | ||
|
|
||
| public: | ||
| GeneratedRTChecks(PredicatedScalarEvolution &PSE, DominatorTree *DT, | ||
| LoopInfo *LI, TargetTransformInfo *TTI, | ||
| const DataLayout &DL, bool AddBranchWeights) | ||
| const DataLayout &DL, bool AddBranchWeights, | ||
| TTI::TargetCostKind CostKind) | ||
| : DT(DT), LI(LI), TTI(TTI), SCEVExp(*PSE.getSE(), DL, "scev.check"), | ||
| MemCheckExp(*PSE.getSE(), DL, "scev.check"), | ||
| AddBranchWeights(AddBranchWeights), PSE(PSE) {} | ||
| AddBranchWeights(AddBranchWeights), PSE(PSE), CostKind(CostKind) {} | ||
|
|
||
| /// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can | ||
| /// accurately estimate the cost of the runtime checks. The blocks are | ||
|
|
@@ -1954,8 +1961,7 @@ class GeneratedRTChecks { | |
| for (Instruction &I : *SCEVCheckBlock) { | ||
| if (SCEVCheckBlock->getTerminator() == &I) | ||
| continue; | ||
| InstructionCost C = | ||
| TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput); | ||
| InstructionCost C = TTI->getInstructionCost(&I, CostKind); | ||
| LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n"); | ||
| RTCheckCost += C; | ||
| } | ||
|
|
@@ -1964,8 +1970,7 @@ class GeneratedRTChecks { | |
| for (Instruction &I : *MemCheckBlock) { | ||
| if (MemCheckBlock->getTerminator() == &I) | ||
| continue; | ||
| InstructionCost C = | ||
| TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput); | ||
| InstructionCost C = TTI->getInstructionCost(&I, CostKind); | ||
| LLVM_DEBUG(dbgs() << " " << C << " for " << I << "\n"); | ||
| MemCheckCost += C; | ||
| } | ||
|
|
@@ -2926,18 +2931,17 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, | |
| if (!VF.isScalar()) | ||
| return CallWideningDecisions.at(std::make_pair(CI, VF)).Cost; | ||
|
|
||
| TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||
| Type *RetTy = CI->getType(); | ||
| if (RecurrenceDescriptor::isFMulAddIntrinsic(CI)) | ||
| if (auto RedCost = getReductionPatternCost(CI, VF, RetTy, CostKind)) | ||
| if (auto RedCost = getReductionPatternCost(CI, VF, RetTy)) | ||
| return *RedCost; | ||
|
|
||
| SmallVector<Type *, 4> Tys; | ||
| for (auto &ArgOp : CI->args()) | ||
| Tys.push_back(ArgOp->getType()); | ||
|
|
||
| InstructionCost ScalarCallCost = | ||
| TTI.getCallInstrCost(CI->getCalledFunction(), RetTy, Tys, CostKind); | ||
| TTI.getCallInstrCost(CI->getCalledFunction(), RetTy, Tys); | ||
|
|
||
| // If this is an intrinsic we may have a lower cost for it. | ||
| if (getVectorIntrinsicIDForCall(CI, TLI)) { | ||
|
|
@@ -2972,8 +2976,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI, | |
|
|
||
| IntrinsicCostAttributes CostAttrs(ID, RetTy, Arguments, ParamTys, FMF, | ||
| dyn_cast<IntrinsicInst>(CI)); | ||
| return TTI.getIntrinsicInstrCost(CostAttrs, | ||
| TargetTransformInfo::TCK_RecipThroughput); | ||
| return TTI.getIntrinsicInstrCost(CostAttrs, CostKind); | ||
| } | ||
|
|
||
| void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) { | ||
|
|
@@ -3430,8 +3433,6 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I, | |
| I->getOpcode() == Instruction::URem); | ||
| assert(!isSafeToSpeculativelyExecute(I)); | ||
|
|
||
| const TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||
|
|
||
| // Scalarization isn't legal for scalable vector types | ||
| InstructionCost ScalarizationCost = InstructionCost::getInvalid(); | ||
| if (!VF.isScalable()) { | ||
|
|
@@ -3453,7 +3454,7 @@ LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I, | |
|
|
||
| // The cost of insertelement and extractelement instructions needed for | ||
| // scalarization. | ||
| ScalarizationCost += getScalarizationOverhead(I, VF, CostKind); | ||
| ScalarizationCost += getScalarizationOverhead(I, VF); | ||
|
|
||
| // Scale the cost by the probability of executing the predicated blocks. | ||
| // This assumes the predicated block for each vector lane is equally | ||
|
|
@@ -4426,7 +4427,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks( | |
| for (const auto &Plan : VPlans) { | ||
| for (ElementCount VF : Plan->vectorFactors()) { | ||
| VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), | ||
| CM); | ||
| CM, CM.CostKind); | ||
| precomputeCosts(*Plan, VF, CostCtx); | ||
| auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry()); | ||
| for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) { | ||
|
|
@@ -5576,7 +5577,6 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount( | |
|
|
||
| // Compute the scalarization overhead of needed insertelement instructions | ||
| // and phi nodes. | ||
| TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||
| if (isScalarWithPredication(I, VF) && !I->getType()->isVoidTy()) { | ||
| ScalarCost += TTI.getScalarizationOverhead( | ||
| cast<VectorType>(toVectorTy(I->getType(), VF)), | ||
|
|
@@ -5723,15 +5723,14 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I, | |
|
|
||
| // Don't pass *I here, since it is scalar but will actually be part of a | ||
| // vectorized loop where the user of it is a vectorized instruction. | ||
| TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||
| const Align Alignment = getLoadStoreAlignment(I); | ||
| Cost += VF.getKnownMinValue() * TTI.getMemoryOpCost(I->getOpcode(), | ||
| ValTy->getScalarType(), | ||
| Alignment, AS, CostKind); | ||
|
|
||
| // Get the overhead of the extractelement and insertelement instructions | ||
| // we might create due to scalarization. | ||
| Cost += getScalarizationOverhead(I, VF, CostKind); | ||
| Cost += getScalarizationOverhead(I, VF); | ||
|
|
||
| // If we have a predicated load/store, it will need extra i1 extracts and | ||
| // conditional branches, but may not be executed for each vector lane. Scale | ||
|
|
@@ -5764,7 +5763,6 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I, | |
| Value *Ptr = getLoadStorePointerOperand(I); | ||
| unsigned AS = getLoadStoreAddressSpace(I); | ||
| int ConsecutiveStride = Legal->isConsecutivePtr(ValTy, Ptr); | ||
| enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||
|
|
||
| assert((ConsecutiveStride == 1 || ConsecutiveStride == -1) && | ||
| "Stride should be 1 or -1 for consecutive memory access"); | ||
|
|
@@ -5795,12 +5793,12 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I, | |
| auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF)); | ||
| const Align Alignment = getLoadStoreAlignment(I); | ||
| unsigned AS = getLoadStoreAddressSpace(I); | ||
| enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||
| if (isa<LoadInst>(I)) { | ||
| return TTI.getAddressComputationCost(ValTy) + | ||
| TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS, | ||
| CostKind) + | ||
| TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy); | ||
| TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy, {}, | ||
| CostKind); | ||
| } | ||
| StoreInst *SI = cast<StoreInst>(I); | ||
|
|
||
|
|
@@ -5823,9 +5821,9 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I, | |
| const Value *Ptr = getLoadStorePointerOperand(I); | ||
|
|
||
| return TTI.getAddressComputationCost(VectorTy) + | ||
| TTI.getGatherScatterOpCost( | ||
| I->getOpcode(), VectorTy, Ptr, Legal->isMaskRequired(I), Alignment, | ||
| TargetTransformInfo::TCK_RecipThroughput, I); | ||
| TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr, | ||
| Legal->isMaskRequired(I), Alignment, | ||
| CostKind, I); | ||
| } | ||
|
|
||
| InstructionCost | ||
|
|
@@ -5838,7 +5836,6 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, | |
| Type *ValTy = getLoadStoreType(InsertPos); | ||
| auto *VectorTy = cast<VectorType>(toVectorTy(ValTy, VF)); | ||
| unsigned AS = getLoadStoreAddressSpace(InsertPos); | ||
| enum TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||
|
|
||
| unsigned InterleaveFactor = Group->getFactor(); | ||
| auto *WideVecTy = VectorType::get(ValTy, VF * InterleaveFactor); | ||
|
|
@@ -5870,9 +5867,9 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I, | |
| } | ||
|
|
||
| std::optional<InstructionCost> | ||
| LoopVectorizationCostModel::getReductionPatternCost( | ||
| Instruction *I, ElementCount VF, Type *Ty, | ||
| TTI::TargetCostKind CostKind) const { | ||
| LoopVectorizationCostModel::getReductionPatternCost(Instruction *I, | ||
| ElementCount VF, | ||
| Type *Ty) const { | ||
| using namespace llvm::PatternMatch; | ||
| // Early exit for no inloop reductions | ||
| if (InLoopReductions.empty() || VF.isScalar() || !isa<VectorType>(Ty)) | ||
|
|
@@ -6063,14 +6060,15 @@ LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I, | |
|
|
||
| TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0)); | ||
| return TTI.getAddressComputationCost(ValTy) + | ||
| TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, | ||
| TTI::TCK_RecipThroughput, OpInfo, I); | ||
| TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, CostKind, | ||
| OpInfo, I); | ||
| } | ||
| return getWideningCost(I, VF); | ||
| } | ||
|
|
||
| InstructionCost LoopVectorizationCostModel::getScalarizationOverhead( | ||
| Instruction *I, ElementCount VF, TTI::TargetCostKind CostKind) const { | ||
| InstructionCost | ||
| LoopVectorizationCostModel::getScalarizationOverhead(Instruction *I, | ||
| ElementCount VF) const { | ||
|
|
||
| // There is no mechanism yet to create a scalable scalarization loop, | ||
| // so this is currently Invalid. | ||
|
|
@@ -6313,7 +6311,6 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { | |
| InstructionCost ScalarCost = InstructionCost::getInvalid(); | ||
| InstructionCost VectorCost = InstructionCost::getInvalid(); | ||
| InstructionCost IntrinsicCost = InstructionCost::getInvalid(); | ||
| TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||
| Function *ScalarFunc = CI->getCalledFunction(); | ||
| Type *ScalarRetTy = CI->getType(); | ||
| SmallVector<Type *, 4> Tys, ScalarTys; | ||
|
|
@@ -6329,8 +6326,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { | |
|
|
||
| // Compute costs of unpacking argument values for the scalar calls and | ||
| // packing the return values to a vector. | ||
| InstructionCost ScalarizationCost = | ||
| getScalarizationOverhead(CI, VF, CostKind); | ||
| InstructionCost ScalarizationCost = getScalarizationOverhead(CI, VF); | ||
|
|
||
| ScalarCost = ScalarCallCost * VF.getKnownMinValue() + ScalarizationCost; | ||
| // Honor ForcedScalars and UniformAfterVectorization decisions. | ||
|
|
@@ -6354,7 +6350,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { | |
| // An in-loop reduction using an fmuladd intrinsic is a special case; | ||
| // we don't want the normal cost for that intrinsic. | ||
| if (RecurrenceDescriptor::isFMulAddIntrinsic(CI)) | ||
| if (auto RedCost = getReductionPatternCost(CI, VF, RetTy, CostKind)) { | ||
| if (auto RedCost = getReductionPatternCost(CI, VF, RetTy)) { | ||
| setCallWideningDecision(CI, VF, CM_IntrinsicCall, nullptr, | ||
| getVectorIntrinsicIDForCall(CI, TLI), | ||
| std::nullopt, *RedCost); | ||
|
|
@@ -6439,7 +6435,8 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) { | |
| TargetTransformInfo::SK_Broadcast, | ||
| VectorType::get(IntegerType::getInt1Ty( | ||
| VecFunc->getFunctionType()->getContext()), | ||
| VF)); | ||
| VF), | ||
| {}, CostKind); | ||
|
|
||
| if (TLI && VecFunc && !CI->isNoBuiltin()) | ||
| VectorCost = | ||
|
|
@@ -6507,7 +6504,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, | |
| if (canTruncateToMinimalBitwidth(I, VF)) | ||
| RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]); | ||
| auto *SE = PSE.getSE(); | ||
| TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||
|
|
||
| auto HasSingleCopyAfterVectorization = [this](Instruction *I, | ||
| ElementCount VF) -> bool { | ||
|
|
@@ -6694,9 +6690,8 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, | |
| {PtrTy, ScalarTy, MaskTy}); | ||
|
|
||
| // Add the costs together with the add/sub operation. | ||
| return TTI.getIntrinsicInstrCost( | ||
| ICA, TargetTransformInfo::TCK_RecipThroughput) + | ||
| MulCost + TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy); | ||
| return TTI.getIntrinsicInstrCost(ICA, CostKind) + MulCost + | ||
| TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy); | ||
|
||
| } | ||
| [[fallthrough]]; | ||
| } | ||
|
|
@@ -6721,7 +6716,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, | |
| return 0; | ||
|
|
||
| // Detect reduction patterns | ||
| if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind)) | ||
| if (auto RedCost = getReductionPatternCost(I, VF, VectorTy)) | ||
| return *RedCost; | ||
|
|
||
| // Certain instructions can be cheaper to vectorize if they have a constant | ||
|
|
@@ -6886,7 +6881,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, | |
| } | ||
|
|
||
| // Detect reduction patterns | ||
| if (auto RedCost = getReductionPatternCost(I, VF, VectorTy, CostKind)) | ||
| if (auto RedCost = getReductionPatternCost(I, VF, VectorTy)) | ||
| return *RedCost; | ||
|
|
||
| Type *SrcScalarTy = I->getOperand(0)->getType(); | ||
|
|
@@ -6911,7 +6906,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, | |
| case Instruction::Call: | ||
| return getVectorCallCost(cast<CallInst>(I), VF); | ||
| case Instruction::ExtractValue: | ||
| return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput); | ||
| return TTI.getInstructionCost(I, CostKind); | ||
| case Instruction::Alloca: | ||
| // We cannot easily widen alloca to a scalable alloca, as | ||
| // the result would need to be a vector of pointers. | ||
|
|
@@ -7423,8 +7418,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, | |
|
|
||
| // Pre-compute the cost for I, if it has a reduction pattern cost. | ||
| for (Instruction *I : ChainOpsAndOperands) { | ||
| auto ReductionCost = CM.getReductionPatternCost( | ||
| I, VF, toVectorTy(I->getType(), VF), TTI::TCK_RecipThroughput); | ||
| auto ReductionCost = | ||
| CM.getReductionPatternCost(I, VF, toVectorTy(I->getType(), VF)); | ||
| if (!ReductionCost) | ||
| continue; | ||
|
|
||
|
|
@@ -7482,7 +7477,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, | |
|
|
||
| InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, | ||
| ElementCount VF) const { | ||
| VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM); | ||
| VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM, | ||
| CM.CostKind); | ||
| InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx); | ||
|
|
||
| // Now compute and add the VPlan-based cost. | ||
|
|
@@ -7611,7 +7607,8 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { | |
| // simplifications not accounted for in the legacy cost model. If that's the | ||
| // case, don't trigger the assertion, as the extra simplifications may cause a | ||
| // different VF to be picked by the VPlan-based cost model. | ||
| VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM); | ||
| VPCostContext CostCtx(CM.TTI, *CM.TLI, Legal->getWidestInductionType(), CM, | ||
| CM.CostKind); | ||
| precomputeCosts(BestPlan, BestFactor.Width, CostCtx); | ||
| assert((BestFactor.Width == LegacyVF.Width || | ||
| planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width), | ||
|
|
@@ -9971,7 +9968,7 @@ static bool processLoopInVPlanNativePath( | |
| bool AddBranchWeights = | ||
| hasBranchWeightMD(*L->getLoopLatch()->getTerminator()); | ||
| GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), | ||
| AddBranchWeights); | ||
| AddBranchWeights, CM.CostKind); | ||
| InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, | ||
| VF.Width, 1, LVL, &CM, BFI, PSI, Checks, BestPlan); | ||
| LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" | ||
|
|
@@ -10488,7 +10485,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |
| bool AddBranchWeights = | ||
| hasBranchWeightMD(*L->getLoopLatch()->getTerminator()); | ||
| GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), | ||
| AddBranchWeights); | ||
| AddBranchWeights, CM.CostKind); | ||
| if (LVP.hasPlanWithVF(VF.Width)) { | ||
| // Select the interleave count. | ||
| IC = CM.selectInterleaveCount(VF.Width, VF.Cost); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think somewhere in the debug output it would be useful to tell the user which cost model we're using, now that it's variable. For example, perhaps in
`LoopVectorizationPlanner::computeBestVF`, before we start calculating the costs, you could print out the cost model being used.