7575#include " llvm/ADT/Statistic.h"
7676#include " llvm/ADT/StringRef.h"
7777#include " llvm/ADT/Twine.h"
78+ #include " llvm/ADT/TypeSwitch.h"
7879#include " llvm/ADT/iterator_range.h"
7980#include " llvm/Analysis/AssumptionCache.h"
8081#include " llvm/Analysis/BasicAliasAnalysis.h"
@@ -889,20 +890,18 @@ static void debugVectorizationMessage(const StringRef Prefix,
889890// / \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
890891// / RemarkName is the identifier for the remark. If \p I is passed it is an
891892// / instruction that prevents vectorization. Otherwise \p TheLoop is used for
892- // / the location of the remark. \return the remark object that can be
893- // / streamed to.
894- static OptimizationRemarkAnalysis createLVAnalysis (const char *PassName,
895- StringRef RemarkName, Loop *TheLoop, Instruction *I) {
896- Value *CodeRegion = TheLoop->getHeader ();
897- DebugLoc DL = TheLoop->getStartLoc ();
898-
899- if (I) {
900- CodeRegion = I->getParent ();
901- // If there is no debug location attached to the instruction, revert back to
902- // using the loop's.
903- if (I->getDebugLoc ())
904- DL = I->getDebugLoc ();
905- }
893+ // / the location of the remark. If \p DL is passed, use it as debug location for
894+ // / the remark. \return the remark object that can be streamed to.
895+ static OptimizationRemarkAnalysis
896+ createLVAnalysis (const char *PassName, StringRef RemarkName, Loop *TheLoop,
897+ Instruction *I, DebugLoc DL = {}) {
898+ Value *CodeRegion = I ? I->getParent () : TheLoop->getHeader ();
899+ // If debug location is attached to the instruction, use it. Otherwise if DL
900+ // was not provided, use the loop's.
901+ if (I && I->getDebugLoc ())
902+ DL = I->getDebugLoc ();
903+ else if (!DL)
904+ DL = TheLoop->getStartLoc ();
906905
907906 return OptimizationRemarkAnalysis (PassName, RemarkName, DL, CodeRegion);
908907}
@@ -943,15 +942,17 @@ void reportVectorizationFailure(const StringRef DebugMsg,
943942
944943// / Reports an informative message: print \p Msg for debugging purposes as well
945944// / as an optimization remark. Uses either \p I as location of the remark, or
946- // / otherwise \p TheLoop.
945+ // / otherwise \p TheLoop. If \p DL is passed, use it as debug location for the
946+ // / remark.
947947static void reportVectorizationInfo (const StringRef Msg, const StringRef ORETag,
948- OptimizationRemarkEmitter *ORE, Loop *TheLoop,
949- Instruction *I = nullptr ) {
948+ OptimizationRemarkEmitter *ORE,
949+ Loop *TheLoop, Instruction *I = nullptr ,
950+ DebugLoc DL = {}) {
950951 LLVM_DEBUG (debugVectorizationMessage (" " , Msg, I));
951952 LoopVectorizeHints Hints (TheLoop, true /* doesn't matter */ , *ORE);
952- ORE->emit (
953- createLVAnalysis (Hints. vectorizeAnalysisPassName (), ORETag, TheLoop, I )
954- << Msg);
953+ ORE->emit (createLVAnalysis (Hints. vectorizeAnalysisPassName (), ORETag, TheLoop,
954+ I, DL )
955+ << Msg);
955956}
956957
957958// / Report successful vectorization of the loop. In case an outer loop is
@@ -1538,12 +1539,8 @@ class LoopVectorizationCostModel {
15381539 // / Returns the expected execution cost. The unit of the cost does
15391540 // / not matter because we use the 'cost' units to compare different
15401541 // / vector widths. The cost that is returned is *not* normalized by
1541- // / the factor width. If \p Invalid is not nullptr, this function
1542- // / will add a pair(Instruction*, ElementCount) to \p Invalid for
1543- // / each instruction that has an Invalid cost for the given VF.
1544- InstructionCost
1545- expectedCost (ElementCount VF,
1546- SmallVectorImpl<InstructionVFPair> *Invalid = nullptr );
1542+ // / the factor width.
1543+ InstructionCost expectedCost (ElementCount VF);
15471544
15481545 bool hasPredStores () const { return NumPredStores > 0 ; }
15491546
@@ -4350,24 +4347,38 @@ bool LoopVectorizationPlanner::isMoreProfitable(
43504347 return CmpFn (RTCostA, RTCostB);
43514348}
43524349
4353- static void emitInvalidCostRemarks (SmallVector<InstructionVFPair> InvalidCosts,
4354- OptimizationRemarkEmitter *ORE,
4355- Loop *TheLoop) {
4350+ void LoopVectorizationPlanner::emitInvalidCostRemarks (
4351+ OptimizationRemarkEmitter *ORE) {
4352+ using RecipeVFPair = std::pair<VPRecipeBase *, ElementCount>;
4353+ LLVMContext &LLVMCtx = OrigLoop->getHeader ()->getContext ();
4354+ SmallVector<RecipeVFPair> InvalidCosts;
4355+ for (const auto &Plan : VPlans) {
4356+ for (ElementCount VF : Plan->vectorFactors ()) {
4357+ VPCostContext CostCtx (CM.TTI , Legal->getWidestInductionType (), LLVMCtx,
4358+ CM);
4359+ auto Iter = vp_depth_first_deep (Plan->getVectorLoopRegion ()->getEntry ());
4360+ for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(Iter)) {
4361+ for (auto &R : *VPBB) {
4362+ if (!R.cost (VF, CostCtx).isValid ())
4363+ InvalidCosts.emplace_back (&R, VF);
4364+ }
4365+ }
4366+ }
4367+ }
43564368 if (InvalidCosts.empty ())
43574369 return ;
43584370
43594371 // Emit a report of VFs with invalid costs in the loop.
43604372
4361- // Group the remarks per instruction, keeping the instruction order from
4362- // InvalidCosts.
4363- std::map<Instruction *, unsigned > Numbering;
4373+ // Group the remarks per recipe, keeping the recipe order from InvalidCosts.
4374+ DenseMap<VPRecipeBase *, unsigned > Numbering;
43644375 unsigned I = 0 ;
43654376 for (auto &Pair : InvalidCosts)
43664377 if (!Numbering.count (Pair.first ))
43674378 Numbering[Pair.first ] = I++;
43684379
4369- // Sort the list, first on instruction (number) then on VF.
4370- sort (InvalidCosts, [&Numbering](InstructionVFPair &A, InstructionVFPair &B) {
4380+ // Sort the list, first on recipe (number) then on VF.
4381+ sort (InvalidCosts, [&Numbering](RecipeVFPair &A, RecipeVFPair &B) {
43714382 if (Numbering[A.first ] != Numbering[B.first ])
43724383 return Numbering[A.first ] < Numbering[B.first ];
43734384 const auto &LHS = A.second ;
@@ -4376,38 +4387,64 @@ static void emitInvalidCostRemarks(SmallVector<InstructionVFPair> InvalidCosts,
43764387 std::make_tuple (RHS.isScalable (), RHS.getKnownMinValue ());
43774388 });
43784389
4379- // For a list of ordered instruction-vf pairs:
4380- // [(load, vf1 ), (load, vf2 ), (store, vf1 )]
4381- // Group the instructions together to emit separate remarks for:
4382- // load (vf1, vf2 )
4383- // store (vf1 )
4384- auto Tail = ArrayRef<InstructionVFPair >(InvalidCosts);
4385- auto Subset = ArrayRef<InstructionVFPair >();
4390+ // For a list of ordered recipe-VF pairs:
4391+ // [(load, VF1 ), (load, VF2 ), (store, VF1 )]
4392+ // group the recipes together to emit separate remarks for:
4393+ // load (VF1, VF2 )
4394+ // store (VF1 )
4395+ auto Tail = ArrayRef<RecipeVFPair >(InvalidCosts);
4396+ auto Subset = ArrayRef<RecipeVFPair >();
43864397 do {
43874398 if (Subset.empty ())
43884399 Subset = Tail.take_front (1 );
43894400
4390- Instruction *I = Subset.front ().first ;
4391-
4392- // If the next instruction is different, or if there are no other pairs,
4401+ VPRecipeBase *R = Subset.front ().first ;
4402+
4403+ unsigned Opcode =
4404+ TypeSwitch<const VPRecipeBase *, unsigned >(R)
4405+ .Case <VPHeaderPHIRecipe>(
4406+ [](const auto *R) { return Instruction::PHI; })
4407+ .Case <VPWidenSelectRecipe>(
4408+ [](const auto *R) { return Instruction::Select; })
4409+ .Case <VPWidenStoreRecipe>(
4410+ [](const auto *R) { return Instruction::Store; })
4411+ .Case <VPWidenLoadRecipe>(
4412+ [](const auto *R) { return Instruction::Load; })
4413+ .Case <VPWidenCallRecipe>(
4414+ [](const auto *R) { return Instruction::Call; })
4415+ .Case <VPInstruction, VPWidenRecipe, VPReplicateRecipe,
4416+ VPWidenCastRecipe>(
4417+ [](const auto *R) { return R->getOpcode (); })
4418+ .Case <VPInterleaveRecipe>([](const VPInterleaveRecipe *R) {
4419+ return R->getStoredValues ().empty () ? Instruction::Load
4420+ : Instruction::Store;
4421+ });
4422+
4423+ // If the next recipe is different, or if there are no other pairs,
43934424 // emit a remark for the collated subset. e.g.
4394- // [(load, vf1 ), (load, vf2 ))]
4425+ // [(load, VF1 ), (load, VF2 )]
43954426 // to emit:
4396- // remark: invalid costs for 'load' at VF=(vf, vf2 )
4397- if (Subset == Tail || Tail[Subset.size ()].first != I ) {
4427+ // remark: invalid costs for 'load' at VF=(VF1, VF2 )
4428+ if (Subset == Tail || Tail[Subset.size ()].first != R ) {
43984429 std::string OutString;
43994430 raw_string_ostream OS (OutString);
44004431 assert (!Subset.empty () && " Unexpected empty range" );
4401- OS << " Instruction with invalid costs prevented vectorization at VF=(" ;
4432+ OS << " Recipe with invalid costs prevented vectorization at VF=(" ;
44024433 for (const auto &Pair : Subset)
44034434 OS << (Pair.second == Subset.front ().second ? " " : " , " ) << Pair.second ;
44044435 OS << " ):" ;
4405- if (auto *CI = dyn_cast<CallInst>(I))
4406- OS << " call to " << CI->getCalledFunction ()->getName ();
4407- else
4408- OS << " " << I->getOpcodeName ();
4436+ if (Opcode == Instruction::Call) {
4437+ auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
4438+ Function *CalledFn =
4439+ WidenCall ? WidenCall->getCalledScalarFunction ()
4440+ : cast<Function>(R->getOperand (R->getNumOperands () - 1 )
4441+ ->getLiveInIRValue ());
4442+ OS << " call to " << CalledFn->getName ();
4443+ } else
4444+ OS << " " << Instruction::getOpcodeName (Opcode);
44094445 OS.flush ();
4410- reportVectorizationInfo (OutString, " InvalidCost" , ORE, TheLoop, I);
4446+ reportVectorizationInfo (OutString, " InvalidCost" , ORE, OrigLoop, nullptr ,
4447+ R->getDebugLoc ());
44114448 Tail = Tail.drop_front (Subset.size ());
44124449 Subset = {};
44134450 } else
@@ -4536,14 +4573,13 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
45364573 ChosenFactor.Cost = InstructionCost::getMax ();
45374574 }
45384575
4539- SmallVector<InstructionVFPair> InvalidCosts;
45404576 for (auto &P : VPlans) {
45414577 for (ElementCount VF : P->vectorFactors ()) {
45424578 // The cost for scalar VF=1 is already calculated, so ignore it.
45434579 if (VF.isScalar ())
45444580 continue ;
45454581
4546- InstructionCost C = CM.expectedCost (VF, &InvalidCosts );
4582+ InstructionCost C = CM.expectedCost (VF);
45474583 VectorizationFactor Candidate (VF, C, ScalarCost.ScalarCost );
45484584
45494585#ifndef NDEBUG
@@ -4578,8 +4614,6 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() {
45784614 }
45794615 }
45804616
4581- emitInvalidCostRemarks (InvalidCosts, ORE, OrigLoop);
4582-
45834617 if (!EnableCondStoresVectorization && CM.hasPredStores ()) {
45844618 reportVectorizationFailure (
45854619 " There are conditional stores." ,
@@ -5484,8 +5518,7 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
54845518 return Discount;
54855519}
54865520
5487- InstructionCost LoopVectorizationCostModel::expectedCost (
5488- ElementCount VF, SmallVectorImpl<InstructionVFPair> *Invalid) {
5521+ InstructionCost LoopVectorizationCostModel::expectedCost (ElementCount VF) {
54895522 InstructionCost Cost;
54905523
54915524 // For each block.
@@ -5505,10 +5538,6 @@ InstructionCost LoopVectorizationCostModel::expectedCost(
55055538 if (C.isValid () && ForceTargetInstructionCost.getNumOccurrences () > 0 )
55065539 C = InstructionCost (ForceTargetInstructionCost);
55075540
5508- // Keep a list of instructions with invalid costs.
5509- if (Invalid && !C.isValid ())
5510- Invalid->emplace_back (&I, VF);
5511-
55125541 BlockCost += C;
55135542 LLVM_DEBUG (dbgs () << " LV: Found an estimated cost of " << C << " for VF "
55145543 << VF << " For instruction: " << I << ' \n ' );
@@ -9867,6 +9896,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
98679896 // Plan how to best vectorize, return the best VF and its cost.
98689897 std::optional<VectorizationFactor> MaybeVF = LVP.plan (UserVF, UserIC);
98699898
9899+ if (ORE->allowExtraAnalysis (LV_NAME))
9900+ LVP.emitInvalidCostRemarks (ORE);
9901+
98709902 VectorizationFactor VF = VectorizationFactor::Disabled ();
98719903 unsigned IC = 1 ;
98729904
0 commit comments