@@ -8112,10 +8112,9 @@ void VPRecipeBuilder::createBlockInMask(BasicBlock *BB, VPlan &Plan) {
81128112 BlockMaskCache[BB] = BlockMask;
81138113}
81148114
8115- VPRecipeBase *VPRecipeBuilder::tryToWidenMemory (Instruction *I,
8116- ArrayRef<VPValue *> Operands,
8117- VFRange &Range,
8118- VPlanPtr &Plan) {
8115+ VPWidenMemoryInstructionRecipe *
8116+ VPRecipeBuilder::tryToWidenMemory (Instruction *I, ArrayRef<VPValue *> Operands,
8117+ VFRange &Range, VPlanPtr &Plan) {
81198118 assert ((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
81208119 " Must be called with either a load or store" );
81218120
@@ -8187,7 +8186,7 @@ createWidenInductionRecipes(PHINode *Phi, Instruction *PhiOrTrunc,
81878186 return new VPWidenIntOrFpInductionRecipe (Phi, Start, Step, IndDesc);
81888187}
81898188
8190- VPRecipeBase *VPRecipeBuilder::tryToOptimizeInductionPHI (
8189+ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI (
81918190 PHINode *Phi, ArrayRef<VPValue *> Operands, VPlan &Plan, VFRange &Range) {
81928191
81938192 // Check if this is an integer or fp induction. If so, build the recipe that
@@ -8239,31 +8238,10 @@ VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate(
82398238 return nullptr ;
82408239}
82418240
8242- VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend (PHINode *Phi,
8243- ArrayRef<VPValue *> Operands,
8244- VPlanPtr &Plan) {
8245- // If all incoming values are equal, the incoming VPValue can be used directly
8246- // instead of creating a new VPBlendRecipe.
8247- if (llvm::all_equal (Operands))
8248- return Operands[0 ];
8249-
8241+ VPBlendRecipe *VPRecipeBuilder::tryToBlend (PHINode *Phi,
8242+ ArrayRef<VPValue *> Operands,
8243+ VPlanPtr &Plan) {
82508244 unsigned NumIncoming = Phi->getNumIncomingValues ();
8251- // For in-loop reductions, we do not need to create an additional select.
8252- VPValue *InLoopVal = nullptr ;
8253- for (unsigned In = 0 ; In < NumIncoming; In++) {
8254- PHINode *PhiOp =
8255- dyn_cast_or_null<PHINode>(Operands[In]->getUnderlyingValue ());
8256- if (PhiOp && CM.isInLoopReduction (PhiOp)) {
8257- assert (!InLoopVal && " Found more than one in-loop reduction!" );
8258- InLoopVal = Operands[In];
8259- }
8260- }
8261-
8262- assert ((!InLoopVal || NumIncoming == 2 ) &&
8263- " Found an in-loop reduction for PHI with unexpected number of "
8264- " incoming values" );
8265- if (InLoopVal)
8266- return Operands[Operands[0 ] == InLoopVal ? 1 : 0 ];
82678245
82688246 // We know that all PHIs in non-header blocks are converted into selects, so
82698247 // we don't have to worry about the insertion order and we can just use the
@@ -8273,15 +8251,18 @@ VPRecipeOrVPValueTy VPRecipeBuilder::tryToBlend(PHINode *Phi,
82738251 SmallVector<VPValue *, 2 > OperandsWithMask;
82748252
82758253 for (unsigned In = 0 ; In < NumIncoming; In++) {
8254+ OperandsWithMask.push_back (Operands[In]);
82768255 VPValue *EdgeMask =
82778256 createEdgeMask (Phi->getIncomingBlock (In), Phi->getParent (), *Plan);
8278- assert ((EdgeMask || NumIncoming == 1 ) &&
8279- " Multiple predecessors with one having a full mask" );
8280- OperandsWithMask.push_back (Operands[In]);
8281- if (EdgeMask)
8282- OperandsWithMask.push_back (EdgeMask);
8257+ if (!EdgeMask) {
8258+ assert (In == 0 && " Both null and non-null edge masks found" );
8259+ assert (all_equal (Operands) &&
8260+ " Distinct incoming values with one having a full mask" );
8261+ break ;
8262+ }
8263+ OperandsWithMask.push_back (EdgeMask);
82838264 }
8284- return toVPRecipeResult ( new VPBlendRecipe (Phi, OperandsWithMask) );
8265+ return new VPBlendRecipe (Phi, OperandsWithMask);
82858266}
82868267
82878268VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall (CallInst *CI,
@@ -8390,9 +8371,9 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
83908371 Range);
83918372}
83928373
8393- VPRecipeBase *VPRecipeBuilder::tryToWiden (Instruction *I,
8394- ArrayRef<VPValue *> Operands,
8395- VPBasicBlock *VPBB, VPlanPtr &Plan) {
8374+ VPWidenRecipe *VPRecipeBuilder::tryToWiden (Instruction *I,
8375+ ArrayRef<VPValue *> Operands,
8376+ VPBasicBlock *VPBB, VPlanPtr &Plan) {
83968377 switch (I->getOpcode ()) {
83978378 default :
83988379 return nullptr ;
@@ -8449,9 +8430,9 @@ void VPRecipeBuilder::fixHeaderPhis() {
84498430 }
84508431}
84518432
8452- VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication (Instruction *I,
8453- VFRange &Range,
8454- VPlan &Plan) {
8433+ VPReplicateRecipe * VPRecipeBuilder::handleReplication (Instruction *I,
8434+ VFRange &Range,
8435+ VPlan &Plan) {
84558436 bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange (
84568437 [&](ElementCount VF) { return CM.isUniformAfterVectorization (I, VF); },
84578438 Range);
@@ -8503,14 +8484,12 @@ VPRecipeOrVPValueTy VPRecipeBuilder::handleReplication(Instruction *I,
85038484
85048485 auto *Recipe = new VPReplicateRecipe (I, Plan.mapToVPValues (I->operands ()),
85058486 IsUniform, BlockInMask);
8506- return toVPRecipeResult ( Recipe) ;
8487+ return Recipe;
85078488}
85088489
8509- VPRecipeOrVPValueTy
8510- VPRecipeBuilder::tryToCreateWidenRecipe (Instruction *Instr,
8511- ArrayRef<VPValue *> Operands,
8512- VFRange &Range, VPBasicBlock *VPBB,
8513- VPlanPtr &Plan) {
8490+ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe (
8491+ Instruction *Instr, ArrayRef<VPValue *> Operands, VFRange &Range,
8492+ VPBasicBlock *VPBB, VPlanPtr &Plan) {
85148493 // First, check for specific widening recipes that deal with inductions, Phi
85158494 // nodes, calls and memory operations.
85168495 VPRecipeBase *Recipe;
@@ -8523,7 +8502,7 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
85238502 recordRecipeOf (Phi);
85248503
85258504 if ((Recipe = tryToOptimizeInductionPHI (Phi, Operands, *Plan, Range)))
8526- return toVPRecipeResult ( Recipe) ;
8505+ return Recipe;
85278506
85288507 VPHeaderPHIRecipe *PhiRecipe = nullptr ;
85298508 assert ((Legal->isReductionVariable (Phi) ||
@@ -8555,43 +8534,43 @@ VPRecipeBuilder::tryToCreateWidenRecipe(Instruction *Instr,
85558534 recordRecipeOf (Inc);
85568535
85578536 PhisToFix.push_back (PhiRecipe);
8558- return toVPRecipeResult ( PhiRecipe) ;
8537+ return PhiRecipe;
85598538 }
85608539
85618540 if (isa<TruncInst>(Instr) &&
85628541 (Recipe = tryToOptimizeInductionTruncate (cast<TruncInst>(Instr), Operands,
85638542 Range, *Plan)))
8564- return toVPRecipeResult ( Recipe) ;
8543+ return Recipe;
85658544
85668545 // All widen recipes below deal only with VF > 1.
85678546 if (LoopVectorizationPlanner::getDecisionAndClampRange (
85688547 [&](ElementCount VF) { return VF.isScalar (); }, Range))
85698548 return nullptr ;
85708549
85718550 if (auto *CI = dyn_cast<CallInst>(Instr))
8572- return toVPRecipeResult ( tryToWidenCall (CI, Operands, Range, Plan) );
8551+ return tryToWidenCall (CI, Operands, Range, Plan);
85738552
85748553 if (isa<LoadInst>(Instr) || isa<StoreInst>(Instr))
8575- return toVPRecipeResult ( tryToWidenMemory (Instr, Operands, Range, Plan) );
8554+ return tryToWidenMemory (Instr, Operands, Range, Plan);
85768555
85778556 if (!shouldWiden (Instr, Range))
85788557 return nullptr ;
85798558
85808559 if (auto GEP = dyn_cast<GetElementPtrInst>(Instr))
8581- return toVPRecipeResult ( new VPWidenGEPRecipe (
8582- GEP, make_range (Operands.begin (), Operands.end () )));
8560+ return new VPWidenGEPRecipe (GEP,
8561+ make_range (Operands.begin (), Operands.end ()));
85838562
85848563 if (auto *SI = dyn_cast<SelectInst>(Instr)) {
8585- return toVPRecipeResult ( new VPWidenSelectRecipe (
8586- *SI, make_range (Operands.begin (), Operands.end ()))) ;
8564+ return new VPWidenSelectRecipe (
8565+ *SI, make_range (Operands.begin (), Operands.end ()));
85878566 }
85888567
85898568 if (auto *CI = dyn_cast<CastInst>(Instr)) {
8590- return toVPRecipeResult ( new VPWidenCastRecipe (CI->getOpcode (), Operands[0 ],
8591- CI-> getType (), *CI) );
8569+ return new VPWidenCastRecipe (CI->getOpcode (), Operands[0 ], CI-> getType () ,
8570+ *CI);
85928571 }
85938572
8594- return toVPRecipeResult ( tryToWiden (Instr, Operands, VPBB, Plan) );
8573+ return tryToWiden (Instr, Operands, VPBB, Plan);
85958574}
85968575
85978576void LoopVectorizationPlanner::buildVPlansWithVPRecipes (ElementCount MinVF,
@@ -8779,22 +8758,10 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
87798758 Legal->isInvariantAddressOfReduction (SI->getPointerOperand ()))
87808759 continue ;
87818760
8782- auto RecipeOrValue = RecipeBuilder.tryToCreateWidenRecipe (
8761+ VPRecipeBase *Recipe = RecipeBuilder.tryToCreateWidenRecipe (
87838762 Instr, Operands, Range, VPBB, Plan);
8784- if (!RecipeOrValue)
8785- RecipeOrValue = RecipeBuilder.handleReplication (Instr, Range, *Plan);
8786- // If Instr can be simplified to an existing VPValue, use it.
8787- if (isa<VPValue *>(RecipeOrValue)) {
8788- auto *VPV = cast<VPValue *>(RecipeOrValue);
8789- Plan->addVPValue (Instr, VPV);
8790- // If the re-used value is a recipe, register the recipe for the
8791- // instruction, in case the recipe for Instr needs to be recorded.
8792- if (VPRecipeBase *R = VPV->getDefiningRecipe ())
8793- RecipeBuilder.setRecipe (Instr, R);
8794- continue ;
8795- }
8796- // Otherwise, add the new recipe.
8797- VPRecipeBase *Recipe = cast<VPRecipeBase *>(RecipeOrValue);
8763+ if (!Recipe)
8764+ Recipe = RecipeBuilder.handleReplication (Instr, Range, *Plan);
87988765 for (auto *Def : Recipe->definedValues ()) {
87998766 auto *UV = Def->getUnderlyingValue ();
88008767 Plan->addVPValue (UV, Def);
@@ -9041,7 +9008,9 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
90419008 // the phi until LoopExitValue. We keep track of the previous item
90429009 // (PreviousLink) to tell which of the two operands of a Link will remain
90439010 // scalar and which will be reduced. For minmax by select(cmp), Link will be
9044- // the select instructions.
9011+ // the select instructions. Blend recipes of in-loop reduction phis will
9012+ // get folded to their non-phi operand, as the reduction recipe handles the
9013+ // condition directly.
90459014 VPSingleDefRecipe *PreviousLink = PhiR; // Aka Worklist[0].
90469015 for (VPSingleDefRecipe *CurrentLink : Worklist.getArrayRef ().drop_front ()) {
90479016 Instruction *CurrentLinkI = CurrentLink->getUnderlyingInstr ();
@@ -9072,6 +9041,20 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
90729041 LinkVPBB->insert (FMulRecipe, CurrentLink->getIterator ());
90739042 VecOp = FMulRecipe;
90749043 } else {
9044+ auto *Blend = dyn_cast<VPBlendRecipe>(CurrentLink);
9045+ if (PhiR->isInLoop () && Blend) {
9046+ assert (Blend->getNumIncomingValues () == 2 &&
9047+ " Blend must have 2 incoming values" );
9048+ if (Blend->getIncomingValue (0 ) == PhiR)
9049+ Blend->replaceAllUsesWith (Blend->getIncomingValue (1 ));
9050+ else {
9051+ assert (Blend->getIncomingValue (1 ) == PhiR &&
9052+ " PhiR must be an operand of the blend" );
9053+ Blend->replaceAllUsesWith (Blend->getIncomingValue (0 ));
9054+ }
9055+ continue ;
9056+ }
9057+
90759058 if (RecurrenceDescriptor::isMinMaxRecurrenceKind (Kind)) {
90769059 if (isa<VPWidenRecipe>(CurrentLink)) {
90779060 assert (isa<CmpInst>(CurrentLinkI) &&
0 commit comments