@@ -562,21 +562,63 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
562562 return Builder.createScalarIVSteps (InductionOpcode, FPBinOp, BaseIV, Step);
563563}
564564
/// Collect the users of \p V together with the users of every value defined
/// by those users, transitively, and return them as a vector.
///
/// Users that are VPHeaderPHIRecipes are kept in the result, but the values
/// they define are not followed any further.
///
/// \param V the value whose users seed the traversal.
/// \returns the collected users, taken from the SetVector used as worklist.
565+ static SmallVector<VPUser *> collectUsersRecursively (VPValue *V) {
// Seed the worklist with the direct users of V.
566+ SetVector<VPUser *> Users (V->user_begin (), V->user_end ());
// Index-based loop: Users grows while it is being walked, and Users.size ()
// is re-read on every iteration so newly inserted users are visited too.
567+ for (unsigned I = 0 ; I != Users.size (); ++I) {
// NOTE(review): cast<> assumes every VPUser reached here is a VPRecipeBase
// -- confirm no other kind of VPUser can appear in the user lists.
568+ VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
// Do not expand through header phis.
569+ if (isa<VPHeaderPHIRecipe>(Cur))
570+ continue ;
// Note: this inner V shadows the function parameter V.
571+ for (VPValue *V : Cur->definedValues ())
572+ Users.insert (V->user_begin (), V->user_end ());
573+ }
574+ return Users.takeVector ();
575+ }
576+
565577// / Legalize VPWidenPointerInductionRecipe, by replacing it with a PtrAdd
566578// / (IndStart, ScalarIVSteps (0, Step)) if only its scalar values are used, as
567579// / VPWidenPointerInductionRecipe will generate vectors only. If some users
568580// / require vectors while others require scalars, the scalar uses need to extract
569581// / the scalars from the generated vectors (Note that this is different to how
570- // / int/fp inductions are handled). Also optimize VPWidenIntOrFpInductionRecipe,
571- // / if any of its users needs scalar values, by providing them scalar steps
572- // / built on the canonical scalar IV and update the original IV's users. This is
573- // / an optional optimization to reduce the needs of vector extracts.
582+ // / int/fp inductions are handled). Legalize extract-from-ends using uniform
583+ // / VPReplicateRecipe of wide inductions to use regular VPReplicateRecipe, so
584+ // / the correct end value is available. Also optimize
585+ // / VPWidenIntOrFpInductionRecipe, if any of its users needs scalar values, by
586+ // / providing them scalar steps built on the canonical scalar IV and update the
587+ // / original IV's users. This is an optional optimization to reduce the needs of
588+ // / vector extracts.
574589static void legalizeAndOptimizeInductions (VPlan &Plan) {
590+ using namespace llvm ::VPlanPatternMatch;
575591 SmallVector<VPRecipeBase *> ToRemove;
576592 VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
577593 bool HasOnlyVectorVFs = !Plan.hasVF (ElementCount::getFixed (1 ));
578594 VPBuilder Builder (HeaderVPBB, HeaderVPBB->getFirstNonPhi ());
579595 for (VPRecipeBase &Phi : HeaderVPBB->phis ()) {
596+ auto *PhiR = dyn_cast<VPHeaderPHIRecipe>(&Phi);
597+ if (!PhiR)
598+ break ;
599+
600+ // Check if any uniform VPReplicateRecipes using the phi recipe are used by
601+ // ExtractFromEnd. Those must be replaced by a regular VPReplicateRecipe to
602+ // ensure the final value is available.
603+ // TODO: Remove once uniformity analysis is done on VPlan.
604+ for (VPUser *U : collectUsersRecursively (PhiR)) {
605+ auto *ExitIRI = dyn_cast<VPIRInstruction>(U);
606+ VPValue *Op;
607+ if (!ExitIRI || !match (ExitIRI->getOperand (0 ),
608+ m_VPInstruction<VPInstruction::ExtractFromEnd>(
609+ m_VPValue (Op), m_VPValue ())))
610+ continue ;
611+ auto *RepR = dyn_cast<VPReplicateRecipe>(Op);
612+ if (!RepR || !RepR->isUniform ())
613+ continue ;
614+ assert (!RepR->isPredicated () && " RepR must not be predicated" );
615+ Instruction *I = RepR->getUnderlyingInstr ();
616+ auto *Clone =
617+ new VPReplicateRecipe (I, RepR->operands (), /* IsUniform*/ false );
618+ Clone->insertAfter (RepR);
619+ RepR->replaceAllUsesWith (Clone);
620+ }
621+
580622 // Replace wide pointer inductions which have only their scalars used by
581623 // PtrAdd(IndStart, ScalarIVSteps (0, Step)).
582624 if (auto *PtrIV = dyn_cast<VPWidenPointerInductionRecipe>(&Phi)) {
@@ -1086,18 +1128,6 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
10861128 return true ;
10871129}
10881130
/// Removed side of this hunk: the same helper text is added earlier in the
/// patch, ahead of legalizeAndOptimizeInductions, which calls it.
///
/// Collect the users of \p V together with the users of every value defined
/// by those users, transitively. Users that are VPHeaderPHIRecipes are kept
/// in the result, but the values they define are not followed.
1089- static SmallVector<VPUser *> collectUsersRecursively (VPValue *V) {
// Seed the worklist with the direct users of V.
1090- SetVector<VPUser *> Users (V->user_begin (), V->user_end ());
// Users.size () is re-read each iteration, so users inserted below are
// visited as well.
1091- for (unsigned I = 0 ; I != Users.size (); ++I) {
// NOTE(review): cast<> assumes every VPUser reached here is a VPRecipeBase.
1092- VPRecipeBase *Cur = cast<VPRecipeBase>(Users[I]);
// Do not expand through header phis.
1093- if (isa<VPHeaderPHIRecipe>(Cur))
1094- continue ;
// Note: this inner V shadows the function parameter V.
1095- for (VPValue *V : Cur->definedValues ())
1096- Users.insert (V->user_begin (), V->user_end ());
1097- }
1098- return Users.takeVector ();
1099- }
1100-
11011131void VPlanTransforms::clearReductionWrapFlags (VPlan &Plan) {
11021132 for (VPRecipeBase &R :
11031133 Plan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
0 commit comments