@@ -53,6 +53,9 @@ class UnrollState {
5353 /// Unroll replicate region \p VPR by cloning the region UF - 1 times.
5454 void unrollReplicateRegionByUF (VPRegionBlock *VPR);
5555
56+ /// Add a start index operand to \p Steps for \p Part.
57+ void addStartIndexForScalarSteps (VPScalarIVStepsRecipe *Steps, unsigned Part);
58+
5659 /// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
5760 /// all parts.
5861 void unrollRecipeByUF (VPRecipeBase &R);
@@ -123,6 +126,32 @@ class UnrollState {
123126};
124127} // namespace
125128
129+ void UnrollState::addStartIndexForScalarSteps (VPScalarIVStepsRecipe *Steps, unsigned Part) {
130+ if (Part == 0 ) {
131+ Steps->addOperand (getConstantInt (Part));
132+ return ;
133+ }
134+
135+ VPBuilder Builder (Steps);
136+ Type *BaseIVTy = TypeInfo.inferScalarType (Steps->getOperand (0 ));
137+ Type *IntStepTy =
138+ IntegerType::get (BaseIVTy->getContext (), BaseIVTy->getScalarSizeInBits ());
139+ VPValue *StartIdx0 = Steps->getOperand (2 );
140+ StartIdx0 = Builder.createOverflowingOp (
141+ Instruction::Mul,
142+ {StartIdx0,
143+ Plan.getConstantInt (TypeInfo.inferScalarType (StartIdx0), Part)});
144+ StartIdx0 = Builder.createScalarSExtOrTrunc (
145+ StartIdx0, IntStepTy, TypeInfo.inferScalarType (StartIdx0),
146+ DebugLoc::getUnknown ());
147+
148+ if (BaseIVTy->isFloatingPointTy ())
149+ StartIdx0 = Builder.createScalarCast (Instruction::SIToFP, StartIdx0,
150+ BaseIVTy, DebugLoc::getUnknown ());
151+
152+ Steps->addOperand (StartIdx0);
153+ }
154+
126155void UnrollState::unrollReplicateRegionByUF (VPRegionBlock *VPR) {
127156 VPBlockBase *InsertPt = VPR->getSingleSuccessor ();
128157 for (unsigned Part = 1 ; Part != UF; ++Part) {
@@ -136,9 +165,8 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
136165 VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
137166 for (const auto &[PartIR, Part0R] : zip (*PartIVPBB, *Part0VPBB)) {
138167 remapOperands (&PartIR, Part);
139- if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
140- ScalarIVSteps->addOperand (getConstantInt (Part));
141- }
168+ if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
169+ addStartIndexForScalarSteps (Steps, Part);
142170
143171 addRecipeForPart (&Part0R, &PartIR, Part);
144172 }
@@ -311,10 +339,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
311339 }
312340 remapOperands (Copy, Part);
313341
342+ if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
343+ addStartIndexForScalarSteps (ScalarIVSteps, Part);
344+
314345 // Add operand indicating the part to generate code for, to recipes still
315346 // requiring it.
316- if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe ,
317- VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
347+ if (isa<VPWidenCanonicalIVRecipe, VPVectorPointerRecipe ,
348+ VPVectorEndPointerRecipe>(Copy) ||
318349 match (Copy,
319350 m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
320351 Copy->addOperand (getConstantInt (Part));
0 commit comments