Skip to content

Commit 3e787a3

Browse files
committed
[VPlan] Replace UnrollPart for VPScalarIVSteps with start index op (NFC)
Replace the unroll-part operand of VPScalarIVStepsRecipe with a start-index operand, which is now computed explicitly when the plan is unrolled. This simplifies #170053 and is also a first step toward breaking the recipe down into its components.
1 parent ad1edc9 commit 3e787a3

File tree

5 files changed, with 57 additions and 29 deletions.

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -322,6 +322,17 @@ class VPBuilder {
322322
return createScalarCast(CastOp, Op, ResultTy, DL);
323323
}
324324

325+
VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
326+
DebugLoc DL) {
327+
if (ResultTy == SrcTy)
328+
return Op;
329+
Instruction::CastOps CastOp =
330+
ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits()
331+
? Instruction::Trunc
332+
: Instruction::SExt;
333+
return createScalarCast(CastOp, Op, ResultTy, DL);
334+
}
335+
325336
VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
326337
Type *ResultTy) {
327338
VPIRFlags Flags;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3783,9 +3783,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
37833783
};
37843784

37853785
/// A recipe for handling phi nodes of integer and floating-point inductions,
3786-
/// producing their scalar values.
3787-
class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
3788-
public VPUnrollPartAccessor<3> {
3786+
/// producing their scalar values. Before unrolling the recipe has 3 operands:
3787+
/// IV, step and VF. Unrolling adds an extra operand StartIndex.
3788+
class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
37893789
Instruction::BinaryOps InductionOpcode;
37903790

37913791
public:
@@ -3815,10 +3815,6 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
38153815
getDebugLoc());
38163816
}
38173817

3818-
/// Return true if this VPScalarIVStepsRecipe corresponds to part 0. Note that
3819-
/// this is only accurate after the VPlan has been unrolled.
3820-
bool isPart0() const { return getUnrollPart(*this) == 0; }
3821-
38223818
VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)
38233819

38243820
/// Generate the scalarized versions of the phi node as needed by their users.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2380,8 +2380,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
23802380
// iteration.
23812381
bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
23822382
// Compute the scalar steps and save the results in State.
2383-
Type *IntStepTy =
2384-
IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
23852383

23862384
unsigned StartLane = 0;
23872385
unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
@@ -2390,20 +2388,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
23902388
EndLane = StartLane + 1;
23912389
}
23922390
Value *StartIdx0;
2393-
if (getUnrollPart(*this) == 0)
2394-
StartIdx0 = ConstantInt::get(IntStepTy, 0);
2395-
else {
2396-
StartIdx0 = State.get(getOperand(2), true);
2397-
if (getUnrollPart(*this) != 1) {
2398-
StartIdx0 =
2399-
Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
2400-
getUnrollPart(*this)));
2401-
}
2402-
StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
2403-
}
2404-
2405-
if (BaseIVTy->isFloatingPointTy())
2406-
StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
2391+
if (getNumOperands() == 3) {
2392+
StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
2393+
} else
2394+
StartIdx0 = State.get(getOperand(3), true);
24072395

24082396
for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
24092397
Value *StartIdx = Builder.CreateBinOp(

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1459,7 +1459,9 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
14591459
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
14601460
// the first lane is demanded.
14611461
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
1462-
if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) {
1462+
if ((Steps->getNumOperands() == 3 ||
1463+
match(Steps->getOperand(3), m_ZeroInt())) &&
1464+
vputils::onlyFirstLaneUsed(Steps)) {
14631465
Steps->replaceAllUsesWith(Steps->getOperand(0));
14641466
return;
14651467
}

llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp

Lines changed: 36 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,9 @@ class UnrollState {
5353
/// Unroll replicate region \p VPR by cloning the region UF - 1 times.
5454
void unrollReplicateRegionByUF(VPRegionBlock *VPR);
5555

56+
/// Add a start index operand to \p Steps for \p Part.
57+
void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part);
58+
5659
/// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
5760
/// all parts.
5861
void unrollRecipeByUF(VPRecipeBase &R);
@@ -123,6 +126,32 @@ class UnrollState {
123126
};
124127
} // namespace
125128

129+
void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part) {
130+
if (Part == 0) {
131+
Steps->addOperand(getConstantInt(Part));
132+
return;
133+
}
134+
135+
VPBuilder Builder(Steps);
136+
Type *BaseIVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
137+
Type *IntStepTy =
138+
IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());
139+
VPValue *StartIdx0 = Steps->getOperand(2);
140+
StartIdx0 = Builder.createOverflowingOp(
141+
Instruction::Mul,
142+
{StartIdx0,
143+
Plan.getConstantInt(TypeInfo.inferScalarType(StartIdx0), Part)});
144+
StartIdx0 = Builder.createScalarSExtOrTrunc(
145+
StartIdx0, IntStepTy, TypeInfo.inferScalarType(StartIdx0),
146+
DebugLoc::getUnknown());
147+
148+
if (BaseIVTy->isFloatingPointTy())
149+
StartIdx0 = Builder.createScalarCast(Instruction::SIToFP, StartIdx0,
150+
BaseIVTy, DebugLoc::getUnknown());
151+
152+
Steps->addOperand(StartIdx0);
153+
}
154+
126155
void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
127156
VPBlockBase *InsertPt = VPR->getSingleSuccessor();
128157
for (unsigned Part = 1; Part != UF; ++Part) {
@@ -136,9 +165,8 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
136165
VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
137166
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
138167
remapOperands(&PartIR, Part);
139-
if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
140-
ScalarIVSteps->addOperand(getConstantInt(Part));
141-
}
168+
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
169+
addStartIndexForScalarSteps(Steps, Part);
142170

143171
addRecipeForPart(&Part0R, &PartIR, Part);
144172
}
@@ -311,10 +339,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
311339
}
312340
remapOperands(Copy, Part);
313341

342+
if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
343+
addStartIndexForScalarSteps(ScalarIVSteps, Part);
344+
314345
// Add operand indicating the part to generate code for, to recipes still
315346
// requiring it.
316-
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
317-
VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
347+
if (isa<VPWidenCanonicalIVRecipe, VPVectorPointerRecipe,
348+
VPVectorEndPointerRecipe>(Copy) ||
318349
match(Copy,
319350
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
320351
Copy->addOperand(getConstantInt(Part));

0 commit comments

Comments
 (0)