Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,17 @@ class VPBuilder {
return createScalarCast(CastOp, Op, ResultTy, DL);
}

/// Convert the scalar \p Op from \p SrcTy to \p ResultTy. Returns \p Op
/// itself when both types are identical; emits a Trunc when \p ResultTy has
/// fewer scalar bits than \p SrcTy and an SExt otherwise. \p DL is forwarded
/// to the created cast.
VPValue *createScalarSExtOrTrunc(VPValue *Op, Type *ResultTy, Type *SrcTy,
                                 DebugLoc DL) {
  // Nothing to convert when source and destination types already agree.
  if (SrcTy == ResultTy)
    return Op;

  const bool IsNarrowing =
      ResultTy->getScalarSizeInBits() < SrcTy->getScalarSizeInBits();
  if (IsNarrowing)
    return createScalarCast(Instruction::Trunc, Op, ResultTy, DL);
  return createScalarCast(Instruction::SExt, Op, ResultTy, DL);
}

VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
Type *ResultTy) {
VPIRFlags Flags;
Expand Down
10 changes: 3 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -3783,9 +3783,9 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
};

/// A recipe for handling phi nodes of integer and floating-point inductions,
/// producing their scalar values.
class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<3> {
/// producing their scalar values. Before unrolling the recipe has 3 operands:
/// IV, step and VF. Unrolling adds an extra operand StartIndex.
class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags {
Instruction::BinaryOps InductionOpcode;

public:
Expand Down Expand Up @@ -3815,10 +3815,6 @@ class LLVM_ABI_FOR_TEST VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
getDebugLoc());
}

/// Whether this VPScalarIVStepsRecipe belongs to unroll part 0, i.e. whether
/// getUnrollPart() reports 0 for it. The answer is only accurate once the
/// VPlan has been unrolled.
bool isPart0() const {
  const auto Part = getUnrollPart(*this);
  return Part == 0;
}

VP_CLASSOF_IMPL(VPDef::VPScalarIVStepsSC)

/// Generate the scalarized versions of the phi node as needed by their users.
Expand Down
20 changes: 4 additions & 16 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2380,8 +2380,6 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
// iteration.
bool FirstLaneOnly = vputils::onlyFirstLaneUsed(this);
// Compute the scalar steps and save the results in State.
Type *IntStepTy =
IntegerType::get(BaseIVTy->getContext(), BaseIVTy->getScalarSizeInBits());

unsigned StartLane = 0;
unsigned EndLane = FirstLaneOnly ? 1 : State.VF.getKnownMinValue();
Expand All @@ -2390,20 +2388,10 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
EndLane = StartLane + 1;
}
Value *StartIdx0;
if (getUnrollPart(*this) == 0)
StartIdx0 = ConstantInt::get(IntStepTy, 0);
else {
StartIdx0 = State.get(getOperand(2), true);
if (getUnrollPart(*this) != 1) {
StartIdx0 =
Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
getUnrollPart(*this)));
}
StartIdx0 = Builder.CreateSExtOrTrunc(StartIdx0, IntStepTy);
}

if (BaseIVTy->isFloatingPointTy())
StartIdx0 = Builder.CreateSIToFP(StartIdx0, BaseIVTy);
if (getNumOperands() == 3) {
StartIdx0 = getSignedIntOrFpConstant(BaseIVTy, 0);
} else
StartIdx0 = State.get(getOperand(3), true);

for (unsigned Lane = StartLane; Lane < EndLane; ++Lane) {
Value *StartIdx = Builder.CreateBinOp(
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1459,7 +1459,9 @@ static void simplifyRecipe(VPSingleDefRecipe *Def, VPTypeAnalysis &TypeInfo) {
// VPScalarIVSteps for part 0 can be replaced by their start value, if only
// the first lane is demanded.
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(Def)) {
if (Steps->isPart0() && vputils::onlyFirstLaneUsed(Steps)) {
if ((Steps->getNumOperands() == 3 ||
match(Steps->getOperand(3), m_ZeroInt())) &&
vputils::onlyFirstLaneUsed(Steps)) {
Steps->replaceAllUsesWith(Steps->getOperand(0));
return;
}
Expand Down
42 changes: 37 additions & 5 deletions llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ class UnrollState {
/// Unroll replicate region \p VPR by cloning the region UF - 1 times.
void unrollReplicateRegionByUF(VPRegionBlock *VPR);

/// Add a start index operand to \p Steps for \p Part.
void addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps, unsigned Part);

/// Unroll recipe \p R by cloning it UF - 1 times, unless it is uniform across
/// all parts.
void unrollRecipeByUF(VPRecipeBase &R);
Expand Down Expand Up @@ -123,6 +126,33 @@ class UnrollState {
};
} // namespace

/// Append the start-index operand for unroll part \p Part to \p Steps.
///
/// For part 0 the operand is simply getConstantInt(0). For every other part
/// it is operand 2 of \p Steps multiplied by \p Part, then sign-extended or
/// truncated to an integer type as wide as the scalar type of operand 0, and
/// finally converted with SIToFP when that scalar type is floating point.
/// All helper recipes are created through a VPBuilder constructed from
/// \p Steps.
void UnrollState::addStartIndexForScalarSteps(VPScalarIVStepsRecipe *Steps,
                                              unsigned Part) {
  // NOTE(review): the part-0 constant is added as-is, without the width/FP
  // conversion applied below for other parts -- confirm this is intended for
  // floating-point inductions.
  if (Part == 0) {
    Steps->addOperand(getConstantInt(Part));
    return;
  }

  VPBuilder Builder(Steps);

  // Integer type with the same bit width as the induction's scalar type.
  Type *IVTy = TypeInfo.inferScalarType(Steps->getOperand(0));
  Type *IdxTy =
      IntegerType::get(IVTy->getContext(), IVTy->getScalarSizeInBits());

  // Scale operand 2 by the part number, in operand 2's own type.
  VPValue *Scale = Steps->getOperand(2);
  VPValue *PartCst = Plan.getConstantInt(TypeInfo.inferScalarType(Scale), Part);
  VPValue *StartIdx =
      Builder.createOverflowingOp(Instruction::Mul, {Scale, PartCst});

  // Bring the product to the integer step type.
  Type *ProductTy = TypeInfo.inferScalarType(StartIdx);
  StartIdx = Builder.createScalarSExtOrTrunc(StartIdx, IdxTy, ProductTy,
                                             DebugLoc::getUnknown());

  // Floating-point inductions need the index as a floating-point value.
  if (IVTy->isFloatingPointTy()) {
    StartIdx = Builder.createScalarCast(Instruction::SIToFP, StartIdx, IVTy,
                                        DebugLoc::getUnknown());
  }

  Steps->addOperand(StartIdx);
}

void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockBase *InsertPt = VPR->getSingleSuccessor();
for (unsigned Part = 1; Part != UF; ++Part) {
Expand All @@ -136,9 +166,8 @@ void UnrollState::unrollReplicateRegionByUF(VPRegionBlock *VPR) {
VPBlockUtils::blocksOnly<VPBasicBlock>(Part0))) {
for (const auto &[PartIR, Part0R] : zip(*PartIVPBB, *Part0VPBB)) {
remapOperands(&PartIR, Part);
if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR)) {
ScalarIVSteps->addOperand(getConstantInt(Part));
}
if (auto *Steps = dyn_cast<VPScalarIVStepsRecipe>(&PartIR))
addStartIndexForScalarSteps(Steps, Part);

addRecipeForPart(&Part0R, &PartIR, Part);
}
Expand Down Expand Up @@ -311,10 +340,13 @@ void UnrollState::unrollRecipeByUF(VPRecipeBase &R) {
}
remapOperands(Copy, Part);

if (auto *ScalarIVSteps = dyn_cast<VPScalarIVStepsRecipe>(Copy))
addStartIndexForScalarSteps(ScalarIVSteps, Part);

// Add operand indicating the part to generate code for, to recipes still
// requiring it.
if (isa<VPScalarIVStepsRecipe, VPWidenCanonicalIVRecipe,
VPVectorPointerRecipe, VPVectorEndPointerRecipe>(Copy) ||
if (isa<VPWidenCanonicalIVRecipe, VPVectorPointerRecipe,
VPVectorEndPointerRecipe>(Copy) ||
match(Copy,
m_VPInstruction<VPInstruction::CanonicalIVIncrementForPart>()))
Copy->addOperand(getConstantInt(Part));
Expand Down
Loading