Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 17 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// Scale the first operand (vector step) by the second operand
/// (scalar-step). Casts both operands to the result type if needed.
WideIVStep,
// Creates a step vector starting from 0 with a step of 1.
StepVector,

};

Expand Down Expand Up @@ -1063,6 +1065,9 @@ class VPInstructionWithType : public VPInstruction {
: VPInstruction(Opcode, Operands, FMFs, DL, Name), ResultTy(ResultTy) {}

static inline bool classof(const VPRecipeBase *R) {
if (isa<VPInstruction>(R) &&
cast<VPInstruction>(R)->getOpcode() == VPInstruction::StepVector)
return true;
// VPInstructionWithType are VPInstructions with specific opcodes requiring
// type information.
if (R->isScalarCast())
Expand Down Expand Up @@ -1836,6 +1841,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
Step, IndDesc, DL),
Trunc(nullptr) {
addOperand(VF);
addOperand(VF); // Dummy StepVector replaced in convertToConcreteRecipes
}

VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
Expand All @@ -1845,6 +1851,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
Step, IndDesc, DL),
Trunc(Trunc) {
addOperand(VF);
addOperand(VF); // Dummy StepVector replaced in convertToConcreteRecipes
SmallVector<std::pair<unsigned, MDNode *>> Metadata;
(void)Metadata;
if (Trunc)
Expand Down Expand Up @@ -1875,10 +1882,14 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
VPValue *getVFValue() { return getOperand(2); }
const VPValue *getVFValue() const { return getOperand(2); }

VPValue *getStepVector() { return getOperand(3); }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment, it always must be a VPInstructionWithType, right? Would changing the return type to VPInstructionWithType remove some casts at use sites?

Also asser that it is a StepVector opcode here, rather than at use sites?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks

const VPValue *getStepVector() const { return getOperand(3); }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we assure that this is only called after it has been set to non-poison?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think VPlanUnroll may clone it while it's still poison, which would invoke getStepVector()

void setStepVector(VPValue *V) { setOperand(3, V); }

VPValue *getSplatVFValue() {
// If the recipe has been unrolled (4 operands), return the VPValue for the
// If the recipe has been unrolled, return the VPValue for the
// induction increment.
return getNumOperands() == 5 ? getOperand(3) : nullptr;
return isUnrolled() ? getOperand(getNumOperands() - 2) : nullptr;
}

/// Returns the first defined value as TruncInst, if it is one or nullptr
Expand All @@ -1900,8 +1911,11 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
/// the last unrolled part, if it exists. Returns itself if unrolling did not
/// take place.
VPValue *getLastUnrolledPartOperand() {
return getNumOperands() == 5 ? getOperand(4) : this;
return isUnrolled() ? getOperand(getNumOperands() - 1) : this;
}

private:
bool isUnrolled() const { return getNumOperands() == 6; }
};

class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe,
Expand Down
31 changes: 17 additions & 14 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -930,6 +930,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::Not:
case VPInstruction::PtrAdd:
case VPInstruction::WideIVStep:
case VPInstruction::StepVector:
return false;
default:
return true;
Expand Down Expand Up @@ -1078,8 +1079,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,

void VPInstructionWithType::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
assert(vputils::onlyFirstLaneUsed(this) &&
"Codegen only implemented for first lane.");
switch (getOpcode()) {
case Instruction::ZExt:
case Instruction::Trunc: {
Expand All @@ -1089,6 +1088,12 @@ void VPInstructionWithType::execute(VPTransformState &State) {
State.set(this, Cast, VPLane(0));
break;
}
case VPInstruction::StepVector: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to add this to VPInstruction::computeCost?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's currently costed as zero for now since we don't want to change the overall cost of VPWidenIntOrFpInductionRecipe.

Once we expand VPWidenIntOrFpInductionRecipe in #118638 I think it should be safe to add a cost to it then? Since the expansion will happen just before execution, after any costing.

Value *StepVector =
State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
State.set(this, StepVector);
break;
}
default:
llvm_unreachable("opcode not implemented yet");
}
Expand All @@ -1106,6 +1111,9 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
O << "wide-iv-step ";
printOperands(O, SlotTracker);
break;
case VPInstruction::StepVector:
O << "step-vector " << *ResultTy;
break;
default:
assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
O << Instruction::getOpcodeName(getOpcode()) << " ";
Expand Down Expand Up @@ -1875,7 +1883,8 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
/// (0 * Step, 1 * Step, 2 * Step, ...)
/// to each vector element of Val.
/// \p Opcode is relevant for FP induction variable.
static Value *getStepVector(Value *Val, Value *Step,
/// \p InitVec is an integer step vector from 0 with a step of 1.
static Value *getStepVector(Value *Val, Value *Step, Value *InitVec,
Instruction::BinaryOps BinOp, ElementCount VF,
IRBuilderBase &Builder) {
assert(VF.isVector() && "only vector VFs are supported");
Expand All @@ -1891,15 +1900,6 @@ static Value *getStepVector(Value *Val, Value *Step,

SmallVector<Constant *, 8> Indices;

// Create a vector of consecutive numbers from zero to VF.
VectorType *InitVecValVTy = ValVTy;
if (STy->isFloatingPointTy()) {
Type *InitVecValSTy =
IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we should assert that InitVec is the type we expect, i.e. it must have the same type as Step

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They might not always have the same type as step might be a float or an integer, whereas InitVec is always an integer. Hopefully the calls to Builder.CreateMul(InitVec, Step) in the integer case and Builder.CreateUIToFP(InitVec, ValVTy) in the float case will catch this.

}
Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

if (STy->isIntegerTy()) {
Step = Builder.CreateVectorSplat(VLen, Step);
assert(Step->getType() == Val->getType() && "Invalid step vec");
Expand Down Expand Up @@ -1965,8 +1965,11 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
}

Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
State.VF, State.Builder);
assert(cast<VPInstruction>(getStepVector())->getOpcode() ==
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

&& step vector operand must be a StepVector VPInstruction.

Although that may be too restrictive, for fixed vectors it could just be a constant vector?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed vectors would still be a VPInstruction::StepVector. Hopefully the need for this assertion would also go away if we fully expand the recipe in #118638

VPInstruction::StepVector);
Value *SteppedStart =
::getStepVector(SplatStart, Step, State.get(getStepVector()),
ID.getInductionOpcode(), State.VF, State.Builder);

// We create vector phi nodes for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
Expand Down
17 changes: 17 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2410,6 +2410,23 @@ void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
continue;
}

if (auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
// Infer an up-to-date type since
// optimizeVectorInductionWidthForTCAndVFUF may have truncated the start
// and step values.
Type *Ty = TypeInfo.inferScalarType(IVR->getStartValue());
if (TruncInst *Trunc = IVR->getTruncInst())
Ty = Trunc->getType();
if (Ty->isFloatingPointTy())
Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits());
VPInstruction *StepVector = new VPInstructionWithType(
VPInstruction::StepVector, {}, Ty, R.getDebugLoc());

Plan.getVectorPreheader()->appendRecipe(StepVector);
IVR->setStepVector(StepVector);
continue;
}

VPValue *VectorStep;
VPValue *ScalarStep;
if (!match(&R, m_VPInstruction<VPInstruction::WideIVStep>(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP8]], splat (i32 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1283,11 +1283,11 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP10:%.*]] = shl nuw nsw i64 [[TMP9]], 2
; CHECK-NEXT: [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP12:%.*]] = shl nuw nsw i32 [[TMP11]], 2
; CHECK-NEXT: [[TMP13:%.*]] = add nsw i32 [[TMP12]], -1
; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[DOTPRE]], i32 [[TMP13]]
; CHECK-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
; CHECK-NEXT: [[TMP15:%.*]] = shl <vscale x 4 x i64> [[TMP14]], splat (i64 1)
; CHECK-NEXT: [[TMP17:%.*]] = shl nuw nsw i64 [[TMP9]], 3
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP17]], i64 0
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ define void @dead_load(ptr %p, i16 %start) {
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8
; CHECK-NEXT: [[TMP18:%.*]] = mul i64 [[N_VEC]], 3
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP18]]
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[START_EXT]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 8 x i64> [[TMP15]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP14]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,9 @@ define void @skip_free_iv_truncate(i16 %x, ptr %A) #0 {
; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[N_VEC]] to i32
; CHECK-NEXT: [[TMP50:%.*]] = mul i32 [[DOTCAST]], 3
; CHECK-NEXT: [[IND_END22:%.*]] = add i32 [[X_I32]], [[TMP50]]
; CHECK-NEXT: [[TMP53:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[X_I64]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP53:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP55:%.*]] = mul <vscale x 8 x i64> [[TMP53]], splat (i64 3)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP55]]
; CHECK-NEXT: [[TMP58:%.*]] = mul i64 3, [[TMP52]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -582,8 +582,8 @@ define i32 @step_cond_add(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
Expand Down Expand Up @@ -772,8 +772,8 @@ define i32 @step_cond_add_pred(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; NO-VP-OUTLOOP-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; NO-VP-OUTLOOP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP12:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
; NO-VP-OUTLOOP-NEXT: [[TMP11:%.*]] = insertelement <vscale x 4 x i32> zeroinitializer, i32 [[START]], i32 0
; NO-VP-OUTLOOP-NEXT: [[TMP14:%.*]] = mul <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; NO-VP-OUTLOOP-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP14]]
; NO-VP-OUTLOOP-NEXT: [[TMP16:%.*]] = trunc i64 [[TMP10]] to i32
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/vplan-printing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -457,7 +457,7 @@ define void @print_expand_scev(i64 %y, ptr %ptr) {
; CHECK-NEXT: <x1> vector loop: {
; CHECK-NEXT: vector.body:
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, vp<[[EXP_SCEV]]>, vp<[[VF]]> (truncated to i8)
; CHECK-NEXT: ir<%iv> = WIDEN-INDUCTION ir<0>, vp<[[EXP_SCEV]]>, vp<[[VF]]>, vp<[[VF]]> (truncated to i8)
; CHECK-NEXT: vp<[[DERIVED_IV:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * vp<[[EXP_SCEV]]>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DERIVED_IV]]>, vp<[[EXP_SCEV]]>
; CHECK-NEXT: WIDEN ir<%v3> = add nuw ir<%iv>, ir<1>
Expand Down
Loading