Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added %t
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like this empty file was added by accident? please remove before landing.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Woops, will remove. This sometimes gets left around when calling update_test_checks.py on files that check the stderr output, e.g. ; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -mattr=+sve 2>%t. Not sure if UTC could be taught to redirect that

Empty file.
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7783,6 +7783,7 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
"Trying to execute plan with unsupported VF");
assert(BestVPlan.hasUF(BestUF) &&
"Trying to execute plan with unsupported UF");
VPlanTransforms::materializeStepVectors(BestVPlan);
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
// cost model is complete for better cost estimates.
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
Expand Down
26 changes: 24 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,8 @@ class VPInstruction : public VPRecipeWithIRFlags,
/// Scale the first operand (vector step) by the second operand
/// (scalar-step). Casts both operands to the result type if needed.
WideIVStep,
// Creates a step vector starting from 0 to VF with a step of 1.
StepVector,

};

Expand Down Expand Up @@ -1072,7 +1074,15 @@ class VPInstructionWithType : public VPInstruction {
if (R->isScalarCast())
return true;
auto *VPI = dyn_cast<VPInstruction>(R);
return VPI && VPI->getOpcode() == VPInstruction::WideIVStep;
if (!VPI)
return false;
switch (VPI->getOpcode()) {
case VPInstruction::WideIVStep:
case VPInstruction::StepVector:
return true;
default:
return false;
}
}

static inline bool classof(const VPUser *R) {
Expand Down Expand Up @@ -1869,7 +1879,7 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
TruncInst *Trunc;

// If this recipe is unrolled it will have 2 additional operands.
bool isUnrolled() const { return getNumOperands() == 5; }
bool isUnrolled() const { return getNumOperands() == 6; }

public:
VPWidenIntOrFpInductionRecipe(PHINode *IV, VPValue *Start, VPValue *Step,
Expand Down Expand Up @@ -1918,6 +1928,18 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe {
VPValue *getVFValue() { return getOperand(2); }
const VPValue *getVFValue() const { return getOperand(2); }

// TODO: Remove once VPWidenIntOrFpInduction is fully expanded in
// convertToConcreteRecipes.
VPValue *getStepVector() { return getOperand(3); }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At the moment, it always must be a VPInstructionWithType, right? Would changing the return type to VPInstructionWithType remove some casts at use sites?

Also asser that it is a StepVector opcode here, rather than at use sites?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks

const VPValue *getStepVector() const { return getOperand(3); }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we assure that this is only called after it has been set to non-poison?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think VPlanUnroll may clone it while it's still poison, which would invoke getStepVector()

void setStepVector(VPValue *V) {
assert(cast<VPInstructionWithType>(getStepVector()->getDefiningRecipe())
->getOpcode() == VPInstruction::StepVector &&
cast<VPInstructionWithType>(V->getDefiningRecipe())->getOpcode() ==
VPInstruction::StepVector);
setOperand(3, V);
}

VPValue *getSplatVFValue() {
// If the recipe has been unrolled return the VPValue for the induction
// increment.
Expand Down
32 changes: 18 additions & 14 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -934,6 +934,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::Not:
case VPInstruction::PtrAdd:
case VPInstruction::WideIVStep:
case VPInstruction::StepVector:
return false;
default:
return true;
Expand Down Expand Up @@ -1085,8 +1086,6 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,

void VPInstructionWithType::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
assert(vputils::onlyFirstLaneUsed(this) &&
"Codegen only implemented for first lane.");
switch (getOpcode()) {
case Instruction::ZExt:
case Instruction::Trunc: {
Expand All @@ -1096,6 +1095,12 @@ void VPInstructionWithType::execute(VPTransformState &State) {
State.set(this, Cast, VPLane(0));
break;
}
case VPInstruction::StepVector: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to add this to VPInstruction::computeCost?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's currently costed as zero for now since we don't want to change the overall cost of VPWidenIntOrFpInductionRecipe.

Once we expand VPWidenIntOrFpInductionRecipe in #118638 I think it should be safe to add a cost to it then? Since the expansion will happen just before execution, after any costing.

Value *StepVector =
State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
State.set(this, StepVector);
break;
}
default:
llvm_unreachable("opcode not implemented yet");
}
Expand All @@ -1113,6 +1118,9 @@ void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
O << "wide-iv-step ";
printOperands(O, SlotTracker);
break;
case VPInstruction::StepVector:
O << "step-vector " << *ResultTy;
break;
default:
assert(Instruction::isCast(getOpcode()) && "unhandled opcode");
O << Instruction::getOpcodeName(getOpcode()) << " ";
Expand Down Expand Up @@ -1880,7 +1888,8 @@ InstructionCost VPHeaderPHIRecipe::computeCost(ElementCount VF,
/// (0 * Step, 1 * Step, 2 * Step, ...)
/// to each vector element of Val.
/// \p Opcode is relevant for FP induction variable.
static Value *getStepVector(Value *Val, Value *Step,
/// \p InitVec is an integer step vector from 0 with a step of 1.
static Value *getStepVector(Value *Val, Value *Step, Value *InitVec,
Instruction::BinaryOps BinOp, ElementCount VF,
IRBuilderBase &Builder) {
assert(VF.isVector() && "only vector VFs are supported");
Expand All @@ -1894,15 +1903,6 @@ static Value *getStepVector(Value *Val, Value *Step,
"Induction Step must be an integer or FP");
assert(Step->getType() == STy && "Step has wrong type");

// Create a vector of consecutive numbers from zero to VF.
VectorType *InitVecValVTy = ValVTy;
if (STy->isFloatingPointTy()) {
Type *InitVecValSTy =
IntegerType::get(STy->getContext(), STy->getScalarSizeInBits());
InitVecValVTy = VectorType::get(InitVecValSTy, VLen);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps we should assert that InitVec is the type we expect, i.e. it must have the same type as Step

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They might not always have the same type as step might be a float or an integer, whereas InitVec is always an integer. Hopefully the calls to Builder.CreateMul(InitVec, Step) in the integer case and Builder.CreateUIToFP(InitVec, ValVTy) in the float case will catch this.

}
Value *InitVec = Builder.CreateStepVector(InitVecValVTy);

if (STy->isIntegerTy()) {
Step = Builder.CreateVectorSplat(VLen, Step);
assert(Step->getType() == Val->getType() && "Invalid step vec");
Expand Down Expand Up @@ -1969,8 +1969,12 @@ void VPWidenIntOrFpInductionRecipe::execute(VPTransformState &State) {
}

Value *SplatStart = Builder.CreateVectorSplat(State.VF, Start);
Value *SteppedStart = getStepVector(SplatStart, Step, ID.getInductionOpcode(),
State.VF, State.Builder);
assert(cast<VPInstruction>(getStepVector())->getOpcode() ==
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

&& step vector operand must be a StepVector VPInstruction.

Although that may be too restrictive, for fixed vectors it could just be a constant vector?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed vectors would still be a VPInstruction::StepVector. Hopefully the need for this assertion would also go away if we fully expand the recipe in #118638

VPInstruction::StepVector &&
"step vector operand must be a VPInstruction::StepVector");
Value *SteppedStart =
::getStepVector(SplatStart, Step, State.get(getStepVector()),
ID.getInductionOpcode(), State.VF, State.Builder);

// We create vector phi nodes for both integer and floating-point induction
// variables. Here, we determine the kind of arithmetic we will perform.
Expand Down
32 changes: 32 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1223,6 +1223,15 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
WideIV->setStartValue(NewStart);
auto *NewStep = Plan.getOrAddLiveIn(ConstantInt::get(NewIVTy, 1));
WideIV->setStepValue(NewStep);
// TODO: Remove once VPWidenIntOrFpInductionRecipe is fully expanded.
auto *OldStepVector = cast<VPInstructionWithType>(
WideIV->getStepVector()->getDefiningRecipe());
assert(OldStepVector->getOpcode() == VPInstruction::StepVector);
auto *NewStepVector = new VPInstructionWithType(
VPInstruction::StepVector, {}, NewIVTy, OldStepVector->getDebugLoc());
NewStepVector->insertAfter(WideIV->getStepVector()->getDefiningRecipe());
WideIV->setStepVector(NewStepVector);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

StepVector has only a single user, the wide IV, right? Can we assert that there's a single user before removing the old step vector and just use OldStepVector->replaceAllUsesWith(NewStepVector), without the need for adding setStepVector?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea, done now

OldStepVector->eraseFromParent();

auto *NewBTC = new VPWidenCastRecipe(
Instruction::Trunc, Plan.getOrCreateBackedgeTakenCount(), NewIVTy);
Expand Down Expand Up @@ -2586,6 +2595,29 @@ void VPlanTransforms::handleUncountableEarlyExit(
LatchExitingBranch->eraseFromParent();
}

void VPlanTransforms::materializeStepVectors(VPlan &Plan) {
for (auto &Phi : Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
auto *IVR = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (!IVR)
continue;

// Infer an up-to-date type since
// optimizeVectorInductionWidthForTCAndVFUF may have truncated the start
// and step values.
Type *Ty = IVR->getPHINode()->getType();
if (TruncInst *Trunc = IVR->getTruncInst())
Ty = Trunc->getType();
if (Ty->isFloatingPointTy())
Ty = IntegerType::get(Ty->getContext(), Ty->getScalarSizeInBits());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this still needed? We now run before optimizeVectorInductionWidthForTCAndVFUF AFAICT

Copy link
Contributor Author

@lukel97 lukel97 May 7, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Woops, that comment was old from when we needed to use VPTypeAnalysis. Removed it now, since we can just work out the type from the underlying phi or trunc instead now


VPBuilder Builder(Plan.getVectorPreheader());
VPInstruction *StepVector = Builder.createNaryOp(
VPInstruction::StepVector, {}, Ty, {}, IVR->getDebugLoc());
assert(IVR->getNumOperands() == 3);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
assert(IVR->getNumOperands() == 3);
assert(IVR->getNumOperands() == 3 && "can only add step vector before unrolling");

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, done

IVR->addOperand(StepVector);
}
}

void VPlanTransforms::materializeBroadcasts(VPlan &Plan) {
if (Plan.hasScalarVFOnly())
return;
Expand Down
5 changes: 5 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,11 @@ struct VPlanTransforms {
optimizeInductionExitUsers(VPlan &Plan,
DenseMap<VPValue *, VPValue *> &EndValues);

/// Materialize VPInstruction::StepVectors for VPWidenIntOrFpInductionRecipes.
/// TODO: Remove once all of VPWidenIntOrFpInductionRecipe is expanded in
/// convertToConcreteRecipes.
static void materializeStepVectors(VPlan &Plan);

/// Add explicit broadcasts for live-ins and VPValues defined in \p Plan's entry block if they are used as vectors.
static void materializeBroadcasts(VPlan &Plan);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8)
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
Expand Down Expand Up @@ -100,9 +100,9 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range
; CHECK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 8
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 [[WIDE_TRIP_COUNT]])
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[VAL]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 8 x i64> [[TMP8]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP7]]
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 1, [[TMP6]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[N]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[N]])
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[M]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
Expand Down Expand Up @@ -246,9 +246,9 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i64 [[TMP0]], [[TMP11]]
; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[TMP12]], i64 0
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 [[TMP0]])
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[MUL_2_I]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 2 x i64> @llvm.stepvector.nxv2i64()
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 2 x i64> [[TMP15]], splat (i64 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i64> zeroinitializer, [[TMP17]]
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 1, [[TMP9]]
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/Transforms/LoopVectorize/AArch64/optsize_minsize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -517,13 +517,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
; DEFAULT-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; DEFAULT-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
; DEFAULT-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; DEFAULT-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
; DEFAULT-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
; DEFAULT-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
Expand Down Expand Up @@ -593,13 +593,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; OPTSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
; OPTSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; OPTSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
; OPTSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
; OPTSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
; OPTSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; OPTSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
; OPTSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; OPTSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; OPTSIZE-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
; OPTSIZE-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
; OPTSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
Expand Down Expand Up @@ -669,13 +669,13 @@ define void @sve_tail_predicate_without_minsize(ptr %p, i8 %a, i8 %b, i8 %c, i32
; MINSIZE-NEXT: [[TMP8:%.*]] = icmp ugt i64 15, [[TMP6]]
; MINSIZE-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], i64 [[TMP7]], i64 0
; MINSIZE-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 15)
; MINSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[A]], i64 0
; MINSIZE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[B]], i64 0
; MINSIZE-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT1]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; MINSIZE-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[C]], i64 0
; MINSIZE-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 16 x i8> [[BROADCAST_SPLATINSERT3]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; MINSIZE-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i8> @llvm.stepvector.nxv16i8()
; MINSIZE-NEXT: [[TMP11:%.*]] = mul <vscale x 16 x i8> [[TMP10]], splat (i8 1)
; MINSIZE-NEXT: [[INDUCTION:%.*]] = add <vscale x 16 x i8> zeroinitializer, [[TMP11]]
; MINSIZE-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP4]] to i8
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ define void @test_no_scalarization(ptr %a, ptr noalias %b, i32 %idx, i32 %n) #0
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 2
; CHECK-NEXT: [[IND_END:%.*]] = add i32 [[IDX]], [[N_VEC]]
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[IDX]], i64 0
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 2 x i32> [[TMP8]], splat (i32 1)
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> [[DOTSPLAT]], [[TMP10]]
; CHECK-NEXT: [[TMP13:%.*]] = mul i32 1, [[TMP7]]
Expand Down
Loading