Skip to content
8 changes: 4 additions & 4 deletions llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ class VPBuilder {
std::initializer_list<VPValue *> Operands,
Type *ResultTy, const VPIRFlags &Flags = {},
DebugLoc DL = {}, const Twine &Name = "") {
return tryInsertInstruction(
new VPInstructionWithType(Opcode, Operands, ResultTy, Flags, DL, Name));
return tryInsertInstruction(new VPInstructionWithType(
Opcode, Operands, ResultTy, Flags, DL, /*IsSingleScalar=*/false, Name));
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a default behavior? I.e., if one isn't sure or doesn't care whether a VPInstruction processes a single scalar or not, then IsSingleScalar is set to false. If so, perhaps MustBeSingleScalar or IsKnownSingleScalar would be more accurate, and/or an Unset alternative. In general additional alternative values may be relevant, see below.

}

VPInstruction *createOverflowingOp(unsigned Opcode,
Expand Down Expand Up @@ -264,8 +264,8 @@ class VPBuilder {

VPInstruction *createScalarCast(Instruction::CastOps Opcode, VPValue *Op,
Type *ResultTy, DebugLoc DL) {
return tryInsertInstruction(
new VPInstructionWithType(Opcode, Op, ResultTy, {}, DL));
return tryInsertInstruction(new VPInstructionWithType(
Opcode, Op, ResultTy, {}, DL, /*IsSingleScalar=*/true));
}

VPWidenCastRecipe *createWidenCast(Instruction::CastOps Opcode, VPValue *Op,
Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8071,7 +8071,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
}

VPReplicateRecipe *
VPSingleDefRecipe *
VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range) {
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
Expand Down Expand Up @@ -8129,6 +8129,14 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
assert((Range.Start.isScalar() || !IsUniform || !IsPredicated ||
(Range.Start.isScalable() && isa<IntrinsicInst>(I))) &&
"Should not predicate a uniform recipe");
if (IsUniform && Instruction::isCast(I->getOpcode())) {
assert(!IsPredicated && "IsUniform implies unpredicated");
auto *Recipe = new VPInstructionWithType(
I->getOpcode(), Operands, I->getType(), VPIRFlags(*I), I->getDebugLoc(),
IsUniform, I->getName());
Recipe->setUnderlyingValue(I);
return Recipe;
}
auto *Recipe = new VPReplicateRecipe(I, Operands, IsUniform, BlockInMask,
VPIRMetadata(*I, LVer));
return Recipe;
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ class VPRecipeBuilder {
/// Build a VPReplicationRecipe for \p I using \p Operands. If it is
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// Build a VPReplicationRecipe for \p I using \p Operands. If it is
/// Build a VPReplicationRecipe or VPInstructionWithType for \p I using \p Operands. If it is

/// predicated, add the mask as last operand. Range.End may be decreased to
/// ensure same recipe behavior from \p Range.Start to \p Range.End.
VPReplicateRecipe *handleReplication(Instruction *I,
VPSingleDefRecipe *handleReplication(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range);

Expand Down
17 changes: 9 additions & 8 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,9 @@ class VPInstruction : public VPRecipeWithIRFlags,
public VPUnrollPartAccessor<1> {
friend class VPlanSlp;

/// True if the VPInstruction produces a single scalar value.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And false if it doesn't, i.e., if the VPInstruction produces more than one scalar value, or none - produces a vector instead or produces no value at all as in branches (for stores we consider the stored value as being "produced"?)
Holds for all VPValues of VPlan0?

bool IsSingleScalar;

Comment on lines +922 to +924
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the purpose of this patch, which models uniform cast operations via VPInstructionWithType instead of VPReplicateRecipe, suffice to introduce IsSingleScalar in VPInstructionWithType, rather than VPInstruction?

Adding IsSingleScalar to VPInstruction potentially lays the ground for future handling beyond VPInstructionWithType-for-scalar-casts. This raises several questions going forward: (1) what information to record - an IsSingleScalar bit or more? (2) where to record it - everywhere or only where needed? (3) how does this relate to Type information?

Regarding (1): every VPValue may represent (a) a single scalar, (b) multiple scalars (VFxUF, VF after unrolling), or (c) a single vector (of VFxUF elements, VF after unrolling). Note that "VF" may differ from the one common across a vector loop, when considering interleaved groups and/or SLP.

Regarding (2): Single scalar information may be inferred from uniformity/divergence analysis coupled with demanded-elements analysis (only first lane used), analogous to min-bitwidth information inferred from range propagation coupled with demanded bits. The latter encodes its results, i.e., the narrower element Type of VPValues, only where needed: in live-ins and type-changing casts, relying on VPTypeAnalysis to infer the types of other VPValues. Suffice to record single-scalar-ness also only where needed - recipes that create or change this behavior, relying on vputils::isSingleScalar() for other VPValues (possibly caching its results as in VPTypeAnalysis)? Casts OTOH typically modifying only the element type of their operand - retaining its single-scalar-ness, as in PreservesUniformity.

Regarding (3): single-scalar-ness could potentially be recorded alongside the element Type, as in LLVM-IR: can use Array type for (b), scalable vector type for (c), and neither - plain element type for (a). Can alternatively introduce VPType which holds information (1) alongside but separate from the element Type. In both cases it would seem natural for VPInstructionWithType to represent information (1).

public:
/// VPlan opcodes, extending LLVM IR with idiomatics instructions.
enum {
Expand Down Expand Up @@ -980,7 +983,7 @@ class VPInstruction : public VPRecipeWithIRFlags,

VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
const VPIRFlags &Flags, DebugLoc DL = {},
const Twine &Name = "");
const Twine &Name = "", bool IsSingleScalar = false);

VP_CLASSOF_IMPL(VPDef::VPInstructionSC)

Expand Down Expand Up @@ -1067,8 +1070,9 @@ class VPInstructionWithType : public VPInstruction {
public:
VPInstructionWithType(unsigned Opcode, ArrayRef<VPValue *> Operands,
Type *ResultTy, const VPIRFlags &Flags, DebugLoc DL,
const Twine &Name = "")
: VPInstruction(Opcode, Operands, Flags, DL, Name), ResultTy(ResultTy) {}
bool SingleScalar = false, const Twine &Name = "")
: VPInstruction(Opcode, Operands, Flags, DL, Name, true),
ResultTy(ResultTy) {}

static inline bool classof(const VPRecipeBase *R) {
// VPInstructionWithType are VPInstructions with specific opcodes requiring
Expand All @@ -1095,7 +1099,7 @@ class VPInstructionWithType : public VPInstruction {
SmallVector<VPValue *, 2> Operands(operands());
auto *New =
new VPInstructionWithType(getOpcode(), Operands, getResultType(), *this,
getDebugLoc(), getName());
getDebugLoc(), isSingleScalar(), getName());
New->setUnderlyingValue(getUnderlyingValue());
return New;
}
Expand All @@ -1104,10 +1108,7 @@ class VPInstructionWithType : public VPInstruction {

/// Return the cost of this VPInstruction.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override {
// TODO: Compute accurate cost after retiring the legacy cost model.
return 0;
}
VPCostContext &Ctx) const override;

Type *getResultType() const { return ResultTy; }

Expand Down
32 changes: 23 additions & 9 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -408,9 +408,9 @@ template class VPUnrollPartAccessor<3>;

VPInstruction::VPInstruction(unsigned Opcode, ArrayRef<VPValue *> Operands,
const VPIRFlags &Flags, DebugLoc DL,
const Twine &Name)
const Twine &Name, bool IsSingleScalar)
: VPRecipeWithIRFlags(VPDef::VPInstructionSC, Operands, Flags, DL),
Opcode(Opcode), Name(Name.str()) {
IsSingleScalar(IsSingleScalar), Opcode(Opcode), Name(Name.str()) {
assert(flagsValidForOpcode(getOpcode()) &&
"Set flags not supported for the provided opcode");
}
Expand Down Expand Up @@ -857,7 +857,9 @@ bool VPInstruction::isVectorToScalar() const {
}

bool VPInstruction::isSingleScalar() const {
return getOpcode() == Instruction::PHI;
// TODO: Set IsSingleScalar for PHI.
return IsSingleScalar ||
getOpcode() == Instruction::PHI;
}

void VPInstruction::execute(VPTransformState &State) {
Expand Down Expand Up @@ -1069,15 +1071,17 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,

void VPInstructionWithType::execute(VPTransformState &State) {
State.setDebugLocFrom(getDebugLoc());
switch (getOpcode()) {
case Instruction::ZExt:
case Instruction::Trunc: {
if (Instruction::isCast(getOpcode())) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (Instruction::isCast(getOpcode())) {
if (Instruction::isCast(getOpcode())) {
assert(isSingleScalar(this) && "VPInstructionWithType models casts of single scalars only");

before working with VPlan(0) only below?

Value *Op = State.get(getOperand(0), VPLane(0));
Value *Cast = State.Builder.CreateCast(Instruction::CastOps(getOpcode()),
Op, ResultTy);
if (auto *I = dyn_cast<Instruction>(Cast))
applyFlags(*I);
State.set(this, Cast, VPLane(0));
break;
return;
}

switch (getOpcode()) {
case VPInstruction::StepVector: {
Value *StepVector =
State.Builder.CreateStepVector(VectorType::get(ResultTy, State.VF));
Expand All @@ -1089,10 +1093,19 @@ void VPInstructionWithType::execute(VPTransformState &State) {
}
}

InstructionCost VPInstructionWithType::computeCost(ElementCount VF,
VPCostContext &Ctx) const {
// TODO: Compute cost for VPInstructions without underlying values once
// the legacy cost model has been retired.
if (!getUnderlyingValue())
return 0;
return Ctx.getLegacyCost(cast<Instruction>(getUnderlyingValue()), VF);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPInstructionWithType::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "EMIT ";
O << Indent << (isSingleScalar() ? "SINGLE-SCALAR " : "EMIT ");
printAsOperand(O, SlotTracker);
O << " = ";

Expand Down Expand Up @@ -1634,12 +1647,13 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
return Opcode == Instruction::FAdd || Opcode == Instruction::FMul ||
Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
Opcode == Instruction::FPTrunc || Opcode == Instruction::FPExt ||
Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
Opcode == VPInstruction::WideIVStep ||
Opcode == VPInstruction::ReductionStartVector ||
Opcode == VPInstruction::ComputeReductionResult;
case OperationType::NonNegOp:
return Opcode == Instruction::ZExt;
return Opcode == Instruction::UIToFP || Opcode == Instruction::ZExt;
break;
case OperationType::Cmp:
return Opcode == Instruction::ICmp;
Expand Down
11 changes: 9 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1037,8 +1037,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
unsigned ExtOpcode = match(R.getOperand(0), m_SExt(m_VPValue()))
? Instruction::SExt
: Instruction::ZExt;
auto *VPC =
new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A, TruncTy);
VPSingleDefRecipe *VPC;
if (vputils::isSingleScalar(Def))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this asks if a VPSingleDefRecipe Def is single scalar, where Def (not being a VPInstruction) does not record its own IsSingleScalar, defaulting to vputils function to figure it out.
Can VPWidenCastRecipe be folded into VPInstructionWithType, making the latter single-scalar agnostic?

VPC = new VPInstructionWithType(Instruction::CastOps(ExtOpcode), {A},
TruncTy, {}, {},
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we pass DebugLoc into VPInstructionWithType?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks

/*IsSingleScalar=*/true);
else
VPC = new VPWidenCastRecipe(Instruction::CastOps(ExtOpcode), A,
TruncTy);

if (auto *UnderlyingExt = R.getOperand(0)->getUnderlyingValue()) {
// UnderlyingExt has distinct return type, used to retain legacy cost.
VPC->setUnderlyingValue(UnderlyingExt);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ target triple = "aarch64-unknown-linux-gnu"
; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>, [[VF]]
; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]]
; CHECK-NEXT: CLONE [[IDX:.*]] = load [[GEP_IDX]]
; CHECK-NEXT: CLONE [[EXT_IDX:.*]] = zext [[IDX]]
; CHECK-NEXT: SINGLE-SCALAR [[EXT_IDX:.*]] = zext [[IDX]]
; CHECK-NEXT: CLONE [[GEP_BUCKET:.*]] = getelementptr inbounds ir<%buckets>, [[EXT_IDX]]
; CHECK-NEXT: CLONE [[HISTVAL:.*]] = load [[GEP_BUCKET]]
; CHECK-NEXT: CLONE [[UPDATE:.*]] = add nsw [[HISTVAL]], ir<1>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1>
; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
; CHECK-NEXT: SINGLE-SCALAR ir<%idxprom> = zext ir<%i.0>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]>
Expand Down Expand Up @@ -199,7 +199,7 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[CAN_IV_NEXT:%.+]]>, vector.body ]
; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[DEV_IV]]>, ir<-1>
; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
; CHECK-NEXT: SINGLE-SCALAR ir<%idxprom> = zext ir<%i.0>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]>
; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]>
Expand Down Expand Up @@ -323,7 +323,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[DEV_IV]]>, ir<-1>
; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[STEPS]]>, ir<-1>
; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
; CHECK-NEXT: SINGLE-SCALAR ir<%idxprom> = zext ir<%i.0>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, vp<[[VF]]>
; CHECK-NEXT: WIDEN ir<%1> = load vp<[[VEC_PTR]]>
Expand Down Expand Up @@ -447,7 +447,7 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur
; CHECK-NEXT: EMIT-SCALAR vp<[[CAN_IV:%.+]]> = phi [ ir<0>, ir-bb<vector.ph> ], [ vp<[[CAN_IV_NEXT:%.+]]>, vector.body ]
; CHECK-NEXT: vp<[[DEV_IV:%.+]]> = DERIVED-IV ir<%n> + vp<[[CAN_IV]]> * ir<-1>
; CHECK-NEXT: CLONE ir<%i.0> = add nsw vp<[[DEV_IV]]>, ir<-1>
; CHECK-NEXT: CLONE ir<%idxprom> = zext ir<%i.0>
; CHECK-NEXT: SINGLE-SCALAR ir<%idxprom> = zext ir<%i.0>
; CHECK-NEXT: CLONE ir<%arrayidx> = getelementptr inbounds ir<%B>, ir<%idxprom>
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-end-pointer inbounds ir<%arrayidx>, ir<[[VF]]>
; CHECK-NEXT: WIDEN ir<[[L:%.+]]> = load vp<[[VEC_PTR]]>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: SINGLE-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
Expand Down Expand Up @@ -92,7 +92,7 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: SINGLE-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
Expand Down Expand Up @@ -149,7 +149,7 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: SINGLE-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
Expand Down Expand Up @@ -206,7 +206,7 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: SINGLE-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
Expand Down Expand Up @@ -260,7 +260,7 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTLZ]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: SINGLE-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
Expand Down Expand Up @@ -312,7 +312,7 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTTZ]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: SINGLE-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
Expand Down Expand Up @@ -366,7 +366,7 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: SINGLE-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
Expand Down Expand Up @@ -422,7 +422,7 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: SINGLE-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
Expand Down Expand Up @@ -476,7 +476,7 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ABS]]>, vp<[[EVL]]>
; IF-EVL-NEXT: EMIT vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: SINGLE-SCALAR vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]>
Expand Down
Loading
Loading