3 changes: 3 additions & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
@@ -144,6 +144,9 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
if (TTI && Intrinsic::isTargetIntrinsic(ID))
return TTI->isVectorIntrinsicWithOverloadTypeAtArg(ID, OpdIdx);

if (VPCastIntrinsic::isVPCast(ID))
return OpdIdx == -1 || OpdIdx == 0;

switch (ID) {
case Intrinsic::fptosi_sat:
case Intrinsic::fptoui_sat:
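The new check reports that VP cast intrinsics are overloaded on both the return type (OpdIdx == -1) and the source operand (OpdIdx == 0). As an illustrative sketch, derived from the mangled names that appear in the updated tests below rather than from the patch itself, the corresponding declarations would look like:

  ; overloaded on result type and first-operand type; mask and EVL are not mangled
  declare <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, i32)
  declare <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, i32)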
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -964,7 +964,8 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
Module *M = State.Builder.GetInsertBlock()->getModule();
Function *VectorF =
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
assert(VectorF && "Can't retrieve vector intrinsic.");
Contributor: Is this change still needed?

Contributor: Probably sufficient to keep the existing message.

assert(VectorF &&
"Can't retrieve vector intrinsic or vector-predication intrinsics.");

auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
SmallVector<OperandBundleDef, 1> OpBundles;
20 changes: 20 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1500,6 +1500,26 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
*CI, VPID, Ops, TypeInfo.inferScalarType(CInst),
CInst->getDebugLoc());
})
.Case<VPWidenCastRecipe>(
[&](VPWidenCastRecipe *CInst) -> VPRecipeBase * {
auto *CI = dyn_cast<CastInst>(CInst->getUnderlyingInstr());
Intrinsic::ID VPID =
VPIntrinsic::getForOpcode(CI->getOpcode());
assert(VPID != Intrinsic::not_intrinsic &&
"Expected vp.casts Instrinsic");

SmallVector<VPValue *> Ops(CInst->operands());
assert(VPIntrinsic::getMaskParamPos(VPID) &&
VPIntrinsic::getVectorLengthParamPos(VPID) &&
"Expected VP intrinsic");
VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::getTrue(
IntegerType::getInt1Ty(CI->getContext())));
Ops.push_back(Mask);
Ops.push_back(&EVL);
return new VPWidenIntrinsicRecipe(
VPID, Ops, TypeInfo.inferScalarType(CInst),
CInst->getDebugLoc());
})
.Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
SmallVector<VPValue *> Ops(Sel->operands());
Ops.push_back(&EVL);
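For orientation, a minimal before/after sketch of what this new VPWidenCastRecipe case produces, assuming an all-true mask and an illustrative EVL value %evl (the concrete output is in the test updates below). A widened cast such as

  %ext = sext <vscale x 4 x i16> %v to <vscale x 4 x i32>

is instead emitted as the matching VP intrinsic with the mask and EVL appended as the trailing operands:

  %ext = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i16(<vscale x 4 x i16> %v, <vscale x 4 x i1> splat (i1 true), i32 %evl)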
20 changes: 10 additions & 10 deletions llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll
@@ -143,7 +143,7 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-OUTLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]]
; IF-EVL-OUTLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
; IF-EVL-OUTLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x i16> @llvm.vp.load.nxv4i16.p0(ptr align 2 [[TMP8]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 4 x i16> [[VP_OP_LOAD]] to <vscale x 4 x i32>
; IF-EVL-OUTLOOP-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.vp.sext.nxv4i32.nxv4i16(<vscale x 4 x i16> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[VP_OP:%.*]] = call <vscale x 4 x i32> @llvm.vp.add.nxv4i32(<vscale x 4 x i32> [[VEC_PHI]], <vscale x 4 x i32> [[TMP9]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[TMP10]] = call <vscale x 4 x i32> @llvm.vp.merge.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[VP_OP]], <vscale x 4 x i32> [[VEC_PHI]], i32 [[TMP5]])
; IF-EVL-OUTLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
@@ -194,16 +194,16 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) {
; IF-EVL-INLOOP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; IF-EVL-INLOOP-NEXT: [[AVL:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 8, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = add i32 [[EVL_BASED_IV]], 0
; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP6]]
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP8]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = sext <vscale x 8 x i16> [[VP_OP_LOAD]] to <vscale x 8 x i32>
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP9]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
; IF-EVL-INLOOP-NEXT: [[TMP5:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP5]], i32 8, i1 true)
; IF-EVL-INLOOP-NEXT: [[TMP7:%.*]] = add i32 [[EVL_BASED_IV]], 0
; IF-EVL-INLOOP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]]
; IF-EVL-INLOOP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[TMP8]], i32 0
; IF-EVL-INLOOP-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i16> @llvm.vp.load.nxv8i16.p0(ptr align 2 [[TMP9]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP14:%.*]] = call <vscale x 8 x i32> @llvm.vp.sext.nxv8i32.nxv8i16(<vscale x 8 x i16> [[VP_OP_LOAD]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP10:%.*]] = call i32 @llvm.vp.reduce.add.nxv8i32(i32 0, <vscale x 8 x i32> [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-INLOOP-NEXT: [[TMP11]] = add i32 [[TMP10]], [[VEC_PHI]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP5]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[INDEX_EVL_NEXT]] = add nuw i32 [[TMP6]], [[EVL_BASED_IV]]
; IF-EVL-INLOOP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP4]]
; IF-EVL-INLOOP-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-INLOOP-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -708,9 +708,9 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = fpext <vscale x 4 x float> [[VP_OP_LOAD]] to <vscale x 4 x double>
; IF-EVL-NEXT: [[TMP13:%.*]] = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.vp.lrint.nxv4i64.nxv4f64(<vscale x 4 x double> [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP15:%.*]] = trunc <vscale x 4 x i64> [[TMP14]] to <vscale x 4 x i32>
; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i32.nxv4i64(<vscale x 4 x i64> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
@@ -817,9 +817,9 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds float, ptr [[TMP11]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP12]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = fpext <vscale x 4 x float> [[VP_OP_LOAD]] to <vscale x 4 x double>
; IF-EVL-NEXT: [[TMP13:%.*]] = call <vscale x 4 x double> @llvm.vp.fpext.nxv4f64.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP14:%.*]] = call <vscale x 4 x i64> @llvm.vp.llrint.nxv4i64.nxv4f64(<vscale x 4 x double> [[TMP13]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP15:%.*]] = trunc <vscale x 4 x i64> [[TMP14]] to <vscale x 4 x i32>
; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 4 x i32> @llvm.vp.trunc.nxv4i32.nxv4i64(<vscale x 4 x i64> [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]]
; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0
; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP15]], ptr align 4 [[TMP17]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])