Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1506,18 +1506,17 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
})
.Case<VPWidenCastRecipe>(
[&](VPWidenCastRecipe *CInst) -> VPRecipeBase * {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name here is also confusing, as Inst implies an instructions, while this is a cast recipe... Better name it CastR

auto *CI = dyn_cast<CastInst>(CInst->getUnderlyingInstr());
Intrinsic::ID VPID =
VPIntrinsic::getForOpcode(CI->getOpcode());
VPIntrinsic::getForOpcode(CInst->getOpcode());
assert(VPID != Intrinsic::not_intrinsic &&
"Expected vp.casts Instrinsic");

SmallVector<VPValue *> Ops(CInst->operands());
assert(VPIntrinsic::getMaskParamPos(VPID) &&
VPIntrinsic::getVectorLengthParamPos(VPID) &&
"Expected VP intrinsic");
VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::getTrue(
IntegerType::getInt1Ty(CI->getContext())));
VPValue *Mask = Plan.getOrAddLiveIn(
ConstantInt::getTrue(IntegerType::getInt1Ty(Ctx)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ConstantInt::getTrue(Ctx));

Ops.push_back(Mask);
Ops.push_back(&EVL);
return new VPWidenIntrinsicRecipe(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1058,6 +1058,125 @@ loop:
exit:
ret void
}

define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr noalias %dst, ptr noalias %src, i32 %mvx) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr noalias %dst, ptr noalias %src, i32 %mvx) {
define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %dst, ptr %src, i32 %mvx) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was under the impression that we preferred noalias to avoid the memcheck block if we weren't explicitly testing for it? E.g. I remember seeing #107225 do it as a cleanup

; IF-EVL-LABEL: define void @truncate_to_minimal_bitwidths_widen_cast_recipe(
; IF-EVL-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC:%.*]], i32 [[MVX:%.*]]) #[[ATTR0]] {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move to a dedicated file with a descriptive name. The test file is already far too big

; IF-EVL-NEXT: [[ENTRY:.*:]]
; IF-EVL-NEXT: [[CMP111:%.*]] = icmp sgt i32 [[MVX]], 0
; IF-EVL-NEXT: br i1 [[CMP111]], label %[[FOR_BODY13_PREHEADER:.*]], label %[[FOR_COND_CLEANUP12:.*]]
; IF-EVL: [[FOR_BODY13_PREHEADER]]:
; IF-EVL-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[MVX]] to i64
; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; IF-EVL: [[VECTOR_PH]]:
; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[WIDE_TRIP_COUNT]], [[TMP2]]
; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i32> poison, i32 [[MVX]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 16 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
; IF-EVL-NEXT: [[TMP5:%.*]] = trunc <vscale x 16 x i32> [[BROADCAST_SPLAT]] to <vscale x 16 x i16>
; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 16 x ptr> poison, ptr [[DST]], i64 0
; IF-EVL-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 16 x ptr> [[BROADCAST_SPLATINSERT2]], <vscale x 16 x ptr> poison, <vscale x 16 x i32> zeroinitializer
; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
; IF-EVL: [[VECTOR_BODY]]:
; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IF-EVL-NEXT: [[AVL:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 16, i1 true)
; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[EVL_BASED_IV]], 0
; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP7]]
; IF-EVL-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP9]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP10:%.*]] = call <vscale x 16 x i16> @llvm.vp.zext.nxv16i16.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[VP_OP:%.*]] = call <vscale x 16 x i16> @llvm.vp.mul.nxv16i16(<vscale x 16 x i16> [[TMP5]], <vscale x 16 x i16> [[TMP10]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[VP_OP1:%.*]] = call <vscale x 16 x i16> @llvm.vp.lshr.nxv16i16(<vscale x 16 x i16> [[VP_OP]], <vscale x 16 x i16> trunc (<vscale x 16 x i32> splat (i32 1) to <vscale x 16 x i16>), <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP11:%.*]] = call <vscale x 16 x i8> @llvm.vp.trunc.nxv16i8.nxv16i16(<vscale x 16 x i16> [[VP_OP1]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: call void @llvm.vp.scatter.nxv16i8.nxv16p0(<vscale x 16 x i8> [[TMP11]], <vscale x 16 x ptr> align 1 [[BROADCAST_SPLAT3]], <vscale x 16 x i1> splat (i1 true), i32 [[TMP6]])
; IF-EVL-NEXT: [[TMP12:%.*]] = zext i32 [[TMP6]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP12]], [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
; IF-EVL-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IF-EVL-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
; IF-EVL: [[MIDDLE_BLOCK]]:
; IF-EVL-NEXT: br i1 true, label %[[FOR_COND_CLEANUP12_LOOPEXIT:.*]], label %[[SCALAR_PH]]
; IF-EVL: [[SCALAR_PH]]:
; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY13_PREHEADER]] ]
; IF-EVL-NEXT: br label %[[FOR_BODY13:.*]]
; IF-EVL: [[FOR_COND_CLEANUP12_LOOPEXIT]]:
; IF-EVL-NEXT: br label %[[FOR_COND_CLEANUP12]]
; IF-EVL: [[FOR_COND_CLEANUP12]]:
; IF-EVL-NEXT: ret void
; IF-EVL: [[FOR_BODY13]]:
; IF-EVL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY13]] ]
; IF-EVL-NEXT: [[ARRAYIDX15:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVARS_IV]]
; IF-EVL-NEXT: [[TMP14:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
; IF-EVL-NEXT: [[CONV:%.*]] = zext i8 [[TMP14]] to i32
; IF-EVL-NEXT: [[MUL16:%.*]] = mul i32 [[MVX]], [[CONV]]
; IF-EVL-NEXT: [[SHR35:%.*]] = lshr i32 [[MUL16]], 1
; IF-EVL-NEXT: [[CONV36:%.*]] = trunc i32 [[SHR35]] to i8
; IF-EVL-NEXT: store i8 [[CONV36]], ptr [[DST]], align 1
; IF-EVL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP12_LOOPEXIT]], label %[[FOR_BODY13]], !llvm.loop [[LOOP48:![0-9]+]]
;
; NO-VP-LABEL: define void @truncate_to_minimal_bitwidths_widen_cast_recipe(
; NO-VP-SAME: ptr noalias [[DST:%.*]], ptr noalias [[SRC:%.*]], i32 [[MVX:%.*]]) #[[ATTR0]] {
; NO-VP-NEXT: [[ENTRY:.*:]]
; NO-VP-NEXT: [[CMP111:%.*]] = icmp sgt i32 [[MVX]], 0
; NO-VP-NEXT: br i1 [[CMP111]], label %[[FOR_BODY13_PREHEADER:.*]], label %[[FOR_COND_CLEANUP12:.*]]
; NO-VP: [[FOR_BODY13_PREHEADER]]:
; NO-VP-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[MVX]] to i64
; NO-VP-NEXT: br label %[[FOR_BODY13:.*]]
; NO-VP: [[FOR_COND_CLEANUP12_LOOPEXIT:.*]]:
; NO-VP-NEXT: br label %[[FOR_COND_CLEANUP12]]
; NO-VP: [[FOR_COND_CLEANUP12]]:
; NO-VP-NEXT: ret void
; NO-VP: [[FOR_BODY13]]:
; NO-VP-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, %[[FOR_BODY13_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY13]] ]
; NO-VP-NEXT: [[ARRAYIDX15:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[INDVARS_IV]]
; NO-VP-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX15]], align 1
; NO-VP-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32
; NO-VP-NEXT: [[MUL16:%.*]] = mul i32 [[MVX]], [[CONV]]
; NO-VP-NEXT: [[SHR35:%.*]] = lshr i32 [[MUL16]], 1
; NO-VP-NEXT: [[CONV36:%.*]] = trunc i32 [[SHR35]] to i8
; NO-VP-NEXT: store i8 [[CONV36]], ptr [[DST]], align 1
; NO-VP-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP12_LOOPEXIT]], label %[[FOR_BODY13]]
;
entry:
%cmp111 = icmp sgt i32 %mvx, 0
br i1 %cmp111, label %for.body13.preheader, label %for.cond.cleanup12

for.body13.preheader: ; preds = %entry
%wide.trip.count = zext nneg i32 %mvx to i64
br label %for.body13

for.cond.cleanup12.loopexit: ; preds = %for.body13
br label %for.cond.cleanup12
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Clean up this bb if possible.


for.cond.cleanup12: ; preds = %for.cond.cleanup12.loopexit, %entry
ret void

for.body13: ; preds = %for.body13.preheader, %for.body13
%indvars.iv = phi i64 [ 0, %for.body13.preheader ], [ %indvars.iv.next, %for.body13 ]
%arrayidx15 = getelementptr i8, ptr %src, i64 %indvars.iv
%0 = load i8, ptr %arrayidx15, align 1
%conv = zext i8 %0 to i32
%mul16 = mul i32 %mvx, %conv
%shr35 = lshr i32 %mul16, 1
%conv36 = trunc i32 %shr35 to i8
store i8 %conv36, ptr %dst, align 1
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond.not, label %for.cond.cleanup12.loopexit, label %for.body13
}

;.
; IF-EVL: [[META0]] = !{[[META1:![0-9]+]]}
; IF-EVL: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]]}
Expand Down Expand Up @@ -1106,4 +1225,6 @@ exit:
; IF-EVL: [[LOOP44]] = distinct !{[[LOOP44]], [[META6]]}
; IF-EVL: [[LOOP45]] = distinct !{[[LOOP45]], [[META6]], [[META7]]}
; IF-EVL: [[LOOP46]] = distinct !{[[LOOP46]], [[META6]]}
; IF-EVL: [[LOOP47]] = distinct !{[[LOOP47]], [[META6]], [[META7]]}
; IF-EVL: [[LOOP48]] = distinct !{[[LOOP48]], [[META7]], [[META6]]}
;.
Loading