Skip to content

Commit 3b96744

Browse files
committed
[VPlan] Preserve underlying value when EVL transforming intrinsics/casts
This fixes a crash that shows up with EVL tail folding due to a mismatch between the legacy and VPlan cost models. In the legacy model a cast instruction may have its cost ignored if it's part of a reduction/induction variable. The VPlan cost model will check if the underying value of a recipe is skipped in Ctx.skipCostComputation, so this works for normally widened casts. However when transformed to a VP intrinsic we drop the underlying value and we now end up costing the previously skipped values, which leads to the mismatch. This fixes the crash by preserving the underlying cast instruction. It means that a VPWidenIntrinsicRecipe's underlying value might not always be a CallInst anymore, so there was one place where I had to relax a cast<> to a dyn_cast<>. I wasn't able to provide a test case for the crash itself since we can't yet do EVL tail folding with widened IVs, but it will become a problem later down the line. , but only after induction variable widening support is enabled so I'm not really able to provide
1 parent 9df375e commit 3b96744

File tree

4 files changed

+48
-44
lines changed

4 files changed

+48
-44
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -985,7 +985,7 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
985985
assert(VectorF &&
986986
"Can't retrieve vector intrinsic or vector-predication intrinsics.");
987987

988-
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
988+
auto *CI = dyn_cast_if_present<CallInst>(getUnderlyingValue());
989989
SmallVector<OperandBundleDef, 1> OpBundles;
990990
if (CI)
991991
CI->getOperandBundlesAsDefs(OpBundles);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1506,8 +1506,12 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
15061506
SmallVector<VPValue *> Ops(CR->operands());
15071507
Ops.push_back(&AllOneMask);
15081508
Ops.push_back(&EVL);
1509-
return new VPWidenIntrinsicRecipe(
1509+
auto *VPR = new VPWidenIntrinsicRecipe(
15101510
VPID, Ops, TypeInfo.inferScalarType(CR), CR->getDebugLoc());
1511+
// Preserve the underlying value as some cast instructions may have
1512+
// their cost skipped in collectValuesToIgnore
1513+
VPR->setUnderlyingValue(CR->getUnderlyingValue());
1514+
return VPR;
15111515
})
15121516
.Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
15131517
SmallVector<VPValue *> Ops(Sel->operands());

llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
2727
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
2828
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
2929
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
30-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SMAX:%.+]]> = call llvm.vp.smax(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
30+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.vp.smax(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
3131
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
3232
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
33-
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[SMAX]]>, vp<[[EVL]]>
33+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
3434
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
3535
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
3636
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -80,10 +80,10 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
8080
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
8181
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
8282
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
83-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SMIN:%.+]]> = call llvm.vp.smin(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
83+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.vp.smin(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
8484
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
8585
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
86-
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[SMIN]]>, vp<[[EVL]]>
86+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
8787
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
8888
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
8989
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -133,10 +133,10 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
133133
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
134134
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
135135
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
136-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UMAX:%.+]]> = call llvm.vp.umax(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
136+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.vp.umax(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
137137
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
138138
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
139-
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[UMAX]]>, vp<[[EVL]]>
139+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
140140
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
141141
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
142142
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -186,10 +186,10 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
186186
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
187187
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
188188
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
189-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UMIN:%.+]]> = call llvm.vp.umin(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
189+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.vp.umin(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
190190
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
191191
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
192-
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[UMIN]]>, vp<[[EVL]]>
192+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
193193
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
194194
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
195195
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -236,10 +236,10 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
236236
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
237237
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
238238
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
239-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[CTLZ:%.+]]> = call llvm.vp.ctlz(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
239+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CTLZ:%.+]]> = call llvm.vp.ctlz(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
240240
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
241241
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
242-
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[CTLZ]]>, vp<[[EVL]]>
242+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTLZ]]>, vp<[[EVL]]>
243243
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
244244
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
245245
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -284,10 +284,10 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
284284
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
285285
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
286286
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
287-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[CTTZ:%.+]]> = call llvm.vp.cttz(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
287+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CTTZ:%.+]]> = call llvm.vp.cttz(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
288288
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
289289
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
290-
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[CTTZ]]>, vp<[[EVL]]>
290+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTTZ]]>, vp<[[EVL]]>
291291
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
292292
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
293293
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -332,12 +332,12 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
332332
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
333333
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
334334
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
335-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
336-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[LRINT:%.+]]> = call llvm.vp.lrint(vp<[[FPEXT]]>, ir<true>, vp<[[EVL]]>)
337-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(vp<[[LRINT]]>, ir<true>, vp<[[EVL]]>)
335+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
336+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LRINT:%.+]]> = call llvm.vp.lrint(ir<[[FPEXT]]>, ir<true>, vp<[[EVL]]>)
337+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[TRUNC:%.+]]> = call llvm.vp.trunc(ir<[[LRINT]]>, ir<true>, vp<[[EVL]]>)
338338
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
339339
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
340-
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]>
340+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
341341
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
342342
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
343343
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -384,12 +384,12 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
384384
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
385385
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
386386
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
387-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
388-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[LLRINT:%.+]]> = call llvm.vp.llrint(vp<[[FPEXT]]>, ir<true>, vp<[[EVL]]>)
389-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(vp<[[LLRINT]]>, ir<true>, vp<[[EVL]]>)
387+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
388+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LLRINT:%.+]]> = call llvm.vp.llrint(ir<[[FPEXT]]>, ir<true>, vp<[[EVL]]>)
389+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[TRUNC:%.+]]> = call llvm.vp.trunc(ir<[[LLRINT]]>, ir<true>, vp<[[EVL]]>)
390390
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
391391
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
392-
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]>
392+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[TRUNC]]>, vp<[[EVL]]>
393393
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
394394
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
395395
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -436,10 +436,10 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
436436
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
437437
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
438438
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
439-
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[ABS:%.+]]> = call llvm.vp.abs(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
439+
; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[ABS:%.+]]> = call llvm.vp.abs(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
440440
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
441441
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
442-
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[ABS]]>, vp<[[EVL]]>
442+
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ABS]]>, vp<[[EVL]]>
443443
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
444444
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
445445
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>

0 commit comments

Comments
 (0)