Skip to content

Commit 1e5f44e

Browse files
committed
[VPlan] Fix LastActiveLane assertion on scalar VF
For a scalar-only VPlan with tail folding, if it has a phi live-out, then legalizeAndOptimizeInductions will scalarize the widened canonical IV feeding into the header mask:

    <x1> vector loop: {
      vector.body:
        EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
        vp<%5> = SCALAR-STEPS vp<%4>, ir<1>, vp<%0>
        EMIT vp<%6> = icmp ule vp<%5>, vp<%3>
        EMIT vp<%index.next> = add nuw vp<%4>, vp<%1>
        EMIT branch-on-count vp<%index.next>, vp<%2>
      No successors
    }
    Successor(s): middle.block

    middle.block:
      EMIT vp<%8> = last-active-lane vp<%6>
      EMIT vp<%9> = extract-lane vp<%8>, vp<%5>
    Successor(s): ir-bb<exit>

The verifier complains about this, but it should still generate the correct last active lane, so this fixes the assert by handling this case in isHeaderMask. There is a similar pattern already there for ActiveLaneMask, which also expects a VPScalarIVSteps recipe.

Fixes #167813
1 parent 2a53949 commit 1e5f44e

File tree

2 files changed

+67
-0
lines changed

2 files changed

+67
-0
lines changed

llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
6666
m_One(), m_Specific(&Plan.getVF()))) ||
6767
IsWideCanonicalIV(A));
6868

69+
if (match(V,
70+
m_ICmp(m_ScalarIVSteps(
71+
m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
72+
m_One(), m_Specific(&Plan.getVF())),
73+
m_Specific(Plan.getBackedgeTakenCount()))))
74+
return true;
75+
6976
return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
7077
B == Plan.getBackedgeTakenCount();
7178
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6
2+
; RUN: opt -p loop-vectorize -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-width=1 -force-vector-interleave=2 -S %s | FileCheck %s
3+
4+
define i64 @live_out_scalar_vf(i64 %n) {
5+
; CHECK-LABEL: define i64 @live_out_scalar_vf(
6+
; CHECK-SAME: i64 [[N:%.*]]) {
7+
; CHECK-NEXT: [[ENTRY:.*:]]
8+
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1
9+
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
10+
; CHECK: [[VECTOR_PH]]:
11+
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[TMP0]], 1
12+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
13+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
14+
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[TMP0]], 1
15+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
16+
; CHECK: [[VECTOR_BODY]]:
17+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
18+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0
19+
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1
20+
; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i64 [[TMP1]], [[TRIP_COUNT_MINUS_1]]
21+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ugt i64 [[TMP2]], [[TRIP_COUNT_MINUS_1]]
22+
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
23+
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
24+
; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
25+
; CHECK: [[MIDDLE_BLOCK]]:
26+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i1 [[TMP4]], false
27+
; CHECK-NEXT: [[TMP7:%.*]] = zext i1 [[TMP6]] to i64
28+
; CHECK-NEXT: [[TMP8:%.*]] = add i64 1, [[TMP7]]
29+
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i1 [[TMP3]], false
30+
; CHECK-NEXT: [[TMP10:%.*]] = zext i1 [[TMP9]] to i64
31+
; CHECK-NEXT: [[TMP11:%.*]] = add i64 0, [[TMP10]]
32+
; CHECK-NEXT: [[TMP12:%.*]] = icmp ne i64 [[TMP10]], 1
33+
; CHECK-NEXT: [[TMP13:%.*]] = select i1 [[TMP12]], i64 [[TMP11]], i64 [[TMP8]]
34+
; CHECK-NEXT: [[LAST_ACTIVE_LANE:%.*]] = sub i64 [[TMP13]], 1
35+
; CHECK-NEXT: [[TMP14:%.*]] = sub i64 [[LAST_ACTIVE_LANE]], 1
36+
; CHECK-NEXT: [[TMP15:%.*]] = icmp uge i64 [[LAST_ACTIVE_LANE]], 1
37+
; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], i64 [[TMP2]], i64 [[TMP1]]
38+
; CHECK-NEXT: br label %[[EXIT:.*]]
39+
; CHECK: [[EXIT]]:
40+
; CHECK-NEXT: ret i64 [[TMP16]]
41+
;
42+
entry:
43+
br label %loop
44+
45+
loop:
46+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
47+
br label %latch
48+
49+
latch:
50+
; Need to use a phi otherwise the header mask will use a
51+
; VPWidenCanonicalIVRecipe instead of a VPScalarIVStepsRecipe.
52+
%exitval = phi i64 [ %iv, %loop ]
53+
%iv.next = add i64 %iv, 1
54+
%ec = icmp eq i64 %iv, %n
55+
br i1 %ec, label %exit, label %loop
56+
57+
exit:
58+
ret i64 %exitval
59+
}
60+

0 commit comments

Comments
 (0)