
Commit 4d4a60c

[VPlan] Fix LastActiveLane assertion on scalar VF (#167897)
For a scalar-only VPlan with tail folding, if the loop has a phi live-out, legalizeAndOptimizeInductions scalarizes the widened canonical IV feeding the header mask:

<x1> vector loop: {
  vector.body:
    EMIT vp<%4> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
    vp<%5> = SCALAR-STEPS vp<%4>, ir<1>, vp<%0>
    EMIT vp<%6> = icmp ule vp<%5>, vp<%3>
    EMIT vp<%index.next> = add nuw vp<%4>, vp<%1>
    EMIT branch-on-count vp<%index.next>, vp<%2>
  No successors
}
Successor(s): middle.block

middle.block:
  EMIT vp<%8> = last-active-lane vp<%6>
  EMIT vp<%9> = extract-lane vp<%8>, vp<%5>
Successor(s): ir-bb<exit>

The verifier complains about this, but it should still generate the correct last active lane, so this fixes the assert by handling this case in isHeaderMask. A similar pattern already exists there for ActiveLaneMask, which also expects a VPScalarIVSteps recipe.

Fixes #167813
1 parent dfac905 commit 4d4a60c
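In VPlanPatternMatch terms, the fix teaches vputils::isHeaderMask to also accept the scalar-steps form of the mask. A condensed sketch of the added case, lifted from the diff below (the m_CanonicalScalarIVSteps helper is also shared with the existing ActiveLaneMask check):

  // With tail folding on a scalar-only plan, the header mask is an
  // icmp ule of the scalar IV steps against the backedge-taken count,
  // rather than a comparison of a widened canonical IV.
  auto m_CanonicalScalarIVSteps =
      m_ScalarIVSteps(m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
                      m_One(), m_Specific(&Plan.getVF()));
  if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
                      m_Specific(Plan.getBackedgeTakenCount())))) {
    assert(Plan.hasScalarVFOnly() &&
           "Non-scalar VF using scalar IV steps for header mask?");
    return true;
  }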

File tree

2 files changed: +71 −7 lines changed


llvm/lib/Transforms/Vectorize/VPlanUtils.cpp

Lines changed: 13 additions & 5 deletions
@@ -58,13 +58,21 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) {
 
   VPValue *A, *B;
 
+  auto m_CanonicalScalarIVSteps =
+      m_ScalarIVSteps(m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
+                      m_One(), m_Specific(&Plan.getVF()));
+
   if (match(V, m_ActiveLaneMask(m_VPValue(A), m_VPValue(B), m_One())))
     return B == Plan.getTripCount() &&
-           (match(A,
-                  m_ScalarIVSteps(
-                      m_Specific(Plan.getVectorLoopRegion()->getCanonicalIV()),
-                      m_One(), m_Specific(&Plan.getVF()))) ||
-            IsWideCanonicalIV(A));
+           (match(A, m_CanonicalScalarIVSteps) || IsWideCanonicalIV(A));
+
+  // For scalar plans, the header mask uses the scalar steps.
+  if (match(V, m_ICmp(m_CanonicalScalarIVSteps,
+                      m_Specific(Plan.getBackedgeTakenCount())))) {
+    assert(Plan.hasScalarVFOnly() &&
+           "Non-scalar VF using scalar IV steps for header mask?");
+    return true;
+  }
 
   return match(V, m_ICmp(m_VPValue(A), m_VPValue(B))) && IsWideCanonicalIV(A) &&
          B == Plan.getBackedgeTakenCount();

llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll

Lines changed: 58 additions & 2 deletions
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -pass-remarks='loop-vectorize' -disable-output -S 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS
-; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -S | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -pass-remarks='loop-vectorize' -disable-output -S 2>&1 | FileCheck %s --check-prefix=CHECK-REMARKS
+; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S | FileCheck %s
 
 ; These tests are to check that fold-tail procedure produces correct scalar code when
 ; loop-vectorization is only unrolling but not vectorizing.
@@ -141,5 +141,61 @@ for.body:
   %cond = icmp eq ptr %ptr, %ptr2
   br i1 %cond, label %for.cond.cleanup, label %for.body
 }
+
+define i64 @live_out_scalar_vf(i64 %n) {
+; CHECK-LABEL: @live_out_scalar_vf(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 16
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 16
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD_2:%.*]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
+; CHECK-NEXT:    [[STEP_ADD_3:%.*]] = add <4 x i64> [[STEP_ADD_2]], splat (i64 4)
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD_3]], splat (i64 4)
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[STEP_ADD_3]], i32 3
+; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i64> [[STEP_ADD_3]], i32 2
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[EXITVAL:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
+; CHECK-NEXT:    br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[TMP19:%.*]] = phi i64 [ [[EXITVAL]], [[LOOP]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    ret i64 [[TMP19]]
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  ; Need to use a phi otherwise the header mask will use a
+  ; VPWidenCanonicalIVRecipe instead of a VPScalarIVStepsRecipe.
+  %exitval = phi i64 [ 0, %entry ], [ %iv, %loop ]
+  %iv.next = add i64 %iv, 1
+  %ec = icmp eq i64 %iv, %n
+  br i1 %ec, label %exit, label %loop
+
+exit:
+  ret i64 %exitval
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK-REMARKS: {{.*}}
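To exercise the new coverage on its own (a repro sketch, assuming an assertions-enabled build of opt; the flags are taken verbatim from the RUN line above):

  opt < llvm/test/Transforms/LoopVectorize/tail-folding-vectorization-factor-1.ll \
      -passes=loop-vectorize -force-vector-interleave=4 \
      -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -S

Without the VPlanUtils.cpp change, this invocation is expected to trip the LastActiveLane assertion reported in #167813; with it, @live_out_scalar_vf vectorizes cleanly and its exit value is recovered from the extractelement in the middle block, as the CHECK lines verify.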
