Skip to content

Commit dc23869

Browse files
committed
[LV] Handle vector trip count being zero in preparePlanForEpilogueVectorLoop.
After a485e0e, preparePlanForEpilogueVectorLoop may leave EPI.VectorTripCount unset when the vector trip count is zero. We should not choose a VF * UF that makes the main vector loop dead (i.e. its vector trip count is zero), but this can currently still happen in some cases. In those cases, set EPI.VectorTripCount to zero.
1 parent 0989ff5 commit dc23869

File tree

2 files changed

+101
-0
lines changed

2 files changed

+101
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9796,6 +9796,19 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
97969796
"Must only have a single non-zero incoming value");
97979797
EPI.VectorTripCount = Inc;
97989798
}
9799+
// If we didn't find a non-zero vector trip count, all incoming values
9800+
// must be zero, which also means the vector trip count is zero. Pick the
9801+
// first zero as vector trip count.
9802+
// TODO: We should not choose VF * UF so the main vector loop is known to
9803+
// be dead.
9804+
if (!EPI.VectorTripCount) {
9805+
assert(
9806+
EPResumeVal->getNumIncomingValues() > 0 &&
9807+
all_of(EPResumeVal->incoming_values(),
9808+
[](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
9809+
"all incoming values must be 0");
9810+
EPI.VectorTripCount = EPResumeVal->getOperand(0);
9811+
}
97999812
VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
98009813
assert(all_of(IV->users(),
98019814
[](const VPUser *U) {

llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -333,3 +333,91 @@ for.body:
333333
exit:
334334
ret void
335335
}
336+
337+
; TODO: Choose smaller VF * UF for main loop, so we do not create a dead vector loop.
338+
define void @small_trip_count_loop(ptr %arg, ptr %arg2) {
339+
; CHECK-LABEL: @small_trip_count_loop(
340+
; CHECK-NEXT: iter.check:
341+
; CHECK-NEXT: [[ARG3:%.*]] = ptrtoint ptr [[ARG:%.*]] to i64
342+
; CHECK-NEXT: [[ARG21:%.*]] = ptrtoint ptr [[ARG2:%.*]] to i64
343+
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
344+
; CHECK: vector.memcheck:
345+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[ARG21]], [[ARG3]]
346+
; CHECK-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
347+
; CHECK-NEXT: br i1 [[DIFF_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
348+
; CHECK: vector.main.loop.iter.check:
349+
; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
350+
; CHECK: vector.ph:
351+
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
352+
; CHECK: vector.body:
353+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 16
354+
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 32
355+
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 48
356+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[ARG]], align 1
357+
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i8>, ptr [[TMP1]], align 1
358+
; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
359+
; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i8>, ptr [[TMP3]], align 1
360+
; CHECK-NEXT: [[TMP4:%.*]] = add <16 x i8> [[WIDE_LOAD]], splat (i8 10)
361+
; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[WIDE_LOAD4]], splat (i8 10)
362+
; CHECK-NEXT: [[TMP6:%.*]] = add <16 x i8> [[WIDE_LOAD5]], splat (i8 10)
363+
; CHECK-NEXT: [[TMP7:%.*]] = add <16 x i8> [[WIDE_LOAD6]], splat (i8 10)
364+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 16
365+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 32
366+
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 48
367+
; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[ARG2]], align 1
368+
; CHECK-NEXT: store <16 x i8> [[TMP5]], ptr [[TMP8]], align 1
369+
; CHECK-NEXT: store <16 x i8> [[TMP6]], ptr [[TMP9]], align 1
370+
; CHECK-NEXT: store <16 x i8> [[TMP7]], ptr [[TMP10]], align 1
371+
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
372+
; CHECK: middle.block:
373+
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
374+
; CHECK: vec.epilog.iter.check:
375+
; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
376+
; CHECK: vec.epilog.ph:
377+
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
378+
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
379+
; CHECK: vec.epilog.vector.body:
380+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
381+
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 [[INDEX]]
382+
; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1
383+
; CHECK-NEXT: [[TMP12:%.*]] = add <16 x i8> [[WIDE_LOAD7]], splat (i8 10)
384+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 [[INDEX]]
385+
; CHECK-NEXT: store <16 x i8> [[TMP12]], ptr [[TMP13]], align 1
386+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
387+
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
388+
; CHECK-NEXT: br i1 [[TMP14]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
389+
; CHECK: vec.epilog.middle.block:
390+
; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
391+
; CHECK: vec.epilog.scalar.ph:
392+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ]
393+
; CHECK-NEXT: br label [[LOOP:%.*]]
394+
; CHECK: loop:
395+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
396+
; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 [[IV]]
397+
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP_A]], align 1
398+
; CHECK-NEXT: [[SELECT:%.*]] = add i8 [[LOAD]], 10
399+
; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i8, ptr [[ARG2]], i32 [[IV]]
400+
; CHECK-NEXT: store i8 [[SELECT]], ptr [[GEP_B]], align 1
401+
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
402+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 20
403+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP12:![0-9]+]]
404+
; CHECK: exit:
405+
; CHECK-NEXT: ret void
406+
;
407+
entry:
408+
br label %loop
409+
410+
loop:
411+
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
412+
%gep.A = getelementptr inbounds i8, ptr %arg, i32 %iv
413+
%load = load i8, ptr %gep.A, align 1
414+
%select = add i8 %load, 10
415+
%gep.B = getelementptr inbounds i8, ptr %arg2, i32 %iv
416+
store i8 %select, ptr %gep.B, align 1
417+
%iv.next = add i32 %iv, 1
418+
%ec = icmp eq i32 %iv, 20
419+
br i1 %ec, label %exit, label %loop
420+
421+
exit:
422+
ret void
423+
}

0 commit comments

Comments
 (0)