Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ const char LLVMLoopVectorizeFollowupEpilogue[] =
STATISTIC(LoopsVectorized, "Number of loops vectorized");
STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization");
STATISTIC(LoopsEpilogueVectorized, "Number of epilogues vectorized");
STATISTIC(LoopsEarlyExitVectorized, "Number of early exit loops vectorized");

static cl::opt<bool> EnableEpilogueVectorization(
"enable-epilogue-vectorization", cl::init(true), cl::Hidden,
Expand Down Expand Up @@ -7205,6 +7206,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
"Trying to execute plan with unsupported VF");
assert(BestVPlan.hasUF(BestUF) &&
"Trying to execute plan with unsupported UF");
if (BestVPlan.hasEarlyExit())
++LoopsEarlyExitVectorized;
// TODO: Move to VPlan transform stage once the transition to the VPlan-based
// cost model is complete for better cost estimates.
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF,
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -4183,7 +4183,8 @@ class VPlan {
/// block with multiple predecessors (one for the exit via the latch and one
/// via the other early exit).
bool hasEarlyExit() const {
  // More than one exit block always means there is an early exit. With exactly
  // one exit block, an early exit exists only when that block has more than
  // one predecessor. The explicit size() == 1 check ensures ExitBlocks[0] is
  // never evaluated when the plan has no exit blocks at all.
  return ExitBlocks.size() > 1 ||
         (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1);
}

/// Returns true if the scalar tail may execute after the vector loop. Note
Expand Down
44 changes: 38 additions & 6 deletions llvm/test/Transforms/LoopVectorize/vect.stats.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize --disable-output -stats -S 2>&1 | FileCheck %s
; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -enable-early-exit-vectorization --disable-output -stats -S 2>&1 | FileCheck %s
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The flag enable-early-exit-vectorization can be removed now, since it's on by default.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, @david-arm, I actually prefer to have the option left in for 2 reasons:

  1. It is easier to see which lit tests are affected by early-exit vectorization. We already have a number of tests with the option enabled, and keeping it makes the existing coverage easy to find.
  2. After the option has been on by default for a release or so, it makes sense to remove it from all the tests at once, since we don't want to keep adding on-by-default options to tests.

Thoughts?

; REQUIRES: asserts

;
; We have 2 loops, one of them is vectorizable and the second one is not.
;
; We have 3 loops, two of them are vectorizable (with one being early-exit
; vectorized) and the third one is not.

; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization
; CHECK: 1 loop-vectorize - Number of loops vectorized
; CHECK: 3 loop-vectorize - Number of loops analyzed for vectorization
; CHECK: 1 loop-vectorize - Number of early exit loops vectorized
; CHECK: 2 loop-vectorize - Number of loops vectorized

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

Expand All @@ -31,6 +31,36 @@ for.end: ; preds = %entry, %for.body
ret void
}

; Loop with two exits: an early exit to %found when the elements loaded from
; the two buffers compare equal, and the latch exit to %exit once the
; induction variable reaches %end.clamped. Returns 1 via %found and 0 via
; %exit. Intended to be the loop counted by the "early exit loops vectorized"
; statistic checked at the top of this test.
define i32 @early_exit_vectorized(i64 %end) {
entry:
; Two local 1024-element i32 buffers that the loop reads from.
%p1 = alloca [1024 x i32]
%p2 = alloca [1024 x i32]
; @init_mem is only declared in this file; presumably it initializes the
; buffer it is given (definition not visible here).
call void @init_mem(ptr %p1, i64 1024)
call void @init_mem(ptr %p2, i64 1024)
; Mask the bound to the range [0, 1023] so every index the loop accesses stays
; below 1024, the size of the two allocas.
%end.clamped = and i64 %end, 1023
br label %for.body

for.body:
; %ind is the induction variable, starting at 0 and incremented in %for.inc.
%ind = phi i64 [ %ind.next, %for.inc ], [ 0, %entry ]
%arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %ind
%0 = load i32, ptr %arrayidx1, align 4
%arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %ind
%1 = load i32, ptr %arrayidx2, align 4
; Early exit: leave the loop as soon as the two loaded values are equal.
%cmp.early = icmp eq i32 %0, %1
br i1 %cmp.early, label %found, label %for.inc

for.inc:
; Latch: advance the index and keep looping while it is below the clamped
; bound (unsigned compare).
%ind.next = add i64 %ind, 1
%cmp = icmp ult i64 %ind.next, %end.clamped
br i1 %cmp, label %for.body, label %exit

found:
; Reached only through the early exit.
ret i32 1

exit:
; Reached only through the latch exit.
ret i32 0
}

define void @not_vectorized(ptr nocapture %a, i64 %size) {
entry:
%cmp1 = icmp sle i64 %size, 0
Expand All @@ -56,3 +86,5 @@ for.body: ; preds = %entry, %for.body
for.end: ; preds = %entry, %for.body
ret void
}

declare void @init_mem(ptr, i64);
Loading