From 7e21e562b064ab7b8f48b3debbd4155410894d7a Mon Sep 17 00:00:00 2001 From: Anna Thomas Date: Wed, 9 Jul 2025 09:19:11 -0400 Subject: [PATCH 1/2] [VPlan] Update hasEarlyExit check to consider no-exit block case If we call this API during vectorization without any exit blocks, we need to first check there is at least one exit block. --- llvm/lib/Transforms/Vectorize/VPlan.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 972eca1fe8376..5f5342a9f9a41 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4247,7 +4247,8 @@ class VPlan { /// block with multiple predecessors (one for the exit via the latch and one /// via the other early exit). bool hasEarlyExit() const { - return ExitBlocks.size() > 1 || ExitBlocks[0]->getNumPredecessors() > 1; + return ExitBlocks.size() > 1 || + (ExitBlocks.size() == 1 && ExitBlocks[0]->getNumPredecessors() > 1); } /// Returns true if the scalar tail may execute after the vector loop. 
Note From a353c34987ee3681a841b8a8541da4c81a803555 Mon Sep 17 00:00:00 2001 From: Anna Thomas Date: Mon, 7 Jul 2025 10:18:28 -0400 Subject: [PATCH 2/2] [LV] Add a statistic for early exit vectorization Add statistic LoopsEarlyExitVectorized --- .../Transforms/Vectorize/LoopVectorize.cpp | 3 ++ .../Transforms/LoopVectorize/vect.stats.ll | 44 ++++++++++++++++--- 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 5eda2003920e6..5089f0ab8efa7 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -175,6 +175,7 @@ const char LLVMLoopVectorizeFollowupEpilogue[] = STATISTIC(LoopsVectorized, "Number of loops vectorized"); STATISTIC(LoopsAnalyzed, "Number of loops analyzed for vectorization"); STATISTIC(LoopsEpilogueVectorized, "Number of epilogues vectorized"); +STATISTIC(LoopsEarlyExitVectorized, "Number of early exit loops vectorized"); static cl::opt EnableEpilogueVectorization( "enable-epilogue-vectorization", cl::init(true), cl::Hidden, @@ -7324,6 +7325,8 @@ DenseMap LoopVectorizationPlanner::executePlan( "Trying to execute plan with unsupported VF"); assert(BestVPlan.hasUF(BestUF) && "Trying to execute plan with unsupported UF"); + if (BestVPlan.hasEarlyExit()) + ++LoopsEarlyExitVectorized; // TODO: Move to VPlan transform stage once the transition to the VPlan-based // cost model is complete for better cost estimates. 
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF, diff --git a/llvm/test/Transforms/LoopVectorize/vect.stats.ll b/llvm/test/Transforms/LoopVectorize/vect.stats.ll index 9a55dc99c316b..018e2c213ddf2 100644 --- a/llvm/test/Transforms/LoopVectorize/vect.stats.ll +++ b/llvm/test/Transforms/LoopVectorize/vect.stats.ll @@ -1,12 +1,12 @@ -; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize --disable-output -stats -S 2>&1 | FileCheck %s +; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -enable-early-exit-vectorization --disable-output -stats -S 2>&1 | FileCheck %s ; REQUIRES: asserts -; -; We have 2 loops, one of them is vectorizable and the second one is not. -; +; We have 3 loops, two of them are vectorizable (with one being early-exit +; vectorized) and the third one is not. -; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization -; CHECK: 1 loop-vectorize - Number of loops vectorized +; CHECK: 3 loop-vectorize - Number of loops analyzed for vectorization +; CHECK: 1 loop-vectorize - Number of early exit loops vectorized +; CHECK: 2 loop-vectorize - Number of loops vectorized target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" @@ -31,6 +31,36 @@ for.end: ; preds = %entry, %for.body ret void } +define i32 @early_exit_vectorized(i64 %end) { +entry: + %p1 = alloca [1024 x i32] + %p2 = alloca [1024 x i32] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + %end.clamped = and i64 %end, 1023 + br label %for.body + +for.body: + %ind = phi i64 [ %ind.next, %for.inc ], [ 0, %entry ] + %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %ind + %0 = load i32, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %ind + %1 = load i32, ptr 
%arrayidx2, align 4 + %cmp.early = icmp eq i32 %0, %1 + br i1 %cmp.early, label %found, label %for.inc + +for.inc: + %ind.next = add i64 %ind, 1 + %cmp = icmp ult i64 %ind.next, %end.clamped + br i1 %cmp, label %for.body, label %exit + +found: + ret i32 1 + +exit: + ret i32 0 +} + define void @not_vectorized(ptr nocapture %a, i64 %size) { entry: %cmp1 = icmp sle i64 %size, 0 @@ -56,3 +86,5 @@ for.body: ; preds = %entry, %for.body for.end: ; preds = %entry, %for.body ret void } + +declare void @init_mem(ptr, i64);