1- ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize --disable-output -stats -S 2>&1 | FileCheck %s
1+ ; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -debug-only=loop-vectorize -enable-early-exit-vectorization --disable-output -stats -S 2>&1 | FileCheck %s
22; REQUIRES: asserts
33
4- ;
5- ; We have 2 loops, one of them is vectorizable and the second one is not.
6- ;
4+ ; We have 3 loops, two of them are vectorizable (with one being early-exit
5+ ; vectorized) and the third one is not.
76
8- ; CHECK: 2 loop-vectorize - Number of loops analyzed for vectorization
9- ; CHECK: 1 loop-vectorize - Number of loops vectorized
7+ ; CHECK: 3 loop-vectorize - Number of loops analyzed for vectorization
8+ ; CHECK: 1 loop-vectorize - Number of early exit loops vectorized
9+ ; CHECK: 2 loop-vectorize - Number of loops vectorized
1010
1111target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
1212
@@ -31,6 +31,36 @@ for.end: ; preds = %entry, %for.body
3131 ret void
3232}
3333
; Loop with two exits: an early exit taken when the elements loaded from the
; two stack arrays at the same index compare equal, and a counted latch exit.
; Returns 1 through %found on a match, 0 through %exit otherwise.
; Per the CHECK lines in the file header, this is the loop expected to be
; counted under "early exit loops vectorized".
34+ define i32 @early_exit_vectorized (i64 %end ) {
35+ entry:
; Two fixed-size 1024 x i32 stack buffers, each handed to @init_mem with 1024.
; NOTE(review): @init_mem is only declared in this file; presumably it fills
; the buffers so the loads below read initialized memory - confirm.
36+ %p1 = alloca [1024 x i32 ]
37+ %p2 = alloca [1024 x i32 ]
38+ call void @init_mem (ptr %p1 , i64 1024 )
39+ call void @init_mem (ptr %p2 , i64 1024 )
; Mask the bound to at most 1023; the latch only continues while
; %ind.next < %end.clamped, so %ind never exceeds 1022 and stays inside the
; 1024-element arrays.
40+ %end.clamped = and i64 %end , 1023
41+ br label %for.body
42+
43+ for.body:
; %ind starts at 0 and is advanced in %for.inc.
44+ %ind = phi i64 [ %ind.next , %for.inc ], [ 0 , %entry ]
45+ %arrayidx1 = getelementptr inbounds i32 , ptr %p1 , i64 %ind
46+ %0 = load i32 , ptr %arrayidx1 , align 4
47+ %arrayidx2 = getelementptr inbounds i32 , ptr %p2 , i64 %ind
48+ %1 = load i32 , ptr %arrayidx2 , align 4
; Early exit: leave the loop as soon as the two loaded values are equal.
49+ %cmp.early = icmp eq i32 %0 , %1
50+ br i1 %cmp.early , label %found , label %for.inc
51+
52+ for.inc:
; Latch: the loop is bottom-tested, so the body runs at least once even when
; %end.clamped is 0.
53+ %ind.next = add i64 %ind , 1
54+ %cmp = icmp ult i64 %ind.next , %end.clamped
55+ br i1 %cmp , label %for.body , label %exit
56+
57+ found:
58+ ret i32 1
59+
60+ exit:
61+ ret i32 0
62+ }
63+
3464define void @not_vectorized (ptr nocapture %a , i64 %size ) {
3565entry:
3666 %cmp1 = icmp sle i64 %size , 0
@@ -56,3 +86,5 @@ for.body: ; preds = %entry, %for.body
5686for.end: ; preds = %entry, %for.body
5787 ret void
5888}
89+
; External helper, declared here without a body. @early_exit_vectorized calls
; it once per stack array, passing the array pointer and the count 1024.
; NOTE(review): presumably initializes the buffer contents - confirm against
; its definition.
90+ declare void @init_mem (ptr , i64 );
0 commit comments