1+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "vector.ph:" --version 5
12; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
23; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s -check-prefix=UNROLL
34
@@ -8,37 +9,91 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
89@a = common global [1000 x i32 ] zeroinitializer , align 16
910
1011; Generate min.iters.check to skip the vector loop and jump directly to scalar.ph when the loop's iteration count is less than VF * UF.
11- ; CHECK-LABEL: foo(
12- ; CHECK: %min.iters.check = icmp ult i64 %N, 4
13- ; CHECK: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
14- ; UNROLL-LABEL: foo(
15- ; UNROLL: %min.iters.check = icmp ult i64 %N, 8
16- ; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
17-
; @foo: guarded counted loop that stores b[iv] + c[iv] into a[iv] for iv = 0 .. N-1.
; The assertions below expect the minimum-iteration compare of N against 4 under
; the CHECK prefix and against 8 under the UNROLL prefix, branching to the scalar
; preheader when the compare is true.
1812define void @foo (i64 %N ) {
13+ ; CHECK-LABEL: define void @foo(
14+ ; CHECK-SAME: i64 [[N:%.*]]) {
15+ ; CHECK-NEXT: [[ENTRY:.*:]]
16+ ; CHECK-NEXT: [[C:%.*]] = icmp sgt i64 [[N]], 0
17+ ; CHECK-NEXT: br i1 [[C]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
18+ ; CHECK: [[LOOP_PREHEADER]]:
19+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
20+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
21+ ; CHECK: [[VECTOR_PH]]:
22+ ;
23+ ; UNROLL-LABEL: define void @foo(
24+ ; UNROLL-SAME: i64 [[N:%.*]]) {
25+ ; UNROLL-NEXT: [[ENTRY:.*:]]
26+ ; UNROLL-NEXT: [[C:%.*]] = icmp sgt i64 [[N]], 0
27+ ; UNROLL-NEXT: br i1 [[C]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
28+ ; UNROLL: [[LOOP_PREHEADER]]:
29+ ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
30+ ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
31+ ; UNROLL: [[VECTOR_PH]]:
32+ ;
1933entry:
20- %cmp.8 = icmp sgt i64 %N , 0
21- br i1 %cmp.8 , label %for.body.preheader , label %for.end
22-
23- for.body.preheader: ; preds = %entry
24- br label %for.body
; Loop guard: the loop is entered only when N is strictly positive (signed compare).
34+ %c = icmp sgt i64 %N , 0
35+ br i1 %c , label %loop , label %exit
2536
26- for.body: ; preds = %for.body, %for.body.preheader
27- %i.09 = phi i64 [ %inc , %for.body ], [ 0 , %for.body.preheader ]
28- %arrayidx = getelementptr inbounds [1000 x i32 ], ptr @b , i64 0 , i64 %i.09
; Loop body: load b[iv] and c[iv], add them, and store the sum to a[iv].
37+ loop:
38+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
39+ %arrayidx = getelementptr inbounds [1000 x i32 ], ptr @b , i64 0 , i64 %iv
2940 %tmp = load i32 , ptr %arrayidx , align 4
30- %arrayidx1 = getelementptr inbounds [1000 x i32 ], ptr @c , i64 0 , i64 %i.09
41+ %arrayidx1 = getelementptr inbounds [1000 x i32 ], ptr @c , i64 0 , i64 %iv
3142 %tmp1 = load i32 , ptr %arrayidx1 , align 4
3243 %add = add nsw i32 %tmp1 , %tmp
33- %arrayidx2 = getelementptr inbounds [1000 x i32 ], ptr @a , i64 0 , i64 %i.09
44+ %arrayidx2 = getelementptr inbounds [1000 x i32 ], ptr @a , i64 0 , i64 %iv
3445 store i32 %add , ptr %arrayidx2 , align 4
35- %inc = add nuw nsw i64 %i.09 , 1
36- %exitcond = icmp eq i64 %inc , %N
37- br i1 %exitcond , label %for.end.loopexit , label %for.body
; Step the induction variable by one and leave the loop once the incremented value equals N.
46+ %iv.next = add nuw nsw i64 %iv , 1
47+ %ec = icmp eq i64 %iv.next , %N
48+ br i1 %ec , label %exit , label %loop
49+
50+ exit:
51+ ret void
52+ }
53+
; @min_iters_known_via_loop_guards_add: the entry block assumes that
; (end - start) is greater than 100 (signed compare) and starts the induction
; variable at zext(end - start + 1). The loop stores iv to src[iv] and exits
; after the iteration in which iv equals 100.
; The assertions below expect the minimum-iteration compare to be emitted on
; (101 - iv.start), against 4 under the CHECK prefix and against 8 under UNROLL.
54+ define void @min_iters_known_via_loop_guards_add (i32 %start , i32 %end , ptr %src ) {
55+ ; CHECK-LABEL: define void @min_iters_known_via_loop_guards_add(
56+ ; CHECK-SAME: i32 [[START:%.*]], i32 [[END:%.*]], ptr [[SRC:%.*]]) {
57+ ; CHECK-NEXT: [[ENTRY:.*:]]
58+ ; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[END]], [[START]]
59+ ; CHECK-NEXT: [[PRE:%.*]] = icmp sgt i32 [[SUB]], 100
60+ ; CHECK-NEXT: call void @llvm.assume(i1 [[PRE]])
61+ ; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[SUB]], 1
62+ ; CHECK-NEXT: [[IV_START:%.*]] = zext i32 [[ADD_1]] to i64
63+ ; CHECK-NEXT: [[TMP0:%.*]] = sub i64 101, [[IV_START]]
64+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
65+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
66+ ; CHECK: [[VECTOR_PH]]:
67+ ;
68+ ; UNROLL-LABEL: define void @min_iters_known_via_loop_guards_add(
69+ ; UNROLL-SAME: i32 [[START:%.*]], i32 [[END:%.*]], ptr [[SRC:%.*]]) {
70+ ; UNROLL-NEXT: [[ENTRY:.*:]]
71+ ; UNROLL-NEXT: [[SUB:%.*]] = sub i32 [[END]], [[START]]
72+ ; UNROLL-NEXT: [[PRE:%.*]] = icmp sgt i32 [[SUB]], 100
73+ ; UNROLL-NEXT: call void @llvm.assume(i1 [[PRE]])
74+ ; UNROLL-NEXT: [[ADD_1:%.*]] = add i32 [[SUB]], 1
75+ ; UNROLL-NEXT: [[IV_START:%.*]] = zext i32 [[ADD_1]] to i64
76+ ; UNROLL-NEXT: [[TMP0:%.*]] = sub i64 101, [[IV_START]]
77+ ; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
78+ ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
79+ ; UNROLL: [[VECTOR_PH]]:
80+ ;
81+ entry:
; Guard expressed through an assumption: (end - start) > 100, compared as signed i32.
82+ %sub = sub i32 %end , %start
83+ %pre = icmp sgt i32 %sub , 100
84+ call void @llvm.assume (i1 %pre )
; The i32 value (end - start) + 1 is zero-extended to i64 and used as the
; starting value of the induction variable.
85+ %add.1 = add i32 %sub , 1
86+ %iv.start = zext i32 %add.1 to i64
87+ br label %loop
3888
39- for.end.loopexit: ; preds = %for.body
40- br label %for.end
89+ loop:
90+ %iv = phi i64 [ %iv.start , %entry ], [ %iv.next , %loop ]
91+ %gep = getelementptr inbounds i64 , ptr %src , i64 %iv
92+ store i64 %iv , ptr %gep
93+ %iv.next = add i64 %iv , 1
; The exit test compares the pre-increment %iv (not %iv.next) against 100.
94+ %ec = icmp eq i64 %iv , 100
95+ br i1 %ec , label %exit , label %loop
4196
42- for.end: ; preds = %for.end.loopexit, %entry
97+ exit:
4398 ret void
4499}
0 commit comments