Skip to content

Commit 3a9f803

Browse files
committed
[SCEV] Add tests for applying guards to SCEVAddExpr sub-expressions.
Adds a test case for computing the backedge-taken-count for #155941
1 parent edc9128 commit 3a9f803

File tree

3 files changed

+141
-23
lines changed

3 files changed

+141
-23
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
2+
; RUN: opt -passes='print<scalar-evolution>' -scalar-evolution-classify-expressions=0 -disable-output %s 2>&1 | FileCheck %s
3+
4+
; The loop advances the pointer %iv from %start in 4-byte steps, stores an
; i32 zero on every iteration, and exits once %iv.next equals %end. The
; assume in the entry block states ptrtoint(%end) - ptrtoint(%start) == 4.
; NOTE(review): the asserted constant max backedge-taken count below is
; 2^62 - 1 (the largest possible value of an i64 "/u 4" expression), so the
; assume does not appear to be reflected in it -- presumably this records a
; baseline for a follow-up SCEV change; confirm against #155941.
define void @ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr(ptr %start, ptr %end) {
5+
; CHECK-LABEL: 'ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr'
6+
; CHECK-NEXT: Determining loop execution counts for: @ptrtoint_based_trip_count_known_via_guards_applied_to_add_subexpr
7+
; CHECK-NEXT: Loop %loop: backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
8+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 4611686018427387903
9+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is ((-4 + (-1 * (ptrtoint ptr %start to i64)) + (ptrtoint ptr %end to i64)) /u 4)
10+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
11+
;
12+
entry:
13+
%end.i = ptrtoint ptr %end to i64
14+
%start.i = ptrtoint ptr %start to i64
15+
%sub = sub i64 %end.i, %start.i
16+
%pre.1 = icmp eq i64 %sub, 4
17+
call void @llvm.assume(i1 %pre.1)
18+
br label %loop
19+
20+
loop:
21+
%iv = phi ptr [ %start, %entry ], [ %iv.next, %loop ]
22+
store i32 0, ptr %iv
23+
%iv.next = getelementptr inbounds nuw i8, ptr %iv, i64 4
24+
%ec = icmp eq ptr %iv.next, %end
25+
br i1 %ec, label %exit, label %loop
26+
27+
exit:
28+
ret void
29+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 2
2+
; RUN: opt -passes='print<scalar-evolution>' -scalar-evolution-classify-expressions=0 -disable-output %s 2>&1 | FileCheck %s
3+
4+
; The loop counts %iv up from 0 and exits when %iv.next equals
; zext(%high - %low + 1). Its preheader %ph is only reached when
; %high - %low, as a signed i32, is less than 8 (%pre.1) and not negative
; (%pre.2).
; NOTE(review): the asserted constant max backedge-taken count below is
; i64 -1, so the two guards do not appear to bound it -- presumably this
; records a baseline for a follow-up SCEV change; confirm.
define void @max_btc_improved_by_applying_guards_to_add_subexpr(i32 %low, i32 %high) {
5+
; CHECK-LABEL: 'max_btc_improved_by_applying_guards_to_add_subexpr'
6+
; CHECK-NEXT: Determining loop execution counts for: @max_btc_improved_by_applying_guards_to_add_subexpr
7+
; CHECK-NEXT: Loop %loop: backedge-taken count is (-1 + (zext i32 (1 + (-1 * %low) + %high) to i64))<nsw>
8+
; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 -1
9+
; CHECK-NEXT: Loop %loop: symbolic max backedge-taken count is (-1 + (zext i32 (1 + (-1 * %low) + %high) to i64))<nsw>
10+
; CHECK-NEXT: Loop %loop: Trip multiple is 1
11+
;
12+
entry:
13+
%sub = sub i32 %high, %low
14+
%pre.1 = icmp slt i32 %sub, 8
15+
br i1 %pre.1, label %if.then, label %exit
16+
17+
if.then:
18+
%pre.2 = icmp slt i32 %sub, 0
19+
br i1 %pre.2, label %exit, label %ph
20+
21+
ph:
22+
%add.1 = add i32 %sub, 1
23+
%wide.trip.count = zext i32 %add.1 to i64
24+
br label %loop
25+
26+
loop:
27+
%iv = phi i64 [ 0, %ph ], [ %iv.next, %loop ]
28+
%iv.next = add i64 %iv, 1
29+
%ec = icmp eq i64 %iv.next, %wide.trip.count
30+
br i1 %ec, label %exit, label %loop
31+
32+
exit:
33+
ret void
34+
}
Lines changed: 78 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "vector.ph:" --version 5
12
; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
23
; RUN: opt %s -passes=loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -S | FileCheck %s -check-prefix=UNROLL
34

@@ -8,37 +9,91 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
89
@a = common global [1000 x i32] zeroinitializer, align 16
910

1011
; Generate min.iters.check to skip the vector loop and jump directly to scalar.ph when the loop's iteration count is less than VF * UF.
11-
; CHECK-LABEL: foo(
12-
; CHECK: %min.iters.check = icmp ult i64 %N, 4
13-
; CHECK: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
14-
; UNROLL-LABEL: foo(
15-
; UNROLL: %min.iters.check = icmp ult i64 %N, 8
16-
; UNROLL: br i1 %min.iters.check, label %scalar.ph, label %vector.ph
17-
1812
; Stores @b[i] + @c[i] into @a[i] for i = 0 .. %N - 1; the entry block
; branches past the loop when %N is not greater than 0 (signed compare).
; With the forced vector width of 4, the asserted minimum-iteration
; threshold is 4 for interleave count 1 and 8 for interleave count 2.
define void @foo(i64 %N) {
13+
; CHECK-LABEL: define void @foo(
14+
; CHECK-SAME: i64 [[N:%.*]]) {
15+
; CHECK-NEXT: [[ENTRY:.*:]]
16+
; CHECK-NEXT: [[C:%.*]] = icmp sgt i64 [[N]], 0
17+
; CHECK-NEXT: br i1 [[C]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
18+
; CHECK: [[LOOP_PREHEADER]]:
19+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4
20+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
21+
; CHECK: [[VECTOR_PH]]:
22+
;
23+
; UNROLL-LABEL: define void @foo(
24+
; UNROLL-SAME: i64 [[N:%.*]]) {
25+
; UNROLL-NEXT: [[ENTRY:.*:]]
26+
; UNROLL-NEXT: [[C:%.*]] = icmp sgt i64 [[N]], 0
27+
; UNROLL-NEXT: br i1 [[C]], label %[[LOOP_PREHEADER:.*]], [[EXIT:label %.*]]
28+
; UNROLL: [[LOOP_PREHEADER]]:
29+
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8
30+
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
31+
; UNROLL: [[VECTOR_PH]]:
32+
;
1933
entry:
20-
%cmp.8 = icmp sgt i64 %N, 0
21-
br i1 %cmp.8, label %for.body.preheader, label %for.end
22-
23-
for.body.preheader: ; preds = %entry
24-
br label %for.body
34+
%c = icmp sgt i64 %N, 0
35+
br i1 %c, label %loop, label %exit
2536

26-
for.body: ; preds = %for.body, %for.body.preheader
27-
%i.09 = phi i64 [ %inc, %for.body ], [ 0, %for.body.preheader ]
28-
%arrayidx = getelementptr inbounds [1000 x i32], ptr @b, i64 0, i64 %i.09
37+
loop:
38+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
39+
%arrayidx = getelementptr inbounds [1000 x i32], ptr @b, i64 0, i64 %iv
2940
%tmp = load i32, ptr %arrayidx, align 4
30-
%arrayidx1 = getelementptr inbounds [1000 x i32], ptr @c, i64 0, i64 %i.09
41+
%arrayidx1 = getelementptr inbounds [1000 x i32], ptr @c, i64 0, i64 %iv
3142
%tmp1 = load i32, ptr %arrayidx1, align 4
3243
%add = add nsw i32 %tmp1, %tmp
33-
%arrayidx2 = getelementptr inbounds [1000 x i32], ptr @a, i64 0, i64 %i.09
44+
%arrayidx2 = getelementptr inbounds [1000 x i32], ptr @a, i64 0, i64 %iv
3445
store i32 %add, ptr %arrayidx2, align 4
35-
%inc = add nuw nsw i64 %i.09, 1
36-
%exitcond = icmp eq i64 %inc, %N
37-
br i1 %exitcond, label %for.end.loopexit, label %for.body
46+
%iv.next = add nuw nsw i64 %iv, 1
47+
%ec = icmp eq i64 %iv.next, %N
48+
br i1 %ec, label %exit, label %loop
49+
50+
exit:
51+
ret void
52+
}
53+
54+
; The induction variable starts at zext(%end - %start + 1), is stored to
; %src[%iv] on every iteration, and the loop exits when %iv equals 100.
; The assume in the entry block states %end - %start > 100 (signed i32).
; The assertions below expect a minimum-iteration check that compares
; 101 - %iv.start against 4 (interleave count 1) or 8 (interleave count 2).
; NOTE(review): judging by the function name, the assume is meant to make
; this check foldable; the assertions still expect it to be emitted --
; presumably a baseline for a follow-up change; confirm.
define void @min_iters_known_via_loop_guards_add(i32 %start, i32 %end, ptr %src) {
55+
; CHECK-LABEL: define void @min_iters_known_via_loop_guards_add(
56+
; CHECK-SAME: i32 [[START:%.*]], i32 [[END:%.*]], ptr [[SRC:%.*]]) {
57+
; CHECK-NEXT: [[ENTRY:.*:]]
58+
; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[END]], [[START]]
59+
; CHECK-NEXT: [[PRE:%.*]] = icmp sgt i32 [[SUB]], 100
60+
; CHECK-NEXT: call void @llvm.assume(i1 [[PRE]])
61+
; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[SUB]], 1
62+
; CHECK-NEXT: [[IV_START:%.*]] = zext i32 [[ADD_1]] to i64
63+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 101, [[IV_START]]
64+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4
65+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
66+
; CHECK: [[VECTOR_PH]]:
67+
;
68+
; UNROLL-LABEL: define void @min_iters_known_via_loop_guards_add(
69+
; UNROLL-SAME: i32 [[START:%.*]], i32 [[END:%.*]], ptr [[SRC:%.*]]) {
70+
; UNROLL-NEXT: [[ENTRY:.*:]]
71+
; UNROLL-NEXT: [[SUB:%.*]] = sub i32 [[END]], [[START]]
72+
; UNROLL-NEXT: [[PRE:%.*]] = icmp sgt i32 [[SUB]], 100
73+
; UNROLL-NEXT: call void @llvm.assume(i1 [[PRE]])
74+
; UNROLL-NEXT: [[ADD_1:%.*]] = add i32 [[SUB]], 1
75+
; UNROLL-NEXT: [[IV_START:%.*]] = zext i32 [[ADD_1]] to i64
76+
; UNROLL-NEXT: [[TMP0:%.*]] = sub i64 101, [[IV_START]]
77+
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
78+
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
79+
; UNROLL: [[VECTOR_PH]]:
80+
;
81+
entry:
82+
%sub = sub i32 %end, %start
83+
%pre = icmp sgt i32 %sub, 100
84+
call void @llvm.assume(i1 %pre)
85+
%add.1 = add i32 %sub, 1
86+
%iv.start = zext i32 %add.1 to i64
87+
br label %loop
3888

39-
for.end.loopexit: ; preds = %for.body
40-
br label %for.end
89+
loop:
90+
%iv = phi i64 [ %iv.start, %entry ], [ %iv.next, %loop ]
91+
%gep = getelementptr inbounds i64, ptr %src, i64 %iv
92+
store i64 %iv, ptr %gep
93+
%iv.next = add i64 %iv, 1
94+
%ec = icmp eq i64 %iv, 100
95+
br i1 %ec, label %exit, label %loop
4196

42-
for.end: ; preds = %for.end.loopexit, %entry
97+
exit:
4398
ret void
4499
}

0 commit comments

Comments
 (0)