Skip to content

Commit af31aa4

Browse files
committed
[LV] Pre-commit tests for fixed width VF fully unrolled loop cost model change
1 parent 5024926 commit af31aa4

File tree

1 file changed

+121
-0
lines changed

1 file changed

+121
-0
lines changed
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
; REQUIRES: asserts
2+
; RUN: opt < %s -mcpu=neoverse-v2 -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s
3+
4+
target triple="aarch64--linux-gnu"
5+
6+
; This test shows that the exit comparison and the next-iteration IV have zero
7+
; cost if the vector loop gets executed exactly once with the given VF.
8+
define i64 @test(ptr %a, ptr %b) #0 {
; The loop below runs a fixed 16 iterations (%i.iv starts at 0 and the loop
; exits once %i.iv.next equals 16). Each iteration multiplies the zero-extended
; bytes a[i] and b[i] and adds the product to an i64 running sum, which is
; returned from the exit block.
; NOTE(review): the expectations below record a cost of 1 for the IV increment
; and the exit condition at both VF 8 and VF 16, while the comment above the
; function talks about zero cost. Presumably these lines capture the baseline
; before the cost-model change - confirm.
9+
; CHECK-LABEL: LV: Checking a loop in 'test'
10+
; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
11+
; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
12+
; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
13+
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
14+
; CHECK: Cost for VF 8: 26
15+
; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
16+
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
17+
; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
18+
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
19+
; CHECK: Cost for VF 16: 50
20+
; CHECK: LV: Selecting VF: vscale x 2
21+
entry:
22+
br label %for.body
23+
24+
exit: ; preds = %for.body
25+
; Return the final value of the accumulated sum.
ret i64 %add
26+
27+
for.body: ; preds = %entry, %for.body
28+
; %i.iv is the loop counter; %sum carries the running total between iterations.
%i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
29+
%sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
30+
; Load a[i] and widen it to i64.
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %i.iv
31+
%0 = load i8, ptr %arrayidx, align 1
32+
%conv = zext i8 %0 to i64
33+
; Load b[i] and widen it to i64.
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %i.iv
34+
%1 = load i8, ptr %arrayidx2, align 1
35+
%conv3 = zext i8 %1 to i64
36+
; sum += a[i] * b[i]
%mul = mul nuw nsw i64 %conv3, %conv
37+
%add = add i64 %mul, %sum
38+
; Here %i.iv.next is used only by the %i.iv phi and the exit comparison.
%i.iv.next = add nuw nsw i64 %i.iv, 1
39+
%exitcond.not = icmp eq i64 %i.iv.next, 16
40+
br i1 %exitcond.not, label %exit, label %for.body
41+
}
42+
43+
; Same as above, but the next-iteration IV has an extra user (the address of the load from %b), and thus its cost is not zero.
44+
define i64 @test_external_iv_user(ptr %a, ptr %b) #0 {
; Fixed 16-iteration loop (%i.iv starts at 0, exit once %i.iv.next equals 16)
; that sums the products of the zero-extended bytes a[i] and b[i + 1]. The
; index into %b is %i.iv.next, so the IV increment has a user in addition to
; the %i.iv phi and the exit comparison.
45+
; CHECK-LABEL: LV: Checking a loop in 'test_external_iv_user'
46+
; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
47+
; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
48+
; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
49+
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
50+
; CHECK: Cost for VF 8: 26
51+
; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
52+
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
53+
; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
54+
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
55+
; CHECK: Cost for VF 16: 50
56+
; CHECK: LV: Selecting VF: vscale x 2
57+
entry:
58+
br label %for.body
59+
60+
for.body: ; preds = %entry, %for.body
61+
; %i.iv is the loop counter; %sum carries the running total between iterations.
%i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
62+
%sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
63+
; Load a[i] and widen it to i64.
%arrayidx = getelementptr inbounds nuw i8, ptr %a, i64 %i.iv
64+
%0 = load i8, ptr %arrayidx, align 1
65+
%conv = zext i8 %0 to i64
66+
; The increment is computed before the second load because that load's
; address depends on it.
%i.iv.next = add nuw nsw i64 %i.iv, 1
67+
; Load b[i + 1] (indexed by %i.iv.next rather than %i.iv) and widen it to i64.
%arrayidx2 = getelementptr inbounds nuw i8, ptr %b, i64 %i.iv.next
68+
%1 = load i8, ptr %arrayidx2, align 1
69+
%conv3 = zext i8 %1 to i64
70+
; sum += a[i] * b[i + 1]
%mul = mul nuw nsw i64 %conv3, %conv
71+
%add = add i64 %sum, %mul
72+
; Leave the loop once the incremented counter reaches 16.
%exitcond.not = icmp eq i64 %i.iv.next, 16
73+
br i1 %exitcond.not, label %exit, label %for.body
74+
75+
exit: ; preds = %for.body
76+
; Return the final value of the accumulated sum.
ret i64 %add
77+
}
78+
79+
; Same as @test, but with two IVs, neither of which has extra users. Both have zero cost when the VF equals the number of iterations.
80+
define i64 @test_two_ivs(ptr %a, ptr %b, i64 %start) #0 {
; Fixed 16-iteration loop with two induction variables: %i.iv counts from 0
; and controls the exit, while %j.iv counts from %start. Each iteration
; multiplies the zero-extended bytes a[i] and b[j] and adds the product to an
; i64 running sum, which is returned from the exit block.
81+
; CHECK-LABEL: LV: Checking a loop in 'test_two_ivs'
82+
; CHECK: Cost of 1 for VF 8: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
83+
; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
84+
; CHECK-NEXT: Cost of 1 for VF 8: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1
85+
; CHECK-NEXT: Cost of 0 for VF 8: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
86+
; CHECK-NEXT: Cost of 1 for VF 8: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
87+
; CHECK-NEXT: Cost of 0 for VF 8: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
88+
; CHECK: Cost for VF 8: 27
89+
; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %i.iv.next = add nuw nsw i64 %i.iv, 1
90+
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
91+
; CHECK-NEXT: Cost of 1 for VF 16: induction instruction %j.iv.next = add nuw nsw i64 %j.iv, 1
92+
; CHECK-NEXT: Cost of 0 for VF 16: induction instruction %j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
93+
; CHECK-NEXT: Cost of 1 for VF 16: exit condition instruction %exitcond.not = icmp eq i64 %i.iv.next, 16
94+
; CHECK-NEXT: Cost of 0 for VF 16: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%index.next>
95+
; CHECK: Cost for VF 16: 51
96+
; CHECK: LV: Selecting VF: 16
97+
entry:
98+
br label %for.body
99+
100+
exit: ; preds = %for.body
101+
; Return the final value of the accumulated sum.
ret i64 %add
102+
103+
for.body: ; preds = %entry, %for.body
104+
; Two counters advance in lockstep; %sum carries the running total.
%i.iv = phi i64 [ 0, %entry ], [ %i.iv.next, %for.body ]
105+
%j.iv = phi i64 [ %start, %entry ], [ %j.iv.next, %for.body ]
106+
%sum = phi i64 [ 0, %entry ], [ %add, %for.body ]
107+
; Load a[i] and widen it to i64.
%arrayidx = getelementptr inbounds i8, ptr %a, i64 %i.iv
108+
%0 = load i8, ptr %arrayidx, align 1
109+
%conv = zext i8 %0 to i64
110+
; Load b[j] (indexed by the second IV) and widen it to i64.
%arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %j.iv
111+
%1 = load i8, ptr %arrayidx2, align 1
112+
%conv3 = zext i8 %1 to i64
113+
; sum += a[i] * b[j]
%mul = mul nuw nsw i64 %conv3, %conv
114+
%add = add i64 %mul, %sum
115+
; %i.iv.next feeds its phi and the exit comparison; %j.iv.next feeds only its phi.
%i.iv.next = add nuw nsw i64 %i.iv, 1
116+
%j.iv.next = add nuw nsw i64 %j.iv, 1
117+
%exitcond.not = icmp eq i64 %i.iv.next, 16
118+
br i1 %exitcond.not, label %exit, label %for.body
119+
}
120+
121+
; Attribute group referenced by all three test functions above: it bounds
; vscale to the range [1, 16] and enables the SVE target feature.
attributes #0 = { vscale_range(1, 16) "target-features"="+sve" }

0 commit comments

Comments
 (0)