Skip to content

Commit f58b5e1

Browse files
committed
Move reg usage tests to reg-usage.ll
1 parent bde39b4 commit f58b5e1

File tree

3 files changed: 180 additions and 20 deletions.

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product-neon.ll

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 4
2-
; REQUIRES: asserts
3-
42
; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -mattr=+neon,+dotprod -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefixes=CHECK-INTERLEAVE1
53
; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -mattr=+neon,+dotprod -S < %s | FileCheck %s --check-prefixes=CHECK-INTERLEAVED
64
; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -mattr=+neon,+dotprod -force-vector-interleave=1 -vectorizer-maximize-bandwidth -S < %s | FileCheck %s --check-prefixes=CHECK-MAXBW
7-
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize --disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-REGS
85

96
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
107
target triple = "aarch64-none-unknown-elf"
@@ -949,13 +946,6 @@ define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) {
949946
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUM_IN]], [[N_VEC]]
950947
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
951948
; CHECK-MAXBW: scalar.ph:
952-
;
953-
; CHECK-REGS: LV: Checking a loop in 'dotp_unrolled' from <stdin>
954-
; CHECK-REGS: LV(REG): VF = 16
955-
; CHECK-REGS-NEXT: LV(REG): Found max usage: 2 item
956-
; CHECK-REGS-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 9 registers
957-
; CHECK-REGS-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 24 registers
958-
; CHECK-REGS-NEXT: LV(REG): Found invariant usage: 0 item
959949
entry:
960950
br label %for.body
961951

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph:" --version 4
2-
; REQUIRES: asserts
3-
42
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-INTERLEAVE1
53
; RUN: opt -passes=loop-vectorize -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-INTERLEAVED
64
; RUN: opt -passes=loop-vectorize -force-vector-interleave=1 -vectorizer-maximize-bandwidth -enable-epilogue-vectorization=false -S < %s | FileCheck %s --check-prefixes=CHECK-MAXBW
7-
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize --disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-REGS
85

96
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
107
target triple = "aarch64-none-unknown-elf"
@@ -3422,13 +3419,6 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum,
34223419
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
34233420
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
34243421
; CHECK-MAXBW: scalar.ph:
3425-
;
3426-
; CHECK-REGS-LABEL: LV: Checking a loop in 'not_dotp_high_register_pressure' from <stdin>
3427-
; CHECK-REGS: LV(REG): VF = 16
3428-
; CHECK-REGS-NEXT: LV(REG): Found max usage: 2 item
3429-
; CHECK-REGS-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 3 registers
3430-
; CHECK-REGS-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 40 registers
3431-
; CHECK-REGS-NEXT: LV(REG): Found invariant usage: 2 item
34323422
entry:
34333423
%cmp100 = icmp sgt i32 %n, 0
34343424
br i1 %cmp100, label %for.body.lr.ph, label %for.cond.cleanup

llvm/test/Transforms/LoopVectorize/AArch64/reg-usage.ll

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,183 @@ loop:
6868
exit:
6969
ret void
7070
}
71+
72+
; Register-usage test with eight independent multiply-accumulate chains.
; The preheader loads eight i32 running sums from %sum (byte offsets 0..28).
; Each iteration zero-extends a[i], sign-extends b[8*i+0] .. b[8*i+7],
; multiplies each b value by the a value, and adds it into the matching sum.
; The exit block stores the eight sums back through the same pointers.
; The FileCheck directives below pin the LV(REG) lines expected for VF = 16.
; NOTE(review): attribute group #1 is not defined in the visible part of this
; file -- confirm that it exists elsewhere in reg-usage.ll.
define dso_local void @dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum, i32 %n) #1 {
73+
; CHECK-LABEL: LV: Checking a loop in 'dotp_high_register_pressure' from <stdin>
74+
; CHECK: LV(REG): VF = 16
75+
; CHECK-NEXT: LV(REG): Found max usage: 2 item
76+
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 3 registers
77+
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 40 registers
78+
; CHECK-NEXT: LV(REG): Found invariant usage: 2 item
79+
; Skip the loop entirely unless %n is strictly positive.
entry:
80+
%cmp100 = icmp sgt i32 %n, 0
81+
br i1 %cmp100, label %for.body.lr.ph, label %for.cond.cleanup
82+
83+
; Preheader: form the eight accumulator addresses, load their initial values,
; and widen %n to the i64 trip count.
for.body.lr.ph: ; preds = %entry
84+
%arrayidx13 = getelementptr inbounds nuw i8, ptr %sum, i64 4
85+
%gep.b.12 = getelementptr inbounds nuw i8, ptr %sum, i64 8
86+
%arrayidx31 = getelementptr inbounds nuw i8, ptr %sum, i64 12
87+
%arrayidx40 = getelementptr inbounds nuw i8, ptr %sum, i64 16
88+
%arrayidx49 = getelementptr inbounds nuw i8, ptr %sum, i64 20
89+
%arrayidx58 = getelementptr inbounds nuw i8, ptr %sum, i64 24
90+
%arrayidx67 = getelementptr inbounds nuw i8, ptr %sum, i64 28
91+
%sum.promoted = load i32, ptr %sum, align 4
92+
%arrayidx13.promoted = load i32, ptr %arrayidx13, align 4
93+
%gep.b.12.promoted = load i32, ptr %gep.b.12, align 4
94+
%arrayidx31.promoted = load i32, ptr %arrayidx31, align 4
95+
%arrayidx40.promoted = load i32, ptr %arrayidx40, align 4
96+
%arrayidx49.promoted = load i32, ptr %arrayidx49, align 4
97+
%arrayidx58.promoted = load i32, ptr %arrayidx58, align 4
98+
%arrayidx67.promoted = load i32, ptr %arrayidx67, align 4
99+
%wide.trip.count = zext nneg i32 %n to i64
100+
br label %for.body
101+
102+
; Loop exit: store the eight final sums back to the addresses they were
; loaded from in the preheader.
for.cond.for.cond.cleanup_crit_edge: ; preds = %for.body
103+
%add.lcssa = phi i32 [ %add.1, %for.body ]
104+
%add.2.lcssa = phi i32 [ %add.2, %for.body ]
105+
%add.3.lcssa = phi i32 [ %add.3, %for.body ]
106+
%add.4.lcssa = phi i32 [ %add.4, %for.body ]
107+
%add.5.lcssa = phi i32 [ %add.5, %for.body ]
108+
%add.6.lcssa = phi i32 [ %add.6, %for.body ]
109+
%add.7.lcssa = phi i32 [ %add.7, %for.body ]
110+
%add.8.lcssa = phi i32 [ %add.8, %for.body ]
111+
store i32 %add.lcssa, ptr %sum, align 4
112+
store i32 %add.2.lcssa, ptr %arrayidx13, align 4
113+
store i32 %add.3.lcssa, ptr %gep.b.12, align 4
114+
store i32 %add.4.lcssa, ptr %arrayidx31, align 4
115+
store i32 %add.5.lcssa, ptr %arrayidx40, align 4
116+
store i32 %add.6.lcssa, ptr %arrayidx49, align 4
117+
store i32 %add.7.lcssa, ptr %arrayidx58, align 4
118+
store i32 %add.8.lcssa, ptr %arrayidx67, align 4
119+
br label %for.cond.cleanup
120+
121+
for.cond.cleanup: ; preds = %for.cond.for.cond.cleanup_crit_edge, %entry
122+
ret void
123+
124+
; Loop body: phis %0..%7 carry the eight running sums. The single a[i] load is
; shared by all eight products; %9 = 8*i is the base index into %b.
for.body: ; preds = %for.body.lr.ph, %for.body
125+
%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
126+
%0 = phi i32 [ %arrayidx67.promoted, %for.body.lr.ph ], [ %add.8, %for.body ]
127+
%1 = phi i32 [ %arrayidx58.promoted, %for.body.lr.ph ], [ %add.7, %for.body ]
128+
%2 = phi i32 [ %arrayidx49.promoted, %for.body.lr.ph ], [ %add.6, %for.body ]
129+
%3 = phi i32 [ %arrayidx40.promoted, %for.body.lr.ph ], [ %add.5, %for.body ]
130+
%4 = phi i32 [ %arrayidx31.promoted, %for.body.lr.ph ], [ %add.4, %for.body ]
131+
%5 = phi i32 [ %gep.b.12.promoted, %for.body.lr.ph ], [ %add.3, %for.body ]
132+
%6 = phi i32 [ %arrayidx13.promoted, %for.body.lr.ph ], [ %add.2, %for.body ]
133+
%7 = phi i32 [ %sum.promoted, %for.body.lr.ph ], [ %add.1, %for.body ]
134+
%arrayidx = getelementptr inbounds nuw i8, ptr %a, i64 %indvars.iv
135+
%load.a = load i8, ptr %arrayidx, align 1
136+
%ext.a = zext i8 %load.a to i32
137+
%9 = shl nsw i64 %indvars.iv, 3
138+
%gep.b.1 = getelementptr inbounds nuw i8, ptr %b, i64 %9
139+
%load.b.1 = load i8, ptr %gep.b.1, align 1
140+
%ext.b.1 = sext i8 %load.b.1 to i32
141+
%mul.1 = mul nsw i32 %ext.b.1, %ext.a
142+
%add.1 = add nsw i32 %mul.1, %7
143+
%11 = or disjoint i64 %9, 1
144+
%gep.b.2 = getelementptr inbounds nuw i8, ptr %b, i64 %11
145+
%load.b.2 = load i8, ptr %gep.b.2, align 1
146+
%ext.b.2 = sext i8 %load.b.2 to i32
147+
%mul.2 = mul nsw i32 %ext.b.2, %ext.a
148+
%add.2 = add nsw i32 %mul.2, %6
149+
%13 = or disjoint i64 %9, 2
150+
%gep.b.3 = getelementptr inbounds nuw i8, ptr %b, i64 %13
151+
%load.b.3 = load i8, ptr %gep.b.3, align 1
152+
%ext.b.3 = sext i8 %load.b.3 to i32
153+
%mul.3 = mul nsw i32 %ext.b.3, %ext.a
154+
%add.3 = add nsw i32 %mul.3, %5
155+
%15 = or disjoint i64 %9, 3
156+
%gep.b.4 = getelementptr inbounds nuw i8, ptr %b, i64 %15
157+
%load.b.4 = load i8, ptr %gep.b.4, align 1
158+
%ext.b.4 = sext i8 %load.b.4 to i32
159+
%mul.4 = mul nsw i32 %ext.b.4, %ext.a
160+
%add.4 = add nsw i32 %mul.4, %4
161+
%17 = or disjoint i64 %9, 4
162+
%gep.b.5 = getelementptr inbounds nuw i8, ptr %b, i64 %17
163+
%load.b.5 = load i8, ptr %gep.b.5, align 1
164+
%ext.b.5 = sext i8 %load.b.5 to i32
165+
%mul.5 = mul nsw i32 %ext.b.5, %ext.a
166+
%add.5 = add nsw i32 %mul.5, %3
167+
%19 = or disjoint i64 %9, 5
168+
%gep.b.6 = getelementptr inbounds nuw i8, ptr %b, i64 %19
169+
%load.b.6 = load i8, ptr %gep.b.6, align 1
170+
%ext.b.6 = sext i8 %load.b.6 to i32
171+
%mul.6 = mul nsw i32 %ext.b.6, %ext.a
172+
%add.6 = add nsw i32 %mul.6, %2
173+
%21 = or disjoint i64 %9, 6
174+
%gep.b.7 = getelementptr inbounds nuw i8, ptr %b, i64 %21
175+
%load.b.7 = load i8, ptr %gep.b.7, align 1
176+
%ext.b.7 = sext i8 %load.b.7 to i32
177+
%mul.7 = mul nsw i32 %ext.b.7, %ext.a
178+
%add.7 = add nsw i32 %mul.7, %1
179+
%23 = or disjoint i64 %9, 7
180+
%gep.b.8 = getelementptr inbounds nuw i8, ptr %b, i64 %23
181+
%load.b.8 = load i8, ptr %gep.b.8, align 1
182+
%ext.b.8 = sext i8 %load.b.8 to i32
183+
%mul.8 = mul nsw i32 %ext.b.8, %ext.a
184+
%add.8 = add nsw i32 %mul.8, %0
185+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
186+
%exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
187+
br i1 %exitcond.not, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
188+
}
189+
190+
; Register-usage test with four multiply-accumulate chains.
; Each iteration loads i8 values from %a and %b at index %iv and at %iv or-ed
; with 1, 2 and 3, sign-extends them, multiplies the a/b pairs, and adds each
; product into one of four i32 accumulators that start at 0.
; The loop exits when %iv.next equals %num_in; the exit block returns the sum
; of the four accumulators. %num_out is not referenced in the body.
; The FileCheck directives below pin the LV(REG) lines expected for VF = 16.
; NOTE(review): %iv advances by 1 while the offsets are formed with
; `or disjoint` against 1..3, so the operands share set bits on some
; iterations -- presumably acceptable for a register-usage test; confirm.
define i32 @dotp_unrolled(i32 %num_out, i64 %num_in, ptr %a, ptr %b) {
191+
; CHECK-LABEL: LV: Checking a loop in 'dotp_unrolled' from <stdin>
192+
; CHECK: LV(REG): VF = 16
193+
; CHECK-NEXT: LV(REG): Found max usage: 2 item
194+
; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 9 registers
195+
; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 24 registers
196+
; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
197+
entry:
198+
br label %for.body
199+
200+
; Loop body: all eight addresses are formed first, then the four
; load / sign-extend / multiply / accumulate chains follow in order 0..3.
for.body: ; preds = %entry, %for.body
201+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
202+
%accum3 = phi i32 [ 0, %entry ], [ %add.a3, %for.body ]
203+
%accum2 = phi i32 [ 0, %entry ], [ %add.a2, %for.body ]
204+
%accum1 = phi i32 [ 0, %entry ], [ %add.a1, %for.body ]
205+
%accum0 = phi i32 [ 0, %entry ], [ %add.a0, %for.body ]
206+
%gep.a0 = getelementptr inbounds i8, ptr %a, i64 %iv
207+
%gep.b0 = getelementptr inbounds i8, ptr %b, i64 %iv
208+
%offset.1 = or disjoint i64 %iv, 1
209+
%gep.a1 = getelementptr inbounds i8, ptr %a, i64 %offset.1
210+
%gep.b1 = getelementptr inbounds i8, ptr %b, i64 %offset.1
211+
%offset.2 = or disjoint i64 %iv, 2
212+
%gep.a2 = getelementptr inbounds i8, ptr %a, i64 %offset.2
213+
%gep.b2 = getelementptr inbounds i8, ptr %b, i64 %offset.2
214+
%offset.3 = or disjoint i64 %iv, 3
215+
%gep.a3 = getelementptr inbounds i8, ptr %a, i64 %offset.3
216+
%gep.b3 = getelementptr inbounds i8, ptr %b, i64 %offset.3
217+
%load.a0 = load i8, ptr %gep.a0, align 1
218+
%ext.a0 = sext i8 %load.a0 to i32
219+
%load.b0 = load i8, ptr %gep.b0, align 1
220+
%ext.b0 = sext i8 %load.b0 to i32
221+
%mul.a0 = mul nsw i32 %ext.b0, %ext.a0
222+
%add.a0 = add nsw i32 %mul.a0, %accum0
223+
%load.a1 = load i8, ptr %gep.a1, align 1
224+
%ext.a1 = sext i8 %load.a1 to i32
225+
%load.b1 = load i8, ptr %gep.b1, align 1
226+
%ext.b1 = sext i8 %load.b1 to i32
227+
%mul.a1 = mul nsw i32 %ext.a1, %ext.b1
228+
%add.a1 = add nsw i32 %mul.a1, %accum1
229+
%load.a2 = load i8, ptr %gep.a2, align 1
230+
%ext.a2 = sext i8 %load.a2 to i32
231+
%load.b2 = load i8, ptr %gep.b2, align 1
232+
%ext.b2 = sext i8 %load.b2 to i32
233+
%mul.a2 = mul nsw i32 %ext.a2, %ext.b2
234+
%add.a2 = add nsw i32 %mul.a2, %accum2
235+
%load.a3 = load i8, ptr %gep.a3, align 1
236+
%ext.a3 = sext i8 %load.a3 to i32
237+
%load.b3 = load i8, ptr %gep.b3, align 1
238+
%ext.b3 = sext i8 %load.b3 to i32
239+
%mul.a3 = mul nsw i32 %ext.a3, %ext.b3
240+
%add.a3 = add nsw i32 %mul.a3, %accum3
241+
%iv.next = add nuw nsw i64 %iv, 1
242+
%exitcond.not = icmp eq i64 %iv.next, %num_in
243+
br i1 %exitcond.not, label %exit, label %for.body
244+
245+
; Exit: combine the four accumulators pairwise and return the total.
exit: ; preds = %for.body
246+
%result0 = add nsw i32 %add.a0, %add.a1
247+
%result1 = add nsw i32 %add.a2, %add.a3
248+
%result = add nsw i32 %result0, %result1
249+
ret i32 %result
250+
}

0 commit comments

Comments
 (0)