|
3 | 3 | ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -S < %s | FileCheck %s --check-prefix=CHECK-VF4IC4 --check-prefix=CHECK |
4 | 4 | ; RUN: opt -passes=loop-vectorize -force-vector-interleave=4 -force-vector-width=1 -S < %s | FileCheck %s --check-prefix=CHECK-VF1IC4 --check-prefix=CHECK |
5 | 5 |
|
6 | | -; This test can theoretically be vectorized without a runtime-check, by |
7 | | -; pattern-matching on the constructs that are introduced by IndVarSimplify. |
8 | | -; We can check two things: |
9 | | -; %1 = trunc i64 %iv to i32 |
10 | | -; This indicates that the %iv is truncated to i32. We can then check the loop |
11 | | -; guard is a signed i32: |
12 | | -; %cmp.sgt = icmp sgt i32 %n, 0 |
13 | | -; and successfully vectorize the case without a runtime-check. |
| 6 | +; For the truncated-IV test cases, range analysis of the induction variable is |
| 7 | +; used to ensure the induction variable is always greater than the sentinel |
| 8 | +; value. The case is vectorizable if the truncated induction variable is |
| 9 | +; monotonically increasing and never equal to the sentinel. |
14 | 10 | define i32 @select_icmp_const_truncated_iv_widened_exit(ptr %a, i32 %n) { |
15 | 11 | ; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_widened_exit( |
16 | 12 | ; CHECK-VF4IC1-SAME: ptr [[A:%.*]], i32 [[N:%.*]]) { |
@@ -251,14 +247,8 @@ exit: ; preds = %for.body, %entry |
251 | 247 | ret i32 %rdx.lcssa |
252 | 248 | } |
253 | 249 |
|
254 | | -; This test can theoretically be vectorized without a runtime-check, by |
255 | | -; pattern-matching on the constructs that are introduced by IndVarSimplify. |
256 | | -; We can check two things: |
257 | | -; %1 = trunc i64 %iv to i32 |
258 | | -; This indicates that the %iv is truncated to i32. We can then check the loop |
259 | | -; exit condition, which compares to a constant that fits within i32: |
260 | | -; %exitcond.not = icmp eq i64 %inc, 20000 |
261 | | -; and successfully vectorize the case without a runtime-check. |
| 250 | +; Without a loop guard, the range analysis can instead rely on the constant |
| 251 | +; trip count. |
262 | 252 | define i32 @select_icmp_const_truncated_iv_const_exit(ptr %a) { |
263 | 253 | ; CHECK-VF4IC1-LABEL: define i32 @select_icmp_const_truncated_iv_const_exit( |
264 | 254 | ; CHECK-VF4IC1-SAME: ptr [[A:%.*]]) { |
|
0 commit comments