Skip to content

Commit 02f203f

Browse files
committed
[NFC] Partial reduce test to demonstrate regression post commit #cc9c64d
We have seen regression for Neoverse-v2 post commit #cc9c64d for the mentioned case. See https://godbolt.org/z/j9Mj5WM7c. A future patch will address this.
1 parent d59f7f2 commit 02f203f

File tree

1 file changed

+99
-0
lines changed

1 file changed

+99
-0
lines changed
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --filter-out-after "^middle" --version 6
2+
; RUN: opt < %s -p loop-vectorize -mtriple=aarch64 -S -o - | FileCheck %s
3+
; RUN: opt < %s -p loop-vectorize -mtriple=aarch64 -mcpu=neoverse-v2 -S -o - | FileCheck %s --check-prefix NEOVERSE-V2
4+
5+
define i64 @partial_reduction_with_no_second_input(ptr %arr, i64 %N)
6+
; CHECK-LABEL: define i64 @partial_reduction_with_no_second_input(
7+
; CHECK-SAME: ptr [[ARR:%.*]], i64 [[N:%.*]]) {
8+
; CHECK-NEXT: [[ENTRY:.*:]]
9+
; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
10+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 8
11+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
12+
; CHECK: [[VECTOR_PH]]:
13+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 8
14+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
15+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
16+
; CHECK: [[VECTOR_BODY]]:
17+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
18+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ]
19+
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
20+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]]
21+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
22+
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
23+
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4
24+
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
25+
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD2]] to <4 x i64>
26+
; CHECK-NEXT: [[TMP4]] = add <4 x i64> [[VEC_PHI]], [[TMP2]]
27+
; CHECK-NEXT: [[TMP5]] = add <4 x i64> [[VEC_PHI1]], [[TMP3]]
28+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
29+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
30+
; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
31+
; CHECK: [[MIDDLE_BLOCK]]:
32+
;
33+
; NEOVERSE-V2-LABEL: define i64 @partial_reduction_with_no_second_input(
34+
; NEOVERSE-V2-SAME: ptr [[ARR:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
35+
; NEOVERSE-V2-NEXT: [[ITER_CHECK:.*:]]
36+
; NEOVERSE-V2-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
37+
; NEOVERSE-V2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX]], 2
38+
; NEOVERSE-V2-NEXT: br i1 [[MIN_ITERS_CHECK]], [[VEC_EPILOG_SCALAR_PH:label %.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
39+
; NEOVERSE-V2: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
40+
; NEOVERSE-V2-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[UMAX]], 8
41+
; NEOVERSE-V2-NEXT: br i1 [[MIN_ITERS_CHECK1]], [[VEC_EPILOG_PH:label %.*]], label %[[VECTOR_PH:.*]]
42+
; NEOVERSE-V2: [[VECTOR_PH]]:
43+
; NEOVERSE-V2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX]], 8
44+
; NEOVERSE-V2-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX]], [[N_MOD_VF]]
45+
; NEOVERSE-V2-NEXT: br label %[[VECTOR_BODY:.*]]
46+
; NEOVERSE-V2: [[VECTOR_BODY]]:
47+
; NEOVERSE-V2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
48+
; NEOVERSE-V2-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
49+
; NEOVERSE-V2-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ]
50+
; NEOVERSE-V2-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP10:%.*]], %[[VECTOR_BODY]] ]
51+
; NEOVERSE-V2-NEXT: [[VEC_PHI4:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ]
52+
; NEOVERSE-V2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[INDEX]]
53+
; NEOVERSE-V2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2
54+
; NEOVERSE-V2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4
55+
; NEOVERSE-V2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6
56+
; NEOVERSE-V2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
57+
; NEOVERSE-V2-NEXT: [[WIDE_LOAD5:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
58+
; NEOVERSE-V2-NEXT: [[WIDE_LOAD6:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4
59+
; NEOVERSE-V2-NEXT: [[WIDE_LOAD7:%.*]] = load <2 x i32>, ptr [[TMP3]], align 4
60+
; NEOVERSE-V2-NEXT: [[TMP4:%.*]] = sext <2 x i32> [[WIDE_LOAD]] to <2 x i64>
61+
; NEOVERSE-V2-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[WIDE_LOAD5]] to <2 x i64>
62+
; NEOVERSE-V2-NEXT: [[TMP6:%.*]] = sext <2 x i32> [[WIDE_LOAD6]] to <2 x i64>
63+
; NEOVERSE-V2-NEXT: [[TMP7:%.*]] = sext <2 x i32> [[WIDE_LOAD7]] to <2 x i64>
64+
; NEOVERSE-V2-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI]], [[TMP4]]
65+
; NEOVERSE-V2-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI2]], [[TMP5]]
66+
; NEOVERSE-V2-NEXT: [[TMP10]] = add <2 x i64> [[VEC_PHI3]], [[TMP6]]
67+
; NEOVERSE-V2-NEXT: [[TMP11]] = add <2 x i64> [[VEC_PHI4]], [[TMP7]]
68+
; NEOVERSE-V2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
69+
; NEOVERSE-V2-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
70+
; NEOVERSE-V2-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
71+
; NEOVERSE-V2: [[MIDDLE_BLOCK]]:
72+
;
73+
{
74+
entry:
75+
br label %loop
76+
77+
loop:
78+
%1 = phi i64 [ 0, %entry ], [ %2, %loop ]
79+
%acc = phi i64 [ 0, %entry ], [ %add, %loop ]
80+
%gep = getelementptr inbounds i32, ptr %arr, i64 %1
81+
%load = load i32, ptr %gep
82+
%sext = sext i32 %load to i64
83+
%add = add i64 %acc, %sext
84+
%2 = add i64 %1, 1
85+
%3 = icmp ult i64 %2, %N
86+
br i1 %3, label %loop, label %exit
87+
88+
exit:
89+
ret i64 %add
90+
}
91+
;.
92+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
93+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
94+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
95+
;.
96+
; NEOVERSE-V2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
97+
; NEOVERSE-V2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
98+
; NEOVERSE-V2: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
99+
;.

0 commit comments

Comments
 (0)