Skip to content

Commit f562e2a

Browse files
committed
[ARM] Update a number of reduction tests to use -cost-kind=all. NFC
1 parent 4a09409 commit f562e2a

File tree

10 files changed

+825
-1043
lines changed

10 files changed

+825
-1043
lines changed
Lines changed: 51 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,34 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2-
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP
3-
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP
4-
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE
5-
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE
2+
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
3+
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
4+
; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
65

76
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
87

98
define i32 @reduce_i64(i32 %arg) {
10-
; V8M-RECIP-LABEL: 'reduce_i64'
11-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
12-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
13-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
14-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
15-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
16-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
9+
; V8M-LABEL: 'reduce_i64'
10+
; V8M-NEXT: Cost Model: Found costs of 2 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
11+
; V8M-NEXT: Cost Model: Found costs of 8 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
12+
; V8M-NEXT: Cost Model: Found costs of 20 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
13+
; V8M-NEXT: Cost Model: Found costs of 44 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
14+
; V8M-NEXT: Cost Model: Found costs of 92 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
15+
; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
1716
;
18-
; NEON-RECIP-LABEL: 'reduce_i64'
19-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
20-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
21-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
22-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
23-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
24-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
17+
; NEON-LABEL: 'reduce_i64'
18+
; NEON-NEXT: Cost Model: Found costs of 3 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
19+
; NEON-NEXT: Cost Model: Found costs of 16 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
20+
; NEON-NEXT: Cost Model: Found costs of 29 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
21+
; NEON-NEXT: Cost Model: Found costs of RThru:55 CodeSize:54 Lat:54 SizeLat:54 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
22+
; NEON-NEXT: Cost Model: Found costs of RThru:107 CodeSize:103 Lat:103 SizeLat:103 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
23+
; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
2524
;
26-
; V8M-SIZE-LABEL: 'reduce_i64'
27-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
28-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
29-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
30-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
31-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
32-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
33-
;
34-
; NEON-SIZE-LABEL: 'reduce_i64'
35-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
36-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
37-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
38-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
39-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
40-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
25+
; MVE-LABEL: 'reduce_i64'
26+
; MVE-NEXT: Cost Model: Found costs of 8 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
27+
; MVE-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
28+
; MVE-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
29+
; MVE-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
30+
; MVE-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
31+
; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
4132
;
4233
%V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
4334
%V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
@@ -48,45 +39,35 @@ define i32 @reduce_i64(i32 %arg) {
4839
}
4940

5041
define i32 @reduce_i32(i32 %arg) {
51-
; V8M-RECIP-LABEL: 'reduce_i32'
52-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
53-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
54-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
55-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
56-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
57-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
58-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
59-
; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
60-
;
61-
; NEON-RECIP-LABEL: 'reduce_i32'
62-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
63-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
64-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
65-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
66-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
67-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
68-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1070 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
69-
; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
42+
; V8M-LABEL: 'reduce_i32'
43+
; V8M-NEXT: Cost Model: Found costs of 4 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
44+
; V8M-NEXT: Cost Model: Found costs of 10 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
45+
; V8M-NEXT: Cost Model: Found costs of 22 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
46+
; V8M-NEXT: Cost Model: Found costs of 46 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
47+
; V8M-NEXT: Cost Model: Found costs of 94 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
48+
; V8M-NEXT: Cost Model: Found costs of 190 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
49+
; V8M-NEXT: Cost Model: Found costs of 382 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
50+
; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
7051
;
71-
; V8M-SIZE-LABEL: 'reduce_i32'
72-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
73-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
74-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
75-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
76-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
77-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
78-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
79-
; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
52+
; NEON-LABEL: 'reduce_i32'
53+
; NEON-NEXT: Cost Model: Found costs of 16 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
54+
; NEON-NEXT: Cost Model: Found costs of 53 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
55+
; NEON-NEXT: Cost Model: Found costs of 150 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
56+
; NEON-NEXT: Cost Model: Found costs of 391 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
57+
; NEON-NEXT: Cost Model: Found costs of 488 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
58+
; NEON-NEXT: Cost Model: Found costs of RThru:682 CodeSize:681 Lat:681 SizeLat:681 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
59+
; NEON-NEXT: Cost Model: Found costs of RThru:1070 CodeSize:1066 Lat:1066 SizeLat:1066 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
60+
; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
8061
;
81-
; NEON-SIZE-LABEL: 'reduce_i32'
82-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
83-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
84-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
85-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
86-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
87-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 681 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
88-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1066 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
89-
; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
62+
; MVE-LABEL: 'reduce_i32'
63+
; MVE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
64+
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
65+
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
66+
; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
67+
; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
68+
; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
69+
; MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
70+
; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
9071
;
9172
%V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
9273
%V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
@@ -97,30 +78,3 @@ define i32 @reduce_i32(i32 %arg) {
9778
%V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
9879
ret i32 undef
9980
}
100-
101-
declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
102-
declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
103-
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
104-
declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
105-
declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
106-
107-
declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
108-
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
109-
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
110-
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
111-
declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
112-
113-
declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
114-
declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
115-
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
116-
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
117-
declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
118-
declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
119-
120-
declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
121-
declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
122-
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
123-
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
124-
declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
125-
declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
126-
declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)

0 commit comments

Comments
 (0)