1
1
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2
- ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP
3
- ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP
4
- ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE
5
- ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE
2
+ ; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
3
+ ; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
4
+ ; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
6
5
7
6
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
8
7
9
8
define i32 @reduce_i64 (i32 %arg ) {
10
- ; V8M-RECIP-LABEL: 'reduce_i64'
11
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
12
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
13
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
14
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
15
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
16
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
9
+ ; V8M-LABEL: 'reduce_i64'
10
+ ; V8M-NEXT: Cost Model: Found costs of 2 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
11
+ ; V8M-NEXT: Cost Model: Found costs of 8 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
12
+ ; V8M-NEXT: Cost Model: Found costs of 20 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
13
+ ; V8M-NEXT: Cost Model: Found costs of 44 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
14
+ ; V8M-NEXT: Cost Model: Found costs of 92 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
15
+ ; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
17
16
;
18
- ; NEON-RECIP-LABEL: 'reduce_i64'
19
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
20
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
21
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
22
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
23
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
24
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
17
+ ; NEON-LABEL: 'reduce_i64'
18
+ ; NEON-NEXT: Cost Model: Found costs of 3 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
19
+ ; NEON-NEXT: Cost Model: Found costs of 16 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
20
+ ; NEON-NEXT: Cost Model: Found costs of 29 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
21
+ ; NEON-NEXT: Cost Model: Found costs of RThru:55 CodeSize:54 Lat:54 SizeLat:54 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
22
+ ; NEON-NEXT: Cost Model: Found costs of RThru:107 CodeSize:103 Lat:103 SizeLat:103 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
23
+ ; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
25
24
;
26
- ; V8M-SIZE-LABEL: 'reduce_i64'
27
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
28
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
29
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
30
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
31
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
32
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
33
- ;
34
- ; NEON-SIZE-LABEL: 'reduce_i64'
35
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
36
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
37
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
38
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
39
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
40
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
25
+ ; MVE-LABEL: 'reduce_i64'
26
+ ; MVE-NEXT: Cost Model: Found costs of 8 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
27
+ ; MVE-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
28
+ ; MVE-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
29
+ ; MVE-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
30
+ ; MVE-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
31
+ ; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
41
32
;
42
33
%V1 = call i64 @llvm.vector.reduce.add.v1i64 (<1 x i64 > undef )
43
34
%V2 = call i64 @llvm.vector.reduce.add.v2i64 (<2 x i64 > undef )
@@ -48,45 +39,35 @@ define i32 @reduce_i64(i32 %arg) {
48
39
}
49
40
50
41
define i32 @reduce_i32 (i32 %arg ) {
51
- ; V8M-RECIP-LABEL: 'reduce_i32'
52
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
53
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
54
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
55
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
56
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
57
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
58
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
59
- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
60
- ;
61
- ; NEON-RECIP-LABEL: 'reduce_i32'
62
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
63
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
64
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
65
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
66
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
67
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
68
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1070 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
69
- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
42
+ ; V8M-LABEL: 'reduce_i32'
43
+ ; V8M-NEXT: Cost Model: Found costs of 4 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
44
+ ; V8M-NEXT: Cost Model: Found costs of 10 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
45
+ ; V8M-NEXT: Cost Model: Found costs of 22 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
46
+ ; V8M-NEXT: Cost Model: Found costs of 46 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
47
+ ; V8M-NEXT: Cost Model: Found costs of 94 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
48
+ ; V8M-NEXT: Cost Model: Found costs of 190 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
49
+ ; V8M-NEXT: Cost Model: Found costs of 382 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
50
+ ; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
70
51
;
71
- ; V8M-SIZE-LABEL: 'reduce_i32'
72
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
73
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
74
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
75
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
76
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
77
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
78
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
79
- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
52
+ ; NEON-LABEL: 'reduce_i32'
53
+ ; NEON-NEXT: Cost Model: Found costs of 16 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
54
+ ; NEON-NEXT: Cost Model: Found costs of 53 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
55
+ ; NEON-NEXT: Cost Model: Found costs of 150 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
56
+ ; NEON-NEXT: Cost Model: Found costs of 391 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
57
+ ; NEON-NEXT: Cost Model: Found costs of 488 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
58
+ ; NEON-NEXT: Cost Model: Found costs of RThru:682 CodeSize:681 Lat:681 SizeLat:681 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
59
+ ; NEON-NEXT: Cost Model: Found costs of RThru:1070 CodeSize:1066 Lat:1066 SizeLat:1066 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
60
+ ; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
80
61
;
81
- ; NEON-SIZE-LABEL: 'reduce_i32'
82
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
83
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
84
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
85
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
86
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
87
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 681 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
88
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1066 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
89
- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
62
+ ; MVE-LABEL: 'reduce_i32'
63
+ ; MVE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
64
+ ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
65
+ ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
66
+ ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
67
+ ; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
68
+ ; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
69
+ ; MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
70
+ ; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
90
71
;
91
72
%V2 = call i8 @llvm.vector.reduce.add.v2i8 (<2 x i8 > undef )
92
73
%V4 = call i8 @llvm.vector.reduce.add.v4i8 (<4 x i8 > undef )
@@ -97,30 +78,3 @@ define i32 @reduce_i32(i32 %arg) {
97
78
%V128 = call i8 @llvm.vector.reduce.add.v128i8 (<128 x i8 > undef )
98
79
ret i32 undef
99
80
}
100
-
101
- declare i64 @llvm.vector.reduce.add.v1i64 (<1 x i64 >)
102
- declare i64 @llvm.vector.reduce.add.v2i64 (<2 x i64 >)
103
- declare i64 @llvm.vector.reduce.add.v4i64 (<4 x i64 >)
104
- declare i64 @llvm.vector.reduce.add.v8i64 (<8 x i64 >)
105
- declare i64 @llvm.vector.reduce.add.v16i64 (<16 x i64 >)
106
-
107
- declare i32 @llvm.vector.reduce.add.v2i32 (<2 x i32 >)
108
- declare i32 @llvm.vector.reduce.add.v4i32 (<4 x i32 >)
109
- declare i32 @llvm.vector.reduce.add.v8i32 (<8 x i32 >)
110
- declare i32 @llvm.vector.reduce.add.v16i32 (<16 x i32 >)
111
- declare i32 @llvm.vector.reduce.add.v32i32 (<32 x i32 >)
112
-
113
- declare i16 @llvm.vector.reduce.add.v2i16 (<2 x i16 >)
114
- declare i16 @llvm.vector.reduce.add.v4i16 (<4 x i16 >)
115
- declare i16 @llvm.vector.reduce.add.v8i16 (<8 x i16 >)
116
- declare i16 @llvm.vector.reduce.add.v16i16 (<16 x i16 >)
117
- declare i16 @llvm.vector.reduce.add.v32i16 (<32 x i16 >)
118
- declare i16 @llvm.vector.reduce.add.v64i16 (<64 x i16 >)
119
-
120
- declare i8 @llvm.vector.reduce.add.v2i8 (<2 x i8 >)
121
- declare i8 @llvm.vector.reduce.add.v4i8 (<4 x i8 >)
122
- declare i8 @llvm.vector.reduce.add.v8i8 (<8 x i8 >)
123
- declare i8 @llvm.vector.reduce.add.v16i8 (<16 x i8 >)
124
- declare i8 @llvm.vector.reduce.add.v32i8 (<32 x i8 >)
125
- declare i8 @llvm.vector.reduce.add.v64i8 (<64 x i8 >)
126
- declare i8 @llvm.vector.reduce.add.v128i8 (<128 x i8 >)
0 commit comments