11; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2- ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-RECIP
3- ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-RECIP
4- ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M-SIZE
5- ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON-SIZE
2+ ; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=thumbv8m.main < %s | FileCheck %s --check-prefix=V8M
3+ ; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8a-linux-gnueabihf < %s | FileCheck %s --check-prefix=NEON
4+ ; RUN: opt -passes="print<cost-model>" -cost-kind=all 2>&1 -disable-output -mtriple=armv8.1m.main -mattr=+mve < %s | FileCheck %s --check-prefix=MVE
65
76target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
87
98define i32 @reduce_i64(i32 %arg) {
10- ; V8M-RECIP-LABEL: 'reduce_i64'
11- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
12- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
13- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
14- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
15- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
16- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
9+ ; V8M-LABEL: 'reduce_i64'
10+ ; V8M-NEXT: Cost Model: Found costs of 2 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
11+ ; V8M-NEXT: Cost Model: Found costs of 8 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
12+ ; V8M-NEXT: Cost Model: Found costs of 20 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
13+ ; V8M-NEXT: Cost Model: Found costs of 44 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
14+ ; V8M-NEXT: Cost Model: Found costs of 92 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
15+ ; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
1716;
18- ; NEON-RECIP-LABEL: 'reduce_i64'
19- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
20- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
21- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
22- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
23- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
24- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
17+ ; NEON-LABEL: 'reduce_i64'
18+ ; NEON-NEXT: Cost Model: Found costs of 3 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
19+ ; NEON-NEXT: Cost Model: Found costs of 16 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
20+ ; NEON-NEXT: Cost Model: Found costs of 29 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
21+ ; NEON-NEXT: Cost Model: Found costs of RThru:55 CodeSize:54 Lat:54 SizeLat:54 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
22+ ; NEON-NEXT: Cost Model: Found costs of RThru:107 CodeSize:103 Lat:103 SizeLat:103 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
23+ ; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
2524;
26- ; V8M-SIZE-LABEL: 'reduce_i64'
27- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
28- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
29- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
30- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
31- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
32- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
33- ;
34- ; NEON-SIZE-LABEL: 'reduce_i64'
35- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
36- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
37- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
38- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
39- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 103 for instruction: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
40- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
25+ ; MVE-LABEL: 'reduce_i64'
26+ ; MVE-NEXT: Cost Model: Found costs of 8 for: %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
27+ ; MVE-NEXT: Cost Model: Found costs of RThru:108 CodeSize:76 Lat:108 SizeLat:108 for: %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
28+ ; MVE-NEXT: Cost Model: Found costs of RThru:208 CodeSize:144 Lat:208 SizeLat:208 for: %V4 = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> undef)
29+ ; MVE-NEXT: Cost Model: Found costs of RThru:408 CodeSize:280 Lat:408 SizeLat:408 for: %V8 = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> undef)
30+ ; MVE-NEXT: Cost Model: Found costs of RThru:808 CodeSize:552 Lat:808 SizeLat:808 for: %V16 = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> undef)
31+ ; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
4132;
4233 %V1 = call i64 @llvm.vector.reduce.add.v1i64(<1 x i64> undef)
4334 %V2 = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> undef)
@@ -48,45 +39,35 @@ define i32 @reduce_i64(i32 %arg) {
4839}
4940
5041define i32 @reduce_i32(i32 %arg) {
51- ; V8M-RECIP-LABEL: 'reduce_i32'
52- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
53- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
54- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
55- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
56- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
57- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
58- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
59- ; V8M-RECIP-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
60- ;
61- ; NEON-RECIP-LABEL: 'reduce_i32'
62- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
63- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
64- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
65- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
66- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
67- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 682 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
68- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 1070 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
69- ; NEON-RECIP-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
42+ ; V8M-LABEL: 'reduce_i32'
43+ ; V8M-NEXT: Cost Model: Found costs of 4 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
44+ ; V8M-NEXT: Cost Model: Found costs of 10 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
45+ ; V8M-NEXT: Cost Model: Found costs of 22 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
46+ ; V8M-NEXT: Cost Model: Found costs of 46 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
47+ ; V8M-NEXT: Cost Model: Found costs of 94 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
48+ ; V8M-NEXT: Cost Model: Found costs of 190 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
49+ ; V8M-NEXT: Cost Model: Found costs of 382 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
50+ ; V8M-NEXT: Cost Model: Found costs of 1 for: ret i32 undef
7051;
71- ; V8M-SIZE-LABEL: 'reduce_i32'
72- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
73- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
74- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
75- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
76- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
77- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
78- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 382 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
79- ; V8M-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
52+ ; NEON-LABEL: 'reduce_i32'
53+ ; NEON-NEXT: Cost Model: Found costs of 16 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
54+ ; NEON-NEXT: Cost Model: Found costs of 53 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
55+ ; NEON-NEXT: Cost Model: Found costs of 150 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
56+ ; NEON-NEXT: Cost Model: Found costs of 391 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
57+ ; NEON-NEXT: Cost Model: Found costs of 488 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
58+ ; NEON-NEXT: Cost Model: Found costs of RThru:682 CodeSize:681 Lat:681 SizeLat:681 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
59+ ; NEON-NEXT: Cost Model: Found costs of RThru:1070 CodeSize:1066 Lat:1066 SizeLat:1066 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
60+ ; NEON-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
8061;
81- ; NEON-SIZE-LABEL: 'reduce_i32'
82- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
83- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
84- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 150 for instruction: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
85- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 391 for instruction: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
86- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 488 for instruction: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
87- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 681 for instruction: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
88- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1066 for instruction: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
89- ; NEON-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
62+ ; MVE-LABEL: 'reduce_i32'
63+ ; MVE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:38 Lat:54 SizeLat:54 for: %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
64+ ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
65+ ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V8 = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> undef)
66+ ; MVE-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:2 SizeLat:2 for: %V16 = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> undef)
67+ ; MVE-NEXT: Cost Model: Found costs of RThru:4 CodeSize:2 Lat:4 SizeLat:4 for: %V32 = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> undef)
68+ ; MVE-NEXT: Cost Model: Found costs of RThru:8 CodeSize:4 Lat:8 SizeLat:8 for: %V64 = call i8 @llvm.vector.reduce.add.v64i8(<64 x i8> undef)
69+ ; MVE-NEXT: Cost Model: Found costs of RThru:16 CodeSize:8 Lat:16 SizeLat:16 for: %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
70+ ; MVE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret i32 undef
9071;
9172 %V2 = call i8 @llvm.vector.reduce.add.v2i8(<2 x i8> undef)
9273 %V4 = call i8 @llvm.vector.reduce.add.v4i8(<4 x i8> undef)
@@ -97,30 +78,3 @@ define i32 @reduce_i32(i32 %arg) {
9778 %V128 = call i8 @llvm.vector.reduce.add.v128i8(<128 x i8> undef)
9879 ret i32 undef
9980}
100-
101- declare i64 @llvm.vector.reduce.add.v1i64(<1 x i64>)
102- declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
103- declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)
104- declare i64 @llvm.vector.reduce.add.v8i64(<8 x i64>)
105- declare i64 @llvm.vector.reduce.add.v16i64(<16 x i64>)
106-
107- declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
108- declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
109- declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)
110- declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
111- declare i32 @llvm.vector.reduce.add.v32i32(<32 x i32>)
112-
113- declare i16 @llvm.vector.reduce.add.v2i16(<2 x i16>)
114- declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
115- declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
116- declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)
117- declare i16 @llvm.vector.reduce.add.v32i16(<32 x i16>)
118- declare i16 @llvm.vector.reduce.add.v64i16(<64 x i16>)
119-
120- declare i8 @llvm.vector.reduce.add.v2i8(<2 x i8>)
121- declare i8 @llvm.vector.reduce.add.v4i8(<4 x i8>)
122- declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
123- declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
124- declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)
125- declare i8 @llvm.vector.reduce.add.v64i8(<64 x i8>)
126- declare i8 @llvm.vector.reduce.add.v128i8(<128 x i8>)
0 commit comments