1
1
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2
- ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s
3
- ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s
4
-
5
- ;
6
- ; 128-bit Vectors
7
- ;
2
+ ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
3
+ ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=AVX
4
+ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
5
+ ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=AVX
8
6
9
7
; Interleaves elements 0-1 of two hadd.ps results (shuffle mask <0,4,1,5>).
; Both the SSE and the AVX expectations below contain exactly one horizontal
; add (xmm2 with xmm0) followed by a single [0,2,1,3] lane permute, even
; though the IR calls the intrinsic twice.
define <4 x float > @test_unpacklo_hadd_v4f32 (<4 x float > %0 , <4 x float > %1 , <4 x float > %2 , <4 x float > %3 ) {
10
- ; CHECK-LABEL: test_unpacklo_hadd_v4f32:
11
- ; CHECK: ## %bb.0:
12
- ; CHECK-NEXT: vhaddps %xmm2, %xmm0, %xmm0
13
- ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
14
- ; CHECK-NEXT: ret{{[l|q]}}
8
+ ; SSE-LABEL: test_unpacklo_hadd_v4f32:
9
+ ; SSE: ## %bb.0:
10
+ ; SSE-NEXT: haddps %xmm2, %xmm0
11
+ ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
12
+ ; SSE-NEXT: ret{{[l|q]}}
13
+ ;
14
+ ; AVX-LABEL: test_unpacklo_hadd_v4f32:
15
+ ; AVX: ## %bb.0:
16
+ ; AVX-NEXT: vhaddps %xmm2, %xmm0, %xmm0
17
+ ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
18
+ ; AVX-NEXT: ret{{[l|q]}}
15
19
%5 = tail call <4 x float > @llvm.x86.sse3.hadd.ps (<4 x float > %0 , <4 x float > %1 ) #4
16
20
%6 = tail call <4 x float > @llvm.x86.sse3.hadd.ps (<4 x float > %2 , <4 x float > %3 ) #4
17
21
; Mask indices 0,1 pick elements 0,1 of %5; indices 4,5 pick elements 0,1 of %6.
%7 = shufflevector <4 x float > %5 , <4 x float > %6 , <4 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 >
18
22
ret <4 x float > %7
19
23
}
20
24
21
25
; Interleaves elements 2-3 of two hadd.ps results (shuffle mask <2,6,3,7>).
; The expectations below contain one horizontal add of xmm3 with xmm1 and a
; single [0,2,1,3] lane permute; the SSE sequence first copies xmm1 into xmm0
; with movaps, while the AVX form writes xmm0 directly.
define <4 x float > @test_unpackhi_hadd_v4f32 (<4 x float > %0 , <4 x float > %1 , <4 x float > %2 , <4 x float > %3 ) {
22
- ; CHECK-LABEL: test_unpackhi_hadd_v4f32:
23
- ; CHECK: ## %bb.0:
24
- ; CHECK-NEXT: vhaddps %xmm3, %xmm1, %xmm0
25
- ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
26
- ; CHECK-NEXT: ret{{[l|q]}}
26
+ ; SSE-LABEL: test_unpackhi_hadd_v4f32:
27
+ ; SSE: ## %bb.0:
28
+ ; SSE-NEXT: movaps %xmm1, %xmm0
29
+ ; SSE-NEXT: haddps %xmm3, %xmm0
30
+ ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
31
+ ; SSE-NEXT: ret{{[l|q]}}
32
+ ;
33
+ ; AVX-LABEL: test_unpackhi_hadd_v4f32:
34
+ ; AVX: ## %bb.0:
35
+ ; AVX-NEXT: vhaddps %xmm3, %xmm1, %xmm0
36
+ ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
37
+ ; AVX-NEXT: ret{{[l|q]}}
27
38
%5 = tail call <4 x float > @llvm.x86.sse3.hadd.ps (<4 x float > %0 , <4 x float > %1 ) #4
28
39
%6 = tail call <4 x float > @llvm.x86.sse3.hadd.ps (<4 x float > %2 , <4 x float > %3 ) #4
29
40
; Mask indices 2,3 pick elements 2,3 of %5; indices 6,7 pick elements 2,3 of %6.
%7 = shufflevector <4 x float > %5 , <4 x float > %6 , <4 x i32 > <i32 2 , i32 6 , i32 3 , i32 7 >
30
41
ret <4 x float > %7
31
42
}
32
43
33
44
; Horizontal-subtract counterpart of the unpacklo test: interleaves elements
; 0-1 of two hsub.ps results (shuffle mask <0,4,1,5>). The expectations below
; contain one horizontal subtract of xmm2 with xmm0 followed by a single
; [0,2,1,3] lane permute.
define <4 x float > @test_unpacklo_hsub_v4f32 (<4 x float > %0 , <4 x float > %1 , <4 x float > %2 , <4 x float > %3 ) {
34
- ; CHECK-LABEL: test_unpacklo_hsub_v4f32:
35
- ; CHECK: ## %bb.0:
36
- ; CHECK-NEXT: vhsubps %xmm2, %xmm0, %xmm0
37
- ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
38
- ; CHECK-NEXT: ret{{[l|q]}}
45
+ ; SSE-LABEL: test_unpacklo_hsub_v4f32:
46
+ ; SSE: ## %bb.0:
47
+ ; SSE-NEXT: hsubps %xmm2, %xmm0
48
+ ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
49
+ ; SSE-NEXT: ret{{[l|q]}}
50
+ ;
51
+ ; AVX-LABEL: test_unpacklo_hsub_v4f32:
52
+ ; AVX: ## %bb.0:
53
+ ; AVX-NEXT: vhsubps %xmm2, %xmm0, %xmm0
54
+ ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
55
+ ; AVX-NEXT: ret{{[l|q]}}
39
56
%5 = tail call <4 x float > @llvm.x86.sse3.hsub.ps (<4 x float > %0 , <4 x float > %1 ) #4
40
57
%6 = tail call <4 x float > @llvm.x86.sse3.hsub.ps (<4 x float > %2 , <4 x float > %3 ) #4
41
58
; Mask indices 0,1 pick elements 0,1 of %5; indices 4,5 pick elements 0,1 of %6.
%7 = shufflevector <4 x float > %5 , <4 x float > %6 , <4 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 >
42
59
ret <4 x float > %7
43
60
}
44
61
45
62
; Horizontal-subtract counterpart of the unpackhi test: interleaves elements
; 2-3 of two hsub.ps results (shuffle mask <2,6,3,7>). The expectations below
; contain one horizontal subtract of xmm3 with xmm1 and a single [0,2,1,3]
; lane permute; the SSE sequence first copies xmm1 into xmm0 with movaps.
define <4 x float > @test_unpackhi_hsub_v4f32 (<4 x float > %0 , <4 x float > %1 , <4 x float > %2 , <4 x float > %3 ) {
46
- ; CHECK-LABEL: test_unpackhi_hsub_v4f32:
47
- ; CHECK: ## %bb.0:
48
- ; CHECK-NEXT: vhsubps %xmm3, %xmm1, %xmm0
49
- ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
50
- ; CHECK-NEXT: ret{{[l|q]}}
63
+ ; SSE-LABEL: test_unpackhi_hsub_v4f32:
64
+ ; SSE: ## %bb.0:
65
+ ; SSE-NEXT: movaps %xmm1, %xmm0
66
+ ; SSE-NEXT: hsubps %xmm3, %xmm0
67
+ ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
68
+ ; SSE-NEXT: ret{{[l|q]}}
69
+ ;
70
+ ; AVX-LABEL: test_unpackhi_hsub_v4f32:
71
+ ; AVX: ## %bb.0:
72
+ ; AVX-NEXT: vhsubps %xmm3, %xmm1, %xmm0
73
+ ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
74
+ ; AVX-NEXT: ret{{[l|q]}}
51
75
%5 = tail call <4 x float > @llvm.x86.sse3.hsub.ps (<4 x float > %0 , <4 x float > %1 ) #4
52
76
%6 = tail call <4 x float > @llvm.x86.sse3.hsub.ps (<4 x float > %2 , <4 x float > %3 ) #4
53
77
; Mask indices 2,3 pick elements 2,3 of %5; indices 6,7 pick elements 2,3 of %6.
%7 = shufflevector <4 x float > %5 , <4 x float > %6 , <4 x i32 > <i32 2 , i32 6 , i32 3 , i32 7 >
54
78
ret <4 x float > %7
55
79
}
56
80
57
81
; Integer variant of the unpacklo/hadd test: interleaves elements 0-1 of two
; phadd.d.128 results (shuffle mask <0,4,1,5>). The expectations below contain
; one integer horizontal add of xmm2 with xmm0 followed by a single [0,2,1,3]
; dword shuffle.
define <4 x i32 > @test_unpacklo_hadd_v4i32 (<4 x i32 > %0 , <4 x i32 > %1 , <4 x i32 > %2 , <4 x i32 > %3 ) {
58
- ; CHECK-LABEL: test_unpacklo_hadd_v4i32:
59
- ; CHECK: ## %bb.0:
60
- ; CHECK-NEXT: vphaddd %xmm2, %xmm0, %xmm0
61
- ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
62
- ; CHECK-NEXT: ret{{[l|q]}}
82
+ ; SSE-LABEL: test_unpacklo_hadd_v4i32:
83
+ ; SSE: ## %bb.0:
84
+ ; SSE-NEXT: phaddd %xmm2, %xmm0
85
+ ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
86
+ ; SSE-NEXT: ret{{[l|q]}}
87
+ ;
88
+ ; AVX-LABEL: test_unpacklo_hadd_v4i32:
89
+ ; AVX: ## %bb.0:
90
+ ; AVX-NEXT: vphaddd %xmm2, %xmm0, %xmm0
91
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
92
+ ; AVX-NEXT: ret{{[l|q]}}
63
93
%5 = tail call <4 x i32 > @llvm.x86.ssse3.phadd.d.128 (<4 x i32 > %0 , <4 x i32 > %1 ) #5
64
94
%6 = tail call <4 x i32 > @llvm.x86.ssse3.phadd.d.128 (<4 x i32 > %2 , <4 x i32 > %3 ) #5
65
95
; Mask indices 0,1 pick elements 0,1 of %5; indices 4,5 pick elements 0,1 of %6.
%7 = shufflevector <4 x i32 > %5 , <4 x i32 > %6 , <4 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 >
66
96
ret <4 x i32 > %7
67
97
}
68
98
69
99
; Integer variant of the unpackhi/hadd test: interleaves elements 2-3 of two
; phadd.d.128 results (shuffle mask <2,6,3,7>). The expectations below contain
; one integer horizontal add of xmm3 with xmm1 and a single [0,2,1,3] dword
; shuffle; in the SSE sequence the add is done in xmm1 and the shuffle writes
; the result into xmm0.
define <4 x i32 > @test_unpackhi_hadd_v4i32 (<4 x i32 > %0 , <4 x i32 > %1 , <4 x i32 > %2 , <4 x i32 > %3 ) {
70
- ; CHECK-LABEL: test_unpackhi_hadd_v4i32:
71
- ; CHECK: ## %bb.0:
72
- ; CHECK-NEXT: vphaddd %xmm3, %xmm1, %xmm0
73
- ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
74
- ; CHECK-NEXT: ret{{[l|q]}}
100
+ ; SSE-LABEL: test_unpackhi_hadd_v4i32:
101
+ ; SSE: ## %bb.0:
102
+ ; SSE-NEXT: phaddd %xmm3, %xmm1
103
+ ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,1,3]
104
+ ; SSE-NEXT: ret{{[l|q]}}
105
+ ;
106
+ ; AVX-LABEL: test_unpackhi_hadd_v4i32:
107
+ ; AVX: ## %bb.0:
108
+ ; AVX-NEXT: vphaddd %xmm3, %xmm1, %xmm0
109
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
110
+ ; AVX-NEXT: ret{{[l|q]}}
75
111
%5 = tail call <4 x i32 > @llvm.x86.ssse3.phadd.d.128 (<4 x i32 > %0 , <4 x i32 > %1 ) #5
76
112
%6 = tail call <4 x i32 > @llvm.x86.ssse3.phadd.d.128 (<4 x i32 > %2 , <4 x i32 > %3 ) #5
77
113
; Mask indices 2,3 pick elements 2,3 of %5; indices 6,7 pick elements 2,3 of %6.
%7 = shufflevector <4 x i32 > %5 , <4 x i32 > %6 , <4 x i32 > <i32 2 , i32 6 , i32 3 , i32 7 >
78
114
ret <4 x i32 > %7
79
115
}
80
116
81
117
; Integer horizontal-subtract variant of the unpacklo test: interleaves
; elements 0-1 of two phsub.d.128 results (shuffle mask <0,4,1,5>). The
; expectations below contain one integer horizontal subtract of xmm2 with xmm0
; followed by a single [0,2,1,3] dword shuffle.
define <4 x i32 > @test_unpacklo_hsub_v4i32 (<4 x i32 > %0 , <4 x i32 > %1 , <4 x i32 > %2 , <4 x i32 > %3 ) {
82
- ; CHECK-LABEL: test_unpacklo_hsub_v4i32:
83
- ; CHECK: ## %bb.0:
84
- ; CHECK-NEXT: vphsubd %xmm2, %xmm0, %xmm0
85
- ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
86
- ; CHECK-NEXT: ret{{[l|q]}}
118
+ ; SSE-LABEL: test_unpacklo_hsub_v4i32:
119
+ ; SSE: ## %bb.0:
120
+ ; SSE-NEXT: phsubd %xmm2, %xmm0
121
+ ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
122
+ ; SSE-NEXT: ret{{[l|q]}}
123
+ ;
124
+ ; AVX-LABEL: test_unpacklo_hsub_v4i32:
125
+ ; AVX: ## %bb.0:
126
+ ; AVX-NEXT: vphsubd %xmm2, %xmm0, %xmm0
127
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
128
+ ; AVX-NEXT: ret{{[l|q]}}
87
129
%5 = tail call <4 x i32 > @llvm.x86.ssse3.phsub.d.128 (<4 x i32 > %0 , <4 x i32 > %1 ) #5
88
130
%6 = tail call <4 x i32 > @llvm.x86.ssse3.phsub.d.128 (<4 x i32 > %2 , <4 x i32 > %3 ) #5
89
131
; Mask indices 0,1 pick elements 0,1 of %5; indices 4,5 pick elements 0,1 of %6.
%7 = shufflevector <4 x i32 > %5 , <4 x i32 > %6 , <4 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 >
90
132
ret <4 x i32 > %7
91
133
}
92
134
93
135
; Integer horizontal-subtract variant of the unpackhi test: interleaves
; elements 2-3 of two phsub.d.128 results (shuffle mask <2,6,3,7>). The
; expectations below contain one integer horizontal subtract of xmm3 with xmm1
; and a single [0,2,1,3] dword shuffle; in the SSE sequence the subtract is
; done in xmm1 and the shuffle writes the result into xmm0.
define <4 x i32 > @test_unpackhi_hsub_v4i32 (<4 x i32 > %0 , <4 x i32 > %1 , <4 x i32 > %2 , <4 x i32 > %3 ) {
94
- ; CHECK-LABEL: test_unpackhi_hsub_v4i32:
95
- ; CHECK: ## %bb.0:
96
- ; CHECK-NEXT: vphsubd %xmm3, %xmm1, %xmm0
97
- ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
98
- ; CHECK-NEXT: ret{{[l|q]}}
136
+ ; SSE-LABEL: test_unpackhi_hsub_v4i32:
137
+ ; SSE: ## %bb.0:
138
+ ; SSE-NEXT: phsubd %xmm3, %xmm1
139
+ ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,1,3]
140
+ ; SSE-NEXT: ret{{[l|q]}}
141
+ ;
142
+ ; AVX-LABEL: test_unpackhi_hsub_v4i32:
143
+ ; AVX: ## %bb.0:
144
+ ; AVX-NEXT: vphsubd %xmm3, %xmm1, %xmm0
145
+ ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
146
+ ; AVX-NEXT: ret{{[l|q]}}
99
147
%5 = tail call <4 x i32 > @llvm.x86.ssse3.phsub.d.128 (<4 x i32 > %0 , <4 x i32 > %1 ) #5
100
148
%6 = tail call <4 x i32 > @llvm.x86.ssse3.phsub.d.128 (<4 x i32 > %2 , <4 x i32 > %3 ) #5
101
149
; Mask indices 2,3 pick elements 2,3 of %5; indices 6,7 pick elements 2,3 of %6.
%7 = shufflevector <4 x i32 > %5 , <4 x i32 > %6 , <4 x i32 > <i32 2 , i32 6 , i32 3 , i32 7 >
102
150
ret <4 x i32 > %7
103
151
}
104
152
105
- ;
106
- ; 256-bit Vectors
107
- ;
108
-
109
- define <8 x float > @test_unpacklo_hadd_v8f32 (<8 x float > %0 , <8 x float > %1 , <8 x float > %2 , <8 x float > %3 ) {
110
- ; CHECK-LABEL: test_unpacklo_hadd_v8f32:
111
- ; CHECK: ## %bb.0:
112
- ; CHECK-NEXT: vhaddps %ymm2, %ymm0, %ymm0
113
- ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
114
- ; CHECK-NEXT: ret{{[l|q]}}
115
- %5 = tail call <8 x float > @llvm.x86.avx.hadd.ps.256 (<8 x float > %0 , <8 x float > %1 ) #4
116
- %6 = tail call <8 x float > @llvm.x86.avx.hadd.ps.256 (<8 x float > %2 , <8 x float > %3 ) #4
117
- %7 = shufflevector <8 x float > %5 , <8 x float > %6 , <8 x i32 > <i32 0 , i32 8 , i32 1 , i32 9 , i32 4 , i32 12 , i32 5 , i32 13 >
118
- ret <8 x float > %7
119
- }
120
-
121
- define <8 x float > @test_unpackhi_hadd_v8f32 (<8 x float > %0 , <8 x float > %1 , <8 x float > %2 , <8 x float > %3 ) {
122
- ; CHECK-LABEL: test_unpackhi_hadd_v8f32:
123
- ; CHECK: ## %bb.0:
124
- ; CHECK-NEXT: vhaddps %ymm3, %ymm1, %ymm0
125
- ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
126
- ; CHECK-NEXT: ret{{[l|q]}}
127
- %5 = tail call <8 x float > @llvm.x86.avx.hadd.ps.256 (<8 x float > %0 , <8 x float > %1 ) #4
128
- %6 = tail call <8 x float > @llvm.x86.avx.hadd.ps.256 (<8 x float > %2 , <8 x float > %3 ) #4
129
- %7 = shufflevector <8 x float > %5 , <8 x float > %6 , <8 x i32 > <i32 2 , i32 10 , i32 3 , i32 11 , i32 6 , i32 14 , i32 7 , i32 15 >
130
- ret <8 x float > %7
131
- }
132
-
133
- define <8 x float > @test_unpacklo_hsub_v8f32 (<8 x float > %0 , <8 x float > %1 , <8 x float > %2 , <8 x float > %3 ) {
134
- ; CHECK-LABEL: test_unpacklo_hsub_v8f32:
135
- ; CHECK: ## %bb.0:
136
- ; CHECK-NEXT: vhsubps %ymm2, %ymm0, %ymm0
137
- ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
138
- ; CHECK-NEXT: ret{{[l|q]}}
139
- %5 = tail call <8 x float > @llvm.x86.avx.hsub.ps.256 (<8 x float > %0 , <8 x float > %1 ) #4
140
- %6 = tail call <8 x float > @llvm.x86.avx.hsub.ps.256 (<8 x float > %2 , <8 x float > %3 ) #4
141
- %7 = shufflevector <8 x float > %5 , <8 x float > %6 , <8 x i32 > <i32 0 , i32 8 , i32 1 , i32 9 , i32 4 , i32 12 , i32 5 , i32 13 >
142
- ret <8 x float > %7
143
- }
144
-
145
- define <8 x float > @test_unpackhi_hsub_v8f32 (<8 x float > %0 , <8 x float > %1 , <8 x float > %2 , <8 x float > %3 ) {
146
- ; CHECK-LABEL: test_unpackhi_hsub_v8f32:
147
- ; CHECK: ## %bb.0:
148
- ; CHECK-NEXT: vhsubps %ymm3, %ymm1, %ymm0
149
- ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
150
- ; CHECK-NEXT: ret{{[l|q]}}
151
- %5 = tail call <8 x float > @llvm.x86.avx.hsub.ps.256 (<8 x float > %0 , <8 x float > %1 ) #4
152
- %6 = tail call <8 x float > @llvm.x86.avx.hsub.ps.256 (<8 x float > %2 , <8 x float > %3 ) #4
153
- %7 = shufflevector <8 x float > %5 , <8 x float > %6 , <8 x i32 > <i32 2 , i32 10 , i32 3 , i32 11 , i32 6 , i32 14 , i32 7 , i32 15 >
154
- ret <8 x float > %7
155
- }
156
-
157
- define <8 x i32 > @test_unpacklo_hadd_v8i32 (<8 x i32 > %0 , <8 x i32 > %1 , <8 x i32 > %2 , <8 x i32 > %3 ) {
158
- ; CHECK-LABEL: test_unpacklo_hadd_v8i32:
159
- ; CHECK: ## %bb.0:
160
- ; CHECK-NEXT: vphaddd %ymm2, %ymm0, %ymm0
161
- ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
162
- ; CHECK-NEXT: ret{{[l|q]}}
163
- %5 = tail call <8 x i32 > @llvm.x86.avx2.phadd.d (<8 x i32 > %0 , <8 x i32 > %1 ) #5
164
- %6 = tail call <8 x i32 > @llvm.x86.avx2.phadd.d (<8 x i32 > %2 , <8 x i32 > %3 ) #5
165
- %7 = shufflevector <8 x i32 > %5 , <8 x i32 > %6 , <8 x i32 > <i32 0 , i32 8 , i32 1 , i32 9 , i32 4 , i32 12 , i32 5 , i32 13 >
166
- ret <8 x i32 > %7
167
- }
168
-
169
- define <8 x i32 > @test_unpackhi_hadd_v8i32 (<8 x i32 > %0 , <8 x i32 > %1 , <8 x i32 > %2 , <8 x i32 > %3 ) {
170
- ; CHECK-LABEL: test_unpackhi_hadd_v8i32:
171
- ; CHECK: ## %bb.0:
172
- ; CHECK-NEXT: vphaddd %ymm3, %ymm1, %ymm0
173
- ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
174
- ; CHECK-NEXT: ret{{[l|q]}}
175
- %5 = tail call <8 x i32 > @llvm.x86.avx2.phadd.d (<8 x i32 > %0 , <8 x i32 > %1 ) #5
176
- %6 = tail call <8 x i32 > @llvm.x86.avx2.phadd.d (<8 x i32 > %2 , <8 x i32 > %3 ) #5
177
- %7 = shufflevector <8 x i32 > %5 , <8 x i32 > %6 , <8 x i32 > <i32 2 , i32 10 , i32 3 , i32 11 , i32 6 , i32 14 , i32 7 , i32 15 >
178
- ret <8 x i32 > %7
179
- }
180
-
181
- define <8 x i32 > @test_unpacklo_hsub_v8i32 (<8 x i32 > %0 , <8 x i32 > %1 , <8 x i32 > %2 , <8 x i32 > %3 ) {
182
- ; CHECK-LABEL: test_unpacklo_hsub_v8i32:
183
- ; CHECK: ## %bb.0:
184
- ; CHECK-NEXT: vphsubd %ymm2, %ymm0, %ymm0
185
- ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
186
- ; CHECK-NEXT: ret{{[l|q]}}
187
- %5 = tail call <8 x i32 > @llvm.x86.avx2.phsub.d (<8 x i32 > %0 , <8 x i32 > %1 ) #5
188
- %6 = tail call <8 x i32 > @llvm.x86.avx2.phsub.d (<8 x i32 > %2 , <8 x i32 > %3 ) #5
189
- %7 = shufflevector <8 x i32 > %5 , <8 x i32 > %6 , <8 x i32 > <i32 0 , i32 8 , i32 1 , i32 9 , i32 4 , i32 12 , i32 5 , i32 13 >
190
- ret <8 x i32 > %7
191
- }
192
-
193
- define <8 x i32 > @test_unpackhi_hsub_v8i32 (<8 x i32 > %0 , <8 x i32 > %1 , <8 x i32 > %2 , <8 x i32 > %3 ) {
194
- ; CHECK-LABEL: test_unpackhi_hsub_v8i32:
195
- ; CHECK: ## %bb.0:
196
- ; CHECK-NEXT: vphsubd %ymm3, %ymm1, %ymm0
197
- ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
198
- ; CHECK-NEXT: ret{{[l|q]}}
199
- %5 = tail call <8 x i32 > @llvm.x86.avx2.phsub.d (<8 x i32 > %0 , <8 x i32 > %1 ) #5
200
- %6 = tail call <8 x i32 > @llvm.x86.avx2.phsub.d (<8 x i32 > %2 , <8 x i32 > %3 ) #5
201
- %7 = shufflevector <8 x i32 > %5 , <8 x i32 > %6 , <8 x i32 > <i32 2 , i32 10 , i32 3 , i32 11 , i32 6 , i32 14 , i32 7 , i32 15 >
202
- ret <8 x i32 > %7
203
- }
204
-
205
153
;
206
154
; Special Case
207
155
;
208
156
209
157
define <4 x float > @test_unpacklo_hadd_v4f32_unary (<4 x float > %0 ) {
210
- ; CHECK-LABEL: test_unpacklo_hadd_v4f32_unary:
211
- ; CHECK: ## %bb.0:
212
- ; CHECK-NEXT: vhaddps %xmm0, %xmm0, %xmm0
213
- ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
214
- ; CHECK-NEXT: ret{{[l|q]}}
158
+ ; SSE-LABEL: test_unpacklo_hadd_v4f32_unary:
159
+ ; SSE: ## %bb.0:
160
+ ; SSE-NEXT: haddps %xmm0, %xmm0
161
+ ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
162
+ ; SSE-NEXT: ret{{[l|q]}}
163
+ ;
164
+ ; AVX-LABEL: test_unpacklo_hadd_v4f32_unary:
165
+ ; AVX: ## %bb.0:
166
+ ; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
167
+ ; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
168
+ ; AVX-NEXT: ret{{[l|q]}}
215
169
%2 = tail call <4 x float > @llvm.x86.sse3.hadd.ps (<4 x float > %0 , <4 x float > %0 ) #4
216
170
%3 = shufflevector <4 x float > %2 , <4 x float > %2 , <4 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 >
217
171
ret <4 x float > %3
@@ -231,18 +185,3 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)
231
185
declare <8 x i16 > @llvm.x86.sse2.packssdw.128 (<4 x i32 >, <4 x i32 >)
232
186
declare <16 x i8 > @llvm.x86.sse2.packuswb.128 (<8 x i16 >, <8 x i16 >)
233
187
declare <8 x i16 > @llvm.x86.sse41.packusdw (<4 x i32 >, <4 x i32 >)
234
-
235
- declare <8 x float > @llvm.x86.avx.hadd.ps.256 (<8 x float >, <8 x float >)
236
- declare <8 x float > @llvm.x86.avx.hsub.ps.256 (<8 x float >, <8 x float >)
237
- declare <4 x double > @llvm.x86.avx.hadd.pd.256 (<4 x double >, <4 x double >)
238
- declare <4 x double > @llvm.x86.avx.hsub.pd.256 (<4 x double >, <4 x double >)
239
-
240
- declare <16 x i16 > @llvm.x86.avx2.phadd.w (<16 x i16 >, <16 x i16 >)
241
- declare <8 x i32 > @llvm.x86.avx2.phadd.d (<8 x i32 >, <8 x i32 >)
242
- declare <16 x i16 > @llvm.x86.avx2.phsub.w (<16 x i16 >, <16 x i16 >)
243
- declare <8 x i32 > @llvm.x86.avx2.phsub.d (<8 x i32 >, <8 x i32 >)
244
-
245
- declare <32 x i8 > @llvm.x86.avx2.packsswb (<16 x i16 >, <16 x i16 >)
246
- declare <16 x i16 > @llvm.x86.avx2.packssdw (<8 x i32 >, <8 x i32 >)
247
- declare <32 x i8 > @llvm.x86.avx2.packuswb (<16 x i16 >, <16 x i16 >)
248
- declare <16 x i16 > @llvm.x86.avx2.packusdw (<8 x i32 >, <8 x i32 >)
0 commit comments