Skip to content

Commit 5963229

Browse files
committed
[X86][SSE] Add missing SSE test coverage for permute(hop,hop) folds
This should help avoid bugs like the one reported in rG80dee7965dff.
1 parent 41bf338 commit 5963229

File tree

2 files changed

+219
-166
lines changed

2 files changed

+219
-166
lines changed
Lines changed: 105 additions & 166 deletions
Original file line number | Diff line number | Diff line change
@@ -1,217 +1,171 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s
3-
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s
4-
5-
;
6-
; 128-bit Vectors
7-
;
2+
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
3+
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=AVX
4+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE
5+
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=AVX
86

97
define <4 x float> @test_unpacklo_hadd_v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3) {
10-
; CHECK-LABEL: test_unpacklo_hadd_v4f32:
11-
; CHECK: ## %bb.0:
12-
; CHECK-NEXT: vhaddps %xmm2, %xmm0, %xmm0
13-
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
14-
; CHECK-NEXT: ret{{[l|q]}}
8+
; SSE-LABEL: test_unpacklo_hadd_v4f32:
9+
; SSE: ## %bb.0:
10+
; SSE-NEXT: haddps %xmm2, %xmm0
11+
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
12+
; SSE-NEXT: ret{{[l|q]}}
13+
;
14+
; AVX-LABEL: test_unpacklo_hadd_v4f32:
15+
; AVX: ## %bb.0:
16+
; AVX-NEXT: vhaddps %xmm2, %xmm0, %xmm0
17+
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
18+
; AVX-NEXT: ret{{[l|q]}}
1519
%5 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %0, <4 x float> %1) #4
1620
%6 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %2, <4 x float> %3) #4
1721
%7 = shufflevector <4 x float> %5, <4 x float> %6, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1822
ret <4 x float> %7
1923
}
2024

2125
define <4 x float> @test_unpackhi_hadd_v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3) {
22-
; CHECK-LABEL: test_unpackhi_hadd_v4f32:
23-
; CHECK: ## %bb.0:
24-
; CHECK-NEXT: vhaddps %xmm3, %xmm1, %xmm0
25-
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
26-
; CHECK-NEXT: ret{{[l|q]}}
26+
; SSE-LABEL: test_unpackhi_hadd_v4f32:
27+
; SSE: ## %bb.0:
28+
; SSE-NEXT: movaps %xmm1, %xmm0
29+
; SSE-NEXT: haddps %xmm3, %xmm0
30+
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
31+
; SSE-NEXT: ret{{[l|q]}}
32+
;
33+
; AVX-LABEL: test_unpackhi_hadd_v4f32:
34+
; AVX: ## %bb.0:
35+
; AVX-NEXT: vhaddps %xmm3, %xmm1, %xmm0
36+
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
37+
; AVX-NEXT: ret{{[l|q]}}
2738
%5 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %0, <4 x float> %1) #4
2839
%6 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %2, <4 x float> %3) #4
2940
%7 = shufflevector <4 x float> %5, <4 x float> %6, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
3041
ret <4 x float> %7
3142
}
3243

3344
define <4 x float> @test_unpacklo_hsub_v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3) {
34-
; CHECK-LABEL: test_unpacklo_hsub_v4f32:
35-
; CHECK: ## %bb.0:
36-
; CHECK-NEXT: vhsubps %xmm2, %xmm0, %xmm0
37-
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
38-
; CHECK-NEXT: ret{{[l|q]}}
45+
; SSE-LABEL: test_unpacklo_hsub_v4f32:
46+
; SSE: ## %bb.0:
47+
; SSE-NEXT: hsubps %xmm2, %xmm0
48+
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
49+
; SSE-NEXT: ret{{[l|q]}}
50+
;
51+
; AVX-LABEL: test_unpacklo_hsub_v4f32:
52+
; AVX: ## %bb.0:
53+
; AVX-NEXT: vhsubps %xmm2, %xmm0, %xmm0
54+
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
55+
; AVX-NEXT: ret{{[l|q]}}
3956
%5 = tail call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %0, <4 x float> %1) #4
4057
%6 = tail call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %2, <4 x float> %3) #4
4158
%7 = shufflevector <4 x float> %5, <4 x float> %6, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
4259
ret <4 x float> %7
4360
}
4461

4562
define <4 x float> @test_unpackhi_hsub_v4f32(<4 x float> %0, <4 x float> %1, <4 x float> %2, <4 x float> %3) {
46-
; CHECK-LABEL: test_unpackhi_hsub_v4f32:
47-
; CHECK: ## %bb.0:
48-
; CHECK-NEXT: vhsubps %xmm3, %xmm1, %xmm0
49-
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
50-
; CHECK-NEXT: ret{{[l|q]}}
63+
; SSE-LABEL: test_unpackhi_hsub_v4f32:
64+
; SSE: ## %bb.0:
65+
; SSE-NEXT: movaps %xmm1, %xmm0
66+
; SSE-NEXT: hsubps %xmm3, %xmm0
67+
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
68+
; SSE-NEXT: ret{{[l|q]}}
69+
;
70+
; AVX-LABEL: test_unpackhi_hsub_v4f32:
71+
; AVX: ## %bb.0:
72+
; AVX-NEXT: vhsubps %xmm3, %xmm1, %xmm0
73+
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
74+
; AVX-NEXT: ret{{[l|q]}}
5175
%5 = tail call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %0, <4 x float> %1) #4
5276
%6 = tail call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %2, <4 x float> %3) #4
5377
%7 = shufflevector <4 x float> %5, <4 x float> %6, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
5478
ret <4 x float> %7
5579
}
5680

5781
define <4 x i32> @test_unpacklo_hadd_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3) {
58-
; CHECK-LABEL: test_unpacklo_hadd_v4i32:
59-
; CHECK: ## %bb.0:
60-
; CHECK-NEXT: vphaddd %xmm2, %xmm0, %xmm0
61-
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
62-
; CHECK-NEXT: ret{{[l|q]}}
82+
; SSE-LABEL: test_unpacklo_hadd_v4i32:
83+
; SSE: ## %bb.0:
84+
; SSE-NEXT: phaddd %xmm2, %xmm0
85+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
86+
; SSE-NEXT: ret{{[l|q]}}
87+
;
88+
; AVX-LABEL: test_unpacklo_hadd_v4i32:
89+
; AVX: ## %bb.0:
90+
; AVX-NEXT: vphaddd %xmm2, %xmm0, %xmm0
91+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
92+
; AVX-NEXT: ret{{[l|q]}}
6393
%5 = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %0, <4 x i32> %1) #5
6494
%6 = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %2, <4 x i32> %3) #5
6595
%7 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
6696
ret <4 x i32> %7
6797
}
6898

6999
define <4 x i32> @test_unpackhi_hadd_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3) {
70-
; CHECK-LABEL: test_unpackhi_hadd_v4i32:
71-
; CHECK: ## %bb.0:
72-
; CHECK-NEXT: vphaddd %xmm3, %xmm1, %xmm0
73-
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
74-
; CHECK-NEXT: ret{{[l|q]}}
100+
; SSE-LABEL: test_unpackhi_hadd_v4i32:
101+
; SSE: ## %bb.0:
102+
; SSE-NEXT: phaddd %xmm3, %xmm1
103+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,1,3]
104+
; SSE-NEXT: ret{{[l|q]}}
105+
;
106+
; AVX-LABEL: test_unpackhi_hadd_v4i32:
107+
; AVX: ## %bb.0:
108+
; AVX-NEXT: vphaddd %xmm3, %xmm1, %xmm0
109+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
110+
; AVX-NEXT: ret{{[l|q]}}
75111
%5 = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %0, <4 x i32> %1) #5
76112
%6 = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %2, <4 x i32> %3) #5
77113
%7 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
78114
ret <4 x i32> %7
79115
}
80116

81117
define <4 x i32> @test_unpacklo_hsub_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3) {
82-
; CHECK-LABEL: test_unpacklo_hsub_v4i32:
83-
; CHECK: ## %bb.0:
84-
; CHECK-NEXT: vphsubd %xmm2, %xmm0, %xmm0
85-
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
86-
; CHECK-NEXT: ret{{[l|q]}}
118+
; SSE-LABEL: test_unpacklo_hsub_v4i32:
119+
; SSE: ## %bb.0:
120+
; SSE-NEXT: phsubd %xmm2, %xmm0
121+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
122+
; SSE-NEXT: ret{{[l|q]}}
123+
;
124+
; AVX-LABEL: test_unpacklo_hsub_v4i32:
125+
; AVX: ## %bb.0:
126+
; AVX-NEXT: vphsubd %xmm2, %xmm0, %xmm0
127+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
128+
; AVX-NEXT: ret{{[l|q]}}
87129
%5 = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %0, <4 x i32> %1) #5
88130
%6 = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %2, <4 x i32> %3) #5
89131
%7 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
90132
ret <4 x i32> %7
91133
}
92134

93135
define <4 x i32> @test_unpackhi_hsub_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2, <4 x i32> %3) {
94-
; CHECK-LABEL: test_unpackhi_hsub_v4i32:
95-
; CHECK: ## %bb.0:
96-
; CHECK-NEXT: vphsubd %xmm3, %xmm1, %xmm0
97-
; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
98-
; CHECK-NEXT: ret{{[l|q]}}
136+
; SSE-LABEL: test_unpackhi_hsub_v4i32:
137+
; SSE: ## %bb.0:
138+
; SSE-NEXT: phsubd %xmm3, %xmm1
139+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,1,3]
140+
; SSE-NEXT: ret{{[l|q]}}
141+
;
142+
; AVX-LABEL: test_unpackhi_hsub_v4i32:
143+
; AVX: ## %bb.0:
144+
; AVX-NEXT: vphsubd %xmm3, %xmm1, %xmm0
145+
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
146+
; AVX-NEXT: ret{{[l|q]}}
99147
%5 = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %0, <4 x i32> %1) #5
100148
%6 = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %2, <4 x i32> %3) #5
101149
%7 = shufflevector <4 x i32> %5, <4 x i32> %6, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
102150
ret <4 x i32> %7
103151
}
104152

105-
;
106-
; 256-bit Vectors
107-
;
108-
109-
define <8 x float> @test_unpacklo_hadd_v8f32(<8 x float> %0, <8 x float> %1, <8 x float> %2, <8 x float> %3) {
110-
; CHECK-LABEL: test_unpacklo_hadd_v8f32:
111-
; CHECK: ## %bb.0:
112-
; CHECK-NEXT: vhaddps %ymm2, %ymm0, %ymm0
113-
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
114-
; CHECK-NEXT: ret{{[l|q]}}
115-
%5 = tail call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %0, <8 x float> %1) #4
116-
%6 = tail call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %2, <8 x float> %3) #4
117-
%7 = shufflevector <8 x float> %5, <8 x float> %6, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
118-
ret <8 x float> %7
119-
}
120-
121-
define <8 x float> @test_unpackhi_hadd_v8f32(<8 x float> %0, <8 x float> %1, <8 x float> %2, <8 x float> %3) {
122-
; CHECK-LABEL: test_unpackhi_hadd_v8f32:
123-
; CHECK: ## %bb.0:
124-
; CHECK-NEXT: vhaddps %ymm3, %ymm1, %ymm0
125-
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
126-
; CHECK-NEXT: ret{{[l|q]}}
127-
%5 = tail call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %0, <8 x float> %1) #4
128-
%6 = tail call <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float> %2, <8 x float> %3) #4
129-
%7 = shufflevector <8 x float> %5, <8 x float> %6, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
130-
ret <8 x float> %7
131-
}
132-
133-
define <8 x float> @test_unpacklo_hsub_v8f32(<8 x float> %0, <8 x float> %1, <8 x float> %2, <8 x float> %3) {
134-
; CHECK-LABEL: test_unpacklo_hsub_v8f32:
135-
; CHECK: ## %bb.0:
136-
; CHECK-NEXT: vhsubps %ymm2, %ymm0, %ymm0
137-
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
138-
; CHECK-NEXT: ret{{[l|q]}}
139-
%5 = tail call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %0, <8 x float> %1) #4
140-
%6 = tail call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %2, <8 x float> %3) #4
141-
%7 = shufflevector <8 x float> %5, <8 x float> %6, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
142-
ret <8 x float> %7
143-
}
144-
145-
define <8 x float> @test_unpackhi_hsub_v8f32(<8 x float> %0, <8 x float> %1, <8 x float> %2, <8 x float> %3) {
146-
; CHECK-LABEL: test_unpackhi_hsub_v8f32:
147-
; CHECK: ## %bb.0:
148-
; CHECK-NEXT: vhsubps %ymm3, %ymm1, %ymm0
149-
; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
150-
; CHECK-NEXT: ret{{[l|q]}}
151-
%5 = tail call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %0, <8 x float> %1) #4
152-
%6 = tail call <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float> %2, <8 x float> %3) #4
153-
%7 = shufflevector <8 x float> %5, <8 x float> %6, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
154-
ret <8 x float> %7
155-
}
156-
157-
define <8 x i32> @test_unpacklo_hadd_v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, <8 x i32> %3) {
158-
; CHECK-LABEL: test_unpacklo_hadd_v8i32:
159-
; CHECK: ## %bb.0:
160-
; CHECK-NEXT: vphaddd %ymm2, %ymm0, %ymm0
161-
; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
162-
; CHECK-NEXT: ret{{[l|q]}}
163-
%5 = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %0, <8 x i32> %1) #5
164-
%6 = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %2, <8 x i32> %3) #5
165-
%7 = shufflevector <8 x i32> %5, <8 x i32> %6, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
166-
ret <8 x i32> %7
167-
}
168-
169-
define <8 x i32> @test_unpackhi_hadd_v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, <8 x i32> %3) {
170-
; CHECK-LABEL: test_unpackhi_hadd_v8i32:
171-
; CHECK: ## %bb.0:
172-
; CHECK-NEXT: vphaddd %ymm3, %ymm1, %ymm0
173-
; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
174-
; CHECK-NEXT: ret{{[l|q]}}
175-
%5 = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %0, <8 x i32> %1) #5
176-
%6 = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %2, <8 x i32> %3) #5
177-
%7 = shufflevector <8 x i32> %5, <8 x i32> %6, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
178-
ret <8 x i32> %7
179-
}
180-
181-
define <8 x i32> @test_unpacklo_hsub_v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, <8 x i32> %3) {
182-
; CHECK-LABEL: test_unpacklo_hsub_v8i32:
183-
; CHECK: ## %bb.0:
184-
; CHECK-NEXT: vphsubd %ymm2, %ymm0, %ymm0
185-
; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
186-
; CHECK-NEXT: ret{{[l|q]}}
187-
%5 = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %0, <8 x i32> %1) #5
188-
%6 = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %2, <8 x i32> %3) #5
189-
%7 = shufflevector <8 x i32> %5, <8 x i32> %6, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
190-
ret <8 x i32> %7
191-
}
192-
193-
define <8 x i32> @test_unpackhi_hsub_v8i32(<8 x i32> %0, <8 x i32> %1, <8 x i32> %2, <8 x i32> %3) {
194-
; CHECK-LABEL: test_unpackhi_hsub_v8i32:
195-
; CHECK: ## %bb.0:
196-
; CHECK-NEXT: vphsubd %ymm3, %ymm1, %ymm0
197-
; CHECK-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
198-
; CHECK-NEXT: ret{{[l|q]}}
199-
%5 = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %0, <8 x i32> %1) #5
200-
%6 = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %2, <8 x i32> %3) #5
201-
%7 = shufflevector <8 x i32> %5, <8 x i32> %6, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
202-
ret <8 x i32> %7
203-
}
204-
205153
;
206154
; Special Case
207155
;
208156

209157
define <4 x float> @test_unpacklo_hadd_v4f32_unary(<4 x float> %0) {
210-
; CHECK-LABEL: test_unpacklo_hadd_v4f32_unary:
211-
; CHECK: ## %bb.0:
212-
; CHECK-NEXT: vhaddps %xmm0, %xmm0, %xmm0
213-
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
214-
; CHECK-NEXT: ret{{[l|q]}}
158+
; SSE-LABEL: test_unpacklo_hadd_v4f32_unary:
159+
; SSE: ## %bb.0:
160+
; SSE-NEXT: haddps %xmm0, %xmm0
161+
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0,0,1,1]
162+
; SSE-NEXT: ret{{[l|q]}}
163+
;
164+
; AVX-LABEL: test_unpacklo_hadd_v4f32_unary:
165+
; AVX: ## %bb.0:
166+
; AVX-NEXT: vhaddps %xmm0, %xmm0, %xmm0
167+
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
168+
; AVX-NEXT: ret{{[l|q]}}
215169
%2 = tail call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %0, <4 x float> %0) #4
216170
%3 = shufflevector <4 x float> %2, <4 x float> %2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
217171
ret <4 x float> %3
@@ -231,18 +185,3 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>)
231185
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>)
232186
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
233187
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)
234-
235-
declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>)
236-
declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>)
237-
declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>)
238-
declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>)
239-
240-
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>)
241-
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>)
242-
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>)
243-
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>)
244-
245-
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>)
246-
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>)
247-
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>)
248-
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>)

0 commit comments

Comments
 (0)