Skip to content

Commit 47d66bf

Browse files
authored
[X86] Add tests showing failure to concat fcmp instructions together (llvm#170313)
Some of the AVX512 cases are already handled by llvm#170295
1 parent 23e6dbf commit 47d66bf

File tree

1 file changed

+330
-0
lines changed

1 file changed

+330
-0
lines changed
Lines changed: 330 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,330 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 | FileCheck %s --check-prefixes=SSE
3+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=SSE
4+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX1OR2,AVX1
5+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=AVX1OR2,AVX2
6+
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=AVX512
7+
8+
; Concatenate two <2 x i1> results of `fcmp ogt %aN, zeroinitializer` into one
; <4 x i1> and return it bitcast to an i4 bitmask.
; Codegen recorded by the assertions below:
;   - SSE and AVX1OR2 prefixes - two separate 128-bit (v)cmpltpd compares that
;     share a zeroed register, merged with (v)shufps and read by (v)movmskps.
;   - AVX512 prefix - two 128-bit vcmpltpd compares into %k0 and %k1, merged
;     with kshiftlb $2 + korw; the compares are not folded into one wider op.
define i4 @concat_fcmp_v4f64_v2f64(<2 x double> %a0, <2 x double> %a1) {
9+
; SSE-LABEL: concat_fcmp_v4f64_v2f64:
10+
; SSE: # %bb.0:
11+
; SSE-NEXT: xorpd %xmm2, %xmm2
12+
; SSE-NEXT: xorpd %xmm3, %xmm3
13+
; SSE-NEXT: cmpltpd %xmm0, %xmm3
14+
; SSE-NEXT: cmpltpd %xmm1, %xmm2
15+
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm2[0,2]
16+
; SSE-NEXT: movmskps %xmm3, %eax
17+
; SSE-NEXT: # kill: def $al killed $al killed $eax
18+
; SSE-NEXT: retq
19+
;
20+
; AVX1OR2-LABEL: concat_fcmp_v4f64_v2f64:
21+
; AVX1OR2: # %bb.0:
22+
; AVX1OR2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
23+
; AVX1OR2-NEXT: vcmpltpd %xmm0, %xmm2, %xmm0
24+
; AVX1OR2-NEXT: vcmpltpd %xmm1, %xmm2, %xmm1
25+
; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
26+
; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
27+
; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax
28+
; AVX1OR2-NEXT: retq
29+
;
30+
; AVX512-LABEL: concat_fcmp_v4f64_v2f64:
31+
; AVX512: # %bb.0:
32+
; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2
33+
; AVX512-NEXT: vcmpltpd %xmm0, %xmm2, %k0
34+
; AVX512-NEXT: vcmpltpd %xmm1, %xmm2, %k1
35+
; AVX512-NEXT: kshiftlb $2, %k1, %k1
36+
; AVX512-NEXT: korw %k1, %k0, %k0
37+
; AVX512-NEXT: kmovd %k0, %eax
38+
; AVX512-NEXT: # kill: def $al killed $al killed $eax
39+
; AVX512-NEXT: retq
40+
%v0 = fcmp ogt <2 x double> %a0, zeroinitializer
41+
%v1 = fcmp ogt <2 x double> %a1, zeroinitializer
42+
; Mask <0,1,2,3> takes both lanes of %v0 followed by both lanes of %v1,
; i.e. a plain concatenation of the two compare results.
%v = shufflevector <2 x i1> %v0, <2 x i1> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
43+
%r = bitcast <4 x i1> %v to i4
44+
ret i4 %r
45+
}
46+
47+
; Concatenate two <4 x i1> results of `fcmp oeq %aN, zeroinitializer` into one
; <8 x i1> and return it bitcast to an i8 bitmask.
; Codegen recorded by the assertions below:
;   - SSE and AVX1OR2 prefixes - two separate 128-bit (v)cmpeqps compares,
;     narrowed with (v)packssdw + (v)packsswb and read by (v)pmovmskb.
;   - AVX512 prefix - the inputs are joined with vinsertf128 first, then a
;     single 256-bit vcmpeqps writes the mask register directly.
define i8 @concat_fcmp_v8f32_v4f32(<4 x float> %a0, <4 x float> %a1) {
48+
; SSE-LABEL: concat_fcmp_v8f32_v4f32:
49+
; SSE: # %bb.0:
50+
; SSE-NEXT: xorps %xmm2, %xmm2
51+
; SSE-NEXT: cmpeqps %xmm2, %xmm0
52+
; SSE-NEXT: cmpeqps %xmm2, %xmm1
53+
; SSE-NEXT: packssdw %xmm1, %xmm0
54+
; SSE-NEXT: packsswb %xmm0, %xmm0
55+
; SSE-NEXT: pmovmskb %xmm0, %eax
56+
; SSE-NEXT: # kill: def $al killed $al killed $eax
57+
; SSE-NEXT: retq
58+
;
59+
; AVX1OR2-LABEL: concat_fcmp_v8f32_v4f32:
60+
; AVX1OR2: # %bb.0:
61+
; AVX1OR2-NEXT: vxorps %xmm2, %xmm2, %xmm2
62+
; AVX1OR2-NEXT: vcmpeqps %xmm2, %xmm0, %xmm0
63+
; AVX1OR2-NEXT: vcmpeqps %xmm2, %xmm1, %xmm1
64+
; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
65+
; AVX1OR2-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
66+
; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
67+
; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax
68+
; AVX1OR2-NEXT: retq
69+
;
70+
; AVX512-LABEL: concat_fcmp_v8f32_v4f32:
71+
; AVX512: # %bb.0:
72+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
73+
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
74+
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
75+
; AVX512-NEXT: vcmpeqps %ymm1, %ymm0, %k0
76+
; AVX512-NEXT: kmovd %k0, %eax
77+
; AVX512-NEXT: # kill: def $al killed $al killed $eax
78+
; AVX512-NEXT: vzeroupper
79+
; AVX512-NEXT: retq
80+
%v0 = fcmp oeq <4 x float> %a0, zeroinitializer
81+
%v1 = fcmp oeq <4 x float> %a1, zeroinitializer
82+
; Mask <0..7> takes all four lanes of %v0 followed by all four lanes of %v1,
; i.e. a plain concatenation of the two compare results.
%v = shufflevector <4 x i1> %v0, <4 x i1> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
83+
%r = bitcast <8 x i1> %v to i8
84+
ret i8 %r
85+
}
86+
87+
; Concatenate four <2 x i1> results of `fcmp olt %aN, zeroinitializer` (in two
; steps, 2+2 then 4+4) into one <8 x i1> and return it bitcast to an i8 bitmask.
; Codegen recorded by the assertions below:
;   - SSE and AVX1OR2 prefixes - four separate 128-bit (v)cmpltpd compares,
;     narrowed by a chain of (v)packssdw / (v)packsswb plus a dword shuffle
;     before (v)pmovmskb.
;   - AVX512 prefix - the four inputs are joined with vinsertf128 and
;     vinsertf64x4, then a single 512-bit vcmpltpd writes the mask register.
; NOTE(review): the dword shuffle before (v)pmovmskb is [0,3,2,3] in the SSE
; body but [0,3,0,3] in the AVX1OR2 body. Only $al of the result is used, so
; only elements 0 and 1 of that shuffle affect the returned value; confirm the
; recorded masks by regenerating with utils/update_llc_test_checks.py.
define i8 @concat_fcmp_v8f64_v2f64(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> %a3) {
88+
; SSE-LABEL: concat_fcmp_v8f64_v2f64:
89+
; SSE: # %bb.0:
90+
; SSE-NEXT: xorpd %xmm4, %xmm4
91+
; SSE-NEXT: cmpltpd %xmm4, %xmm0
92+
; SSE-NEXT: cmpltpd %xmm4, %xmm1
93+
; SSE-NEXT: packssdw %xmm1, %xmm0
94+
; SSE-NEXT: cmpltpd %xmm4, %xmm2
95+
; SSE-NEXT: cmpltpd %xmm4, %xmm3
96+
; SSE-NEXT: packssdw %xmm3, %xmm2
97+
; SSE-NEXT: packssdw %xmm0, %xmm0
98+
; SSE-NEXT: packssdw %xmm2, %xmm2
99+
; SSE-NEXT: packsswb %xmm2, %xmm0
100+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,3,2,3]
101+
; SSE-NEXT: pmovmskb %xmm0, %eax
102+
; SSE-NEXT: # kill: def $al killed $al killed $eax
103+
; SSE-NEXT: retq
104+
;
105+
; AVX1OR2-LABEL: concat_fcmp_v8f64_v2f64:
106+
; AVX1OR2: # %bb.0:
107+
; AVX1OR2-NEXT: vxorpd %xmm4, %xmm4, %xmm4
108+
; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm0, %xmm0
109+
; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm1, %xmm1
110+
; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
111+
; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm2, %xmm1
112+
; AVX1OR2-NEXT: vcmpltpd %xmm4, %xmm3, %xmm2
113+
; AVX1OR2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
114+
; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm1, %xmm1
115+
; AVX1OR2-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
116+
; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
117+
; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,3,0,3]
118+
; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
119+
; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax
120+
; AVX1OR2-NEXT: retq
121+
;
122+
; AVX512-LABEL: concat_fcmp_v8f64_v2f64:
123+
; AVX512: # %bb.0:
124+
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
125+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
126+
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
127+
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
128+
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
129+
; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
130+
; AVX512-NEXT: vcmpltpd %zmm1, %zmm0, %k0
131+
; AVX512-NEXT: kmovd %k0, %eax
132+
; AVX512-NEXT: # kill: def $al killed $al killed $eax
133+
; AVX512-NEXT: vzeroupper
134+
; AVX512-NEXT: retq
135+
%v0 = fcmp olt <2 x double> %a0, zeroinitializer
136+
%v1 = fcmp olt <2 x double> %a1, zeroinitializer
137+
%v2 = fcmp olt <2 x double> %a2, zeroinitializer
138+
%v3 = fcmp olt <2 x double> %a3, zeroinitializer
139+
; The three shuffles below all use identity masks over both operands, so they
; are plain concatenations: (%v0,%v1) -> %v01, (%v2,%v3) -> %v23, then both.
%v01 = shufflevector <2 x i1> %v0, <2 x i1> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
140+
%v23 = shufflevector <2 x i1> %v2, <2 x i1> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
141+
%v = shufflevector <4 x i1> %v01, <4 x i1> %v23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
142+
%r = bitcast <8 x i1> %v to i8
143+
ret i8 %r
144+
}
145+
146+
; Concatenate four <4 x i1> results of `fcmp oge %aN, zeroinitializer` (in two
; steps, 4+4 then 8+8) into one <16 x i1> and return it bitcast to an i16
; bitmask.
; Codegen recorded by the assertions below:
;   - SSE and AVX1OR2 prefixes - four separate 128-bit (v)cmpleps compares with
;     a zeroed register as the other operand, narrowed with (v)packssdw +
;     (v)packsswb and read by (v)pmovmskb. The SSE body re-zeroes %xmm0 between
;     compares because the two-operand cmpleps overwrites its destination.
;   - AVX512 prefix - the four inputs are joined with vinsertf128 and
;     vinsertf64x4, then a single 512-bit vcmpleps writes the mask register.
define i16 @concat_fcmp_v16f32_v4f32(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> %a3) {
147+
; SSE-LABEL: concat_fcmp_v16f32_v4f32:
148+
; SSE: # %bb.0:
149+
; SSE-NEXT: xorps %xmm4, %xmm4
150+
; SSE-NEXT: xorps %xmm5, %xmm5
151+
; SSE-NEXT: cmpleps %xmm0, %xmm5
152+
; SSE-NEXT: xorps %xmm0, %xmm0
153+
; SSE-NEXT: cmpleps %xmm1, %xmm0
154+
; SSE-NEXT: packssdw %xmm0, %xmm5
155+
; SSE-NEXT: xorps %xmm0, %xmm0
156+
; SSE-NEXT: cmpleps %xmm2, %xmm0
157+
; SSE-NEXT: cmpleps %xmm3, %xmm4
158+
; SSE-NEXT: packssdw %xmm4, %xmm0
159+
; SSE-NEXT: packsswb %xmm0, %xmm5
160+
; SSE-NEXT: pmovmskb %xmm5, %eax
161+
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
162+
; SSE-NEXT: retq
163+
;
164+
; AVX1OR2-LABEL: concat_fcmp_v16f32_v4f32:
165+
; AVX1OR2: # %bb.0:
166+
; AVX1OR2-NEXT: vxorps %xmm4, %xmm4, %xmm4
167+
; AVX1OR2-NEXT: vcmpleps %xmm0, %xmm4, %xmm0
168+
; AVX1OR2-NEXT: vcmpleps %xmm1, %xmm4, %xmm1
169+
; AVX1OR2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
170+
; AVX1OR2-NEXT: vcmpleps %xmm2, %xmm4, %xmm1
171+
; AVX1OR2-NEXT: vcmpleps %xmm3, %xmm4, %xmm2
172+
; AVX1OR2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
173+
; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
174+
; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
175+
; AVX1OR2-NEXT: # kill: def $ax killed $ax killed $eax
176+
; AVX1OR2-NEXT: retq
177+
;
178+
; AVX512-LABEL: concat_fcmp_v16f32_v4f32:
179+
; AVX512: # %bb.0:
180+
; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $ymm2
181+
; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
182+
; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
183+
; AVX512-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
184+
; AVX512-NEXT: vinsertf64x4 $1, %ymm2, %zmm0, %zmm0
185+
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
186+
; AVX512-NEXT: vcmpleps %zmm0, %zmm1, %k0
187+
; AVX512-NEXT: kmovd %k0, %eax
188+
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
189+
; AVX512-NEXT: vzeroupper
190+
; AVX512-NEXT: retq
191+
%v0 = fcmp oge <4 x float> %a0, zeroinitializer
192+
%v1 = fcmp oge <4 x float> %a1, zeroinitializer
193+
%v2 = fcmp oge <4 x float> %a2, zeroinitializer
194+
%v3 = fcmp oge <4 x float> %a3, zeroinitializer
195+
; The three shuffles below all use identity masks over both operands, so they
; are plain concatenations: (%v0,%v1) -> %v01, (%v2,%v3) -> %v23, then both.
%v01 = shufflevector <4 x i1> %v0, <4 x i1> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
196+
%v23 = shufflevector <4 x i1> %v2, <4 x i1> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
197+
%v = shufflevector <8 x i1> %v01, <8 x i1> %v23, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
198+
%r = bitcast <16 x i1> %v to i16
199+
ret i16 %r
200+
}
201+
202+
; Concatenate two <4 x i1> results of `fcmp one %aN, zeroinitializer` into one
; <8 x i1> and return it bitcast to an i8 bitmask.
; Codegen recorded by the assertions below:
;   - SSE prefix - each 256-bit input arrives as two xmm registers, and every
;     128-bit piece is compared with cmpneqpd and cmpordpd whose results are
;     combined with andpd, followed by a shuffle/pack sequence and pmovmskb.
;   - AVX1 and AVX2 prefixes - two separate 256-bit vcmpneq_oqpd compares. The
;     two bodies differ only in how words are narrowed to bytes
;     (vpsrlw + vpackuswb versus vpshufb), which is why this function does not
;     use the shared AVX1OR2 prefix.
;   - AVX512 prefix - the inputs are joined with vinsertf64x4, then a single
;     512-bit vcmpneq_oqpd writes the mask register.
define i8 @concat_fcmp_v8f64_v4f64(<4 x double> %a0, <4 x double> %a1) {
203+
; SSE-LABEL: concat_fcmp_v8f64_v4f64:
204+
; SSE: # %bb.0:
205+
; SSE-NEXT: xorpd %xmm4, %xmm4
206+
; SSE-NEXT: movapd %xmm1, %xmm5
207+
; SSE-NEXT: cmpneqpd %xmm4, %xmm5
208+
; SSE-NEXT: cmpordpd %xmm4, %xmm1
209+
; SSE-NEXT: andpd %xmm5, %xmm1
210+
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
211+
; SSE-NEXT: movapd %xmm0, %xmm5
212+
; SSE-NEXT: cmpneqpd %xmm4, %xmm5
213+
; SSE-NEXT: cmpordpd %xmm4, %xmm0
214+
; SSE-NEXT: andpd %xmm5, %xmm0
215+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
216+
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
217+
; SSE-NEXT: movapd %xmm3, %xmm1
218+
; SSE-NEXT: cmpneqpd %xmm4, %xmm1
219+
; SSE-NEXT: cmpordpd %xmm4, %xmm3
220+
; SSE-NEXT: andpd %xmm1, %xmm3
221+
; SSE-NEXT: movapd %xmm2, %xmm1
222+
; SSE-NEXT: cmpneqpd %xmm4, %xmm1
223+
; SSE-NEXT: cmpordpd %xmm4, %xmm2
224+
; SSE-NEXT: andpd %xmm1, %xmm2
225+
; SSE-NEXT: packssdw %xmm3, %xmm2
226+
; SSE-NEXT: packssdw %xmm2, %xmm2
227+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
228+
; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,1,3,4,5,6,7]
229+
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
230+
; SSE-NEXT: packsswb %xmm0, %xmm0
231+
; SSE-NEXT: pmovmskb %xmm0, %eax
232+
; SSE-NEXT: # kill: def $al killed $al killed $eax
233+
; SSE-NEXT: retq
234+
;
235+
; AVX1-LABEL: concat_fcmp_v8f64_v4f64:
236+
; AVX1: # %bb.0:
237+
; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2
238+
; AVX1-NEXT: vcmpneq_oqpd %ymm2, %ymm0, %ymm0
239+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
240+
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
241+
; AVX1-NEXT: vcmpneq_oqpd %ymm2, %ymm1, %ymm1
242+
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
243+
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
244+
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
245+
; AVX1-NEXT: vpsrlw $8, %xmm0, %xmm0
246+
; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
247+
; AVX1-NEXT: vpmovmskb %xmm0, %eax
248+
; AVX1-NEXT: # kill: def $al killed $al killed $eax
249+
; AVX1-NEXT: vzeroupper
250+
; AVX1-NEXT: retq
251+
;
252+
; AVX2-LABEL: concat_fcmp_v8f64_v4f64:
253+
; AVX2: # %bb.0:
254+
; AVX2-NEXT: vxorpd %xmm2, %xmm2, %xmm2
255+
; AVX2-NEXT: vcmpneq_oqpd %ymm2, %ymm0, %ymm0
256+
; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm3
257+
; AVX2-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
258+
; AVX2-NEXT: vcmpneq_oqpd %ymm2, %ymm1, %ymm1
259+
; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2
260+
; AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2]
261+
; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
262+
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,3,5,7,9,11,13,15,u,u,u,u,u,u,u,u]
263+
; AVX2-NEXT: vpmovmskb %xmm0, %eax
264+
; AVX2-NEXT: # kill: def $al killed $al killed $eax
265+
; AVX2-NEXT: vzeroupper
266+
; AVX2-NEXT: retq
267+
;
268+
; AVX512-LABEL: concat_fcmp_v8f64_v4f64:
269+
; AVX512: # %bb.0:
270+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
271+
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
272+
; AVX512-NEXT: vxorpd %xmm1, %xmm1, %xmm1
273+
; AVX512-NEXT: vcmpneq_oqpd %zmm1, %zmm0, %k0
274+
; AVX512-NEXT: kmovd %k0, %eax
275+
; AVX512-NEXT: # kill: def $al killed $al killed $eax
276+
; AVX512-NEXT: vzeroupper
277+
; AVX512-NEXT: retq
278+
%v0 = fcmp one <4 x double> %a0, zeroinitializer
279+
%v1 = fcmp one <4 x double> %a1, zeroinitializer
280+
; Mask <0..7> takes all four lanes of %v0 followed by all four lanes of %v1,
; i.e. a plain concatenation of the two compare results.
%v = shufflevector <4 x i1> %v0, <4 x i1> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
281+
%r = bitcast <8 x i1> %v to i8
282+
ret i8 %r
283+
}
284+
285+
; Concatenate two <8 x i1> results of `fcmp ole %aN, zeroinitializer` into one
; <16 x i1> and return it bitcast to an i16 bitmask.
; Codegen recorded by the assertions below:
;   - SSE prefix - each 256-bit input arrives as two xmm registers, giving four
;     128-bit cmpleps compares narrowed with packssdw + packsswb and read by
;     pmovmskb.
;   - AVX1OR2 prefix - two separate 256-bit vcmpleps compares, each split with
;     vextractf128 and narrowed with vpackssdw + vpacksswb before vpmovmskb.
;   - AVX512 prefix - the inputs are joined with vinsertf64x4, then a single
;     512-bit vcmpleps writes the mask register.
define i16 @concat_fcmp_v16f32_v8f32(<8 x float> %a0, <8 x float> %a1) {
286+
; SSE-LABEL: concat_fcmp_v16f32_v8f32:
287+
; SSE: # %bb.0:
288+
; SSE-NEXT: xorps %xmm4, %xmm4
289+
; SSE-NEXT: cmpleps %xmm4, %xmm1
290+
; SSE-NEXT: cmpleps %xmm4, %xmm0
291+
; SSE-NEXT: packssdw %xmm1, %xmm0
292+
; SSE-NEXT: cmpleps %xmm4, %xmm3
293+
; SSE-NEXT: cmpleps %xmm4, %xmm2
294+
; SSE-NEXT: packssdw %xmm3, %xmm2
295+
; SSE-NEXT: packsswb %xmm2, %xmm0
296+
; SSE-NEXT: pmovmskb %xmm0, %eax
297+
; SSE-NEXT: # kill: def $ax killed $ax killed $eax
298+
; SSE-NEXT: retq
299+
;
300+
; AVX1OR2-LABEL: concat_fcmp_v16f32_v8f32:
301+
; AVX1OR2: # %bb.0:
302+
; AVX1OR2-NEXT: vxorps %xmm2, %xmm2, %xmm2
303+
; AVX1OR2-NEXT: vcmpleps %ymm2, %ymm0, %ymm0
304+
; AVX1OR2-NEXT: vextractf128 $1, %ymm0, %xmm3
305+
; AVX1OR2-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
306+
; AVX1OR2-NEXT: vcmpleps %ymm2, %ymm1, %ymm1
307+
; AVX1OR2-NEXT: vextractf128 $1, %ymm1, %xmm2
308+
; AVX1OR2-NEXT: vpackssdw %xmm2, %xmm1, %xmm1
309+
; AVX1OR2-NEXT: vpacksswb %xmm1, %xmm0, %xmm0
310+
; AVX1OR2-NEXT: vpmovmskb %xmm0, %eax
311+
; AVX1OR2-NEXT: # kill: def $ax killed $ax killed $eax
312+
; AVX1OR2-NEXT: vzeroupper
313+
; AVX1OR2-NEXT: retq
314+
;
315+
; AVX512-LABEL: concat_fcmp_v16f32_v8f32:
316+
; AVX512: # %bb.0:
317+
; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
318+
; AVX512-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
319+
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
320+
; AVX512-NEXT: vcmpleps %zmm1, %zmm0, %k0
321+
; AVX512-NEXT: kmovd %k0, %eax
322+
; AVX512-NEXT: # kill: def $ax killed $ax killed $eax
323+
; AVX512-NEXT: vzeroupper
324+
; AVX512-NEXT: retq
325+
%v0 = fcmp ole <8 x float> %a0, zeroinitializer
326+
%v1 = fcmp ole <8 x float> %a1, zeroinitializer
327+
; Mask <0..15> takes all eight lanes of %v0 followed by all eight lanes of
; %v1, i.e. a plain concatenation of the two compare results.
%v = shufflevector <8 x i1> %v0, <8 x i1> %v1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
328+
%r = bitcast <16 x i1> %v to i16
329+
ret i16 %r
330+
}

0 commit comments

Comments
 (0)