@@ -82,13 +82,11 @@ define <2 x i32> @strict_vector_fptosi_v2f16_to_v2i32(<2 x half> %a) #0 {
8282;
8383; NOVL-LABEL: strict_vector_fptosi_v2f16_to_v2i32:
8484; NOVL: # %bb.0:
85- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
86- ; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
87- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
88- ; NOVL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
89- ; NOVL-NEXT: vcvttph2dq %ymm0, %zmm0
90- ; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
91- ; NOVL-NEXT: vzeroupper
85+ ; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
86+ ; NOVL-NEXT: vcvttsh2si %xmm1, %eax
87+ ; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
88+ ; NOVL-NEXT: vmovd %ecx, %xmm0
89+ ; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
9290; NOVL-NEXT: retq
9391 %ret = call <2 x i32 > @llvm.experimental.constrained.fptosi.v2i32.v2f16 (<2 x half > %a ,
9492 metadata !"fpexcept.strict" ) #0
@@ -105,13 +103,11 @@ define <2 x i32> @strict_vector_fptoui_v2f16_to_v2i32(<2 x half> %a) #0 {
105103;
106104; NOVL-LABEL: strict_vector_fptoui_v2f16_to_v2i32:
107105; NOVL: # %bb.0:
108- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
109- ; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
110- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
111- ; NOVL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
112- ; NOVL-NEXT: vcvttph2udq %ymm0, %zmm0
113- ; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
114- ; NOVL-NEXT: vzeroupper
106+ ; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
107+ ; NOVL-NEXT: vcvttsh2usi %xmm1, %eax
108+ ; NOVL-NEXT: vcvttsh2usi %xmm0, %ecx
109+ ; NOVL-NEXT: vmovd %ecx, %xmm0
110+ ; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
115111; NOVL-NEXT: retq
116112 %ret = call <2 x i32 > @llvm.experimental.constrained.fptoui.v2i32.v2f16 (<2 x half > %a ,
117113 metadata !"fpexcept.strict" ) #0
@@ -128,13 +124,12 @@ define <2 x i16> @strict_vector_fptosi_v2f16_to_v2i16(<2 x half> %a) #0 {
128124;
129125; NOVL-LABEL: strict_vector_fptosi_v2f16_to_v2i16:
130126; NOVL: # %bb.0:
131- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
132- ; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
133- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
134- ; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
135- ; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
136- ; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
137- ; NOVL-NEXT: vzeroupper
127+ ; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
128+ ; NOVL-NEXT: vcvttsh2si %xmm1, %eax
129+ ; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
130+ ; NOVL-NEXT: vmovd %ecx, %xmm0
131+ ; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
132+ ; NOVL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
138133; NOVL-NEXT: retq
139134 %ret = call <2 x i16 > @llvm.experimental.constrained.fptosi.v2i16.v2f16 (<2 x half > %a ,
140135 metadata !"fpexcept.strict" ) #0
@@ -151,13 +146,12 @@ define <2 x i16> @strict_vector_fptoui_v2f16_to_v2i16(<2 x half> %a) #0 {
151146;
152147; NOVL-LABEL: strict_vector_fptoui_v2f16_to_v2i16:
153148; NOVL: # %bb.0:
154- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
155- ; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
156- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
157- ; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
158- ; NOVL-NEXT: vcvttph2uw %zmm0, %zmm0
159- ; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
160- ; NOVL-NEXT: vzeroupper
149+ ; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
150+ ; NOVL-NEXT: vcvttsh2si %xmm1, %eax
151+ ; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
152+ ; NOVL-NEXT: vmovd %ecx, %xmm0
153+ ; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
154+ ; NOVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
161155; NOVL-NEXT: retq
162156 %ret = call <2 x i16 > @llvm.experimental.constrained.fptoui.v2i16.v2f16 (<2 x half > %a ,
163157 metadata !"fpexcept.strict" ) #0
@@ -175,13 +169,13 @@ define <2 x i8> @strict_vector_fptosi_v2f16_to_v2i8(<2 x half> %a) #0 {
175169;
176170; NOVL-LABEL: strict_vector_fptosi_v2f16_to_v2i8:
177171; NOVL: # %bb.0:
178- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
179- ; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
180- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
181- ; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
182- ; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
183- ; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
184- ; NOVL-NEXT: vzeroupper
172+ ; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
173+ ; NOVL-NEXT: vcvttsh2si %xmm1, %eax
174+ ; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
175+ ; NOVL-NEXT: vmovd %ecx, %xmm0
176+ ; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
177+ ; NOVL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
178+ ; NOVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
185179; NOVL-NEXT: retq
186180 %ret = call <2 x i8 > @llvm.experimental.constrained.fptosi.v2i8.v2f16 (<2 x half > %a ,
187181 metadata !"fpexcept.strict" ) #0
@@ -199,13 +193,13 @@ define <2 x i8> @strict_vector_fptoui_v2f16_to_v2i8(<2 x half> %a) #0 {
199193;
200194; NOVL-LABEL: strict_vector_fptoui_v2f16_to_v2i8:
201195; NOVL: # %bb.0:
202- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
203- ; NOVL-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
204- ; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
205- ; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
206- ; NOVL-NEXT: vcvttph2uw %zmm0, %zmm0
207- ; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
208- ; NOVL-NEXT: vzeroupper
196+ ; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
197+ ; NOVL-NEXT: vcvttsh2si %xmm1, %eax
198+ ; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
199+ ; NOVL-NEXT: vmovd %ecx, %xmm0
200+ ; NOVL-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
201+ ; NOVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
202+ ; NOVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
209203; NOVL-NEXT: retq
210204 %ret = call <2 x i8 > @llvm.experimental.constrained.fptoui.v2i8.v2f16 (<2 x half > %a ,
211205 metadata !"fpexcept.strict" ) #0
@@ -335,12 +329,18 @@ define <4 x i16> @strict_vector_fptosi_v4f16_to_v4i16(<4 x half> %a) #0 {
335329;
336330; NOVL-LABEL: strict_vector_fptosi_v4f16_to_v4i16:
337331; NOVL: # %bb.0:
338- ; NOVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
339- ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
340- ; NOVL-NEXT: vinserti32x4 $0, %xmm0, %zmm1, %zmm0
341- ; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
342- ; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
343- ; NOVL-NEXT: vzeroupper
332+ ; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
333+ ; NOVL-NEXT: vcvttsh2si %xmm1, %eax
334+ ; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
335+ ; NOVL-NEXT: vmovd %ecx, %xmm1
336+ ; NOVL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
337+ ; NOVL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
338+ ; NOVL-NEXT: vcvttsh2si %xmm2, %eax
339+ ; NOVL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
340+ ; NOVL-NEXT: vpsrlq $48, %xmm0, %xmm0
341+ ; NOVL-NEXT: vcvttsh2si %xmm0, %eax
342+ ; NOVL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
343+ ; NOVL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
344344; NOVL-NEXT: retq
345345 %ret = call <4 x i16 > @llvm.experimental.constrained.fptosi.v4i16.v4f16 (<4 x half > %a ,
346346 metadata !"fpexcept.strict" ) #0
@@ -356,12 +356,18 @@ define <4 x i16> @strict_vector_fptoui_v4f16_to_v4i16(<4 x half> %a) #0 {
356356;
357357; NOVL-LABEL: strict_vector_fptoui_v4f16_to_v4i16:
358358; NOVL: # %bb.0:
359- ; NOVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
360- ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
361- ; NOVL-NEXT: vinserti32x4 $0, %xmm0, %zmm1, %zmm0
362- ; NOVL-NEXT: vcvttph2uw %zmm0, %zmm0
363- ; NOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
364- ; NOVL-NEXT: vzeroupper
359+ ; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
360+ ; NOVL-NEXT: vcvttsh2si %xmm1, %eax
361+ ; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
362+ ; NOVL-NEXT: vmovd %ecx, %xmm1
363+ ; NOVL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
364+ ; NOVL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
365+ ; NOVL-NEXT: vcvttsh2si %xmm2, %eax
366+ ; NOVL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
367+ ; NOVL-NEXT: vpsrlq $48, %xmm0, %xmm0
368+ ; NOVL-NEXT: vcvttsh2si %xmm0, %eax
369+ ; NOVL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
370+ ; NOVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
365371; NOVL-NEXT: retq
366372 %ret = call <4 x i16 > @llvm.experimental.constrained.fptoui.v4i16.v4f16 (<4 x half > %a ,
367373 metadata !"fpexcept.strict" ) #0
@@ -378,12 +384,19 @@ define <4 x i8> @strict_vector_fptosi_v4f16_to_v4i8(<4 x half> %a) #0 {
378384;
379385; NOVL-LABEL: strict_vector_fptosi_v4f16_to_v4i8:
380386; NOVL: # %bb.0:
381- ; NOVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
382- ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
383- ; NOVL-NEXT: vinserti32x4 $0, %xmm0, %zmm1, %zmm0
384- ; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
385- ; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
386- ; NOVL-NEXT: vzeroupper
387+ ; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
388+ ; NOVL-NEXT: vcvttsh2si %xmm1, %eax
389+ ; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
390+ ; NOVL-NEXT: vmovd %ecx, %xmm1
391+ ; NOVL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
392+ ; NOVL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
393+ ; NOVL-NEXT: vcvttsh2si %xmm2, %eax
394+ ; NOVL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
395+ ; NOVL-NEXT: vpsrlq $48, %xmm0, %xmm0
396+ ; NOVL-NEXT: vcvttsh2si %xmm0, %eax
397+ ; NOVL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
398+ ; NOVL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
399+ ; NOVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
387400; NOVL-NEXT: retq
388401 %ret = call <4 x i8 > @llvm.experimental.constrained.fptosi.v4i8.v4f16 (<4 x half > %a ,
389402 metadata !"fpexcept.strict" ) #0
@@ -400,12 +413,19 @@ define <4 x i8> @strict_vector_fptoui_v4f16_to_v4i8(<4 x half> %a) #0 {
400413;
401414; NOVL-LABEL: strict_vector_fptoui_v4f16_to_v4i8:
402415; NOVL: # %bb.0:
403- ; NOVL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
404- ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
405- ; NOVL-NEXT: vinserti32x4 $0, %xmm0, %zmm1, %zmm0
406- ; NOVL-NEXT: vcvttph2uw %zmm0, %zmm0
407- ; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
408- ; NOVL-NEXT: vzeroupper
416+ ; NOVL-NEXT: vpsrld $16, %xmm0, %xmm1
417+ ; NOVL-NEXT: vcvttsh2si %xmm1, %eax
418+ ; NOVL-NEXT: vcvttsh2si %xmm0, %ecx
419+ ; NOVL-NEXT: vmovd %ecx, %xmm1
420+ ; NOVL-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
421+ ; NOVL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
422+ ; NOVL-NEXT: vcvttsh2si %xmm2, %eax
423+ ; NOVL-NEXT: vpinsrd $2, %eax, %xmm1, %xmm1
424+ ; NOVL-NEXT: vpsrlq $48, %xmm0, %xmm0
425+ ; NOVL-NEXT: vcvttsh2si %xmm0, %eax
426+ ; NOVL-NEXT: vpinsrd $3, %eax, %xmm1, %xmm0
427+ ; NOVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
428+ ; NOVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
409429; NOVL-NEXT: retq
410430 %ret = call <4 x i8 > @llvm.experimental.constrained.fptoui.v4i8.v4f16 (<4 x half > %a ,
411431 metadata !"fpexcept.strict" ) #0
@@ -554,7 +574,7 @@ define <8 x i8> @strict_vector_fptosi_v8f16_to_v8i8(<8 x half> %a) #0 {
554574; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
555575; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
556576; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
557- ; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
577+ ; NOVL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
558578; NOVL-NEXT: vzeroupper
559579; NOVL-NEXT: retq
560580 %ret = call <8 x i8 > @llvm.experimental.constrained.fptosi.v8i8.v8f16 (<8 x half > %a ,
@@ -573,8 +593,8 @@ define <8 x i8> @strict_vector_fptoui_v8f16_to_v8i8(<8 x half> %a) #0 {
573593; NOVL: # %bb.0:
574594; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
575595; NOVL-NEXT: vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
576- ; NOVL-NEXT: vcvttph2uw %zmm0, %zmm0
577- ; NOVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
596+ ; NOVL-NEXT: vcvttph2w %zmm0, %zmm0
597+ ; NOVL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
578598; NOVL-NEXT: vzeroupper
579599; NOVL-NEXT: retq
580600 %ret = call <8 x i8 > @llvm.experimental.constrained.fptoui.v8i8.v8f16 (<8 x half > %a ,
0 commit comments