@@ -1,21 +1,37 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512
-; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512VL
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512FP16
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512VL,AVX512VLF
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512fp16 | FileCheck %s --check-prefixes=CHECK,AVX512VLFP16
 
 define half @test_half(half %x, i32 %exp) nounwind {
-; AVX512-LABEL: test_half:
-; AVX512: # %bb.0: # %entry
-; AVX512-NEXT: vcvtsi2ss %edi, %xmm15, %xmm1
-; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0
-; AVX512-NEXT: vscalefss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: test_half:
+; AVX512F: # %bb.0: # %entry
+; AVX512F-NEXT: vcvtsi2ss %edi, %xmm15, %xmm1
+; AVX512F-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512F-NEXT: vscalefss %xmm1, %xmm0, %xmm0
+; AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512FP16-LABEL: test_half:
+; AVX512FP16: # %bb.0: # %entry
+; AVX512FP16-NEXT: vcvtsi2sh %edi, %xmm31, %xmm1
+; AVX512FP16-NEXT: vscalefsh %xmm1, %xmm0, %xmm0
+; AVX512FP16-NEXT: retq
 ;
 ; AVX512VL-LABEL: test_half:
 ; AVX512VL: # %bb.0: # %entry
-; AVX512VL-NEXT: vcvtsi2sh %edi, %xmm31, %xmm1
-; AVX512VL-NEXT: vscalefsh %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vcvtsi2ss %edi, %xmm15, %xmm1
+; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0
+; AVX512VL-NEXT: vscalefss %xmm1, %xmm0, %xmm0
+; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0
 ; AVX512VL-NEXT: retq
+;
+; AVX512VLFP16-LABEL: test_half:
+; AVX512VLFP16: # %bb.0: # %entry
+; AVX512VLFP16-NEXT: vcvtsi2sh %edi, %xmm31, %xmm1
+; AVX512VLFP16-NEXT: vscalefsh %xmm1, %xmm0, %xmm0
+; AVX512VLFP16-NEXT: retq
 entry:
   %r = tail call fast half @llvm.ldexp.f16.i32(half %x, i32 %exp)
   ret half %r
@@ -240,30 +256,24 @@ declare <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half>, <8 x i16>)
 define <4 x float> @test_ldexp_4xfloat(<4 x float> %x, <4 x i32> %exp) nounwind {
 ; AVX512-LABEL: test_ldexp_4xfloat:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm2
-; AVX512-NEXT: vscalefss %xmm2, %xmm0, %xmm2
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; AVX512-NEXT: vshufps {{.*#+}} xmm4 = xmm1[1,1,1,1]
-; AVX512-NEXT: vcvtdq2ps %xmm4, %xmm4
-; AVX512-NEXT: vscalefss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT: vunpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; AVX512-NEXT: vshufpd {{.*#+}} xmm3 = xmm0[1,0]
-; AVX512-NEXT: vshufps {{.*#+}} xmm4 = xmm1[2,3,2,3]
-; AVX512-NEXT: vcvtdq2ps %xmm4, %xmm4
-; AVX512-NEXT: vscalefss %xmm4, %xmm3, %xmm3
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm1
-; AVX512-NEXT: vscalefss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0]
+; AVX512-NEXT: vscalefps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
+; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
 ;
 ; AVX512VL-LABEL: test_ldexp_4xfloat:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vcvtdq2ps %xmm1, %xmm1
 ; AVX512VL-NEXT: vscalefps %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: retq
+;
+; AVX512VLFP16-LABEL: test_ldexp_4xfloat:
+; AVX512VLFP16: # %bb.0:
+; AVX512VLFP16-NEXT: vcvtdq2ps %xmm1, %xmm1
+; AVX512VLFP16-NEXT: vscalefps %xmm1, %xmm0, %xmm0
+; AVX512VLFP16-NEXT: retq
   %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %x, <4 x i32> %exp)
   ret <4 x float> %r
 }
@@ -663,50 +673,23 @@ declare <16 x half> @llvm.ldexp.v16f16.v16i16(<16 x half>, <16 x i16>)
 define <8 x float> @test_ldexp_8xfloat(<8 x float> %x, <8 x i32> %exp) nounwind {
 ; AVX512-LABEL: test_ldexp_8xfloat:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX512-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX512-NEXT: vcvtdq2ps %xmm3, %xmm4
-; AVX512-NEXT: vscalefss %xmm4, %xmm2, %xmm4
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
-; AVX512-NEXT: vshufps {{.*#+}} xmm6 = xmm3[1,1,1,1]
-; AVX512-NEXT: vcvtdq2ps %xmm6, %xmm6
-; AVX512-NEXT: vscalefss %xmm6, %xmm5, %xmm5
-; AVX512-NEXT: vunpcklps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
-; AVX512-NEXT: vshufpd {{.*#+}} xmm5 = xmm2[1,0]
-; AVX512-NEXT: vshufps {{.*#+}} xmm6 = xmm3[2,3,2,3]
-; AVX512-NEXT: vcvtdq2ps %xmm6, %xmm6
-; AVX512-NEXT: vscalefss %xmm6, %xmm5, %xmm5
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm4 = xmm4[0],xmm5[0]
-; AVX512-NEXT: vshufps {{.*#+}} xmm2 = xmm2[3,3,3,3]
-; AVX512-NEXT: vshufps {{.*#+}} xmm3 = xmm3[3,3,3,3]
-; AVX512-NEXT: vcvtdq2ps %xmm3, %xmm3
-; AVX512-NEXT: vscalefss %xmm3, %xmm2, %xmm2
-; AVX512-NEXT: vinsertps {{.*#+}} xmm2 = xmm4[0,1,2],xmm2[0]
-; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm3
-; AVX512-NEXT: vscalefss %xmm3, %xmm0, %xmm3
-; AVX512-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
-; AVX512-NEXT: vshufps {{.*#+}} xmm5 = xmm1[1,1,1,1]
-; AVX512-NEXT: vcvtdq2ps %xmm5, %xmm5
-; AVX512-NEXT: vscalefss %xmm5, %xmm4, %xmm4
-; AVX512-NEXT: vunpcklps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
-; AVX512-NEXT: vshufpd {{.*#+}} xmm4 = xmm0[1,0]
-; AVX512-NEXT: vshufps {{.*#+}} xmm5 = xmm1[2,3,2,3]
-; AVX512-NEXT: vcvtdq2ps %xmm5, %xmm5
-; AVX512-NEXT: vscalefss %xmm5, %xmm4, %xmm4
-; AVX512-NEXT: vmovlhps {{.*#+}} xmm3 = xmm3[0],xmm4[0]
-; AVX512-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm1[3,3,3,3]
-; AVX512-NEXT: vcvtdq2ps %xmm1, %xmm1
-; AVX512-NEXT: vscalefss %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vinsertps {{.*#+}} xmm0 = xmm3[0,1,2],xmm0[0]
-; AVX512-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT: vcvtdq2ps %ymm1, %ymm1
+; AVX512-NEXT: vscalefps %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT: retq
 ;
 ; AVX512VL-LABEL: test_ldexp_8xfloat:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vcvtdq2ps %ymm1, %ymm1
 ; AVX512VL-NEXT: vscalefps %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
+;
+; AVX512VLFP16-LABEL: test_ldexp_8xfloat:
+; AVX512VLFP16: # %bb.0:
+; AVX512VLFP16-NEXT: vcvtdq2ps %ymm1, %ymm1
+; AVX512VLFP16-NEXT: vscalefps %ymm1, %ymm0, %ymm0
+; AVX512VLFP16-NEXT: retq
   %r = call <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float> %x, <8 x i32> %exp)
   ret <8 x float> %r
 }
@@ -715,30 +698,23 @@ declare <8 x float> @llvm.ldexp.v8f32.v8i32(<8 x float>, <8 x i32>)
 define <4 x double> @test_ldexp_4xdouble(<4 x double> %x, <4 x i32> %exp) nounwind {
 ; AVX512-LABEL: test_ldexp_4xdouble:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX512-NEXT: vshufps {{.*#+}} xmm3 = xmm1[2,3,2,3]
-; AVX512-NEXT: vcvtdq2pd %xmm3, %xmm3
-; AVX512-NEXT: vscalefsd %xmm3, %xmm2, %xmm3
-; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm4
-; AVX512-NEXT: vscalefsd %xmm4, %xmm0, %xmm4
-; AVX512-NEXT: vinsertf128 $1, %xmm3, %ymm4, %ymm3
-; AVX512-NEXT: vshufps {{.*#+}} xmm4 = xmm1[3,3,3,3]
-; AVX512-NEXT: vcvtdq2pd %xmm4, %xmm4
-; AVX512-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
-; AVX512-NEXT: vscalefsd %xmm4, %xmm2, %xmm2
-; AVX512-NEXT: vshufps {{.*#+}} xmm1 = xmm1[1,1,1,1]
-; AVX512-NEXT: vcvtdq2pd %xmm1, %xmm1
-; AVX512-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX512-NEXT: vscalefsd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX512-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm3[0],ymm0[0],ymm3[2],ymm0[2]
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512-NEXT: vcvtdq2pd %xmm1, %ymm1
+; AVX512-NEXT: vscalefpd %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0
 ; AVX512-NEXT: retq
 ;
 ; AVX512VL-LABEL: test_ldexp_4xdouble:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vcvtdq2pd %xmm1, %ymm1
 ; AVX512VL-NEXT: vscalefpd %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
+;
+; AVX512VLFP16-LABEL: test_ldexp_4xdouble:
+; AVX512VLFP16: # %bb.0:
+; AVX512VLFP16-NEXT: vcvtdq2pd %xmm1, %ymm1
+; AVX512VLFP16-NEXT: vscalefpd %ymm1, %ymm0, %ymm0
+; AVX512VLFP16-NEXT: retq
   %r = call <4 x double> @llvm.ldexp.v4f64.v4i32(<4 x double> %x, <4 x i32> %exp)
   ret <4 x double> %r
 }
@@ -1495,3 +1471,5 @@ define <8 x double> @test_ldexp_8xdouble(<8 x double> %x, <8 x i32> %exp) nounwi
 }
 declare <8 x double> @llvm.ldexp.v8f64.v8i32(<8 x double>, <8 x i32>)
 
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; AVX512VLF: {{.*}}
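For reference, a minimal standalone sketch of what these checks exercise (the function and file names below are illustrative; the intrinsic and the llc invocation are taken from the RUN lines above). Without AVX512VL, vscalefps/vscalefpd are only available on 512-bit registers, so the 128-/256-bit ldexp calls are now widened to zmm and lowered to a single vscalef instruction (the "# kill" lines mark that implicit widening) instead of being scalarized into per-lane vscalefss/vscalefsd sequences:

; ldexp-sketch.ll (hypothetical file name) -- compile with, e.g.:
;   llc < ldexp-sketch.ll -mtriple=x86_64-- -mattr=+avx512f
; With only +avx512f this should now produce one vscalefps on zmm registers;
; with +avx512vl it should produce one vscalefps directly on xmm registers.
define <4 x float> @sketch_ldexp_v4f32(<4 x float> %x, <4 x i32> %exp) nounwind {
  ; llvm.ldexp scales lane-wise: r[i] = x[i] * 2^exp[i].
  %r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> %x, <4 x i32> %exp)
  ret <4 x float> %r
}
declare <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float>, <4 x i32>)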