@@ -79,38 +79,54 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
7979; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8
8080; CHECK-SSE-NEXT: retq
8181;
82- ; CHECK-AVX-LABEL: fmul_pow2_ldexp_4xfloat:
83- ; CHECK-AVX: # %bb.0:
84- ; CHECK-AVX-NEXT: subq $40, %rsp
85- ; CHECK-AVX-NEXT: .cfi_def_cfa_offset 48
86- ; CHECK-AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
87- ; CHECK-AVX-NEXT: vextractps $1, %xmm0, %edi
88- ; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
89- ; CHECK-AVX-NEXT: callq ldexpf@PLT
90- ; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
91- ; CHECK-AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
92- ; CHECK-AVX-NEXT: vmovd %xmm0, %edi
93- ; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
94- ; CHECK-AVX-NEXT: callq ldexpf@PLT
95- ; CHECK-AVX-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
96- ; CHECK-AVX-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
97- ; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
98- ; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
99- ; CHECK-AVX-NEXT: vextractps $2, %xmm0, %edi
100- ; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
101- ; CHECK-AVX-NEXT: callq ldexpf@PLT
102- ; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
103- ; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
104- ; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
105- ; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
106- ; CHECK-AVX-NEXT: vextractps $3, %xmm0, %edi
107- ; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
108- ; CHECK-AVX-NEXT: callq ldexpf@PLT
109- ; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
110- ; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
111- ; CHECK-AVX-NEXT: addq $40, %rsp
112- ; CHECK-AVX-NEXT: .cfi_def_cfa_offset 8
113- ; CHECK-AVX-NEXT: retq
82+ ; CHECK-AVX2-LABEL: fmul_pow2_ldexp_4xfloat:
83+ ; CHECK-AVX2: # %bb.0:
84+ ; CHECK-AVX2-NEXT: subq $40, %rsp
85+ ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
86+ ; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
87+ ; CHECK-AVX2-NEXT: vextractps $1, %xmm0, %edi
88+ ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
89+ ; CHECK-AVX2-NEXT: callq ldexpf@PLT
90+ ; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
91+ ; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
92+ ; CHECK-AVX2-NEXT: vmovd %xmm0, %edi
93+ ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
94+ ; CHECK-AVX2-NEXT: callq ldexpf@PLT
95+ ; CHECK-AVX2-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
96+ ; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
97+ ; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
98+ ; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
99+ ; CHECK-AVX2-NEXT: vextractps $2, %xmm0, %edi
100+ ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
101+ ; CHECK-AVX2-NEXT: callq ldexpf@PLT
102+ ; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
103+ ; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
104+ ; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
105+ ; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
106+ ; CHECK-AVX2-NEXT: vextractps $3, %xmm0, %edi
107+ ; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
108+ ; CHECK-AVX2-NEXT: callq ldexpf@PLT
109+ ; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
110+ ; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
111+ ; CHECK-AVX2-NEXT: addq $40, %rsp
112+ ; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
113+ ; CHECK-AVX2-NEXT: retq
114+ ;
115+ ; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_4xfloat:
116+ ; CHECK-ONLY-AVX512F: # %bb.0:
117+ ; CHECK-ONLY-AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
118+ ; CHECK-ONLY-AVX512F-NEXT: vmovaps %xmm0, %xmm0
119+ ; CHECK-ONLY-AVX512F-NEXT: vscalefps %zmm0, %zmm1, %zmm0
120+ ; CHECK-ONLY-AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
121+ ; CHECK-ONLY-AVX512F-NEXT: vzeroupper
122+ ; CHECK-ONLY-AVX512F-NEXT: retq
123+ ;
124+ ; CHECK-SKX-LABEL: fmul_pow2_ldexp_4xfloat:
125+ ; CHECK-SKX: # %bb.0:
126+ ; CHECK-SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
127+ ; CHECK-SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
128+ ; CHECK-SKX-NEXT: vscalefps %xmm0, %xmm1, %xmm0
129+ ; CHECK-SKX-NEXT: retq
114130 %r = call <4 x float > @llvm.ldexp.v4f32.v4i32 (<4 x float > <float 9 .000000e+00 , float 9 .000000e+00 , float 9 .000000e+00 , float 9 .000000e+00 >, <4 x i32 > %i )
115131 ret <4 x float > %r
116132}
@@ -562,79 +578,11 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
562578;
563579; CHECK-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
564580; CHECK-AVX512F: # %bb.0:
565- ; CHECK-AVX512F-NEXT: subq $72, %rsp
566- ; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80
567- ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
568- ; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
569- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
570- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
571- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
572- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
573- ; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
574- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
575- ; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
576- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
577- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
578- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
579- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
580- ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
581- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
582- ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
583- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
584- ; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
585- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
586- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
587- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
588- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
589- ; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
590- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
591- ; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
592- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
593- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
594- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
595- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
596- ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
597- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
598- ; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
599- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
600- ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
601- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
602- ; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
603- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
604- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
605- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
606- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
607- ; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
608- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
609- ; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
610- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
611- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
612- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
613- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
614- ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
615- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
616- ; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
617- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
618- ; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
619- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
620- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
621- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
622- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
623- ; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
624- ; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
625- ; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax
626- ; CHECK-AVX512F-NEXT: movswl %ax, %edi
627- ; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
628- ; CHECK-AVX512F-NEXT: callq ldexpf@PLT
629- ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
630- ; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
631- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
632- ; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
633- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
634- ; CHECK-AVX512F-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
635- ; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0]
636- ; CHECK-AVX512F-NEXT: addq $72, %rsp
637- ; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 8
581+ ; CHECK-AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3]
582+ ; CHECK-AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
583+ ; CHECK-AVX512F-NEXT: vscalefps %zmm0, %zmm1, %zmm0
584+ ; CHECK-AVX512F-NEXT: vcvtps2ph $4, %ymm0, %xmm0
585+ ; CHECK-AVX512F-NEXT: vzeroupper
638586; CHECK-AVX512F-NEXT: retq
639587 %r = call <8 x half > @llvm.ldexp.v8f16.v8i16 (<8 x half > <half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000>, <8 x i16 > %i )
640588 ret <8 x half > %r
0 commit comments