Skip to content

Commit 6c79cc7

Browse files
authored
[X86] Lower mathlib call ldexp into scalef when avx512 is enabled (#166839)
Resolves #165694
1 parent b9d9811 commit 6c79cc7

File tree

3 files changed

+326
-1744
lines changed

3 files changed

+326
-1744
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2098,9 +2098,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
20982098
// pre-AVX512 equivalents. Without VLX we use 512-bit operations for
20992099
// narrower widths.
21002100
if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
2101+
for (MVT VT : {MVT::f16, MVT::f32, MVT::f64, MVT::v8f16, MVT::v4f32,
2102+
MVT::v2f64, MVT::v16f16, MVT::v8f32, MVT::v4f64, MVT::v32f16,
2103+
MVT::v16f32, MVT::v8f64})
2104+
setOperationAction(ISD::FLDEXP, VT, Custom);
2105+
21012106
// These operations are handled on non-VLX by artificially widening in
21022107
// isel patterns.
2103-
21042108
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
21052109
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
21062110
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
@@ -19220,6 +19224,72 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
1922019224
return SDValue();
1922119225
}
1922219226

19227+
/// Custom-lower ISD::FLDEXP (operand 0 = FP value X, operand 1 = integer
/// exponent Exp) to the X86 scale nodes X86ISD::SCALEFS (scalar) or
/// X86ISD::SCALEF (vector).
///
/// In every path the integer exponent is first converted to floating point
/// with ISD::SINT_TO_FP, since the scale nodes are built with two operands of
/// the same FP type.
///
/// \param Op        The FLDEXP node being lowered.
/// \param Subtarget Queried for FP16 and VLX availability.
/// \param DAG       The DAG in which replacement nodes are created.
/// \returns The lowered value, or an empty SDValue for value types that this
///          routine does not handle (presumably leaving the node to the
///          generic legalizer -- confirm against the caller).
static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue X = Op.getOperand(0);
  MVT XTy = X.getSimpleValueType();
  SDValue Exp = Op.getOperand(1);

  switch (XTy.SimpleTy) {
  default:
    return SDValue();
  case MVT::f16:
    // Without native FP16 support do the scaling in f32; the result is
    // rounded back to XTy at the end of the scalar path.
    if (!Subtarget.hasFP16())
      X = DAG.getFPExtendOrRound(X, DL, MVT::f32);
    [[fallthrough]];
  case MVT::f32:
  case MVT::f64: {
    // Scalars are placed in element 0 of a 128-bit vector, scaled with
    // SCALEFS, and extracted again.
    MVT VT = MVT::getVectorVT(X.getSimpleValueType(),
                              128 / X.getSimpleValueType().getSizeInBits());
    Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
    SDValue VX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, X);
    SDValue VExp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Exp);
    SDValue Scalefs = DAG.getNode(X86ISD::SCALEFS, DL, VT, VX, VExp);
    SDValue Final = DAG.getExtractVectorElt(DL, X.getValueType(), Scalefs, 0);
    return DAG.getFPExtendOrRound(Final, DL, XTy);
  }
  case MVT::v4f32:
  case MVT::v2f64:
  case MVT::v8f32:
  case MVT::v4f64:
  case MVT::v16f32:
  case MVT::v8f64:
    // 512-bit vectors, or any width with VLX, are scaled directly.
    if (XTy.getSizeInBits() == 512 || Subtarget.hasVLX()) {
      Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
      return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
    }
    // Otherwise fall out of the switch to the 512-bit widening path.
    break;
  case MVT::v8f16:
  case MVT::v16f16:
    if (Subtarget.hasFP16()) {
      if (Subtarget.hasVLX()) {
        Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
        return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
      }
      // FP16 without VLX: widen the f16 vector itself to 512 bits below.
      break;
    }
    // No FP16: extend the value to f32 elements and bring the exponent to
    // the matching integer vector type, then use the widening path. The
    // result is rounded back to XTy at the end.
    X = DAG.getFPExtendOrRound(X, DL, XTy.changeVectorElementType(MVT::f32));
    Exp = DAG.getSExtOrTrunc(Exp, DL,
                             X.getSimpleValueType().changeTypeToInteger());
    break;
  case MVT::v32f16:
    if (Subtarget.hasFP16()) {
      Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
      return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
    }
    return splitVectorOp(Op, DAG, DL);
  }

  // Widening path: pad both operands to 512 bits (new elements zeroed), scale
  // at 512 bits, then extract the original-width subvector.
  SDValue WideX = widenSubVector(X, true, Subtarget, DAG, DL, 512);
  SDValue WideExp = widenSubVector(Exp, true, Subtarget, DAG, DL, 512);
  // Convert the widened integer exponent to the FP type of WideX. The
  // conversion result (not the raw integer vector) must be what SCALEF
  // consumes, matching every other path in this function.
  WideExp =
      DAG.getNode(ISD::SINT_TO_FP, DL, WideX.getSimpleValueType(), WideExp);
  SDValue Scalef =
      DAG.getNode(X86ISD::SCALEF, DL, WideX.getValueType(), WideX, WideExp);
  SDValue Final =
      DAG.getExtractSubvector(DL, X.getSimpleValueType(), Scalef, 0);
  return DAG.getFPExtendOrRound(Final, DL, XTy);
}
19292+
1922319293
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
1922419294
SelectionDAG &DAG) {
1922519295
SDLoc dl(Op);
@@ -33734,7 +33804,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3373433804
case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
3373533805
case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
3373633806
case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG);
33737-
// clang-format on
33807+
case ISD::FLDEXP: return LowerFLDEXP(Op, Subtarget, DAG);
33808+
// clang-format on
3373833809
}
3373933810
}
3374033811

llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll

Lines changed: 53 additions & 105 deletions
Original file line numberDiff line numberDiff line change
@@ -79,38 +79,54 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
7979
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8
8080
; CHECK-SSE-NEXT: retq
8181
;
82-
; CHECK-AVX-LABEL: fmul_pow2_ldexp_4xfloat:
83-
; CHECK-AVX: # %bb.0:
84-
; CHECK-AVX-NEXT: subq $40, %rsp
85-
; CHECK-AVX-NEXT: .cfi_def_cfa_offset 48
86-
; CHECK-AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
87-
; CHECK-AVX-NEXT: vextractps $1, %xmm0, %edi
88-
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
89-
; CHECK-AVX-NEXT: callq ldexpf@PLT
90-
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
91-
; CHECK-AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
92-
; CHECK-AVX-NEXT: vmovd %xmm0, %edi
93-
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
94-
; CHECK-AVX-NEXT: callq ldexpf@PLT
95-
; CHECK-AVX-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
96-
; CHECK-AVX-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
97-
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
98-
; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
99-
; CHECK-AVX-NEXT: vextractps $2, %xmm0, %edi
100-
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
101-
; CHECK-AVX-NEXT: callq ldexpf@PLT
102-
; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
103-
; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
104-
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
105-
; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
106-
; CHECK-AVX-NEXT: vextractps $3, %xmm0, %edi
107-
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
108-
; CHECK-AVX-NEXT: callq ldexpf@PLT
109-
; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
110-
; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
111-
; CHECK-AVX-NEXT: addq $40, %rsp
112-
; CHECK-AVX-NEXT: .cfi_def_cfa_offset 8
113-
; CHECK-AVX-NEXT: retq
82+
; CHECK-AVX2-LABEL: fmul_pow2_ldexp_4xfloat:
83+
; CHECK-AVX2: # %bb.0:
84+
; CHECK-AVX2-NEXT: subq $40, %rsp
85+
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
86+
; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
87+
; CHECK-AVX2-NEXT: vextractps $1, %xmm0, %edi
88+
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
89+
; CHECK-AVX2-NEXT: callq ldexpf@PLT
90+
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
91+
; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
92+
; CHECK-AVX2-NEXT: vmovd %xmm0, %edi
93+
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
94+
; CHECK-AVX2-NEXT: callq ldexpf@PLT
95+
; CHECK-AVX2-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
96+
; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
97+
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
98+
; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
99+
; CHECK-AVX2-NEXT: vextractps $2, %xmm0, %edi
100+
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
101+
; CHECK-AVX2-NEXT: callq ldexpf@PLT
102+
; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
103+
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
104+
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
105+
; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
106+
; CHECK-AVX2-NEXT: vextractps $3, %xmm0, %edi
107+
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
108+
; CHECK-AVX2-NEXT: callq ldexpf@PLT
109+
; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
110+
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
111+
; CHECK-AVX2-NEXT: addq $40, %rsp
112+
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
113+
; CHECK-AVX2-NEXT: retq
114+
;
115+
; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_4xfloat:
116+
; CHECK-ONLY-AVX512F: # %bb.0:
117+
; CHECK-ONLY-AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
118+
; CHECK-ONLY-AVX512F-NEXT: vmovaps %xmm0, %xmm0
119+
; CHECK-ONLY-AVX512F-NEXT: vscalefps %zmm0, %zmm1, %zmm0
120+
; CHECK-ONLY-AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
121+
; CHECK-ONLY-AVX512F-NEXT: vzeroupper
122+
; CHECK-ONLY-AVX512F-NEXT: retq
123+
;
124+
; CHECK-SKX-LABEL: fmul_pow2_ldexp_4xfloat:
125+
; CHECK-SKX: # %bb.0:
126+
; CHECK-SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
127+
; CHECK-SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
128+
; CHECK-SKX-NEXT: vscalefps %xmm0, %xmm1, %xmm0
129+
; CHECK-SKX-NEXT: retq
114130
%r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
115131
ret <4 x float> %r
116132
}
@@ -562,79 +578,11 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
562578
;
563579
; CHECK-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
564580
; CHECK-AVX512F: # %bb.0:
565-
; CHECK-AVX512F-NEXT: subq $72, %rsp
566-
; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80
567-
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
568-
; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
569-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
570-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
571-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
572-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
573-
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
574-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
575-
; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
576-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
577-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
578-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
579-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
580-
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
581-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
582-
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
583-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
584-
; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
585-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
586-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
587-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
588-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
589-
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
590-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
591-
; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
592-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
593-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
594-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
595-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
596-
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
597-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
598-
; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
599-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
600-
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
601-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
602-
; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
603-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
604-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
605-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
606-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
607-
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
608-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
609-
; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
610-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
611-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
612-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
613-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
614-
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
615-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
616-
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
617-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
618-
; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
619-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
620-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
621-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
622-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
623-
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
624-
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
625-
; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax
626-
; CHECK-AVX512F-NEXT: movswl %ax, %edi
627-
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
628-
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
629-
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
630-
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
631-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
632-
; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
633-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
634-
; CHECK-AVX512F-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
635-
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0]
636-
; CHECK-AVX512F-NEXT: addq $72, %rsp
637-
; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 8
581+
; CHECK-AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3]
582+
; CHECK-AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
583+
; CHECK-AVX512F-NEXT: vscalefps %zmm0, %zmm1, %zmm0
584+
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %ymm0, %xmm0
585+
; CHECK-AVX512F-NEXT: vzeroupper
638586
; CHECK-AVX512F-NEXT: retq
639587
%r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> <half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000>, <8 x i16> %i)
640588
ret <8 x half> %r

0 commit comments

Comments
 (0)