Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 73 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2101,6 +2101,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// These operations are handled on non-VLX by artificially widening in
// isel patterns.

for (MVT VT : {MVT::f16, MVT::f32, MVT::f64, MVT::v8f16, MVT::v4f32,
MVT::v2f64, MVT::v16f16, MVT::v8f32, MVT::v4f64, MVT::v32f16,
MVT::v16f32, MVT::v8f64})
setOperationAction(ISD::FLDEXP, VT, Custom);

setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
Expand Down Expand Up @@ -19149,6 +19154,72 @@ SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return SDValue();
}

/// Custom-lower ISD::FLDEXP (operand 0: FP value X, operand 1: integer
/// exponent Exp) to the X86 scale nodes X86ISD::SCALEFS / X86ISD::SCALEF.
///
/// The scale nodes take the exponent as a floating-point operand of the same
/// type as the value being scaled, so every path converts Exp with SINT_TO_FP
/// before building the node.
///
/// Strategy by type:
///  - Scalars (f16/f32/f64): place X and the converted exponent in lane 0 of a
///    128-bit vector, emit SCALEFS and extract lane 0. f16 is first extended
///    to f32 when the subtarget has no FP16 support and rounded back at the
///    end.
///  - f32/f64 vectors: emit SCALEF directly for 512-bit vectors or when VLX is
///    available; otherwise widen to 512 bits (see below).
///  - f16 vectors: emit SCALEF directly when FP16 (and, for sub-512-bit
///    vectors, VLX) is available. Without FP16, v8f16/v16f16 are extended to
///    f32 elements and v32f16 is split in half.
///  - Fallback: widen X and the converted exponent to 512 bits, emit SCALEF on
///    the wide type, extract the low subvector and round back to the original
///    type if X had been extended.
///
/// \returns the lowered value, or an empty SDValue for types not handled here.
static SDValue LowerFLDEXP(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue X = Op.getOperand(0);
  MVT XTy = X.getSimpleValueType();
  SDValue Exp = Op.getOperand(1);

  switch (XTy.SimpleTy) {
  default:
    return SDValue();
  case MVT::f16:
    // Without FP16 there is no half-precision scale; do the work in f32.
    if (!Subtarget.hasFP16())
      X = DAG.getFPExtendOrRound(X, DL, MVT::f32);
    [[fallthrough]];
  case MVT::f32:
  case MVT::f64: {
    // Use the (possibly extended) type of X from here on, not XTy.
    MVT VT = MVT::getVectorVT(X.getSimpleValueType(),
                              128 / X.getSimpleValueType().getSizeInBits());
    Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
    SDValue VX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, X);
    SDValue VExp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Exp);
    SDValue Scalefs = DAG.getNode(X86ISD::SCALEFS, DL, VT, VX, VExp);
    SDValue Final = DAG.getExtractVectorElt(DL, X.getValueType(), Scalefs, 0);
    // Rounds back to f16 if X was extended above; otherwise a no-op.
    return DAG.getFPExtendOrRound(Final, DL, XTy);
  }
  case MVT::v4f32:
  case MVT::v2f64:
  case MVT::v8f32:
  case MVT::v4f64:
  case MVT::v16f32:
  case MVT::v8f64:
    if (XTy.getSizeInBits() == 512 || Subtarget.hasVLX()) {
      Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
      return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
    }
    // Narrow vector without VLX: take the 512-bit widening path below.
    break;
  case MVT::v8f16:
  case MVT::v16f16:
    if (Subtarget.hasFP16()) {
      if (Subtarget.hasVLX()) {
        Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
        return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
      }
      // FP16 without VLX: widen the f16 vector itself to 512 bits below.
      break;
    }
    // No FP16: promote to f32 elements and match the exponent's element count
    // and width so both operands widen to the same 512-bit shape.
    X = DAG.getFPExtendOrRound(X, DL, XTy.changeVectorElementType(MVT::f32));
    Exp = DAG.getSExtOrTrunc(Exp, DL,
                             X.getSimpleValueType().changeTypeToInteger());
    break;
  case MVT::v32f16:
    if (Subtarget.hasFP16()) {
      Exp = DAG.getNode(ISD::SINT_TO_FP, DL, XTy, Exp);
      return DAG.getNode(X86ISD::SCALEF, DL, XTy, X, Exp);
    }
    return splitVectorOp(Op, DAG, DL);
  }

  // Widening fallback. Convert the integer exponent to X's floating-point
  // type first, exactly as the direct paths above do, and only then widen.
  // Previously the SINT_TO_FP result was written to a dead variable (and was
  // built with an integer result type), so SCALEF received the raw integer
  // exponent vector.
  Exp = DAG.getNode(ISD::SINT_TO_FP, DL, X.getValueType(), Exp);
  SDValue WideX = widenSubVector(X, true, Subtarget, DAG, DL, 512);
  SDValue WideExp = widenSubVector(Exp, true, Subtarget, DAG, DL, 512);
  SDValue Scalef =
      DAG.getNode(X86ISD::SCALEF, DL, WideX.getValueType(), WideX, WideExp);
  SDValue Final =
      DAG.getExtractSubvector(DL, X.getSimpleValueType(), Scalef, 0);
  // Rounds back to the f16 vector type if X was extended; otherwise a no-op.
  return DAG.getFPExtendOrRound(Final, DL, XTy);
}

static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
SDLoc dl(Op);
Expand Down Expand Up @@ -33681,7 +33752,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
case ISD::PREFETCH: return LowerPREFETCH(Op, Subtarget, DAG);
// clang-format on
case ISD::FLDEXP: return LowerFLDEXP(Op, Subtarget, DAG);
// clang-format on
}
}

Expand Down
158 changes: 53 additions & 105 deletions llvm/test/CodeGen/X86/fold-int-pow2-with-fmul-or-fdiv.ll
Original file line number Diff line number Diff line change
Expand Up @@ -79,38 +79,54 @@ define <4 x float> @fmul_pow2_ldexp_4xfloat(<4 x i32> %i) {
; CHECK-SSE-NEXT: .cfi_def_cfa_offset 8
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: subq $40, %rsp
; CHECK-AVX-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX-NEXT: vextractps $1, %xmm0, %edi
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT: callq ldexpf@PLT
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX-NEXT: vmovd %xmm0, %edi
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT: callq ldexpf@PLT
; CHECK-AVX-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX-NEXT: vextractps $2, %xmm0, %edi
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT: callq ldexpf@PLT
; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-AVX-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX-NEXT: vextractps $3, %xmm0, %edi
; CHECK-AVX-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX-NEXT: callq ldexpf@PLT
; CHECK-AVX-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-AVX-NEXT: addq $40, %rsp
; CHECK-AVX-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX-NEXT: retq
; CHECK-AVX2-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: subq $40, %rsp
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 48
; CHECK-AVX2-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX2-NEXT: vextractps $1, %xmm0, %edi
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: callq ldexpf@PLT
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX2-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX2-NEXT: vmovd %xmm0, %edi
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: callq ldexpf@PLT
; CHECK-AVX2-NEXT: vinsertps $16, (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX2-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[2,3]
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX2-NEXT: vextractps $2, %xmm0, %edi
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: callq ldexpf@PLT
; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0],xmm1[3]
; CHECK-AVX2-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX2-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-AVX2-NEXT: vextractps $3, %xmm0, %edi
; CHECK-AVX2-NEXT: vmovss {{.*#+}} xmm0 = [9.0E+0,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX2-NEXT: callq ldexpf@PLT
; CHECK-AVX2-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
; CHECK-AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; CHECK-AVX2-NEXT: addq $40, %rsp
; CHECK-AVX2-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX2-NEXT: retq
;
; CHECK-ONLY-AVX512F-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-ONLY-AVX512F: # %bb.0:
; CHECK-ONLY-AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
; CHECK-ONLY-AVX512F-NEXT: vmovaps %xmm0, %xmm0
; CHECK-ONLY-AVX512F-NEXT: vscalefps %zmm0, %zmm1, %zmm0
; CHECK-ONLY-AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; CHECK-ONLY-AVX512F-NEXT: vzeroupper
; CHECK-ONLY-AVX512F-NEXT: retq
;
; CHECK-SKX-LABEL: fmul_pow2_ldexp_4xfloat:
; CHECK-SKX: # %bb.0:
; CHECK-SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
; CHECK-SKX-NEXT: vbroadcastss {{.*#+}} xmm1 = [9.0E+0,9.0E+0,9.0E+0,9.0E+0]
; CHECK-SKX-NEXT: vscalefps %xmm0, %xmm1, %xmm0
; CHECK-SKX-NEXT: retq
%r = call <4 x float> @llvm.ldexp.v4f32.v4i32(<4 x float> <float 9.000000e+00, float 9.000000e+00, float 9.000000e+00, float 9.000000e+00>, <4 x i32> %i)
ret <4 x float> %r
}
Expand Down Expand Up @@ -562,79 +578,11 @@ define <8 x half> @fmul_pow2_ldexp_8xhalf(<8 x i16> %i) {
;
; CHECK-AVX512F-LABEL: fmul_pow2_ldexp_8xhalf:
; CHECK-AVX512F: # %bb.0:
; CHECK-AVX512F-NEXT: subq $72, %rsp
; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 80
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-AVX512F-NEXT: vpextrw $7, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $6, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $5, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $4, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $3, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $2, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-AVX512F-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vpextrw $1, %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-AVX512F-NEXT: vmovdqa (%rsp), %xmm0 # 16-byte Reload
; CHECK-AVX512F-NEXT: vmovd %xmm0, %eax
; CHECK-AVX512F-NEXT: movswl %ax, %edi
; CHECK-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = [8.192E+3,0.0E+0,0.0E+0,0.0E+0]
; CHECK-AVX512F-NEXT: callq ldexpf@PLT
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; CHECK-AVX512F-NEXT: vpunpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-AVX512F-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-AVX512F-NEXT: vpunpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
; CHECK-AVX512F-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-AVX512F-NEXT: addq $72, %rsp
; CHECK-AVX512F-NEXT: .cfi_def_cfa_offset 8
; CHECK-AVX512F-NEXT: vbroadcastss {{.*#+}} ymm1 = [8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3,8.192E+3]
; CHECK-AVX512F-NEXT: vpmovsxwd %xmm0, %ymm0
; CHECK-AVX512F-NEXT: vscalefps %zmm0, %zmm1, %zmm0
; CHECK-AVX512F-NEXT: vcvtps2ph $4, %ymm0, %xmm0
; CHECK-AVX512F-NEXT: vzeroupper
; CHECK-AVX512F-NEXT: retq
%r = call <8 x half> @llvm.ldexp.v8f16.v8i16(<8 x half> <half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000, half 0xH7000>, <8 x i16> %i)
ret <8 x half> %r
Expand Down
Loading