Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5208,7 +5208,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
if (N->getOpcode() == ISD::ZERO_EXTEND &&
getTypeAction(InVT) == TargetLowering::TypePromoteInteger &&
TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() !=
WidenVT.getScalarSizeInBits()) {
WidenVT.getScalarSizeInBits()) {
InOp = ZExtPromotedInteger(InOp);
InVT = InOp.getValueType();
if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits())
Expand All @@ -5225,7 +5225,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
InVTEC = InVT.getVectorElementCount();
if (InVTEC == WidenEC) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, Flags);
if (N->getNumOperands() == 3) {
assert(N->isVPOpcode() && "Expected VP opcode");
SDValue Mask =
Expand Down Expand Up @@ -5261,7 +5261,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVec);
return DAG.getNode(Opcode, DL, WidenVT, InVec, Flags);
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}

Expand All @@ -5270,7 +5270,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
DAG.getVectorIdxConstant(0, DL));
// Extract the input and convert the shorten input vector.
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InVal);
return DAG.getNode(Opcode, DL, WidenVT, InVal, Flags);
return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
}
}
Expand All @@ -5285,7 +5285,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
DAG.getVectorIdxConstant(i, DL));
if (N->getNumOperands() == 1)
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, Flags);
else
Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
}
Expand Down
27 changes: 18 additions & 9 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8364,22 +8364,31 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
}

bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue &Chain,
SelectionDAG &DAG) const {
SDValue &Chain, SelectionDAG &DAG) const {
// This transform is not correct for converting 0 when rounding mode is set
// to round toward negative infinity which will produce -0.0. So disable under
// strictfp.
// to round toward negative infinity which will produce -0.0. So disable
// under strictfp.
if (Node->isStrictFPOpcode())
return false;

SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);

// If the input is known to be non-negative and SINT_TO_FP is legal then use
// it.
if (Node->getFlags().hasNonNeg() &&
isOperationLegalOrCustom(ISD::SINT_TO_FP, SrcVT)) {
Result =
DAG.getNode(ISD::SINT_TO_FP, SDLoc(Node), DstVT, Node->getOperand(0));
return true;
}

if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
return false;

// Only expand vector types if we have the appropriate vector bit operations.
// Only expand vector types if we have the appropriate vector bit
// operations.
if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
!isOperationLegalOrCustom(ISD::FADD, DstVT) ||
!isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
Expand All @@ -8393,8 +8402,9 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
// Implementation of unsigned i64 to f64 following the algorithm in
// __floatundidf in compiler_rt. This implementation performs rounding
// correctly in all rounding modes with the exception of converting 0
// when rounding toward negative infinity. In that case the fsub will produce
// -0.0. This will be added to +0.0 and produce -0.0 which is incorrect.
// when rounding toward negative infinity. In that case the fsub will
// produce -0.0. This will be added to +0.0 and produce -0.0 which is
// incorrect.
SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
llvm::bit_cast<double>(UINT64_C(0x4530000000100000)), dl, DstVT);
Expand All @@ -8408,8 +8418,7 @@ bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
SDValue HiSub =
DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
return true;
}
Expand Down
38 changes: 38 additions & 0 deletions llvm/test/CodeGen/VE/Scalar/cast.ll
Original file line number Diff line number Diff line change
Expand Up @@ -568,6 +568,44 @@ define float @ull2f(i64 %x) {
ret float %r
}

; uitofp carrying the nneg flag, i64 -> float. The expected output is a
; cvt.d.l of the input followed by a narrowing cvt.s.d, with no extra
; fix-up instructions between them.
define float @ull2f_nneg(i64 %x) {
; CHECK-LABEL: ull2f_nneg:
; CHECK: # %bb.0:
; CHECK-NEXT: cvt.d.l %s0, %s0
; CHECK-NEXT: cvt.s.d %s0, %s0
; CHECK-NEXT: b.l.t (, %s10)
%r = uitofp nneg i64 %x to float
ret float %r
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing vector tests? Can you also add strictfp tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't find a target that has uitofp marked as Expand and sitofp marked as legal or custom on vector types.
I added a strictfp test.

; Constrained uitofp (round.tonearest, fpexcept.strict) producing a float.
; The expected output builds the value through a stack store/load and an
; fsub.d before narrowing with cvt.s.d; no direct integer convert
; instruction appears.
; NOTE(review): the name follows the ull2f (i64) tests, but the operand
; here is i32 and the intrinsic is the .f32.i32 variant -- confirm this is
; the intended coverage.
; NOTE(review): no strictfp attribute is visible on this function or the
; call -- confirm whether one is expected for constrained intrinsics.
define float @ull2f_strict(i32 %x) {
; CHECK-LABEL: ull2f_strict:
; CHECK: # %bb.0:
; CHECK-NEXT: adds.l %s11, -16, %s11
; CHECK-NEXT: brge.l.t %s11, %s8, .LBB58_2
; CHECK-NEXT: # %bb.1:
; CHECK-NEXT: ld %s61, 24(, %s14)
; CHECK-NEXT: or %s62, 0, %s0
; CHECK-NEXT: lea %s63, 315
; CHECK-NEXT: shm.l %s63, (%s61)
; CHECK-NEXT: shm.l %s8, 8(%s61)
; CHECK-NEXT: shm.l %s11, 16(%s61)
; CHECK-NEXT: monc
; CHECK-NEXT: or %s0, 0, %s62
; CHECK-NEXT: .LBB58_2:
; CHECK-NEXT: lea %s1, 1127219200
; CHECK-NEXT: stl %s1, 12(, %s11)
; CHECK-NEXT: stl %s0, 8(, %s11)
; CHECK-NEXT: ld %s0, 8(, %s11)
; CHECK-NEXT: lea.sl %s1, 1127219200
; CHECK-NEXT: fsub.d %s0, %s0, %s1
; CHECK-NEXT: cvt.s.d %s0, %s0
; CHECK-NEXT: adds.l %s11, 16, %s11
; CHECK-NEXT: b.l.t (, %s10)
%val = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %x, metadata !"round.tonearest", metadata !"fpexcept.strict")
ret float %val
}

define double @ull2d(i64 %x) {
; CHECK-LABEL: ull2d:
; CHECK: # %bb.0:
Expand Down
51 changes: 43 additions & 8 deletions llvm/test/CodeGen/X86/avx512-cvt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -312,11 +312,46 @@ define <4 x float> @ulto4f32(<4 x i64> %a) {
ret <4 x float> %b
}

; uitofp carrying the nneg flag, <4 x i64> -> <4 x float>.
; Expected output per prefix:
;   NODQ   - each element is extracted and converted with vcvtsi2ss, then
;            the results are reassembled with vinsertps.
;   VLDQ   - a single vcvtqq2ps on the ymm input.
;   DQNOVL - a single vcvtqq2ps on the input widened to zmm.
define <4 x float> @ulto4f32_nneg(<4 x i64> %a) {
; NODQ-LABEL: ulto4f32_nneg:
; NODQ: # %bb.0:
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3]
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm0
; NODQ-NEXT: vmovq %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm2
; NODQ-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3]
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ss %rax, %xmm3, %xmm0
; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
; NODQ-NEXT: vzeroupper
; NODQ-NEXT: retq
;
; VLDQ-LABEL: ulto4f32_nneg:
; VLDQ: # %bb.0:
; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
; VLDQ-NEXT: vzeroupper
; VLDQ-NEXT: retq
;
; DQNOVL-LABEL: ulto4f32_nneg:
; DQNOVL: # %bb.0:
; DQNOVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; DQNOVL-NEXT: vcvtqq2ps %zmm0, %ymm0
; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; DQNOVL-NEXT: vzeroupper
; DQNOVL-NEXT: retq
%b = uitofp nneg <4 x i64> %a to <4 x float>
ret <4 x float> %b
}

define <8 x double> @ulto8f64(<8 x i64> %a) {
; NODQ-LABEL: ulto8f64:
; NODQ: # %bb.0:
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; NODQ-NEXT: vpternlogq $248, {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm1
; NODQ-NEXT: vpternlogq {{.*#+}} zmm1 = zmm1 | (zmm0 & mem)
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
; NODQ-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; NODQ-NEXT: vsubpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
Expand All @@ -342,14 +377,14 @@ define <16 x double> @ulto16f64(<16 x i64> %a) {
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295]
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm3 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
; NODQ-NEXT: vmovdqa64 %zmm3, %zmm4
; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm0, %zmm4
; NODQ-NEXT: vpternlogq {{.*#+}} zmm4 = zmm4 | (zmm0 & zmm2)
; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0
; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072]
; NODQ-NEXT: vporq %zmm5, %zmm0, %zmm0
; NODQ-NEXT: vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25]
; NODQ-NEXT: vsubpd %zmm6, %zmm0, %zmm0
; NODQ-NEXT: vaddpd %zmm0, %zmm4, %zmm0
; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm1, %zmm3
; NODQ-NEXT: vpternlogq {{.*#+}} zmm3 = zmm3 | (zmm1 & zmm2)
; NODQ-NEXT: vpsrlq $32, %zmm1, %zmm1
; NODQ-NEXT: vporq %zmm5, %zmm1, %zmm1
; NODQ-NEXT: vsubpd %zmm6, %zmm1, %zmm1
Expand Down Expand Up @@ -1483,7 +1518,7 @@ define <16 x float> @sbto16f32(<16 x i32> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
; NODQ-NEXT: retq
;
Expand Down Expand Up @@ -1564,7 +1599,7 @@ define <16 x double> @sbto16f64(<16 x double> %a) {
; NODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k0
; NODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
; NODQ-NEXT: kunpckbw %k0, %k1, %k1
; NODQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NODQ-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm1
Expand Down Expand Up @@ -1603,7 +1638,7 @@ define <8 x double> @sbto8f64(<8 x double> %a) {
; NOVLDQ: # %bb.0:
; NOVLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; NOVLDQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NOVLDQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; NOVLDQ-NEXT: vcvtdq2pd %ymm0, %zmm0
; NOVLDQ-NEXT: retq
;
Expand Down Expand Up @@ -1864,7 +1899,7 @@ define <16 x float> @ubto16f32(<16 x i32> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; NODQ-NEXT: vpsrld $31, %zmm0, %zmm0
; NODQ-NEXT: vcvtdq2ps %zmm0, %zmm0
; NODQ-NEXT: retq
Expand Down Expand Up @@ -1894,7 +1929,7 @@ define <16 x double> @ubto16f64(<16 x i32> %a) {
; NODQ: # %bb.0:
; NODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NODQ-NEXT: vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
; NODQ-NEXT: vpsrld $31, %zmm0, %zmm1
; NODQ-NEXT: vcvtdq2pd %ymm1, %zmm0
; NODQ-NEXT: vextracti64x4 $1, %zmm1, %ymm1
Expand Down
Loading