Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21851,6 +21851,15 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
"Expected saturation width smaller than result width");

// AVX512 provides VCVTSS/D2USI, which return UINT_MAX/ULONG_MAX (all ones) on
// overflow. X86ISD::FMAX ensures that negative values and NaN map to 0.
if (Subtarget.hasAVX512() && !IsSigned && SatWidth == DstWidth &&
(DstVT == MVT::i32 || (Subtarget.is64Bit() && DstVT == MVT::i64))) {
SDValue MinFloatNode = DAG.getConstantFP(0.0, dl, SrcVT);
SDValue Clamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
return DAG.getNode(ISD::FP_TO_UINT, dl, DstVT, Clamped);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is relying on properties of the ultimately selected instruction, doesn't this need a custom ISD node?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so. FP_TO_UINT (f32/f64 -> i32/i64) is legal if and only if AVX512 is available, so we can treat it as a 1:1 mapping between the node and the selected instruction. See the lowering code:

  if (!IsSigned && UseSSEReg) {
    // Conversions from f32/f64 with AVX512 should be legal.
    if (Subtarget.hasAVX512())
      return Op;

}

// Promote result of FP_TO_*INT to at least 32 bits.
if (TmpWidth < 32) {
TmpVT = MVT::i32;
Expand Down Expand Up @@ -21912,14 +21921,19 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
}

// Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
SDValue MinClamped = DAG.getNode(
X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
// Clamp by MaxFloat from above. NaN cannot occur.
SDValue BothClamped = DAG.getNode(
X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
SDValue MinClamped = Src;
// If Src is NaN, the result is MaxFloat.
unsigned MinOpc = X86ISD::FMIN;
if (!IsSigned || SatWidth != DstWidth) {
// Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
MinClamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
// NaN cannot occur.
MinOpc = X86ISD::FMINC;
}
// Clamp by MaxFloat from above.
SDValue Clamped = DAG.getNode(MinOpc, dl, SrcVT, MinClamped, MaxFloatNode);
// Convert clamped value to integer.
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);
SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, Clamped);

if (!IsSigned) {
// In the unsigned case we're done, because we mapped NaN to MinFloat,
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/X86/fpclamptosat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ define i32 @stest_f64i32(double %x) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ucomisd %xmm0, %xmm0
; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cvttsd2si %xmm0, %ecx
; CHECK-NEXT: cmovnpl %ecx, %eax
Expand Down Expand Up @@ -621,7 +620,6 @@ define i32 @stest_f64i32_mm(double %x) nounwind {
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: ucomisd %xmm0, %xmm0
; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: cvttsd2si %xmm0, %ecx
; CHECK-NEXT: cmovnpl %ecx, %eax
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/CodeGen/X86/fptosi-sat-scalar.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1455,7 +1455,6 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT: xorl %eax, %eax
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
; X86-SSE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx
; X86-SSE-NEXT: cmovnpl %ecx, %eax
Expand All @@ -1465,7 +1464,6 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
; X64: # %bb.0:
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: ucomisd %xmm0, %xmm0
; X64-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT: cvttsd2si %xmm0, %ecx
; X64-NEXT: cmovnpl %ecx, %eax
Expand Down
9 changes: 3 additions & 6 deletions llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -418,20 +418,17 @@ define <2 x i16> @test_signed_v2i16_v2f64(<2 x double> %f) nounwind {
define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %f) nounwind {
; CHECK-LABEL: test_signed_v2i32_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [-2.147483648E+9,0.0E+0]
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [2.147483647E+9,0.0E+0]
; CHECK-NEXT: movapd %xmm0, %xmm1
; CHECK-NEXT: maxsd %xmm2, %xmm1
; CHECK-NEXT: movsd {{.*#+}} xmm3 = [2.147483647E+9,0.0E+0]
; CHECK-NEXT: minsd %xmm3, %xmm1
; CHECK-NEXT: minsd %xmm2, %xmm1
; CHECK-NEXT: cvttsd2si %xmm1, %eax
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: ucomisd %xmm0, %xmm0
; CHECK-NEXT: cmovpl %ecx, %eax
; CHECK-NEXT: movd %eax, %xmm1
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-NEXT: ucomisd %xmm0, %xmm0
; CHECK-NEXT: maxsd %xmm2, %xmm0
; CHECK-NEXT: minsd %xmm3, %xmm0
; CHECK-NEXT: minsd %xmm2, %xmm0
; CHECK-NEXT: cvttsd2si %xmm0, %eax
; CHECK-NEXT: cmovpl %ecx, %eax
; CHECK-NEXT: movd %eax, %xmm0
Expand Down
Loading
Loading