Skip to content

Commit 4a56480

Browse files
committed
[X86][SATCVT] Reduce MIN/MAXSS/D by conversion instruction result
CVTSD2SI returns INT_MIN/LONG_MIN when the input underflows the signed range, and the AVX512 unsigned conversions (VCVTPS/D2UD/QQ) return UINT_MAX/ULONG_MAX when the input overflows the unsigned range. Because the conversion instruction already produces the saturated value in these cases, we can drop one MIN/MAXSS/D instruction. Partially fixes #136342
1 parent 0a17427 commit 4a56480

File tree

5 files changed

+639
-21
lines changed

5 files changed

+639
-21
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21851,6 +21851,15 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
2185121851
assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
2185221852
"Expected saturation width smaller than result width");
2185321853

21854+
// AVX512 provides VCVTPS/D2UD/QQ, which return UINT_MAX/ULONG_MAX on overflow,
21855+
// so no upper clamp is needed. X86ISD::FMAX maps negative values and NaN to 0.
21856+
if (Subtarget.hasAVX512() && !IsSigned && SatWidth == DstWidth &&
21857+
(DstVT == MVT::i32 || (Subtarget.is64Bit() && DstVT == MVT::i64))) {
21858+
SDValue MinFloatNode = DAG.getConstantFP(0.0, dl, SrcVT);
21859+
SDValue Clamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
21860+
return DAG.getNode(ISD::FP_TO_UINT, dl, DstVT, Clamped);
21861+
}
21862+
2185421863
// Promote result of FP_TO_*INT to at least 32 bits.
2185521864
if (TmpWidth < 32) {
2185621865
TmpVT = MVT::i32;
@@ -21912,9 +21921,10 @@ X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
2191221921
return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
2191321922
}
2191421923

21924+
SDValue MinClamped = Src;
2191521925
// Clamp by MinFloat from below. If Src is NaN, the result is MinFloat.
21916-
SDValue MinClamped = DAG.getNode(
21917-
X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
21926+
if (!IsSigned || SatWidth != DstWidth)
21927+
MinClamped = DAG.getNode(X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
2191821928
// Clamp by MaxFloat from above. NaN cannot occur.
2191921929
SDValue BothClamped = DAG.getNode(
2192021930
X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);

llvm/test/CodeGen/X86/fpclamptosat.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
define i32 @stest_f64i32(double %x) nounwind {
77
; CHECK-LABEL: stest_f64i32:
88
; CHECK: # %bb.0: # %entry
9+
; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.147483647E+9,0.0E+0]
10+
; CHECK-NEXT: minsd %xmm0, %xmm1
11+
; CHECK-NEXT: cvttsd2si %xmm1, %ecx
912
; CHECK-NEXT: xorl %eax, %eax
1013
; CHECK-NEXT: ucomisd %xmm0, %xmm0
11-
; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
12-
; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
13-
; CHECK-NEXT: cvttsd2si %xmm0, %ecx
1414
; CHECK-NEXT: cmovnpl %ecx, %eax
1515
; CHECK-NEXT: retq
1616
entry:
@@ -619,11 +619,11 @@ entry:
619619
define i32 @stest_f64i32_mm(double %x) nounwind {
620620
; CHECK-LABEL: stest_f64i32_mm:
621621
; CHECK: # %bb.0: # %entry
622+
; CHECK-NEXT: movsd {{.*#+}} xmm1 = [2.147483647E+9,0.0E+0]
623+
; CHECK-NEXT: minsd %xmm0, %xmm1
624+
; CHECK-NEXT: cvttsd2si %xmm1, %ecx
622625
; CHECK-NEXT: xorl %eax, %eax
623626
; CHECK-NEXT: ucomisd %xmm0, %xmm0
624-
; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
625-
; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
626-
; CHECK-NEXT: cvttsd2si %xmm0, %ecx
627627
; CHECK-NEXT: cmovnpl %ecx, %eax
628628
; CHECK-NEXT: retq
629629
entry:

llvm/test/CodeGen/X86/fptosi-sat-scalar.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1453,21 +1453,21 @@ define i32 @test_signed_i32_f64(double %f) nounwind {
14531453
; X86-SSE-LABEL: test_signed_i32_f64:
14541454
; X86-SSE: # %bb.0:
14551455
; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
1456+
; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = [2.147483647E+9,0.0E+0]
1457+
; X86-SSE-NEXT: minsd %xmm0, %xmm1
1458+
; X86-SSE-NEXT: cvttsd2si %xmm1, %ecx
14561459
; X86-SSE-NEXT: xorl %eax, %eax
14571460
; X86-SSE-NEXT: ucomisd %xmm0, %xmm0
1458-
; X86-SSE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1459-
; X86-SSE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
1460-
; X86-SSE-NEXT: cvttsd2si %xmm0, %ecx
14611461
; X86-SSE-NEXT: cmovnpl %ecx, %eax
14621462
; X86-SSE-NEXT: retl
14631463
;
14641464
; X64-LABEL: test_signed_i32_f64:
14651465
; X64: # %bb.0:
1466+
; X64-NEXT: movsd {{.*#+}} xmm1 = [2.147483647E+9,0.0E+0]
1467+
; X64-NEXT: minsd %xmm0, %xmm1
1468+
; X64-NEXT: cvttsd2si %xmm1, %ecx
14661469
; X64-NEXT: xorl %eax, %eax
14671470
; X64-NEXT: ucomisd %xmm0, %xmm0
1468-
; X64-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1469-
; X64-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
1470-
; X64-NEXT: cvttsd2si %xmm0, %ecx
14711471
; X64-NEXT: cmovnpl %ecx, %eax
14721472
; X64-NEXT: retq
14731473
%x = call i32 @llvm.fptosi.sat.i32.f64(double %f)

llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -418,21 +418,18 @@ define <2 x i16> @test_signed_v2i16_v2f64(<2 x double> %f) nounwind {
418418
define <2 x i32> @test_signed_v2i32_v2f64(<2 x double> %f) nounwind {
419419
; CHECK-LABEL: test_signed_v2i32_v2f64:
420420
; CHECK: # %bb.0:
421-
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [-2.147483648E+9,0.0E+0]
421+
; CHECK-NEXT: movsd {{.*#+}} xmm2 = [2.147483647E+9,0.0E+0]
422422
; CHECK-NEXT: movapd %xmm0, %xmm1
423-
; CHECK-NEXT: maxsd %xmm2, %xmm1
424-
; CHECK-NEXT: movsd {{.*#+}} xmm3 = [2.147483647E+9,0.0E+0]
425-
; CHECK-NEXT: minsd %xmm3, %xmm1
423+
; CHECK-NEXT: minsd %xmm2, %xmm1
426424
; CHECK-NEXT: cvttsd2si %xmm1, %eax
427425
; CHECK-NEXT: xorl %ecx, %ecx
428426
; CHECK-NEXT: ucomisd %xmm0, %xmm0
429427
; CHECK-NEXT: cmovpl %ecx, %eax
430428
; CHECK-NEXT: movd %eax, %xmm1
431429
; CHECK-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
430+
; CHECK-NEXT: minsd %xmm0, %xmm2
431+
; CHECK-NEXT: cvttsd2si %xmm2, %eax
432432
; CHECK-NEXT: ucomisd %xmm0, %xmm0
433-
; CHECK-NEXT: maxsd %xmm2, %xmm0
434-
; CHECK-NEXT: minsd %xmm3, %xmm0
435-
; CHECK-NEXT: cvttsd2si %xmm0, %eax
436433
; CHECK-NEXT: cmovpl %ecx, %eax
437434
; CHECK-NEXT: movd %eax, %xmm0
438435
; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]

0 commit comments

Comments
 (0)