Optimize fptrunc(x) >= C1 --> x >= C2: fix the rounded constant and add a scalable vector test

kissholic · kissholic · commit 7804fb159092 · 2024-08-04T14:23:54.000+08:00
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -32,6 +32,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
 #include "llvm/IR/Value.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/KnownBits.h"
@@ -7895,47 +7896,93 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
 static Instruction *foldFCmpFpTrunc(FCmpInst &I, Instruction *LHSI,
                                     Constant *RHSC) {
+  // Rewrite a compare of fptrunc(x) against RHSC as a compare of x against
+  // a constant of the wider source type. Returns the new compare, or nullptr
+  // when the fold does not apply.
   FCmpInst::Predicate Pred = I.getPredicate();
+
+  // RoundDown selects the neighbour of the constant that brackets the new
+  // threshold: the next value below it for >= and <, the next value above
+  // it for > and <=. Every other predicate is rejected.
+  bool RoundDown = false;
+
+  if ((Pred == FCmpInst::FCMP_OGE) || (Pred == FCmpInst::FCMP_UGE) ||
+      (Pred == FCmpInst::FCMP_OLT) || (Pred == FCmpInst::FCMP_ULT))
+    RoundDown = true;
+  else if ((Pred == FCmpInst::FCMP_OGT) || (Pred == FCmpInst::FCMP_UGT) ||
+           (Pred == FCmpInst::FCMP_OLE) || (Pred == FCmpInst::FCMP_ULE))
+    RoundDown = false;
+  else
+    return nullptr;
 
-  // Check that predicates are valid.
-  if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
-      (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
+  const APFloat *RValue;
+  if (!match(RHSC, m_APFloat(RValue)))
     return nullptr;
 
-  if (ConstantFP *ConstRFp = dyn_cast<ConstantFP>(RHSC)) {
-    Type *LType = LHSI->getOperand(0)->getType();
-    bool lossInfo;
-    APFloat RValue = ConstRFp->getValue();
-    RValue.convert(LType->getFltSemantics(), APFloat::rmNearestTiesToEven,
-                   &lossInfo);
+  Type *LType = LHSI->getOperand(0)->getType();
+  Type *RType = RHSC->getType();
+  Type *LEleType = LType->getScalarType();
+  Type *REleType = RType->getScalarType();
 
-    return new FCmpInst(Pred, LHSI->getOperand(0),
-                        ConstantFP::get(LType, RValue), "", &I);
-  }
+  // The value adjacent to the constant in the narrow type, on the side
+  // chosen by RoundDown.
+  APFloat NextRValue = *RValue;
+  NextRValue.next(RoundDown);
 
-  if (RHSC->getType()->isVectorTy()) {
-    Type *LVecType = LHSI->getOperand(0)->getType();
-    Type *LEleType = dyn_cast<VectorType>(LVecType)->getElementType();
+  // Widen both ends of the bracket to the source element type.
+  APFloat ExtRValue = *RValue;
+  APFloat ExtNextRValue = NextRValue;
+  bool lossInfo;
+  ExtRValue.convert(LEleType->getFltSemantics(), APFloat::rmNearestTiesToEven,
+                    &lossInfo);
+  ExtNextRValue.convert(LEleType->getFltSemantics(),
+                        APFloat::rmNearestTiesToEven, &lossInfo);
 
-    FixedVectorType *VecType = dyn_cast<FixedVectorType>(RHSC->getType());
-    uint64_t EleNum = VecType->getNumElements();
+  // Bisect [LowBound, UpBound] in the wide type until the bounds are
+  // adjacent; the bound that still truncates to the constant is the new
+  // threshold.
+  APFloat RoundValue{LEleType->getFltSemantics()};
+  {
+    APFloat Two{LEleType->getFltSemantics(), 2};
+    APFloat LowBound = RoundDown ? ExtNextRValue : ExtRValue;
+    APFloat UpBound = RoundDown ? ExtRValue : ExtNextRValue;
+
+    while (true) {
+      APFloat DupUpBound = UpBound;
+      DupUpBound.next(true);
+      if (DupUpBound == LowBound) {
+        RoundValue = RoundDown ? UpBound : LowBound;
+        break;
+      }
 
-    std::vector<Constant *> EleVec(EleNum);
-    for (uint64_t Idx = 0; Idx < EleNum; ++Idx) {
-      bool lossInfo;
-      APFloat EleValue =
-          dyn_cast<ConstantFP>(RHSC->getAggregateElement(Idx))->getValueAPF();
-      EleValue.convert(LEleType->getFltSemantics(),
+      APFloat Mid = (LowBound + UpBound) / Two;
+
+      // The search only makes progress with a finite midpoint strictly
+      // inside the bracket. An infinite or NaN constant, or a sum that
+      // overflows the wide type, never yields one, and the loop would spin
+      // forever; give up on the fold instead.
+      if (!Mid.isFinite() || !(LowBound < Mid) || !(Mid < UpBound))
+        return nullptr;
+
+      APFloat TruncMid = Mid;
+      TruncMid.convert(REleType->getFltSemantics(),
                        APFloat::rmNearestTiesToEven, &lossInfo);
-      EleVec[Idx] = ConstantFP::get(LEleType, EleValue);
-    }
-
-    ArrayRef<Constant *> EleArr(EleVec);
 
-    return new FCmpInst(Pred, LHSI->getOperand(0), ConstantVector::get(EleArr),
-                        "", &I);
+      if (TruncMid == *RValue) {
+        if (RoundDown)
+          UpBound = Mid;
+        else
+          LowBound = Mid;
+      } else {
+        if (RoundDown)
+          LowBound = Mid;
+        else
+          UpBound = Mid;
+      }
+    }
   }
 
-  return nullptr;
+  return new FCmpInst(Pred, LHSI->getOperand(0),
+                      ConstantFP::get(LType, RoundValue), "", &I);
 }
 
 /// Optimize fabs(X) compared with zero.
diff --git a/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
@@ -5,18 +5,62 @@
 define i1 @fcmp_trunc(double %0) {
 ; CHECK-LABEL: define i1 @fcmp_trunc(
 ; CHECK-SAME: double [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = fcmp oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp oge double [[TMP0]], 0x4058FFFFF0000000
 ; CHECK-NEXT:    ret i1 [[RESULT]]
 ;
   %trunc = fptrunc double %0 to float
   %result = fcmp oge float %trunc, 1.000000e+02
   ret i1 %result
 }
 
+define i1 @fcmp_trunc_ult(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_ult(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp ult double [[TMP0]], 0x4068FFFFF0000000
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp ult float %trunc, 2.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_ole(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_ole(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp ole double [[TMP0]], 0x4072C00010000000
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp ole float %trunc, 3.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_ogt(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_ogt(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp ogt double [[TMP0]], 0x4079000010000000
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp ogt float %trunc, 4.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_zero(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_zero(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp oge double [[TMP0]], 0xB690000000000000
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp oge float %trunc, 0.000000
+  ret i1 %result
+}
+
 define i1 @fcmp_trunc_with_nnan(double %0) {
 ; CHECK-LABEL: define i1 @fcmp_trunc_with_nnan(
 ; CHECK-SAME: double [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = fcmp nnan oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp nnan oge double [[TMP0]], 0x4058FFFFF0000000
 ; CHECK-NEXT:    ret i1 [[RESULT]]
 ;
   %trunc = fptrunc double %0 to float
@@ -27,7 +71,7 @@ define i1 @fcmp_trunc_with_nnan(double %0) {
 define i1 @fcmp_trunc_with_ninf(double %0) {
 ; CHECK-LABEL: define i1 @fcmp_trunc_with_ninf(
 ; CHECK-SAME: double [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = fcmp ninf oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp ninf oge double [[TMP0]], 0x4058FFFFF0000000
 ; CHECK-NEXT:    ret i1 [[RESULT]]
 ;
   %trunc = fptrunc double %0 to float
@@ -38,7 +82,7 @@ define i1 @fcmp_trunc_with_ninf(double %0) {
 define i1 @fcmp_trunc_with_nsz(double %0) {
 ; CHECK-LABEL: define i1 @fcmp_trunc_with_nsz(
 ; CHECK-SAME: double [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = fcmp nsz oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp nsz oge double [[TMP0]], 0x4058FFFFF0000000
 ; CHECK-NEXT:    ret i1 [[RESULT]]
 ;
   %trunc = fptrunc double %0 to float
@@ -49,7 +93,7 @@ define i1 @fcmp_trunc_with_nsz(double %0) {
 define i1 @fcmp_trunc_with_reassoc(double %0) {
 ; CHECK-LABEL: define i1 @fcmp_trunc_with_reassoc(
 ; CHECK-SAME: double [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = fcmp reassoc oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp reassoc oge double [[TMP0]], 0x4058FFFFF0000000
 ; CHECK-NEXT:    ret i1 [[RESULT]]
 ;
   %trunc = fptrunc double %0 to float
@@ -60,7 +104,7 @@ define i1 @fcmp_trunc_with_reassoc(double %0) {
 define i1 @fcmp_trunc_with_fast(double %0) {
 ; CHECK-LABEL: define i1 @fcmp_trunc_with_fast(
 ; CHECK-SAME: double [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge double [[TMP0]], 0x4058FFFFF0000000 
 ; CHECK-NEXT:    ret i1 [[RESULT]]
 ;
   %trunc = fptrunc double %0 to float
@@ -71,29 +115,29 @@ define i1 @fcmp_trunc_with_fast(double %0) {
 define <4 x i1> @fcmp_vec_trunc(<4 x double> %0) {
 ; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc(
 ; CHECK-SAME: <4 x double> [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x double> [[TMP0]], <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x double> [[TMP0]], <double 0x3FEFFFFFF0000000, double 0x3FEFFFFFF0000000, double 0x3FEFFFFFF0000000, double 0x3FEFFFFFF0000000>
 ; CHECK-NEXT:    ret <4 x i1> [[CMP]]
 ;
   %vec = fptrunc <4 x double> %0 to <4 x float>
-  %cmp = fcmp olt <4 x float> %vec, <float 1.0, float 2.0, float 3.0, float 4.0>
+  %cmp = fcmp olt <4 x float> %vec, <float 1.0, float 1.0, float 1.0, float 1.0>
   ret <4 x i1> %cmp
 }
 
-define <4 x i1> @fcmp_vec_trunc_with_flag(<4 x double> %0) {
-; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc_with_flag(
-; CHECK-SAME: <4 x double> [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[CMP:%.*]] = fcmp fast olt <4 x double> [[TMP0]], <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
-; CHECK-NEXT:    ret <4 x i1> [[CMP]]
+define <1 x i1> @fcmp_vec_trunc_scalar(<1 x double> %0) {
+; CHECK-LABEL: define <1 x i1> @fcmp_vec_trunc_scalar(
+; CHECK-SAME: <1 x double> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp fast olt <1 x double> [[TMP0]], <double 0x3FEFFFFFF0000000>
+; CHECK-NEXT:    ret <1 x i1> [[CMP]]
 ;
-  %vec = fptrunc <4 x double> %0 to <4 x float>
-  %cmp = fcmp fast olt <4 x float> %vec, <float 1.0, float 2.0, float 3.0, float 4.0>
-  ret <4 x i1> %cmp
+  %vec = fptrunc <1 x double> %0 to <1 x float>
+  %cmp = fcmp fast olt <1 x float> %vec, <float 1.0>
+  ret <1 x i1> %cmp
 }
 
 define i1 @fcmp_trunc_fp128(fp128 %0) {
 ; CHECK-LABEL: define i1 @fcmp_trunc_fp128(
 ; CHECK-SAME: fp128 [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge fp128 [[TMP0]], 0xL00000000000000004005900000000000
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge fp128 [[TMP0]], 0xL000000000000000040058FFFFF000000
 ; CHECK-NEXT:    ret i1 [[RESULT]]
 ;
   %trunc = fptrunc fp128 %0 to float
@@ -104,7 +148,7 @@ define i1 @fcmp_trunc_fp128(fp128 %0) {
 define i1 @fcmp_trunc_x86_fp80(x86_fp80 %0) {
 ; CHECK-LABEL: define i1 @fcmp_trunc_x86_fp80(
 ; CHECK-SAME: x86_fp80 [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge x86_fp80 [[TMP0]], 0xK4005C800000000000000
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge x86_fp80 [[TMP0]], 0xK4005C7FFFF8000000000
 ; CHECK-NEXT:    ret i1 [[RESULT]]
 ;
   %trunc = fptrunc x86_fp80 %0 to float
@@ -115,7 +159,7 @@ define i1 @fcmp_trunc_x86_fp80(x86_fp80 %0) {
 define i1 @fcmp_trunc_ppc_fp128(ppc_fp128 %0) {
 ; CHECK-LABEL: define i1 @fcmp_trunc_ppc_fp128(
 ; CHECK-SAME: ppc_fp128 [[TMP0:%.*]]) {
-; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge ppc_fp128 [[TMP0]], 0xM40590000000000000000000000000000
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge ppc_fp128 [[TMP0]], 0xM4058FFFFF0000000BD00000000000000
 ; CHECK-NEXT:    ret i1 [[RESULT]]
 ;
   %trunc = fptrunc ppc_fp128 %0 to float