Optimize fptrunc(x)>=C1 --> x>=C2. Add check cases and support for vector types.

kissholic · kissholic · commit 73b1f0b57b45 · 2024-07-25T08:53:59.000+08:00
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -11,9 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
+#include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/APSInt.h"
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CaptureTracking.h"
 #include "llvm/Analysis/CmpInstAnalysis.h"
@@ -24,14 +26,18 @@
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Value.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include <bitset>
+#include <cstdint>
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -7888,25 +7894,48 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
 // Fold trunc(x) < constant --> x < constant if possible.
 static Instruction *foldFCmpFpTrunc(FCmpInst &I, Instruction *LHSI,
                                     Constant *RHSC) {
-  //
   FCmpInst::Predicate Pred = I.getPredicate();
 
   // Check that predicates are valid.
   if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
       (Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
     return nullptr;
 
-  auto *LType = LHSI->getOperand(0)->getType();
-  auto *RType = RHSC->getType();
+  if (ConstantFP *ConstRFp = dyn_cast<ConstantFP>(RHSC)) {
+    Type *LType = LHSI->getOperand(0)->getType();
+    bool lossInfo;
+    APFloat RValue = ConstRFp->getValue();
+    RValue.convert(LType->getFltSemantics(), APFloat::rmNearestTiesToEven,
+                   &lossInfo);
 
-  if (!(LType->isFloatingPointTy() && RType->isFloatingPointTy() &&
-        LType->getTypeID() >= RType->getTypeID()))
-    return nullptr;
+    return new FCmpInst(Pred, LHSI->getOperand(0),
+                        ConstantFP::get(LType, RValue), "", &I);
+  }
+
+  if (RHSC->getType()->isVectorTy()) {
+    Type *LVecType = LHSI->getOperand(0)->getType();
+    Type *LEleType = dyn_cast<VectorType>(LVecType)->getElementType();
+
+    FixedVectorType *VecType = dyn_cast<FixedVectorType>(RHSC->getType());
+    uint64_t EleNum = VecType->getNumElements();
 
-  auto *ROperand = llvm::ConstantFP::get(
-      LType, dyn_cast<ConstantFP>(RHSC)->getValue().convertToDouble());
+    std::vector<Constant *> EleVec(EleNum);
+    for (uint64_t Idx = 0; Idx < EleNum; ++Idx) {
+      bool lossInfo;
+      APFloat EleValue =
+          dyn_cast<ConstantFP>(RHSC->getAggregateElement(Idx))->getValueAPF();
+      EleValue.convert(LEleType->getFltSemantics(),
+                       APFloat::rmNearestTiesToEven, &lossInfo);
+      EleVec[Idx] = ConstantFP::get(LEleType, EleValue);
+    }
+
+    ArrayRef<Constant *> EleArr(EleVec);
 
-  return new FCmpInst(Pred, LHSI->getOperand(0), ROperand, "", &I);
+    return new FCmpInst(Pred, LHSI->getOperand(0), ConstantVector::get(EleArr),
+                        "", &I);
+  }
+
+  return nullptr;
 }
 
 /// Optimize fabs(X) compared with zero.
diff --git a/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll b/llvm/test/Transforms/InstCombine/fold-fcmp-trunc.ll
@@ -1,11 +1,125 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -passes=instcombine -S < %s | FileCheck %s
 
 
-;CHECK-LABEL: @src(
-;CHECK: %result = fcmp oge double %0, 1.000000e+02
-;CHECK-NEXT: ret i1 %result
-define i1 @src(double %0) {
-    %trunc = fptrunc double %0 to float
-    %result = fcmp oge float %trunc, 1.000000e+02
-    ret i1 %result
-}
+define i1 @fcmp_trunc(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_nnan(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_nnan(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp nnan oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp nnan oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_ninf(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_ninf(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp ninf oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp ninf oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_nsz(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_nsz(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp nsz oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp nsz oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_reassoc(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_reassoc(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp reassoc oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp reassoc oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_with_fast(double %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_with_fast(
+; CHECK-SAME: double [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge double [[TMP0]], 1.000000e+02
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc double %0 to float
+  %result = fcmp fast oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define <4 x i1> @fcmp_vec_trunc(<4 x double> %0) {
+; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc(
+; CHECK-SAME: <4 x double> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp olt <4 x double> [[TMP0]], <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
+; CHECK-NEXT:    ret <4 x i1> [[CMP]]
+;
+  %vec = fptrunc <4 x double> %0 to <4 x float>
+  %cmp = fcmp olt <4 x float> %vec, <float 1.0, float 2.0, float 3.0, float 4.0>
+  ret <4 x i1> %cmp
+}
+
+define <4 x i1> @fcmp_vec_trunc_with_flag(<4 x double> %0) {
+; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc_with_flag(
+; CHECK-SAME: <4 x double> [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[CMP:%.*]] = fcmp fast olt <4 x double> [[TMP0]], <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
+; CHECK-NEXT:    ret <4 x i1> [[CMP]]
+;
+  %vec = fptrunc <4 x double> %0 to <4 x float>
+  %cmp = fcmp fast olt <4 x float> %vec, <float 1.0, float 2.0, float 3.0, float 4.0>
+  ret <4 x i1> %cmp
+}
+
+define i1 @fcmp_trunc_fp128(fp128 %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_fp128(
+; CHECK-SAME: fp128 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge fp128 [[TMP0]], 0xL00000000000000004005900000000000
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc fp128 %0 to float
+  %result = fcmp fast oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_x86_fp80(x86_fp80 %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_x86_fp80(
+; CHECK-SAME: x86_fp80 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge x86_fp80 [[TMP0]], 0xK4005C800000000000000
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc x86_fp80 %0 to float
+  %result = fcmp fast oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+
+define i1 @fcmp_trunc_ppc_fp128(ppc_fp128 %0) {
+; CHECK-LABEL: define i1 @fcmp_trunc_ppc_fp128(
+; CHECK-SAME: ppc_fp128 [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[RESULT:%.*]] = fcmp fast oge ppc_fp128 [[TMP0]], 0xM40590000000000000000000000000000
+; CHECK-NEXT:    ret i1 [[RESULT]]
+;
+  %trunc = fptrunc ppc_fp128 %0 to float
+  %result = fcmp fast oge float %trunc, 1.000000e+02
+  ret i1 %result
+}
+