Skip to content

Commit 73b1f0b

Browse files
committed
Optimize fptrunc(x)>=C1 --> x>=C2. Add check cases and support for vector types.
1 parent a1547d2 commit 73b1f0b

File tree

2 files changed

+160
-17
lines changed

2 files changed

+160
-17
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "InstCombineInternal.h"
14+
#include "llvm/ADT/APFloat.h"
1415
#include "llvm/ADT/APSInt.h"
1516
#include "llvm/ADT/ScopeExit.h"
1617
#include "llvm/ADT/SetVector.h"
18+
#include "llvm/ADT/SmallVector.h"
1719
#include "llvm/ADT/Statistic.h"
1820
#include "llvm/Analysis/CaptureTracking.h"
1921
#include "llvm/Analysis/CmpInstAnalysis.h"
@@ -24,14 +26,18 @@
2426
#include "llvm/IR/ConstantRange.h"
2527
#include "llvm/IR/Constants.h"
2628
#include "llvm/IR/DataLayout.h"
29+
#include "llvm/IR/DerivedTypes.h"
2730
#include "llvm/IR/InstrTypes.h"
2831
#include "llvm/IR/Instruction.h"
32+
#include "llvm/IR/Instructions.h"
2933
#include "llvm/IR/IntrinsicInst.h"
3034
#include "llvm/IR/PatternMatch.h"
35+
#include "llvm/IR/Value.h"
3136
#include "llvm/Support/Casting.h"
3237
#include "llvm/Support/KnownBits.h"
3338
#include "llvm/Transforms/InstCombine/InstCombiner.h"
3439
#include <bitset>
40+
#include <cstdint>
3541

3642
using namespace llvm;
3743
using namespace PatternMatch;
@@ -7888,25 +7894,48 @@ static Instruction *foldFCmpReciprocalAndZero(FCmpInst &I, Instruction *LHSI,
78887894
// Fold trunc(x) < constant --> x < constant if possible.
78897895
static Instruction *foldFCmpFpTrunc(FCmpInst &I, Instruction *LHSI,
78907896
Constant *RHSC) {
7891-
//
78927897
FCmpInst::Predicate Pred = I.getPredicate();
78937898

78947899
// Check that predicates are valid.
78957900
if ((Pred != FCmpInst::FCMP_OGT) && (Pred != FCmpInst::FCMP_OLT) &&
78967901
(Pred != FCmpInst::FCMP_OGE) && (Pred != FCmpInst::FCMP_OLE))
78977902
return nullptr;
78987903

7899-
auto *LType = LHSI->getOperand(0)->getType();
7900-
auto *RType = RHSC->getType();
7904+
if (ConstantFP *ConstRFp = dyn_cast<ConstantFP>(RHSC)) {
7905+
Type *LType = LHSI->getOperand(0)->getType();
7906+
bool lossInfo;
7907+
APFloat RValue = ConstRFp->getValue();
7908+
RValue.convert(LType->getFltSemantics(), APFloat::rmNearestTiesToEven,
7909+
&lossInfo);
79017910

7902-
if (!(LType->isFloatingPointTy() && RType->isFloatingPointTy() &&
7903-
LType->getTypeID() >= RType->getTypeID()))
7904-
return nullptr;
7911+
return new FCmpInst(Pred, LHSI->getOperand(0),
7912+
ConstantFP::get(LType, RValue), "", &I);
7913+
}
7914+
7915+
if (RHSC->getType()->isVectorTy()) {
7916+
Type *LVecType = LHSI->getOperand(0)->getType();
7917+
Type *LEleType = dyn_cast<VectorType>(LVecType)->getElementType();
7918+
7919+
FixedVectorType *VecType = dyn_cast<FixedVectorType>(RHSC->getType());
7920+
uint64_t EleNum = VecType->getNumElements();
79057921

7906-
auto *ROperand = llvm::ConstantFP::get(
7907-
LType, dyn_cast<ConstantFP>(RHSC)->getValue().convertToDouble());
7922+
std::vector<Constant *> EleVec(EleNum);
7923+
for (uint64_t Idx = 0; Idx < EleNum; ++Idx) {
7924+
bool lossInfo;
7925+
APFloat EleValue =
7926+
dyn_cast<ConstantFP>(RHSC->getAggregateElement(Idx))->getValueAPF();
7927+
EleValue.convert(LEleType->getFltSemantics(),
7928+
APFloat::rmNearestTiesToEven, &lossInfo);
7929+
EleVec[Idx] = ConstantFP::get(LEleType, EleValue);
7930+
}
7931+
7932+
ArrayRef<Constant *> EleArr(EleVec);
79087933

7909-
return new FCmpInst(Pred, LHSI->getOperand(0), ROperand, "", &I);
7934+
return new FCmpInst(Pred, LHSI->getOperand(0), ConstantVector::get(EleArr),
7935+
"", &I);
7936+
}
7937+
7938+
return nullptr;
79107939
}
79117940

79127941
/// Optimize fabs(X) compared with zero.
Lines changed: 122 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,125 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
12
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
23

34

4-
;CHECK-LABEL: @src(
5-
;CHECK: %result = fcmp oge double %0, 1.000000e+02
6-
;CHECK-NEXT: ret i1 %result
7-
define i1 @src(double %0) {
8-
%trunc = fptrunc double %0 to float
9-
%result = fcmp oge float %trunc, 1.000000e+02
10-
ret i1 %result
11-
}
5+
; Baseline scalar case: an `oge` compare of a double truncated to float against
; 100.0 is expected to become an `oge` compare on the double input itself.
define i1 @fcmp_trunc(double %0) {
6+
; CHECK-LABEL: define i1 @fcmp_trunc(
7+
; CHECK-SAME: double [[TMP0:%.*]]) {
8+
; CHECK-NEXT: [[RESULT:%.*]] = fcmp oge double [[TMP0]], 1.000000e+02
9+
; CHECK-NEXT: ret i1 [[RESULT]]
10+
;
11+
%trunc = fptrunc double %0 to float
12+
%result = fcmp oge float %trunc, 1.000000e+02
13+
ret i1 %result
14+
}
15+
16+
; Same shape as the scalar baseline, with `nnan` on the compare; the flag is
; expected to survive on the rewritten fcmp.
define i1 @fcmp_trunc_with_nnan(double %0) {
17+
; CHECK-LABEL: define i1 @fcmp_trunc_with_nnan(
18+
; CHECK-SAME: double [[TMP0:%.*]]) {
19+
; CHECK-NEXT: [[RESULT:%.*]] = fcmp nnan oge double [[TMP0]], 1.000000e+02
20+
; CHECK-NEXT: ret i1 [[RESULT]]
21+
;
22+
%trunc = fptrunc double %0 to float
23+
%result = fcmp nnan oge float %trunc, 1.000000e+02
24+
ret i1 %result
25+
}
26+
27+
; Same shape as the scalar baseline, with `ninf` on the compare; the flag is
; expected to survive on the rewritten fcmp.
define i1 @fcmp_trunc_with_ninf(double %0) {
28+
; CHECK-LABEL: define i1 @fcmp_trunc_with_ninf(
29+
; CHECK-SAME: double [[TMP0:%.*]]) {
30+
; CHECK-NEXT: [[RESULT:%.*]] = fcmp ninf oge double [[TMP0]], 1.000000e+02
31+
; CHECK-NEXT: ret i1 [[RESULT]]
32+
;
33+
%trunc = fptrunc double %0 to float
34+
%result = fcmp ninf oge float %trunc, 1.000000e+02
35+
ret i1 %result
36+
}
37+
38+
; Same shape as the scalar baseline, with `nsz` on the compare; the flag is
; expected to survive on the rewritten fcmp.
define i1 @fcmp_trunc_with_nsz(double %0) {
39+
; CHECK-LABEL: define i1 @fcmp_trunc_with_nsz(
40+
; CHECK-SAME: double [[TMP0:%.*]]) {
41+
; CHECK-NEXT: [[RESULT:%.*]] = fcmp nsz oge double [[TMP0]], 1.000000e+02
42+
; CHECK-NEXT: ret i1 [[RESULT]]
43+
;
44+
%trunc = fptrunc double %0 to float
45+
%result = fcmp nsz oge float %trunc, 1.000000e+02
46+
ret i1 %result
47+
}
48+
49+
; Same shape as the scalar baseline, with `reassoc` on the compare; the flag is
; expected to survive on the rewritten fcmp.
define i1 @fcmp_trunc_with_reassoc(double %0) {
50+
; CHECK-LABEL: define i1 @fcmp_trunc_with_reassoc(
51+
; CHECK-SAME: double [[TMP0:%.*]]) {
52+
; CHECK-NEXT: [[RESULT:%.*]] = fcmp reassoc oge double [[TMP0]], 1.000000e+02
53+
; CHECK-NEXT: ret i1 [[RESULT]]
54+
;
55+
%trunc = fptrunc double %0 to float
56+
%result = fcmp reassoc oge float %trunc, 1.000000e+02
57+
ret i1 %result
58+
}
59+
60+
; Same shape as the scalar baseline, with the full `fast` flag set on the
; compare; `fast` is expected to survive on the rewritten fcmp.
define i1 @fcmp_trunc_with_fast(double %0) {
61+
; CHECK-LABEL: define i1 @fcmp_trunc_with_fast(
62+
; CHECK-SAME: double [[TMP0:%.*]]) {
63+
; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge double [[TMP0]], 1.000000e+02
64+
; CHECK-NEXT: ret i1 [[RESULT]]
65+
;
66+
%trunc = fptrunc double %0 to float
67+
%result = fcmp fast oge float %trunc, 1.000000e+02
68+
ret i1 %result
69+
}
70+
71+
; Fixed-width vector case: `olt` on <4 x double> truncated to <4 x float>
; against a non-splat constant. Each constant lane is expected to be re-emitted
; as a double and the compare moved onto the <4 x double> input.
define <4 x i1> @fcmp_vec_trunc(<4 x double> %0) {
72+
; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc(
73+
; CHECK-SAME: <4 x double> [[TMP0:%.*]]) {
74+
; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[TMP0]], <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
75+
; CHECK-NEXT: ret <4 x i1> [[CMP]]
76+
;
77+
%vec = fptrunc <4 x double> %0 to <4 x float>
78+
%cmp = fcmp olt <4 x float> %vec, <float 1.0, float 2.0, float 3.0, float 4.0>
79+
ret <4 x i1> %cmp
80+
}
81+
82+
; Fixed-width vector case with the `fast` flag on the compare; the flag is
; expected to survive on the rewritten <4 x double> fcmp.
define <4 x i1> @fcmp_vec_trunc_with_flag(<4 x double> %0) {
83+
; CHECK-LABEL: define <4 x i1> @fcmp_vec_trunc_with_flag(
84+
; CHECK-SAME: <4 x double> [[TMP0:%.*]]) {
85+
; CHECK-NEXT: [[CMP:%.*]] = fcmp fast olt <4 x double> [[TMP0]], <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00, double 4.000000e+00>
86+
; CHECK-NEXT: ret <4 x i1> [[CMP]]
87+
;
88+
%vec = fptrunc <4 x double> %0 to <4 x float>
89+
%cmp = fcmp fast olt <4 x float> %vec, <float 1.0, float 2.0, float 3.0, float 4.0>
90+
ret <4 x i1> %cmp
91+
}
92+
93+
; Wide source type fp128 truncated to float, `fast oge` against 100.0; the
; constant is expected to be re-emitted as an fp128 literal on the rewritten
; compare.
define i1 @fcmp_trunc_fp128(fp128 %0) {
94+
; CHECK-LABEL: define i1 @fcmp_trunc_fp128(
95+
; CHECK-SAME: fp128 [[TMP0:%.*]]) {
96+
; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge fp128 [[TMP0]], 0xL00000000000000004005900000000000
97+
; CHECK-NEXT: ret i1 [[RESULT]]
98+
;
99+
%trunc = fptrunc fp128 %0 to float
100+
%result = fcmp fast oge float %trunc, 1.000000e+02
101+
ret i1 %result
102+
}
103+
104+
; Wide source type x86_fp80 truncated to float, `fast oge` against 100.0; the
; constant is expected to be re-emitted as an x86_fp80 literal on the rewritten
; compare.
define i1 @fcmp_trunc_x86_fp80(x86_fp80 %0) {
105+
; CHECK-LABEL: define i1 @fcmp_trunc_x86_fp80(
106+
; CHECK-SAME: x86_fp80 [[TMP0:%.*]]) {
107+
; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge x86_fp80 [[TMP0]], 0xK4005C800000000000000
108+
; CHECK-NEXT: ret i1 [[RESULT]]
109+
;
110+
%trunc = fptrunc x86_fp80 %0 to float
111+
%result = fcmp fast oge float %trunc, 1.000000e+02
112+
ret i1 %result
113+
}
114+
115+
; Wide source type ppc_fp128 truncated to float, `fast oge` against 100.0; the
; constant is expected to be re-emitted as a ppc_fp128 literal on the rewritten
; compare.
define i1 @fcmp_trunc_ppc_fp128(ppc_fp128 %0) {
116+
; CHECK-LABEL: define i1 @fcmp_trunc_ppc_fp128(
117+
; CHECK-SAME: ppc_fp128 [[TMP0:%.*]]) {
118+
; CHECK-NEXT: [[RESULT:%.*]] = fcmp fast oge ppc_fp128 [[TMP0]], 0xM40590000000000000000000000000000
119+
; CHECK-NEXT: ret i1 [[RESULT]]
120+
;
121+
%trunc = fptrunc ppc_fp128 %0 to float
122+
%result = fcmp fast oge float %trunc, 1.000000e+02
123+
ret i1 %result
124+
}
125+

0 commit comments

Comments
 (0)