Skip to content

Commit 08a58b2

Browse files
authored
[InstCombine] Optimize redundant floating point comparisons in or/and inst's (#158097)
Resolves #157371. We can eliminate one of the `fcmp` instructions when an `or`/`and` simplification matches two `olt` (or two `ogt`) comparisons of the same value against constants.
1 parent 30633f3 commit 08a58b2

File tree

3 files changed

+216
-4
lines changed

3 files changed

+216
-4
lines changed

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1850,6 +1850,35 @@ static Value *simplifyAndOrOfFCmps(const SimplifyQuery &Q, FCmpInst *LHS,
18501850
: ConstantInt::getBool(LHS->getType(), !IsAnd);
18511851
}
18521852

1853+
Value *V0;
1854+
const APFloat *V0Op1, *V1Op1;
1855+
// (fcmp olt V0, V0Op1) || (fcmp olt V0, V1Op1)
1856+
// --> fcmp olt V0, max(V0Op1, V1Op1)
1857+
// (fcmp ogt V0, V0Op1) || (fcmp ogt V0, V1Op1)
1858+
// --> fcmp ogt V0, min(V0Op1, V1Op1)
1859+
//
1860+
// (fcmp olt V0, V0Op1) && (fcmp olt V0, V1Op1)
1861+
// --> fcmp olt V0, min(V0Op1, V1Op1)
1862+
// (fcmp ogt V0, V0Op1) && (fcmp ogt V0, V1Op1)
1863+
// --> fcmp ogt V0, max(V0Op1, V1Op1)
1864+
if (match(LHS, m_SpecificFCmp(FCmpInst::FCMP_OLT, m_Value(V0),
1865+
m_APFloat(V0Op1))) &&
1866+
match(RHS, m_SpecificFCmp(FCmpInst::FCMP_OLT, m_Specific(V0),
1867+
m_APFloat(V1Op1)))) {
1868+
if (*V0Op1 > *V1Op1)
1869+
return IsAnd ? RHS : LHS;
1870+
if (*V1Op1 > *V0Op1)
1871+
return IsAnd ? LHS : RHS;
1872+
} else if (match(LHS, m_SpecificFCmp(FCmpInst::FCMP_OGT, m_Value(V0),
1873+
m_APFloat(V0Op1))) &&
1874+
match(RHS, m_SpecificFCmp(FCmpInst::FCMP_OGT, m_Specific(V0),
1875+
m_APFloat(V1Op1)))) {
1876+
if (*V0Op1 < *V1Op1)
1877+
return IsAnd ? RHS : LHS;
1878+
if (*V1Op1 < *V0Op1)
1879+
return IsAnd ? LHS : RHS;
1880+
}
1881+
18531882
return nullptr;
18541883
}
18551884

llvm/test/CodeGen/AMDGPU/xor3-i1-const.ll

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,7 @@
55
define amdgpu_ps float @xor3_i1_const(float inreg %arg1, i32 inreg %arg2) {
66
; GCN-LABEL: xor3_i1_const:
77
; GCN: ; %bb.0: ; %main_body
8-
; GCN-NEXT: v_mov_b32_e32 v0, 0x42640000
9-
; GCN-NEXT: v_cmp_lt_f32_e64 s[2:3], s0, 0
10-
; GCN-NEXT: v_cmp_lt_f32_e32 vcc, s0, v0
11-
; GCN-NEXT: s_and_b64 s[0:1], s[2:3], vcc
8+
; GCN-NEXT: v_cmp_lt_f32_e64 s[0:1], s0, 0
129
; GCN-NEXT: v_cndmask_b32_e64 v0, 1.0, 0, s[0:1]
1310
; GCN-NEXT: ; return to shader part epilog
1411
main_body:
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3+
4+
define i1 @or_fcmp_redundant_or1(double %v0) {
5+
; CHECK-LABEL: @or_fcmp_redundant_or1(
6+
; CHECK-NEXT: [[V2:%.*]] = fcmp olt double [[V0:%.*]], 1.990000e+00
7+
; CHECK-NEXT: ret i1 [[V2]]
8+
;
9+
%v1 = fcmp olt double %v0, 1.000000e-02
10+
%v2 = fcmp olt double %v0, 1.990000e+00
11+
%v3 = or i1 %v1, %v2
12+
ret i1 %v3
13+
}
14+
15+
define i1 @or_fcmp_redundant_or2(double %v0) {
16+
; CHECK-LABEL: @or_fcmp_redundant_or2(
17+
; CHECK-NEXT: [[V1:%.*]] = fcmp olt double [[V0:%.*]], 2.300000e+00
18+
; CHECK-NEXT: ret i1 [[V1]]
19+
;
20+
%v1 = fcmp olt double %v0, 2.300000e+00
21+
%v2 = fcmp olt double %v0, 1.990000e+00
22+
%v3 = or i1 %v1, %v2
23+
ret i1 %v3
24+
}
25+
26+
define i1 @or_fcmp_redundant_or3(double %v0) {
27+
; CHECK-LABEL: @or_fcmp_redundant_or3(
28+
; CHECK-NEXT: [[V1:%.*]] = fcmp ogt double [[V0:%.*]], 1.000000e-02
29+
; CHECK-NEXT: ret i1 [[V1]]
30+
;
31+
%v1 = fcmp ogt double %v0, 1.000000e-02
32+
%v2 = fcmp ogt double %v0, 1.990000e+00
33+
%v3 = or i1 %v1, %v2
34+
ret i1 %v3
35+
}
36+
37+
define i1 @or_fcmp_redundant_or4(double %v0) {
38+
; CHECK-LABEL: @or_fcmp_redundant_or4(
39+
; CHECK-NEXT: [[V2:%.*]] = fcmp ogt double [[V0:%.*]], 1.990000e+00
40+
; CHECK-NEXT: ret i1 [[V2]]
41+
;
42+
%v1 = fcmp ogt double %v0, 2.300000e+00
43+
%v2 = fcmp ogt double %v0, 1.990000e+00
44+
%v3 = or i1 %v1, %v2
45+
ret i1 %v3
46+
}
47+
48+
define i1 @or_fcmp_redundant_or_neg1(double %v0) {
49+
; CHECK-LABEL: @or_fcmp_redundant_or_neg1(
50+
; CHECK-NEXT: [[V1:%.*]] = fcmp olt double [[V0:%.*]], 1.000000e-02
51+
; CHECK-NEXT: [[V2:%.*]] = fcmp ogt double [[V0]], 1.990000e+00
52+
; CHECK-NEXT: [[V3:%.*]] = or i1 [[V1]], [[V2]]
53+
; CHECK-NEXT: ret i1 [[V3]]
54+
;
55+
%v1 = fcmp olt double %v0, 1.000000e-02
56+
%v2 = fcmp ogt double %v0, 1.990000e+00
57+
%v3 = or i1 %v1, %v2
58+
ret i1 %v3
59+
}
60+
61+
define i1 @or_fcmp_redundant_or_neg2(double %v0) {
62+
; CHECK-LABEL: @or_fcmp_redundant_or_neg2(
63+
; CHECK-NEXT: [[V1:%.*]] = fcmp ogt double [[V0:%.*]], 1.000000e-02
64+
; CHECK-NEXT: [[V2:%.*]] = fcmp olt double [[V0]], 1.990000e+00
65+
; CHECK-NEXT: [[V3:%.*]] = or i1 [[V1]], [[V2]]
66+
; CHECK-NEXT: ret i1 [[V3]]
67+
;
68+
%v1 = fcmp ogt double %v0, 1.000000e-02
69+
%v2 = fcmp olt double %v0, 1.990000e+00
70+
%v3 = or i1 %v1, %v2
71+
ret i1 %v3
72+
}
73+
74+
define i1 @or_fcmp_redundant_and1(double %v0) {
75+
; CHECK-LABEL: @or_fcmp_redundant_and1(
76+
; CHECK-NEXT: [[V1:%.*]] = fcmp olt double [[V0:%.*]], 1.000000e-02
77+
; CHECK-NEXT: ret i1 [[V1]]
78+
;
79+
%v1 = fcmp olt double %v0, 1.000000e-02
80+
%v2 = fcmp olt double %v0, 1.990000e+00
81+
%v3 = and i1 %v1, %v2
82+
ret i1 %v3
83+
}
84+
85+
define i1 @or_fcmp_redundant_and2(double %v0) {
86+
; CHECK-LABEL: @or_fcmp_redundant_and2(
87+
; CHECK-NEXT: [[V2:%.*]] = fcmp olt double [[V0:%.*]], 1.990000e+00
88+
; CHECK-NEXT: ret i1 [[V2]]
89+
;
90+
%v1 = fcmp olt double %v0, 2.300000e+00
91+
%v2 = fcmp olt double %v0, 1.990000e+00
92+
%v3 = and i1 %v1, %v2
93+
ret i1 %v3
94+
}
95+
96+
define i1 @or_fcmp_redundant_and3(double %v0) {
97+
; CHECK-LABEL: @or_fcmp_redundant_and3(
98+
; CHECK-NEXT: [[V2:%.*]] = fcmp ogt double [[V0:%.*]], 1.990000e+00
99+
; CHECK-NEXT: ret i1 [[V2]]
100+
;
101+
%v1 = fcmp ogt double %v0, 1.000000e-02
102+
%v2 = fcmp ogt double %v0, 1.990000e+00
103+
%v3 = and i1 %v1, %v2
104+
ret i1 %v3
105+
}
106+
107+
define i1 @or_fcmp_redundant_and4(double %v0) {
108+
; CHECK-LABEL: @or_fcmp_redundant_and4(
109+
; CHECK-NEXT: [[V1:%.*]] = fcmp ogt double [[V0:%.*]], 2.300000e+00
110+
; CHECK-NEXT: ret i1 [[V1]]
111+
;
112+
%v1 = fcmp ogt double %v0, 2.300000e+00
113+
%v2 = fcmp ogt double %v0, 1.990000e+00
114+
%v3 = and i1 %v1, %v2
115+
ret i1 %v3
116+
}
117+
118+
define i1 @or_fcmp_redundant_and_neg1(double %v0) {
119+
; CHECK-LABEL: @or_fcmp_redundant_and_neg1(
120+
; CHECK-NEXT: [[V1:%.*]] = fcmp olt double [[V0:%.*]], 1.000000e-02
121+
; CHECK-NEXT: [[V2:%.*]] = fcmp ogt double [[V0]], 1.990000e+00
122+
; CHECK-NEXT: [[V3:%.*]] = and i1 [[V1]], [[V2]]
123+
; CHECK-NEXT: ret i1 [[V3]]
124+
;
125+
%v1 = fcmp olt double %v0, 1.000000e-02
126+
%v2 = fcmp ogt double %v0, 1.990000e+00
127+
%v3 = and i1 %v1, %v2
128+
ret i1 %v3
129+
}
130+
131+
define i1 @or_fcmp_redundant_and_neg2(double %v0) {
132+
; CHECK-LABEL: @or_fcmp_redundant_and_neg2(
133+
; CHECK-NEXT: [[V1:%.*]] = fcmp ogt double [[V0:%.*]], 1.000000e-02
134+
; CHECK-NEXT: [[V2:%.*]] = fcmp olt double [[V0]], 1.990000e+00
135+
; CHECK-NEXT: [[V3:%.*]] = and i1 [[V1]], [[V2]]
136+
; CHECK-NEXT: ret i1 [[V3]]
137+
;
138+
%v1 = fcmp ogt double %v0, 1.000000e-02
139+
%v2 = fcmp olt double %v0, 1.990000e+00
140+
%v3 = and i1 %v1, %v2
141+
ret i1 %v3
142+
}
143+
144+
define i1 @or_fcmp_redundant_select_or1(double %v0) {
145+
; CHECK-LABEL: @or_fcmp_redundant_select_or1(
146+
; CHECK-NEXT: [[V1:%.*]] = fcmp olt double [[V0:%.*]], 1.000000e-02
147+
; CHECK-NEXT: ret i1 [[V1]]
148+
;
149+
%v1 = fcmp olt double %v0, 1.000000e-02
150+
%v2 = fcmp olt double %v0, 1.990000e+00
151+
%v3 = select i1 %v1, i1 %v2, i1 false
152+
ret i1 %v3
153+
}
154+
155+
define i1 @or_fcmp_redundant_select_or2(double %v0) {
156+
; CHECK-LABEL: @or_fcmp_redundant_select_or2(
157+
; CHECK-NEXT: [[V2:%.*]] = fcmp ogt double [[V0:%.*]], 1.990000e+00
158+
; CHECK-NEXT: ret i1 [[V2]]
159+
;
160+
%v1 = fcmp ogt double %v0, 1.000000e-02
161+
%v2 = fcmp ogt double %v0, 1.990000e+00
162+
%v3 = select i1 %v1, i1 %v2, i1 false
163+
ret i1 %v3
164+
}
165+
166+
define i1 @or_fcmp_redundant_select_and1(double %v0) {
167+
; CHECK-LABEL: @or_fcmp_redundant_select_and1(
168+
; CHECK-NEXT: [[V2:%.*]] = fcmp olt double [[V0:%.*]], 1.990000e+00
169+
; CHECK-NEXT: ret i1 [[V2]]
170+
;
171+
%v1 = fcmp olt double %v0, 1.000000e-02
172+
%v2 = fcmp olt double %v0, 1.990000e+00
173+
%v3 = select i1 %v1, i1 true, i1 %v2
174+
ret i1 %v3
175+
}
176+
177+
define i1 @or_fcmp_redundant_select_and2(double %v0) {
178+
; CHECK-LABEL: @or_fcmp_redundant_select_and2(
179+
; CHECK-NEXT: [[V1:%.*]] = fcmp ogt double [[V0:%.*]], 1.000000e-02
180+
; CHECK-NEXT: ret i1 [[V1]]
181+
;
182+
%v1 = fcmp ogt double %v0, 1.000000e-02
183+
%v2 = fcmp ogt double %v0, 1.990000e+00
184+
%v3 = select i1 %v1, i1 true, i1 %v2
185+
ret i1 %v3
186+
}

0 commit comments

Comments
 (0)