-
Couldn't load subscription status.
- Fork 15k
[InstCombine] Optimize redundant floating point comparisons in or/and inst's
#158097
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-transforms Author: Rajveer Singh Bharadwaj (Rajveer100) Changes: Resolves #157371. We can eliminate one of the `fcmp` when we have two same `olt` or `ogt` instructions matched in `or`/`and` simplification. Full diff: https://github.com/llvm/llvm-project/pull/158097.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 6e46898634070..0719932e1394d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -4514,6 +4514,30 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyAddWithRemainder(I))
return replaceInstUsesWith(I, V);
+ Value *V0, *V1;
+ const APFloat *V0Op1, *V1Op1;
+ if (match(Op0, m_SpecificFCmp(FCmpInst::FCMP_OLT, m_Value(V0),
+ m_APFloat(V0Op1))) &&
+ match(Op1, m_SpecificFCmp(FCmpInst::FCMP_OLT, m_Value(V1),
+ m_APFloat(V1Op1)))) {
+ if (V0 == V1) {
+ if (V0Op1 > V1Op1)
+ replaceInstUsesWith(I, Op0);
+ else if (V1Op1 > V0Op1)
+ replaceInstUsesWith(I, Op1);
+ }
+ } else if (match(Op0, m_SpecificFCmp(FCmpInst::FCMP_OGT, m_Value(V0),
+ m_APFloat(V0Op1))) &&
+ match(Op1, m_SpecificFCmp(FCmpInst::FCMP_OGT, m_Value(V1),
+ m_APFloat(V1Op1)))) {
+ if (V0 == V1) {
+ if (V0Op1 < V1Op1)
+ replaceInstUsesWith(I, Op1);
+ else if (V1Op1 < V0Op1)
+ replaceInstUsesWith(I, Op0);
+ }
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/or-fcmp.ll b/llvm/test/Transforms/InstCombine/or-fcmp.ll
index 193fe4b5cc722..926f66f622914 100644
--- a/llvm/test/Transforms/InstCombine/or-fcmp.ll
+++ b/llvm/test/Transforms/InstCombine/or-fcmp.ll
@@ -4657,3 +4657,63 @@ define i1 @or_fcmp_reassoc4(i1 %x, double %a, double %b) {
%retval = or i1 %cmp1, %or
ret i1 %retval
}
+
+define i1 @or_fcmp_redundant1(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant1(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz olt double [[V0]], 1.000000e-02
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz olt double [[V0]], 1.990000e+00
+; CHECK-NEXT: [[V3:%.*]] = or i1 [[V1]], [[V2]]
+; CHECK-NEXT: ret i1 [[V2]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 1.000000e-02
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant2(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant2(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz olt double [[V0]], 2.300000e+00
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz olt double [[V0]], 1.990000e+00
+; CHECK-NEXT: [[V3:%.*]] = or i1 [[V1]], [[V2]]
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 2.300000e+00
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant3(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant3(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz ogt double [[V0]], 1.000000e-02
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz olt double [[V0]], 1.990000e+00
+; CHECK-NEXT: [[V3:%.*]] = or i1 [[V1]], [[V2]]
+; CHECK-NEXT: ret i1 [[V3]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 1.000000e-02
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant4(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant4(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz ogt double [[V0]], 2.300000e+00
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz ogt double [[V0]], 1.990000e+00
+; CHECK-NEXT: [[V3:%.*]] = or i1 [[V1]], [[V2]]
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 2.300000e+00
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
diff --git a/llvm/test/Transforms/InstCombine/redundant-fcmp.ll b/llvm/test/Transforms/InstCombine/redundant-fcmp.ll
new file mode 100644
index 0000000000000..934d9e6c46507
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/redundant-fcmp.ll
@@ -0,0 +1,54 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -O3 -S | FileCheck %s
+
+define i1 @or_fcmp_redundant1(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant1(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V2:%.*]] = fcmp nsz olt double [[V0]], 1.990000e+00
+; CHECK-NEXT: ret i1 [[V2]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 1.000000e-02
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant2(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant2(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz olt double [[V0]], 2.300000e+00
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz olt double %v0, 2.300000e+00
+ %v2 = fcmp nsz olt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant3(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant3(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V3:%.*]] = fcmp nsz ogt double [[V0]], 1.990000e+00
+; CHECK-NEXT: ret i1 [[V3]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 1.000000e-02
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
+
+define i1 @or_fcmp_redundant4(ptr %arg0) {
+; CHECK-LABEL: @or_fcmp_redundant4(
+; CHECK-NEXT: [[V0:%.*]] = load double, ptr [[ARG0:%.*]], align 8
+; CHECK-NEXT: [[V1:%.*]] = fcmp nsz ogt double [[V0]], 2.300000e+00
+; CHECK-NEXT: ret i1 [[V1]]
+;
+ %v0 = load double, ptr %arg0, align 8
+ %v1 = fcmp nsz ogt double %v0, 2.300000e+00
+ %v2 = fcmp nsz ogt double %v0, 1.990000e+00
+ %v3 = or i1 %v1, %v2
+ ret i1 %v3
+}
|
45430d5 to
7a8a3b0
Compare
|
@zyw-bot mfuzz |
7a8a3b0 to
28d9dfa
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should check !isAnd if you only fold the pattern or(fcmp, fcmp).
|
General proof: proof |
Yeah, for the |
28d9dfa to
d219143
Compare
d219143 to
a65787b
Compare
`or` inst → `or`/`and` inst's
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
24257f7 to
0c3d1c3
Compare
|
@dtcxzyw |
0c3d1c3 to
52c9e4f
Compare
|
CI looks good now. |
52c9e4f to
1501af4
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG. As a follow-up, we can use the ConstantFPRange API to generalize this pattern (makeExactFCmpRegion + contains).
…and` inst's. Resolves llvm#157371. We can eliminate one of the `fcmp` when we have two same `olt` or `ogt` instructions matched in `or`/`and` simplification.
1501af4 to
b7a8db4
Compare
…parisons with `ConstantFPRange`. Follow-up of llvm#158097. Similar to `simplifyAndOrOfICmpsWithConstants`, we can do so for floating point comparisons.
Resolves #157371
We can eliminate one of the `fcmp` when we have two same `olt` or `ogt` instructions matched in `or`/`and` simplification.