-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[InstCombine] Fold icmp pred X + K, Y -> icmp pred2 X, Y if both X and Y are divisible by K
#147130
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: Yingwei Zheng (dtcxzyw) Changes: This patch generalizes `icmp ule X +nuw 1, Y -> icmp ult X, Y`-like optimizations to handle the case where the added RHS constant is a common power-of-2 divisor of both X and Y. Compile-time improvement (Stage2-O3 -0.09%): https://llvm-compile-time-tracker.com/compare.php?from=0ba59587fa98849ed5107fee4134e810e84b69a3&to=f80e5fe0bb2e63c05401bde7cd42899ea270909b&stat=instructions:u Full diff: https://github.com/llvm/llvm-project/pull/147130.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 6737b50405ee2..8d8682880f41c 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5120,6 +5120,15 @@ static Instruction *foldICmpXorXX(ICmpInst &I, const SimplifyQuery &Q,
return nullptr;
}
+/// Return true if X is a multiple of C.
+/// TODO: Handle non-power-of-2 factors.
+static bool isMultipleOf(Value *X, const APInt &C, const SimplifyQuery &Q) {
+ if (!C.isPowerOf2())
+ return false;
+
+ return MaskedValueIsZero(X, C - 1, Q);
+}
+
/// Try to fold icmp (binop), X or icmp X, (binop).
/// TODO: A large part of this logic is duplicated in InstSimplify's
/// simplifyICmpWithBinOp(). We should be able to share that and avoid the code
@@ -5278,66 +5287,62 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, Y, Z);
}
- // icmp slt (A + -1), Op1 -> icmp sle A, Op1
- if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
- match(B, m_AllOnes()))
- return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
-
- // icmp sge (A + -1), Op1 -> icmp sgt A, Op1
- if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
- match(B, m_AllOnes()))
- return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
-
- // icmp sle (A + 1), Op1 -> icmp slt A, Op1
- if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
- return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
-
- // icmp sgt (A + 1), Op1 -> icmp sge A, Op1
- if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
- return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+ if (ICmpInst::isRelational(Pred)) {
+ // Return true if both X and Y are divisible by Z/-Z.
+ // TODO: Generalize to check if (X - Y) is divisible by Z/-Z.
+ auto ShareCommonDivisor = [&Q](Value *X, Value *Y, Value *Z,
+ bool IsNegative) -> bool {
+ const APInt *OffsetC;
+ if (!match(Z, m_APInt(OffsetC)))
+ return false;
- // icmp sgt Op0, (C + -1) -> icmp sge Op0, C
- if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
- match(D, m_AllOnes()))
- return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
+ // Fast path for Z == 1/-1.
+ if (IsNegative ? OffsetC->isAllOnes() : OffsetC->isOne())
+ return true;
- // icmp sle Op0, (C + -1) -> icmp slt Op0, C
- if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
- match(D, m_AllOnes()))
- return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
+ APInt C = *OffsetC;
+ if (IsNegative)
+ C.negate();
+ // Note: -INT_MIN is also negative.
+ if (!C.isStrictlyPositive())
+ return false;
- // icmp sge Op0, (C + 1) -> icmp sgt Op0, C
- if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
- return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
+ return isMultipleOf(X, C, Q) && isMultipleOf(Y, C, Q);
+ };
- // icmp slt Op0, (C + 1) -> icmp sle Op0, C
- if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
- return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
+ // TODO: The subtraction-related identities shown below also hold, but
+ // canonicalization from (X -nuw 1) to (X + -1) means that the combinations
+ // wouldn't happen even if they were implemented.
+ //
+ // icmp ult (A - 1), Op1 -> icmp ule A, Op1
+ // icmp uge (A - 1), Op1 -> icmp ugt A, Op1
+ // icmp ugt Op0, (C - 1) -> icmp uge Op0, C
+ // icmp ule Op0, (C - 1) -> icmp ult Op0, C
+
+ // icmp slt (A + -1), Op1 -> icmp sle A, Op1
+ // icmp sge (A + -1), Op1 -> icmp sgt A, Op1
+ // icmp sle (A + 1), Op1 -> icmp slt A, Op1
+ // icmp sgt (A + 1), Op1 -> icmp sge A, Op1
+ // icmp ule (A + 1), Op1 -> icmp ult A, Op1
+ // icmp ugt (A + 1), Op1 -> icmp uge A, Op1
+ if (A && NoOp0WrapProblem &&
+ ShareCommonDivisor(A, Op1, B,
+ ICmpInst::isLT(Pred) || ICmpInst::isGE(Pred)))
+ return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), A,
+ Op1);
- // TODO: The subtraction-related identities shown below also hold, but
- // canonicalization from (X -nuw 1) to (X + -1) means that the combinations
- // wouldn't happen even if they were implemented.
- //
- // icmp ult (A - 1), Op1 -> icmp ule A, Op1
- // icmp uge (A - 1), Op1 -> icmp ugt A, Op1
- // icmp ugt Op0, (C - 1) -> icmp uge Op0, C
- // icmp ule Op0, (C - 1) -> icmp ult Op0, C
-
- // icmp ule (A + 1), Op0 -> icmp ult A, Op1
- if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_ULE && match(B, m_One()))
- return new ICmpInst(CmpInst::ICMP_ULT, A, Op1);
-
- // icmp ugt (A + 1), Op0 -> icmp uge A, Op1
- if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_UGT && match(B, m_One()))
- return new ICmpInst(CmpInst::ICMP_UGE, A, Op1);
-
- // icmp uge Op0, (C + 1) -> icmp ugt Op0, C
- if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_UGE && match(D, m_One()))
- return new ICmpInst(CmpInst::ICMP_UGT, Op0, C);
-
- // icmp ult Op0, (C + 1) -> icmp ule Op0, C
- if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_ULT && match(D, m_One()))
- return new ICmpInst(CmpInst::ICMP_ULE, Op0, C);
+ // icmp sgt Op0, (C + -1) -> icmp sge Op0, C
+ // icmp sle Op0, (C + -1) -> icmp slt Op0, C
+ // icmp sge Op0, (C + 1) -> icmp sgt Op0, C
+ // icmp slt Op0, (C + 1) -> icmp sle Op0, C
+ // icmp uge Op0, (C + 1) -> icmp ugt Op0, C
+ // icmp ult Op0, (C + 1) -> icmp ule Op0, C
+ if (C && NoOp1WrapProblem &&
+ ShareCommonDivisor(Op0, C, D,
+ ICmpInst::isGT(Pred) || ICmpInst::isLE(Pred)))
+ return new ICmpInst(ICmpInst::getFlippedStrictnessPredicate(Pred), Op0,
+ C);
+ }
// if C1 has greater magnitude than C2:
// icmp (A + C1), (C + C2) -> icmp (A + C3), C
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 365c17b35a468..a090f9c4d2614 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -1900,6 +1900,108 @@ define i1 @icmp_add1_sle(i32 %x, i32 %y) {
ret i1 %cmp
}
+define i1 @icmp_slt_offset_with_common_divisor(i64 %x, i64 %y) {
+; CHECK-LABEL: @icmp_slt_offset_with_common_divisor(
+; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
+; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[SHLX]], [[SHLY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shlx = shl i64 %x, 4
+ %shly = shl i64 %y, 4
+ %shlx_offset = add nsw i64 %shlx, -16
+ %cmp = icmp slt i64 %shlx_offset, %shly
+ ret i1 %cmp
+}
+
+define i1 @icmp_slt_offset_with_smaller_common_divisor(i64 %x, i64 %y) {
+; CHECK-LABEL: @icmp_slt_offset_with_smaller_common_divisor(
+; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
+; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i64 [[SHLX]], [[SHLY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shlx = shl i64 %x, 4
+ %shly = shl i64 %y, 4
+ %shlx_offset = add nsw i64 %shlx, -8
+ %cmp = icmp slt i64 %shlx_offset, %shly
+ ret i1 %cmp
+}
+
+define i1 @icmp_sle_offset_with_common_divisor(i64 %x, i64 %y) {
+; CHECK-LABEL: @icmp_sle_offset_with_common_divisor(
+; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
+; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[SHLX]], [[SHLY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shlx = shl i64 %x, 4
+ %shly = shl i64 %y, 4
+ %shlx_offset = add nsw i64 %shlx, 16
+ %cmp = icmp sle i64 %shlx_offset, %shly
+ ret i1 %cmp
+}
+
+define i1 @icmp_ule_offset_with_common_divisor(i64 %x, i64 %y) {
+; CHECK-LABEL: @icmp_ule_offset_with_common_divisor(
+; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
+; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[SHLX]], [[SHLY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shlx = shl i64 %x, 4
+ %shly = shl i64 %y, 4
+ %shlx_offset = add nuw i64 %shlx, 16
+ %cmp = icmp ule i64 %shlx_offset, %shly
+ ret i1 %cmp
+}
+
+; TODO: Handle non-power-of-2 divisors
+define i1 @icmp_ule_offset_with_common_non_pow2_divisor(i64 %x, i64 %y) {
+; CHECK-LABEL: @icmp_ule_offset_with_common_non_pow2_divisor(
+; CHECK-NEXT: [[MULX:%.*]] = mul nuw i64 [[X:%.*]], 7
+; CHECK-NEXT: [[MULY:%.*]] = mul nuw i64 [[Y:%.*]], 7
+; CHECK-NEXT: [[MULX_OFFSET:%.*]] = add nuw i64 [[MULX]], 7
+; CHECK-NEXT: [[CMP:%.*]] = icmp ule i64 [[MULX_OFFSET]], [[MULY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %mulx = mul nuw i64 %x, 7
+ %muly = mul nuw i64 %y, 7
+ %mulx_offset = add nuw i64 %mulx, 7
+ %cmp = icmp ule i64 %mulx_offset, %muly
+ ret i1 %cmp
+}
+
+define i1 @neg_icmp_slt_offset_without_common_divisor(i64 %x, i64 %y) {
+; CHECK-LABEL: @neg_icmp_slt_offset_without_common_divisor(
+; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
+; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
+; CHECK-NEXT: [[SHLX_OFFSET:%.*]] = add nsw i64 [[SHLX]], -32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[SHLX_OFFSET]], [[SHLY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shlx = shl i64 %x, 4
+ %shly = shl i64 %y, 4
+ %shlx_offset = add nsw i64 %shlx, -32
+ %cmp = icmp slt i64 %shlx_offset, %shly
+ ret i1 %cmp
+}
+
+define i1 @neg_icmp_slt_offset_with_wrong_sign(i64 %x, i64 %y) {
+; CHECK-LABEL: @neg_icmp_slt_offset_with_wrong_sign(
+; CHECK-NEXT: [[SHLX:%.*]] = shl i64 [[X:%.*]], 4
+; CHECK-NEXT: [[SHLY:%.*]] = shl i64 [[Y:%.*]], 4
+; CHECK-NEXT: [[SHLX_OFFSET:%.*]] = add nsw i64 [[SHLX]], 16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[SHLX_OFFSET]], [[SHLY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shlx = shl i64 %x, 4
+ %shly = shl i64 %y, 4
+ %shlx_offset = add nsw i64 %shlx, 16
+ %cmp = icmp slt i64 %shlx_offset, %shly
+ ret i1 %cmp
+}
+
define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) {
; CHECK-LABEL: @icmp_add20_sge_add57(
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 [[Y:%.*]], 37
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
This patch generalizes `icmp ule X +nuw 1, Y -> icmp ult X, Y`-like optimizations to handle the case where the added RHS constant is a common power-of-2 divisor of both X and Y. We can further generalize this pattern to handle non-power-of-2 divisors as well. Alive2: https://alive2.llvm.org/ce/z/QgpeM_
Compile-time improvement (Stage2-O3 -0.06%): https://llvm-compile-time-tracker.com/compare.php?from=0ba59587fa98849ed5107fee4134e810e84b69a3&to=f76e411fc7bba985d64c465baf443be8142d8203&stat=instructions%3Au
The original case is from the comparison of expanded GEP offsets: https://github.com/dtcxzyw/llvm-opt-benchmark/pull/2530/files#r2183005292