Skip to content

Conversation

@el-ev
Copy link
Member

@el-ev el-ev commented Jun 11, 2025

@el-ev el-ev requested a review from nikic as a code owner June 11, 2025 11:06
@el-ev el-ev requested a review from dtcxzyw June 11, 2025 11:06
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Jun 11, 2025
@llvmbot
Copy link
Member

llvmbot commented Jun 11, 2025

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Iris Shi (el-ev)

Changes
  • Closes #143636

https://alive2.llvm.org/ce/z/q5-XqY


Full diff: https://github.com/llvm/llvm-project/pull/143683.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+18)
  • (added) llvm/test/Transforms/InstCombine/ceil-shift.ll (+176)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c112fae351817..4569a2cd82d2a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/Utils/Local.h"
 #include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/CmpPredicate.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/InstrTypes.h"
@@ -1298,6 +1299,23 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
     // eq/ne (mul X, Y)) with (icmp eq/ne X/Y) and if X/Y is known non-zero that
     // will fold to a constant elsewhere.
   }
+
+  // (X >> C) + ((X & ((1 << C) - 1)) != 0) == 0 -> X == 0
+  if (Pred == ICmpInst::ICMP_EQ) {
+    Value *X;
+    const APInt *C1, *C2;
+    CmpPredicate PredNE;
+    if (match(Cmp.getOperand(0),
+              m_OneUse(
+                  m_Add(m_LShr(m_Value(X), m_APInt(C1)),
+                        m_ZExt(m_ICmp(PredNE, m_And(m_Deferred(X), m_APInt(C2)),
+                                      m_Zero()))))) &&
+        PredNE == CmpInst::ICMP_NE &&
+        *C2 == APInt::getLowBitsSet(C2->getBitWidth(), C1->getZExtValue()))
+      return new ICmpInst(ICmpInst::ICMP_EQ, X,
+                          ConstantInt::getNullValue(X->getType()));
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/ceil-shift.ll b/llvm/test/Transforms/InstCombine/ceil-shift.ll
new file mode 100644
index 0000000000000..853985eff258d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/ceil-shift.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @ceil_shift4(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift4(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = lshr i32 %arg0, 4
+  %2 = and i32 %arg0, 15
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define i1 @ceil_shift6(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift6(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = lshr i32 %arg0, 6
+  %2 = and i32 %arg0, 63
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define i1 @ceil_shift11(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift11(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = lshr i32 %arg0, 11
+  %2 = and i32 %arg0, 2047
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define i1 @ceil_shift0(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift0(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    ret i1 [[TMP1]]
+;
+  %1 = lshr i32 %arg0, 0
+  %2 = and i32 %arg0, 0
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+declare void @use(i32)
+
+define i1 @ceil_shift4_used_1(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift4_used_1(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
+; CHECK-NEXT:    call void @use(i32 [[TMP1]])
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[ARG0]], 0
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %1 = lshr i32 %arg0, 4
+  call void @use(i32 %1)
+  %2 = and i32 %arg0, 15
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define i1 @ceil_shift4_used_5(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift4_used_5(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[ARG0]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i32 [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    call void @use(i32 [[TMP5]])
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %1 = lshr i32 %arg0, 4
+  %2 = and i32 %arg0, 15
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  call void @use(i32 %5)
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define <4 x i1> @ceil_shift4_v4i32(<4 x i32> %arg0) {
+; CHECK-LABEL: define <4 x i1> @ceil_shift4_v4i32(
+; CHECK-SAME: <4 x i32> [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[ARG0]], zeroinitializer
+; CHECK-NEXT:    ret <4 x i1> [[TMP1]]
+;
+  %1 = lshr <4 x i32> %arg0, splat (i32 16)
+  %2 = and <4 x i32> %arg0, splat (i32 65535)
+  %3 = icmp ne <4 x i32> %2, zeroinitializer
+  %4 = zext <4 x i1> %3 to <4 x i32>
+  %5 = add <4 x i32> %1, %4
+  %6 = icmp eq <4 x i32> %5, zeroinitializer
+  ret <4 x i1> %6
+}
+
+define <8 x i1> @ceil_shift4_v8i16(<8 x i16> %arg0) {
+; CHECK-LABEL: define <8 x i1> @ceil_shift4_v8i16(
+; CHECK-SAME: <8 x i16> [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> [[ARG0]], zeroinitializer
+; CHECK-NEXT:    ret <8 x i1> [[TMP1]]
+;
+  %1 = lshr <8 x i16> %arg0, splat (i16 4)
+  %2 = and <8 x i16> %arg0, splat (i16 15)
+  %3 = icmp ne <8 x i16> %2, zeroinitializer
+  %4 = zext <8 x i1> %3 to <8 x i16>
+  %5 = add <8 x i16> %1, %4
+  %6 = icmp eq <8 x i16> %5, zeroinitializer
+  ret <8 x i1> %6
+}
+
+; negative tests
+
+define i1 @ceil_shift_not_mask_1(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift_not_mask_1(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[ARG0]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[ARG0]], 31
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %1 = lshr i32 %arg0, 4
+  %2 = and i32 %arg0, 31
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}
+
+define i1 @ceil_shift_not_mask_2(i32 %arg0) {
+; CHECK-LABEL: define i1 @ceil_shift_not_mask_2(
+; CHECK-SAME: i32 [[ARG0:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[ARG0]], 5
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[ARG0]], 15
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT:    ret i1 [[TMP6]]
+;
+  %1 = lshr i32 %arg0, 5
+  %2 = and i32 %arg0, 15
+  %3 = icmp ne i32 %2, 0
+  %4 = zext i1 %3 to i32
+  %5 = add i32 %1, %4
+  %6 = icmp eq i32 %5, 0
+  ret i1 %6
+}

@el-ev el-ev changed the title from "[InstCombine] Fold ceil(X >> C) == 0 -> X == 0" to "[InstCombine] Fold ceil(X / (2 ^ C)) == 0 -> X == 0" Jun 11, 2025
@el-ev el-ev requested a review from dtcxzyw June 13, 2025 12:05
@el-ev el-ev requested a review from dtcxzyw June 14, 2025 13:10
@el-ev el-ev force-pushed the users/el-ev/fold-ceil-shift branch from 943a95f to 0c68968 Compare June 15, 2025 02:42
@dtcxzyw
Copy link
Member

dtcxzyw commented Jun 18, 2025

IIRC the regression can be fixed by handling this pattern in isKnownNonZero. It would be better to add a helper stripNullTest to find patterns in the form of f(x) == 0 <-> x == 0.

@el-ev el-ev force-pushed the users/el-ev/fold-ceil-shift branch from 0c68968 to 90d1ed7 Compare June 21, 2025 06:25
@llvmbot llvmbot added the llvm:analysis Includes value tracking, cost tables and constant folding label Jun 21, 2025
@el-ev
Copy link
Member Author

el-ev commented Jun 21, 2025

> IIRC the regression can be fixed by handling this pattern in isKnownNonZero. It would be better to add a helper stripNullTest to find patterns in the form of f(x) == 0 <-> x == 0.

Updated. Could you help me rerun the benchmarks?

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add tests from https://godbolt.org/z/Wcsv453n6?

@el-ev el-ev requested a review from dtcxzyw June 21, 2025 13:23
@el-ev el-ev force-pushed the users/el-ev/fold-ceil-shift branch from c107041 to 4811b7d Compare June 21, 2025 13:28
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It doesn't work for `icmp sge`. BTW, do you know why `@src1` got folded without this change?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When debugging, I found a call to `simplifyICmpInst` with `icmp uge i32 %ceil, 1`. Usually it is canonicalized to `icmp ne i32 %ceil, 0`, but not here.
image

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> Usually it is canonicalized to `icmp ne i32 %ceil, 0` but not here.

It doesn't happen because we are simplifying umax(%x, 1).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is handled by isImpliedByDomCondition.

@dtcxzyw
Copy link
Member

dtcxzyw commented Jun 21, 2025

Can you file a separate patch for 4811b7d? I think we can land this change first :)

@el-ev el-ev force-pushed the users/el-ev/fold-ceil-shift branch from 4802be7 to e854101 Compare June 22, 2025 05:43
@el-ev el-ev requested a review from dtcxzyw June 22, 2025 05:43
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Please wait for additional approval from other reviewers.

@github-actions
Copy link

github-actions bot commented Jun 22, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@el-ev el-ev force-pushed the users/el-ev/fold-ceil-shift branch from 689446a to da1b1d0 Compare June 22, 2025 09:41
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@el-ev el-ev merged commit 32f911f into main Jun 23, 2025
7 checks passed
@el-ev el-ev deleted the users/el-ev/fold-ceil-shift branch June 23, 2025 02:51
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

llvm:analysis Includes value tracking, cost tables and constant folding llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

Missed optimization: fold ceil(x/16) == 0 to x == 0

5 participants