Skip to content

Commit 3014422

Browse files
authored
[llvm] [InstCombine] fold "icmp eq (X + (V - 1)) & -V, X" to "icmp eq (and X, V - 1), 0" (#152851)
This fold optimizes, for a power-of-two `val`:

```llvm
define i1 @src(i32 %num, i32 %val) {
  %mask = add i32 %val, -1
  %neg = sub nsw i32 0, %val
  %num.biased = add i32 %num, %mask
  %_2.sroa.0.0 = and i32 %num.biased, %neg
  %_0 = icmp eq i32 %_2.sroa.0.0, %num
  ret i1 %_0
}
```

to:

```llvm
define i1 @tgt(i32 %num, i32 %val) {
  %mask = add i32 %val, -1
  %tmp = and i32 %num, %mask
  %ret = icmp eq i32 %tmp, 0
  ret i1 %ret
}
```

Observed in real life for the following Rust code:

```rust
pub fn is_aligned(num: usize) -> bool {
    num.next_multiple_of(1 << 12) == num
}
```

which verifies that `num` is aligned to 4096. Alive2 proof: https://alive2.llvm.org/ce/z/QisECm
1 parent f92afe7 commit 3014422

File tree

3 files changed

+179
-0
lines changed

3 files changed

+179
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,6 +1320,35 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
13201320
return nullptr;
13211321
}
13221322

1323+
/// Fold icmp eq (num + mask) & ~mask, num
1324+
/// to
1325+
/// icmp eq (and num, mask), 0
1326+
/// Where mask is a low bit mask.
1327+
Instruction *InstCombinerImpl::foldIsMultipleOfAPowerOfTwo(ICmpInst &Cmp) {
1328+
Value *Num;
1329+
CmpPredicate Pred;
1330+
const APInt *Mask, *Neg;
1331+
1332+
if (!match(&Cmp,
1333+
m_c_ICmp(Pred, m_Value(Num),
1334+
m_OneUse(m_c_And(m_OneUse(m_c_Add(m_Deferred(Num),
1335+
m_LowBitMask(Mask))),
1336+
m_APInt(Neg))))))
1337+
return nullptr;
1338+
1339+
if (*Neg != ~*Mask)
1340+
return nullptr;
1341+
1342+
if (!ICmpInst::isEquality(Pred))
1343+
return nullptr;
1344+
1345+
// Create new icmp eq (num & mask), 0
1346+
auto *NewAnd = Builder.CreateAnd(Num, *Mask);
1347+
auto *Zero = Constant::getNullValue(Num->getType());
1348+
1349+
return new ICmpInst(Pred, NewAnd, Zero);
1350+
}
1351+
13231352
/// Fold icmp Pred X, C.
13241353
/// TODO: This code structure does not make sense. The saturating add fold
13251354
/// should be moved to some other helper and extended as noted below (it is also
@@ -7644,6 +7673,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
76447673
if (Instruction *Res = foldICmpUsingKnownBits(I))
76457674
return Res;
76467675

7676+
if (Instruction *Res = foldIsMultipleOfAPowerOfTwo(I))
7677+
return Res;
7678+
76477679
// Test if the ICmpInst instruction is used exclusively by a select as
76487680
// part of a minimum or maximum operation. If so, refrain from doing
76497681
// any other folding. This helps out other analyses which understand

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -721,6 +721,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
721721
Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
722722
Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp);
723723
Instruction *foldICmpWithConstant(ICmpInst &Cmp);
724+
Instruction *foldIsMultipleOfAPowerOfTwo(ICmpInst &Cmp);
724725
Instruction *foldICmpUsingBoolRange(ICmpInst &I);
725726
Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
726727
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);

llvm/test/Transforms/InstCombine/icmp-add.ll

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3300,3 +3300,149 @@ entry:
33003300
%cmp = icmp ult i32 %add, 253
33013301
ret i1 %cmp
33023302
}
3303+
3304+
; PR 152851
3305+
3306+
define i1 @val_is_aligend_const_pow2(i32 %num) {
3307+
; CHECK-LABEL: @val_is_aligend_const_pow2(
3308+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3309+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
3310+
; CHECK-NEXT: ret i1 [[_0]]
3311+
;
3312+
%num.biased = add i32 %num, 4095
3313+
%num.masked = and i32 %num.biased, -4096
3314+
%_0 = icmp eq i32 %num.masked, %num
3315+
ret i1 %_0
3316+
}
3317+
3318+
define i1 @val_is_aligend_const_pow2_add_commute(i32 %num) {
3319+
; CHECK-LABEL: @val_is_aligend_const_pow2_add_commute(
3320+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3321+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
3322+
; CHECK-NEXT: ret i1 [[_0]]
3323+
;
3324+
%num.biased = add i32 4095, %num
3325+
%num.masked = and i32 %num.biased, -4096
3326+
%_0 = icmp eq i32 %num.masked, %num
3327+
ret i1 %_0
3328+
}
3329+
3330+
define i1 @val_is_aligend_const_pow2_and_commute(i32 %num) {
3331+
; CHECK-LABEL: @val_is_aligend_const_pow2_and_commute(
3332+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3333+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
3334+
; CHECK-NEXT: ret i1 [[_0]]
3335+
;
3336+
%num.biased = add i32 %num, 4095
3337+
%num.masked = and i32 -4096, %num.biased
3338+
%_0 = icmp eq i32 %num.masked, %num
3339+
ret i1 %_0
3340+
}
3341+
3342+
define i1 @val_is_aligend_const_pow2_icm_commute(i32 %num) {
3343+
; CHECK-LABEL: @val_is_aligend_const_pow2_icm_commute(
3344+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3345+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[TMP1]], 0
3346+
; CHECK-NEXT: ret i1 [[_0]]
3347+
;
3348+
%num.biased = add i32 %num, 4095
3349+
%num.masked = and i32 %num.biased, -4096
3350+
%_0 = icmp eq i32 %num, %num.masked
3351+
ret i1 %_0
3352+
}
3353+
3354+
; Should not work for non-power-of-two cases
3355+
define i1 @val_is_aligend_const_non_pow2(i32 %num) {
3356+
; CHECK-LABEL: @val_is_aligend_const_non_pow2(
3357+
; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 6
3358+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -7
3359+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[NUM_MASKED]], [[NUM]]
3360+
; CHECK-NEXT: ret i1 [[_0]]
3361+
;
3362+
%num.biased = add i32 %num, 6
3363+
%num.masked = and i32 %num.biased, -7
3364+
%_0 = icmp eq i32 %num.masked, %num
3365+
ret i1 %_0
3366+
}
3367+
3368+
define i1 @val_is_aligend_const_pow2_multiuse(i32 %num) {
3369+
; CHECK-LABEL: @val_is_aligend_const_pow2_multiuse(
3370+
; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 4095
3371+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -4096
3372+
; CHECK-NEXT: call void @use(i32 [[NUM_MASKED]])
3373+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[NUM_MASKED]], [[NUM]]
3374+
; CHECK-NEXT: ret i1 [[_0]]
3375+
;
3376+
%num.biased = add i32 %num, 4095
3377+
%num.masked = and i32 %num.biased, -4096
3378+
call void @use(i32 %num.masked)
3379+
%_0 = icmp eq i32 %num.masked, %num
3380+
ret i1 %_0
3381+
}
3382+
3383+
; Applies since number of instructions do not change
3384+
define i1 @val_is_aligend_const_pow2_multiuse1(i32 %num) {
3385+
; CHECK-LABEL: @val_is_aligend_const_pow2_multiuse1(
3386+
; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 4095
3387+
; CHECK-NEXT: call void @use(i32 [[NUM_BIASED]])
3388+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -4096
3389+
; CHECK-NEXT: [[_0:%.*]] = icmp eq i32 [[NUM_MASKED]], [[NUM]]
3390+
; CHECK-NEXT: ret i1 [[_0]]
3391+
;
3392+
%num.biased = add i32 %num, 4095
3393+
call void @use(i32 %num.biased)
3394+
%num.masked = and i32 %num.biased, -4096
3395+
%_0 = icmp eq i32 %num.masked, %num
3396+
ret i1 %_0
3397+
}
3398+
3399+
define i1 @val_is_aligend_const_pow2_ne(i32 %num) {
3400+
; CHECK-LABEL: @val_is_aligend_const_pow2_ne(
3401+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], 4095
3402+
; CHECK-NEXT: [[_0:%.*]] = icmp ne i32 [[TMP1]], 0
3403+
; CHECK-NEXT: ret i1 [[_0]]
3404+
;
3405+
%num.biased = add i32 %num, 4095
3406+
%num.masked = and i32 %num.biased, -4096
3407+
%_0 = icmp ne i32 %num.masked, %num
3408+
ret i1 %_0
3409+
}
3410+
3411+
define i1 @val_is_aligend_const_mismatch(i32 %num) {
3412+
; CHECK-LABEL: @val_is_aligend_const_mismatch(
3413+
; CHECK-NEXT: [[NUM_BIASED:%.*]] = add i32 [[NUM:%.*]], 4095
3414+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = and i32 [[NUM_BIASED]], -4095
3415+
; CHECK-NEXT: [[_0:%.*]] = icmp ne i32 [[NUM_MASKED]], [[NUM]]
3416+
; CHECK-NEXT: ret i1 [[_0]]
3417+
;
3418+
%num.biased = add i32 %num, 4095
3419+
%num.masked = and i32 %num.biased, -4095
3420+
%_0 = icmp ne i32 %num.masked, %num
3421+
ret i1 %_0
3422+
}
3423+
3424+
define i1 @val_is_aligend_const_mismatch1(i32 %num) {
3425+
; CHECK-LABEL: @val_is_aligend_const_mismatch1(
3426+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], -4096
3427+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = add i32 [[TMP1]], 4096
3428+
; CHECK-NEXT: [[_0:%.*]] = icmp ne i32 [[NUM_MASKED]], [[NUM]]
3429+
; CHECK-NEXT: ret i1 [[_0]]
3430+
;
3431+
%num.biased = add i32 %num, 4096
3432+
%num.masked = and i32 %num.biased, -4096
3433+
%_0 = icmp ne i32 %num.masked, %num
3434+
ret i1 %_0
3435+
}
3436+
3437+
define i1 @val_is_aligend_pred_mismatch(i32 %num) {
3438+
; CHECK-LABEL: @val_is_aligend_pred_mismatch(
3439+
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[NUM:%.*]], -4096
3440+
; CHECK-NEXT: [[NUM_MASKED:%.*]] = add i32 [[TMP1]], 4096
3441+
; CHECK-NEXT: [[_0:%.*]] = icmp sge i32 [[NUM_MASKED]], [[NUM]]
3442+
; CHECK-NEXT: ret i1 [[_0]]
3443+
;
3444+
%num.biased = add i32 %num, 4096
3445+
%num.masked = and i32 %num.biased, -4096
3446+
%_0 = icmp sge i32 %num.masked, %num
3447+
ret i1 %_0
3448+
}

0 commit comments

Comments
 (0)