
Conversation

@benwu25 (Contributor) commented Sep 10, 2025

This PR addresses #157524.

alive2: https://alive2.llvm.org/ce/z/xe_vb2
godbolt: https://alive2.llvm.org/ce/z/7A8PxK

This fold is invalid for @llvm.smin.i1, since smin(-1, 0) == -1 (take X = Y = 0). I also excluded i1 in general, since the fold emits a zext; those width checks may turn out to be unnecessary, though, because other folds handle i1 before this one is reached.
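
A worked counterexample for the i1 case (my own illustration, expanding the X = Y = 0 case from the commit message; not part of the patch):

; In i1, the constant 1 reads as -1 when interpreted as signed, so with X = Y = 0:
;   smin(X+1, Y) = smin(-1, 0) = -1   (the i1 value 1)
;   smin(X, Y)   = smin(0, 0)  =  0
;   sub          = -1 - 0      = -1   (the i1 value 1)
; whereas the folded form gives zext(X <s Y) = zext(0 <s 0) = 0,
; so applying the fold at i1 would miscompile.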

The alive2 proof in #157524 used a select for the fold, but it seems that select X < Y, 1, 0 is itself canonicalized to zext X < Y whenever the result type is wider than i1.
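
A sketch of that canonicalization in IR form (an illustration of standard InstCombine behavior; the function name is mine):

define i32 @select_form(i32 %x, i32 %y) {
  %c = icmp slt i32 %x, %y
  ; instcombine canonicalizes this select of 1/0 to: %r = zext i1 %c to i32
  %r = select i1 %c, i32 1, i32 0
  ret i32 %r
}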

Could someone help review and merge?

Thanks!

This fold is invalid for @llvm.smin.i1, since smin(-1, 0) == -1
(take X = Y = 0). Otherwise, if X+1 has the appropriate nsw or nuw,
this transform replaces a sub and at least one min with an icmp and
a zext. It is also invalid for i1 in general, but it seems that other
folds take care of i1. In llvm#157524, this expression was folded to a
select, but it seems that select X < Y, 1, 0 can be canonicalized to
zext X < Y.
@benwu25 requested a review from nikic as a code owner September 10, 2025 02:52
@llvmbot added the llvm:instcombine and llvm:transforms labels Sep 10, 2025
@llvmbot (Member) commented Sep 10, 2025

@llvm/pr-subscribers-llvm-transforms

Author: None (benwu25)

Full diff: https://github.com/llvm/llvm-project/pull/157782.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp (+18)
  • (added) llvm/test/Transforms/InstCombine/min-zext.ll (+116)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index d934638c15e75..63c6fc52322d6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -2719,6 +2719,24 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
     return BinaryOperator::CreateSub(X, Not);
   }
 
+  // min(X+1, Y) - min(X, Y) --> zext X < Y
+  // Replacing a sub and at least one min with an icmp
+  // and a zext is a potential improvement.
+  if (match(Op0, m_c_SMin(m_c_NSWAdd(m_Value(X), m_One()), m_Value(Y))) &&
+      match(Op1, m_c_SMin(m_Value(X), m_Value(Y))) &&
+      I.getType()->getScalarSizeInBits() != 1 &&
+      (Op0->hasOneUse() || Op1->hasOneUse())) {
+    Value *Cond = Builder.CreateICmpSLT(X, Y);
+    return new ZExtInst(Cond, I.getType());
+  }
+  if (match(Op0, m_c_UMin(m_c_NUWAdd(m_Value(X), m_One()), m_Value(Y))) &&
+      match(Op1, m_c_UMin(m_Value(X), m_Value(Y))) &&
+      I.getType()->getScalarSizeInBits() != 1 &&
+      (Op0->hasOneUse() || Op1->hasOneUse())) {
+    Value *Cond = Builder.CreateICmpULT(X, Y);
+    return new ZExtInst(Cond, I.getType());
+  }
+
   // Optimize pointer differences into the same array into a size.  Consider:
   //  &A[10] - &A[0]: we should compile this to "10".
   Value *LHSOp, *RHSOp;
diff --git a/llvm/test/Transforms/InstCombine/min-zext.ll b/llvm/test/Transforms/InstCombine/min-zext.ll
new file mode 100644
index 0000000000000..43af1f48bcfed
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/min-zext.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i32 @test_smin(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @test_smin(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i32 [[ARG0]], [[ARG1]]
+; CHECK-NEXT:    [[V3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[V3]]
+;
+  %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
+  %v1 = add nsw i32 %arg0, 1
+  %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
+  %v3 = sub i32 %v2, %v0
+  ret i32 %v3
+}
+
+define i32 @test_umin(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @test_umin(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[ARG0]], [[ARG1]]
+; CHECK-NEXT:    [[V3:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT:    ret i32 [[V3]]
+;
+  %v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1)
+  %v1 = add nuw i32 %arg0, 1
+  %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 %arg1)
+  %v3 = sub i32 %v2, %v0
+  ret i32 %v3
+}
+
+define i1 @test_smin_i1(i1 %arg0, i1 %arg1) {
+; CHECK-LABEL: define i1 @test_smin_i1(
+; CHECK-SAME: i1 [[ARG0:%.*]], i1 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = or i1 [[ARG0]], [[ARG1]]
+; CHECK-NEXT:    [[V3:%.*]] = xor i1 [[V0]], true
+; CHECK-NEXT:    ret i1 [[V3]]
+;
+  %v0 = tail call i1 @llvm.smin.i1(i1 %arg0, i1 %arg1)
+  %v1 = add nsw i1 %arg0, 1
+  %v2 = tail call i1 @llvm.smin.i1(i1 %v1, i1 %arg1)
+  %v3 = sub i1 %v2, %v0
+  ret i1 %v3
+}
+
+declare void @use(i2)
+
+define i2 @test_smin_use_operands(i2 %arg0, i2 %arg1) {
+; CHECK-LABEL: define i2 @test_smin_use_operands(
+; CHECK-SAME: i2 [[ARG0:%.*]], i2 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = tail call i2 @llvm.smin.i2(i2 [[ARG0]], i2 [[ARG1]])
+; CHECK-NEXT:    [[V1:%.*]] = add nsw i2 [[ARG0]], 1
+; CHECK-NEXT:    [[V2:%.*]] = tail call i2 @llvm.smin.i2(i2 [[V1]], i2 [[ARG1]])
+; CHECK-NEXT:    [[V3:%.*]] = sub i2 [[V2]], [[V0]]
+; CHECK-NEXT:    call void @use(i2 [[V2]])
+; CHECK-NEXT:    call void @use(i2 [[V0]])
+; CHECK-NEXT:    ret i2 [[V3]]
+;
+  %v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1)
+  %v1 = add nsw i2 %arg0, 1
+  %v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1)
+  %v3 = sub i2 %v2, %v0
+  call void @use(i2 %v2)
+  call void @use(i2 %v0)
+  ret i2 %v3
+}
+
+define i2 @test_smin_use_operand(i2 %arg0, i2 %arg1) {
+; CHECK-LABEL: define i2 @test_smin_use_operand(
+; CHECK-SAME: i2 [[ARG0:%.*]], i2 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V1:%.*]] = add nsw i2 [[ARG0]], 1
+; CHECK-NEXT:    [[V2:%.*]] = tail call i2 @llvm.smin.i2(i2 [[V1]], i2 [[ARG1]])
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt i2 [[ARG0]], [[ARG1]]
+; CHECK-NEXT:    [[V3:%.*]] = zext i1 [[TMP1]] to i2
+; CHECK-NEXT:    call void @use(i2 [[V2]])
+; CHECK-NEXT:    ret i2 [[V3]]
+;
+  %v0 = tail call i2 @llvm.smin.i2(i2 %arg0, i2 %arg1)
+  %v1 = add nsw i2 %arg0, 1
+  %v2 = tail call i2 @llvm.smin.i2(i2 %v1, i2 %arg1)
+  %v3 = sub i2 %v2, %v0
+  call void @use(i2 %v2)
+  ret i2 %v3
+}
+
+define i32 @test_smin_missing_nsw(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @test_smin_missing_nsw(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = tail call i32 @llvm.smin.i32(i32 [[ARG0]], i32 [[ARG1]])
+; CHECK-NEXT:    [[V1:%.*]] = add i32 [[ARG0]], 1
+; CHECK-NEXT:    [[V2:%.*]] = tail call i32 @llvm.smin.i32(i32 [[V1]], i32 [[ARG1]])
+; CHECK-NEXT:    [[V3:%.*]] = sub i32 [[V2]], [[V0]]
+; CHECK-NEXT:    ret i32 [[V3]]
+;
+  %v0 = tail call i32 @llvm.smin.i32(i32 %arg0, i32 %arg1)
+  %v1 = add i32 %arg0, 1
+  %v2 = tail call i32 @llvm.smin.i32(i32 %v1, i32 %arg1)
+  %v3 = sub i32 %v2, %v0
+  ret i32 %v3
+}
+
+define i32 @test_umin_missing_nuw(i32 %arg0, i32 %arg1) {
+; CHECK-LABEL: define i32 @test_umin_missing_nuw(
+; CHECK-SAME: i32 [[ARG0:%.*]], i32 [[ARG1:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = tail call i32 @llvm.umin.i32(i32 [[ARG0]], i32 [[ARG1]])
+; CHECK-NEXT:    [[V1:%.*]] = add i32 [[ARG0]], 1
+; CHECK-NEXT:    [[V2:%.*]] = tail call i32 @llvm.umin.i32(i32 [[V1]], i32 [[ARG1]])
+; CHECK-NEXT:    [[V3:%.*]] = sub i32 [[V2]], [[V0]]
+; CHECK-NEXT:    ret i32 [[V3]]
+;
+  %v0 = tail call i32 @llvm.umin.i32(i32 %arg0, i32 %arg1)
+  %v1 = add i32 %arg0, 1
+  %v2 = tail call i32 @llvm.umin.i32(i32 %v1, i32 %arg1)
+  %v3 = sub i32 %v2, %v0
+  ret i32 %v3
+}

@benwu25 changed the title from "[InstCombine] Fold min(X+1, Y) - min(X, Y) --> zext X < Y (#157524)" to "[InstCombine] Fold min(X+1, Y) - min(X, Y) --> zext X < Y" Sep 10, 2025
@dtcxzyw (Member) commented Sep 11, 2025

@zyw-bot mfuzz

@dtcxzyw (Member) left a comment

LGTM. Thanks!

@dtcxzyw merged commit 2508851 into llvm:main Sep 12, 2025
9 checks passed
@nikic (Contributor) left a comment

It's also possible to fold this if the sub operands are swapped, just using sext instead of zext: https://alive2.llvm.org/ce/z/UtxZm_

No idea whether that variant occurs in the wild though.
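
A minimal sketch of that swapped variant (illustrative only; the function name is mine and this is not part of the merged patch):

define i32 @swapped_smin(i32 %x, i32 %y) {
  %m0 = call i32 @llvm.smin.i32(i32 %x, i32 %y)
  %x1 = add nsw i32 %x, 1
  %m1 = call i32 @llvm.smin.i32(i32 %x1, i32 %y)
  %d = sub i32 %m0, %m1          ; operands swapped relative to this PR's fold
  ret i32 %d
}

; Since min(X+1, Y) - min(X, Y) is 1 exactly when X < Y, the swapped sub is
; -1 exactly when X < Y, i.e. it folds to:
;   %c = icmp slt i32 %x, %y
;   %d = sext i1 %c to i32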
