Skip to content

Conversation

@el-ev
Copy link
Member

@el-ev el-ev commented Mar 13, 2025

This PR introduces the following transformations:

  1. umax(nuw_shl(z, x), nuw_shl(z, y)) -> nuw_shl(z, umax(x, y))
  2. umin(nuw_shl(z, x), nuw_shl(z, y)) -> nuw_shl(z, umin(x, y))
  3. umax(nuw_shl(x, z), nuw_shl(y, z)) -> nuw_shl(umax(x, y), z)
  4. umin(nuw_shl(x, z), nuw_shl(y, z)) -> nuw_shl(umin(x, y), z)

Alive2 live proof:

el-ev added 2 commits March 13, 2025 12:04
…C0, umax(x, y))` and `umin(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umin(x, y))`
@el-ev el-ev requested a review from nikic as a code owner March 13, 2025 05:34
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Mar 13, 2025
@llvmbot
Copy link
Member

llvmbot commented Mar 13, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Iris (el-ev)

Changes
  • Closes #129947

This PR introduces the following transformations:

  1. umax(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umax(x, y))
  2. umin(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umin(x, y))

Alive2 live proof: https://alive2.llvm.org/ce/z/6bM-p7


Full diff: https://github.com/llvm/llvm-project/pull/131076.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+13)
  • (added) llvm/test/Transforms/InstCombine/shift-uminmax.ll (+232)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 63f2fd0a733ce..a36f01c98b900 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1887,6 +1887,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
       if (Instruction *I = foldMaxMulShift(I1, I0))
         return I;
     }
+
+    // umax(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umax(x, y))
+    // umin(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umin(x, y))
+    const APInt *C1, *C2;
+    if (match(I0, m_OneUse(m_NUWShl(m_APInt(C1), m_Value()))) &&
+        match(I1, m_OneUse(m_NUWShl(m_APInt(C2), m_Value()))) && *C1 == *C2) {
+      Value *X = cast<ShlOperator>(I0)->getOperand(1);
+      Value *Y = cast<ShlOperator>(I1)->getOperand(1);
+      Value *MaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
+      return BinaryOperator::CreateNUWShl(ConstantInt::get(I0->getType(), *C1),
+                                          MaxMin);
+    }
+
     // If both operands of unsigned min/max are sign-extended, it is still ok
     // to narrow the operation.
     [[fallthrough]];
diff --git a/llvm/test/Transforms/InstCombine/shift-uminmax.ll b/llvm/test/Transforms/InstCombine/shift-uminmax.ll
new file mode 100644
index 0000000000000..14c600579999d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shift-uminmax.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; For the following patterns:
+; umax(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umax(x, y))
+; umin(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umin(x, y))
+
+define i32 @test_umax_shl_const1(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_const1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    [[MAX:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT:    ret i32 [[MAX]]
+;
+  %shl_x = shl nuw i32 1, %x
+  %shl_y = shl nuw i32 1, %y
+  %max = call i32 @llvm.umax.i32(i32 %shl_x, i32 %shl_y)
+  ret i32 %max
+}
+
+define i32 @test_umin_shl_const1(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_const1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    [[MIN:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT:    ret i32 [[MIN]]
+;
+  %shl_x = shl nuw i32 1, %x
+  %shl_y = shl nuw i32 1, %y
+  %min = call i32 @llvm.umin.i32(i32 %shl_x, i32 %shl_y)
+  ret i32 %min
+}
+
+define i32 @test_umax_shl_const5(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_const5(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    [[MAX:%.*]] = shl nuw i32 5, [[TMP1]]
+; CHECK-NEXT:    ret i32 [[MAX]]
+;
+  %shl_x = shl nuw i32 5, %x
+  %shl_y = shl nuw i32 5, %y
+  %max = call i32 @llvm.umax.i32(i32 %shl_x, i32 %shl_y)
+  ret i32 %max
+}
+
+define i32 @test_umin_shl_const5(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_const5(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT:    [[MIN:%.*]] = shl nuw i32 5, [[TMP1]]
+; CHECK-NEXT:    ret i32 [[MIN]]
+;
+  %shl_x = shl nuw i32 5, %x
+  %shl_y = shl nuw i32 5, %y
+  %min = call i32 @llvm.umin.i32(i32 %shl_x, i32 %shl_y)
+  ret i32 %min
+}
+
+declare void @use(i32) ; extra user of the shl results in the *_multi_use tests; called with i32 operands
+
+define i32 @test_umax_shl_const1_multi_use(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_const1_multi_use(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[SHL_X:%.*]] = shl nuw i32 1, [[X]]
+; CHECK-NEXT:    [[SHL_Y:%.*]] = shl nuw i32 1, [[Y]]
+; CHECK-NEXT:    call void @use(i32 [[SHL_X]])
+; CHECK-NEXT:    call void @use(i32 [[SHL_Y]])
+; CHECK-NEXT:    [[MAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT:    ret i32 [[MAX]]
+;
+  %shl_x = shl nuw i32 1, %x
+  %shl_y = shl nuw i32 1, %y
+  call void @use(i32 %shl_x)
+  call void @use(i32 %shl_y)
+  %max = call i32 @llvm.umax.i32(i32 %shl_x, i32 %shl_y)
+  ret i32 %max
+}
+
+define i32 @test_umin_shl_const1_multi_use(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_const1_multi_use(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[SHL_X:%.*]] = shl nuw i32 1, [[X]]
+; CHECK-NEXT:    [[SHL_Y:%.*]] = shl nuw i32 1, [[Y]]
+; CHECK-NEXT:    call void @use(i32 [[SHL_X]])
+; CHECK-NEXT:    call void @use(i32 [[SHL_Y]])
+; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT:    ret i32 [[MIN]]
+;
+  %shl_x = shl nuw i32 1, %x
+  %shl_y = shl nuw i32 1, %y
+  call void @use(i32 %shl_x)
+  call void @use(i32 %shl_y)
+  %min = call i32 @llvm.umin.i32(i32 %shl_x, i32 %shl_y)
+  ret i32 %min
+}
+
+define i32 @test_umax_shl_const1_commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_const1_commuted(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[Y]], i32 [[X]])
+; CHECK-NEXT:    [[MAX:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT:    ret i32 [[MAX]]
+;
+  %shl_x = shl nuw i32 1, %x
+  %shl_y = shl nuw i32 1, %y
+  %max = call i32 @llvm.umax.i32(i32 %shl_y, i32 %shl_x)
+  ret i32 %max
+}
+
+define i32 @test_umin_shl_const1_commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_const1_commuted(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[Y]], i32 [[X]])
+; CHECK-NEXT:    [[MIN:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT:    ret i32 [[MIN]]
+;
+  %shl_x = shl nuw i32 1, %x
+  %shl_y = shl nuw i32 1, %y
+  %min = call i32 @llvm.umin.i32(i32 %shl_y, i32 %shl_x)
+  ret i32 %min
+}
+
+define <2 x i32> @test_umax_shl_vector_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x i32> @test_umax_shl_vector_splat(
+; CHECK-SAME: <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT:    [[MAX:%.*]] = shl nuw <2 x i32> splat (i32 1), [[TMP1]]
+; CHECK-NEXT:    ret <2 x i32> [[MAX]]
+;
+  %shl_x = shl nuw <2 x i32> <i32 1, i32 1>, %x
+  %shl_y = shl nuw <2 x i32> <i32 1, i32 1>, %y
+  %max = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %shl_x, <2 x i32> %shl_y)
+  ret <2 x i32> %max
+}
+
+define <2 x i32> @test_umin_shl_vector_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x i32> @test_umin_shl_vector_splat(
+; CHECK-SAME: <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT:    [[MIN:%.*]] = shl nuw <2 x i32> splat (i32 1), [[TMP1]]
+; CHECK-NEXT:    ret <2 x i32> [[MIN]]
+;
+  %shl_x = shl nuw <2 x i32> <i32 1, i32 1>, %x
+  %shl_y = shl nuw <2 x i32> <i32 1, i32 1>, %y
+  %min = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %shl_x, <2 x i32> %shl_y)
+  ret <2 x i32> %min
+}
+
+define <2 x i32> @test_umax_shl_vector_non_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x i32> @test_umax_shl_vector_non_splat(
+; CHECK-SAME: <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[SHL_X:%.*]] = shl nuw <2 x i32> <i32 1, i32 2>, [[X]]
+; CHECK-NEXT:    [[SHL_Y:%.*]] = shl nuw <2 x i32> <i32 1, i32 2>, [[Y]]
+; CHECK-NEXT:    [[MAX:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[SHL_X]], <2 x i32> [[SHL_Y]])
+; CHECK-NEXT:    ret <2 x i32> [[MAX]]
+;
+  %shl_x = shl nuw <2 x i32> <i32 1, i32 2>, %x
+  %shl_y = shl nuw <2 x i32> <i32 1, i32 2>, %y
+  %max = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %shl_x, <2 x i32> %shl_y)
+  ret <2 x i32> %max
+}
+
+define <2 x i32> @test_umin_shl_vector_non_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x i32> @test_umin_shl_vector_non_splat(
+; CHECK-SAME: <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT:    [[SHL_X:%.*]] = shl nuw <2 x i32> <i32 1, i32 2>, [[X]]
+; CHECK-NEXT:    [[SHL_Y:%.*]] = shl nuw <2 x i32> <i32 1, i32 2>, [[Y]]
+; CHECK-NEXT:    [[MIN:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[SHL_X]], <2 x i32> [[SHL_Y]])
+; CHECK-NEXT:    ret <2 x i32> [[MIN]]
+;
+  %shl_x = shl nuw <2 x i32> <i32 1, i32 2>, %x
+  %shl_y = shl nuw <2 x i32> <i32 1, i32 2>, %y
+  %min = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %shl_x, <2 x i32> %shl_y)
+  ret <2 x i32> %min
+}
+
+define i32 @test_umax_shl_different_base(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_different_base(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[SHL_X:%.*]] = shl nuw i32 1, [[X]]
+; CHECK-NEXT:    [[SHL_Y:%.*]] = shl nuw i32 2, [[Y]]
+; CHECK-NEXT:    [[MAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT:    ret i32 [[MAX]]
+;
+  %shl_x = shl nuw i32 1, %x
+  %shl_y = shl nuw i32 2, %y
+  %max = call i32 @llvm.umax.i32(i32 %shl_x, i32 %shl_y)
+  ret i32 %max
+}
+
+define i32 @test_umin_shl_different_base(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_different_base(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[SHL_X:%.*]] = shl nuw i32 1, [[X]]
+; CHECK-NEXT:    [[SHL_Y:%.*]] = shl nuw i32 2, [[Y]]
+; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT:    ret i32 [[MIN]]
+;
+  %shl_x = shl nuw i32 1, %x
+  %shl_y = shl nuw i32 2, %y
+  %min = call i32 @llvm.umin.i32(i32 %shl_x, i32 %shl_y)
+  ret i32 %min
+}
+
+define i32 @test_umax_shl_no_nuw_flag(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_no_nuw_flag(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[SHL_X:%.*]] = shl i32 2, [[X]]
+; CHECK-NEXT:    [[SHL_Y:%.*]] = shl i32 2, [[Y]]
+; CHECK-NEXT:    [[MAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT:    ret i32 [[MAX]]
+;
+  %shl_x = shl i32 2, %x
+  %shl_y = shl i32 2, %y
+  %max = call i32 @llvm.umax.i32(i32 %shl_x, i32 %shl_y)
+  ret i32 %max
+}
+
+define i32 @test_umin_shl_no_nuw_flag(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_no_nuw_flag(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:    [[SHL_X:%.*]] = shl i32 2, [[X]]
+; CHECK-NEXT:    [[SHL_Y:%.*]] = shl i32 2, [[Y]]
+; CHECK-NEXT:    [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT:    ret i32 [[MIN]]
+;
+  %shl_x = shl i32 2, %x
+  %shl_y = shl i32 2, %y
+  %min = call i32 @llvm.umin.i32(i32 %shl_x, i32 %shl_y)
+  ret i32 %min
+}

@el-ev el-ev changed the title [InstCombine] Fold umax(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umax(x, y)) and umin(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umin(x, y)) [InstCombine] Fold umax(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umax(x, y)) and umin(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umin(x, y)) Mar 13, 2025
@dtcxzyw dtcxzyw changed the title [InstCombine] Fold umax(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umax(x, y)) and umin(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umin(x, y)) [InstCombine] Fold umax/umin(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umax/umin(x, y)) Mar 13, 2025
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we handle this in foldIntrinsicUsingDistributiveLaws instead? It currently assumes commutative ops, but if we drop that assumption we can handle the shift case there as well.

Note that while the transform is not valid for nsw + smin/smax, you can preserve the nsw flag for the unsigned case.

@nikic
Copy link
Contributor

nikic commented Mar 13, 2025

It's worth noting that this variant is valid as well: https://alive2.llvm.org/ce/z/aqLRYA

@el-ev
Copy link
Member Author

el-ev commented Mar 13, 2025

Can we handle this in foldIntrinsicUsingDistributiveLaws instead? It currently assumes commutative ops, but if we drop that assumption we can handle the shift case there as well.

I'm not quite sure how to modify that function to achieve this, as it not only assumes the inner ops are commutative but also tries to swap their operands.

@nikic
Copy link
Contributor

nikic commented Mar 13, 2025

Can we handle this in foldIntrinsicUsingDistributiveLaws instead? It currently assumes commutative ops, but if we drop that assumption we can handle the shift case there as well.

I'm not quite sure how to modify that function to achieve this, as it not only assumes the inner ops are commutative but also tries to swap their operands.

What I'd expect is to basically skip the swapping logic if the ops are not commutative.

@el-ev el-ev requested a review from dtcxzyw March 14, 2025 10:51
@el-ev el-ev changed the title [InstCombine] Fold umax/umin(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umax/umin(x, y)) [InstCombine] Fold umax/umin(nuw_shl(z, x), nuw_shl(z, y)) -> nuw_shl(z, umax/umin(x, y)) and umax/umin(nuw_shl(x, z), nuw_shl(y, z)) -> nuw_shl(umax/umin(x, y), z) Mar 14, 2025
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After this patch, InstCombine cannot fold the following pattern: https://godbolt.org/z/eadW6T8nb

define i8 @umin_of_add_nuw_r(i8 %a, i8 %b, i8 %c) {
  %add1 = add nuw i8 %b, %a
  %add2 = add nuw i8 %a, %c
  %min = call i8 @llvm.umin.i8(i8 %add1, i8 %add2)
  ret i8 %min
}

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LG

@dtcxzyw dtcxzyw merged commit 1762f16 into llvm:main Mar 15, 2025
11 checks passed
@el-ev el-ev deleted the umax_shl branch March 16, 2025 08:45
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

Missed optimization: umax(1 << x, 1 << y) => 1 << umax(x, y)

4 participants