-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[InstCombine] Fold umax/umin(nuw_shl(z, x), nuw_shl(z, y)) -> nuw_shl(z, umax/umin(x, y)) and umax/umin(nuw_shl(x, z), nuw_shl(y, z)) -> nuw_shl(umax/umin(x, y), z)
#131076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…C0, umax(x, y))` and `umin(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umin(x, y))`
|
@llvm/pr-subscribers-llvm-transforms Author: Iris (el-ev) Changes
This PR introduces the following transformations:
Alive2 live proof: https://alive2.llvm.org/ce/z/6bM-p7 Full diff: https://github.com/llvm/llvm-project/pull/131076.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 63f2fd0a733ce..a36f01c98b900 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1887,6 +1887,19 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (Instruction *I = foldMaxMulShift(I1, I0))
return I;
}
+
+ // umax(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umax(x, y))
+ // umin(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umin(x, y))
+ const APInt *C1, *C2;
+ if (match(I0, m_OneUse(m_NUWShl(m_APInt(C1), m_Value()))) &&
+ match(I1, m_OneUse(m_NUWShl(m_APInt(C2), m_Value()))) && *C1 == *C2) {
+ Value *X = cast<ShlOperator>(I0)->getOperand(1);
+ Value *Y = cast<ShlOperator>(I1)->getOperand(1);
+ Value *MaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
+ return BinaryOperator::CreateNUWShl(ConstantInt::get(I0->getType(), *C1),
+ MaxMin);
+ }
+
// If both operands of unsigned min/max are sign-extended, it is still ok
// to narrow the operation.
[[fallthrough]];
diff --git a/llvm/test/Transforms/InstCombine/shift-uminmax.ll b/llvm/test/Transforms/InstCombine/shift-uminmax.ll
new file mode 100644
index 0000000000000..14c600579999d
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shift-uminmax.ll
@@ -0,0 +1,232 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; For the following patterns:
+; umax(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umax(x, y))
+; umin(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umin(x, y))
+
+define i32 @test_umax_shl_const1(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_const1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: [[MAX:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MAX]]
+;
+ %shl_x = shl nuw i32 1, %x
+ %shl_y = shl nuw i32 1, %y
+ %max = call i32 @llvm.umax.i32(i32 %shl_x, i32 %shl_y)
+ ret i32 %max
+}
+
+define i32 @test_umin_shl_const1(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_const1(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: [[MIN:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MIN]]
+;
+ %shl_x = shl nuw i32 1, %x
+ %shl_y = shl nuw i32 1, %y
+ %min = call i32 @llvm.umin.i32(i32 %shl_x, i32 %shl_y)
+ ret i32 %min
+}
+
+define i32 @test_umax_shl_const5(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_const5(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: [[MAX:%.*]] = shl nuw i32 5, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MAX]]
+;
+ %shl_x = shl nuw i32 5, %x
+ %shl_y = shl nuw i32 5, %y
+ %max = call i32 @llvm.umax.i32(i32 %shl_x, i32 %shl_y)
+ ret i32 %max
+}
+
+define i32 @test_umin_shl_const5(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_const5(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[X]], i32 [[Y]])
+; CHECK-NEXT: [[MIN:%.*]] = shl nuw i32 5, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MIN]]
+;
+ %shl_x = shl nuw i32 5, %x
+ %shl_y = shl nuw i32 5, %y
+ %min = call i32 @llvm.umin.i32(i32 %shl_x, i32 %shl_y)
+ ret i32 %min
+}
+
+declare void @use(i8)
+
+define i32 @test_umax_shl_const1_multi_use(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_const1_multi_use(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[SHL_X:%.*]] = shl nuw i32 1, [[X]]
+; CHECK-NEXT: [[SHL_Y:%.*]] = shl nuw i32 1, [[Y]]
+; CHECK-NEXT: call void @use(i32 [[SHL_X]])
+; CHECK-NEXT: call void @use(i32 [[SHL_Y]])
+; CHECK-NEXT: [[MAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT: ret i32 [[MAX]]
+;
+ %shl_x = shl nuw i32 1, %x
+ %shl_y = shl nuw i32 1, %y
+ call void @use(i32 %shl_x)
+ call void @use(i32 %shl_y)
+ %max = call i32 @llvm.umax.i32(i32 %shl_x, i32 %shl_y)
+ ret i32 %max
+}
+
+define i32 @test_umin_shl_const1_multi_use(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_const1_multi_use(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[SHL_X:%.*]] = shl nuw i32 1, [[X]]
+; CHECK-NEXT: [[SHL_Y:%.*]] = shl nuw i32 1, [[Y]]
+; CHECK-NEXT: call void @use(i32 [[SHL_X]])
+; CHECK-NEXT: call void @use(i32 [[SHL_Y]])
+; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT: ret i32 [[MIN]]
+;
+ %shl_x = shl nuw i32 1, %x
+ %shl_y = shl nuw i32 1, %y
+ call void @use(i32 %shl_x)
+ call void @use(i32 %shl_y)
+ %min = call i32 @llvm.umin.i32(i32 %shl_x, i32 %shl_y)
+ ret i32 %min
+}
+
+define i32 @test_umax_shl_const1_commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_const1_commuted(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umax.i32(i32 [[Y]], i32 [[X]])
+; CHECK-NEXT: [[MAX:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MAX]]
+;
+ %shl_x = shl nuw i32 1, %x
+ %shl_y = shl nuw i32 1, %y
+ %max = call i32 @llvm.umax.i32(i32 %shl_y, i32 %shl_x)
+ ret i32 %max
+}
+
+define i32 @test_umin_shl_const1_commuted(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_const1_commuted(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.umin.i32(i32 [[Y]], i32 [[X]])
+; CHECK-NEXT: [[MIN:%.*]] = shl nuw i32 1, [[TMP1]]
+; CHECK-NEXT: ret i32 [[MIN]]
+;
+ %shl_x = shl nuw i32 1, %x
+ %shl_y = shl nuw i32 1, %y
+ %min = call i32 @llvm.umin.i32(i32 %shl_y, i32 %shl_x)
+ ret i32 %min
+}
+
+define <2 x i32> @test_umax_shl_vector_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x i32> @test_umax_shl_vector_splat(
+; CHECK-SAME: <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT: [[MAX:%.*]] = shl nuw <2 x i32> splat (i32 1), [[TMP1]]
+; CHECK-NEXT: ret <2 x i32> [[MAX]]
+;
+ %shl_x = shl nuw <2 x i32> <i32 1, i32 1>, %x
+ %shl_y = shl nuw <2 x i32> <i32 1, i32 1>, %y
+ %max = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %shl_x, <2 x i32> %shl_y)
+ ret <2 x i32> %max
+}
+
+define <2 x i32> @test_umin_shl_vector_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x i32> @test_umin_shl_vector_splat(
+; CHECK-SAME: <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[X]], <2 x i32> [[Y]])
+; CHECK-NEXT: [[MIN:%.*]] = shl nuw <2 x i32> splat (i32 1), [[TMP1]]
+; CHECK-NEXT: ret <2 x i32> [[MIN]]
+;
+ %shl_x = shl nuw <2 x i32> <i32 1, i32 1>, %x
+ %shl_y = shl nuw <2 x i32> <i32 1, i32 1>, %y
+ %min = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %shl_x, <2 x i32> %shl_y)
+ ret <2 x i32> %min
+}
+
+define <2 x i32> @test_umax_shl_vector_non_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x i32> @test_umax_shl_vector_non_splat(
+; CHECK-SAME: <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT: [[SHL_X:%.*]] = shl nuw <2 x i32> <i32 1, i32 2>, [[X]]
+; CHECK-NEXT: [[SHL_Y:%.*]] = shl nuw <2 x i32> <i32 1, i32 2>, [[Y]]
+; CHECK-NEXT: [[MAX:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[SHL_X]], <2 x i32> [[SHL_Y]])
+; CHECK-NEXT: ret <2 x i32> [[MAX]]
+;
+ %shl_x = shl nuw <2 x i32> <i32 1, i32 2>, %x
+ %shl_y = shl nuw <2 x i32> <i32 1, i32 2>, %y
+ %max = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %shl_x, <2 x i32> %shl_y)
+ ret <2 x i32> %max
+}
+
+define <2 x i32> @test_umin_shl_vector_non_splat(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: define <2 x i32> @test_umin_shl_vector_non_splat(
+; CHECK-SAME: <2 x i32> [[X:%.*]], <2 x i32> [[Y:%.*]]) {
+; CHECK-NEXT: [[SHL_X:%.*]] = shl nuw <2 x i32> <i32 1, i32 2>, [[X]]
+; CHECK-NEXT: [[SHL_Y:%.*]] = shl nuw <2 x i32> <i32 1, i32 2>, [[Y]]
+; CHECK-NEXT: [[MIN:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[SHL_X]], <2 x i32> [[SHL_Y]])
+; CHECK-NEXT: ret <2 x i32> [[MIN]]
+;
+ %shl_x = shl nuw <2 x i32> <i32 1, i32 2>, %x
+ %shl_y = shl nuw <2 x i32> <i32 1, i32 2>, %y
+ %min = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %shl_x, <2 x i32> %shl_y)
+ ret <2 x i32> %min
+}
+
+define i32 @test_umax_shl_different_base(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_different_base(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[SHL_X:%.*]] = shl nuw i32 1, [[X]]
+; CHECK-NEXT: [[SHL_Y:%.*]] = shl nuw i32 2, [[Y]]
+; CHECK-NEXT: [[MAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT: ret i32 [[MAX]]
+;
+ %shl_x = shl nuw i32 1, %x
+ %shl_y = shl nuw i32 2, %y
+ %max = call i32 @llvm.umax.i32(i32 %shl_x, i32 %shl_y)
+ ret i32 %max
+}
+
+define i32 @test_umin_shl_different_base(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_different_base(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[SHL_X:%.*]] = shl nuw i32 1, [[X]]
+; CHECK-NEXT: [[SHL_Y:%.*]] = shl nuw i32 2, [[Y]]
+; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT: ret i32 [[MIN]]
+;
+ %shl_x = shl nuw i32 1, %x
+ %shl_y = shl nuw i32 2, %y
+ %min = call i32 @llvm.umin.i32(i32 %shl_x, i32 %shl_y)
+ ret i32 %min
+}
+
+define i32 @test_umax_shl_no_nuw_flag(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umax_shl_no_nuw_flag(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[SHL_X:%.*]] = shl i32 2, [[X]]
+; CHECK-NEXT: [[SHL_Y:%.*]] = shl i32 2, [[Y]]
+; CHECK-NEXT: [[MAX:%.*]] = call i32 @llvm.umax.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT: ret i32 [[MAX]]
+;
+ %shl_x = shl i32 2, %x
+ %shl_y = shl i32 2, %y
+ %max = call i32 @llvm.umax.i32(i32 %shl_x, i32 %shl_y)
+ ret i32 %max
+}
+
+define i32 @test_umin_shl_no_nuw_flag(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @test_umin_shl_no_nuw_flag(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT: [[SHL_X:%.*]] = shl i32 2, [[X]]
+; CHECK-NEXT: [[SHL_Y:%.*]] = shl i32 2, [[Y]]
+; CHECK-NEXT: [[MIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SHL_X]], i32 [[SHL_Y]])
+; CHECK-NEXT: ret i32 [[MIN]]
+;
+ %shl_x = shl i32 2, %x
+ %shl_y = shl i32 2, %y
+ %min = call i32 @llvm.umin.i32(i32 %shl_x, i32 %shl_y)
+ ret i32 %min
+}
|
Title changed from "umax(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umax(x, y)) and umin(nuw_shl(C0, x), nuw_shl(C0, y)) -> nuw_shl(C0, umin(x, y))" to "umax(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umax(x, y)) and umin(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umin(x, y))"
Title changed from "umax(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umax(x, y)) and umin(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umin(x, y))" to "umax/umin(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umax/umin(x, y))"
dtcxzyw
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
nikic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we handle this in foldIntrinsicUsingDistributiveLaws instead? It currently assumes commutative ops, but if we drop that assumption we can handle the shift case there as well.
Note that while the transform is not valid for nsw + smin/smax, you can preserve the nsw flag for the unsigned case.
|
It's worth noting that this variant is valid as well: https://alive2.llvm.org/ce/z/aqLRYA |
I'm not quite sure how to modify that function to achieve this, as it not only assumes the inner ops are commutative but also tries to swap their operands. |
What I'd expect is to basically skip the swapping logic if the ops are not commutative. |
Title changed from "umax/umin(nuw_shl(base, x), nuw_shl(base, y)) -> nuw_shl(base, umax/umin(x, y))" to "umax/umin(nuw_shl(z, x), nuw_shl(z, y)) -> nuw_shl(z, umax/umin(x, y)) and umax/umin(nuw_shl(x, z), nuw_shl(y, z)) -> nuw_shl(umax/umin(x, y), z)"
nikic
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
dtcxzyw
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After this patch, InstCombine cannot fold the following pattern: https://godbolt.org/z/eadW6T8nb
define i8 @umin_of_add_nuw_r(i8 %a, i8 %b, i8 %c) {
%add1 = add nuw i8 %b, %a
%add2 = add nuw i8 %a, %c
%min = call i8 @llvm.umin.i8(i8 %add1, i8 %add2)
ret i8 %min
}
dtcxzyw
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
Related issue: umax(1 << x, 1 << y) => 1 << umax(x, y) (#129947)
This PR introduces the following transformations:
umax(nuw_shl(z, x), nuw_shl(z, y)) -> nuw_shl(z, umax(x, y))
umin(nuw_shl(z, x), nuw_shl(z, y)) -> nuw_shl(z, umin(x, y))
umax(nuw_shl(x, z), nuw_shl(y, z)) -> nuw_shl(umax(x, y), z)
umin(nuw_shl(x, z), nuw_shl(y, z)) -> nuw_shl(umin(x, y), z)
Alive2 live proof: