[InstCombine] Avoid unprofitable add with remainder transform #147319
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

Changes

If C1 is 1, this will end up replacing the remainder with a multiply and a longer dependency chain. This is clearly unprofitable in the case where the remainder is an `and`.

Fixes #147176.
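To make the pattern concrete: the code touched below matches `(X / C0) * C1 + (X % C0) * C2` and rewrites it to `X * C2 + (X / C0) * (C1 - C2 * C0)` (hence `NewC = C1 - C2 * C0`). Here is a minimal sketch of the problematic `C1 == 1`, power-of-two-divisor case, hand-written for illustration rather than taken from the PR's tests:

```llvm
; Input: C0 = 16, C1 = C2 = 1, i.e. (X / 16) + (X % 16).
; The lshr and the and execute in parallel, and the and is a cheap op.
define i32 @before(i32 %x) {
  %div = lshr i32 %x, 4
  %rem = and i32 %x, 15
  %add = add i32 %div, %rem
  ret i32 %add
}

; What the transform would produce: X + (X >> 4) * (1 - 16).
; The and is gone, but the new mul depends on the lshr, so the critical
; path grows from lshr -> add to lshr -> mul -> add.
define i32 @after(i32 %x) {
  %div = lshr i32 %x, 4
  %mul = mul i32 %div, -15
  %add = add i32 %mul, %x
  ret i32 %add
}
```

Full diff: https://github.com/llvm/llvm-project/pull/147319.diff

2 Files Affected: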
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 1ba548b6ff062..1ad0a9c488e80 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1193,8 +1193,10 @@ Value *InstCombinerImpl::SimplifyAddWithRemainder(BinaryOperator &I) {
}
Value *DivOpV;
APInt DivOpC;
+ // The transform is valid for C1==1, but not profitable.
if (MatchRem(Rem, X, C0, IsSigned) &&
- MatchDiv(Div, DivOpV, DivOpC, IsSigned) && X == DivOpV && C0 == DivOpC) {
+ MatchDiv(Div, DivOpV, DivOpC, IsSigned) && X == DivOpV && C0 == DivOpC &&
+ !C1.isOne()) {
APInt NewC = C1 - C2 * C0;
if (!NewC.isZero() && !Rem->hasOneUse())
return nullptr;
diff --git a/llvm/test/Transforms/InstCombine/add4.ll b/llvm/test/Transforms/InstCombine/add4.ll
index 0e97deb4d98ad..f766ccf651aa2 100644
--- a/llvm/test/Transforms/InstCombine/add4.ll
+++ b/llvm/test/Transforms/InstCombine/add4.ll
@@ -289,3 +289,89 @@ entry:
%add = add i32 %shl, %rem
ret i32 %add
}
+
+define i32 @fold_add_udiv_urem_no_mul(i32 noundef %val) {
+; CHECK-LABEL: @fold_add_udiv_urem_no_mul(
+; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], 10
+; CHECK-NEXT: [[REM:%.*]] = urem i32 [[VAL]], 10
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[DIV]], [[REM]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %div = udiv i32 %val, 10
+ %rem = urem i32 %val, 10
+ %add = add i32 %div, %rem
+ ret i32 %add
+}
+
+define i32 @fold_add_udiv_urem_rem_mul(i32 noundef %val) {
+; CHECK-LABEL: @fold_add_udiv_urem_rem_mul(
+; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[VAL:%.*]], 10
+; CHECK-NEXT: [[REM:%.*]] = urem i32 [[VAL]], 10
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[REM]], 3
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[DIV]], [[MUL]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %div = udiv i32 %val, 10
+ %rem = urem i32 %val, 10
+ %mul = mul i32 %rem, 3
+ %add = add i32 %div, %mul
+ ret i32 %add
+}
+
+define i32 @fold_add_udiv_urem_pow2_no_mul(i32 noundef %arg) {
+; CHECK-LABEL: @fold_add_udiv_urem_pow2_no_mul(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ARG:%.*]], 4
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG]], 15
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[LSHR]], [[AND]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %lshr = lshr i32 %arg, 4
+ %and = and i32 %arg, 15
+ %add = add i32 %lshr, %and
+ ret i32 %add
+}
+
+define i32 @fold_add_udiv_urem_pow2_div_mul(i32 noundef %arg) {
+; CHECK-LABEL: @fold_add_udiv_urem_pow2_div_mul(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ARG:%.*]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[LSHR]], -13
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[ARG]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %lshr = lshr i32 %arg, 4
+ %mul = mul i32 %lshr, 3
+ %and = and i32 %arg, 15
+ %add = add i32 %mul, %and
+ ret i32 %add
+}
+
+define i32 @fold_add_udiv_urem_pow2_rem_mul(i32 noundef %arg) {
+; CHECK-LABEL: @fold_add_udiv_urem_pow2_rem_mul(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ARG:%.*]], 4
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[ARG]], 15
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[AND]], 3
+; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[LSHR]], [[MUL]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %lshr = lshr i32 %arg, 4
+ %and = and i32 %arg, 15
+ %mul = mul i32 %and, 3
+ %add = add i32 %lshr, %mul
+ ret i32 %add
+}
+
+define i32 @fold_add_udiv_urem_pow2_both_mul(i32 noundef %arg) {
+; CHECK-LABEL: @fold_add_udiv_urem_pow2_both_mul(
+; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 [[ARG:%.*]], 4
+; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[ARG]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[LSHR]], -41
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP2]], [[TMP1]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %lshr = lshr i32 %arg, 4
+ %mul1 = mul i32 %lshr, 7
+ %and = and i32 %arg, 15
+ %mul2 = mul i32 %and, 3
+ %add = add i32 %mul1, %mul2
+ ret i32 %add
+}
I've added a special case where the divisor is 2, as that gets beneficial followup folds. On further consideration, I think it probably generally makes sense to do the transform for the cases using actual div/rem. For constant divisors we're not going to use actual divrem instructions anyway, and after playing around with codegen for different cases a bit, I think the mul ends up being marginally better in most cases. |
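To see why a divisor of 2 is special, here is a sketch of the follow-up fold the comment alludes to (my reading; the exact fold sequence is assumed, not verified against InstCombine's actual output):

```llvm
; (X / 2) + (X % 2): three instructions.
define i32 @before(i32 %x) {
  %div = lshr i32 %x, 1
  %rem = and i32 %x, 1
  %add = add i32 %div, %rem
  ret i32 %add
}

; The transform yields X + (X >> 1) * (1 - 2), and the multiply by -1
; is expected to fold into a subtract, leaving just two instructions.
define i32 @after(i32 %x) {
  %div = lshr i32 %x, 1
  %sub = sub i32 %x, %div
  ret i32 %sub
}
```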
Updated to narrow the exclusion to just the unsigned, power-of-two (but not 2) divisor case, so this should now only exclude the specific case of converting an `and` to a `mul`.
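Concretely, the narrowed rule should behave like this sketch (inferred from the comment above; the revised diff itself is not shown on this page):

```llvm
; Unsigned power-of-two divisor: the remainder is a single and, so the
; transform is now skipped and this stays as written.
define i32 @still_excluded(i32 %x) {
  %div = lshr i32 %x, 4
  %rem = and i32 %x, 15
  %add = add i32 %div, %rem
  ret i32 %add
}

; Non-power-of-two constant divisor: there is no cheap and to lose, and
; codegen expands the urem via multiplication anyway, so the fold fires
; even with C1 == 1: (X /u 10) + (X %u 10) becomes X + (X /u 10) * -9.
define i32 @now_folded(i32 %x) {
  %div = udiv i32 %x, 10
  %mul = mul i32 %div, -9
  %add = add i32 %mul, %x
  ret i32 %add
}
```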
dtcxzyw left a comment:
LGTM.
Updated description: If C1 is 1 and we're working with a power-of-two divisor, this will end up replacing the `and` for the remainder with a multiply and a longer dependency chain. Fixes #147176.