-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Fold mul (lshr exact (X, N)), 2^N + 1 to add (X , lshr exact (X, N)) #95042
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-transforms — Author: AtariDreams (AtariDreams). Changes: Alive2 Proof: (see PR description). Full diff: https://github.com/llvm/llvm-project/pull/95042.diff — 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index ca1b1921404d8..9248c902c1e90 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -255,6 +255,42 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
}
}
+ {
+ // mul (lshr exact (X, N)), 2^N + 1 -> add (X, lshr exact (X, N))
+ Value *NewOp;
+ const APInt *ShiftC;
+ const APInt *MulAP;
+ if (match(&I, m_Mul(m_CombineOr(m_LShr(m_Value(NewOp), m_APInt(ShiftC)),
+ m_AShr(m_Value(NewOp), m_APInt(ShiftC))),
+ m_APInt(MulAP)))) {
+ if (BitWidth > 2 && (*MulAP - 1).isPowerOf2() &&
+ MulAP->logBase2() == ShiftC->getZExtValue()) {
+ BinaryOperator *OpBO = cast<BinaryOperator>(Op0);
+ if (OpBO->isExact()) {
+ Value *AddOp;
+ if (!HasNUW && !HasNUW)
+ AddOp = Builder.CreateFreeze(NewOp);
+ else
+ AddOp = NewOp;
+
+ Value *BinOp;
+ if (OpBO->getOpcode() == Instruction::LShr ||
+ (OpBO->getOpcode() == Instruction::AShr && HasNUW)) {
+ BinOp = Builder.CreateLShr(
+ AddOp, ConstantInt::get(Ty, ShiftC->getZExtValue()), "", true);
+ } else {
+ BinOp = Builder.CreateAShr(
+ AddOp, ConstantInt::get(Ty, ShiftC->getZExtValue()), "", true);
+ }
+
+ auto *NewAdd = BinaryOperator::CreateAdd(AddOp, BinOp);
+ NewAdd->copyIRFlags(&I);
+ return NewAdd;
+ }
+ }
+ }
+ }
+
if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) {
// Interpret X * (-1<<C) as (-X) * (1<<C) and try to sink the negation.
// The "* (1<<C)" thus becomes a potential shifting opportunity.
diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index c2a4f35412670..3309b18e0f11a 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -862,4 +862,134 @@ define i32 @ashr_mul_times_5_div_4_exact_2(i32 %x) {
ret i32 %ashr
}
+define i32 @ashr_shift_mul(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul(
+; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a = ashr exact i32 %x, 3
+ %res = mul i32 %a, 9
+ ret i32 %res
+}
+
+define i32 @ashr_shift_mul_nuw(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul_nuw(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 3
+; CHECK-NEXT: [[RES:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a = ashr exact i32 %x, 3
+ %res = mul nuw i32 %a, 9
+ ret i32 %res
+}
+
+define i32 @ashr_shift_mul_nsw(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul_nsw(
+; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a = ashr exact i32 %x, 3
+ %res = mul nsw i32 %a, 9
+ ret i32 %res
+}
+
+define i32 @lshr_shift_mul_nuw(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul_nuw(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 3
+; CHECK-NEXT: [[RES:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a = lshr exact i32 %x, 3
+ %res = mul nuw i32 %a, 9
+ ret i32 %res
+}
+
+define i32 @lshr_shift_mul(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul(
+; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT: [[RES:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a = lshr exact i32 %x, 3
+ %res = mul i32 %a, 9
+ ret i32 %res
+}
+
+define i32 @lshr_shift_mul_nsw(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul_nsw(
+; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a = lshr exact i32 %x, 3
+ %res = mul nsw i32 %a, 9
+ ret i32 %res
+}
+
+; Negative test
+
+define i32 @lshr_no_exact(i32 %x) {
+; CHECK-LABEL: @lshr_no_exact(
+; CHECK-NEXT: [[A:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT: [[RES:%.*]] = mul nuw nsw i32 [[A]], 9
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a = lshr i32 %x, 3
+ %res = mul nsw i32 %a, 9
+ ret i32 %res
+}
+
+; Negative test
+
+define i32 @ashr_no_exact(i32 %x) {
+; CHECK-LABEL: @ashr_no_exact(
+; CHECK-NEXT: [[A:%.*]] = ashr i32 [[X:%.*]], 3
+; CHECK-NEXT: [[RES:%.*]] = mul nsw i32 [[A]], 9
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a = ashr i32 %x, 3
+ %res = mul nsw i32 %a, 9
+ ret i32 %res
+}
+
+; Negative test
+
+define i32 @lshr_multiuse(i32 %x) {
+; CHECK-LABEL: @lshr_multiuse(
+; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[A:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT: call void @use(i32 [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a = lshr exact i32 %x, 3
+ call void @use(i32 %a)
+ %res = mul nsw i32 %a, 9
+ ret i32 %res
+}
+
+; Negative test
+
+define i32 @ashr_multiuse(i32 %x) {
+; CHECK-LABEL: @ashr_multiuse(
+; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT: [[A:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT: call void @use(i32 [[A]])
+; CHECK-NEXT: [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a = ashr exact i32 %x, 3
+ call void @use(i32 %a)
+ %res = mul nsw i32 %a, 9
+ ret i32 %res
+}
+
declare void @use(i32)
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
Okay, so if the IR just repeats, I guess I can remove one use. But what if there's a freeze involved, @goldsteinn? Then it should be one-use, no? |
I'm not sure what you are asking. |
|
As a general rule, don't create more instructions. So in that case, require one-use. |
And what about the freeze? Does that count? |
I'll just reuse Op0 directly, I think; hopefully it works. |
2b053e2 to
9eae258
Compare
|
Okay, this LGTM. Please wait on an additional approval to push. |
4083038 to
1c02e6a
Compare
0a25f83 to
65ab780
Compare
Fixed! |
fa42a8e to
f4c3f3b
Compare
6cfc0e4 to
297d794
Compare
7072760 to
229867c
Compare
b0ea894 to
51aa810
Compare
51aa810 to
19bd045
Compare

Alive2 Proofs:
https://alive2.llvm.org/ce/z/aJnxyp
https://alive2.llvm.org/ce/z/dyeGEv