Fold mul (lshr exact (X, N)), 2^N + 1 to add (X , lshr exact (X, N)) #95042

AZero13 · 2024-06-10T21:11:28Z

Alive2 Proofs:
https://alive2.llvm.org/ce/z/aJnxyp
https://alive2.llvm.org/ce/z/dyeGEv

llvmbot · 2024-06-10T21:12:02Z

@llvm/pr-subscribers-llvm-transforms

Author: AtariDreams (AtariDreams)

Changes

Alive2 Proof:
https://alive2.llvm.org/ce/z/LVqGEo

Full diff: https://github.com/llvm/llvm-project/pull/95042.diff

2 Files Affected:

(modified) llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (+36)
(modified) llvm/test/Transforms/InstCombine/ashr-lshr.ll (+130)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index ca1b1921404d8..9248c902c1e90 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -255,6 +255,42 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
     }
   }
 
+  {
+    // mul (lshr exact (X, 2^N + 1)), N -> add (X , lshr (X, N))
+    Value *NewOp;
+    const APInt *ShiftC;
+    const APInt *MulAP;
+    if (match(&I, m_Mul(m_CombineOr(m_LShr(m_Value(NewOp), m_APInt(ShiftC)),
+                                    m_AShr(m_Value(NewOp), m_APInt(ShiftC))),
+                        m_APInt(MulAP)))) {
+      if (BitWidth > 2 && (*MulAP - 1).isPowerOf2() &&
+          MulAP->logBase2() == ShiftC->getZExtValue()) {
+        BinaryOperator *OpBO = cast<BinaryOperator>(Op0);
+        if (OpBO->isExact()) {
+          Value *AddOp;
+          if (!HasNUW && !HasNUW)
+            AddOp = Builder.CreateFreeze(NewOp);
+          else
+            AddOp = NewOp;
+
+          Value *BinOp;
+          if (OpBO->getOpcode() == Instruction::LShr ||
+              (OpBO->getOpcode() == Instruction::AShr && HasNUW)) {
+            BinOp = Builder.CreateLShr(
+                AddOp, ConstantInt::get(Ty, ShiftC->getZExtValue()), "", true);
+          } else {
+            BinOp = Builder.CreateAShr(
+                AddOp, ConstantInt::get(Ty, ShiftC->getZExtValue()), "", true);
+          }
+
+          auto *NewAdd = BinaryOperator::CreateAdd(AddOp, BinOp);
+          NewAdd->copyIRFlags(&I);
+          return NewAdd;
+        }
+      }
+    }
+  }
+
   if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) {
     // Interpret  X * (-1<<C)  as  (-X) * (1<<C)  and try to sink the negation.
     // The "* (1<<C)" thus becomes a potential shifting opportunity.
diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index c2a4f35412670..3309b18e0f11a 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -862,4 +862,134 @@ define i32 @ashr_mul_times_5_div_4_exact_2(i32 %x) {
   ret i32 %ashr
 }
 
+define i32 @ashr_shift_mul(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  %res = mul i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @ashr_shift_mul_nuw(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul_nuw(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  %res = mul nuw i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @ashr_shift_mul_nsw(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul_nsw(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @lshr_shift_mul_nuw(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul_nuw(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  %res = mul nuw i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @lshr_shift_mul(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  %res = mul i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @lshr_shift_mul_nsw(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul_nsw(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @lshr_no_exact(i32 %x) {
+; CHECK-LABEL: @lshr_no_exact(
+; CHECK-NEXT:    [[A:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = mul nuw nsw i32 [[A]], 9
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @ashr_no_exact(i32 %x) {
+; CHECK-LABEL: @ashr_no_exact(
+; CHECK-NEXT:    [[A:%.*]] = ashr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = mul nsw i32 [[A]], 9
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @lshr_multiuse(i32 %x) {
+; CHECK-LABEL: @lshr_multiuse(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    call void @use(i32 [[A]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  call void @use(i32 %a)
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @ashr_multiuse(i32 %x) {
+; CHECK-LABEL: @ashr_multiuse(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    call void @use(i32 [[A]])
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  call void @use(i32 %a)
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
 declare void @use(i32)

github-actions · 2024-06-11T14:30:59Z

✅ With the latest revision this PR passed the C/C++ code formatter.

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

llvm/test/Transforms/InstCombine/ashr-lshr.ll

AZero13 · 2024-06-12T14:57:40Z

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

goldsteinn · 2024-06-12T15:55:37Z

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

Im not sure what you are asking.

AZero13 · 2024-06-12T16:04:20Z

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

Im not sure what you are asking.

See:

goldsteinn · 2024-06-12T16:11:00Z

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

Im not sure what you are asking.

See:

As a general rule, don't create more instructions. So in that case, req oneuse.

AZero13 · 2024-06-12T16:12:45Z

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

Im not sure what you are asking.

See:

As a general rule, don't create more instructions. So in that case, req oneuse.

And what about the freeze? Does that count?

AZero13 · 2024-06-12T16:16:22Z

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

Im not sure what you are asking.

See:

As a general rule, don't create more instructions. So in that case, req oneuse.

I'll just reuse Op0 directly I think hopefully it works

goldsteinn · 2024-06-15T19:59:26Z

Okay, this LGTM. Please wait on an additional approval to push.

AZero13 · 2024-06-21T01:57:58Z

The code doesn't match the proofs (the proofs have freeze instructions, the code does not).

Fixed!

…exact (X, N)) Alive2 Proofs: https://alive2.llvm.org/ce/z/aJnxyp https://alive2.llvm.org/ce/z/dyeGEv

AZero13 requested a review from nikic as a code owner June 10, 2024 21:11

llvmbot added the llvm:transforms label Jun 10, 2024

dtcxzyw reviewed Jun 11, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Outdated Show resolved Hide resolved

goldsteinn reviewed Jun 11, 2024

View reviewed changes

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp Outdated Show resolved Hide resolved

goldsteinn reviewed Jun 11, 2024

View reviewed changes

llvm/test/Transforms/InstCombine/ashr-lshr.ll Outdated Show resolved Hide resolved

AZero13 changed the title ~~[InstCombine] Fold mul (lshr exact (X, 2^N + 1)), N -> add (X , lshr (X, N))~~ [InstCombine] Fold mul (lshr exact (X, N)), 2^N + 1 -> add (X , lshr (X, N)) Jun 12, 2024

AZero13 changed the title ~~[InstCombine] Fold mul (lshr exact (X, N)), 2^N + 1 -> add (X , lshr (X, N))~~ [InstCombine] Fold mul (lshr exact (X, N)), 2^N + 1 -> add (X , lshr exact (X, N)) Jun 12, 2024

AZero13 force-pushed the inverse-div-shift branch 11 times, most recently from 2b053e2 to 9eae258 Compare June 15, 2024 14:56

AZero13 force-pushed the inverse-div-shift branch 2 times, most recently from 4083038 to 1c02e6a Compare June 16, 2024 00:47

AZero13 requested a review from dtcxzyw June 16, 2024 01:10

AZero13 requested a review from goldsteinn June 20, 2024 16:41

AZero13 force-pushed the inverse-div-shift branch 3 times, most recently from 0a25f83 to 65ab780 Compare June 21, 2024 01:55

AZero13 force-pushed the inverse-div-shift branch 2 times, most recently from fa42a8e to f4c3f3b Compare June 21, 2024 14:51

AZero13 force-pushed the inverse-div-shift branch 5 times, most recently from 6cfc0e4 to 297d794 Compare July 23, 2024 03:30

AZero13 force-pushed the inverse-div-shift branch 5 times, most recently from 7072760 to 229867c Compare August 11, 2024 23:37

AZero13 force-pushed the inverse-div-shift branch 6 times, most recently from b0ea894 to 51aa810 Compare September 6, 2024 20:26

AZero13 closed this Sep 15, 2024

AZero13 reopened this Oct 6, 2024

AZero13 added 2 commits October 6, 2024 13:27

[InstCombine] Pre-commit tests (NFC)

9bb9e07

[InstCombine] Fold mul (lshr exact (X, N)), 2^N + 1 -> add (X , lshr …

19bd045

…exact (X, N)) Alive2 Proofs: https://alive2.llvm.org/ce/z/aJnxyp https://alive2.llvm.org/ce/z/dyeGEv

AZero13 force-pushed the inverse-div-shift branch from 51aa810 to 19bd045 Compare October 6, 2024 17:27

AZero13 closed this Oct 15, 2024

AZero13 changed the title ~~[InstCombine] Fold mul (lshr exact (X, N)), 2^N + 1 -> add (X , lshr exact (X, N))~~ Fold mul (lshr exact (X, N)), 2^N + 1 to add (X , lshr exact (X, N)) Oct 15, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Fold mul (lshr exact (X, N)), 2^N + 1 to add (X , lshr exact (X, N)) #95042

Fold mul (lshr exact (X, N)), 2^N + 1 to add (X , lshr exact (X, N)) #95042

Uh oh!

AZero13 commented Jun 10, 2024 •

edited

Loading

Uh oh!

llvmbot commented Jun 10, 2024

Uh oh!

github-actions bot commented Jun 11, 2024 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

Uh oh!

AZero13 commented Jun 12, 2024

Uh oh!

goldsteinn commented Jun 12, 2024

Uh oh!

AZero13 commented Jun 12, 2024

Uh oh!

goldsteinn commented Jun 12, 2024

Uh oh!

AZero13 commented Jun 12, 2024

Uh oh!

AZero13 commented Jun 12, 2024

Uh oh!

goldsteinn commented Jun 15, 2024

Uh oh!

AZero13 commented Jun 21, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

Fold mul (lshr exact (X, N)), 2^N + 1 to add (X , lshr exact (X, N)) #95042

Fold mul (lshr exact (X, N)), 2^N + 1 to add (X , lshr exact (X, N)) #95042

Uh oh!

Conversation

AZero13 commented Jun 10, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

llvmbot commented Jun 10, 2024

Uh oh!

github-actions bot commented Jun 11, 2024 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

AZero13 commented Jun 12, 2024

Uh oh!

goldsteinn commented Jun 12, 2024

Uh oh!

AZero13 commented Jun 12, 2024

Uh oh!

goldsteinn commented Jun 12, 2024

Uh oh!

AZero13 commented Jun 12, 2024

Uh oh!

AZero13 commented Jun 12, 2024

Uh oh!

goldsteinn commented Jun 15, 2024

Uh oh!

AZero13 commented Jun 21, 2024

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

5 participants

AZero13 commented Jun 10, 2024 •

edited

Loading

github-actions bot commented Jun 11, 2024 •

edited

Loading