Skip to content

Conversation

@AZero13
Copy link
Contributor

@AZero13 AZero13 commented Jun 10, 2024

@llvmbot
Copy link
Member

llvmbot commented Jun 10, 2024

@llvm/pr-subscribers-llvm-transforms

Author: AtariDreams (AtariDreams)

Changes

Alive2 Proof:
https://alive2.llvm.org/ce/z/LVqGEo


Full diff: https://github.com/llvm/llvm-project/pull/95042.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp (+36)
  • (modified) llvm/test/Transforms/InstCombine/ashr-lshr.ll (+130)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index ca1b1921404d8..9248c902c1e90 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -255,6 +255,42 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
     }
   }
 
+  {
+    // mul (lshr exact (X, 2^N + 1)), N -> add (X , lshr (X, N))
+    Value *NewOp;
+    const APInt *ShiftC;
+    const APInt *MulAP;
+    if (match(&I, m_Mul(m_CombineOr(m_LShr(m_Value(NewOp), m_APInt(ShiftC)),
+                                    m_AShr(m_Value(NewOp), m_APInt(ShiftC))),
+                        m_APInt(MulAP)))) {
+      if (BitWidth > 2 && (*MulAP - 1).isPowerOf2() &&
+          MulAP->logBase2() == ShiftC->getZExtValue()) {
+        BinaryOperator *OpBO = cast<BinaryOperator>(Op0);
+        if (OpBO->isExact()) {
+          Value *AddOp;
+          if (!HasNUW && !HasNUW)
+            AddOp = Builder.CreateFreeze(NewOp);
+          else
+            AddOp = NewOp;
+
+          Value *BinOp;
+          if (OpBO->getOpcode() == Instruction::LShr ||
+              (OpBO->getOpcode() == Instruction::AShr && HasNUW)) {
+            BinOp = Builder.CreateLShr(
+                AddOp, ConstantInt::get(Ty, ShiftC->getZExtValue()), "", true);
+          } else {
+            BinOp = Builder.CreateAShr(
+                AddOp, ConstantInt::get(Ty, ShiftC->getZExtValue()), "", true);
+          }
+
+          auto *NewAdd = BinaryOperator::CreateAdd(AddOp, BinOp);
+          NewAdd->copyIRFlags(&I);
+          return NewAdd;
+        }
+      }
+    }
+  }
+
   if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) {
     // Interpret  X * (-1<<C)  as  (-X) * (1<<C)  and try to sink the negation.
     // The "* (1<<C)" thus becomes a potential shifting opportunity.
diff --git a/llvm/test/Transforms/InstCombine/ashr-lshr.ll b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
index c2a4f35412670..3309b18e0f11a 100644
--- a/llvm/test/Transforms/InstCombine/ashr-lshr.ll
+++ b/llvm/test/Transforms/InstCombine/ashr-lshr.ll
@@ -862,4 +862,134 @@ define i32 @ashr_mul_times_5_div_4_exact_2(i32 %x) {
   ret i32 %ashr
 }
 
+define i32 @ashr_shift_mul(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  %res = mul i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @ashr_shift_mul_nuw(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul_nuw(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  %res = mul nuw i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @ashr_shift_mul_nsw(i32 %x) {
+; CHECK-LABEL: @ashr_shift_mul_nsw(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @lshr_shift_mul_nuw(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul_nuw(
+; CHECK-NEXT:    [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nuw i32 [[TMP1]], [[X]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  %res = mul nuw i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @lshr_shift_mul(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  %res = mul i32 %a, 9
+  ret i32 %res
+}
+
+define i32 @lshr_shift_mul_nsw(i32 %x) {
+; CHECK-LABEL: @lshr_shift_mul_nsw(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @lshr_no_exact(i32 %x) {
+; CHECK-LABEL: @lshr_no_exact(
+; CHECK-NEXT:    [[A:%.*]] = lshr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = mul nuw nsw i32 [[A]], 9
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @ashr_no_exact(i32 %x) {
+; CHECK-LABEL: @ashr_no_exact(
+; CHECK-NEXT:    [[A:%.*]] = ashr i32 [[X:%.*]], 3
+; CHECK-NEXT:    [[RES:%.*]] = mul nsw i32 [[A]], 9
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr i32 %x, 3
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @lshr_multiuse(i32 %x) {
+; CHECK-LABEL: @lshr_multiuse(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    call void @use(i32 [[A]])
+; CHECK-NEXT:    [[TMP2:%.*]] = lshr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = lshr exact i32 %x, 3
+  call void @use(i32 %a)
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
+; Negative test
+
+define i32 @ashr_multiuse(i32 %x) {
+; CHECK-LABEL: @ashr_multiuse(
+; CHECK-NEXT:    [[TMP1:%.*]] = freeze i32 [[X:%.*]]
+; CHECK-NEXT:    [[A:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    call void @use(i32 [[A]])
+; CHECK-NEXT:    [[TMP2:%.*]] = ashr exact i32 [[TMP1]], 3
+; CHECK-NEXT:    [[RES:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %a = ashr exact i32 %x, 3
+  call void @use(i32 %a)
+  %res = mul nsw i32 %a, 9
+  ret i32 %res
+}
+
 declare void @use(i32)

@github-actions
Copy link

github-actions bot commented Jun 11, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

@AZero13
Copy link
Contributor Author

AZero13 commented Jun 12, 2024

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

@AZero13 AZero13 changed the title [InstCombine] Fold mul (lshr exact (X, 2^N + 1)), N -> add (X , lshr (X, N)) [InstCombine] Fold mul (lshr exact (X, N)), 2^N + 1 -> add (X , lshr (X, N)) Jun 12, 2024
@AZero13 AZero13 changed the title [InstCombine] Fold mul (lshr exact (X, N)), 2^N + 1 -> add (X , lshr (X, N)) [InstCombine] Fold mul (lshr exact (X, N)), 2^N + 1 -> add (X , lshr exact (X, N)) Jun 12, 2024
@goldsteinn
Copy link
Contributor

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

Im not sure what you are asking.

@AZero13
Copy link
Contributor Author

AZero13 commented Jun 12, 2024

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

Im not sure what you are asking.

See:
image

@goldsteinn
Copy link
Contributor

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

Im not sure what you are asking.

See:

As a general rule, don't create more instructions. So in that case, req oneuse.

@AZero13
Copy link
Contributor Author

AZero13 commented Jun 12, 2024

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

Im not sure what you are asking.

See:

As a general rule, don't create more instructions. So in that case, req oneuse.

And what about the freeze? Does that count?

@AZero13
Copy link
Contributor Author

AZero13 commented Jun 12, 2024

Okay so if the IR just repeats I guess I can remove one use but what if there's frozen involved @goldsteinn

Then it should be one use no?

Im not sure what you are asking.

See:

As a general rule, don't create more instructions. So in that case, req oneuse.

I'll just reuse Op0 directly I think hopefully it works

@AZero13 AZero13 force-pushed the inverse-div-shift branch 11 times, most recently from 2b053e2 to 9eae258 Compare June 15, 2024 14:56
@goldsteinn
Copy link
Contributor

Okay, this LGTM. Please wait on an additional approval to push.

@AZero13 AZero13 force-pushed the inverse-div-shift branch 2 times, most recently from 4083038 to 1c02e6a Compare June 16, 2024 00:47
@AZero13 AZero13 requested a review from dtcxzyw June 16, 2024 01:10
@AZero13 AZero13 requested a review from goldsteinn June 20, 2024 16:41
@AZero13 AZero13 force-pushed the inverse-div-shift branch 3 times, most recently from 0a25f83 to 65ab780 Compare June 21, 2024 01:55
@AZero13
Copy link
Contributor Author

AZero13 commented Jun 21, 2024

The code doesn't match the proofs (the proofs have freeze instructions, the code does not).

Fixed!

@AZero13 AZero13 force-pushed the inverse-div-shift branch 2 times, most recently from fa42a8e to f4c3f3b Compare June 21, 2024 14:51
@AZero13 AZero13 force-pushed the inverse-div-shift branch 5 times, most recently from 6cfc0e4 to 297d794 Compare July 23, 2024 03:30
@AZero13 AZero13 force-pushed the inverse-div-shift branch 5 times, most recently from 7072760 to 229867c Compare August 11, 2024 23:37
@AZero13 AZero13 force-pushed the inverse-div-shift branch 6 times, most recently from b0ea894 to 51aa810 Compare September 6, 2024 20:26
@AZero13 AZero13 closed this Sep 15, 2024
@AZero13 AZero13 reopened this Oct 6, 2024
@AZero13 AZero13 force-pushed the inverse-div-shift branch from 51aa810 to 19bd045 Compare October 6, 2024 17:27
@AZero13 AZero13 closed this Oct 15, 2024
@AZero13 AZero13 changed the title [InstCombine] Fold mul (lshr exact (X, N)), 2^N + 1 -> add (X , lshr exact (X, N)) Fold mul (lshr exact (X, N)), 2^N + 1 to add (X , lshr exact (X, N)) Oct 15, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

5 participants