Skip to content

Commit fa42a8e

Browse files
committed
[InstCombine] Fold mul (lshr exact (X, N)), 2^N + 1 -> add (X, lshr exact (X, N))
Alive2 Proofs: https://alive2.llvm.org/ce/z/LVqGEo https://alive2.llvm.org/ce/z/dyeGEv
1 parent f8603bb commit fa42a8e

File tree

2 files changed

+72
-13
lines changed

2 files changed

+72
-13
lines changed

llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,35 @@ Instruction *InstCombinerImpl::visitMul(BinaryOperator &I) {
257257
}
258258
}
259259

260+
{
261+
// mul (lshr exact X, N), (2^N + 1) -> add (X, lshr exact (X, N))
262+
Value *NewOp;
263+
const APInt *ShiftC;
264+
const APInt *MulAP;
265+
if (match(&I, m_Mul(m_Exact(m_Shr(m_Value(NewOp), m_APInt(ShiftC))),
266+
m_APInt(MulAP)))) {
267+
if (BitWidth > 2 && (*MulAP - 1).isPowerOf2() &&
268+
*ShiftC == MulAP->logBase2()) {
269+
Value *BinOp = Op0;
270+
BinaryOperator *OpBO = cast<BinaryOperator>(Op0);
271+
if (!HasNUW && !HasNSW)
272+
NewOp = Builder.CreateFreeze(NewOp, ".fr");
273+
else if (HasNUW && OpBO->getOpcode() == Instruction::AShr &&
274+
OpBO->hasOneUse())
275+
BinOp = Builder.CreateLShr(NewOp, ConstantInt::get(Ty, *ShiftC), "",
276+
/*isExact=*/true);
277+
278+
auto *NewAdd = BinaryOperator::CreateAdd(NewOp, BinOp);
279+
if (HasNSW && (OpBO->getOpcode() == Instruction::LShr ||
280+
ShiftC->getZExtValue() < BitWidth - 1))
281+
NewAdd->setHasNoSignedWrap(true);
282+
283+
NewAdd->setHasNoUnsignedWrap(HasNUW);
284+
return NewAdd;
285+
}
286+
}
287+
}
288+
260289
if (Op0->hasOneUse() && match(Op1, m_NegatedPower2())) {
261290
// Interpret X * (-1<<C) as (-X) * (1<<C) and try to sink the negation.
262291
// The "* (1<<C)" thus becomes a potential shifting opportunity.

llvm/test/Transforms/InstCombine/ashr-lshr.ll

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -864,8 +864,9 @@ define i32 @ashr_mul_times_5_div_4_exact_2(i32 %x) {
864864

865865
define i32 @ashr_shift_mul(i32 %x) {
866866
; CHECK-LABEL: @ashr_shift_mul(
867-
; CHECK-NEXT: [[A:%.*]] = ashr exact i32 [[X:%.*]], 3
868-
; CHECK-NEXT: [[RES:%.*]] = mul i32 [[A]], 9
867+
; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[X:%.*]]
868+
; CHECK-NEXT: [[A:%.*]] = ashr exact i32 [[DOTFR]], 3
869+
; CHECK-NEXT: [[RES:%.*]] = add i32 [[DOTFR]], [[A]]
869870
; CHECK-NEXT: ret i32 [[RES]]
870871
;
871872
%a = ashr exact i32 %x, 3
@@ -875,8 +876,8 @@ define i32 @ashr_shift_mul(i32 %x) {
875876

876877
define i32 @ashr_shift_mul_nuw(i32 %x) {
877878
; CHECK-LABEL: @ashr_shift_mul_nuw(
878-
; CHECK-NEXT: [[A:%.*]] = ashr exact i32 [[X:%.*]], 3
879-
; CHECK-NEXT: [[RES:%.*]] = mul nuw i32 [[A]], 9
879+
; CHECK-NEXT: [[TMP1:%.*]] = lshr exact i32 [[X:%.*]], 3
880+
; CHECK-NEXT: [[RES:%.*]] = add nuw i32 [[TMP1]], [[X]]
880881
; CHECK-NEXT: ret i32 [[RES]]
881882
;
882883
%a = ashr exact i32 %x, 3
@@ -887,7 +888,7 @@ define i32 @ashr_shift_mul_nuw(i32 %x) {
887888
define i32 @ashr_shift_mul_nsw(i32 %x) {
888889
; CHECK-LABEL: @ashr_shift_mul_nsw(
889890
; CHECK-NEXT: [[A:%.*]] = ashr exact i32 [[X:%.*]], 3
890-
; CHECK-NEXT: [[RES:%.*]] = mul nsw i32 [[A]], 9
891+
; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[A]], [[X]]
891892
; CHECK-NEXT: ret i32 [[RES]]
892893
;
893894
%a = ashr exact i32 %x, 3
@@ -898,7 +899,7 @@ define i32 @ashr_shift_mul_nsw(i32 %x) {
898899
define i32 @lshr_shift_mul_nuw(i32 %x) {
899900
; CHECK-LABEL: @lshr_shift_mul_nuw(
900901
; CHECK-NEXT: [[A:%.*]] = lshr exact i32 [[X:%.*]], 3
901-
; CHECK-NEXT: [[RES:%.*]] = mul nuw i32 [[A]], 9
902+
; CHECK-NEXT: [[RES:%.*]] = add nuw i32 [[A]], [[X]]
902903
; CHECK-NEXT: ret i32 [[RES]]
903904
;
904905
%a = lshr exact i32 %x, 3
@@ -908,8 +909,9 @@ define i32 @lshr_shift_mul_nuw(i32 %x) {
908909

909910
define i32 @lshr_shift_mul(i32 %x) {
910911
; CHECK-LABEL: @lshr_shift_mul(
911-
; CHECK-NEXT: [[A:%.*]] = lshr exact i32 [[X:%.*]], 3
912-
; CHECK-NEXT: [[RES:%.*]] = mul i32 [[A]], 9
912+
; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[X:%.*]]
913+
; CHECK-NEXT: [[A:%.*]] = lshr exact i32 [[DOTFR]], 3
914+
; CHECK-NEXT: [[RES:%.*]] = add i32 [[DOTFR]], [[A]]
913915
; CHECK-NEXT: ret i32 [[RES]]
914916
;
915917
%a = lshr exact i32 %x, 3
@@ -920,7 +922,7 @@ define i32 @lshr_shift_mul(i32 %x) {
920922
define i32 @lshr_shift_mul_nsw(i32 %x) {
921923
; CHECK-LABEL: @lshr_shift_mul_nsw(
922924
; CHECK-NEXT: [[A:%.*]] = lshr exact i32 [[X:%.*]], 3
923-
; CHECK-NEXT: [[RES:%.*]] = mul nuw nsw i32 [[A]], 9
925+
; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[A]], [[X]]
924926
; CHECK-NEXT: ret i32 [[RES]]
925927
;
926928
%a = lshr exact i32 %x, 3
@@ -954,13 +956,11 @@ define i32 @ashr_no_exact(i32 %x) {
954956
ret i32 %res
955957
}
956958

957-
; Negative test
958-
959959
define i32 @lshr_multiuse(i32 %x) {
960960
; CHECK-LABEL: @lshr_multiuse(
961961
; CHECK-NEXT: [[A:%.*]] = lshr exact i32 [[X:%.*]], 3
962962
; CHECK-NEXT: call void @use(i32 [[A]])
963-
; CHECK-NEXT: [[RES:%.*]] = mul nuw nsw i32 [[A]], 9
963+
; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[A]], [[X]]
964964
; CHECK-NEXT: ret i32 [[RES]]
965965
;
966966
%a = lshr exact i32 %x, 3
@@ -997,11 +997,41 @@ define i32 @ashr_multiuse_no_flags(i32 %x) {
997997
ret i32 %res
998998
}
999999

1000+
define i32 @lshr_multiuse_no_flags(i32 %x) {
1001+
; CHECK-LABEL: @lshr_multiuse_no_flags(
1002+
; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[X:%.*]]
1003+
; CHECK-NEXT: [[A:%.*]] = lshr exact i32 [[DOTFR]], 3
1004+
; CHECK-NEXT: call void @use(i32 [[A]])
1005+
; CHECK-NEXT: [[RES:%.*]] = add i32 [[DOTFR]], [[A]]
1006+
; CHECK-NEXT: ret i32 [[RES]]
1007+
;
1008+
%a = lshr exact i32 %x, 3
1009+
call void @use(i32 %a)
1010+
%res = mul i32 %a, 9
1011+
ret i32 %res
1012+
}
1013+
1014+
; Multi-use ashr without nuw/nsw: the fold still applies, with X frozen first
1015+
1016+
define i32 @ashr_multiuse_no_flags(i32 %x) {
1017+
; CHECK-LABEL: @ashr_multiuse_no_flags(
1018+
; CHECK-NEXT: [[DOTFR:%.*]] = freeze i32 [[X:%.*]]
1019+
; CHECK-NEXT: [[A:%.*]] = ashr exact i32 [[DOTFR]], 3
1020+
; CHECK-NEXT: call void @use(i32 [[A]])
1021+
; CHECK-NEXT: [[RES:%.*]] = add i32 [[DOTFR]], [[A]]
1022+
; CHECK-NEXT: ret i32 [[RES]]
1023+
;
1024+
%a = ashr exact i32 %x, 3
1025+
call void @use(i32 %a)
1026+
%res = mul i32 %a, 9
1027+
ret i32 %res
1028+
}
1029+
10001030
define i32 @ashr_multiuse(i32 %x) {
10011031
; CHECK-LABEL: @ashr_multiuse(
10021032
; CHECK-NEXT: [[A:%.*]] = ashr exact i32 [[X:%.*]], 3
10031033
; CHECK-NEXT: call void @use(i32 [[A]])
1004-
; CHECK-NEXT: [[RES:%.*]] = mul nsw i32 [[A]], 9
1034+
; CHECK-NEXT: [[RES:%.*]] = add nsw i32 [[A]], [[X]]
10051035
; CHECK-NEXT: ret i32 [[RES]]
10061036
;
10071037
%a = ashr exact i32 %x, 3

0 commit comments

Comments
 (0)