Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -770,6 +770,15 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
return InsertNewInstWith(Shl, I->getIterator());
}
}

const APInt *Factor;
if (match(I->getOperand(0),
m_OneUse(m_Mul(m_Value(X), m_APInt(Factor)))) &&
Factor->countr_zero() >= ShiftAmt) {
BinaryOperator *Mul = BinaryOperator::CreateMul(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can preserve both the nuw and nsw flags if and only if the original mul has nuw:
https://alive2.llvm.org/ce/z/P9NudG

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(X *nuw (Y << K)) u>> K has been handled in other places: https://godbolt.org/z/43W4vE8bK

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we put the two implementations in the same place?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think the two patterns would combine well. In one case we can optimize because there is no overflow. In the other case we can optimize because the bits affected by overflow are not demanded.

X, ConstantInt::get(X->getType(), Factor->lshr(ShiftAmt)));
return InsertNewInstWith(Mul, I->getIterator());
}
}

// Unsigned shift right.
Expand Down
62 changes: 62 additions & 0 deletions llvm/test/Transforms/InstCombine/lshr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1523,3 +1523,65 @@ define <2 x i8> @bool_add_lshr_vec_wrong_shift_amt(<2 x i1> %a, <2 x i1> %b) {
%lshr = lshr <2 x i8> %add, <i8 1, i8 2>
ret <2 x i8> %lshr
}

define i32 @lowbits_of_lshr_mul(i64 %x) {
; CHECK-LABEL: @lowbits_of_lshr_mul(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[X:%.*]] to i32
; CHECK-NEXT: [[CONV:%.*]] = mul i32 [[TMP0]], 15
; CHECK-NEXT: ret i32 [[CONV]]
;
entry:
%mul = mul i64 %x, 64424509440
%shift = lshr i64 %mul, 32
%conv = trunc i64 %shift to i32
ret i32 %conv
}

define i32 @lowbits_of_lshr_mul_mask(i32 %x) {
; CHECK-LABEL: @lowbits_of_lshr_mul_mask(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[X:%.*]], 1600
; CHECK-NEXT: [[CONV:%.*]] = and i32 [[TMP0]], 32704
; CHECK-NEXT: ret i32 [[CONV]]
;
entry:
%mul = mul i32 %x, 104857600
%shift = lshr i32 %mul, 16
%conv = and i32 %shift, 32767
ret i32 %conv
}

; Negative tests

define i32 @lowbits_of_lshr_mul_mask_multiuse(i32 %x) {
; CHECK-LABEL: @lowbits_of_lshr_mul_mask_multiuse(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], 104857600
; CHECK-NEXT: call void @use(i32 [[MUL]])
; CHECK-NEXT: [[SHIFT:%.*]] = lshr exact i32 [[MUL]], 16
; CHECK-NEXT: [[CONV:%.*]] = and i32 [[SHIFT]], 32704
; CHECK-NEXT: ret i32 [[CONV]]
;
entry:
%mul = mul i32 %x, 104857600
call void @use(i32 %mul)
%shift = lshr i32 %mul, 16
%conv = and i32 %shift, 32767
ret i32 %conv
}

define i32 @lowbits_of_lshr_mul_mask_indivisible(i32 %x) {
; CHECK-LABEL: @lowbits_of_lshr_mul_mask_indivisible(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[X:%.*]], 25600
; CHECK-NEXT: [[SHIFT:%.*]] = lshr i32 [[MUL]], 16
; CHECK-NEXT: [[CONV:%.*]] = and i32 [[SHIFT]], 32767
; CHECK-NEXT: ret i32 [[CONV]]
;
entry:
%mul = mul i32 %x, 25600
%shift = lshr i32 %mul, 16
%conv = and i32 %shift, 32767
ret i32 %conv
}
Loading