Skip to content

Commit 92e1943

Browse files
committed
[InstCombine] Narrow trunc(lshr) in more cases
We can narrow `trunc(lshr(i32)) to i8` to `trunc(lshr(i16)) to i8` even when the bits being shifted in are not known to be zero, provided that the MSBs of the shifted value do not matter because they end up being truncated away. This kind of narrowing does not remove the trunc, but it can help the vectorizer generate better code in a smaller type. Motivation: libyuv, functions like ARGBToUV444Row_C(). Change-Id: I681a247eac20a4fcf68e54d4a5009f594030a387 Proof: https://alive2.llvm.org/ce/z/9Ao2aJ
1 parent bc37712 commit 92e1943

File tree

2 files changed

+19
-12
lines changed

2 files changed

+19
-12
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ Value *InstCombinerImpl::EvaluateInDifferentType(Value *V, Type *Ty,
5151
Value *LHS = EvaluateInDifferentType(I->getOperand(0), Ty, isSigned);
5252
Value *RHS = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
5353
Res = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS);
54+
if (Opc == Instruction::LShr || Opc == Instruction::AShr)
55+
Res->setIsExact(I->isExact());
5456
break;
5557
}
5658
case Instruction::Trunc:
@@ -319,13 +321,21 @@ static bool canEvaluateTruncated(Value *V, Type *Ty, InstCombinerImpl &IC,
319321
// zero - use AmtKnownBits.getMaxValue().
320322
uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
321323
uint32_t BitWidth = Ty->getScalarSizeInBits();
322-
KnownBits AmtKnownBits =
323-
llvm::computeKnownBits(I->getOperand(1), IC.getDataLayout());
324+
KnownBits AmtKnownBits = IC.computeKnownBits(I->getOperand(1), 0, CxtI);
325+
APInt MaxShiftAmt = AmtKnownBits.getMaxValue();
324326
APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
325-
if (AmtKnownBits.getMaxValue().ult(BitWidth) &&
326-
IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI)) {
327-
return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
328-
canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
327+
if (MaxShiftAmt.ult(BitWidth)) {
328+
if (IC.MaskedValueIsZero(I->getOperand(0), ShiftedBits, 0, CxtI))
329+
return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
330+
canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
331+
// If the only user is a trunc then we can narrow the shift if any new
332+
// MSBs are not going to be used.
333+
if (auto *Trunc = dyn_cast<TruncInst>(V->user_back())) {
334+
auto DemandedBits = Trunc->getType()->getScalarSizeInBits();
335+
if (MaxShiftAmt.ule(BitWidth - DemandedBits))
336+
return canEvaluateTruncated(I->getOperand(0), Ty, IC, CxtI) &&
337+
canEvaluateTruncated(I->getOperand(1), Ty, IC, CxtI);
338+
}
329339
}
330340
break;
331341
}

llvm/test/Transforms/InstCombine/cast.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2044,12 +2044,9 @@ define <2 x i8> @trunc_lshr_zext_uses1(<2 x i8> %A) {
20442044

20452045
define i8 @trunc_lshr_ext_halfWidth(i16 %a, i16 %b, i16 range(i16 0, 8) %shiftAmt) {
20462046
; ALL-LABEL: @trunc_lshr_ext_halfWidth(
2047-
; ALL-NEXT: [[ZEXT_A:%.*]] = zext i16 [[A:%.*]] to i32
2048-
; ALL-NEXT: [[ZEXT_B:%.*]] = zext i16 [[B:%.*]] to i32
2049-
; ALL-NEXT: [[ZEXT_SHIFTAMT:%.*]] = zext nneg i16 [[SHIFTAMT:%.*]] to i32
2050-
; ALL-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[ZEXT_A]], [[ZEXT_B]]
2051-
; ALL-NEXT: [[SHR:%.*]] = lshr i32 [[ADD]], [[ZEXT_SHIFTAMT]]
2052-
; ALL-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
2047+
; ALL-NEXT: [[ADD:%.*]] = add i16 [[A:%.*]], [[B:%.*]]
2048+
; ALL-NEXT: [[SHR:%.*]] = lshr i16 [[ADD]], [[SHIFTAMT:%.*]]
2049+
; ALL-NEXT: [[TRUNC:%.*]] = trunc i16 [[SHR]] to i8
20532050
; ALL-NEXT: ret i8 [[TRUNC]]
20542051
;
20552052
%zext_a = zext i16 %a to i32

0 commit comments

Comments
 (0)