Skip to content

Commit 9630c22

Browse files
committed
Moved pattern to SimplifyDemandedUseBits
1 parent bc50358 commit 9630c22

File tree

3 files changed

+66
-84
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -756,47 +756,6 @@ static Instruction *shrinkInsertElt(CastInst &Trunc,
756756
return nullptr;
757757
}
758758

759-
/// Let N = 2 * M.
760-
/// Given an N-bit integer representing a pack of two M-bit integers,
761-
/// we can select one of the packed integers by right-shifting by either zero or
762-
/// M, and then truncating the result to M bits.
763-
///
764-
/// This function folds this shift-and-truncate into a select instruction,
765-
/// enabling further simplification.
766-
static Instruction *foldPackSelectingShift(TruncInst &Trunc,
767-
InstCombinerImpl &IC) {
768-
769-
const uint64_t BitWidth = Trunc.getDestTy()->getScalarSizeInBits();
770-
if (!isPowerOf2_64(BitWidth))
771-
return nullptr;
772-
if (Trunc.getSrcTy()->getScalarSizeInBits() < 2 * BitWidth)
773-
return nullptr;
774-
775-
Value *Upper, *Lower, *ShrAmt;
776-
if (!match(Trunc.getOperand(0),
777-
m_OneUse(m_Shr(
778-
m_OneUse(m_DisjointOr(
779-
m_OneUse(m_Shl(m_Value(Upper), m_SpecificInt(BitWidth))),
780-
m_Value(Lower))),
781-
m_Value(ShrAmt)))))
782-
return nullptr;
783-
784-
KnownBits KnownLower = IC.computeKnownBits(Lower, nullptr);
785-
if (!KnownLower.getMaxValue().isIntN(BitWidth))
786-
return nullptr;
787-
788-
KnownBits KnownShr = IC.computeKnownBits(ShrAmt, nullptr);
789-
if ((~KnownShr.Zero).getZExtValue() != BitWidth)
790-
return nullptr;
791-
792-
Value *ShrAmtZ =
793-
IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(Trunc.getSrcTy()),
794-
ShrAmt->getName() + ".z");
795-
Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper);
796-
Select->takeName(Trunc.getOperand(0));
797-
return CastInst::CreateTruncOrBitCast(Select, Trunc.getDestTy());
798-
}
799-
800759
Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
801760
if (Instruction *Result = commonCastTransforms(Trunc))
802761
return Result;
@@ -948,9 +907,6 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
948907
if (Instruction *I = shrinkInsertElt(Trunc, Builder))
949908
return I;
950909

951-
if (Instruction *I = foldPackSelectingShift(Trunc, *this))
952-
return I;
953-
954910
if (Src->hasOneUse() &&
955911
(isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
956912
// Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the

llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,48 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
800800
Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
801801
} else {
802802
llvm::computeKnownBits(I, Known, Q, Depth);
803+
804+
// Let N = 2 * M.
805+
// Given an N-bit integer representing a pack of two M-bit integers,
806+
// we can select one of the packed integers by right-shifting by either
807+
// zero or M (verifying the shift amount is simplest when M is a power
808+
// of 2), and then isolating the lower M bits. In this case, we can
809+
// represent the shift as a select on whether the shr amount is nonzero.
810+
uint64_t ShlAmt;
811+
Value *Upper, *Lower;
812+
if (!match(I->getOperand(0),
813+
m_OneUse(m_DisjointOr(
814+
m_OneUse(m_Shl(m_Value(Upper), m_ConstantInt(ShlAmt))),
815+
m_Value(Lower)))))
816+
break;
817+
if (!isPowerOf2_64(ShlAmt))
818+
break;
819+
820+
const uint64_t DemandedBitWidth = DemandedMask.getActiveBits();
821+
if (DemandedBitWidth > ShlAmt)
822+
break;
823+
824+
// Check that the demanded bits of Upper are not shifted out by the shl.
825+
if (Upper->getType()->getScalarSizeInBits() < ShlAmt + DemandedBitWidth)
826+
break;
827+
828+
KnownBits KnownLowerBits = computeKnownBits(Lower, I, Depth);
829+
if (!KnownLowerBits.getMaxValue().isIntN(ShlAmt))
830+
break;
831+
832+
Value *ShrAmt = I->getOperand(1);
833+
KnownBits KnownShrBits = computeKnownBits(ShrAmt, I, Depth);
834+
// Verify that ShrAmt is either exactly ShlAmt (which is a power of 2) or
835+
// zero.
836+
if ((~KnownShrBits.Zero).getZExtValue() != ShlAmt)
837+
break;
838+
839+
Value *ShrAmtZ = Builder.CreateICmpEQ(
840+
ShrAmt, Constant::getNullValue(ShrAmt->getType()),
841+
ShrAmt->getName() + ".z");
842+
Value *Select = Builder.CreateSelect(ShrAmtZ, Lower, Upper);
843+
Select->takeName(I);
844+
return Select;
803845
}
804846
break;
805847
}

llvm/test/Transforms/InstCombine/fold-selective-shift.ll

Lines changed: 24 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) {
3939
ret i16 %trunc
4040
}
4141

42-
define i16 @selective_shift_16_range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
43-
; CHECK-LABEL: define i16 @selective_shift_16_range(
42+
define i16 @selective_shift_16.range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
43+
; CHECK-LABEL: define i16 @selective_shift_16.range(
4444
; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
4545
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
4646
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
@@ -56,8 +56,27 @@ define i16 @selective_shift_16_range(i32 %mask, i32 %upper, i32 range(i32 0, 655
5656
ret i16 %trunc
5757
}
5858

59-
define <2 x i16> @selective_shift_v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) {
60-
; CHECK-LABEL: define <2 x i16> @selective_shift_v16(
59+
define i32 @selective_shift_16.masked(i32 %mask, i16 %upper, i16 %lower) {
60+
; CHECK-LABEL: define i32 @selective_shift_16.masked(
61+
; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
62+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
63+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
64+
; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
65+
; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32
66+
; CHECK-NEXT: ret i32 [[SEL]]
67+
;
68+
%upper.zext = zext i16 %upper to i32
69+
%upper.shl = shl nuw i32 %upper.zext, 16
70+
%lower.zext = zext i16 %lower to i32
71+
%pack = or disjoint i32 %lower.zext, %upper.shl
72+
%mask.bit = and i32 %mask, 16
73+
%sel = lshr i32 %pack, %mask.bit
74+
%sel.masked = and i32 %sel, 65535
75+
ret i32 %sel.masked
76+
}
77+
78+
define <2 x i16> @selective_shift.v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) {
79+
; CHECK-LABEL: define <2 x i16> @selective_shift.v16(
6180
; CHECK-SAME: <2 x i32> [[MASK:%.*]], <2 x i16> [[UPPER:%.*]], <2 x i16> [[LOWER:%.*]]) {
6281
; CHECK-NEXT: [[MASK_BIT:%.*]] = and <2 x i32> [[MASK]], splat (i32 16)
6382
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq <2 x i32> [[MASK_BIT]], zeroinitializer
@@ -183,7 +202,7 @@ define i16 @selective_shift_16.mu.1(i32 %mask, i16 %upper, i16 %lower) {
183202
ret i16 %trunc
184203
}
185204

186-
; multi-use of %sel blocks fold
205+
; non-truncated use of %sel blocks fold
187206
define i16 @selective_shift_16.mu.2(i32 %mask, i16 %upper, i16 %lower) {
188207
; CHECK-LABEL: define i16 @selective_shift_16.mu.2(
189208
; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
@@ -248,38 +267,3 @@ define i32 @selective_shift_32(i64 %mask, i32 %upper, i32 %lower) {
248267
%trunc = trunc i64 %sel to i32
249268
ret i32 %trunc
250269
}
251-
252-
define i32 @selective_shift_32.commute(i64 %mask, i32 %upper, i32 %lower) {
253-
; CHECK-LABEL: define i32 @selective_shift_32.commute(
254-
; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
255-
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
256-
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
257-
; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
258-
; CHECK-NEXT: ret i32 [[SEL_V]]
259-
;
260-
%upper.zext = zext i32 %upper to i64
261-
%upper.shl = shl nuw i64 %upper.zext, 32
262-
%lower.zext = zext i32 %lower to i64
263-
%pack = or disjoint i64 %lower.zext, %upper.shl
264-
%mask.bit = and i64 %mask, 32
265-
%sel = lshr i64 %pack, %mask.bit
266-
%trunc = trunc i64 %sel to i32
267-
ret i32 %trunc
268-
}
269-
270-
define i32 @selective_shift_32_range(i64 %mask, i64 %upper, i64 range(i64 0, 4294967296) %lower) {
271-
; CHECK-LABEL: define i32 @selective_shift_32_range(
272-
; CHECK-SAME: i64 [[MASK:%.*]], i64 [[UPPER:%.*]], i64 range(i64 0, 4294967296) [[LOWER:%.*]]) {
273-
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
274-
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
275-
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i64 [[LOWER]], i64 [[UPPER]]
276-
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SEL]] to i32
277-
; CHECK-NEXT: ret i32 [[TRUNC]]
278-
;
279-
%upper.shl = shl nuw i64 %upper, 32
280-
%pack = or disjoint i64 %upper.shl, %lower
281-
%mask.bit = and i64 %mask, 32
282-
%sel = lshr i64 %pack, %mask.bit
283-
%trunc = trunc i64 %sel to i32
284-
ret i32 %trunc
285-
}

0 commit comments

Comments
 (0)