Commit bc50358

Implemented InstCombine pattern: fold a pack-selecting shift-and-truncate into a select.

1 parent f4370fb

2 files changed: +329 -0 lines changed

llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp

Lines changed: 44 additions & 0 deletions
@@ -756,6 +756,47 @@ static Instruction *shrinkInsertElt(CastInst &Trunc,
   return nullptr;
 }
 
+/// Let N = 2 * M.
+/// Given an N-bit integer representing a pack of two M-bit integers,
+/// we can select one of the packed integers by right-shifting by either zero or
+/// M, and then truncating the result to M bits.
+///
+/// This function folds this shift-and-truncate into a select instruction,
+/// enabling further simplification.
+static Instruction *foldPackSelectingShift(TruncInst &Trunc,
+                                           InstCombinerImpl &IC) {
+
+  const uint64_t BitWidth = Trunc.getDestTy()->getScalarSizeInBits();
+  if (!isPowerOf2_64(BitWidth))
+    return nullptr;
+  if (Trunc.getSrcTy()->getScalarSizeInBits() < 2 * BitWidth)
+    return nullptr;
+
+  Value *Upper, *Lower, *ShrAmt;
+  if (!match(Trunc.getOperand(0),
+             m_OneUse(m_Shr(
+                 m_OneUse(m_DisjointOr(
+                     m_OneUse(m_Shl(m_Value(Upper), m_SpecificInt(BitWidth))),
+                     m_Value(Lower))),
+                 m_Value(ShrAmt)))))
+    return nullptr;
+
+  KnownBits KnownLower = IC.computeKnownBits(Lower, nullptr);
+  if (!KnownLower.getMaxValue().isIntN(BitWidth))
+    return nullptr;
+
+  KnownBits KnownShr = IC.computeKnownBits(ShrAmt, nullptr);
+  if ((~KnownShr.Zero).getZExtValue() != BitWidth)
+    return nullptr;
+
+  Value *ShrAmtZ =
+      IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(Trunc.getSrcTy()),
+                              ShrAmt->getName() + ".z");
+  Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper);
+  Select->takeName(Trunc.getOperand(0));
+  return CastInst::CreateTruncOrBitCast(Select, Trunc.getDestTy());
+}
+
 Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
   if (Instruction *Result = commonCastTransforms(Trunc))
     return Result;
@@ -907,6 +948,9 @@ Instruction *InstCombinerImpl::visitTrunc(TruncInst &Trunc) {
   if (Instruction *I = shrinkInsertElt(Trunc, Builder))
     return I;
 
+  if (Instruction *I = foldPackSelectingShift(Trunc, *this))
+    return I;
+
   if (Src->hasOneUse() &&
       (isa<VectorType>(SrcTy) || shouldChangeType(SrcTy, DestTy))) {
     // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
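In LLVM IR terms, the new fold rewrites the pack-select idiom exercised by the tests below. A minimal before/after sketch (value names mirror the first test; the fold requires that %mask.bit is provably either 0 or the destination width, here 16):

  ; before
  %upper.zext = zext i16 %upper to i32
  %upper.shl = shl nuw i32 %upper.zext, 16
  %lower.zext = zext i16 %lower to i32
  %pack = or disjoint i32 %upper.shl, %lower.zext
  %mask.bit = and i32 %mask, 16
  %sel = lshr i32 %pack, %mask.bit
  %trunc = trunc i32 %sel to i16

  ; after: the shift-and-truncate becomes a select on "shift amount == 0"
  %mask.bit.z = icmp eq i32 %mask.bit, 0
  %trunc = select i1 %mask.bit.z, i16 %lower, i16 %upper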
Lines changed: 285 additions & 0 deletions
@@ -0,0 +1,285 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=instcombine %s -S | FileCheck %s
+
+declare void @clobber.i32(i32)
+
+define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT:    ret i16 [[SEL_V]]
+;
+  %upper.zext = zext i16 %upper to i32
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  %pack = or disjoint i32 %upper.shl, %lower.zext
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.commute(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT:    ret i16 [[SEL_V]]
+;
+  %upper.zext = zext i16 %upper to i32
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  %pack = or disjoint i32 %lower.zext, %upper.shl
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+define i16 @selective_shift_16_range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16_range(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.shl = shl nuw i32 %upper, 16
+  %pack = or disjoint i32 %upper.shl, %lower
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+define <2 x i16> @selective_shift_v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) {
+; CHECK-LABEL: define <2 x i16> @selective_shift_v16(
+; CHECK-SAME: <2 x i32> [[MASK:%.*]], <2 x i16> [[UPPER:%.*]], <2 x i16> [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and <2 x i32> [[MASK]], splat (i32 16)
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq <2 x i32> [[MASK_BIT]], zeroinitializer
+; CHECK-NEXT:    [[SEL_V:%.*]] = select <2 x i1> [[MASK_BIT_Z]], <2 x i16> [[LOWER]], <2 x i16> [[UPPER]]
+; CHECK-NEXT:    ret <2 x i16> [[SEL_V]]
+;
+  %upper.zext = zext <2 x i16> %upper to <2 x i32>
+  %upper.shl = shl nuw <2 x i32> %upper.zext, splat(i32 16)
+  %lower.zext = zext <2 x i16> %lower to <2 x i32>
+  %pack = or disjoint <2 x i32> %upper.shl, %lower.zext
+  %mask.bit = and <2 x i32> %mask, splat(i32 16)
+  %sel = lshr <2 x i32> %pack, %mask.bit
+  %trunc = trunc <2 x i32> %sel to <2 x i16>
+  ret <2 x i16> %trunc
+}
+
+define i16 @selective_shift_16.wide(i64 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.wide(
+; CHECK-SAME: i64 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i64 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT:    ret i16 [[SEL_V]]
+;
+  %upper.zext = zext i16 %upper to i64
+  %upper.shl = shl nuw i64 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i64
+  %pack = or disjoint i64 %upper.shl, %lower.zext
+  %mask.bit = and i64 %mask, 16
+  %sel = lshr i64 %pack, %mask.bit
+  %trunc = trunc i64 %sel to i16
+  ret i16 %trunc
+}
+
+; narrow zext type blocks fold
+define i16 @selective_shift_16.narrow(i24 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.narrow(
+; CHECK-SAME: i24 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i24
+; CHECK-NEXT:    [[UPPER_SHL:%.*]] = shl i24 [[UPPER_ZEXT]], 16
+; CHECK-NEXT:    [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i24
+; CHECK-NEXT:    [[PACK:%.*]] = or disjoint i24 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i24 [[MASK]], 16
+; CHECK-NEXT:    [[SEL:%.*]] = lshr i24 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i24 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.zext = zext i16 %upper to i24
+  %upper.shl = shl i24 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i24
+  %pack = or disjoint i24 %upper.shl, %lower.zext
+  %mask.bit = and i24 %mask, 16
+  %sel = lshr i24 %pack, %mask.bit
+  %trunc = trunc i24 %sel to i16
+  ret i16 %trunc
+}
+
+; %lower's upper bits block fold
+define i16 @selective_shift_16_norange(i32 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16_norange(
+; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER]], 16
+; CHECK-NEXT:    [[PACK:%.*]] = or i32 [[UPPER_SHL]], [[LOWER]]
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.shl = shl nuw i32 %upper, 16
+  %pack = or i32 %upper.shl, %lower
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+define i16 @selective_shift_16.mu.0(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.0(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT:    call void @clobber.i32(i32 [[UPPER_ZEXT]])
+; CHECK-NEXT:    [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT:    call void @clobber.i32(i32 [[LOWER_ZEXT]])
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[TRUNC:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.zext = zext i16 %upper to i32
+  call void @clobber.i32(i32 %upper.zext)
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  call void @clobber.i32(i32 %lower.zext)
+  %pack = or disjoint i32 %upper.shl, %lower.zext
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+; multi-use of %pack blocks fold
+define i16 @selective_shift_16.mu.1(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.1(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT:    [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
+; CHECK-NEXT:    [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT:    [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT:    call void @clobber.i32(i32 [[PACK]])
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
;
+  %upper.zext = zext i16 %upper to i32
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  %pack = or disjoint i32 %upper.shl, %lower.zext
+  call void @clobber.i32(i32 %pack)
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+; multi-use of %sel blocks fold
+define i16 @selective_shift_16.mu.2(i32 %mask, i16 %upper, i16 %lower) {
+; CHECK-LABEL: define i16 @selective_shift_16.mu.2(
+; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
+; CHECK-NEXT:    [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
+; CHECK-NEXT:    [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
+; CHECK-NEXT:    [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
+; CHECK-NEXT:    [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT:    call void @clobber.i32(i32 [[SEL]])
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
+; CHECK-NEXT:    ret i16 [[TRUNC]]
+;
+  %upper.zext = zext i16 %upper to i32
+  %upper.shl = shl nuw i32 %upper.zext, 16
+  %lower.zext = zext i16 %lower to i32
+  %pack = or disjoint i32 %upper.shl, %lower.zext
+  %mask.bit = and i32 %mask, 16
+  %sel = lshr i32 %pack, %mask.bit
+  call void @clobber.i32(i32 %sel)
+  %trunc = trunc i32 %sel to i16
+  ret i16 %trunc
+}
+
+; bitwidth must be a power of 2 to fold
+define i24 @selective_shift_24(i48 %mask, i24 %upper, i24 %lower) {
+; CHECK-LABEL: define i24 @selective_shift_24(
+; CHECK-SAME: i48 [[MASK:%.*]], i24 [[UPPER:%.*]], i24 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[UPPER_ZEXT:%.*]] = zext i24 [[UPPER]] to i48
+; CHECK-NEXT:    [[UPPER_SHL:%.*]] = shl nuw i48 [[UPPER_ZEXT]], 24
+; CHECK-NEXT:    [[LOWER_ZEXT:%.*]] = zext i24 [[LOWER]] to i48
+; CHECK-NEXT:    [[PACK:%.*]] = or disjoint i48 [[UPPER_SHL]], [[LOWER_ZEXT]]
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i48 [[MASK]], 24
+; CHECK-NEXT:    [[SEL:%.*]] = lshr i48 [[PACK]], [[MASK_BIT]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i48 [[SEL]] to i24
+; CHECK-NEXT:    ret i24 [[TRUNC]]
+;
+  %upper.zext = zext i24 %upper to i48
+  %upper.shl = shl nuw i48 %upper.zext, 24
+  %lower.zext = zext i24 %lower to i48
+  %pack = or disjoint i48 %upper.shl, %lower.zext
+  %mask.bit = and i48 %mask, 24
+  %sel = lshr i48 %pack, %mask.bit
+  %trunc = trunc i48 %sel to i24
+  ret i24 %trunc
+}
+
+define i32 @selective_shift_32(i64 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_32(
+; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT:    ret i32 [[SEL_V]]
+;
+  %upper.zext = zext i32 %upper to i64
+  %upper.shl = shl nuw i64 %upper.zext, 32
+  %lower.zext = zext i32 %lower to i64
+  %pack = or disjoint i64 %upper.shl, %lower.zext
+  %mask.bit = and i64 %mask, 32
+  %sel = lshr i64 %pack, %mask.bit
+  %trunc = trunc i64 %sel to i32
+  ret i32 %trunc
+}
+
+define i32 @selective_shift_32.commute(i64 %mask, i32 %upper, i32 %lower) {
+; CHECK-LABEL: define i32 @selective_shift_32.commute(
+; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
+; CHECK-NEXT:    ret i32 [[SEL_V]]
+;
+  %upper.zext = zext i32 %upper to i64
+  %upper.shl = shl nuw i64 %upper.zext, 32
+  %lower.zext = zext i32 %lower to i64
+  %pack = or disjoint i64 %lower.zext, %upper.shl
+  %mask.bit = and i64 %mask, 32
+  %sel = lshr i64 %pack, %mask.bit
+  %trunc = trunc i64 %sel to i32
+  ret i32 %trunc
+}
+
+define i32 @selective_shift_32_range(i64 %mask, i64 %upper, i64 range(i64 0, 4294967296) %lower) {
+; CHECK-LABEL: define i32 @selective_shift_32_range(
+; CHECK-SAME: i64 [[MASK:%.*]], i64 [[UPPER:%.*]], i64 range(i64 0, 4294967296) [[LOWER:%.*]]) {
+; CHECK-NEXT:    [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
+; CHECK-NEXT:    [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i64 [[LOWER]], i64 [[UPPER]]
+; CHECK-NEXT:    [[TRUNC:%.*]] = trunc i64 [[SEL]] to i32
+; CHECK-NEXT:    ret i32 [[TRUNC]]
+;
+  %upper.shl = shl nuw i64 %upper, 32
+  %pack = or disjoint i64 %upper.shl, %lower
+  %mask.bit = and i64 %mask, 32
+  %sel = lshr i64 %pack, %mask.bit
+  %trunc = trunc i64 %sel to i32
+  ret i32 %trunc
+}
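As the function comment notes, the payoff of emitting a select is that it can simplify further. A hypothetical follow-on example (not among the committed tests): once the shift amount is the constant 16, the select condition is known false and the whole sequence should reduce to the upper half.

  define i16 @selective_shift_const(i16 %upper, i16 %lower) {
    %upper.zext = zext i16 %upper to i32
    %upper.shl = shl nuw i32 %upper.zext, 16
    %lower.zext = zext i16 %lower to i32
    %pack = or disjoint i32 %upper.shl, %lower.zext
    %sel = lshr i32 %pack, 16
    %trunc = trunc i32 %sel to i16
    ret i16 %trunc ; expected to fold to: ret i16 %upper
  }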
