Skip to content

Commit 7910ed2

Browse files
authored
[InstCombine] Canonicalise packed-integer-selecting shifts (#162147)
This patch resolves recent regressions related to [issue #92891](#92891). It specifically enables the following types of reductions:

```llvm
define i16 @src(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
  %upper.shl = shl nuw i32 %upper, 16
  %pack = or disjoint i32 %upper.shl, %lower
  %mask.bit = and i32 %mask, 16
  %sel = lshr i32 %pack, %mask.bit
  %trunc = trunc i32 %sel to i16
  ret i16 %trunc
}

; =>

define i16 @tgt(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
  %mask.bit = and i32 %mask, 16
  %mask.bit.z = icmp eq i32 %mask.bit, 0
  %sel = select i1 %mask.bit.z, i32 %lower, i32 %upper
  %trunc = trunc i32 %sel to i16
  ret i16 %trunc
}
```

Alive2 proofs: [gJ9MpP](https://alive2.llvm.org/ce/z/gJ9MpP)
1 parent f53b624 commit 7910ed2

File tree

2 files changed

+381
-2
lines changed

2 files changed

+381
-2
lines changed

llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp

Lines changed: 58 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,58 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
6060
return true;
6161
}
6262

63+
/// Let N = 2 * M.
64+
/// Given an N-bit integer representing a pack of two M-bit integers,
65+
/// we can select one of the packed integers by right-shifting by either
66+
/// zero or M (which is the most straightforward to check if M is a power
67+
/// of 2), and then isolating the lower M bits. In this case, we can
68+
/// represent the shift as a select on whether the shr amount is nonzero.
69+
static Value *simplifyShiftSelectingPackedElement(Instruction *I,
70+
const APInt &DemandedMask,
71+
InstCombinerImpl &IC,
72+
unsigned Depth) {
73+
assert(I->getOpcode() == Instruction::LShr &&
74+
"Only lshr instruction supported");
75+
76+
uint64_t ShlAmt;
77+
Value *Upper, *Lower;
78+
if (!match(I->getOperand(0),
79+
m_OneUse(m_c_DisjointOr(
80+
m_OneUse(m_Shl(m_Value(Upper), m_ConstantInt(ShlAmt))),
81+
m_Value(Lower)))))
82+
return nullptr;
83+
84+
if (!isPowerOf2_64(ShlAmt))
85+
return nullptr;
86+
87+
const uint64_t DemandedBitWidth = DemandedMask.getActiveBits();
88+
if (DemandedBitWidth > ShlAmt)
89+
return nullptr;
90+
91+
// Check that upper demanded bits are not lost from lshift.
92+
if (Upper->getType()->getScalarSizeInBits() < ShlAmt + DemandedBitWidth)
93+
return nullptr;
94+
95+
KnownBits KnownLowerBits = IC.computeKnownBits(Lower, I, Depth);
96+
if (!KnownLowerBits.getMaxValue().isIntN(ShlAmt))
97+
return nullptr;
98+
99+
Value *ShrAmt = I->getOperand(1);
100+
KnownBits KnownShrBits = IC.computeKnownBits(ShrAmt, I, Depth);
101+
102+
// Verify that ShrAmt is either exactly ShlAmt (which is a power of 2) or
103+
// zero.
104+
if (~KnownShrBits.Zero != ShlAmt)
105+
return nullptr;
106+
107+
Value *ShrAmtZ =
108+
IC.Builder.CreateICmpEQ(ShrAmt, Constant::getNullValue(ShrAmt->getType()),
109+
ShrAmt->getName() + ".z");
110+
Value *Select = IC.Builder.CreateSelect(ShrAmtZ, Lower, Upper);
111+
Select->takeName(I);
112+
return Select;
113+
}
114+
63115
/// Returns the bitwidth of the given scalar or pointer type. For vector types,
64116
/// returns the element type's bitwidth.
65117
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -798,9 +850,13 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
798850
Known >>= ShiftAmt;
799851
if (ShiftAmt)
800852
Known.Zero.setHighBits(ShiftAmt); // high bits known zero.
801-
} else {
802-
llvm::computeKnownBits(I, Known, Q, Depth);
853+
break;
803854
}
855+
if (Value *V =
856+
simplifyShiftSelectingPackedElement(I, DemandedMask, *this, Depth))
857+
return V;
858+
859+
llvm::computeKnownBits(I, Known, Q, Depth);
804860
break;
805861
}
806862
case Instruction::AShr: {
Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=instcombine %s -S | FileCheck %s
3+
4+
declare void @clobber.i32(i32)
5+
6+
define i16 @selective_shift_16(i32 %mask, i16 %upper, i16 %lower) {
7+
; CHECK-LABEL: define i16 @selective_shift_16(
8+
; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
9+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
10+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
11+
; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
12+
; CHECK-NEXT: ret i16 [[SEL_V]]
13+
;
14+
%upper.zext = zext i16 %upper to i32
15+
%upper.shl = shl nuw i32 %upper.zext, 16
16+
%lower.zext = zext i16 %lower to i32
17+
%pack = or disjoint i32 %upper.shl, %lower.zext
18+
%mask.bit = and i32 %mask, 16
19+
%sel = lshr i32 %pack, %mask.bit
20+
%trunc = trunc i32 %sel to i16
21+
ret i16 %trunc
22+
}
23+
24+
define i16 @selective_shift_16.commute(i32 %mask, i16 %upper, i16 %lower) {
25+
; CHECK-LABEL: define i16 @selective_shift_16.commute(
26+
; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
27+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
28+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
29+
; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
30+
; CHECK-NEXT: ret i16 [[SEL_V]]
31+
;
32+
%upper.zext = zext i16 %upper to i32
33+
%upper.shl = shl nuw i32 %upper.zext, 16
34+
%lower.zext = zext i16 %lower to i32
35+
%pack = or disjoint i32 %lower.zext, %upper.shl
36+
%mask.bit = and i32 %mask, 16
37+
%sel = lshr i32 %pack, %mask.bit
38+
%trunc = trunc i32 %sel to i16
39+
ret i16 %trunc
40+
}
41+
42+
define i16 @selective_shift_16.range(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
43+
; CHECK-LABEL: define i16 @selective_shift_16.range(
44+
; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
45+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
46+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
47+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
48+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
49+
; CHECK-NEXT: ret i16 [[TRUNC]]
50+
;
51+
%upper.shl = shl nuw i32 %upper, 16
52+
%pack = or disjoint i32 %upper.shl, %lower
53+
%mask.bit = and i32 %mask, 16
54+
%sel = lshr i32 %pack, %mask.bit
55+
%trunc = trunc i32 %sel to i16
56+
ret i16 %trunc
57+
}
58+
59+
define i16 @selective_shift_16.range.commute(i32 %mask, i32 %upper, i32 range(i32 0, 65536) %lower) {
60+
; CHECK-LABEL: define i16 @selective_shift_16.range.commute(
61+
; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 range(i32 0, 65536) [[LOWER:%.*]]) {
62+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
63+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
64+
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
65+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
66+
; CHECK-NEXT: ret i16 [[TRUNC]]
67+
;
68+
%upper.shl = shl nuw i32 %upper, 16
69+
%pack = or disjoint i32 %lower, %upper.shl
70+
%mask.bit = and i32 %mask, 16
71+
%sel = lshr i32 %pack, %mask.bit
72+
%trunc = trunc i32 %sel to i16
73+
ret i16 %trunc
74+
}
75+
76+
define i32 @selective_shift_16.masked(i32 %mask, i16 %upper, i16 %lower) {
77+
; CHECK-LABEL: define i32 @selective_shift_16.masked(
78+
; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
79+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
80+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
81+
; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
82+
; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32
83+
; CHECK-NEXT: ret i32 [[SEL]]
84+
;
85+
%upper.zext = zext i16 %upper to i32
86+
%upper.shl = shl nuw i32 %upper.zext, 16
87+
%lower.zext = zext i16 %lower to i32
88+
%pack = or disjoint i32 %lower.zext, %upper.shl
89+
%mask.bit = and i32 %mask, 16
90+
%sel = lshr i32 %pack, %mask.bit
91+
%sel.masked = and i32 %sel, 65535
92+
ret i32 %sel.masked
93+
}
94+
95+
define i32 @selective_shift_16.masked.commute(i32 %mask, i16 %upper, i16 %lower) {
96+
; CHECK-LABEL: define i32 @selective_shift_16.masked.commute(
97+
; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
98+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
99+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
100+
; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
101+
; CHECK-NEXT: [[SEL:%.*]] = zext i16 [[SEL_V]] to i32
102+
; CHECK-NEXT: ret i32 [[SEL]]
103+
;
104+
%upper.zext = zext i16 %upper to i32
105+
%upper.shl = shl nuw i32 %upper.zext, 16
106+
%lower.zext = zext i16 %lower to i32
107+
%pack = or disjoint i32 %upper.shl, %lower.zext
108+
%mask.bit = and i32 %mask, 16
109+
%sel = lshr i32 %pack, %mask.bit
110+
%sel.masked = and i32 %sel, 65535
111+
ret i32 %sel.masked
112+
}
113+
114+
define <2 x i16> @selective_shift.v16(<2 x i32> %mask, <2 x i16> %upper, <2 x i16> %lower) {
115+
; CHECK-LABEL: define <2 x i16> @selective_shift.v16(
116+
; CHECK-SAME: <2 x i32> [[MASK:%.*]], <2 x i16> [[UPPER:%.*]], <2 x i16> [[LOWER:%.*]]) {
117+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and <2 x i32> [[MASK]], splat (i32 16)
118+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq <2 x i32> [[MASK_BIT]], zeroinitializer
119+
; CHECK-NEXT: [[SEL_V:%.*]] = select <2 x i1> [[MASK_BIT_Z]], <2 x i16> [[LOWER]], <2 x i16> [[UPPER]]
120+
; CHECK-NEXT: ret <2 x i16> [[SEL_V]]
121+
;
122+
%upper.zext = zext <2 x i16> %upper to <2 x i32>
123+
%upper.shl = shl nuw <2 x i32> %upper.zext, splat(i32 16)
124+
%lower.zext = zext <2 x i16> %lower to <2 x i32>
125+
%pack = or disjoint <2 x i32> %upper.shl, %lower.zext
126+
%mask.bit = and <2 x i32> %mask, splat(i32 16)
127+
%sel = lshr <2 x i32> %pack, %mask.bit
128+
%trunc = trunc <2 x i32> %sel to <2 x i16>
129+
ret <2 x i16> %trunc
130+
}
131+
132+
define i16 @selective_shift_16.wide(i64 %mask, i16 %upper, i16 %lower) {
133+
; CHECK-LABEL: define i16 @selective_shift_16.wide(
134+
; CHECK-SAME: i64 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
135+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 16
136+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
137+
; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
138+
; CHECK-NEXT: ret i16 [[SEL_V]]
139+
;
140+
%upper.zext = zext i16 %upper to i64
141+
%upper.shl = shl nuw i64 %upper.zext, 16
142+
%lower.zext = zext i16 %lower to i64
143+
%pack = or disjoint i64 %upper.shl, %lower.zext
144+
%mask.bit = and i64 %mask, 16
145+
%sel = lshr i64 %pack, %mask.bit
146+
%trunc = trunc i64 %sel to i16
147+
ret i16 %trunc
148+
}
149+
150+
; narrow zext type blocks fold
151+
define i16 @selective_shift_16.narrow(i24 %mask, i16 %upper, i16 %lower) {
152+
; CHECK-LABEL: define i16 @selective_shift_16.narrow(
153+
; CHECK-SAME: i24 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
154+
; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i24
155+
; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl i24 [[UPPER_ZEXT]], 16
156+
; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i24
157+
; CHECK-NEXT: [[PACK:%.*]] = or disjoint i24 [[UPPER_SHL]], [[LOWER_ZEXT]]
158+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i24 [[MASK]], 16
159+
; CHECK-NEXT: [[SEL:%.*]] = lshr i24 [[PACK]], [[MASK_BIT]]
160+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i24 [[SEL]] to i16
161+
; CHECK-NEXT: ret i16 [[TRUNC]]
162+
;
163+
%upper.zext = zext i16 %upper to i24
164+
%upper.shl = shl i24 %upper.zext, 16
165+
%lower.zext = zext i16 %lower to i24
166+
%pack = or disjoint i24 %upper.shl, %lower.zext
167+
%mask.bit = and i24 %mask, 16
168+
%sel = lshr i24 %pack, %mask.bit
169+
%trunc = trunc i24 %sel to i16
170+
ret i16 %trunc
171+
}
172+
173+
; %lower's upper bits block fold
174+
define i16 @selective_shift_16_norange(i32 %mask, i32 %upper, i32 %lower) {
175+
; CHECK-LABEL: define i16 @selective_shift_16_norange(
176+
; CHECK-SAME: i32 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
177+
; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER]], 16
178+
; CHECK-NEXT: [[PACK:%.*]] = or i32 [[UPPER_SHL]], [[LOWER]]
179+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
180+
; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
181+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
182+
; CHECK-NEXT: ret i16 [[TRUNC]]
183+
;
184+
%upper.shl = shl nuw i32 %upper, 16
185+
%pack = or i32 %upper.shl, %lower
186+
%mask.bit = and i32 %mask, 16
187+
%sel = lshr i32 %pack, %mask.bit
188+
%trunc = trunc i32 %sel to i16
189+
ret i16 %trunc
190+
}
191+
192+
define i16 @selective_shift_16.mu.0(i32 %mask, i16 %upper, i16 %lower) {
193+
; CHECK-LABEL: define i16 @selective_shift_16.mu.0(
194+
; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
195+
; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
196+
; CHECK-NEXT: call void @clobber.i32(i32 [[UPPER_ZEXT]])
197+
; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
198+
; CHECK-NEXT: call void @clobber.i32(i32 [[LOWER_ZEXT]])
199+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
200+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i32 [[MASK_BIT]], 0
201+
; CHECK-NEXT: [[TRUNC:%.*]] = select i1 [[MASK_BIT_Z]], i16 [[LOWER]], i16 [[UPPER]]
202+
; CHECK-NEXT: ret i16 [[TRUNC]]
203+
;
204+
%upper.zext = zext i16 %upper to i32
205+
call void @clobber.i32(i32 %upper.zext)
206+
%upper.shl = shl nuw i32 %upper.zext, 16
207+
%lower.zext = zext i16 %lower to i32
208+
call void @clobber.i32(i32 %lower.zext)
209+
%pack = or disjoint i32 %upper.shl, %lower.zext
210+
%mask.bit = and i32 %mask, 16
211+
%sel = lshr i32 %pack, %mask.bit
212+
%trunc = trunc i32 %sel to i16
213+
ret i16 %trunc
214+
}
215+
216+
; multi-use of %pack blocks fold
217+
define i16 @selective_shift_16.mu.1(i32 %mask, i16 %upper, i16 %lower) {
218+
; CHECK-LABEL: define i16 @selective_shift_16.mu.1(
219+
; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
220+
; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
221+
; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
222+
; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
223+
; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
224+
; CHECK-NEXT: call void @clobber.i32(i32 [[PACK]])
225+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
226+
; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
227+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
228+
; CHECK-NEXT: ret i16 [[TRUNC]]
229+
;
230+
%upper.zext = zext i16 %upper to i32
231+
%upper.shl = shl nuw i32 %upper.zext, 16
232+
%lower.zext = zext i16 %lower to i32
233+
%pack = or disjoint i32 %upper.shl, %lower.zext
234+
call void @clobber.i32(i32 %pack)
235+
%mask.bit = and i32 %mask, 16
236+
%sel = lshr i32 %pack, %mask.bit
237+
%trunc = trunc i32 %sel to i16
238+
ret i16 %trunc
239+
}
240+
241+
; non-truncated use of %sel blocks fold
242+
define i16 @selective_shift_16.mu.2(i32 %mask, i16 %upper, i16 %lower) {
243+
; CHECK-LABEL: define i16 @selective_shift_16.mu.2(
244+
; CHECK-SAME: i32 [[MASK:%.*]], i16 [[UPPER:%.*]], i16 [[LOWER:%.*]]) {
245+
; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i16 [[UPPER]] to i32
246+
; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i32 [[UPPER_ZEXT]], 16
247+
; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i16 [[LOWER]] to i32
248+
; CHECK-NEXT: [[PACK:%.*]] = or disjoint i32 [[UPPER_SHL]], [[LOWER_ZEXT]]
249+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i32 [[MASK]], 16
250+
; CHECK-NEXT: [[SEL:%.*]] = lshr i32 [[PACK]], [[MASK_BIT]]
251+
; CHECK-NEXT: call void @clobber.i32(i32 [[SEL]])
252+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SEL]] to i16
253+
; CHECK-NEXT: ret i16 [[TRUNC]]
254+
;
255+
%upper.zext = zext i16 %upper to i32
256+
%upper.shl = shl nuw i32 %upper.zext, 16
257+
%lower.zext = zext i16 %lower to i32
258+
%pack = or disjoint i32 %upper.shl, %lower.zext
259+
%mask.bit = and i32 %mask, 16
260+
%sel = lshr i32 %pack, %mask.bit
261+
call void @clobber.i32(i32 %sel)
262+
%trunc = trunc i32 %sel to i16
263+
ret i16 %trunc
264+
}
265+
266+
; bitwidth must be a power of 2 to fold
267+
define i24 @selective_shift_24(i48 %mask, i24 %upper, i24 %lower) {
268+
; CHECK-LABEL: define i24 @selective_shift_24(
269+
; CHECK-SAME: i48 [[MASK:%.*]], i24 [[UPPER:%.*]], i24 [[LOWER:%.*]]) {
270+
; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i24 [[UPPER]] to i48
271+
; CHECK-NEXT: [[UPPER_SHL:%.*]] = shl nuw i48 [[UPPER_ZEXT]], 24
272+
; CHECK-NEXT: [[LOWER_ZEXT:%.*]] = zext i24 [[LOWER]] to i48
273+
; CHECK-NEXT: [[PACK:%.*]] = or disjoint i48 [[UPPER_SHL]], [[LOWER_ZEXT]]
274+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i48 [[MASK]], 24
275+
; CHECK-NEXT: [[SEL:%.*]] = lshr i48 [[PACK]], [[MASK_BIT]]
276+
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i48 [[SEL]] to i24
277+
; CHECK-NEXT: ret i24 [[TRUNC]]
278+
;
279+
%upper.zext = zext i24 %upper to i48
280+
%upper.shl = shl nuw i48 %upper.zext, 24
281+
%lower.zext = zext i24 %lower to i48
282+
%pack = or disjoint i48 %upper.shl, %lower.zext
283+
%mask.bit = and i48 %mask, 24
284+
%sel = lshr i48 %pack, %mask.bit
285+
%trunc = trunc i48 %sel to i24
286+
ret i24 %trunc
287+
}
288+
289+
define i32 @selective_shift_32(i64 %mask, i32 %upper, i32 %lower) {
290+
; CHECK-LABEL: define i32 @selective_shift_32(
291+
; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
292+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
293+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
294+
; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
295+
; CHECK-NEXT: ret i32 [[SEL_V]]
296+
;
297+
%upper.zext = zext i32 %upper to i64
298+
%upper.shl = shl nuw i64 %upper.zext, 32
299+
%lower.zext = zext i32 %lower to i64
300+
%pack = or disjoint i64 %upper.shl, %lower.zext
301+
%mask.bit = and i64 %mask, 32
302+
%sel = lshr i64 %pack, %mask.bit
303+
%trunc = trunc i64 %sel to i32
304+
ret i32 %trunc
305+
}
306+
307+
define i32 @selective_shift_32.commute(i64 %mask, i32 %upper, i32 %lower) {
308+
; CHECK-LABEL: define i32 @selective_shift_32.commute(
309+
; CHECK-SAME: i64 [[MASK:%.*]], i32 [[UPPER:%.*]], i32 [[LOWER:%.*]]) {
310+
; CHECK-NEXT: [[MASK_BIT:%.*]] = and i64 [[MASK]], 32
311+
; CHECK-NEXT: [[MASK_BIT_Z:%.*]] = icmp eq i64 [[MASK_BIT]], 0
312+
; CHECK-NEXT: [[SEL_V:%.*]] = select i1 [[MASK_BIT_Z]], i32 [[LOWER]], i32 [[UPPER]]
313+
; CHECK-NEXT: ret i32 [[SEL_V]]
314+
;
315+
%upper.zext = zext i32 %upper to i64
316+
%upper.shl = shl nuw i64 %upper.zext, 32
317+
%lower.zext = zext i32 %lower to i64
318+
%pack = or disjoint i64 %lower.zext, %upper.shl
319+
%mask.bit = and i64 %mask, 32
320+
%sel = lshr i64 %pack, %mask.bit
321+
%trunc = trunc i64 %sel to i32
322+
ret i32 %trunc
323+
}

0 commit comments

Comments
 (0)