Skip to content

Commit 276de06

Browse files
zGoldthorpe authored and David Salinas committed
[InstCombine] Fold integer unpack/repack patterns through ZExt (llvm#153583)
This patch explicitly enables the InstCombiner to fold integer unpack/repack patterns such as

```llvm
define i64 @src_combine(i32 %lower, i32 %upper) {
  %base = zext i32 %lower to i64

  %u.0 = and i32 %upper, u0xff
  %z.0 = zext i32 %u.0 to i64
  %s.0 = shl i64 %z.0, 32
  %o.0 = or i64 %base, %s.0

  %r.1 = lshr i32 %upper, 8
  %u.1 = and i32 %r.1, u0xff
  %z.1 = zext i32 %u.1 to i64
  %s.1 = shl i64 %z.1, 40
  %o.1 = or i64 %o.0, %s.1

  %r.2 = lshr i32 %upper, 16
  %u.2 = and i32 %r.2, u0xff
  %z.2 = zext i32 %u.2 to i64
  %s.2 = shl i64 %z.2, 48
  %o.2 = or i64 %o.1, %s.2

  %r.3 = lshr i32 %upper, 24
  %u.3 = and i32 %r.3, u0xff
  %z.3 = zext i32 %u.3 to i64
  %s.3 = shl i64 %z.3, 56
  %o.3 = or i64 %o.2, %s.3

  ret i64 %o.3
}
; =>
define i64 @tgt_combine(i32 %lower, i32 %upper) {
  %base = zext i32 %lower to i64
  %upper.zext = zext i32 %upper to i64
  %s.0 = shl nuw i64 %upper.zext, 32
  %o.3 = or disjoint i64 %s.0, %base
  ret i64 %o.3
}
```

Alive2 proofs: [YAy7ny](https://alive2.llvm.org/ce/z/YAy7ny)
1 parent 10996a5 commit 276de06

File tree

2 files changed

+326
-1
lines changed

2 files changed

+326
-1
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3743,6 +3743,82 @@ static Instruction *foldIntegerPackFromVector(Instruction &I,
37433743
return CastInst::Create(Instruction::BitCast, MaskedVec, I.getType());
37443744
}
37453745

3746+
/// Match \p V as "lshr -> mask -> zext -> shl".
3747+
///
3748+
/// \p Int is the underlying integer being extracted from.
3749+
/// \p Mask is a bitmask identifying which bits of the integer are being
3750+
/// extracted. \p Offset identifies which bit of the result \p V corresponds to
3751+
/// the least significant bit of \p Int.
///
/// The shl (by a constant) and the zext are mandatory and must each have a
/// single use. The `and` with a constant mask and the lshr by a constant are
/// optional and are only looked through when they have a single use; when one
/// of them is absent, the value at that position is carried forward as is.
///
/// \p IsShlNUW and \p IsShlNSW receive the nuw / nsw flags of the shl.
///
/// \returns true when the pattern matched and all outputs were filled in.
3752+
static bool matchZExtedSubInteger(Value *V, Value *&Int, APInt &Mask,
3753+
uint64_t &Offset, bool &IsShlNUW,
3754+
bool &IsShlNSW) {
3755+
Value *ShlOp0;
3756+
uint64_t ShlAmt = 0;
3757+
// The outermost operation must be a single-use shl by a constant amount.
if (!match(V, m_OneUse(m_Shl(m_Value(ShlOp0), m_ConstantInt(ShlAmt)))))
3758+
return false;
3759+
3760+
// Note: the wrap flags are written before the remaining checks run, so they
// may already have been updated when this function later returns false.
IsShlNUW = cast<BinaryOperator>(V)->hasNoUnsignedWrap();
3761+
IsShlNSW = cast<BinaryOperator>(V)->hasNoSignedWrap();
3762+
3763+
Value *ZExtOp0;
3764+
// The value being shifted must be a single-use zext.
if (!match(ShlOp0, m_OneUse(m_ZExt(m_Value(ZExtOp0)))))
3765+
return false;
3766+
3767+
Value *MaskedOp0;
3768+
const APInt *ShiftedMaskConst = nullptr;
3769+
// Optionally look through a single-use `and` with a constant mask. Without
// one, the zext operand itself is carried forward and ShiftedMaskConst is
// expected to stay null (the Mask computation below relies on that).
if (!match(ZExtOp0, m_CombineOr(m_OneUse(m_And(m_Value(MaskedOp0),
3770+
m_APInt(ShiftedMaskConst))),
3771+
m_Value(MaskedOp0))))
3772+
return false;
3773+
3774+
uint64_t LShrAmt = 0;
3775+
// Optionally look through a single-use lshr by a constant amount. Without
// one, LShrAmt keeps its initial value of 0 and Int is the value itself.
if (!match(MaskedOp0,
3776+
m_CombineOr(m_OneUse(m_LShr(m_Value(Int), m_ConstantInt(LShrAmt))),
3777+
m_Value(Int))))
3778+
return false;
3779+
3780+
// A net right shift would make the unsigned Offset underflow, so reject it.
if (LShrAmt > ShlAmt)
3781+
return false;
3782+
Offset = ShlAmt - LShrAmt;
3783+
3784+
// Express the mask in terms of the bits of Int: shift the constant mask back
// up by the lshr amount or, when there is no explicit mask, select every bit
// from LShrAmt upwards.
// NOTE(review): LShrAmt is not checked against the bit width of Int before it
// is handed to the APInt helpers -- presumably an out-of-range lshr amount
// never reaches this point; confirm.
Mask = ShiftedMaskConst ? ShiftedMaskConst->shl(LShrAmt)
3785+
: APInt::getBitsSetFrom(
3786+
Int->getType()->getScalarSizeInBits(), LShrAmt);
3787+
3788+
return true;
3789+
}
3790+
3791+
/// Try to fold the join of two scalar integers whose bits are unpacked and
3792+
/// zexted from the same source integer.
///
/// Both \p Lhs and \p Rhs must match matchZExtedSubInteger, refer to the same
/// source value, and place it at the same offset in the result. The two pieces
/// are then rebuilt with \p Builder as a single
/// shl(zext(and(Int, LhsMask | RhsMask)), Offset).
///
/// \returns the replacement value, or nullptr when the pattern does not apply.
3793+
static Value *foldIntegerRepackThroughZExt(Value *Lhs, Value *Rhs,
3794+
InstCombiner::BuilderTy &Builder) {
3795+
3796+
Value *LhsInt, *RhsInt;
3797+
APInt LhsMask, RhsMask;
3798+
uint64_t LhsOffset, RhsOffset;
3799+
bool IsLhsShlNUW, IsLhsShlNSW, IsRhsShlNUW, IsRhsShlNSW;
3800+
// Decompose each operand; bail out as soon as one of them does not match.
if (!matchZExtedSubInteger(Lhs, LhsInt, LhsMask, LhsOffset, IsLhsShlNUW,
3801+
IsLhsShlNSW))
3802+
return nullptr;
3803+
if (!matchZExtedSubInteger(Rhs, RhsInt, RhsMask, RhsOffset, IsRhsShlNUW,
3804+
IsRhsShlNSW))
3805+
return nullptr;
3806+
// The pieces can only share one zext + shl if they are taken from the same
// source value and end up at the same offset in the result.
if (LhsInt != RhsInt || LhsOffset != RhsOffset)
3807+
return nullptr;
3808+
3809+
// Union of the source bits selected by either operand.
APInt Mask = LhsMask | RhsMask;
3810+
3811+
Type *DestTy = Lhs->getType();
3812+
// Rebuild as and -> zext -> shl. A wrap flag is kept on the new shl only when
// both original shls carried it.
Value *Res = Builder.CreateShl(
3813+
Builder.CreateZExt(
3814+
Builder.CreateAnd(LhsInt, Mask, LhsInt->getName() + ".mask"), DestTy,
3815+
LhsInt->getName() + ".zext"),
3816+
ConstantInt::get(DestTy, LhsOffset), "", IsLhsShlNUW && IsRhsShlNUW,
3817+
IsLhsShlNSW && IsRhsShlNSW);
3818+
// The new value takes over the name of Lhs.
Res->takeName(Lhs);
3819+
return Res;
3820+
}
3821+
37463822
// A decomposition of ((X & Mask) * Factor). The NUW / NSW bools
37473823
// track these properties for preservation. Note that we can decompose
37483824
// equivalent select form of this expression (e.g. (!(X & Mask) ? 0 : Mask *
@@ -3844,6 +3920,8 @@ static Value *foldBitmaskMul(Value *Op0, Value *Op1,
38443920
// Try the folds collected here on the operand pair (LHS, RHS), in order:
// foldBitmaskMul first, then foldIntegerRepackThroughZExt. Returns the first
// non-null replacement value, or nullptr when neither fold applies.
// NOTE(review): the name suggests the operands belong to a disjoint `or`;
// this function does not check that itself -- confirm against the callers.
Value *InstCombinerImpl::foldDisjointOr(Value *LHS, Value *RHS) {
38453921
if (Value *Res = foldBitmaskMul(LHS, RHS, Builder))
38463922
return Res;
3923+
if (Value *Res = foldIntegerRepackThroughZExt(LHS, RHS, Builder))
3924+
return Res;
38473925

38483926
return nullptr;
38493927
}
@@ -3977,7 +4055,7 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
39774055
/*NSW=*/true, /*NUW=*/true))
39784056
return R;
39794057

3980-
if (Value *Res = foldBitmaskMul(I.getOperand(0), I.getOperand(1), Builder))
4058+
if (Value *Res = foldDisjointOr(I.getOperand(0), I.getOperand(1)))
39814059
return replaceInstUsesWith(I, Res);
39824060

39834061
if (Value *Res = reassociateDisjointOr(I.getOperand(0), I.getOperand(1)))
Lines changed: 247 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,247 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -passes=instcombine %s -S | FileCheck %s
3+
4+
declare void @use.i32(i32)
5+
declare void @use.i64(i64)
6+
7+
; Positive test: the low half (and 0xffff, shl 24) and the high half (lshr 16,
; shl 40) of %x together cover all 32 bits at the same net offset of 24, so the
; expected output is a single zext of %x followed by one shl by 24, no mask.
define i64 @full_shl(i32 %x) {
8+
; CHECK-LABEL: define i64 @full_shl(
9+
; CHECK-SAME: i32 [[X:%.*]]) {
10+
; CHECK-NEXT: [[X_ZEXT:%.*]] = zext i32 [[X]] to i64
11+
; CHECK-NEXT: [[LO_SHL:%.*]] = shl nuw nsw i64 [[X_ZEXT]], 24
12+
; CHECK-NEXT: ret i64 [[LO_SHL]]
13+
;
14+
%lo = and i32 %x, u0xffff
15+
%lo.zext = zext nneg i32 %lo to i64
16+
%lo.shl = shl nuw nsw i64 %lo.zext, 24
17+
18+
%hi = lshr i32 %x, 16
19+
%hi.zext = zext nneg i32 %hi to i64
20+
%hi.shl = shl nuw nsw i64 %hi.zext, 40
21+
22+
%res = or disjoint i64 %lo.shl, %hi.shl
23+
ret i64 %res
24+
}
25+
26+
; Vector counterpart of @full_shl. The CHECK lines expect the instructions to
; come through unchanged, i.e. the repack fold is not applied here.
; NOTE(review): presumably because the fold is limited to scalar integers --
; confirm.
define <2 x i64> @full_shl_vec(<2 x i32> %v) {
27+
; CHECK-LABEL: define <2 x i64> @full_shl_vec(
28+
; CHECK-SAME: <2 x i32> [[V:%.*]]) {
29+
; CHECK-NEXT: [[LO:%.*]] = and <2 x i32> [[V]], splat (i32 65535)
30+
; CHECK-NEXT: [[V_ZEXT:%.*]] = zext nneg <2 x i32> [[LO]] to <2 x i64>
31+
; CHECK-NEXT: [[LO_SHL:%.*]] = shl nuw nsw <2 x i64> [[V_ZEXT]], splat (i64 24)
32+
; CHECK-NEXT: [[HI:%.*]] = lshr <2 x i32> [[V]], splat (i32 16)
33+
; CHECK-NEXT: [[HI_ZEXT:%.*]] = zext nneg <2 x i32> [[HI]] to <2 x i64>
34+
; CHECK-NEXT: [[HI_SHL:%.*]] = shl nuw nsw <2 x i64> [[HI_ZEXT]], splat (i64 40)
35+
; CHECK-NEXT: [[RES:%.*]] = or disjoint <2 x i64> [[LO_SHL]], [[HI_SHL]]
36+
; CHECK-NEXT: ret <2 x i64> [[RES]]
37+
;
38+
%lo = and <2 x i32> %v, splat(i32 u0xffff)
39+
%lo.zext = zext nneg <2 x i32> %lo to <2 x i64>
40+
%lo.shl = shl nuw nsw <2 x i64> %lo.zext, splat(i64 24)
41+
42+
%hi = lshr <2 x i32> %v, splat(i32 16)
43+
%hi.zext = zext nneg <2 x i32> %hi to <2 x i64>
44+
%hi.shl = shl nuw nsw <2 x i64> %hi.zext, splat(i64 40)
45+
46+
%res = or disjoint <2 x i64> %lo.shl, %hi.shl
47+
ret <2 x i64> %res
48+
}
49+
50+
; Positive test with partial masks: the low half is masked with 0xccdd and the
; high half (after lshr 16) with 0xaabb. The expected output applies the
; combined mask to %x once, then a single zext and shl by 24.
; u0xaabbccdd = -1430532899
51+
define i64 @partial_shl(i32 %x) {
52+
; CHECK-LABEL: define i64 @partial_shl(
53+
; CHECK-SAME: i32 [[X:%.*]]) {
54+
; CHECK-NEXT: [[X_MASK:%.*]] = and i32 [[X]], -1430532899
55+
; CHECK-NEXT: [[X_ZEXT:%.*]] = zext i32 [[X_MASK]] to i64
56+
; CHECK-NEXT: [[LO_SHL:%.*]] = shl nuw nsw i64 [[X_ZEXT]], 24
57+
; CHECK-NEXT: ret i64 [[LO_SHL]]
58+
;
59+
%lo = and i32 %x, u0xccdd
60+
%lo.zext = zext nneg i32 %lo to i64
61+
%lo.shl = shl nuw nsw i64 %lo.zext, 24
62+
63+
%hi = lshr i32 %x, 16
64+
%hi.mask = and i32 %hi, u0xaabb
65+
%hi.zext = zext nneg i32 %hi.mask to i64
66+
%hi.shl = shl nuw nsw i64 %hi.zext, 40
67+
68+
%res = or disjoint i64 %lo.shl, %hi.shl
69+
ret i64 %res
70+
}
71+
72+
; Negative test: %lo.shl has a second use (the call to @use.i64). The CHECK
; lines expect the final `or` to remain; the low half is still rewritten into
; a narrow shl followed by zext.
define i64 @shl_multi_use_shl(i32 %x) {
73+
; CHECK-LABEL: define i64 @shl_multi_use_shl(
74+
; CHECK-SAME: i32 [[X:%.*]]) {
75+
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X]], 24
76+
; CHECK-NEXT: [[LO_SHL:%.*]] = zext i32 [[TMP1]] to i64
77+
; CHECK-NEXT: call void @use.i64(i64 [[LO_SHL]])
78+
; CHECK-NEXT: [[HI:%.*]] = lshr i32 [[X]], 16
79+
; CHECK-NEXT: [[HI_ZEXT:%.*]] = zext nneg i32 [[HI]] to i64
80+
; CHECK-NEXT: [[HI_SHL:%.*]] = shl nuw nsw i64 [[HI_ZEXT]], 40
81+
; CHECK-NEXT: [[RES:%.*]] = or disjoint i64 [[HI_SHL]], [[LO_SHL]]
82+
; CHECK-NEXT: ret i64 [[RES]]
83+
;
84+
%lo = and i32 %x, u0x00ff
85+
%lo.zext = zext nneg i32 %lo to i64
86+
%lo.shl = shl nuw nsw i64 %lo.zext, 24
87+
call void @use.i64(i64 %lo.shl)
88+
89+
%hi = lshr i32 %x, 16
90+
%hi.zext = zext nneg i32 %hi to i64
91+
%hi.shl = shl nuw nsw i64 %hi.zext, 40
92+
93+
%res = or disjoint i64 %lo.shl, %hi.shl
94+
ret i64 %res
95+
}
96+
97+
; Negative test: %lo.zext has a second use (the call to @use.i64). The CHECK
; lines expect the instruction sequence to be left as written.
define i64 @shl_multi_use_zext(i32 %x) {
98+
; CHECK-LABEL: define i64 @shl_multi_use_zext(
99+
; CHECK-SAME: i32 [[X:%.*]]) {
100+
; CHECK-NEXT: [[LO:%.*]] = and i32 [[X]], 255
101+
; CHECK-NEXT: [[LO_ZEXT:%.*]] = zext nneg i32 [[LO]] to i64
102+
; CHECK-NEXT: call void @use.i64(i64 [[LO_ZEXT]])
103+
; CHECK-NEXT: [[LO_SHL:%.*]] = shl nuw nsw i64 [[LO_ZEXT]], 24
104+
; CHECK-NEXT: [[HI:%.*]] = lshr i32 [[X]], 16
105+
; CHECK-NEXT: [[HI_ZEXT:%.*]] = zext nneg i32 [[HI]] to i64
106+
; CHECK-NEXT: [[HI_SHL:%.*]] = shl nuw nsw i64 [[HI_ZEXT]], 40
107+
; CHECK-NEXT: [[RES:%.*]] = or disjoint i64 [[LO_SHL]], [[HI_SHL]]
108+
; CHECK-NEXT: ret i64 [[RES]]
109+
;
110+
%lo = and i32 %x, u0x00ff
111+
%lo.zext = zext nneg i32 %lo to i64
112+
call void @use.i64(i64 %lo.zext)
113+
%lo.shl = shl nuw nsw i64 %lo.zext, 24
114+
115+
%hi = lshr i32 %x, 16
116+
%hi.zext = zext nneg i32 %hi to i64
117+
%hi.shl = shl nuw nsw i64 %hi.zext, 40
118+
119+
%res = or disjoint i64 %lo.shl, %hi.shl
120+
ret i64 %res
121+
}
122+
123+
; Negative test: %hi (the lshr) has a second use (the call to @use.i32). The
; CHECK lines expect the final `or` to remain; the low half is still rewritten
; into a narrow shl followed by zext.
define i64 @shl_multi_use_lshr(i32 %x) {
124+
; CHECK-LABEL: define i64 @shl_multi_use_lshr(
125+
; CHECK-SAME: i32 [[X:%.*]]) {
126+
; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X]], 24
127+
; CHECK-NEXT: [[LO_SHL:%.*]] = zext i32 [[TMP1]] to i64
128+
; CHECK-NEXT: [[HI:%.*]] = lshr i32 [[X]], 16
129+
; CHECK-NEXT: call void @use.i32(i32 [[HI]])
130+
; CHECK-NEXT: [[HI_ZEXT:%.*]] = zext nneg i32 [[HI]] to i64
131+
; CHECK-NEXT: [[HI_SHL:%.*]] = shl nuw nsw i64 [[HI_ZEXT]], 40
132+
; CHECK-NEXT: [[RES:%.*]] = or disjoint i64 [[HI_SHL]], [[LO_SHL]]
133+
; CHECK-NEXT: ret i64 [[RES]]
134+
;
135+
%lo = and i32 %x, u0x00ff
136+
%lo.zext = zext nneg i32 %lo to i64
137+
%lo.shl = shl nuw nsw i64 %lo.zext, 24
138+
139+
%hi = lshr i32 %x, 16
140+
call void @use.i32(i32 %hi)
141+
%hi.zext = zext nneg i32 %hi to i64
142+
%hi.shl = shl nuw nsw i64 %hi.zext, 40
143+
144+
%res = or disjoint i64 %lo.shl, %hi.shl
145+
ret i64 %res
146+
}
147+
148+
; Negative test: the low operand selects bits 16..23 of %x, which the high
; operand (lshr 16) also contains, and the `or` carries no disjoint flag; %hi
; additionally has a second use. The CHECK lines expect a plain `or` to remain.
define i64 @shl_non_disjoint(i32 %x) {
149+
; CHECK-LABEL: define i64 @shl_non_disjoint(
150+
; CHECK-SAME: i32 [[X:%.*]]) {
151+
; CHECK-NEXT: [[LO:%.*]] = and i32 [[X]], 16711680
152+
; CHECK-NEXT: [[LO_ZEXT:%.*]] = zext nneg i32 [[LO]] to i64
153+
; CHECK-NEXT: [[LO_SHL:%.*]] = shl nuw nsw i64 [[LO_ZEXT]], 24
154+
; CHECK-NEXT: [[HI:%.*]] = lshr i32 [[X]], 16
155+
; CHECK-NEXT: call void @use.i32(i32 [[HI]])
156+
; CHECK-NEXT: [[HI_ZEXT:%.*]] = zext nneg i32 [[HI]] to i64
157+
; CHECK-NEXT: [[HI_SHL:%.*]] = shl nuw nsw i64 [[HI_ZEXT]], 40
158+
; CHECK-NEXT: [[RES:%.*]] = or i64 [[LO_SHL]], [[HI_SHL]]
159+
; CHECK-NEXT: ret i64 [[RES]]
160+
;
161+
%lo = and i32 %x, u0x00ff0000
162+
%lo.zext = zext nneg i32 %lo to i64
163+
%lo.shl = shl nuw nsw i64 %lo.zext, 24
164+
165+
%hi = lshr i32 %x, 16
166+
call void @use.i32(i32 %hi)
167+
%hi.zext = zext nneg i32 %hi to i64
168+
%hi.shl = shl nuw nsw i64 %hi.zext, 40
169+
170+
%res = or i64 %lo.shl, %hi.shl
171+
ret i64 %res
172+
}
173+
174+
; End-to-end test: %upper is split into four bytes that are each zexted,
; shifted into bits 32..63 and or'ed onto zext(%lower) through a linear chain
; of `or`s. The expected output is zext(%upper) << 32 or'ed with zext(%lower).
define i64 @combine(i32 %lower, i32 %upper) {
175+
; CHECK-LABEL: define i64 @combine(
176+
; CHECK-SAME: i32 [[LOWER:%.*]], i32 [[UPPER:%.*]]) {
177+
; CHECK-NEXT: [[BASE:%.*]] = zext i32 [[LOWER]] to i64
178+
; CHECK-NEXT: [[UPPER_ZEXT:%.*]] = zext i32 [[UPPER]] to i64
179+
; CHECK-NEXT: [[S_0:%.*]] = shl nuw i64 [[UPPER_ZEXT]], 32
180+
; CHECK-NEXT: [[O_3:%.*]] = or disjoint i64 [[S_0]], [[BASE]]
181+
; CHECK-NEXT: ret i64 [[O_3]]
182+
;
183+
%base = zext i32 %lower to i64
184+
185+
%u.0 = and i32 %upper, u0xff
186+
%z.0 = zext i32 %u.0 to i64
187+
%s.0 = shl i64 %z.0, 32
188+
%o.0 = or i64 %base, %s.0
189+
190+
%r.1 = lshr i32 %upper, 8
191+
%u.1 = and i32 %r.1, u0xff
192+
%z.1 = zext i32 %u.1 to i64
193+
%s.1 = shl i64 %z.1, 40
194+
%o.1 = or i64 %o.0, %s.1
195+
196+
%r.2 = lshr i32 %upper, 16
197+
%u.2 = and i32 %r.2, u0xff
198+
%z.2 = zext i32 %u.2 to i64
199+
%s.2 = shl i64 %z.2, 48
200+
%o.2 = or i64 %o.1, %s.2
201+
202+
%r.3 = lshr i32 %upper, 24
203+
%u.3 = and i32 %r.3, u0xff
204+
%z.3 = zext i32 %u.3 to i64
205+
%s.3 = shl i64 %z.3, 56
206+
%o.3 = or i64 %o.2, %s.3
207+
208+
ret i64 %o.3
209+
}
210+
211+
; Same byte-wise repack of %upper as @combine, but the pieces are joined
; pairwise (a tree of `or`s) and zext(%lower) is or'ed in last. The expected
; output is again zext(%upper) << 32 or'ed with zext(%lower).
define i64 @combine_2(i32 %lower, i32 %upper) {
212+
; CHECK-LABEL: define i64 @combine_2(
213+
; CHECK-SAME: i32 [[LOWER:%.*]], i32 [[UPPER:%.*]]) {
214+
; CHECK-NEXT: [[BASE:%.*]] = zext i32 [[LOWER]] to i64
215+
; CHECK-NEXT: [[S_03:%.*]] = zext i32 [[UPPER]] to i64
216+
; CHECK-NEXT: [[O:%.*]] = shl nuw i64 [[S_03]], 32
217+
; CHECK-NEXT: [[RES:%.*]] = or disjoint i64 [[O]], [[BASE]]
218+
; CHECK-NEXT: ret i64 [[RES]]
219+
;
220+
%base = zext i32 %lower to i64
221+
222+
%u.0 = and i32 %upper, u0xff
223+
%z.0 = zext i32 %u.0 to i64
224+
%s.0 = shl i64 %z.0, 32
225+
226+
%r.1 = lshr i32 %upper, 8
227+
%u.1 = and i32 %r.1, u0xff
228+
%z.1 = zext i32 %u.1 to i64
229+
%s.1 = shl i64 %z.1, 40
230+
%o.1 = or i64 %s.0, %s.1
231+
232+
%r.2 = lshr i32 %upper, 16
233+
%u.2 = and i32 %r.2, u0xff
234+
%z.2 = zext i32 %u.2 to i64
235+
%s.2 = shl i64 %z.2, 48
236+
237+
%r.3 = lshr i32 %upper, 24
238+
%u.3 = and i32 %r.3, u0xff
239+
%z.3 = zext i32 %u.3 to i64
240+
%s.3 = shl i64 %z.3, 56
241+
%o.3 = or i64 %s.2, %s.3
242+
243+
%o = or i64 %o.1, %o.3
244+
%res = or i64 %o, %base
245+
246+
ret i64 %res
247+
}

0 commit comments

Comments
 (0)