Skip to content

Commit 30e314f

Browse files
committed
!fixup address comments, thanks!
1 parent 9792b91 commit 30e314f

File tree

2 files changed

+26
-74
lines changed

2 files changed

+26
-74
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1715,8 +1715,8 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
17151715
if (!match(&I, m_ZExt(m_Value())))
17161716
return false;
17171717

1718-
// Try to convert a vector zext feeding only extracts to a set of scalar (Src
1719-
// << ExtIdx *Size) & (Size -1), if profitable.
1718+
// Try to convert a vector zext feeding only extracts to a set of scalar
1719+
// (Src >> (ExtIdx * Size)) & ((1 << Size) - 1), if profitable.
17201720
auto *Ext = cast<ZExtInst>(&I);
17211721
auto *SrcTy = dyn_cast<FixedVectorType>(Ext->getOperand(0)->getType());
17221722
if (!SrcTy)
@@ -1743,8 +1743,11 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
17431743
VectorCost += TTI.getVectorInstrCost(Instruction::ExtractElement, DstTy,
17441744
CostKind, Idx->getZExtValue(), U);
17451745
}
1746-
1746+
Type *ScalarTy =
1747+
IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy));
17471748
InstructionCost ScalarCost =
1749+
TTI.getCastInstrCost(Instruction::BitCast, SrcTy, ScalarTy,
1750+
TTI::CastContextHint::None, CostKind) +
17481751
ExtCnt * TTI.getArithmeticInstrCost(
17491752
Instruction::And, ScalarDstTy, CostKind,
17501753
{TTI::OK_AnyValue, TTI::OP_None},
@@ -1760,9 +1763,7 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
17601763
Value *ScalarV = Ext->getOperand(0);
17611764
if (!isGuaranteedNotToBePoison(ScalarV, &AC))
17621765
ScalarV = Builder.CreateFreeze(ScalarV);
1763-
ScalarV = Builder.CreateBitCast(
1764-
ScalarV,
1765-
IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
1766+
ScalarV = Builder.CreateBitCast(ScalarV, ScalarTy);
17661767
unsigned SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
17671768
Value *EltBitMask =
17681769
ConstantInt::get(ScalarV->getType(), (1ull << SrcEltSizeInBits) - 1);

llvm/test/Transforms/VectorCombine/AArch64/ext-extract.ll

Lines changed: 19 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -9,25 +9,15 @@ define void @zext_v4i8_all_lanes_used(<4 x i8> %src) {
99
; CHECK-LABEL: define void @zext_v4i8_all_lanes_used(
1010
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
1111
; CHECK-NEXT: [[ENTRY:.*:]]
12-
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
13-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
14-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
15-
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255
16-
; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 16
17-
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
18-
; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP1]], 8
19-
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 255
20-
; CHECK-NEXT: [[TMP8:%.*]] = lshr i32 [[TMP1]], 0
21-
; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 255
2212
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
2313
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT9]], i64 0
2414
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT9]], i64 1
2515
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT9]], i64 2
2616
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT9]], i64 3
27-
; CHECK-NEXT: call void @use.i32(i32 [[TMP9]])
28-
; CHECK-NEXT: call void @use.i32(i32 [[TMP7]])
29-
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
30-
; CHECK-NEXT: call void @use.i32(i32 [[TMP3]])
17+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_0]])
18+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_1]])
19+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_2]])
20+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_3]])
3121
; CHECK-NEXT: ret void
3222
;
3323
entry:
@@ -111,21 +101,13 @@ define void @zext_v4i8_3_lanes_used_2(<4 x i8> %src) {
111101
; CHECK-LABEL: define void @zext_v4i8_3_lanes_used_2(
112102
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
113103
; CHECK-NEXT: [[ENTRY:.*:]]
114-
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
115-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
116-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 24
117-
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255
118-
; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 8
119-
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
120-
; CHECK-NEXT: [[TMP6:%.*]] = lshr i32 [[TMP1]], 0
121-
; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 255
122104
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
123105
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT9]], i64 0
124106
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT9]], i64 1
125107
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT9]], i64 3
126-
; CHECK-NEXT: call void @use.i32(i32 [[TMP7]])
127-
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
128-
; CHECK-NEXT: call void @use.i32(i32 [[TMP3]])
108+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_0]])
109+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_1]])
110+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_3]])
129111
; CHECK-NEXT: ret void
130112
;
131113
entry:
@@ -171,17 +153,11 @@ define void @zext_v4i8_2_lanes_used_2(<4 x i8> %src) {
171153
; CHECK-LABEL: define void @zext_v4i8_2_lanes_used_2(
172154
; CHECK-SAME: <4 x i8> [[SRC:%.*]]) {
173155
; CHECK-NEXT: [[ENTRY:.*:]]
174-
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i8> [[SRC]]
175-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[TMP0]] to i32
176-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 [[TMP1]], 16
177-
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 255
178-
; CHECK-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP1]], 0
179-
; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP4]], 255
180156
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
181157
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT9]], i64 0
182158
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT9]], i64 2
183-
; CHECK-NEXT: call void @use.i32(i32 [[TMP5]])
184-
; CHECK-NEXT: call void @use.i32(i32 [[TMP3]])
159+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_0]])
160+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_2]])
185161
; CHECK-NEXT: ret void
186162
;
187163
entry:
@@ -198,24 +174,15 @@ define void @zext_v4i8_all_lanes_used_noundef(<4 x i8> noundef %src) {
198174
; CHECK-LABEL: define void @zext_v4i8_all_lanes_used_noundef(
199175
; CHECK-SAME: <4 x i8> noundef [[SRC:%.*]]) {
200176
; CHECK-NEXT: [[ENTRY:.*:]]
201-
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[SRC]] to i32
202-
; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[TMP0]], 24
203-
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 255
204-
; CHECK-NEXT: [[TMP3:%.*]] = lshr i32 [[TMP0]], 16
205-
; CHECK-NEXT: [[TMP4:%.*]] = and i32 [[TMP3]], 255
206-
; CHECK-NEXT: [[TMP5:%.*]] = lshr i32 [[TMP0]], 8
207-
; CHECK-NEXT: [[TMP6:%.*]] = and i32 [[TMP5]], 255
208-
; CHECK-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP0]], 0
209-
; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 255
210177
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i8> [[SRC]] to <4 x i32>
211178
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i32> [[EXT9]], i64 0
212179
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i32> [[EXT9]], i64 1
213180
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i32> [[EXT9]], i64 2
214181
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i32> [[EXT9]], i64 3
215-
; CHECK-NEXT: call void @use.i32(i32 [[TMP8]])
216-
; CHECK-NEXT: call void @use.i32(i32 [[TMP6]])
217-
; CHECK-NEXT: call void @use.i32(i32 [[TMP4]])
218-
; CHECK-NEXT: call void @use.i32(i32 [[TMP2]])
182+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_0]])
183+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_1]])
184+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_2]])
185+
; CHECK-NEXT: call void @use.i32(i32 [[EXT_3]])
219186
; CHECK-NEXT: ret void
220187
;
221188
entry:
@@ -268,25 +235,15 @@ define void @zext_v4i16_all_lanes_used(<4 x i16> %src) {
268235
; CHECK-LABEL: define void @zext_v4i16_all_lanes_used(
269236
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) {
270237
; CHECK-NEXT: [[ENTRY:.*:]]
271-
; CHECK-NEXT: [[TMP0:%.*]] = freeze <4 x i16> [[SRC]]
272-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to i64
273-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 48
274-
; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 65535
275-
; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP1]], 32
276-
; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 65535
277-
; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP1]], 16
278-
; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 65535
279-
; CHECK-NEXT: [[TMP8:%.*]] = lshr i64 [[TMP1]], 0
280-
; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 65535
281238
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <4 x i16> [[SRC]] to <4 x i64>
282239
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <4 x i64> [[EXT9]], i64 0
283240
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <4 x i64> [[EXT9]], i64 1
284241
; CHECK-NEXT: [[EXT_2:%.*]] = extractelement <4 x i64> [[EXT9]], i64 2
285242
; CHECK-NEXT: [[EXT_3:%.*]] = extractelement <4 x i64> [[EXT9]], i64 3
286-
; CHECK-NEXT: call void @use.i64(i64 [[TMP9]])
287-
; CHECK-NEXT: call void @use.i64(i64 [[TMP7]])
288-
; CHECK-NEXT: call void @use.i64(i64 [[TMP5]])
289-
; CHECK-NEXT: call void @use.i64(i64 [[TMP3]])
243+
; CHECK-NEXT: call void @use.i64(i64 [[EXT_0]])
244+
; CHECK-NEXT: call void @use.i64(i64 [[EXT_1]])
245+
; CHECK-NEXT: call void @use.i64(i64 [[EXT_2]])
246+
; CHECK-NEXT: call void @use.i64(i64 [[EXT_3]])
290247
; CHECK-NEXT: ret void
291248
;
292249
entry:
@@ -307,17 +264,11 @@ define void @zext_v2i32_all_lanes_used(<2 x i32> %src) {
307264
; CHECK-LABEL: define void @zext_v2i32_all_lanes_used(
308265
; CHECK-SAME: <2 x i32> [[SRC:%.*]]) {
309266
; CHECK-NEXT: [[ENTRY:.*:]]
310-
; CHECK-NEXT: [[TMP0:%.*]] = freeze <2 x i32> [[SRC]]
311-
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to i64
312-
; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 32
313-
; CHECK-NEXT: [[TMP3:%.*]] = and i64 [[TMP2]], 4294967295
314-
; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP1]], 0
315-
; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], 4294967295
316267
; CHECK-NEXT: [[EXT9:%.*]] = zext nneg <2 x i32> [[SRC]] to <2 x i64>
317268
; CHECK-NEXT: [[EXT_0:%.*]] = extractelement <2 x i64> [[EXT9]], i64 0
318269
; CHECK-NEXT: [[EXT_1:%.*]] = extractelement <2 x i64> [[EXT9]], i64 1
319-
; CHECK-NEXT: call void @use.i64(i64 [[TMP5]])
320-
; CHECK-NEXT: call void @use.i64(i64 [[TMP3]])
270+
; CHECK-NEXT: call void @use.i64(i64 [[EXT_0]])
271+
; CHECK-NEXT: call void @use.i64(i64 [[EXT_1]])
321272
; CHECK-NEXT: ret void
322273
;
323274
entry:

0 commit comments

Comments
 (0)