Commit 382ad6f
[GISel][AArch64] Added more efficient lowering of Bitreverse (llvm#139233)
GlobalISel was previously inefficient at handling bitreverses of vector types. This patch handles vectors with i16, i32, and i64 elements, converting them into i8 vector bitreverses combined with byte-reversing rev instructions.
1 parent 3009aa7 commit 382ad6f
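
The patch rests on a standard identity: bit-reversing a wide lane equals byte-reversing the lane (G_BSWAP, a rev instruction on AArch64) and then bit-reversing each byte (an i8 G_BITREVERSE, i.e. rbit). A minimal C++ sketch of that identity for one 32-bit lane (illustrative only, not LLVM code):

```cpp
#include <cassert>
#include <cstdint>

// Reverse the bits of one byte (what an i8-lane bitreverse does).
static uint8_t reverse_byte(uint8_t b) {
  b = (b & 0xF0) >> 4 | (b & 0x0F) << 4;
  b = (b & 0xCC) >> 2 | (b & 0x33) << 2;
  b = (b & 0xAA) >> 1 | (b & 0x55) << 1;
  return b;
}

// bitreverse(x) == byte-swap(x) with each byte also bit-reversed.
static uint32_t bitreverse32(uint32_t x) {
  uint32_t out = 0;
  for (int i = 0; i < 4; ++i) {
    uint8_t byte = (x >> (8 * i)) & 0xFF;
    // Byte i moves to mirrored position 3 - i (the bswap step),
    // and its bits are reversed (the per-byte step).
    out |= uint32_t(reverse_byte(byte)) << (8 * (3 - i));
  }
  return out;
}

int main() {
  assert(bitreverse32(0x00000001u) == 0x80000000u);
  assert(bitreverse32(0x12345678u) == 0x1E6A2C48u);
}
```

On a vector such as v4s32 the same decomposition applies lane-wise, which is why one byte-reversal plus one v16s8 bitreverse covers the whole register.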

File tree: 4 files changed (+156, -349 lines)


llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 44 additions & 29 deletions

@@ -8985,49 +8985,64 @@ static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
 LegalizerHelper::LegalizeResult
 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
   auto [Dst, Src] = MI.getFirst2Regs();
-  const LLT Ty = MRI.getType(Src);
-  unsigned Size = Ty.getScalarSizeInBits();
+  const LLT SrcTy = MRI.getType(Src);
+  unsigned Size = SrcTy.getScalarSizeInBits();
+  unsigned VSize = SrcTy.getSizeInBits();
 
   if (Size >= 8) {
-    MachineInstrBuilder BSWAP =
-        MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
-
-    // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
-    // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
-    // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
-    MachineInstrBuilder Swap4 =
-        SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
-
-    // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
-    // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
-    // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
-    MachineInstrBuilder Swap2 =
-        SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
-
-    // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
-    // 6|7
-    // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
-    // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
-    SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+    if (SrcTy.isVector() && (VSize % 8 == 0) &&
+        (LI.isLegal({TargetOpcode::G_BITREVERSE,
+                     {LLT::fixed_vector(VSize / 8, 8),
+                      LLT::fixed_vector(VSize / 8, 8)}}))) {
+      // If bitreverse is legal for i8 vector of the same size, then cast
+      // to i8 vector type.
+      // e.g. v4s32 -> v16s8
+      LLT VTy = LLT::fixed_vector(VSize / 8, 8);
+      auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
+      auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
+      auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
+      MIRBuilder.buildBitcast(Dst, RBIT);
+    } else {
+      MachineInstrBuilder BSWAP =
+          MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
+
+      // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
+      // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
+      // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
+      MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
+                                        APInt::getSplat(Size, APInt(8, 0xF0)));
+
+      // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
+      // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
+      // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
+      MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
+                                        APInt::getSplat(Size, APInt(8, 0xCC)));
+
+      // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
+      // 6|7
+      // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
+      // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
+      SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
+    }
   } else {
     // Expand bitreverse for types smaller than 8 bits.
     MachineInstrBuilder Tmp;
     for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
       MachineInstrBuilder Tmp2;
       if (I < J) {
-        auto ShAmt = MIRBuilder.buildConstant(Ty, J - I);
-        Tmp2 = MIRBuilder.buildShl(Ty, Src, ShAmt);
+        auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
+        Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
       } else {
-        auto ShAmt = MIRBuilder.buildConstant(Ty, I - J);
-        Tmp2 = MIRBuilder.buildLShr(Ty, Src, ShAmt);
+        auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
+        Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
       }
 
-      auto Mask = MIRBuilder.buildConstant(Ty, 1ULL << J);
-      Tmp2 = MIRBuilder.buildAnd(Ty, Tmp2, Mask);
+      auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
+      Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
       if (I == 0)
         Tmp = Tmp2;
       else
-        Tmp = MIRBuilder.buildOr(Ty, Tmp, Tmp2);
+        Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
     }
     MIRBuilder.buildCopy(Dst, Tmp);
   }
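
When no same-width i8 vector bitreverse is legal, the else branch keeps the pre-existing mask-and-shift expansion. A hedged C++ model of what the G_BSWAP plus the three SwapN steps compute for a 32-bit scalar (helper names are illustrative, not the LLVM API; __builtin_bswap32 is the GCC/Clang intrinsic standing in for G_BSWAP):

```cpp
#include <cassert>
#include <cstdint>

// One SwapN(N, Mask) step: exchange adjacent N-bit fields selected by a
// splatted mask, per the comments in the diff above:
// [(Val & Mask) >> N] | [(Val << N) & Mask]
static uint32_t swapN(unsigned N, uint32_t Val, uint32_t Mask) {
  return ((Val & Mask) >> N) | ((Val << N) & Mask);
}

static uint32_t bitreverse32_fallback(uint32_t Val) {
  uint32_t V = __builtin_bswap32(Val); // the G_BSWAP
  V = swapN(4, V, 0xF0F0F0F0u);        // swap nibbles within each byte
  V = swapN(2, V, 0xCCCCCCCCu);        // swap bit pairs within each nibble
  V = swapN(1, V, 0xAAAAAAAAu);        // swap adjacent bits
  return V;
}

int main() {
  // Bit i must move to bit 31 - i.
  assert(bitreverse32_fallback(0x00000001u) == 0x80000000u);
  assert(bitreverse32_fallback(0x12345678u) == 0x1E6A2C48u);
}
```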

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 4 additions & 1 deletion

@@ -361,12 +361,15 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
 
   getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF).lower();
 
-  // TODO: Custom lowering for v2s32, v4s32, v2s64.
   getActionDefinitionsBuilder(G_BITREVERSE)
       .legalFor({s32, s64, v8s8, v16s8})
       .widenScalarToNextPow2(0, /*Min = */ 32)
+      .widenScalarOrEltToNextPow2OrMinSize(0, 8)
       .clampScalar(0, s32, s64)
       .clampNumElements(0, v8s8, v16s8)
+      .clampNumElements(0, v4s16, v8s16)
+      .clampNumElements(0, v2s32, v4s32)
+      .clampNumElements(0, v2s64, v2s64)
       .moreElementsToNextPow2(0)
       .lower();
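
Together with widenScalarOrEltToNextPow2OrMinSize(0, 8), the new clampNumElements rules funnel vectors toward 64- or 128-bit types whose lane counts the lowering handles. As a toy model of where those bounds sit (hypothetical helper, not the LegalizeRuleSet API; the real clamping splits or widens vectors rather than renaming their type):

```cpp
#include <algorithm>
#include <cstdio>

struct VecTy { unsigned NumElts, EltBits; };

// Mirrors the clampNumElements bounds from the rule set above.
static VecTy clampLaneCount(VecTy Ty) {
  unsigned Min = 1, Max = 1;
  switch (Ty.EltBits) {
  case 8:  Min = 8; Max = 16; break; // clampNumElements(0, v8s8, v16s8)
  case 16: Min = 4; Max = 8;  break; // clampNumElements(0, v4s16, v8s16)
  case 32: Min = 2; Max = 4;  break; // clampNumElements(0, v2s32, v4s32)
  case 64: Min = 2; Max = 2;  break; // clampNumElements(0, v2s64, v2s64)
  }
  Ty.NumElts = std::clamp(Ty.NumElts, Min, Max);
  return Ty;
}

int main() {
  VecTy T = clampLaneCount({8, 32}); // a v8s32 request is steered toward v4s32
  std::printf("v%us%u\n", T.NumElts, T.EltBits); // prints v4s32
}
```

With these bounds in place, v4s16, v8s16, v2s32, v4s32, and v2s64 all survive legalization and reach lowerBitreverse, which is exactly the set the updated tests below exercise.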

llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitreverse.mir

Lines changed: 15 additions & 135 deletions

@@ -152,33 +152,9 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: %vec:_(<4 x s16>) = COPY $d0
     ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<4 x s16>) = G_BSWAP %vec
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3856
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s16>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<4 x s16>)
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s16>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<4 x s16>)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[LSHR]], [[AND1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
-    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -13108
-    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<4 x s16>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<4 x s16>)
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<4 x s16>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<4 x s16>)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s16>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s16>) = G_OR [[LSHR1]], [[AND3]]
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
-    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16)
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -21846
-    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<4 x s16>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<4 x s16>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<4 x s16>)
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<4 x s16>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<4 x s16>)
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s16>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: %bitreverse:_(<4 x s16>) = G_OR [[LSHR2]], [[AND5]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BSWAP]](<4 x s16>)
+    ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<8 x s8>) = G_BITREVERSE [[BITCAST]]
+    ; CHECK-NEXT: %bitreverse:_(<4 x s16>) = G_BITCAST [[BITREVERSE]](<8 x s8>)
     ; CHECK-NEXT: $d0 = COPY %bitreverse(<4 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %vec:_(<4 x s16>) = COPY $d0
@@ -197,33 +173,9 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: %vec:_(<2 x s32>) = COPY $d0
     ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s32>) = G_BSWAP %vec
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -252645136
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<2 x s32>)
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s32>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<2 x s32>)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[LSHR]], [[AND1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32)
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -858993460
-    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s32>) = G_AND [[OR]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<2 x s32>)
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s32>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<2 x s32>)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s32>) = G_OR [[LSHR1]], [[AND3]]
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32)
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1431655766
-    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<2 x s32>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s32>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<2 x s32>)
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s32>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<2 x s32>)
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<2 x s32>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: %bitreverse:_(<2 x s32>) = G_OR [[LSHR2]], [[AND5]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<8 x s8>) = G_BITCAST [[BSWAP]](<2 x s32>)
+    ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<8 x s8>) = G_BITREVERSE [[BITCAST]]
+    ; CHECK-NEXT: %bitreverse:_(<2 x s32>) = G_BITCAST [[BITREVERSE]](<8 x s8>)
     ; CHECK-NEXT: $d0 = COPY %bitreverse(<2 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $d0
     %vec:_(<2 x s32>) = COPY $d0
@@ -242,33 +194,9 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: %vec:_(<2 x s64>) = COPY $q0
     ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<2 x s64>) = G_BSWAP %vec
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1085102592571150096
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C1]](s64), [[C1]](s64)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<2 x s64>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<2 x s64>)
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<2 x s64>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<2 x s64>)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s64>) = G_OR [[LSHR]], [[AND1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
-    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C2]](s64), [[C2]](s64)
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 -3689348814741910324
-    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C3]](s64), [[C3]](s64)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<2 x s64>) = G_AND [[OR]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<2 x s64>)
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<2 x s64>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<2 x s64>)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<2 x s64>) = G_OR [[LSHR1]], [[AND3]]
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
-    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C4]](s64), [[C4]](s64)
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 -6148914691236517206
-    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C5]](s64), [[C5]](s64)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<2 x s64>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<2 x s64>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<2 x s64>)
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<2 x s64>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<2 x s64>)
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<2 x s64>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: %bitreverse:_(<2 x s64>) = G_OR [[LSHR2]], [[AND5]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BSWAP]](<2 x s64>)
+    ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<16 x s8>) = G_BITREVERSE [[BITCAST]]
+    ; CHECK-NEXT: %bitreverse:_(<2 x s64>) = G_BITCAST [[BITREVERSE]](<16 x s8>)
     ; CHECK-NEXT: $q0 = COPY %bitreverse(<2 x s64>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %vec:_(<2 x s64>) = COPY $q0
@@ -287,33 +215,9 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: %vec:_(<4 x s32>) = COPY $q0
     ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<4 x s32>) = G_BSWAP %vec
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -252645136
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<4 x s32>)
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<4 x s32>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<4 x s32>)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s32>) = G_OR [[LSHR]], [[AND1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
-    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C2]](s32), [[C2]](s32), [[C2]](s32), [[C2]](s32)
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -858993460
-    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<4 x s32>) = G_AND [[OR]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<4 x s32>)
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<4 x s32>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<4 x s32>)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<4 x s32>) = G_OR [[LSHR1]], [[AND3]]
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C4]](s32), [[C4]](s32), [[C4]](s32), [[C4]](s32)
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1431655766
-    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C5]](s32), [[C5]](s32), [[C5]](s32), [[C5]](s32)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<4 x s32>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<4 x s32>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<4 x s32>)
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<4 x s32>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<4 x s32>)
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<4 x s32>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: %bitreverse:_(<4 x s32>) = G_OR [[LSHR2]], [[AND5]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BSWAP]](<4 x s32>)
+    ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<16 x s8>) = G_BITREVERSE [[BITCAST]]
+    ; CHECK-NEXT: %bitreverse:_(<4 x s32>) = G_BITCAST [[BITREVERSE]](<16 x s8>)
     ; CHECK-NEXT: $q0 = COPY %bitreverse(<4 x s32>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %vec:_(<4 x s32>) = COPY $q0
@@ -332,33 +236,9 @@ body: |
     ; CHECK-NEXT: {{ $}}
     ; CHECK-NEXT: %vec:_(<8 x s16>) = COPY $q0
     ; CHECK-NEXT: [[BSWAP:%[0-9]+]]:_(<8 x s16>) = G_BSWAP %vec
-    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
-    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16), [[C]](s16)
-    ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 -3856
-    ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16), [[C1]](s16)
-    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<8 x s16>) = G_AND [[BSWAP]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND]], [[BUILD_VECTOR]](<8 x s16>)
-    ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(<8 x s16>) = G_SHL [[BSWAP]], [[BUILD_VECTOR]](<8 x s16>)
-    ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL]], [[BUILD_VECTOR1]]
-    ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x s16>) = G_OR [[LSHR]], [[AND1]]
-    ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 2
-    ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16), [[C2]](s16)
-    ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 -13108
-    ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16), [[C3]](s16)
-    ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(<8 x s16>) = G_AND [[OR]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND2]], [[BUILD_VECTOR2]](<8 x s16>)
-    ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(<8 x s16>) = G_SHL [[OR]], [[BUILD_VECTOR2]](<8 x s16>)
-    ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL1]], [[BUILD_VECTOR3]]
-    ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<8 x s16>) = G_OR [[LSHR1]], [[AND3]]
-    ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
-    ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16), [[C4]](s16)
-    ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 -21846
-    ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16), [[C5]](s16)
-    ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(<8 x s16>) = G_AND [[OR1]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[AND4]], [[BUILD_VECTOR4]](<8 x s16>)
-    ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(<8 x s16>) = G_SHL [[OR1]], [[BUILD_VECTOR4]](<8 x s16>)
-    ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(<8 x s16>) = G_AND [[SHL2]], [[BUILD_VECTOR5]]
-    ; CHECK-NEXT: %bitreverse:_(<8 x s16>) = G_OR [[LSHR2]], [[AND5]]
+    ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<16 x s8>) = G_BITCAST [[BSWAP]](<8 x s16>)
+    ; CHECK-NEXT: [[BITREVERSE:%[0-9]+]]:_(<16 x s8>) = G_BITREVERSE [[BITCAST]]
+    ; CHECK-NEXT: %bitreverse:_(<8 x s16>) = G_BITCAST [[BITREVERSE]](<16 x s8>)
     ; CHECK-NEXT: $q0 = COPY %bitreverse(<8 x s16>)
     ; CHECK-NEXT: RET_ReallyLR implicit $q0
     %vec:_(<8 x s16>) = COPY $q0
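
At the source level, the vector cases above correspond to code like the following; compiled with clang -O2 --target=aarch64 and GlobalISel, the loop can vectorize into the v4s32 G_BITREVERSE these tests cover (a sketch under those assumptions, not a guarantee of the exact MIR):

```cpp
#include <cstdint>

// Clang's __builtin_bitreverse32 maps to the llvm.bitreverse intrinsic,
// which GlobalISel turns into G_BITREVERSE.
void bitreverse_array(uint32_t *Dst, const uint32_t *Src, int N) {
  for (int I = 0; I < N; ++I)
    Dst[I] = __builtin_bitreverse32(Src[I]);
}
```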
