Skip to content

Commit e55dd98

Browse files
committed
[Legalizer] Cache extracted element when lowering G_SHUFFLE_VECTOR.
1 parent 9458faa commit e55dd98

File tree

3 files changed

+57
-54
lines changed

3 files changed

+57
-54
lines changed

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9008,6 +9008,8 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
90089008
SmallVector<Register, 32> BuildVec;
90099009
LLT EltTy = DstTy.getScalarType();
90109010

9011+
DenseMap<unsigned, Register> CachedExtract;
9012+
90119013
for (int Idx : Mask) {
90129014
if (Idx < 0) {
90139015
if (!Undef.isValid())
@@ -9022,9 +9024,13 @@ LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
90229024
int NumElts = Src0Ty.getNumElements();
90239025
Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
90249026
int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9025-
auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9026-
auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
9027-
BuildVec.push_back(Extract.getReg(0));
9027+
auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9028+
if (Inserted) {
9029+
auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9030+
It->second =
9031+
MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9032+
}
9033+
BuildVec.push_back(It->second);
90289034
}
90299035
}
90309036

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.mir

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,7 @@ body: |
136136
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3
137137
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
138138
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
139-
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
140-
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV2]](s32)
141-
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY3]](s32)
139+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY2]](s32)
142140
; CHECK-NEXT: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>)
143141
%0:_(<2 x s32>) = COPY $vgpr0_vgpr1
144142
%1:_(<2 x s32>) = COPY $vgpr2_vgpr3
@@ -374,3 +372,30 @@ body: |
374372
$vgpr0_vgpr1 = COPY %4
375373
376374
...
375+
376+
---
377+
name: shufflevector_cache_element
378+
tracksRegLiveness: true
379+
380+
body: |
381+
bb.0:
382+
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
383+
384+
; CHECK-LABEL: name: shufflevector_cache_element
385+
; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
386+
; CHECK-NEXT: {{ $}}
387+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
388+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
389+
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
390+
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
391+
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY2]](s32), [[COPY2]](s32), [[COPY2]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY3]](s32), [[COPY3]](s32), [[COPY3]](s32)
392+
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[BUILD_VECTOR]](<8 x s32>), [[COPY1]](s32)
393+
; CHECK-NEXT: $vgpr0 = COPY [[EVEC]](s32)
394+
%0:_(s32) = COPY $vgpr0
395+
%1:_(s32) = COPY $vgpr1
396+
%2:_(<2 x s32>) = G_BUILD_VECTOR %0, %1
397+
%3:_(<8 x s32>) = G_SHUFFLE_VECTOR %2, %2, shufflemask(0, 0, 0, 0, 3, 3, 3, 3)
398+
%4:_(s32) = G_EXTRACT_VECTOR_ELT %3(<8 x s32>), %1(s32)
399+
$vgpr0 = COPY %4
400+
401+
...

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shuffle-vector.s16.mir

Lines changed: 20 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -237,16 +237,13 @@ body: |
237237
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
238238
; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
239239
; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
240-
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
241-
; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
242240
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
243241
; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
244-
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
245242
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
246-
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
243+
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32)
247244
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
248-
; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
249-
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
245+
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
246+
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
250247
;
251248
; GFX9-LABEL: name: shufflevector_v2s16_v2s16_0_0
252249
; GFX9: liveins: $vgpr0, $vgpr1
@@ -257,10 +254,7 @@ body: |
257254
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
258255
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
259256
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
260-
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
261-
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
262-
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
263-
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
257+
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16)
264258
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
265259
%0:_(<2 x s16>) = COPY $vgpr0
266260
%1:_(<2 x s16>) = COPY $vgpr1
@@ -285,12 +279,10 @@ body: |
285279
; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
286280
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
287281
; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
288-
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
289-
; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
290-
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
282+
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
291283
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
292-
; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
293-
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
284+
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
285+
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
294286
;
295287
; GFX9-LABEL: name: shufflevector_v2s16_v2s16_1_1
296288
; GFX9: liveins: $vgpr0, $vgpr1
@@ -301,10 +293,7 @@ body: |
301293
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
302294
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
303295
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
304-
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
305-
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
306-
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
307-
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
296+
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16)
308297
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
309298
%0:_(<2 x s16>) = COPY $vgpr0
310299
%1:_(<2 x s16>) = COPY $vgpr1
@@ -329,16 +318,13 @@ body: |
329318
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
330319
; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
331320
; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
332-
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
333-
; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
334321
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
335322
; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
336-
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
337323
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
338-
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
324+
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32)
339325
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
340-
; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
341-
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
326+
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
327+
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
342328
;
343329
; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_2
344330
; GFX9: liveins: $vgpr0, $vgpr1
@@ -349,10 +335,7 @@ body: |
349335
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
350336
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
351337
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
352-
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
353-
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
354-
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
355-
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
338+
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16)
356339
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
357340
%0:_(<2 x s16>) = COPY $vgpr0
358341
%1:_(<2 x s16>) = COPY $vgpr1
@@ -641,12 +624,10 @@ body: |
641624
; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
642625
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
643626
; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
644-
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
645-
; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
646-
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR1]], [[C]](s32)
627+
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[LSHR]], [[C]](s32)
647628
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[LSHR]], [[SHL]]
648-
; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
649-
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
629+
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
630+
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
650631
;
651632
; GFX9-LABEL: name: shufflevector_v2s16_v2s16_3_3
652633
; GFX9: liveins: $vgpr0, $vgpr1
@@ -657,10 +638,7 @@ body: |
657638
; GFX9-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
658639
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
659640
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
660-
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>)
661-
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
662-
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
663-
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
641+
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16)
664642
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
665643
%0:_(<2 x s16>) = COPY $vgpr0
666644
%1:_(<2 x s16>) = COPY $vgpr1
@@ -733,16 +711,13 @@ body: |
733711
; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
734712
; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
735713
; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
736-
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
737-
; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
738714
; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
739715
; GFX8-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
740-
; GFX8-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C1]]
741716
; GFX8-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
742-
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C2]](s32)
717+
; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C2]](s32)
743718
; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]]
744-
; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
745-
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST2]](<2 x s16>)
719+
; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
720+
; GFX8-NEXT: $vgpr0 = COPY [[BITCAST1]](<2 x s16>)
746721
;
747722
; GFX9-LABEL: name: shufflevector_v2s16_v2s16_2_0
748723
; GFX9: liveins: $vgpr0, $vgpr1
@@ -753,10 +728,7 @@ body: |
753728
; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
754729
; GFX9-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
755730
; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32)
756-
; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
757-
; GFX9-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32)
758-
; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32)
759-
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16)
731+
; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC]](s16)
760732
; GFX9-NEXT: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
761733
%0:_(<2 x s16>) = COPY $vgpr0
762734
%1:_(<2 x s16>) = COPY $vgpr1

0 commit comments

Comments
 (0)