Skip to content

Commit 423d9db

Browse files
[GISel][CombinerHelper] Add two patterns that extract the first two chunks of a vector
1 parent 3e55c53 commit 423d9db

File tree

6 files changed

+220
-138
lines changed

6 files changed

+220
-138
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,8 +366,12 @@ Register CombinerHelper::createUnmergeValue(
366366

367367
bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
368368
const Register DstReg = MI.getOperand(0).getReg();
369+
const Register SrcReg1 = MI.getOperand(1).getReg();
370+
const Register SrcReg2 = MI.getOperand(2).getReg();
371+
369372
const LLT DstTy = MRI.getType(DstReg);
370373
const LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
374+
371375
const unsigned DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
372376
const unsigned SrcNumElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
373377

@@ -416,6 +420,38 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
416420
return true;
417421
}
418422

423+
// {0, 1, ..., |DstVector| - 1} -> G_UNMERGE_VALUES
424+
// Extracts the first chunk of the same size as the destination vector from
425+
// the source
426+
GeneratorType FirstQuarter = adderGenerator(0, DstNumElts - 1, 1);
427+
if (matchCombineShuffleVector(MI, FirstQuarter, DstNumElts - 1)) {
428+
// This optimization does not work if the target type is not a power of two,
429+
// this can happen in some backends that support uneven vector types. We
430+
// also need to make sure that the vector can be split into two.
431+
if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
432+
SrcNumElts % DstNumElts != 0)
433+
return false;
434+
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
435+
const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
436+
createUnmergeValue(MI, TargetReg, DstReg, 0, 0, SrcNumElts);
437+
MI.eraseFromParent();
438+
return true;
439+
}
440+
441+
// {|DstVector|, |DstVector| + 1, ..., 2 * |DstVector| - 1} -> G_UNMERGE_VALUES
442+
// Extracts the second chunk of the same size as the destination vector from
443+
// the source
444+
GeneratorType SecondQuarter =
445+
adderGenerator(DstNumElts, (DstNumElts * 2) - 1, 1);
446+
if (matchCombineShuffleVector(MI, SecondQuarter, DstNumElts - 1)) {
447+
if (((SrcNumElts / 2) % 2) != 0 || SrcNumElts % DstNumElts != 0)
448+
return false;
449+
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
450+
const Register TargetReg = Mask[0] < (int)SrcNumElts ? SrcReg1 : SrcReg2;
451+
createUnmergeValue(MI, TargetReg, DstReg, 1, 0, SrcNumElts);
452+
MI.eraseFromParent();
453+
return true;
454+
}
419455
return false;
420456
}
421457

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-undef.mir

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -216,16 +216,16 @@ body: |
216216
; CHECK-LABEL: name: shufflevector_not_all_ops_undef
217217
; CHECK: liveins: $d0
218218
; CHECK-NEXT: {{ $}}
219-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
220-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
221-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<2 x s32>), [[COPY]], shufflemask(0, 1)
222-
; CHECK-NEXT: $d0 = COPY [[SHUF]](<2 x s32>)
223-
; CHECK-NEXT: RET_ReallyLR implicit $d0
224-
%1:_(<2 x s32>) = G_IMPLICIT_DEF
225-
%2:_(<2 x s32>) = COPY $d0
226-
%0:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2(<2 x s32>), shufflemask(0, 1)
227-
$d0 = COPY %0(<2 x s32>)
228-
RET_ReallyLR implicit $d0
219+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
220+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
221+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<4 x s32>), [[COPY]], shufflemask(0, 1, 2, 1)
222+
; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
223+
; CHECK-NEXT: RET_ReallyLR implicit $q0
224+
%1:_(<4 x s32>) = G_IMPLICIT_DEF
225+
%2:_(<4 x s32>) = COPY $q0
226+
%0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<4 x s32>), %2(<4 x s32>), shufflemask(0, 1, 2, 1)
227+
$q0 = COPY %0(<4 x s32>)
228+
RET_ReallyLR implicit $q0
229229
230230
...
231231
---

llvm/test/CodeGen/AArch64/ext-narrow-index.ll

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@ define <8 x i8> @i8_off8(<16 x i8> %arg1, <16 x i8> %arg2) {
4242
;
4343
; CHECK-GISEL-LABEL: i8_off8:
4444
; CHECK-GISEL: // %bb.0: // %entry
45-
; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
46-
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
45+
; CHECK-GISEL-NEXT: mov d0, v0.d[1]
4746
; CHECK-GISEL-NEXT: ret
4847
entry:
4948
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> %arg2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
@@ -254,9 +253,7 @@ define <8 x i8> @i8_zero_off8(<16 x i8> %arg1) {
254253
;
255254
; CHECK-GISEL-LABEL: i8_zero_off8:
256255
; CHECK-GISEL: // %bb.0: // %entry
257-
; CHECK-GISEL-NEXT: movi v1.2d, #0000000000000000
258-
; CHECK-GISEL-NEXT: ext v0.16b, v0.16b, v1.16b, #8
259-
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
256+
; CHECK-GISEL-NEXT: mov d0, v0.d[1]
260257
; CHECK-GISEL-NEXT: ret
261258
entry:
262259
%shuffle = shufflevector <16 x i8> %arg1, <16 x i8> zeroinitializer, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>

llvm/test/CodeGen/AArch64/vecreduce-add.ll

Lines changed: 10 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -6305,69 +6305,16 @@ entry:
63056305
}
63066306

63076307
define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i16> %ay, <8 x i16> %bx, <8 x i16> %by) {
6308-
; CHECK-SD-BASE-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
6309-
; CHECK-SD-BASE: // %bb.0: // %entry
6310-
; CHECK-SD-BASE-NEXT: uaddlp v1.4s, v1.8h
6311-
; CHECK-SD-BASE-NEXT: uaddlp v3.4s, v3.8h
6312-
; CHECK-SD-BASE-NEXT: uadalp v1.4s, v0.8h
6313-
; CHECK-SD-BASE-NEXT: uadalp v3.4s, v2.8h
6314-
; CHECK-SD-BASE-NEXT: add v0.4s, v3.4s, v1.4s
6315-
; CHECK-SD-BASE-NEXT: addv s0, v0.4s
6316-
; CHECK-SD-BASE-NEXT: fmov w0, s0
6317-
; CHECK-SD-BASE-NEXT: ret
6318-
;
6319-
; CHECK-SD-DOT-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
6320-
; CHECK-SD-DOT: // %bb.0: // %entry
6321-
; CHECK-SD-DOT-NEXT: uaddlp v1.4s, v1.8h
6322-
; CHECK-SD-DOT-NEXT: uaddlp v3.4s, v3.8h
6323-
; CHECK-SD-DOT-NEXT: uadalp v1.4s, v0.8h
6324-
; CHECK-SD-DOT-NEXT: uadalp v3.4s, v2.8h
6325-
; CHECK-SD-DOT-NEXT: add v0.4s, v3.4s, v1.4s
6326-
; CHECK-SD-DOT-NEXT: addv s0, v0.4s
6327-
; CHECK-SD-DOT-NEXT: fmov w0, s0
6328-
; CHECK-SD-DOT-NEXT: ret
6329-
;
6330-
; CHECK-GI-BASE-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
6331-
; CHECK-GI-BASE: // %bb.0: // %entry
6332-
; CHECK-GI-BASE-NEXT: ushll v4.4s, v0.4h, #0
6333-
; CHECK-GI-BASE-NEXT: ushll2 v0.4s, v0.8h, #0
6334-
; CHECK-GI-BASE-NEXT: ushll v5.4s, v1.4h, #0
6335-
; CHECK-GI-BASE-NEXT: ushll2 v1.4s, v1.8h, #0
6336-
; CHECK-GI-BASE-NEXT: ushll v6.4s, v2.4h, #0
6337-
; CHECK-GI-BASE-NEXT: ushll2 v2.4s, v2.8h, #0
6338-
; CHECK-GI-BASE-NEXT: ushll v7.4s, v3.4h, #0
6339-
; CHECK-GI-BASE-NEXT: ushll2 v3.4s, v3.8h, #0
6340-
; CHECK-GI-BASE-NEXT: add v0.4s, v4.4s, v0.4s
6341-
; CHECK-GI-BASE-NEXT: add v1.4s, v5.4s, v1.4s
6342-
; CHECK-GI-BASE-NEXT: add v2.4s, v6.4s, v2.4s
6343-
; CHECK-GI-BASE-NEXT: add v3.4s, v7.4s, v3.4s
6344-
; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
6345-
; CHECK-GI-BASE-NEXT: add v1.4s, v2.4s, v3.4s
6346-
; CHECK-GI-BASE-NEXT: add v0.4s, v0.4s, v1.4s
6347-
; CHECK-GI-BASE-NEXT: addv s0, v0.4s
6348-
; CHECK-GI-BASE-NEXT: fmov w0, s0
6349-
; CHECK-GI-BASE-NEXT: ret
6350-
;
6351-
; CHECK-GI-DOT-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
6352-
; CHECK-GI-DOT: // %bb.0: // %entry
6353-
; CHECK-GI-DOT-NEXT: ushll v4.4s, v0.4h, #0
6354-
; CHECK-GI-DOT-NEXT: ushll2 v0.4s, v0.8h, #0
6355-
; CHECK-GI-DOT-NEXT: ushll v5.4s, v1.4h, #0
6356-
; CHECK-GI-DOT-NEXT: ushll2 v1.4s, v1.8h, #0
6357-
; CHECK-GI-DOT-NEXT: ushll v6.4s, v2.4h, #0
6358-
; CHECK-GI-DOT-NEXT: ushll2 v2.4s, v2.8h, #0
6359-
; CHECK-GI-DOT-NEXT: ushll v7.4s, v3.4h, #0
6360-
; CHECK-GI-DOT-NEXT: ushll2 v3.4s, v3.8h, #0
6361-
; CHECK-GI-DOT-NEXT: add v0.4s, v4.4s, v0.4s
6362-
; CHECK-GI-DOT-NEXT: add v1.4s, v5.4s, v1.4s
6363-
; CHECK-GI-DOT-NEXT: add v2.4s, v6.4s, v2.4s
6364-
; CHECK-GI-DOT-NEXT: add v3.4s, v7.4s, v3.4s
6365-
; CHECK-GI-DOT-NEXT: add v0.4s, v0.4s, v1.4s
6366-
; CHECK-GI-DOT-NEXT: add v1.4s, v2.4s, v3.4s
6367-
; CHECK-GI-DOT-NEXT: add v0.4s, v0.4s, v1.4s
6368-
; CHECK-GI-DOT-NEXT: addv s0, v0.4s
6369-
; CHECK-GI-DOT-NEXT: fmov w0, s0
6370-
; CHECK-GI-DOT-NEXT: ret
6308+
; CHECK-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
6309+
; CHECK: // %bb.0: // %entry
6310+
; CHECK-NEXT: uaddlp v1.4s, v1.8h
6311+
; CHECK-NEXT: uaddlp v3.4s, v3.8h
6312+
; CHECK-NEXT: uadalp v1.4s, v0.8h
6313+
; CHECK-NEXT: uadalp v3.4s, v2.8h
6314+
; CHECK-NEXT: add v0.4s, v3.4s, v1.4s
6315+
; CHECK-NEXT: addv s0, v0.4s
6316+
; CHECK-NEXT: fmov w0, s0
6317+
; CHECK-NEXT: ret
63716318
entry:
63726319
%axx = zext <8 x i16> %ax to <8 x i32>
63736320
%s1h = shufflevector <8 x i32> %axx, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>

llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir

Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,25 @@ body: |
134134
PseudoRET implicit $lr, implicit $x0
135135
...
136136

137+
---
138+
name: extract_vector_1024_to_512
139+
legalized: false
140+
body: |
141+
bb.1.entry:
142+
liveins: $y2
143+
; CHECK-LABEL: name: extract_vector_1024_to_512
144+
; CHECK: liveins: $y2
145+
; CHECK-NEXT: {{ $}}
146+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
147+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
148+
; CHECK-NEXT: $x0 = COPY [[UV]](<16 x s32>)
149+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
150+
%1:_(<32 x s32>) = COPY $y2
151+
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
152+
$x0 = COPY %0:_(<16 x s32>)
153+
PseudoRET implicit $lr, implicit $x0
154+
...
155+
137156
---
138157
name: concat_vector_32_512_first_start
139158
legalized: false
@@ -155,6 +174,26 @@ body: |
155174
PseudoRET implicit $lr, implicit $x0
156175
...
157176

177+
---
178+
name: extract_vector_1024_to_256
179+
legalized: false
180+
body: |
181+
bb.1.entry:
182+
liveins: $y2
183+
; CHECK-LABEL: name: extract_vector_1024_to_256
184+
; CHECK: liveins: $y2
185+
; CHECK-NEXT: {{ $}}
186+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
187+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
188+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
189+
; CHECK-NEXT: $wl0 = COPY [[UV2]](<8 x s32>)
190+
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
191+
%1:_(<32 x s32>) = COPY $y2
192+
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
193+
$wl0 = COPY %0:_(<8 x s32>)
194+
PseudoRET implicit $lr, implicit $x0
195+
...
196+
158197
---
159198
name: concat_vector_32_512_first_end
160199
legalized: false
@@ -258,3 +297,117 @@ body: |
258297
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(0, -1, 2, -1, 4, -1, -1, 7, 8, 9, -1, 11, 12, -1, 14, -1)
259298
$x0 = COPY %0:_(<16 x s32>)
260299
PseudoRET implicit $lr, implicit $x0
300+
...
301+
302+
---
303+
name: extract_vector_1024_to_128
304+
legalized: false
305+
body: |
306+
bb.1.entry:
307+
liveins: $y2
308+
; CHECK-LABEL: name: extract_vector_1024_to_128
309+
; CHECK: liveins: $y2
310+
; CHECK-NEXT: {{ $}}
311+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y2
312+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
313+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[UV]](<16 x s32>)
314+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[UV2]](<8 x s32>)
315+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<4 x s32>)
316+
%1:_(<32 x s32>) = COPY $y2
317+
%0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %1:_, shufflemask(0, 1, 2, 3)
318+
PseudoRET implicit $lr, implicit %0
319+
...
320+
321+
---
322+
name: extract_vector_1024_to_32
323+
legalized: false
324+
body: |
325+
bb.1.entry:
326+
liveins: $y2
327+
; CHECK-LABEL: name: extract_vector_1024_to_32
328+
; CHECK: liveins: $y2
329+
; CHECK-NEXT: {{ $}}
330+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
331+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
332+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<32 x s8>), [[UV3:%[0-9]+]]:_(<32 x s8>) = G_UNMERGE_VALUES [[UV]](<64 x s8>)
333+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[UV2]](<32 x s8>)
334+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<8 x s8>), [[UV5:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[AIE_UNPAD_VECTOR]](<16 x s8>)
335+
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[UV4]](<8 x s8>)
336+
; CHECK-NEXT: [[UV8:%[0-9]+]]:_(<2 x s8>), [[UV9:%[0-9]+]]:_(<2 x s8>) = G_UNMERGE_VALUES [[UV6]](<4 x s8>)
337+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV8]](<2 x s8>)
338+
%1:_(<128 x s8>) = COPY $y2
339+
%0:_(<2 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_, shufflemask(0, 1)
340+
PseudoRET implicit $lr, implicit %0
341+
...
342+
343+
---
344+
name: extract_vector_second_half_512_to_256
345+
legalized: false
346+
body: |
347+
bb.1.entry:
348+
liveins: $x0, $x1
349+
; CHECK-LABEL: name: extract_vector_second_half_512_to_256
350+
; CHECK: liveins: $x0, $x1
351+
; CHECK-NEXT: {{ $}}
352+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
353+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
354+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<8 x s32>)
355+
%1:_(<16 x s32>) = COPY $x0
356+
%2:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %1:_(<16 x s32>), shufflemask(8, 9, 10, 11, 12, 13, 14, 15)
357+
PseudoRET implicit $lr, implicit %2
358+
...
359+
360+
---
361+
name: extract_vector_second_half_512_to_128
362+
legalized: false
363+
body: |
364+
bb.1.entry:
365+
liveins: $x0, $x1
366+
; CHECK-LABEL: name: extract_vector_second_half_512_to_128
367+
; CHECK: liveins: $x0, $x1
368+
; CHECK-NEXT: {{ $}}
369+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
370+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
371+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s32>), [[UV3:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[UV]](<8 x s32>)
372+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV3]](<4 x s32>)
373+
%1:_(<16 x s32>) = COPY $x0
374+
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %1:_(<16 x s32>), shufflemask(4, 5, 6, 7)
375+
PseudoRET implicit $lr, implicit %2
376+
...
377+
378+
---
379+
name: extract_vector_second_half_1024_to_512
380+
legalized: false
381+
body: |
382+
bb.1.entry:
383+
liveins: $y2, $y3
384+
; CHECK-LABEL: name: extract_vector_second_half_1024_to_512
385+
; CHECK: liveins: $y2, $y3
386+
; CHECK-NEXT: {{ $}}
387+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
388+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
389+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<64 x s8>)
390+
%1:_(<128 x s8>) = COPY $y2
391+
%2:_(<64 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127)
392+
PseudoRET implicit $lr, implicit %2
393+
...
394+
395+
---
396+
name: extract_vector_second_half_1024_to_32
397+
legalized: false
398+
body: |
399+
bb.1.entry:
400+
liveins: $y2, $y3
401+
; CHECK-LABEL: name: extract_vector_second_half_1024_to_32
402+
; CHECK: liveins: $y2, $y3
403+
; CHECK-NEXT: {{ $}}
404+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
405+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
406+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<32 x s8>), [[UV3:%[0-9]+]]:_(<32 x s8>) = G_UNMERGE_VALUES [[UV]](<64 x s8>)
407+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[UV2]](<32 x s8>)
408+
; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<8 x s8>), [[UV5:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[AIE_UNPAD_VECTOR]](<16 x s8>)
409+
; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[UV4]](<8 x s8>)
410+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV7]](<4 x s8>)
411+
%1:_(<128 x s8>) = COPY $y2
412+
%2:_(<4 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(4, 5, 6, 7)
413+
PseudoRET implicit $lr, implicit %2

0 commit comments

Comments
 (0)