Skip to content

Commit c2e6b01

Browse files
[GISel][CombinerHelper] Add a combiner to concatenate the first halves of two vectors together
1 parent 150f4ec commit c2e6b01

File tree

5 files changed

+253
-93
lines changed

5 files changed

+253
-93
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ Register CombinerHelper::createUnmergeValue(
350350
Register TargetReg = DstReg;
351351
if (DstTy != HalfSizeTy) {
352352
TargetReg = MRI.createGenericVirtualRegister(HalfSizeTy);
353-
}
353+
}
354354

355355
// Each destination fits n times into the source and each iteration we
356356
// exactly halve the source. Therefore we need to pick on which side we want
@@ -402,7 +402,6 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
402402
Register UndefReg;
403403
const Register Src1 = MI.getOperand(1).getReg();
404404
const Register Src2 = MI.getOperand(2).getReg();
405-
406405
const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
407406

408407
// The destination can be longer than the source, so we separate them into
@@ -468,6 +467,33 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
468467
MI.eraseFromParent();
469468
return true;
470469
}
470+
471+
// {0, 1, ..., n/4-1, n/2, n/2+1, ..., 3n/4-1} -> G_UNMERGE_VALUES
472+
// Take the first halves of the two vectors and concatenate them into one
473+
// vector.
474+
GeneratorType FirstEightA = adderGenerator(0, (DstNumElts / 2) - 1, 1);
475+
GeneratorType FirstEightB =
476+
adderGenerator(DstNumElts, DstNumElts + (DstNumElts / 2) - 1, 1);
477+
478+
GeneratorType FirstAndThird =
479+
concatGenerators(SmallVector<GeneratorType>{FirstEightA, FirstEightB});
480+
if (matchCombineShuffleVector(MI, FirstAndThird, (DstNumElts / 2) - 1)) {
481+
if (DstNumElts <= 2)
482+
return false;
483+
const Register DstReg = MI.getOperand(0).getReg();
484+
const LLT HalfSrcTy =
485+
LLT::fixed_vector(SrcNumElts / 2, SrcTy.getScalarType());
486+
const Register HalfOfA = createUnmergeValue(
487+
MI, MI.getOperand(1).getReg(),
488+
MRI.createGenericVirtualRegister(HalfSrcTy), 0, 0, SrcNumElts);
489+
const Register HalfOfB = createUnmergeValue(
490+
MI, MI.getOperand(2).getReg(),
491+
MRI.createGenericVirtualRegister(HalfSrcTy), 0, 0, SrcNumElts);
492+
Builder.buildMergeLikeInstr(DstReg, {HalfOfA, HalfOfB});
493+
MI.eraseFromParent();
494+
return true;
495+
}
496+
471497
return false;
472498
}
473499

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,14 @@ body: |
270270
; CHECK-NEXT: {{ $}}
271271
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
272272
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
273-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 0, 1)
274-
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
273+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[COPY]](<4 x s32>)
274+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[BITCAST]](s128)
275+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[TRUNC]](s64)
276+
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s128) = G_BITCAST [[COPY1]](<4 x s32>)
277+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[BITCAST2]](s128)
278+
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[TRUNC1]](s64)
279+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s32>), [[BITCAST3]](<2 x s32>)
280+
; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s32>)
275281
%0:_(<4 x s32>) = COPY $q0
276282
%1:_(<4 x s32>) = COPY $q1
277283
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,0,1)

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-undef.mir

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -216,11 +216,11 @@ body: |
216216
; CHECK-LABEL: name: shufflevector_not_all_ops_undef
217217
; CHECK: liveins: $d0
218218
; CHECK-NEXT: {{ $}}
219-
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
220-
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
221-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<4 x s32>), [[COPY]], shufflemask(0, 1, 2, 1)
222-
; CHECK-NEXT: $q0 = COPY [[SHUF]](<4 x s32>)
223-
; CHECK-NEXT: RET_ReallyLR implicit $q0
219+
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
220+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
221+
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<2 x s32>) = G_SHUFFLE_VECTOR [[DEF]](<2 x s32>), [[COPY]], shufflemask(0, 1)
222+
; CHECK-NEXT: $d0 = COPY [[SHUF]](<2 x s32>)
223+
; CHECK-NEXT: RET_ReallyLR implicit $d0
224224
%1:_(<2 x s32>) = G_IMPLICIT_DEF
225225
%2:_(<2 x s32>) = COPY $d0
226226
%0:_(<2 x s32>) = G_SHUFFLE_VECTOR %1(<2 x s32>), %2(<2 x s32>), shufflemask(0, 1)
@@ -304,7 +304,6 @@ alignment: 4
304304
tracksRegLiveness: true
305305
body: |
306306
bb.0:
307-
; Optimize these to zero?
308307
; CHECK-LABEL: name: ashr_undef_lhs
309308
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
310309
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF

llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,3 +411,201 @@ body: |
411411
%1:_(<128 x s8>) = COPY $y2
412412
%2:_(<4 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(4, 5, 6, 7)
413413
PseudoRET implicit $lr, implicit %2
414+
...
415+
---
416+
name: extract_vector_third_half_1024
417+
legalized: false
418+
body: |
419+
bb.1.entry:
420+
liveins: $y2, $y3
421+
; CHECK-LABEL: name: extract_vector_third_half_1024
422+
; CHECK: liveins: $y2, $y3
423+
; CHECK-NEXT: {{ $}}
424+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y3
425+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
426+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV]](<16 x s32>)
427+
%1:_(<32 x s32>) = COPY $y2
428+
%2:_(<32 x s32>) = COPY $y3
429+
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %2:_, shufflemask(32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47)
430+
PseudoRET implicit $lr, implicit %0
431+
...
432+
---
433+
name: extract_vector_third_half_512
434+
legalized: false
435+
body: |
436+
bb.1.entry:
437+
liveins: $x0, $x1
438+
; CHECK-LABEL: name: extract_vector_third_half_512
439+
; CHECK: liveins: $x0, $x1
440+
; CHECK-NEXT: {{ $}}
441+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x1
442+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
443+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV]](<8 x s32>)
444+
%1:_(<16 x s32>) = COPY $x0
445+
%2:_(<16 x s32>) = COPY $x1
446+
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %2:_, shufflemask(16, 17, 18, 19, 20, 21, 22, 23)
447+
PseudoRET implicit $lr, implicit %0
448+
...
449+
---
450+
name: extract_vector_third_half_256
451+
legalized: false
452+
body: |
453+
bb.1.entry:
454+
liveins: $wl0, $wl1
455+
; CHECK-LABEL: name: extract_vector_third_half_256
456+
; CHECK: liveins: $wl0, $wl1
457+
; CHECK-NEXT: {{ $}}
458+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl1
459+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY]](<8 x s32>)
460+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<4 x s32>)
461+
%1:_(<8 x s32>) = COPY $wl0
462+
%2:_(<8 x s32>) = COPY $wl1
463+
%0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(8, 9, 10, 11)
464+
PseudoRET implicit $lr, implicit %0
465+
...
466+
---
467+
name: extract_vector_third_half_128
468+
legalized: false
469+
body: |
470+
bb.1.entry:
471+
liveins: $q0, $q1
472+
; CHECK-LABEL: name: extract_vector_third_half_128
473+
; CHECK: liveins: $q0, $q1
474+
; CHECK-NEXT: {{ $}}
475+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q1
476+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
477+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV]](<2 x s32>)
478+
%1:_(<4 x s32>) = COPY $q0
479+
%2:_(<4 x s32>) = COPY $q1
480+
%0:_(<2 x s32>) = G_SHUFFLE_VECTOR %1:_(<4 x s32>), %2:_, shufflemask(4, 5)
481+
PseudoRET implicit $lr, implicit %0
482+
...
483+
---
484+
name: extract_vector_fourth_half_1024
485+
legalized: false
486+
body: |
487+
bb.1.entry:
488+
liveins: $y2, $y3
489+
; CHECK-LABEL: name: extract_vector_fourth_half_1024
490+
; CHECK: liveins: $y2, $y3
491+
; CHECK-NEXT: {{ $}}
492+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y3
493+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
494+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<16 x s32>)
495+
%1:_(<32 x s32>) = COPY $y2
496+
%2:_(<32 x s32>) = COPY $y3
497+
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<32 x s32>), %2:_, shufflemask(48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63)
498+
PseudoRET implicit $lr, implicit %0
499+
...
500+
---
501+
name: extract_vector_fourth_half_512
502+
legalized: false
503+
body: |
504+
bb.1.entry:
505+
liveins: $x0, $x1
506+
; CHECK-LABEL: name: extract_vector_fourth_half_512
507+
; CHECK: liveins: $x0, $x1
508+
; CHECK-NEXT: {{ $}}
509+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x1
510+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
511+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<8 x s32>)
512+
%1:_(<16 x s32>) = COPY $x0
513+
%2:_(<16 x s32>) = COPY $x1
514+
%0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %2:_, shufflemask(24,25,26,27,28,29,30,31)
515+
PseudoRET implicit $lr, implicit %0
516+
...
517+
---
518+
name: extract_vector_fourth_half_256
519+
legalized: false
520+
body: |
521+
bb.1.entry:
522+
liveins: $wl0, $wl1
523+
; CHECK-LABEL: name: extract_vector_fourth_half_256
524+
; CHECK: liveins: $wl0, $wl1
525+
; CHECK-NEXT: {{ $}}
526+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl1
527+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
528+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<4 x s32>)
529+
%1:_(<8 x s32>) = COPY $wl0
530+
%2:_(<8 x s32>) = COPY $wl1
531+
%0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_, shufflemask(12,13,14,15)
532+
PseudoRET implicit $lr, implicit %0
533+
...
534+
---
535+
name: extract_vector_fourth_half_128
536+
legalized: false
537+
body: |
538+
bb.1.entry:
539+
liveins: $q0, $q1
540+
; CHECK-LABEL: name: extract_vector_fourth_half_128
541+
; CHECK: liveins: $q0, $q1
542+
; CHECK-NEXT: {{ $}}
543+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q1
544+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
545+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<2 x s32>)
546+
%1:_(<4 x s32>) = COPY $q0
547+
%2:_(<4 x s32>) = COPY $q1
548+
%0:_(<2 x s32>) = G_SHUFFLE_VECTOR %1:_(<4 x s32>), %2:_, shufflemask(6,7)
549+
PseudoRET implicit $lr, implicit %0
550+
...
551+
---
552+
name: insert_vector_16_elements
553+
legalized: false
554+
body: |
555+
bb.1.entry:
556+
liveins: $x0, $x1
557+
; CHECK-LABEL: name: insert_vector_16_elements
558+
; CHECK: liveins: $x0, $x1
559+
; CHECK-NEXT: {{ $}}
560+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
561+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1
562+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
563+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY1]](<16 x s32>)
564+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[UV]](<8 x s32>), [[UV2]](<8 x s32>)
565+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<16 x s32>)
566+
%1:_(<16 x s32>) = COPY $x0
567+
%2:_(<16 x s32>) = COPY $x1
568+
%3:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %2:_(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23)
569+
PseudoRET implicit $lr, implicit %3
570+
...
571+
572+
---
573+
name: insert_vector_8_elements
574+
legalized: false
575+
body: |
576+
bb.1.entry:
577+
liveins: $wl0, $wl1
578+
; CHECK-LABEL: name: insert_vector_8_elements
579+
; CHECK: liveins: $wl0, $wl1
580+
; CHECK-NEXT: {{ $}}
581+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl0
582+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl1
583+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY]](<8 x s32>)
584+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>)
585+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[AIE_UNPAD_VECTOR]](<4 x s32>), [[AIE_UNPAD_VECTOR1]](<4 x s32>)
586+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<8 x s32>)
587+
%1:_(<8 x s32>) = COPY $wl0
588+
%2:_(<8 x s32>) = COPY $wl1
589+
%3:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_(<8 x s32>), shufflemask(0, 1, 2, 3, 8, 9, 10, 11)
590+
PseudoRET implicit $lr, implicit %3
591+
...
592+
593+
---
594+
name: insert_vector_128_elements
595+
legalized: false
596+
body: |
597+
bb.1.entry:
598+
liveins: $y2, $y3
599+
; CHECK-LABEL: name: insert_vector_128_elements
600+
; CHECK: liveins: $y2, $y3
601+
; CHECK-NEXT: {{ $}}
602+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
603+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<128 x s8>) = COPY $y3
604+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
605+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<64 x s8>), [[UV3:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY1]](<128 x s8>)
606+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<128 x s8>) = G_CONCAT_VECTORS [[UV]](<64 x s8>), [[UV2]](<64 x s8>)
607+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<128 x s8>)
608+
%1:_(<128 x s8>) = COPY $y2
609+
%2:_(<128 x s8>) = COPY $y3
610+
%3:_(<128 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %2:_(<128 x s8>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191)
611+
PseudoRET implicit $lr, implicit %3

0 commit comments

Comments
 (0)