Skip to content

Commit 9502ada

Browse files
[GISel][CombinerHelper] Add a combiner to concatenate the first halves of two vectors together
1 parent 86845b8 commit 9502ada

File tree

4 files changed

+114
-87
lines changed

4 files changed

+114
-87
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -389,7 +389,6 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
389389
Register UndefReg;
390390
const Register Src1 = MI.getOperand(1).getReg();
391391
const Register Src2 = MI.getOperand(2).getReg();
392-
393392
const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
394393

395394
// The destination can be longer than the source, so we separate them into
@@ -428,7 +427,8 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
428427
// the source
429428
GeneratorType FirstQuarter = adderGenerator(0, DstNumElts - 1, 1);
430429
if (matchCombineShuffleVector(MI, FirstQuarter, DstNumElts - 1)) {
431-
if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 || SrcNumElts % DstNumElts != 0 )
430+
if (SrcTy == DstTy || ((SrcNumElts / 2) % 2) != 0 ||
431+
SrcNumElts % DstNumElts != 0)
432432
return false;
433433
createUnmergeValue(MI, MI.getOperand(1).getReg(), DstReg, 0);
434434
MI.eraseFromParent();
@@ -447,6 +447,33 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
447447
MI.eraseFromParent();
448448
return true;
449449
}
450+
451+
// {1, 2, ..., n/4, n/2, n/2+1, ..., 3n/4} -> G_UNMERGE_VALUES
452+
// Take the first halves of the two vectors and concatenate them into one
453+
// vector.
454+
GeneratorType FirstEightA = adderGenerator(0, (DstNumElts / 2) - 1, 1);
455+
GeneratorType FirstEightB =
456+
adderGenerator(DstNumElts, DstNumElts + (DstNumElts / 2) - 1, 1);
457+
458+
GeneratorType FirstAndThird =
459+
concatGenerators(SmallVector<GeneratorType>{FirstEightA, FirstEightB});
460+
if (matchCombineShuffleVector(MI, FirstAndThird, (DstNumElts / 2) - 1)) {
461+
if (DstNumElts <= 2)
462+
return false;
463+
const Register DstReg = MI.getOperand(0).getReg();
464+
const LLT HalfSrcTy =
465+
LLT::fixed_vector(SrcNumElts / 2, SrcTy.getScalarType());
466+
const Register HalfOfA =
467+
createUnmergeValue(MI, MI.getOperand(1).getReg(),
468+
MRI.createGenericVirtualRegister(HalfSrcTy), 0);
469+
const Register HalfOfB =
470+
createUnmergeValue(MI, MI.getOperand(2).getReg(),
471+
MRI.createGenericVirtualRegister(HalfSrcTy), 0);
472+
Builder.buildMergeLikeInstr(DstReg, {HalfOfA, HalfOfB});
473+
MI.eraseFromParent();
474+
return true;
475+
}
476+
450477
return false;
451478
}
452479

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,14 @@ body: |
270270
; CHECK-NEXT: {{ $}}
271271
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
272272
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
273-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 0, 1)
274-
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
273+
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s128) = G_BITCAST [[COPY]](<4 x s32>)
274+
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[BITCAST]](s128)
275+
; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[TRUNC]](s64)
276+
; CHECK-NEXT: [[BITCAST2:%[0-9]+]]:_(s128) = G_BITCAST [[COPY1]](<4 x s32>)
277+
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s64) = G_TRUNC [[BITCAST2]](s128)
278+
; CHECK-NEXT: [[BITCAST3:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[TRUNC1]](s64)
279+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BITCAST1]](<2 x s32>), [[BITCAST3]](<2 x s32>)
280+
; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s32>)
275281
%0:_(<4 x s32>) = COPY $q0
276282
%1:_(<4 x s32>) = COPY $q1
277283
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,0,1)

llvm/test/CodeGen/AIE/aie2/GlobalISel/prelegalizercombiner-shufflevector.mir

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -411,3 +411,66 @@ body: |
411411
%1:_(<128 x s8>) = COPY $y2
412412
%2:_(<4 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %1:_(<128 x s8>), shufflemask(4, 5, 6, 7)
413413
PseudoRET implicit $lr, implicit %2
414+
...
415+
416+
---
417+
name: insert_vector_16_elements
418+
legalized: false
419+
body: |
420+
bb.1.entry:
421+
liveins: $x0, $x1
422+
; CHECK-LABEL: name: insert_vector_16_elements
423+
; CHECK: liveins: $x0, $x1
424+
; CHECK-NEXT: {{ $}}
425+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
426+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1
427+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
428+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY1]](<16 x s32>)
429+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s32>) = G_CONCAT_VECTORS [[UV]](<8 x s32>), [[UV2]](<8 x s32>)
430+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<16 x s32>)
431+
%1:_(<16 x s32>) = COPY $x0
432+
%2:_(<16 x s32>) = COPY $x1
433+
%3:_(<16 x s32>) = G_SHUFFLE_VECTOR %1:_(<16 x s32>), %2:_(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23)
434+
PseudoRET implicit $lr, implicit %3
435+
...
436+
437+
---
438+
name: insert_vector_8_elements
439+
legalized: false
440+
body: |
441+
bb.1.entry:
442+
liveins: $wl0, $wl1
443+
; CHECK-LABEL: name: insert_vector_8_elements
444+
; CHECK: liveins: $wl0, $wl1
445+
; CHECK-NEXT: {{ $}}
446+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl0
447+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl1
448+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY]](<8 x s32>)
449+
; CHECK-NEXT: [[AIE_UNPAD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>)
450+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[AIE_UNPAD_VECTOR]](<4 x s32>), [[AIE_UNPAD_VECTOR1]](<4 x s32>)
451+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<8 x s32>)
452+
%1:_(<8 x s32>) = COPY $wl0
453+
%2:_(<8 x s32>) = COPY $wl1
454+
%3:_(<8 x s32>) = G_SHUFFLE_VECTOR %1:_(<8 x s32>), %2:_(<8 x s32>), shufflemask(0, 1, 2, 3, 8, 9, 10, 11)
455+
PseudoRET implicit $lr, implicit %3
456+
...
457+
458+
---
459+
name: insert_vector_128_elements
460+
legalized: false
461+
body: |
462+
bb.1.entry:
463+
liveins: $y2, $y3
464+
; CHECK-LABEL: name: insert_vector_128_elements
465+
; CHECK: liveins: $y2, $y3
466+
; CHECK-NEXT: {{ $}}
467+
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<128 x s8>) = COPY $y2
468+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<128 x s8>) = COPY $y3
469+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<64 x s8>), [[UV1:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY]](<128 x s8>)
470+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<64 x s8>), [[UV3:%[0-9]+]]:_(<64 x s8>) = G_UNMERGE_VALUES [[COPY1]](<128 x s8>)
471+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<128 x s8>) = G_CONCAT_VECTORS [[UV]](<64 x s8>), [[UV2]](<64 x s8>)
472+
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<128 x s8>)
473+
%1:_(<128 x s8>) = COPY $y2
474+
%2:_(<128 x s8>) = COPY $y3
475+
%3:_(<128 x s8>) = G_SHUFFLE_VECTOR %1:_(<128 x s8>), %2:_(<128 x s8>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191)
476+
PseudoRET implicit $lr, implicit %3

llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll

Lines changed: 14 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -49,95 +49,26 @@ define <16 x i32> @test_insert_vector(<16 x i32> noundef %a, i32 noundef %idx, <
4949
; CHECK: .p2align 4
5050
; CHECK-NEXT: // %bb.0: // %entry
5151
; CHECK-NEXT: nopb ; nopa ; nops ; jz r0, #.LBB1_2; nopv
52-
; CHECK-NEXT: nopx // Delay Slot 5
52+
; CHECK-NEXT: nopa ; nopx // Delay Slot 5
5353
; CHECK-NEXT: nop // Delay Slot 4
5454
; CHECK-NEXT: nop // Delay Slot 3
55-
; CHECK-NEXT: mov r24, r16 // Delay Slot 2
56-
; CHECK-NEXT: mova r16, #0 // Delay Slot 1
55+
; CHECK-NEXT: nop // Delay Slot 2
56+
; CHECK-NEXT: vmov wl0, wl4 // Delay Slot 1
5757
; CHECK-NEXT: // %bb.1: // %if.end
58-
; CHECK-NEXT: vextract.s32 r0, x2, r16
59-
; CHECK-NEXT: vextract.s32 r1, x4, r16
60-
; CHECK-NEXT: nop
61-
; CHECK-NEXT: mova r16, #1
62-
; CHECK-NEXT: vextract.s32 r2, x2, r16
63-
; CHECK-NEXT: vextract.s32 r3, x4, r16
64-
; CHECK-NEXT: nop
65-
; CHECK-NEXT: mova r16, #2
66-
; CHECK-NEXT: vextract.s32 r4, x2, r16
67-
; CHECK-NEXT: vextract.s32 r5, x4, r16
68-
; CHECK-NEXT: nop
69-
; CHECK-NEXT: mova r16, #3
70-
; CHECK-NEXT: vextract.s32 r6, x2, r16
71-
; CHECK-NEXT: vextract.s32 r7, x4, r16
72-
; CHECK-NEXT: nop
73-
; CHECK-NEXT: mova r16, #4
74-
; CHECK-NEXT: vextract.s32 r8, x2, r16
75-
; CHECK-NEXT: vextract.s32 r9, x4, r16
76-
; CHECK-NEXT: nop
77-
; CHECK-NEXT: mova r16, #5
78-
; CHECK-NEXT: vextract.s32 r10, x2, r16
79-
; CHECK-NEXT: vextract.s32 r11, x4, r16
80-
; CHECK-NEXT: nop
81-
; CHECK-NEXT: mova r16, #7
82-
; CHECK-NEXT: vextract.s32 r12, x2, r16
83-
; CHECK-NEXT: j #.LBB1_3
84-
; CHECK-NEXT: vextract.s32 r13, x4, r16 // Delay Slot 5
85-
; CHECK-NEXT: nop // Delay Slot 4
86-
; CHECK-NEXT: mova r16, #6 // Delay Slot 3
87-
; CHECK-NEXT: vextract.s32 r14, x2, r16 // Delay Slot 2
88-
; CHECK-NEXT: vextract.s32 r15, x4, r16 // Delay Slot 1
58+
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
59+
; CHECK-NEXT: nopx // Delay Slot 5
60+
; CHECK-NEXT: vmov wh2, wl0 // Delay Slot 4
61+
; CHECK-NEXT: nop // Delay Slot 3
62+
; CHECK-NEXT: vmov x0, x2 // Delay Slot 2
63+
; CHECK-NEXT: nop // Delay Slot 1
8964
; CHECK-NEXT: .p2align 4
9065
; CHECK-NEXT: .LBB1_2: // %if.then
91-
; CHECK-NEXT: nopa ; nopx ; vextract.s32 r0, x4, r16
92-
; CHECK-NEXT: vextract.s32 r1, x2, r16
93-
; CHECK-NEXT: nop
94-
; CHECK-NEXT: mova r16, #1
95-
; CHECK-NEXT: vextract.s32 r2, x4, r16
96-
; CHECK-NEXT: vextract.s32 r3, x2, r16
97-
; CHECK-NEXT: nop
98-
; CHECK-NEXT: mova r16, #2
99-
; CHECK-NEXT: vextract.s32 r4, x4, r16
100-
; CHECK-NEXT: vextract.s32 r5, x2, r16
101-
; CHECK-NEXT: nop
102-
; CHECK-NEXT: mova r16, #3
103-
; CHECK-NEXT: vextract.s32 r6, x4, r16
104-
; CHECK-NEXT: vextract.s32 r7, x2, r16
105-
; CHECK-NEXT: nop
106-
; CHECK-NEXT: mova r16, #4
107-
; CHECK-NEXT: vextract.s32 r8, x4, r16
108-
; CHECK-NEXT: vextract.s32 r9, x2, r16
109-
; CHECK-NEXT: nop
110-
; CHECK-NEXT: mova r16, #5
111-
; CHECK-NEXT: vextract.s32 r10, x4, r16
112-
; CHECK-NEXT: vextract.s32 r11, x2, r16
113-
; CHECK-NEXT: nop
114-
; CHECK-NEXT: mova r16, #7
115-
; CHECK-NEXT: vextract.s32 r12, x4, r16
116-
; CHECK-NEXT: vextract.s32 r13, x2, r16
117-
; CHECK-NEXT: nop
118-
; CHECK-NEXT: mova r16, #6
119-
; CHECK-NEXT: vextract.s32 r14, x4, r16
120-
; CHECK-NEXT: vextract.s32 r15, x2, r16
121-
; CHECK-NEXT: .p2align 4
122-
; CHECK-NEXT: .LBB1_3: // %cleanup
123-
; CHECK-NEXT: nopa ; nopb ; nopx ; vpush.lo.32 x0, r13, x0
124-
; CHECK-NEXT: vpush.lo.32 x0, r15, x0
125-
; CHECK-NEXT: vpush.lo.32 x0, r11, x0
126-
; CHECK-NEXT: vpush.lo.32 x0, r9, x0
127-
; CHECK-NEXT: vpush.lo.32 x0, r7, x0
128-
; CHECK-NEXT: vpush.lo.32 x0, r5, x0
129-
; CHECK-NEXT: vpush.lo.32 x0, r3, x0
130-
; CHECK-NEXT: vpush.lo.32 x0, r1, x0
131-
; CHECK-NEXT: vpush.lo.32 x0, r12, x0
132-
; CHECK-NEXT: vpush.lo.32 x0, r14, x0
133-
; CHECK-NEXT: vpush.lo.32 x0, r10, x0
134-
; CHECK-NEXT: vpush.lo.32 x0, r8, x0
13566
; CHECK-NEXT: ret lr
136-
; CHECK-NEXT: vpush.lo.32 x0, r6, x0 // Delay Slot 5
137-
; CHECK-NEXT: vpush.lo.32 x0, r4, x0 // Delay Slot 4
138-
; CHECK-NEXT: vpush.lo.32 x0, r2, x0 // Delay Slot 3
139-
; CHECK-NEXT: vpush.lo.32 x0, r0, x0 // Delay Slot 2
140-
; CHECK-NEXT: mov r16, r24 // Delay Slot 1
67+
; CHECK-NEXT: nop // Delay Slot 5
68+
; CHECK-NEXT: nop // Delay Slot 4
69+
; CHECK-NEXT: nop // Delay Slot 3
70+
; CHECK-NEXT: vmov wh0, wl2 // Delay Slot 2
71+
; CHECK-NEXT: nop // Delay Slot 1
14172
entry:
14273
%shuffle = shufflevector <8 x i32> %b, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
14374
%cmp = icmp eq i32 %idx, 0

0 commit comments

Comments
 (0)