Skip to content

Commit dd2555b

Browse files
[GISel][CombinerHelper] Add a combiner to concatenate the first halfs of two vectors together
1 parent f2d4bb6 commit dd2555b

File tree

7 files changed

+351
-143
lines changed

7 files changed

+351
-143
lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,7 +490,6 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
490490
Register UndefReg;
491491
const Register Src1 = MI.getOperand(1).getReg();
492492
const Register Src2 = MI.getOperand(2).getReg();
493-
494493
const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
495494

496495
// The destination can be longer than the source, so we separate them into
@@ -556,6 +555,45 @@ bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
556555
MI.eraseFromParent();
557556
return true;
558557
}
558+
559+
// After this point, it is assumed our shufflevectors work on vectors that can
560+
// be split into two
561+
if ((DstNumElts % 2) != 0)
562+
return false;
563+
564+
// {0, ..., n/2-1, n, ..., 3n/2-1} (n = DstNumElts) -> G_UNMERGE_VALUES
565+
// Take the first halves of the two vectors and concatenate them into one
566+
// vector.
567+
GeneratorType FirstEightA = adderGenerator(0, (DstNumElts / 2) - 1, 1);
568+
GeneratorType FirstEightB =
569+
adderGenerator(DstNumElts, DstNumElts + (DstNumElts / 2) - 1, 1);
570+
571+
GeneratorType FirstAndThird =
572+
concatGenerators(SmallVector<GeneratorType>{FirstEightA, FirstEightB});
573+
if (matchCombineShuffleVector(MI, FirstAndThird, (DstNumElts / 2) - 1)) {
574+
if (DstNumElts <= 2)
575+
return false;
576+
const Register DstReg = MI.getOperand(0).getReg();
577+
const LLT HalfSrcTy =
578+
LLT::fixed_vector(SrcNumElts / 2, SrcTy.getScalarType());
579+
const Register HalfOfA = createUnmergeValue(
580+
MI, MI.getOperand(1).getReg(),
581+
MRI.createGenericVirtualRegister(HalfSrcTy), 0, 0, SrcNumElts);
582+
const Register HalfOfB = createUnmergeValue(
583+
MI, MI.getOperand(2).getReg(),
584+
MRI.createGenericVirtualRegister(HalfSrcTy), 0, 0, SrcNumElts);
585+
586+
const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
587+
if (Mask[0] <= 0) {
588+
Builder.buildMergeLikeInstr(DstReg, {HalfOfA, HalfOfB});
589+
} else {
590+
Builder.buildMergeLikeInstr(DstReg, {HalfOfB, HalfOfA});
591+
}
592+
593+
MI.eraseFromParent();
594+
return true;
595+
}
596+
559597
return false;
560598
}
561599

llvm/test/CodeGen/AArch64/GlobalISel/combine-shufflevector.mir

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,9 @@ body: |
101101
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
102102
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
103103
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %c(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
104-
; CHECK-NEXT: %z:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(0, undef, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef)
104+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s8>)
105+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s8>), [[UV3:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<16 x s8>)
106+
; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS [[UV]](<8 x s8>), [[UV2]](<8 x s8>)
105107
; CHECK-NEXT: $q0 = COPY %z(<16 x s8>)
106108
; CHECK-NEXT: RET_ReallyLR implicit $q0
107109
%p1:_(p0) = COPY $x0
@@ -179,7 +181,9 @@ body: |
179181
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF
180182
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %a(<4 x s8>), %b(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
181183
; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<16 x s8>) = G_CONCAT_VECTORS %c(<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>), [[DEF]](<4 x s8>)
182-
; CHECK-NEXT: %z:_(<16 x s8>) = G_SHUFFLE_VECTOR [[CONCAT_VECTORS]](<16 x s8>), [[CONCAT_VECTORS1]], shufflemask(undef, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, undef, undef, undef, undef)
184+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s8>), [[UV1:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<16 x s8>)
185+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<8 x s8>), [[UV3:%[0-9]+]]:_(<8 x s8>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<16 x s8>)
186+
; CHECK-NEXT: %z:_(<16 x s8>) = G_CONCAT_VECTORS [[UV]](<8 x s8>), [[UV2]](<8 x s8>)
183187
; CHECK-NEXT: $q0 = COPY %z(<16 x s8>)
184188
; CHECK-NEXT: RET_ReallyLR implicit $q0
185189
%p1:_(p0) = COPY $x0

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-shuffle-vector.mir

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -270,8 +270,10 @@ body: |
270270
; CHECK-NEXT: {{ $}}
271271
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
272272
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
273-
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<4 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<4 x s32>), [[COPY1]], shufflemask(4, 5, 0, 1)
274-
; CHECK-NEXT: RET_ReallyLR implicit [[SHUF]](<4 x s32>)
273+
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY]](<4 x s32>)
274+
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s32>), [[UV3:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[COPY1]](<4 x s32>)
275+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[UV2]](<2 x s32>), [[UV]](<2 x s32>)
276+
; CHECK-NEXT: RET_ReallyLR implicit [[CONCAT_VECTORS]](<4 x s32>)
275277
%0:_(<4 x s32>) = COPY $q0
276278
%1:_(<4 x s32>) = COPY $q1
277279
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1(<4 x s32>), shufflemask(4,5,0,1)

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-undef.mir

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,6 @@ alignment: 4
304304
tracksRegLiveness: true
305305
body: |
306306
bb.0:
307-
; Optimize these to zero?
308307
; CHECK-LABEL: name: ashr_undef_lhs
309308
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 10
310309
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF

llvm/test/CodeGen/AArch64/arm64-neon-copy.ll

Lines changed: 18 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1776,19 +1776,10 @@ entry:
17761776
}
17771777

17781778
define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 {
1779-
; CHECK-SD-LABEL: test_concat_v16i8_v16i8_v16i8:
1780-
; CHECK-SD: // %bb.0: // %entry
1781-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
1782-
; CHECK-SD-NEXT: ret
1783-
;
1784-
; CHECK-GI-LABEL: test_concat_v16i8_v16i8_v16i8:
1785-
; CHECK-GI: // %bb.0: // %entry
1786-
; CHECK-GI-NEXT: adrp x8, .LCPI126_0
1787-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
1788-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI126_0]
1789-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
1790-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
1791-
; CHECK-GI-NEXT: ret
1779+
; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8:
1780+
; CHECK: // %bb.0: // %entry
1781+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
1782+
; CHECK-NEXT: ret
17921783
entry:
17931784
%vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
17941785
ret <16 x i8> %vecinit30
@@ -1803,9 +1794,7 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
18031794
;
18041795
; CHECK-GI-LABEL: test_concat_v16i8_v8i8_v16i8:
18051796
; CHECK-GI: // %bb.0: // %entry
1806-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
1807-
; CHECK-GI-NEXT: adrp x8, .LCPI127_0
1808-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
1797+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
18091798
; CHECK-GI-NEXT: mov b2, v0.b[1]
18101799
; CHECK-GI-NEXT: mov b3, v0.b[2]
18111800
; CHECK-GI-NEXT: mov b4, v0.b[3]
@@ -1814,14 +1803,13 @@ define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 {
18141803
; CHECK-GI-NEXT: mov b7, v0.b[6]
18151804
; CHECK-GI-NEXT: mov b16, v0.b[7]
18161805
; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
1817-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI127_0]
18181806
; CHECK-GI-NEXT: mov v0.b[2], v3.b[0]
18191807
; CHECK-GI-NEXT: mov v0.b[3], v4.b[0]
18201808
; CHECK-GI-NEXT: mov v0.b[4], v5.b[0]
18211809
; CHECK-GI-NEXT: mov v0.b[5], v6.b[0]
18221810
; CHECK-GI-NEXT: mov v0.b[6], v7.b[0]
18231811
; CHECK-GI-NEXT: mov v0.b[7], v16.b[0]
1824-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
1812+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
18251813
; CHECK-GI-NEXT: ret
18261814
entry:
18271815
%vecext = extractelement <8 x i8> %x, i32 0
@@ -1999,19 +1987,10 @@ entry:
19991987
}
20001988

20011989
define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 {
2002-
; CHECK-SD-LABEL: test_concat_v8i16_v8i16_v8i16:
2003-
; CHECK-SD: // %bb.0: // %entry
2004-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
2005-
; CHECK-SD-NEXT: ret
2006-
;
2007-
; CHECK-GI-LABEL: test_concat_v8i16_v8i16_v8i16:
2008-
; CHECK-GI: // %bb.0: // %entry
2009-
; CHECK-GI-NEXT: adrp x8, .LCPI130_0
2010-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
2011-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI130_0]
2012-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
2013-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
2014-
; CHECK-GI-NEXT: ret
1990+
; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16:
1991+
; CHECK: // %bb.0: // %entry
1992+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
1993+
; CHECK-NEXT: ret
20151994
entry:
20161995
%vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
20171996
ret <8 x i16> %vecinit14
@@ -2026,17 +2005,14 @@ define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 {
20262005
;
20272006
; CHECK-GI-LABEL: test_concat_v8i16_v4i16_v8i16:
20282007
; CHECK-GI: // %bb.0: // %entry
2029-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
2030-
; CHECK-GI-NEXT: adrp x8, .LCPI131_0
2031-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
2008+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
20322009
; CHECK-GI-NEXT: mov h2, v0.h[1]
20332010
; CHECK-GI-NEXT: mov h3, v0.h[2]
20342011
; CHECK-GI-NEXT: mov h4, v0.h[3]
20352012
; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
2036-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI131_0]
20372013
; CHECK-GI-NEXT: mov v0.h[2], v3.h[0]
20382014
; CHECK-GI-NEXT: mov v0.h[3], v4.h[0]
2039-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
2015+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
20402016
; CHECK-GI-NEXT: ret
20412017
entry:
20422018
%vecext = extractelement <4 x i16> %x, i32 0
@@ -2142,19 +2118,10 @@ entry:
21422118
}
21432119

21442120
define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 {
2145-
; CHECK-SD-LABEL: test_concat_v4i32_v4i32_v4i32:
2146-
; CHECK-SD: // %bb.0: // %entry
2147-
; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
2148-
; CHECK-SD-NEXT: ret
2149-
;
2150-
; CHECK-GI-LABEL: test_concat_v4i32_v4i32_v4i32:
2151-
; CHECK-GI: // %bb.0: // %entry
2152-
; CHECK-GI-NEXT: adrp x8, .LCPI134_0
2153-
; CHECK-GI-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
2154-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI134_0]
2155-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
2156-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
2157-
; CHECK-GI-NEXT: ret
2121+
; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32:
2122+
; CHECK: // %bb.0: // %entry
2123+
; CHECK-NEXT: mov v0.d[1], v1.d[0]
2124+
; CHECK-NEXT: ret
21582125
entry:
21592126
%vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
21602127
ret <4 x i32> %vecinit6
@@ -2169,13 +2136,10 @@ define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 {
21692136
;
21702137
; CHECK-GI-LABEL: test_concat_v4i32_v2i32_v4i32:
21712138
; CHECK-GI: // %bb.0: // %entry
2172-
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1
2173-
; CHECK-GI-NEXT: adrp x8, .LCPI135_0
2174-
; CHECK-GI-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
2139+
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
21752140
; CHECK-GI-NEXT: mov s2, v0.s[1]
21762141
; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
2177-
; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI135_0]
2178-
; CHECK-GI-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
2142+
; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
21792143
; CHECK-GI-NEXT: ret
21802144
entry:
21812145
%vecext = extractelement <2 x i32> %x, i32 0

0 commit comments

Comments
 (0)