
Commit a614043

[RISCV] Split vunzip2{a,b} for deinterleave2 as we do for vnsrl
The motivation is basically the same as for the vnsrl cases: we'd rather do three simple operations that are linear in LMUL than have to fall back to a vrgather on at least one source.
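For illustration, a minimal example distilled from the tests below (the function name is shortened here): the pattern being lowered is an even-element deinterleave that draws elements from both sources.

    define <4 x i64> @unzip2a_dual(<4 x i64> %a, <4 x i64> %b) {
    entry:
      %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
      ret <4 x i64> %c
    }

With this change, each source is unzipped by its own ri.vunzip2a.vv and the two half-width results are recombined with a vslideup or whole-register move, replacing the previous slide-and-merge (or vrgather) sequences; the ZIP check lines in the test diff below show the before and after.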
1 parent a144f58 · commit a614043

2 files changed (+40, -62 lines)


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 15 additions & 0 deletions
@@ -5837,6 +5837,21 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
           lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
       return DAG.getExtractSubvector(DL, VT, Res, 0);
     }
+
+    // Narrow each source and concatenate them.
+    // FIXME: For small LMUL it is better to concatenate first.
+    if (1 < count_if(Mask,
+                     [&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
+        1 < count_if(Mask,
+                     [&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
+      SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
+      SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
+
+      MVT SubVT = VT.getHalfNumVectorElementsVT();
+      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
+                         DAG.getExtractSubvector(DL, SubVT, Lo, 0),
+                         DAG.getExtractSubvector(DL, SubVT, Hi, 0));
+    }
   }

   if (SDValue V =
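To make the guard concrete, worked from the mask in the v4i64 tests below: Mask is <0, 2, 4, 6> and Mask.size() is 4, so the first count_if counts two indices below 4 (0 and 2, taken from V1) and the second counts two indices of 4 or more (4 and 6, taken from V2). Both counts exceed 1, meaning the shuffle needs more than one element from each operand, so the split-then-concatenate path fires; masks drawing at most one element from a source fall through to the existing lowerings.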

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll

Lines changed: 25 additions & 62 deletions
@@ -1364,13 +1364,11 @@ define <4 x i64> @unzip2a_dual_v4i64(<4 x i64> %a, <4 x i64> %b) {
 ;
 ; ZIP-LABEL: unzip2a_dual_v4i64:
 ; ZIP:       # %bb.0: # %entry
-; ZIP-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
-; ZIP-NEXT:    vmv.v.i v0, 8
-; ZIP-NEXT:    vslideup.vi v10, v9, 2
-; ZIP-NEXT:    vslideup.vi v10, v9, 1, v0.t
-; ZIP-NEXT:    vmv.v.i v0, 12
-; ZIP-NEXT:    ri.vunzip2a.vv v11, v8, v9
-; ZIP-NEXT:    vmerge.vvm v8, v11, v10, v0
+; ZIP-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT:    ri.vunzip2a.vv v11, v9, v10
+; ZIP-NEXT:    ri.vunzip2a.vv v9, v8, v10
+; ZIP-NEXT:    vslideup.vi v9, v11, 2
+; ZIP-NEXT:    vmv.v.v v8, v9
 ; ZIP-NEXT:    ret
 entry:
   %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1502,16 +1500,11 @@ define <16 x i64> @unzip2a_dual_v16i64(<16 x i64> %a, <16 x i64> %b) {
 ; ZIP-LABEL: unzip2a_dual_v16i64:
 ; ZIP:       # %bb.0: # %entry
 ; ZIP-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; ZIP-NEXT:    ri.vunzip2a.vv v16, v8, v10
-; ZIP-NEXT:    vsetivli zero, 16, e16, m1, ta, ma
-; ZIP-NEXT:    vid.v v8
-; ZIP-NEXT:    li a0, -256
-; ZIP-NEXT:    vadd.vv v8, v8, v8
-; ZIP-NEXT:    vmv.s.x v0, a0
-; ZIP-NEXT:    vadd.vi v8, v8, -16
-; ZIP-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; ZIP-NEXT:    vrgatherei16.vv v16, v12, v8, v0.t
-; ZIP-NEXT:    vmv.v.v v8, v16
+; ZIP-NEXT:    ri.vunzip2a.vv v16, v12, v14
+; ZIP-NEXT:    ri.vunzip2a.vv v12, v8, v10
+; ZIP-NEXT:    vsetivli zero, 16, e64, m4, ta, ma
+; ZIP-NEXT:    vslideup.vi v12, v16, 8
+; ZIP-NEXT:    vmv.v.v v8, v12
 ; ZIP-NEXT:    ret
 entry:
   %c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -1557,13 +1550,11 @@ define <4 x i64> @unzip2a_dual_v4i64_exact(<4 x i64> %a, <4 x i64> %b) vscale_ra
 ;
 ; ZIP-LABEL: unzip2a_dual_v4i64_exact:
 ; ZIP:       # %bb.0: # %entry
-; ZIP-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
-; ZIP-NEXT:    vmv.v.i v0, 8
-; ZIP-NEXT:    vslideup.vi v10, v9, 2
-; ZIP-NEXT:    vslideup.vi v10, v9, 1, v0.t
-; ZIP-NEXT:    vmv.v.i v0, 12
-; ZIP-NEXT:    ri.vunzip2a.vv v11, v8, v9
-; ZIP-NEXT:    vmerge.vvm v8, v11, v10, v0
+; ZIP-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT:    ri.vunzip2a.vv v11, v9, v10
+; ZIP-NEXT:    ri.vunzip2a.vv v9, v8, v10
+; ZIP-NEXT:    vslideup.vi v9, v11, 2
+; ZIP-NEXT:    vmv.v.v v8, v9
 ; ZIP-NEXT:    ret
 entry:
   %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1609,13 +1600,12 @@ define <4 x i64> @unzip2a_dual_v4i64_exact_nf2(<4 x i64> %a, <4 x i64> %b) vscal
 ;
 ; ZIP-LABEL: unzip2a_dual_v4i64_exact_nf2:
 ; ZIP:       # %bb.0: # %entry
-; ZIP-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
-; ZIP-NEXT:    vmv.v.i v0, 8
-; ZIP-NEXT:    vslideup.vi v10, v9, 2
-; ZIP-NEXT:    vslideup.vi v10, v9, 1, v0.t
-; ZIP-NEXT:    vmv.v.i v0, 12
-; ZIP-NEXT:    ri.vunzip2a.vv v11, v8, v9
-; ZIP-NEXT:    vmerge.vvm v8, v11, v10, v0
+; ZIP-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
+; ZIP-NEXT:    ri.vunzip2a.vv v11, v9, v10
+; ZIP-NEXT:    ri.vunzip2a.vv v9, v8, v10
+; ZIP-NEXT:    vsetvli zero, zero, e64, m1, tu, ma
+; ZIP-NEXT:    vslideup.vi v9, v11, 2
+; ZIP-NEXT:    vmv1r.v v8, v9
 ; ZIP-NEXT:    ret
 entry:
   %c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1740,37 +1730,10 @@ define <16 x i64> @unzip2a_dual_v16i64_exact(<16 x i64> %a, <16 x i64> %b) vscal
 ;
 ; ZIP-LABEL: unzip2a_dual_v16i64_exact:
 ; ZIP:       # %bb.0: # %entry
-; ZIP-NEXT:    vsetivli zero, 4, e64, m1, ta, mu
-; ZIP-NEXT:    vslideup.vi v18, v15, 2
-; ZIP-NEXT:    vmv.v.i v16, 8
-; ZIP-NEXT:    vmv.v.i v17, 12
-; ZIP-NEXT:    vslideup.vi v20, v13, 2
-; ZIP-NEXT:    vmv.v.v v0, v16
-; ZIP-NEXT:    vslideup.vi v18, v15, 1, v0.t
-; ZIP-NEXT:    ri.vunzip2a.vv v15, v14, v19
-; ZIP-NEXT:    vmv.v.v v0, v17
-; ZIP-NEXT:    vmerge.vvm v15, v15, v18, v0
-; ZIP-NEXT:    vmv.v.v v0, v16
-; ZIP-NEXT:    vslideup.vi v20, v13, 1, v0.t
-; ZIP-NEXT:    ri.vunzip2a.vv v14, v12, v13
-; ZIP-NEXT:    vslideup.vi v12, v11, 2
-; ZIP-NEXT:    vslideup.vi v18, v9, 2
-; ZIP-NEXT:    vmv.v.v v0, v17
-; ZIP-NEXT:    vmerge.vvm v14, v14, v20, v0
-; ZIP-NEXT:    li a0, -256
-; ZIP-NEXT:    ri.vunzip2a.vv v20, v10, v13
-; ZIP-NEXT:    ri.vunzip2a.vv v10, v8, v19
-; ZIP-NEXT:    vmv.v.v v0, v16
-; ZIP-NEXT:    vslideup.vi v12, v11, 1, v0.t
-; ZIP-NEXT:    vmv.v.v v0, v17
-; ZIP-NEXT:    vmerge.vvm v13, v20, v12, v0
-; ZIP-NEXT:    vmv.v.v v0, v16
-; ZIP-NEXT:    vslideup.vi v18, v9, 1, v0.t
-; ZIP-NEXT:    vmv.v.v v0, v17
-; ZIP-NEXT:    vmerge.vvm v12, v10, v18, v0
-; ZIP-NEXT:    vmv.s.x v0, a0
-; ZIP-NEXT:    vsetivli zero, 16, e64, m4, ta, ma
-; ZIP-NEXT:    vmerge.vvm v8, v12, v12, v0
+; ZIP-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
+; ZIP-NEXT:    ri.vunzip2a.vv v18, v12, v14
+; ZIP-NEXT:    ri.vunzip2a.vv v16, v8, v10
+; ZIP-NEXT:    vmv4r.v v8, v16
 ; ZIP-NEXT:    ret
 entry:
   %c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
