Skip to content

Commit c88ad6f

Browse files
committed
[RISCV] Exploit register layout for vunzip2{a,b} when VLEN is known
When the exact VLEN is known and the vector type's size is a multiple of VLEN, this allows us to use a single instruction instead of needing to split and slide.
1 parent a614043, commit c88ad6f

File tree

2 files changed

+8
-9
lines changed

2 files changed

+8
-9
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5830,14 +5830,16 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
58305830
Index == 0 ? RISCVISD::RI_VUNZIP2A_VL : RISCVISD::RI_VUNZIP2B_VL;
58315831
if (V2.isUndef())
58325832
return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
5833+
if (auto VLEN = Subtarget.getRealVLen();
5834+
VLEN && VT.getSizeInBits().getKnownMinValue() % *VLEN == 0)
5835+
return lowerVZIP(Opc, V1, V2, DL, DAG, Subtarget);
58335836
if (SDValue Src = foldConcatVector(V1, V2)) {
58345837
EVT NewVT = VT.getDoubleNumVectorElementsVT();
58355838
Src = DAG.getExtractSubvector(DL, NewVT, Src, 0);
58365839
SDValue Res =
58375840
lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
58385841
return DAG.getExtractSubvector(DL, VT, Res, 0);
58395842
}
5840-
58415843
// Narrow each source and concatenate them.
58425844
// FIXME: For small LMUL it is better to concatenate first.
58435845
if (1 < count_if(Mask,

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1551,10 +1551,8 @@ define <4 x i64> @unzip2a_dual_v4i64_exact(<4 x i64> %a, <4 x i64> %b) vscale_ra
15511551
; ZIP-LABEL: unzip2a_dual_v4i64_exact:
15521552
; ZIP: # %bb.0: # %entry
15531553
; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
1554-
; ZIP-NEXT: ri.vunzip2a.vv v11, v9, v10
1555-
; ZIP-NEXT: ri.vunzip2a.vv v9, v8, v10
1556-
; ZIP-NEXT: vslideup.vi v9, v11, 2
1557-
; ZIP-NEXT: vmv.v.v v8, v9
1554+
; ZIP-NEXT: ri.vunzip2a.vv v10, v8, v9
1555+
; ZIP-NEXT: vmv.v.v v8, v10
15581556
; ZIP-NEXT: ret
15591557
entry:
15601558
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -1730,10 +1728,9 @@ define <16 x i64> @unzip2a_dual_v16i64_exact(<16 x i64> %a, <16 x i64> %b) vscal
17301728
;
17311729
; ZIP-LABEL: unzip2a_dual_v16i64_exact:
17321730
; ZIP: # %bb.0: # %entry
1733-
; ZIP-NEXT: vsetivli zero, 8, e64, m2, ta, ma
1734-
; ZIP-NEXT: ri.vunzip2a.vv v18, v12, v14
1735-
; ZIP-NEXT: ri.vunzip2a.vv v16, v8, v10
1736-
; ZIP-NEXT: vmv4r.v v8, v16
1731+
; ZIP-NEXT: vsetivli zero, 16, e64, m4, ta, ma
1732+
; ZIP-NEXT: ri.vunzip2a.vv v16, v8, v12
1733+
; ZIP-NEXT: vmv.v.v v8, v16
17371734
; ZIP-NEXT: ret
17381735
entry:
17391736
%c = shufflevector <16 x i64> %a, <16 x i64> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>

0 commit comments

Comments
 (0)