Skip to content

Commit a10508e

Browse files
committed
[RISCV] Prefer concat then unzip for fractional LMUL
This saves one unzip instruction, and avoids a vsetvl toggle.
1 parent c88ad6f commit a10508e

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5840,12 +5840,23 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
58405840
lowerVZIP(Opc, Src, DAG.getUNDEF(NewVT), DL, DAG, Subtarget);
58415841
return DAG.getExtractSubvector(DL, VT, Res, 0);
58425842
}
5843-
// Narrow each source and concatenate them.
5844-
// FIXME: For small LMUL it is better to concatenate first.
5843+
// Deinterleave each source and concatenate them, or concat first, then
5844+
// deinterleave once when the sources occupy only a fractional LMUL.
58455845
if (1 < count_if(Mask,
58465846
[&Mask](int Idx) { return Idx < (int)Mask.size(); }) &&
58475847
1 < count_if(Mask,
58485848
[&Mask](int Idx) { return Idx >= (int)Mask.size(); })) {
5849+
5850+
const unsigned EltSize = VT.getScalarSizeInBits();
5851+
const unsigned MinVLMAX = Subtarget.getRealMinVLen() / EltSize;
5852+
if (NumElts < MinVLMAX) {
5853+
MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
5854+
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
5855+
SDValue Res =
5856+
lowerVZIP(Opc, Concat, DAG.getUNDEF(ConcatVT), DL, DAG, Subtarget);
5857+
return DAG.getExtractSubvector(DL, VT, Res, 0);
5858+
}
5859+
58495860
SDValue Lo = lowerVZIP(Opc, V1, DAG.getUNDEF(VT), DL, DAG, Subtarget);
58505861
SDValue Hi = lowerVZIP(Opc, V2, DAG.getUNDEF(VT), DL, DAG, Subtarget);
58515862

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave2.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1598,12 +1598,10 @@ define <4 x i64> @unzip2a_dual_v4i64_exact_nf2(<4 x i64> %a, <4 x i64> %b) vscal
15981598
;
15991599
; ZIP-LABEL: unzip2a_dual_v4i64_exact_nf2:
16001600
; ZIP: # %bb.0: # %entry
1601-
; ZIP-NEXT: vsetivli zero, 4, e64, m1, ta, ma
1602-
; ZIP-NEXT: ri.vunzip2a.vv v11, v9, v10
1601+
; ZIP-NEXT: vsetivli zero, 8, e64, m1, ta, ma
1602+
; ZIP-NEXT: vslideup.vi v8, v9, 4
16031603
; ZIP-NEXT: ri.vunzip2a.vv v9, v8, v10
1604-
; ZIP-NEXT: vsetvli zero, zero, e64, m1, tu, ma
1605-
; ZIP-NEXT: vslideup.vi v9, v11, 2
1606-
; ZIP-NEXT: vmv1r.v v8, v9
1604+
; ZIP-NEXT: vmv.v.v v8, v9
16071605
; ZIP-NEXT: ret
16081606
entry:
16091607
%c = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>

0 commit comments

Comments
 (0)