37 changes: 21 additions & 16 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10756,10 +10756,6 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT,
Op.getOperand(0), Op.getOperand(1));

// We want to operate on all lanes, so get the mask and VL and mask for it
auto [Mask, VL] = getDefaultScalableVLOps(ConcatVT, DL, DAG, Subtarget);
SDValue Passthru = DAG.getUNDEF(ConcatVT);

// We can deinterleave through vnsrl.wi if the element type is smaller than
// ELEN
if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
@@ -10771,19 +10767,28 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
}

// For the indices, use the same SEW to avoid an extra vsetvli
// TODO: If container type is larger than m1, we can consider using a splat
// of a constant instead of the following sequence

Review comment (Contributor), on the "For the indices" comment above: Is this comment still needed? Since we don't have indices anymore, just a mask

MVT IdxVT = ConcatVT.changeVectorElementTypeToInteger();
// Create a vector of even indices {0, 2, 4, ...}
SDValue EvenIdx =
DAG.getStepVector(DL, IdxVT, APInt(IdxVT.getScalarSizeInBits(), 2));
// Create a vector of odd indices {1, 3, 5, ... }
SDValue OddIdx =
DAG.getNode(ISD::ADD, DL, IdxVT, EvenIdx, DAG.getConstant(1, DL, IdxVT));

// Gather the even and odd elements into two separate vectors
SDValue EvenWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
Concat, EvenIdx, Passthru, Mask, VL);
SDValue OddWide = DAG.getNode(RISCVISD::VRGATHER_VV_VL, DL, ConcatVT,
Concat, OddIdx, Passthru, Mask, VL);
SDValue StepVec = DAG.getStepVector(DL, IdxVT);
// 0, 1, 0, 1, 0, 1
SDValue ZeroOnes =
DAG.getNode(ISD::AND, DL, IdxVT, StepVec, DAG.getConstant(1, DL, IdxVT));
MVT MaskVT = ConcatVT.changeVectorElementType(MVT::i1);
SDValue EvenMask =
DAG.getSetCC(DL, MaskVT, ZeroOnes, DAG.getConstant(0, DL, IdxVT),
ISD::CondCode::SETEQ);
// Have the latter be the not of the former to minimize the live range of
// the index vector since that might be large.
SDValue OddMask = DAG.getLogicalNOT(DL, EvenMask, MaskVT);

// vcompress the even and odd elements into two separate vectors
SDValue EvenWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
EvenMask, DAG.getUNDEF(ConcatVT));
SDValue OddWide = DAG.getNode(ISD::VECTOR_COMPRESS, DL, ConcatVT, Concat,
OddMask, DAG.getUNDEF(ConcatVT));

// Extract the result half of the gather for even and odd
SDValue Even = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VecVT, EvenWide,
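
To make the new strategy concrete, here is a minimal scalar model of the mask-and-compress deinterleave (a sketch for illustration only: the element values and the `compress` helper are ours, and it models ISD::VECTOR_COMPRESS semantics rather than any LLVM API):

```cpp
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Models ISD::VECTOR_COMPRESS: lanes whose mask bit is set are packed,
// in source order, at the low end of the result.
static std::vector<uint64_t> compress(const std::vector<uint64_t> &Src,
                                      const std::vector<bool> &Mask) {
  std::vector<uint64_t> Out;
  for (std::size_t I = 0; I < Src.size(); ++I)
    if (Mask[I])
      Out.push_back(Src[I]);
  return Out;
}

int main() {
  // Concatenation of the two operands: interleaved data {a0, b0, a1, b1, ...}.
  std::vector<uint64_t> Concat = {10, 20, 11, 21, 12, 22, 13, 23};

  // StepVec = {0, 1, 2, ...}; StepVec & 1 = {0, 1, 0, 1, ...}.
  // EvenMask = ((StepVec & 1) == 0); OddMask is its logical NOT, so the
  // (potentially large) step vector need not stay live for the second mask.
  std::vector<bool> EvenMask(Concat.size()), OddMask(Concat.size());
  for (std::size_t I = 0; I < Concat.size(); ++I) {
    EvenMask[I] = (I & 1) == 0;
    OddMask[I] = !EvenMask[I];
  }

  // Compress even and odd lanes into two packed vectors.
  std::vector<uint64_t> Even = compress(Concat, EvenMask);
  std::vector<uint64_t> Odd = compress(Concat, OddMask);
  assert((Even == std::vector<uint64_t>{10, 11, 12, 13}));
  assert((Odd == std::vector<uint64_t>{20, 21, 22, 23}));
  return 0;
}
```

In effect, two vrgather.vv gathers driven by separate even/odd index vectors become two vcompress.vm ops driven by one mask and its complement, which is what shrinks the register pressure and spill traffic visible in the test diff below.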
76 changes: 18 additions & 58 deletions llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -106,95 +106,55 @@ define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_load_nxv8i6
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: li a2, 40
; CHECK-NEXT: li a2, 24
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: vadd.vv v24, v8, v8
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 4
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: vmseq.vi v24, v8, 0
; CHECK-NEXT: vl8re64.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vadd.vi v8, v24, 1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vrgather.vv v8, v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vrgather.vv v24, v16, v0
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmnot.m v6, v24
; CHECK-NEXT: vcompress.vm v8, v16, v24
; CHECK-NEXT: vmv1r.v v13, v24
; CHECK-NEXT: vcompress.vm v24, v16, v6
; CHECK-NEXT: vmv1r.v v12, v6
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vrgather.vv v24, v16, v0
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vcompress.vm v0, v16, v13
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vrgather.vv v16, v24, v0
; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vcompress.vm v0, v16, v12
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv4r.v v12, v16
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv4r.v v28, v16
; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 40
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: .cfi_def_cfa sp, 16