Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10140,6 +10140,35 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
DAG.getConstantFP(APFloat::getNaN(ResVT.getFltSemantics()), DL, ResVT));
}

// Fold insert_subvector(undef, BUILD_VECTOR, idx) into a single wider BUILD_VECTOR.
// e.g.,
// t7: v8i32 = BUILD_VECTOR 0, 0, 0, 0, 0, 0, 0, 0
// t9: v16i32 = insert_subvector undef:v16i32, t7, i64<8>
// ->
// BUILD_VECTOR undef, undef, undef, undef, undef, undef, undef, undef, 0, 0, 0,
Copy link
Collaborator

@topperc topperc Sep 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be a DAG combine? Typically lowering doesn't look at multiple nodes.

// 0, 0, 0, 0, 0
/// Try to rewrite an INSERT_SUBVECTOR whose base vector is undef and whose
/// inserted subvector is a BUILD_VECTOR as one BUILD_VECTOR of the result type.
///
/// Lanes outside [InsertIndex, InsertIndex + SubVecNumElements) become undef;
/// lanes inside that window reuse the corresponding BUILD_VECTOR operand.
///
/// \param Op  The node being lowered; operand 0 is the base vector, operand 1
///            the subvector and operand 2 the constant insert index.
/// \param DAG The SelectionDAG used to create the replacement nodes.
/// \returns The new BUILD_VECTOR, or an empty SDValue when the pattern does
///          not apply.
static SDValue lowerINSERT_SUBVECTORAsBUILD_VECTOR(SDValue Op,
                                                   SelectionDAG &DAG) {
  SDValue SubVec = Op.getOperand(1);
  if (!Op.getOperand(0).isUndef())
    return SDValue();
  if (SubVec.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // A BUILD_VECTOR cannot produce a scalable vector, and a scalable type has
  // no fixed element count to enumerate, so only fixed-length results qualify.
  MVT VecVT = Op.getSimpleValueType();
  if (!VecVT.isFixedLengthVector())
    return SDValue();

  SDLoc DL(Op);
  uint64_t InsertIndex = Op.getConstantOperandVal(2);
  MVT SubVecVT = SubVec.getSimpleValueType();
  // Use the type of the existing operands (not the vector element type) so
  // every operand of the new BUILD_VECTOR has the same type.
  MVT SubVecElementVT = SubVec.getOperand(0).getSimpleValueType();
  unsigned SubVecNumElements = SubVecVT.getVectorNumElements();
  unsigned NumElements = VecVT.getVectorNumElements();

  SmallVector<SDValue> NewOps(NumElements);
  for (unsigned I = 0; I != NumElements; ++I) {
    bool InsideSubVec =
        I >= InsertIndex && I < InsertIndex + SubVecNumElements;
    NewOps[I] = InsideSubVec ? SubVec.getOperand(I - InsertIndex)
                             : DAG.getUNDEF(SubVecElementVT);
  }
  return DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, NewOps);
}

SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
Expand Down Expand Up @@ -10191,6 +10220,9 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
}
}

if (SDValue V = lowerINSERT_SUBVECTORAsBUILD_VECTOR(Op, DAG))
return V;

// If the subvector vector is a fixed-length type and we don't know VLEN
// exactly, we cannot use subregister manipulation to simplify the codegen; we
// don't know which register of a LMUL group contains the specific subvector
Expand Down
23 changes: 23 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -894,3 +894,26 @@ define <4 x i32> @insert_extract_v8i32_v2i32_0(<2 x i32> %v) {
%2 = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %1, i64 0)
ret <4 x i32> %2
}

define <16 x i32> @build_vector_insert(<16 x i32> %0) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

InstCombine already flattens this. Do you have other examples?

; Returns a <16 x i32> whose lanes 0-7 are copied from %0 and whose lanes 8-15
; are zero, expressed as a single shufflevector against a constant operand.
define <16 x i32> @build_vector_insert(<16 x i32> %0) {
entry:
  ; Mask indices 0-7 select lanes 0-7 of %0; indices 24-31 select lanes 8-15
  ; of the second operand, which are the i32 0 constants (its lanes 0-7 are
  ; poison and are never selected).
  %1 = shufflevector <16 x i32> %0, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i32> %1
}

; VLA-LABEL: build_vector_insert:
; VLA: # %bb.0: # %entry
; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; VLA-NEXT: vmv.v.i v12, 0
; VLA-NEXT: vsetivli zero, 8, e32, m4, tu, ma
; VLA-NEXT: vmv.v.v v12, v8
; VLA-NEXT: vmv4r.v v8, v12
; VLA-NEXT: ret
;
; VLS-LABEL: build_vector_insert:
; VLS: # %bb.0: # %entry
; VLS-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; VLS-NEXT: vmv.v.i v12, 0
; VLS-NEXT: vmv1r.v v10, v14
; VLS-NEXT: vmv1r.v v11, v15
; VLS-NEXT: ret
entry:
%1 = call <16 x i32> @llvm.vector.insert.v16f32.v8f32(<16 x i32> poison, <8 x i32> zeroinitializer, i64 8)
%2 = shufflevector <16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
ret <16 x i32> %2
}
Loading