Skip to content

Commit 262a72f

Browse files
committed
[RISCV] Use stack slot to handle SPLAT_VECTOR_PARTS on RV32.
Reduces the number of vector ALU operations and reduces vector register pressure.
1 parent 254e289 commit 262a72f

24 files changed

+1215
-1178
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 28 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,18 +1511,15 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
15111511
return SDValue();
15121512
}
15131513

1514-
// Called by type legalization to handle splat of i64 on RV32.
1515-
// FIXME: We can optimize this when the type has sign or zero bits in one
1516-
// of the halves.
1517-
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
1518-
SDValue VL, SelectionDAG &DAG,
1519-
const RISCVSubtarget &Subtarget) {
1520-
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
1521-
DAG.getConstant(0, DL, MVT::i32));
1522-
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
1523-
DAG.getConstant(1, DL, MVT::i32));
1524-
1525-
// Fall back to a stack store and stride x0 vector load.
1514+
// Use a stack slot to splat the two i32 values in Lo/Hi to the desired
1515+
// vector nxvXi64 VT.
1516+
static SDValue splatPartsI64ThroughStack(const SDLoc &DL, MVT VT, SDValue Lo,
1517+
SDValue Hi, SDValue VL,
1518+
SelectionDAG &DAG,
1519+
const RISCVSubtarget &Subtarget) {
1520+
assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
1521+
Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
1522+
"Unexpected VTs!");
15261523
MachineFunction &MF = DAG.getMachineFunction();
15271524
RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
15281525

@@ -1553,6 +1550,21 @@ static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
15531550
MPI, Align(8), MachineMemOperand::MOLoad);
15541551
}
15551552

1553+
// Called by type legalization to handle splat of i64 on RV32.
1554+
// FIXME: We can optimize this when the type has sign or zero bits in one
1555+
// of the halves.
1556+
static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
1557+
SDValue VL, SelectionDAG &DAG,
1558+
const RISCVSubtarget &Subtarget) {
1559+
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
1560+
DAG.getConstant(0, DL, MVT::i32));
1561+
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
1562+
DAG.getConstant(1, DL, MVT::i32));
1563+
1564+
// Fall back to a stack store and stride x0 vector load.
1565+
return splatPartsI64ThroughStack(DL, VT, Lo, Hi, VL, DAG, Subtarget);
1566+
}
1567+
15561568
// This function lowers a splat of a scalar operand Splat with the vector
15571569
// length VL. It ensures the final sequence is type legal, which is useful when
15581570
// lowering a splat after type legalization.
@@ -2761,7 +2773,7 @@ SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
27612773
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
27622774
SelectionDAG &DAG) const {
27632775
SDLoc DL(Op);
2764-
EVT VecVT = Op.getValueType();
2776+
MVT VecVT = Op.getSimpleValueType();
27652777
assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
27662778
"Unexpected SPLAT_VECTOR_PARTS lowering");
27672779

@@ -2784,27 +2796,9 @@ SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
27842796
Hi.getConstantOperandVal(1) == 31)
27852797
return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
27862798

2787-
// Else, on RV32 we lower an i64-element SPLAT_VECTOR thus, being careful not
2788-
// to accidentally sign-extend the 32-bit halves to the e64 SEW:
2789-
// vmv.v.x vX, hi
2790-
// vsll.vx vX, vX, /*32*/
2791-
// vmv.v.x vY, lo
2792-
// vsll.vx vY, vY, /*32*/
2793-
// vsrl.vx vY, vY, /*32*/
2794-
// vor.vv vX, vX, vY
2795-
SDValue ThirtyTwoV = DAG.getConstant(32, DL, VecVT);
2796-
2797-
Lo = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
2798-
Lo = DAG.getNode(ISD::SHL, DL, VecVT, Lo, ThirtyTwoV);
2799-
Lo = DAG.getNode(ISD::SRL, DL, VecVT, Lo, ThirtyTwoV);
2800-
2801-
if (isNullConstant(Hi))
2802-
return Lo;
2803-
2804-
Hi = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Hi);
2805-
Hi = DAG.getNode(ISD::SHL, DL, VecVT, Hi, ThirtyTwoV);
2806-
2807-
return DAG.getNode(ISD::OR, DL, VecVT, Lo, Hi);
2799+
// Fall back to using a stack store and stride x0 vector load. Use X0 as VL.
2800+
return splatPartsI64ThroughStack(
2801+
DL, VecVT, Lo, Hi, DAG.getRegister(RISCV::X0, MVT::i64), DAG, Subtarget);
28082802
}
28092803

28102804
// Custom-lower extensions from mask vectors by using a vselect either with 1

0 commit comments

Comments
 (0)