Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVFeatures.td
Original file line number Diff line number Diff line change
Expand Up @@ -1417,6 +1417,9 @@ def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "V
def TuneVXRMPipelineFlush : SubtargetFeature<"vxrm-pipeline-flush", "HasVXRMPipelineFlush",
"true", "VXRM writes causes pipeline flush">;

// Tuning feature "cheap-vwmul": sets the HasCheapVWMul subtarget field to
// "true". Per its description string, it enables folding a vector shift of a
// sign/zero extension into a widening multiply.
def TuneCheapVWMul : SubtargetFeature<"cheap-vwmul", "HasCheapVWMul", "true",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure the tune flag here is warranted. I would bias towards not having it unless we know some particular core is unprofitable.

"Fold vector shift of sign/zero extension to widening multiply">;

// Assume that lock-free native-width atomics are available, even if the target
// and operating system combination would not usually provide them. The user
// is responsible for providing any necessary __sync implementations. Code
Expand Down
83 changes: 81 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17341,6 +17341,85 @@ static SDValue combineScalarCTPOPToVCPOP(SDNode *N, SelectionDAG &DAG,
return DAG.getZExtOrTrunc(Pop, DL, VT);
}

// DAG combine for vector left shifts (ISD::SHL and RISCVISD::SHL_VL).
//
// First tries combineOp_VLToVWOp_VL. If that yields nothing, and the subtarget
// reports hasCheapVWMul() and the DAG has already been legalized, a shift of a
// single-use sign/zero extension by a constant splat amount C is rewritten as
// a widening multiply of the narrow operand by (1 << C).
//
// Returns the replacement node, or an empty SDValue when no fold applies.
static SDValue combineSHL(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The naming convention in this file would have this routine named performSHLCombine

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Renamed, thanks!

const RISCVSubtarget &Subtarget) {
// (shl (zext x), y) -> (vwsll x, y)
if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
return V;

// (shl (sext x), C) -> (vwmulsu x, 1u << C)
// (shl (zext x), C) -> (vwmulu x, 1u << C)

// The multiply fold is opt-in through the subtarget tuning flag.
if (!Subtarget.hasCheapVWMul())
return SDValue();

// Only run once DAG legalization has completed.
if (!DCI.isAfterLegalizeDAG())
return SDValue();

// Only fold when the extension has no other users.
SDValue LHS = N->getOperand(0);
if (!LHS.hasOneUse())
return SDValue();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For now, this only handles single-use of sext/zext.
I can rewrite it to be part of combineOp_VLToVWOp_VL so that it handles multi-use too.

// Pick the widening multiply that matches the kind of extension on the LHS;
// anything other than a sign/zero extension is rejected.
// NOTE(review): for VSEXT_VL/VZEXT_VL the extension's own mask/VL operands
// are not compared with those of the shift -- confirm this is safe.
unsigned Opcode;
switch (LHS.getOpcode()) {
case ISD::SIGN_EXTEND:
case RISCVISD::VSEXT_VL:
Opcode = RISCVISD::VWMULSU_VL;
break;
case ISD::ZERO_EXTEND:
case RISCVISD::VZEXT_VL:
Opcode = RISCVISD::VWMULU_VL;
break;
default:
return SDValue();
}

// The shift amount must be a constant splat: either a constant splat vector
// or a VMV_V_X_VL whose operand 1 is a constant.
SDValue RHS = N->getOperand(1);
APInt ShAmt;
uint64_t ShAmtInt;
if (ISD::isConstantSplatVector(RHS.getNode(), ShAmt))
ShAmtInt = ShAmt.getZExtValue();
else if (RHS.getOpcode() == RISCVISD::VMV_V_X_VL &&
RHS.getOperand(1).getOpcode() == ISD::Constant)
ShAmtInt = RHS.getConstantOperandVal(1);
else
return SDValue();

// Better foldings:
// (shl (sext x), 1) -> (vwadd x, x)
// (shl (zext x), 1) -> (vwaddu x, x)
if (ShAmtInt <= 1)
return SDValue();

// Require the shift amount to be smaller than the narrow element width, and
// the result element to be exactly twice as wide as the narrow element.
SDValue NarrowOp = LHS.getOperand(0);
MVT NarrowVT = NarrowOp.getSimpleValueType();
uint64_t NarrowBits = NarrowVT.getScalarSizeInBits();
if (ShAmtInt >= NarrowBits)
return SDValue();
MVT VT = N->getSimpleValueType(0);
if (NarrowBits * 2 != VT.getScalarSizeInBits())
return SDValue();

// A plain SHL gets an undef passthru plus the mask/VL returned by
// getDefaultScalableVLOps; SHL_VL reuses its own passthru (operand 2),
// mask (operand 3) and VL (operand 4).
SelectionDAG &DAG = DCI.DAG;
SDLoc DL(N);
SDValue Passthru, Mask, VL;
switch (N->getOpcode()) {
case ISD::SHL:
Passthru = DAG.getUNDEF(VT);
std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
break;
case RISCVISD::SHL_VL:
Passthru = N->getOperand(2);
Mask = N->getOperand(3);
VL = N->getOperand(4);
break;
default:
llvm_unreachable("Expected SHL");
}
// Multiply the narrow operand by the constant (1 << ShAmtInt), which is
// created in the narrow type.
return DAG.getNode(Opcode, DL, VT, NarrowOp,
DAG.getConstant(1ULL << ShAmtInt, SDLoc(RHS), NarrowVT),
Passthru, Mask, VL);
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
Expand Down Expand Up @@ -17970,7 +18049,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
break;
}
case RISCVISD::SHL_VL:
if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
if (SDValue V = combineSHL(N, DCI, Subtarget))
return V;
[[fallthrough]];
case RISCVISD::SRA_VL:
Expand All @@ -17995,7 +18074,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SRL:
case ISD::SHL: {
if (N->getOpcode() == ISD::SHL) {
if (SDValue V = combineOp_VLToVWOp_VL(N, DCI, Subtarget))
if (SDValue V = combineSHL(N, DCI, Subtarget))
return V;
}
SDValue ShAmt = N->getOperand(1);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/RISCV/RISCVProcessors.td
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,8 @@ def SPACEMIT_X60 : RISCVProcessorModel<"spacemit-x60",
TuneOptimizedNF2SegmentLoadStore,
TuneOptimizedNF3SegmentLoadStore,
TuneOptimizedNF4SegmentLoadStore,
TuneVXRMPipelineFlush]> {
TuneVXRMPipelineFlush,
TuneCheapVWMul]> {
let MVendorID = 0x710;
let MArchID = 0x8000000058000001;
let MImpID = 0x1000000049772200;
Expand Down
Loading