-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[RISCV] Provide a more efficient lowering for experimental.cttz.elts. #88552
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
cdea88b
3043928
ded1cf4
e64bd03
13d5a6e
0148073
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -1484,6 +1484,11 @@ bool RISCVTargetLowering::shouldExpandGetVectorLength(EVT TripCountVT, | |||||||||||
return VF > MaxVF || !isPowerOf2_32(VF); | ||||||||||||
} | ||||||||||||
|
||||||||||||
bool RISCVTargetLowering::shouldExpandCttzElements(EVT VT) const { | ||||||||||||
return !Subtarget.hasVInstructions() || | ||||||||||||
VT.getVectorElementType() != MVT::i1 || !isTypeLegal(VT); | ||||||||||||
} | ||||||||||||
|
||||||||||||
bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, | ||||||||||||
const CallInst &I, | ||||||||||||
MachineFunction &MF, | ||||||||||||
|
@@ -8718,6 +8723,33 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, | |||||||||||
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res); | ||||||||||||
} | ||||||||||||
|
||||||||||||
static SDValue lowerCttzElts(SDNode *N, SelectionDAG &DAG, | ||||||||||||
const RISCVSubtarget &Subtarget) { | ||||||||||||
SDValue Op0 = N->getOperand(1); | ||||||||||||
MVT OpVT = Op0.getSimpleValueType(); | ||||||||||||
MVT ContainerVT = OpVT; | ||||||||||||
if (OpVT.isFixedLengthVector()) { | ||||||||||||
ContainerVT = getContainerForFixedLengthVector(DAG, OpVT, Subtarget); | ||||||||||||
Op0 = convertToScalableVector(ContainerVT, Op0, DAG, Subtarget); | ||||||||||||
} | ||||||||||||
MVT XLenVT = Subtarget.getXLenVT(); | ||||||||||||
SDLoc DL(N); | ||||||||||||
auto [Mask, VL] = getDefaultVLOps(OpVT, ContainerVT, DL, DAG, Subtarget); | ||||||||||||
SDValue Res = DAG.getNode(RISCVISD::VFIRST_VL, DL, XLenVT, Op0, Mask, VL); | ||||||||||||
if (isOneConstant(N->getOperand(2))) | ||||||||||||
return Res; | ||||||||||||
|
||||||||||||
// Convert -1 to VL. | ||||||||||||
SDValue Setcc = | ||||||||||||
DAG.getSetCC(DL, XLenVT, Res, DAG.getConstant(0, DL, XLenVT), ISD::SETLT); | ||||||||||||
// We need to use vscale rather than X0 for scalable vectors. | ||||||||||||
if (!OpVT.isFixedLengthVector()) | ||||||||||||
VL = DAG.getVScale( | ||||||||||||
DL, XLenVT, | ||||||||||||
APInt(XLenVT.getSizeInBits(), OpVT.getVectorMinNumElements())); | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can also do
Suggested change
|
||||||||||||
return DAG.getSelect(DL, XLenVT, Setcc, VL, Res); | ||||||||||||
} | ||||||||||||
|
||||||||||||
static inline void promoteVCIXScalar(const SDValue &Op, | ||||||||||||
SmallVectorImpl<SDValue> &Operands, | ||||||||||||
SelectionDAG &DAG) { | ||||||||||||
|
@@ -8913,6 +8945,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, | |||||||||||
} | ||||||||||||
case Intrinsic::experimental_get_vector_length: | ||||||||||||
return lowerGetVectorLength(Op.getNode(), DAG, Subtarget); | ||||||||||||
case Intrinsic::experimental_cttz_elts: | ||||||||||||
return lowerCttzElts(Op.getNode(), DAG, Subtarget); | ||||||||||||
case Intrinsic::riscv_vmv_x_s: { | ||||||||||||
SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1)); | ||||||||||||
return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res); | ||||||||||||
|
@@ -12336,6 +12370,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, | |||||||||||
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); | ||||||||||||
return; | ||||||||||||
} | ||||||||||||
case Intrinsic::experimental_cttz_elts: { | ||||||||||||
SDValue Res = lowerCttzElts(N, DAG, Subtarget); | ||||||||||||
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); | ||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The i32 seems slightly weird here - I'd expect XLenVT unless there's something I didn't spot on a first glance about when this code is reached? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is called by type legalization. It needs to return the original type so that it can plug in to the original users that haven't been legalized yet. Looking at it again, it should probably be N->getValuetype(0). I think we just lack test coverage for return types other than i32. |
||||||||||||
return; | ||||||||||||
} | ||||||||||||
case Intrinsic::riscv_orc_b: | ||||||||||||
case Intrinsic::riscv_brev8: | ||||||||||||
case Intrinsic::riscv_sha256sig0: | ||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We could handle the legal non-i1 case via s simple vector comparison against zero - producing a mask result that then flows through this code. From the tests, this looks like a pretty huge improvement in codegen quality.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That's true. I was just trying to cover the new usage from #88385