-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[RISCV] Use slideup to lower build_vector when all operands are (extract_element X, 0) #154450
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f74a607
c5b56c2
217402a
3dec8ff
1d8b13e
ac83561
ed3f456
41f1a97
83f1747
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -4513,41 +4513,104 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, | |||||
|
||||||
const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC; | ||||||
|
||||||
// General case: splat the first operand and slide other operands down one | ||||||
// by one to form a vector. Alternatively, if every operand is an | ||||||
// extraction from element 0 of a vector, we use that vector from the last | ||||||
// extraction as the start value and slide up instead of slide down. Such that | ||||||
// (1) we can avoid the initial splat (2) we can turn those vslide1up into | ||||||
// vslideup of 1 later and eliminate the vector to scalar movement, which is | ||||||
// something we cannot do with vslide1down/vslidedown. | ||||||
// Of course, using vslide1up/vslideup might increase the register pressure, | ||||||
// and that's why we conservatively limit to cases where every operand is an | ||||||
// extraction from the first element. | ||||||
SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end()); | ||||||
SDValue EVec; | ||||||
bool SlideUp = false; | ||||||
auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec, | ||||||
SDValue Offset, SDValue Mask, SDValue VL) -> SDValue { | ||||||
if (SlideUp) | ||||||
return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, | ||||||
Mask, VL, Policy); | ||||||
return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, | ||||||
Mask, VL, Policy); | ||||||
}; | ||||||
|
||||||
// The reason we don't use all_of here is because we're also capturing EVec | ||||||
// from the last non-undef operand. If the std::execution_policy of the | ||||||
// underlying std::all_of is anything but std::sequenced_policy we might | ||||||
// capture the wrong EVec. | ||||||
for (SDValue V : Operands) { | ||||||
using namespace SDPatternMatch; | ||||||
SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero())); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that even if we have interleaving undef "intervals", in the worst case the number of those intervals will only be one more than the number of non-undef values. |
||||||
if (!SlideUp) | ||||||
break; | ||||||
} | ||||||
|
||||||
if (SlideUp) { | ||||||
MVT EVecContainerVT = EVec.getSimpleValueType(); | ||||||
// Make sure the original vector has scalable vector type. | ||||||
if (EVecContainerVT.isFixedLengthVector()) { | ||||||
EVecContainerVT = | ||||||
getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget); | ||||||
EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget); | ||||||
} | ||||||
|
||||||
// Adapt EVec's type into ContainerVT. | ||||||
if (EVecContainerVT.getVectorMinNumElements() < | ||||||
ContainerVT.getVectorMinNumElements()) | ||||||
EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0); | ||||||
else | ||||||
EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0); | ||||||
|
||||||
// Reverse the elements as we're going to slide up from the last element. | ||||||
std::reverse(Operands.begin(), Operands.end()); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nit, does llvm::reverse work here?
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
That was actually what I thought, but unfortunately, |
||||||
} | ||||||
|
||||||
SDValue Vec; | ||||||
UndefCount = 0; | ||||||
for (SDValue V : Op->ops()) { | ||||||
for (SDValue V : Operands) { | ||||||
if (V.isUndef()) { | ||||||
UndefCount++; | ||||||
continue; | ||||||
} | ||||||
|
||||||
// Start our sequence with a TA splat in the hopes that hardware is able to | ||||||
// recognize there's no dependency on the prior value of our temporary | ||||||
// register. | ||||||
// Start our sequence with either a TA splat or extract source in the | ||||||
// hopes that hardware is able to recognize there's no dependency on the | ||||||
// prior value of our temporary register. | ||||||
if (!Vec) { | ||||||
Vec = DAG.getSplatVector(VT, DL, V); | ||||||
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); | ||||||
if (SlideUp) { | ||||||
Vec = EVec; | ||||||
} else { | ||||||
Vec = DAG.getSplatVector(VT, DL, V); | ||||||
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); | ||||||
} | ||||||
|
||||||
UndefCount = 0; | ||||||
continue; | ||||||
} | ||||||
|
||||||
if (UndefCount) { | ||||||
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); | ||||||
Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), | ||||||
Vec, Offset, Mask, VL, Policy); | ||||||
Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask, | ||||||
VL); | ||||||
UndefCount = 0; | ||||||
} | ||||||
auto OpCode = | ||||||
VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; | ||||||
|
||||||
unsigned Opcode; | ||||||
if (VT.isFloatingPoint()) | ||||||
Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL; | ||||||
else | ||||||
Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL; | ||||||
|
||||||
if (!VT.isFloatingPoint()) | ||||||
V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V); | ||||||
Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, | ||||||
Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, | ||||||
V, Mask, VL); | ||||||
} | ||||||
if (UndefCount) { | ||||||
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); | ||||||
Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), | ||||||
Vec, Offset, Mask, VL, Policy); | ||||||
Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask, | ||||||
VL); | ||||||
} | ||||||
return convertFromScalableVector(VT, Vec, DAG, Subtarget); | ||||||
} | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit, can we get away with just copying the iterator and not the storage? Does something like this work
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good question and I guess it's related to your
llvm::reverse
comment earlier: we can keep everything iterator-based (ranges) if we can write the following code: but unfortunately I don't think that would be possible without some iterator type adaptation, given
llvm::reverse
having a different type than op_values()
.