-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[VPlan] Unroll VPReplicateRecipe by VF. #142433
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
c354bdc
2e877b5
0800450
884b9d3
ed982d4
14e296c
48699d9
cc1a779
47b9665
af2d2c0
5a73ebe
ab6665c
b6a0834
ae3e3c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -261,6 +261,13 @@ Value *VPTransformState::get(const VPValue *Def, const VPLane &Lane) { | |
return Data.VPV2Scalars[Def][0]; | ||
} | ||
|
||
// Look through BuildVector to avoid redundant extracts. | ||
// TODO: Remove once replicate regions are unrolled explicitly. | ||
if (Lane.getKind() == VPLane::Kind::First && match(Def, m_BuildVector())) { | ||
auto *BuildVector = cast<VPInstruction>(Def); | ||
return get(BuildVector->getOperand(Lane.getKnownLane()), true); | ||
} | ||
|
||
assert(hasVectorValue(Def)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Independent: missing error message. |
||
auto *VecPart = Data.VPV2Vector[Def]; | ||
if (!VecPart->getType()->isVectorTy()) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -936,6 +936,13 @@ class VPInstruction : public VPRecipeWithIRFlags, | |
BranchOnCount, | ||
BranchOnCond, | ||
Broadcast, | ||
/// Given operands of (the same) struct type, creates a struct of fixed- | ||
/// width vectors each containing a struct field of all operands. The | ||
/// number of operands matches the element count of every vector. | ||
BuildStructVector, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Lex order has BuildVector after BuildStructVector? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. reordered, thanks |
||
/// Creates a fixed-width vector containing all operands. The number of | ||
/// operands matches the vector element count. | ||
BuildVector, | ||
ComputeAnyOfResult, | ||
ComputeFindLastIVResult, | ||
ComputeReductionResult, | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -224,6 +224,9 @@ struct Recipe_match { | |||||||||||||||||
if ((!matchRecipeAndOpcode<RecipeTys>(R) && ...)) | ||||||||||||||||||
return false; | ||||||||||||||||||
|
||||||||||||||||||
auto *VPI = dyn_cast<VPInstruction>(R); | ||||||||||||||||||
if (VPI && VPI->getOpcode() == VPInstruction::BuildVector) | ||||||||||||||||||
return true; | ||||||||||||||||||
|
auto *VPI = dyn_cast<VPInstruction>(R); | |
if (VPI && VPI->getOpcode() == VPInstruction::BuildVector) | |
return true; | |
// Finally match operands, except for BuildVector which is matched w/o checking its operands. | |
auto *VPI = dyn_cast<VPInstruction>(R); | |
if (VPI && VPI->getOpcode() == VPInstruction::BuildVector) | |
return true; |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better to check instead if Ops_t is empty and if so assert that R is BuildVector and early return, or set NumOperands to zero instead of R->getNumOperands() and continue?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Moved up to handle the case first, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Independent nit: worth noting below that "Commutative" checks operands in reverse order, which works best for binary operations.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
inline ZeroOpVPInstruction_match<VPInstruction::BuildVector> m_BuildVector() { | |
return ZeroOpVPInstruction_match<VPInstruction::BuildVector>(); | |
} | |
/// BuildVector is matches only its opcode, w/o matching its operands. | |
inline ZeroOpVPInstruction_match<VPInstruction::BuildVector> m_BuildVector() { | |
return ZeroOpVPInstruction_match<VPInstruction::BuildVector>(); | |
} | |
plus some explanation why - number of operands varies?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -495,9 +495,9 @@ Value *VPInstruction::generate(VPTransformState &State) { | |||||||||||||||||||||
} | ||||||||||||||||||||||
case Instruction::ExtractElement: { | ||||||||||||||||||||||
assert(State.VF.isVector() && "Only extract elements from vectors"); | ||||||||||||||||||||||
Value *Vec = State.get(getOperand(0)); | ||||||||||||||||||||||
Value *Idx = State.get(getOperand(1), /*IsScalar=*/true); | ||||||||||||||||||||||
return Builder.CreateExtractElement(Vec, Idx, Name); | ||||||||||||||||||||||
unsigned IdxToExtract = | ||||||||||||||||||||||
cast<ConstantInt>(getOperand(1)->getLiveInIRValue())->getZExtValue(); | ||||||||||||||||||||||
return State.get(getOperand(0), VPLane(IdxToExtract)); | ||||||||||||||||||||||
|
||||||||||||||||||||||
} | ||||||||||||||||||||||
case Instruction::Freeze: { | ||||||||||||||||||||||
Value *Op = State.get(getOperand(0), vputils::onlyFirstLaneUsed(this)); | ||||||||||||||||||||||
|
@@ -608,6 +608,32 @@ Value *VPInstruction::generate(VPTransformState &State) { | |||||||||||||||||||||
return Builder.CreateVectorSplat( | ||||||||||||||||||||||
State.VF, State.get(getOperand(0), /*IsScalar*/ true), "broadcast"); | ||||||||||||||||||||||
} | ||||||||||||||||||||||
case VPInstruction::BuildStructVector: { | ||||||||||||||||||||||
// For struct types, we need to build a new 'wide' struct type, where each | ||||||||||||||||||||||
// element is widened, i.e. we crate a struct of vectors . | ||||||||||||||||||||||
|
// element is widened, i.e. we crate a struct of vectors . | |
// element is widened, i.e., we create a struct of vectors. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Fixed, thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it be better to interchange the loops, inserting all elements into each vector consecutively and then insert each vector into Res once?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Currently this matches the construction order as packScalarIntoVectorizedValue
, would probably be good to only adjust once its last user has been removed.
Currently that's stil lin State.set, but will double-check if that code is actually used for struct types (only should be relevant for recipes in replicate regions)
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would be good to clarify that Idx
represents an enumeration of vector elements while I
represents an enumeration of struct fields. Admittedly StructTy->getNumElements()
may also cause confusion. How about LaneIndex
and FieldIndex
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"VPReplicateRecipes outside replicate regions must be unrolled"); | |
"VPReplicateRecipes outside replicate regions must have already been unrolled"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Handle simpler single-scalar case first and early-return, assert one of above two cases applies:
if (!State.Lane) {
assert(IsSingleScalar && "...");
scalarizeInstruction(UI, this, VPLane(0), State);
return;
}
// Generate a single instance.
...
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"uniform recipe shouldn't be predicated"); | |
"uniform recipe shouldn't be predicated"); | |
assert(!State.VF.isScalable() && "Can't scalarize a scalable vector"); |
retain the assert here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (State.Lane->isFirstLane()) { | |
assert(!State.VF.isScalable() && "VF is assumed to be non scalable."); | |
WideValue = PoisonValue::get(VectorType::get(UI->getType(), State.VF)); | |
} else { | |
WideValue = State.get(this); | |
} | |
if (State.Lane->isFirstLane()) | |
WideValue = PoisonValue::get(VectorType::get(UI->getType(), State.VF)); | |
else | |
WideValue = State.get(this); |
drop it from here?
Can also simplify using a ternary Value *WideValue = State.Lane->isFirstLane() ? ... : ... ;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -15,6 +15,7 @@ | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#include "VPlan.h" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#include "VPlanAnalysis.h" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#include "VPlanCFG.h" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#include "VPlanHelpers.h" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#include "VPlanPatternMatch.h" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#include "VPlanTransforms.h" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
#include "VPlanUtils.h" | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
@@ -450,3 +451,86 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF, LLVMContext &Ctx) { | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
VPlanTransforms::removeDeadRecipes(Plan); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
/// Create a single-scalar clone of \p RepR for lane \p Lane. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
static VPReplicateRecipe *cloneForLane(VPlan &Plan, VPBuilder &Builder, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Type *IdxTy, VPReplicateRecipe *RepR, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
VPLane Lane) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Collect the operands at Lane, creating extracts as needed. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
SmallVector<VPValue *> NewOps; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
for (VPValue *Op : RepR->operands()) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (vputils::isSingleScalar(Op)) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NewOps.push_back(Op); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (Lane.getKind() == VPLane::Kind::ScalableLast) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NewOps.push_back( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op})); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Look through buildvector to avoid unnecessary extracts. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (match(Op, m_BuildVector())) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NewOps.push_back( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cast<VPInstruction>(Op)->getOperand(Lane.getKnownLane())); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
VPValue *Idx = | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane())); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx}); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
NewOps.push_back(Ext); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
auto *New = | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could this be a VPInstruction? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
New->insertBefore(RepR); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That this failed to call transferFlags here, which resulted in miscompiles. I've created #147398 to fix. |
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
return New; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Type *IdxTy = IntegerType::get( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
Plan.getScalarHeader()->getIRBasicBlock()->getContext(), 32); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>( | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
auto *RepR = dyn_cast<VPReplicateRecipe>(&R); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (!RepR || RepR->isSingleScalar()) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
VPBuilder Builder(RepR); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
SmallVector<VPValue *> LaneDefs; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
// Stores to invariant addresses need to store the last lane only. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (isa<StoreInst>(RepR->getUnderlyingInstr()) && | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
vputils::isSingleScalar(RepR->getOperand(1))) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF)); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
RepR->eraseFromParent(); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
/// Create single-scalar version of RepR for all lanes. | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I))); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
if (RepR->getNumUsers() == 0) { | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
RepR->eraseFromParent(); | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|
if (isa<StoreInst>(RepR->getUnderlyingInstr()) && | |
vputils::isSingleScalar(RepR->getOperand(1))) { | |
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF)); | |
RepR->eraseFromParent(); | |
continue; | |
} | |
/// Create single-scalar version of RepR for all lanes. | |
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) | |
LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I))); | |
if (RepR->getNumUsers() == 0) { | |
RepR->eraseFromParent(); | |
continue; | |
} | |
if (RepR->getNumUsers() == 0) { | |
if (isa<StoreInst>(RepR->getUnderlyingInstr()) && | |
vputils::isSingleScalar(RepR->getOperand(1))) { | |
// Stores to invariant addresses need to store the last lane only. | |
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane::getLastLaneForVF(VF)); | |
} else { | |
// Create single-scalar version of RepR for all lanes. | |
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) | |
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I)); | |
} | |
RepR->eraseFromParent(); | |
continue; | |
} | |
/// Create and record single-scalar version of RepR for all lanes. | |
SmallVector<VPValue *> LaneDefs; | |
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) | |
LaneDefs.push_back(cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I))); | |
`` |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So a pair of replicating recipes one feeding the other is replaced by VF recipes feeding a buildVector which VF other recipes extract from, where the extracts are optimized away by cloneForLane(); and the buildVector possibly by dce?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is done now rather than later to reduce test diff?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep without the fold we would have additional insert/extracts for uses inside replicate regions, with corresponding test changes.