-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[VPlan] Add VPInstruction to unpack vector values to scalars. #155670
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
a2a1372
b49efbc
719b51d
dc2df3a
39605b4
2f90433
26d2add
f617993
b4a34f9
14f2feb
133f50a
a5878f0
f99634c
7231363
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -7172,7 +7172,8 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan( | |||||
// TODO: Move to VPlan transform stage once the transition to the VPlan-based | ||||||
// cost model is complete for better cost estimates. | ||||||
VPlanTransforms::runPass(VPlanTransforms::unrollByUF, BestVPlan, BestUF); | ||||||
VPlanTransforms::runPass(VPlanTransforms::materializeBuildVectors, BestVPlan); | ||||||
VPlanTransforms::runPass(VPlanTransforms::materializeBuildAndUnpackVectors, | ||||||
|
VPlanTransforms::runPass(VPlanTransforms::materializeBuildAndUnpackVectors, | |
VPlanTransforms::runPass(VPlanTransforms::materializePacksAndUnpacks, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1060,6 +1060,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, | |
ResumeForEpilogue, | ||
/// Returns the value for vscale. | ||
VScale, | ||
/// Extracts all lanes from its (non-scalable) vector operand. | ||
|
||
UnpackVector, | ||
}; | ||
|
||
/// Returns true if this VPInstruction generates scalar values for all lanes. | ||
|
@@ -2708,6 +2710,15 @@ class LLVM_ABI_FOR_TEST VPReductionRecipe : public VPRecipeWithIRFlags { | |
return R && classof(R); | ||
} | ||
|
||
static inline bool classof(const VPValue *VPV) { | ||
const VPRecipeBase *R = VPV->getDefiningRecipe(); | ||
return R && classof(R); | ||
} | ||
|
||
static inline bool classof(const VPSingleDefRecipe *R) { | ||
return classof(static_cast<const VPRecipeBase *>(R)); | ||
} | ||
|
||
Comment on lines
+2723
to
+2731
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. OK, related to the introduction of unpack? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's needed for this change, due to the new cast/dyn_cast/isa with VPSingleDefRecipe |
||
/// Generate the reduction in the loop. | ||
void execute(VPTransformState &State) override; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -506,6 +506,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) { | |
case VPInstruction::ExtractPenultimateElement: | ||
case VPInstruction::FirstActiveLane: | ||
case VPInstruction::Not: | ||
case VPInstruction::UnpackVector: | ||
return 1; | ||
case Instruction::ICmp: | ||
case Instruction::FCmp: | ||
|
@@ -1231,6 +1232,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const { | |
case VPInstruction::StepVector: | ||
case VPInstruction::ReductionStartVector: | ||
case VPInstruction::VScale: | ||
case VPInstruction::UnpackVector: | ||
return false; | ||
default: | ||
return true; | ||
|
@@ -1274,8 +1276,6 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
return getNumOperands() > 1; | ||
case VPInstruction::PtrAdd: | ||
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); | ||
case VPInstruction::WidePtrAdd: | ||
return Op == getOperand(0); | ||
Comment on lines
1292
to
1293
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Unrelated? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The current code here doesn't match the codegen for WidePtrAdd which is exposed by this patch; it only uses the first lane, if the pointer is a single scalar. With this code, we would incorrectly insert an
Comment on lines
1292
to
1293
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How/Is this related? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The patch exposed an issue here, more details in https://github.com/llvm/llvm-project/pull/155670/files#r2347298616 above There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Defaulting to false seems a bit odd, as the first operand should be a scalar base. Worth a comment? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment. It supports both vector and scalar base addresses. |
||
case VPInstruction::ComputeAnyOfResult: | ||
case VPInstruction::ComputeFindIVResult: | ||
return Op == getOperand(1); | ||
|
@@ -1399,6 +1399,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, | |
case VPInstruction::ResumeForEpilogue: | ||
O << "resume-for-epilogue"; | ||
break; | ||
case VPInstruction::UnpackVector: | ||
O << "unpack-into-scalars"; | ||
|
||
break; | ||
default: | ||
O << Instruction::getOpcodeName(getOpcode()); | ||
} | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1224,6 +1224,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { | |||||
return; | ||||||
} | ||||||
|
||||||
VPValue *Idx; | ||||||
if (match(&R, m_VPInstruction<Instruction::ExtractElement>(m_BuildVector(), | ||||||
|
||||||
m_VPValue(Idx)))) { | ||||||
auto *BuildVector = cast<VPInstruction>(R.getOperand(0)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm, I think it could, but would be inconsistent with other matchers, where the inner matcher match the operands. |
||||||
Def->replaceAllUsesWith(BuildVector->getOperand( | ||||||
cast<ConstantInt>(Idx->getLiveInIRValue())->getZExtValue())); | ||||||
|
||||||
return; | ||||||
} | ||||||
|
||||||
if (auto *Phi = dyn_cast<VPPhi>(Def)) { | ||||||
if (Phi->getNumOperands() == 1) | ||||||
Phi->replaceAllUsesWith(Phi->getOperand(0)); | ||||||
|
@@ -3706,7 +3715,7 @@ void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan, | |||||
BTC->replaceAllUsesWith(TCMO); | ||||||
} | ||||||
|
||||||
void VPlanTransforms::materializeBuildVectors(VPlan &Plan) { | ||||||
void VPlanTransforms::materializeBuildAndUnpackVectors(VPlan &Plan) { | ||||||
if (Plan.hasScalarVFOnly()) | ||||||
return; | ||||||
|
||||||
|
@@ -3755,6 +3764,48 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) { | |||||
}); | ||||||
} | ||||||
} | ||||||
|
||||||
// Create explicit VPInstructions to convert vectors to scalars. | ||||||
for (VPBasicBlock *VPBB : | ||||||
concat<VPBasicBlock *>(VPBBsOutsideLoopRegion, VPBBsInsideLoopRegion)) { | ||||||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { | ||||||
if (isa<VPReplicateRecipe, VPInstruction, VPScalarIVStepsRecipe>(&R)) | ||||||
continue; | ||||||
for (VPValue *Def : R.definedValues()) { | ||||||
artagnon marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
if (vputils::isSingleScalar(Def) || vputils::onlyFirstLaneUsed(Def)) | ||||||
|
||||||
continue; | ||||||
|
||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth explaining how scalar users inside replicate regions (continue to) extract their values w/o going through an Unpack VPInstruction. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done and added TODO, thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! Better placed before the lambda? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should be moved, thanks |
||||||
if (VPBB->getParent() != Plan.getVectorLoopRegion()) | ||||||
|
if (VPBB->getParent() != Plan.getVectorLoopRegion()) | |
if (VPBB->getParent() != LoopRegion) |
Can this ever happen? I thought we were doing a shallow traversal?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This skipped VPInstructions outside regions, I updated the traversal to not visit those.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto UsesVectorOrInsideReplicateRegion = [LoopRegion](VPUser *U) { | |
auto IsInsideReplicateRegion = [LoopRegion](VPUser *U) { |
Where does it check that it uses vector?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes that needs to be done separately in the latest version, renamed, thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better check if ParentRegion is replicating, consistent with the lambda's name?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks!
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Quick note: usesScalars can be a bit weak; I put up chainUsesScalarValues to fix this, but I'm not sure if it's the best idea.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For this use case, it should be fine, we just need to know if the recipe may use scalars and provide it
artagnon marked this conversation as resolved.
Show resolved
Hide resolved
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -319,8 +319,9 @@ struct VPlanTransforms { | |||||
VPBasicBlock *VectorPH); | ||||||
|
||||||
/// Add explicit Build[Struct]Vector recipes that combine multiple scalar | ||||||
/// values into single vectors. | ||||||
static void materializeBuildVectors(VPlan &Plan); | ||||||
/// values into single vectors and UnpackVector to extract scalars from a | ||||||
|
/// values into single vectors and UnpackVector to extract scalars from a | |
/// values into single vectors and Unpack recipes to extract scalars from a |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -466,25 +466,40 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) { | |
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or | ||
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar | ||
/// definitions for operands of \DefR. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Independent: "\p DefR" rather than "\DefR" |
||
static VPRecipeWithIRFlags * | ||
static VPValue * | ||
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, | ||
VPRecipeWithIRFlags *DefR, VPLane Lane, | ||
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) { | ||
|
||
|
||
VPValue *Op; | ||
if (match(DefR, | ||
m_VPInstruction<VPInstruction::UnpackVector>(m_VPValue(Op)))) { | ||
auto LaneDefs = Def2LaneDefs.find(Op); | ||
if (LaneDefs != Def2LaneDefs.end()) | ||
return LaneDefs->second[Lane.getKnownLane()]; | ||
|
||
VPValue *Idx = | ||
Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane())); | ||
return Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx}); | ||
} | ||
|
||
// Collect the operands at Lane, creating extracts as needed. | ||
SmallVector<VPValue *> NewOps; | ||
for (VPValue *Op : DefR->operands()) { | ||
if (Lane.getKind() == VPLane::Kind::ScalableLast) { | ||
match(Op, m_VPInstruction<VPInstruction::UnpackVector>(m_VPValue(Op))); | ||
|
||
NewOps.push_back( | ||
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op})); | ||
continue; | ||
} | ||
|
||
// If Op is a definition that has been unrolled, directly use the clone for | ||
// the corresponding lane. | ||
auto LaneDefs = Def2LaneDefs.find(Op); | ||
if (LaneDefs != Def2LaneDefs.end()) { | ||
NewOps.push_back(LaneDefs->second[Lane.getKnownLane()]); | ||
continue; | ||
} | ||
if (Lane.getKind() == VPLane::Kind::ScalableLast) { | ||
NewOps.push_back( | ||
Builder.createNaryOp(VPInstruction::ExtractLastElement, {Op})); | ||
continue; | ||
} | ||
if (vputils::isSingleScalar(Op)) { | ||
NewOps.push_back(Op); | ||
continue; | ||
|
@@ -498,8 +513,8 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, | |
} | ||
VPValue *Idx = | ||
Plan.getOrAddLiveIn(ConstantInt::get(IdxTy, Lane.getKnownLane())); | ||
VPValue *Ext = Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx}); | ||
NewOps.push_back(Ext); | ||
NewOps.push_back( | ||
Builder.createNaryOp(Instruction::ExtractElement, {Op, Idx})); | ||
|
||
} | ||
|
||
VPRecipeWithIRFlags *New; | ||
|
@@ -548,7 +563,8 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { | |
(isa<VPReplicateRecipe>(&R) && | ||
cast<VPReplicateRecipe>(&R)->isSingleScalar()) || | ||
(isa<VPInstruction>(&R) && | ||
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes())) | ||
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes() && | ||
cast<VPInstruction>(&R)->getOpcode() != VPInstruction::UnpackVector)) | ||
artagnon marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
continue; | ||
|
||
auto *DefR = cast<VPRecipeWithIRFlags>(&R); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -84,6 +84,8 @@ inline bool isSingleScalar(const VPValue *VPV) { | |
return VPI->isSingleScalar() || VPI->isVectorToScalar() || | ||
(PreservesUniformity(VPI->getOpcode()) && | ||
all_of(VPI->operands(), isSingleScalar)); | ||
if (auto *Reduce = dyn_cast<VPReductionRecipe>(VPV)) | ||
return true; | ||
|
||
|
||
// VPExpandSCEVRecipes must be placed in the entry and are alway uniform. | ||
return isa<VPExpandSCEVRecipe>(VPV); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this comment still relevant?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, comment indicates that these transforms are still taking place during VPlan execution rather than planning. Perhaps worth rewording to clarify.