-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[VPlan] Explicitly replicate VPInstructions by VF. #155102
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
b80eee3
0d04499
1e62f9f
c5b4978
14c1d98
f9c3619
6bcaac8
32bc393
e6ac419
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -343,37 +343,21 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { | |||||||||||
LastLane = 0; | ||||||||||||
} | ||||||||||||
|
||||||||||||
auto *LastInst = cast<Instruction>(get(Def, LastLane)); | ||||||||||||
// We need to construct the vector value for a single-scalar value by | ||||||||||||
// broadcasting the scalar to all lanes. | ||||||||||||
Comment on lines
+346
to
+347
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done thanks |
||||||||||||
// TODO: Replace by introducing Broadcast VPInstructions. | ||||||||||||
assert(IsSingleScalar && "must be a single-scalar at this point"); | ||||||||||||
// Set the insert point after the last scalarized instruction or after the | ||||||||||||
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence | ||||||||||||
// will directly follow the scalar definitions. | ||||||||||||
auto OldIP = Builder.saveIP(); | ||||||||||||
auto *LastInst = cast<Instruction>(get(Def, LastLane)); | ||||||||||||
auto NewIP = isa<PHINode>(LastInst) | ||||||||||||
? LastInst->getParent()->getFirstNonPHIIt() | ||||||||||||
: std::next(BasicBlock::iterator(LastInst)); | ||||||||||||
Builder.SetInsertPoint(&*NewIP); | ||||||||||||
|
||||||||||||
// However, if we are vectorizing, we need to construct the vector values. | ||||||||||||
// If the value is known to be uniform after vectorization, we can just | ||||||||||||
// broadcast the scalar value corresponding to lane zero. Otherwise, we | ||||||||||||
// construct the vector values using insertelement instructions. Since the | ||||||||||||
// resulting vectors are stored in State, we will only generate the | ||||||||||||
// insertelements once. | ||||||||||||
Value *VectorValue = nullptr; | ||||||||||||
if (IsSingleScalar) { | ||||||||||||
VectorValue = GetBroadcastInstrs(ScalarValue); | ||||||||||||
set(Def, VectorValue); | ||||||||||||
} else { | ||||||||||||
assert(!VF.isScalable() && "VF is assumed to be non scalable."); | ||||||||||||
assert(isa<VPInstruction>(Def) && | ||||||||||||
"Explicit BuildVector recipes must have" | ||||||||||||
"handled packing for non-VPInstructions."); | ||||||||||||
// Initialize packing with insertelements to start from poison. | ||||||||||||
VectorValue = PoisonValue::get(toVectorizedTy(LastInst->getType(), VF)); | ||||||||||||
for (unsigned Lane = 0; Lane < VF.getFixedValue(); ++Lane) | ||||||||||||
VectorValue = packScalarIntoVectorizedValue(Def, VectorValue, Lane); | ||||||||||||
set(Def, VectorValue); | ||||||||||||
} | ||||||||||||
Value *VectorValue = GetBroadcastInstrs(ScalarValue); | ||||||||||||
set(Def, VectorValue); | ||||||||||||
Builder.restoreIP(OldIP); | ||||||||||||
return VectorValue; | ||||||||||||
} | ||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -908,6 +908,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { | |
return R && classof(R); | ||
} | ||
|
||
virtual VPRecipeWithIRFlags *clone() override = 0; | ||
|
||
static inline bool classof(const VPSingleDefRecipe *U) { | ||
auto *R = dyn_cast<VPRecipeBase>(U); | ||
return R && classof(R); | ||
|
@@ -1061,13 +1063,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, | |
VScale, | ||
}; | ||
|
||
private: | ||
typedef unsigned char OpcodeTy; | ||
OpcodeTy Opcode; | ||
|
||
/// An optional name that can be used for the generated IR instruction. | ||
const std::string Name; | ||
|
||
/// Returns true if this VPInstruction generates scalar values for all lanes. | ||
/// Most VPInstructions generate a single value per part, either vector or | ||
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar) | ||
|
@@ -1076,6 +1071,13 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, | |
/// underlying ingredient. | ||
bool doesGeneratePerAllLanes() const; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doesGeneratePerAllLanes() moved from private to public. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep |
||
|
||
private: | ||
typedef unsigned char OpcodeTy; | ||
OpcodeTy Opcode; | ||
|
||
/// An optional name that can be used for the generated IR instruction. | ||
const std::string Name; | ||
|
||
/// Returns true if we can generate a scalar for the first lane only if | ||
/// needed. | ||
bool canGenerateScalarForFirstLane() const; | ||
|
@@ -1085,11 +1087,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, | |
/// existing value is returned rather than a generated one. | ||
Value *generate(VPTransformState &State); | ||
|
||
/// Utility methods serving execute(): generates a scalar single instance of | ||
/// the modeled instruction for a given lane. \returns the scalar generated | ||
/// value for lane \p Lane. | ||
Value *generatePerLane(VPTransformState &State, const VPLane &Lane); | ||
|
||
#if !defined(NDEBUG) | ||
/// Return the number of operands determined by the opcode of the | ||
/// VPInstruction. Returns -1u if the number of operands cannot be determined | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -564,16 +564,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { | |||||
} | ||||||
} | ||||||
|
||||||
Value *VPInstruction::generatePerLane(VPTransformState &State, | ||||||
const VPLane &Lane) { | ||||||
IRBuilderBase &Builder = State.Builder; | ||||||
|
||||||
assert(getOpcode() == VPInstruction::PtrAdd && | ||||||
"only PtrAdd opcodes are supported for now"); | ||||||
return Builder.CreatePtrAdd(State.get(getOperand(0), Lane), | ||||||
State.get(getOperand(1), Lane), Name); | ||||||
} | ||||||
|
||||||
/// Create a conditional branch using \p Cond branching to the successors of \p | ||||||
/// VPBB. Note that the first successor is always forward (i.e. not created yet) | ||||||
/// while the second successor may already have been created (if it is a header | ||||||
|
@@ -1197,24 +1187,13 @@ void VPInstruction::execute(VPTransformState &State) { | |||||
"Set flags not supported for the provided opcode"); | ||||||
if (hasFastMathFlags()) | ||||||
State.Builder.setFastMathFlags(getFastMathFlags()); | ||||||
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && | ||||||
(vputils::onlyFirstLaneUsed(this) || | ||||||
isVectorToScalar() || isSingleScalar()); | ||||||
bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); | ||||||
if (GeneratesPerAllLanes) { | ||||||
for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue(); | ||||||
Lane != NumLanes; ++Lane) { | ||||||
Value *GeneratedValue = generatePerLane(State, VPLane(Lane)); | ||||||
assert(GeneratedValue && "generatePerLane must produce a value"); | ||||||
State.set(this, GeneratedValue, VPLane(Lane)); | ||||||
} | ||||||
return; | ||||||
} | ||||||
Comment on lines
-1203
to
-1212
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is dropped due to earlier replication of VPInstructions by VF. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep |
||||||
|
||||||
Value *GeneratedValue = generate(State); | ||||||
if (!hasResult()) | ||||||
return; | ||||||
assert(GeneratedValue && "generate must produce a value"); | ||||||
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && | ||||||
(vputils::onlyFirstLaneUsed(this) || | ||||||
isVectorToScalar() || isSingleScalar()); | ||||||
assert((((GeneratedValue->getType()->isVectorTy() || | ||||||
GeneratedValue->getType()->isStructTy()) == | ||||||
!GeneratesPerFirstLaneOnly) || | ||||||
|
@@ -1287,6 +1266,12 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |||||
case VPInstruction::Broadcast: | ||||||
case VPInstruction::ReductionStartVector: | ||||||
return true; | ||||||
case VPInstruction::BuildStructVector: | ||||||
case VPInstruction::BuildVector: | ||||||
// Before replicating by VF, Build(Struct)Vector uses all lanes of the | ||||||
// operand, after replicating its operands only the first lane is used. | ||||||
// Before replicating, it will only have a single operand. | ||||||
|
// Before replicating, it will only have a single operand. | |
// Before replicating, it will have only a single operand. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done, thanks
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -463,15 +463,16 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) { | |
VPlanTransforms::removeDeadRecipes(Plan); | ||
} | ||
|
||
/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p | ||
/// Def2LaneDefs to look up scalar definitions for operands of \RepR. | ||
static VPReplicateRecipe * | ||
/// Create a single-scalar clone of \p DefR (must be a VPReplicateRecipe or | ||
/// VPInstruction) for lane \p Lane. Use \p Def2LaneDefs to look up scalar | ||
/// definitions for operands of \DefR. | ||
static VPRecipeWithIRFlags * | ||
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, | ||
VPReplicateRecipe *RepR, VPLane Lane, | ||
VPRecipeWithIRFlags *DefR, VPLane Lane, | ||
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) { | ||
// Collect the operands at Lane, creating extracts as needed. | ||
SmallVector<VPValue *> NewOps; | ||
for (VPValue *Op : RepR->operands()) { | ||
for (VPValue *Op : DefR->operands()) { | ||
// If Op is a definition that has been unrolled, directly use the clone for | ||
// the corresponding lane. | ||
auto LaneDefs = Def2LaneDefs.find(Op); | ||
|
@@ -501,11 +502,24 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, | |
NewOps.push_back(Ext); | ||
} | ||
|
||
auto *New = | ||
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, | ||
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); | ||
New->transferFlags(*RepR); | ||
New->insertBefore(RepR); | ||
VPRecipeWithIRFlags *New; | ||
if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) { | ||
// TODO: have cloning of replicate recipes also provide the desired result | ||
// coupled with setting its operands to NewOps (deriving IsSingleScalar and | ||
// Mask from the operands?) | ||
New = | ||
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, | ||
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); | ||
Comment on lines
+511
to
+512
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TODO: have cloning of replicate recipes also provide the desired result coupled with setting its operands to NewOps (deriving IsSingleScalar and Mask from the operands?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added thanks |
||
} else { | ||
assert(isa<VPInstruction>(DefR) && | ||
"DefR must be a VPReplicateRecipe or VPInstruction"); | ||
New = DefR->clone(); | ||
for (const auto &[Idx, Op] : enumerate(NewOps)) { | ||
New->setOperand(Idx, Op); | ||
} | ||
} | ||
New->transferFlags(*DefR); | ||
New->insertBefore(DefR); | ||
return New; | ||
} | ||
|
||
|
@@ -530,34 +544,38 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { | |
SmallVector<VPRecipeBase *> ToRemove; | ||
for (VPBasicBlock *VPBB : VPBBsToUnroll) { | ||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { | ||
auto *RepR = dyn_cast<VPReplicateRecipe>(&R); | ||
if (!RepR || RepR->isSingleScalar()) | ||
if (!isa<VPInstruction, VPReplicateRecipe>(&R) || | ||
(isa<VPReplicateRecipe>(&R) && | ||
cast<VPReplicateRecipe>(&R)->isSingleScalar()) || | ||
(isa<VPInstruction>(&R) && | ||
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes())) | ||
continue; | ||
|
||
VPBuilder Builder(RepR); | ||
if (RepR->getNumUsers() == 0) { | ||
// Create single-scalar version of RepR for all lanes. | ||
auto *DefR = cast<VPRecipeWithIRFlags>(&R); | ||
VPBuilder Builder(DefR); | ||
if (DefR->getNumUsers() == 0) { | ||
// Create single-scalar version of DefR for all lanes. | ||
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) | ||
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs); | ||
RepR->eraseFromParent(); | ||
cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs); | ||
DefR->eraseFromParent(); | ||
continue; | ||
} | ||
/// Create single-scalar version of RepR for all lanes. | ||
/// Create single-scalar version of DefR for all lanes. | ||
SmallVector<VPValue *> LaneDefs; | ||
for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) | ||
LaneDefs.push_back( | ||
cloneForLane(Plan, Builder, IdxTy, RepR, VPLane(I), Def2LaneDefs)); | ||
cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs)); | ||
|
||
Def2LaneDefs[RepR] = LaneDefs; | ||
Def2LaneDefs[DefR] = LaneDefs; | ||
/// Users that only demand the first lane can use the definition for lane | ||
/// 0. | ||
RepR->replaceUsesWithIf(LaneDefs[0], [RepR](VPUser &U, unsigned) { | ||
return U.onlyFirstLaneUsed(RepR); | ||
DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) { | ||
return U.onlyFirstLaneUsed(DefR); | ||
}); | ||
|
||
// Update each build vector user that currently has RepR as its only | ||
// Update each build vector user that currently has DefR as its only | ||
// operand, to have all LaneDefs as its operands. | ||
for (VPUser *U : to_vector(RepR->users())) { | ||
for (VPUser *U : to_vector(DefR->users())) { | ||
auto *VPI = dyn_cast<VPInstruction>(U); | ||
if (!VPI || (VPI->getOpcode() != VPInstruction::BuildVector && | ||
VPI->getOpcode() != VPInstruction::BuildStructVector)) | ||
|
@@ -569,7 +587,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { | |
for (VPValue *LaneDef : drop_begin(LaneDefs)) | ||
VPI->addOperand(LaneDef); | ||
} | ||
ToRemove.push_back(RepR); | ||
ToRemove.push_back(DefR); | ||
} | ||
} | ||
for (auto *R : reverse(ToRemove)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,10 @@ define void @a(ptr readnone %b) { | |
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] | ||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]] | ||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]] | ||
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0 | ||
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1 | ||
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2 | ||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How/are these inserts (and those below) related to the patch, perhaps to dropping the packing from VPTransformState::get()? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If ther eare users in replicate regions, we will materialize explicit build-vectors for those users, but we will look thourgh them during codegen There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potential for being cleaned up when replicate regions will be replicated by VF? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes exactly |
||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 | ||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 | ||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3 | ||
|
@@ -649,9 +653,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) { | |
; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]] | ||
; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]] | ||
; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]] | ||
; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]] | ||
; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]] | ||
; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]] | ||
Comment on lines
-652
to
-654
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DCE or generation avoided now? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this will be DCE'd afte unrolling and only the last lane being used |
||
; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] | ||
; STRIDED-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[INDEX]] | ||
; STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Independent: above comment deserves updating, as this method no longer has a Part parameter - "// If Values have been set for this Def return the one relevant for \p Part."