-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[VPlan] Explicitly replicate VPInstructions by VF. #155102
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
b80eee3
0d04499
1e62f9f
c5b4978
14c1d98
f9c3619
6bcaac8
32bc393
e6ac419
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -333,6 +333,9 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { | |||||||||||
LastLane = 0; | ||||||||||||
} | ||||||||||||
|
||||||||||||
assert(IsSingleScalar && "must be a single-scalar at this point"); | ||||||||||||
// We need to construct the vector value for a single-scalar value by | ||||||||||||
// broadcasting the scalar to all lanes. | ||||||||||||
Comment on lines
+346
to
+347
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done thanks |
||||||||||||
auto *LastInst = cast<Instruction>(get(Def, LastLane)); | ||||||||||||
|
||||||||||||
// Set the insert point after the last scalarized instruction or after the | ||||||||||||
// last PHI, if LastInst is a PHI. This ensures the insertelement sequence | ||||||||||||
|
@@ -343,27 +346,8 @@ Value *VPTransformState::get(const VPValue *Def, bool NeedsScalar) { | |||||||||||
: std::next(BasicBlock::iterator(LastInst)); | ||||||||||||
Builder.SetInsertPoint(&*NewIP); | ||||||||||||
|
||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed thanks
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -901,6 +901,8 @@ struct VPRecipeWithIRFlags : public VPSingleDefRecipe, public VPIRFlags { | |
return R && classof(R); | ||
} | ||
|
||
virtual VPRecipeWithIRFlags *clone() override = 0; | ||
|
||
void execute(VPTransformState &State) override = 0; | ||
|
||
/// Compute the cost for this recipe for \p VF, using \p Opcode and \p Ctx. | ||
|
@@ -1045,13 +1047,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, | |
VScale, | ||
}; | ||
|
||
private: | ||
typedef unsigned char OpcodeTy; | ||
OpcodeTy Opcode; | ||
|
||
/// An optional name that can be used for the generated IR instruction. | ||
const std::string Name; | ||
|
||
/// Returns true if this VPInstruction generates scalar values for all lanes. | ||
/// Most VPInstructions generate a single value per part, either vector or | ||
/// scalar. VPReplicateRecipe takes care of generating multiple (scalar) | ||
|
@@ -1060,6 +1055,13 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, | |
/// underlying ingredient. | ||
bool doesGeneratePerAllLanes() const; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. doesGeneratePerAllLanes() moved from private to public. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep |
||
|
||
private: | ||
typedef unsigned char OpcodeTy; | ||
OpcodeTy Opcode; | ||
|
||
/// An optional name that can be used for the generated IR instruction. | ||
const std::string Name; | ||
|
||
/// Returns true if we can generate a scalar for the first lane only if | ||
/// needed. | ||
bool canGenerateScalarForFirstLane() const; | ||
|
@@ -1069,11 +1071,6 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags, | |
/// existing value is returned rather than a generated one. | ||
Value *generate(VPTransformState &State); | ||
|
||
/// Utility methods serving execute(): generates a scalar single instance of | ||
/// the modeled instruction for a given lane. \returns the scalar generated | ||
/// value for lane \p Lane. | ||
Value *generatePerLane(VPTransformState &State, const VPLane &Lane); | ||
|
||
#if !defined(NDEBUG) | ||
/// Return the number of operands determined by the opcode of the | ||
/// VPInstruction. Returns -1u if the number of operands cannot be determined | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -525,16 +525,6 @@ bool VPInstruction::canGenerateScalarForFirstLane() const { | |
} | ||
} | ||
|
||
Value *VPInstruction::generatePerLane(VPTransformState &State, | ||
const VPLane &Lane) { | ||
IRBuilderBase &Builder = State.Builder; | ||
|
||
assert(getOpcode() == VPInstruction::PtrAdd && | ||
"only PtrAdd opcodes are supported for now"); | ||
return Builder.CreatePtrAdd(State.get(getOperand(0), Lane), | ||
State.get(getOperand(1), Lane), Name); | ||
} | ||
|
||
/// Create a conditional branch using \p Cond branching to the successors of \p | ||
/// VPBB. Note that the first successor is always forward (i.e. not created yet) | ||
/// while the second successor may already have been created (if it is a header | ||
|
@@ -1154,24 +1144,13 @@ void VPInstruction::execute(VPTransformState &State) { | |
"Set flags not supported for the provided opcode"); | ||
if (hasFastMathFlags()) | ||
State.Builder.setFastMathFlags(getFastMathFlags()); | ||
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && | ||
(vputils::onlyFirstLaneUsed(this) || | ||
isVectorToScalar() || isSingleScalar()); | ||
bool GeneratesPerAllLanes = doesGeneratePerAllLanes(); | ||
if (GeneratesPerAllLanes) { | ||
for (unsigned Lane = 0, NumLanes = State.VF.getFixedValue(); | ||
Lane != NumLanes; ++Lane) { | ||
Value *GeneratedValue = generatePerLane(State, VPLane(Lane)); | ||
assert(GeneratedValue && "generatePerLane must produce a value"); | ||
State.set(this, GeneratedValue, VPLane(Lane)); | ||
} | ||
return; | ||
} | ||
Comment on lines
-1203
to
-1212
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is dropped due to earlier replication of VPInstructions by VF. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yep |
||
|
||
Value *GeneratedValue = generate(State); | ||
if (!hasResult()) | ||
return; | ||
assert(GeneratedValue && "generate must produce a value"); | ||
bool GeneratesPerFirstLaneOnly = canGenerateScalarForFirstLane() && | ||
(vputils::onlyFirstLaneUsed(this) || | ||
isVectorToScalar() || isSingleScalar()); | ||
assert((((GeneratedValue->getType()->isVectorTy() || | ||
GeneratedValue->getType()->isStructTy()) == | ||
!GeneratesPerFirstLaneOnly) || | ||
|
@@ -1244,6 +1223,9 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { | |
case VPInstruction::Broadcast: | ||
case VPInstruction::ReductionStartVector: | ||
return true; | ||
case VPInstruction::BuildStructVector: | ||
case VPInstruction::BuildVector: | ||
return getNumOperands() > 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are Build*Vector with a single operand useful, do they construct single element vectors? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All build-vectors start out as single operand, which will get expanded once we replicate by VF There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. and this determines if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added thanks |
||
case VPInstruction::PtrAdd: | ||
return Op == getOperand(0) || vputils::onlyFirstLaneUsed(this); | ||
case VPInstruction::WidePtrAdd: | ||
|
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -3377,34 +3377,40 @@ void VPlanTransforms::materializeBuildVectors(VPlan &Plan) { | |||||||||||||
vp_depth_first_shallow(Plan.getEntry())); | ||||||||||||||
auto VPBBsInsideLoopRegion = VPBlockUtils::blocksOnly<VPBasicBlock>( | ||||||||||||||
vp_depth_first_shallow(LoopRegion->getEntry())); | ||||||||||||||
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes, | ||||||||||||||
// excluding ones in replicate regions. Those are not materialized explicitly | ||||||||||||||
// yet. Those vector users are still handled in VPReplicateRegion::execute(), | ||||||||||||||
// via shouldPack(). | ||||||||||||||
// Materialize Build(Struct)Vector for all replicating VPReplicateRecipes and | ||||||||||||||
// VPInstructions, excluding ones in replicate regions. Those are not | ||||||||||||||
// materialized explicitly yet. Those vector users are still handled in | ||||||||||||||
// VPReplicateRegion::execute(), via shouldPack(). | ||||||||||||||
// TODO: materialize build vectors for replicating recipes in replicating | ||||||||||||||
// regions. | ||||||||||||||
// TODO: materialize build vectors for VPInstructions. | ||||||||||||||
|
// TODO: materialize build vectors for VPInstructions. |
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R); | |
auto *DefR = cast<VPRecipeWithIRFlags>(&R); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
doen thanks
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (!DefR || !isa<VPReplicateRecipe, VPInstruction>(DefR)) | |
if (!isa_and_present<VPReplicateRecipe, VPInstruction>(DefR)) |
although slightly longer...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated as suggested just below
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R); | |
if (!DefR || !isa<VPReplicateRecipe, VPInstruction>(DefR)) | |
continue; | |
if (!isa<VPReplicateRecipe, VPInstruction>(&R)) | |
continue; | |
auto *DefR = cast<VPRecipeWithIRFlags>(&R); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks!
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should isSingleScalar() and doesGeneratePerAllLanes be combined into a common method of VPRecipeWithIRFlags? (if not VPSingleDefRecipe)
Should the two merge, i.e., by converting replicated recipes into VPInstructions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After replicating by VF, we can replace single-scalar VPReplicateRecipes by VPInstructions (or even create them directly instead of single-scalar VPReplicateRecipes). This will probably also require explicit unpacking first (#155670).
To do so before replicating by VF, we would probably need to encode single-scalar in VPInstruction (or VPInstructionWithType) #140623.
Some VPReplicateRecipes cannnot be expressed easily by VPInstructions at the moment, including load/stores, and calls.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How/Is the additional check of onlyFirstLaneUsed() related to DefR being a VPInstruction?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPReplicateRecipes encode single-scalarness in the recipe, whereas for VPInstructions we currently have to infer it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so should the onlyFirstLaneUsed(DefR)
be placed under the isa<VPInstruction>(DefR) &&
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, done thanks
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -158,8 +158,8 @@ struct VPlanTransforms { | |||||
/// Explicitly unroll \p Plan by \p UF. | ||||||
static void unrollByUF(VPlan &Plan, unsigned UF); | ||||||
|
||||||
/// Replace each VPReplicateRecipe outside on any replicate region in \p Plan | ||||||
/// with \p VF single-scalar recipes. | ||||||
/// Replace each VPReplicateRecipe and replicating VPInstruction outside on | ||||||
|
/// Replace each VPReplicateRecipe and replicating VPInstruction outside on | |
/// Replace each replicating VPReplicateRecipe and VPInstruction outside of |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -463,15 +463,15 @@ void VPlanTransforms::unrollByUF(VPlan &Plan, unsigned UF) { | |||||||||||||||||||||||||||||||||
VPlanTransforms::removeDeadRecipes(Plan); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
/// Create a single-scalar clone of \p RepR for lane \p Lane. Use \p | ||||||||||||||||||||||||||||||||||
/// Def2LaneDefs to look up scalar definitions for operands of \RepR. | ||||||||||||||||||||||||||||||||||
static VPReplicateRecipe * | ||||||||||||||||||||||||||||||||||
/// Create a single-scalar clone of \p DefR for lane \p Lane. Use \p | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
/// Def2LaneDefs to look up scalar definitions for operands of \DefR. | ||||||||||||||||||||||||||||||||||
static VPRecipeWithIRFlags * | ||||||||||||||||||||||||||||||||||
cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, | ||||||||||||||||||||||||||||||||||
VPReplicateRecipe *RepR, VPLane Lane, | ||||||||||||||||||||||||||||||||||
VPRecipeWithIRFlags *DefR, VPLane Lane, | ||||||||||||||||||||||||||||||||||
const DenseMap<VPValue *, SmallVector<VPValue *>> &Def2LaneDefs) { | ||||||||||||||||||||||||||||||||||
// Collect the operands at Lane, creating extracts as needed. | ||||||||||||||||||||||||||||||||||
SmallVector<VPValue *> NewOps; | ||||||||||||||||||||||||||||||||||
for (VPValue *Op : RepR->operands()) { | ||||||||||||||||||||||||||||||||||
for (VPValue *Op : DefR->operands()) { | ||||||||||||||||||||||||||||||||||
// If Op is a definition that has been unrolled, directly use the clone for | ||||||||||||||||||||||||||||||||||
// the corresponding lane. | ||||||||||||||||||||||||||||||||||
auto LaneDefs = Def2LaneDefs.find(Op); | ||||||||||||||||||||||||||||||||||
|
@@ -501,11 +501,19 @@ cloneForLane(VPlan &Plan, VPBuilder &Builder, Type *IdxTy, | |||||||||||||||||||||||||||||||||
NewOps.push_back(Ext); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
auto *New = | ||||||||||||||||||||||||||||||||||
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, | ||||||||||||||||||||||||||||||||||
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); | ||||||||||||||||||||||||||||||||||
New->transferFlags(*RepR); | ||||||||||||||||||||||||||||||||||
New->insertBefore(RepR); | ||||||||||||||||||||||||||||||||||
VPRecipeWithIRFlags *New; | ||||||||||||||||||||||||||||||||||
if (auto *RepR = dyn_cast<VPReplicateRecipe>(DefR)) { | ||||||||||||||||||||||||||||||||||
New = | ||||||||||||||||||||||||||||||||||
new VPReplicateRecipe(RepR->getUnderlyingInstr(), NewOps, | ||||||||||||||||||||||||||||||||||
/*IsSingleScalar=*/true, /*Mask=*/nullptr, *RepR); | ||||||||||||||||||||||||||||||||||
Comment on lines
+511
to
+512
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TODO: have cloning of replicate recipes also provide the desired result coupled with setting its operands to NewOps (deriving IsSingleScalar and Mask from the operands?) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. added thanks |
||||||||||||||||||||||||||||||||||
} else { | ||||||||||||||||||||||||||||||||||
New = DefR->clone(); | ||||||||||||||||||||||||||||||||||
for (const auto &[Idx, Op] : enumerate(NewOps)) { | ||||||||||||||||||||||||||||||||||
New->setOperand(Idx, Op); | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
New->transferFlags(*DefR); | ||||||||||||||||||||||||||||||||||
New->insertBefore(DefR); | ||||||||||||||||||||||||||||||||||
return New; | ||||||||||||||||||||||||||||||||||
} | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
|
@@ -530,41 +538,46 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { | |||||||||||||||||||||||||||||||||
SmallVector<VPRecipeBase *> ToRemove; | ||||||||||||||||||||||||||||||||||
for (VPBasicBlock *VPBB : VPBBsToUnroll) { | ||||||||||||||||||||||||||||||||||
for (VPRecipeBase &R : make_early_inc_range(*VPBB)) { | ||||||||||||||||||||||||||||||||||
auto *RepR = dyn_cast<VPReplicateRecipe>(&R); | ||||||||||||||||||||||||||||||||||
if (!RepR || RepR->isSingleScalar()) | ||||||||||||||||||||||||||||||||||
auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R); | ||||||||||||||||||||||||||||||||||
if (!DefR || !isa<VPInstruction, VPReplicateRecipe>(DefR)) | ||||||||||||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||||||||||
if ((isa<VPReplicateRecipe>(DefR) && | ||||||||||||||||||||||||||||||||||
cast<VPReplicateRecipe>(DefR)->isSingleScalar()) || | ||||||||||||||||||||||||||||||||||
(isa<VPInstruction>(DefR) && | ||||||||||||||||||||||||||||||||||
!cast<VPInstruction>(DefR)->doesGeneratePerAllLanes())) | ||||||||||||||||||||||||||||||||||
continue; | ||||||||||||||||||||||||||||||||||
|
||||||||||||||||||||||||||||||||||
|
auto *DefR = dyn_cast<VPRecipeWithIRFlags>(&R); | |
if (!DefR || !isa<VPInstruction, VPReplicateRecipe>(DefR)) | |
continue; | |
if ((isa<VPReplicateRecipe>(DefR) && | |
cast<VPReplicateRecipe>(DefR)->isSingleScalar()) || | |
(isa<VPInstruction>(DefR) && | |
!cast<VPInstruction>(DefR)->doesGeneratePerAllLanes())) | |
continue; | |
if (!isa<VPInstruction, VPReplicateRecipe>(&R)) || | |
(isa<VPReplicateRecipe>(&R) && | |
cast<VPReplicateRecipe>(&R)->isSingleScalar()) || | |
(isa<VPInstruction>(&R) && | |
!cast<VPInstruction>(&R)->doesGeneratePerAllLanes())) | |
continue; | |
auto *DefR = cast<VPRecipeWithIRFlags>(&R); |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,6 +33,10 @@ define void @a(ptr readnone %b) { | |
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] | ||
; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP14]] | ||
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr null, i64 [[TMP17]] | ||
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x ptr> poison, ptr [[NEXT_GEP]], i32 0 | ||
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x ptr> [[TMP21]], ptr [[NEXT_GEP2]], i32 1 | ||
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x ptr> [[TMP22]], ptr [[NEXT_GEP3]], i32 2 | ||
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x ptr> [[TMP23]], ptr [[NEXT_GEP4]], i32 3 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How/are these inserts (and those below) related to the patch, perhaps to dropping the packing from VPTransformState::get()? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If ther eare users in replicate regions, we will materialize explicit build-vectors for those users, but we will look thourgh them during codegen There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Potential for being cleaned up when replicate regions will be replicated by VF? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes exactly |
||
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[NEXT_GEP]], i64 -1 | ||
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP3]], i32 0 | ||
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i32 -3 | ||
|
@@ -649,9 +653,6 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) { | |
; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]] | ||
; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]] | ||
; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]] | ||
; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]] | ||
; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]] | ||
; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]] | ||
Comment on lines
-652
to
-654
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. DCE or generation avoided now? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this will be DCE'd afte unrolling and only the last lane being used |
||
; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] | ||
; STRIDED-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[INDEX]] | ||
; STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Independent: above comment deserves updating, as this method no longer has a Part parameter - "// If Values have been set for this Def return the one relevant for \p Part."