Skip to content

Commit b817fa0

Browse files
committed
Prototype vectorizing structs via multiple result VPWidenCallRecipe
1 parent 1a0bae5 commit b817fa0

File tree

9 files changed

+350
-25
lines changed

9 files changed

+350
-25
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -946,11 +946,26 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
946946
if (CI && !VFDatabase::getMappings(*CI).empty())
947947
VecCallVariantsFound = true;
948948

949+
// TODO: Tidy up these checks.
950+
// Returns true if the result type of \p I is acceptable for widening.
// A call returning a struct type that passes canWidenType() is accepted only
// when every one of its uses is an extractvalue. Every other instruction must
// produce a valid vector element type or void.
auto canWidenInst = [](Instruction &I) {
951+
Type *InstTy = I.getType();
952+
if (isa<CallInst>(I) && isa<StructType>(InstTy) &&
953+
canWidenType(InstTy)) {
954+
// We can only widen struct calls where the users are extractvalues.
955+
for (auto &U : I.uses()) {
956+
if (!isa<ExtractValueInst>(U.getUser()))
957+
return false;
958+
}
959+
return true;
960+
}
961+
// Not a widenable struct call (this includes struct calls rejected by
// canWidenType()): fall back to the generic element-type check.
return VectorType::isValidElementType(InstTy) || InstTy->isVoidTy();
962+
};
963+
949964
// Check that the instruction return type is vectorizable.
950965
// We can't vectorize casts from vector type to scalar type.
951966
// Also, we can't vectorize extractelement instructions.
952-
Type *InstTy = I.getType();
953-
if (!(InstTy->isVoidTy() || canWidenType(InstTy)) ||
967+
// TODO: Tidy up these checks.
968+
if (!canWidenInst(I) ||
954969
(isa<CastInst>(I) &&
955970
!VectorType::isValidElementType(I.getOperand(0)->getType())) ||
956971
isa<ExtractElementInst>(I)) {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7362,6 +7362,8 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
73627362
return dyn_cast_or_null<Instruction>(S->getUnderlyingValue());
73637363
if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(R))
73647364
return &WidenMem->getIngredient();
7365+
if (auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R))
7366+
return WidenCall->getUnderlyingCallInstruction();
73657367
return nullptr;
73667368
};
73677369

@@ -8337,9 +8339,9 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
83378339
return new VPBlendRecipe(Phi, OperandsWithMask);
83388340
}
83398341

8340-
VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
8341-
ArrayRef<VPValue *> Operands,
8342-
VFRange &Range) {
8342+
VPRecipeBase *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
8343+
ArrayRef<VPValue *> Operands,
8344+
VFRange &Range) {
83438345
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
83448346
[this, CI](ElementCount VF) {
83458347
return CM.isScalarWithPredication(CI, VF);
@@ -9049,6 +9051,19 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
90499051
// TODO: Model and preserve debug intrinsics in VPlan.
90509052
for (Instruction &I : drop_end(BB->instructionsWithoutDebug(false))) {
90519053
Instruction *Instr = &I;
9054+
9055+
// Special case: a non-uniform extractvalue gets no recipe of its own. It is
9056+
// mapped to a recipe result in VPRecipeBuilder::getVPValueOrAddLiveIn().
9057+
if (auto *ExtractValue = dyn_cast<ExtractValueInst>(Instr)) {
9058+
bool IsUniform = LoopVectorizationPlanner::getDecisionAndClampRange(
9059+
[&](ElementCount VF) {
9060+
return CM.isUniformAfterVectorization(ExtractValue, VF);
9061+
},
9062+
Range);
9063+
if (!IsUniform)
9064+
continue;
9065+
}
9066+
90529067
SmallVector<VPValue *, 4> Operands;
90539068
auto *Phi = dyn_cast<PHINode>(Instr);
90549069
if (Phi && Phi->getParent() == HeaderBB) {

llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,8 @@ class VPRecipeBuilder {
9595
/// Handle call instructions. If \p CI can be widened for \p Range.Start,
9696
/// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
9797
/// decreased to ensure same decision from \p Range.Start to \p Range.End.
98-
VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
99-
VFRange &Range);
98+
VPRecipeBase *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
99+
VFRange &Range);
100100

101101
/// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
102102
/// if it can. The function should only be called if the cost-model indicates
@@ -182,6 +182,16 @@ class VPRecipeBuilder {
182182
if (auto *R = Ingredient2Recipe.lookup(I))
183183
return R->getVPSingleValue();
184184
}
185+
// TODO: Find a better place for this. Map an extractvalue to the matching result of its aggregate's recipe.
186+
if (auto *EVI = dyn_cast<ExtractValueInst>(V)) {
187+
Value *AggOp = EVI->getAggregateOperand();
188+
if (auto *R = getRecipe(cast<Instruction>(AggOp))) {
189+
assert(R->getNumDefinedValues() ==
190+
cast<StructType>(AggOp->getType())->getNumElements());
191+
unsigned Idx = EVI->getIndices()[0];
192+
return R->getVPValue(Idx);
193+
}
194+
}
185195
return Plan.getOrAddLiveIn(V);
186196
}
187197
};

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -907,7 +907,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
907907
case VPRecipeBase::VPReplicateSC:
908908
case VPRecipeBase::VPScalarIVStepsSC:
909909
case VPRecipeBase::VPVectorPointerSC:
910-
case VPRecipeBase::VPWidenCallSC:
911910
case VPRecipeBase::VPWidenCanonicalIVSC:
912911
case VPRecipeBase::VPWidenCastSC:
913912
case VPRecipeBase::VPWidenGEPSC:
@@ -929,6 +928,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
929928
case VPRecipeBase::VPBranchOnMaskSC:
930929
case VPRecipeBase::VPInterleaveSC:
931930
case VPRecipeBase::VPIRInstructionSC:
931+
case VPRecipeBase::VPWidenCallSC:
932932
case VPRecipeBase::VPWidenLoadEVLSC:
933933
case VPRecipeBase::VPWidenLoadSC:
934934
case VPRecipeBase::VPWidenStoreEVLSC:
@@ -1734,28 +1734,35 @@ class VPWidenIntrinsicRecipe : public VPSingleDefRecipeWithIRFlags {
17341734
};
17351735

17361736
/// A recipe for widening Call instructions using library calls.
1737-
class VPWidenCallRecipe : public VPSingleDefRecipeWithIRFlags {
1737+
class VPWidenCallRecipe : public VPRecipeBase, public VPRecipeIRFlags {
17381738
/// Variant stores a pointer to the chosen function. There is a 1:1 mapping
17391739
/// between a given VF and the chosen vectorized variant, so there will be a
17401740
/// different VPlan for each VF with a valid variant.
17411741
Function *Variant;
17421742

1743+
CallInst *CI;
1744+
17431745
public:
1744-
VPWidenCallRecipe(Value *UV, Function *Variant,
1746+
VPWidenCallRecipe(CallInst *CI, Function *Variant,
17451747
ArrayRef<VPValue *> CallArguments, DebugLoc DL = {})
1746-
: VPSingleDefRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
1747-
*cast<Instruction>(UV)),
1748-
Variant(Variant) {
1748+
: VPRecipeBase(VPDef::VPWidenCallSC, CallArguments, DL),
1749+
VPRecipeIRFlags(*CI), Variant(Variant), CI(CI) {
17491750
assert(
17501751
isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
17511752
"last operand must be the called function");
1753+
for (Type *Ty : getContainedTypes(CI->getType())) {
1754+
(void)Ty;
1755+
new VPValue(CI, this);
1756+
}
17521757
}
17531758

1759+
CallInst *getUnderlyingCallInstruction() const { return CI; }
1760+
17541761
~VPWidenCallRecipe() override = default;
17551762

17561763
VPWidenCallRecipe *clone() override {
1757-
return new VPWidenCallRecipe(getUnderlyingValue(), Variant,
1758-
{op_begin(), op_end()}, getDebugLoc());
1764+
return new VPWidenCallRecipe(CI, Variant, {op_begin(), op_end()},
1765+
getDebugLoc());
17591766
}
17601767

17611768
VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -136,9 +136,14 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenRecipe *R) {
136136
llvm_unreachable("Unhandled opcode!");
137137
}
138138

139-
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R) {
140-
auto &CI = *cast<CallInst>(R->getUnderlyingInstr());
141-
return CI.getType();
139+
/// Infer the scalar type of \p V, which is expected to be one of the values
/// defined by the widened call recipe \p R. The i-th defined value of \p R is
/// given the i-th type yielded by getContainedTypes() on the underlying call's
/// return type.
/// NOTE(review): presumably getContainedTypes() yields the element types of a
/// struct return type and the type itself otherwise -- confirm.
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenCallRecipe *R,
140+
const VPValue *V) {
141+
auto &CI = *cast<CallInst>(R->getUnderlyingCallInstruction());
142+
// Find the position of V among R's defined values and return the type at the
// same position.
for (auto [I, Ty] : enumerate(getContainedTypes(CI.getType()))) {
143+
if (R->getVPValue(I) == V)
144+
return Ty;
145+
}
146+
// Reached only if V matched none of the values checked above.
llvm_unreachable("Unexpected call value!");
142147
}
143148

144149
Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R) {
@@ -267,12 +272,13 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
267272
return inferScalarType(R->getOperand(0));
268273
})
269274
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
270-
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
271-
VPWidenSelectRecipe>(
275+
VPReplicateRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
272276
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
273277
.Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
274278
return R->getResultType();
275279
})
280+
.Case<VPWidenCallRecipe>(
281+
[&](const auto *R) { return inferScalarTypeForRecipe(R, V); })
276282
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
277283
// TODO: Use info from interleave group.
278284
return V->getUnderlyingValue()->getType();

llvm/lib/Transforms/Vectorize/VPlanAnalysis.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ class VPTypeAnalysis {
4747

4848
Type *inferScalarTypeForRecipe(const VPBlendRecipe *R);
4949
Type *inferScalarTypeForRecipe(const VPInstruction *R);
50-
Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R);
50+
Type *inferScalarTypeForRecipe(const VPWidenCallRecipe *R, const VPValue *V);
5151
Type *inferScalarTypeForRecipe(const VPWidenRecipe *R);
5252
Type *inferScalarTypeForRecipe(const VPWidenIntOrFpInductionRecipe *R);
5353
Type *inferScalarTypeForRecipe(const VPWidenMemoryRecipe *R);

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -294,6 +294,8 @@ InstructionCost VPRecipeBase::cost(ElementCount VF, VPCostContext &Ctx) {
294294
UI = IG->getInsertPos();
295295
else if (auto *WidenMem = dyn_cast<VPWidenMemoryRecipe>(this))
296296
UI = &WidenMem->getIngredient();
297+
else if (auto *WidenCall = dyn_cast<VPWidenCallRecipe>(this))
298+
UI = WidenCall->getUnderlyingCallInstruction();
297299

298300
InstructionCost RecipeCost;
299301
if (UI && Ctx.skipCostComputation(UI, VF.isVector())) {
@@ -915,16 +917,24 @@ void VPWidenCallRecipe::execute(VPTransformState &State) {
915917

916918
assert(Variant != nullptr && "Can't create vector function.");
917919

918-
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
920+
auto *CI = getUnderlyingCallInstruction();
919921
SmallVector<OperandBundleDef, 1> OpBundles;
920922
if (CI)
921923
CI->getOperandBundlesAsDefs(OpBundles);
922924

923925
CallInst *V = State.Builder.CreateCall(Variant, Args, OpBundles);
924926
setFlags(V);
925927

926-
if (!V->getType()->isVoidTy())
927-
State.set(this, V);
928+
if (!V->getType()->isVoidTy()) {
929+
if (getNumDefinedValues() > 1) {
930+
for (auto [I, Def] : enumerate(definedValues())) {
931+
Value *AggV = State.Builder.CreateExtractValue(V, I);
932+
State.set(Def, AggV);
933+
}
934+
} else {
935+
State.set(getVPSingleValue(), V);
936+
}
937+
}
928938
State.addMetadata(V, CI);
929939
}
930940

@@ -945,7 +955,9 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
945955
if (CalledFn->getReturnType()->isVoidTy())
946956
O << "void ";
947957
else {
948-
printAsOperand(O, SlotTracker);
958+
interleaveComma(definedValues(), O, [&O, &SlotTracker](VPValue *Def) {
959+
Def->printAsOperand(O, SlotTracker);
960+
});
949961
O << " = ";
950962
}
951963

0 commit comments

Comments
 (0)