Skip to content
33 changes: 20 additions & 13 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4367,7 +4367,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
[](const auto *R) { return Instruction::Store; })
.Case<VPWidenLoadRecipe>(
[](const auto *R) { return Instruction::Load; })
.Case<VPWidenCallRecipe>(
.Case<VPWidenCallRecipe, VPWidenIntrinsicRecipe>(
[](const auto *R) { return Instruction::Call; })
.Case<VPInstruction, VPWidenRecipe, VPReplicateRecipe,
VPWidenCastRecipe>(
Expand All @@ -4392,11 +4392,17 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks(
OS << "):";
if (Opcode == Instruction::Call) {
auto *WidenCall = dyn_cast<VPWidenCallRecipe>(R);
Function *CalledFn =
WidenCall ? WidenCall->getCalledScalarFunction()
: cast<Function>(R->getOperand(R->getNumOperands() - 1)
->getLiveInIRValue());
OS << " call to " << CalledFn->getName();
StringRef Name = "";
if (auto *Int = dyn_cast<VPWidenIntrinsicRecipe>(R)) {
Name = Int->getIntrinsicName();
} else {
Function *CalledFn =
WidenCall ? WidenCall->getCalledScalarFunction()
: cast<Function>(R->getOperand(R->getNumOperands() - 1)
->getLiveInIRValue());
Name = CalledFn->getName();
}
OS << " call to " << Name;
} else
OS << " " << Instruction::getOpcodeName(Opcode);
reportVectorizationInfo(OutString, "InvalidCost", ORE, OrigLoop, nullptr,
Expand Down Expand Up @@ -4447,6 +4453,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenCanonicalIVSC:
case VPDef::VPWidenCastSC:
case VPDef::VPWidenGEPSC:
case VPDef::VPWidenIntrinsicSC:
case VPDef::VPWidenSC:
case VPDef::VPWidenSelectSC:
case VPDef::VPBlendSC:
Expand Down Expand Up @@ -8266,7 +8273,7 @@ VPBlendRecipe *VPRecipeBuilder::tryToBlend(PHINode *Phi,
return new VPBlendRecipe(Phi, OperandsWithMask);
}

VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
ArrayRef<VPValue *> Operands,
VFRange &Range) {
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
Expand Down Expand Up @@ -8297,8 +8304,9 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
},
Range);
if (ShouldUseVectorIntrinsic)
return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()), ID,
CI->getDebugLoc());
return new VPWidenIntrinsicRecipe(*CI, ID,
make_range(Ops.begin(), Ops.end() - 1),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe move Ops.push_back(Operands.back()); from line 8305 to line 8319 to avoid -1 here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!

CI->getType(), CI->getDebugLoc());

Function *Variant = nullptr;
std::optional<unsigned> MaskPos;
Expand Down Expand Up @@ -8350,9 +8358,8 @@ VPWidenCallRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
Ops.insert(Ops.begin() + *MaskPos, Mask);
}

return new VPWidenCallRecipe(CI, make_range(Ops.begin(), Ops.end()),
Intrinsic::not_intrinsic, CI->getDebugLoc(),
Variant);
return new VPWidenCallRecipe(
CI, Variant, make_range(Ops.begin(), Ops.end()), CI->getDebugLoc());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
return new VPWidenCallRecipe(
CI, Variant, make_range(Ops.begin(), Ops.end()), CI->getDebugLoc());
return new VPWidenCallRecipe(
CI, Variant, Ops, CI->getDebugLoc());

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks!

}

return nullptr;
Expand Down Expand Up @@ -9218,7 +9225,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
RecurrenceDescriptor::isFMulAddIntrinsic(CurrentLinkI) &&
"Expected instruction to be a call to the llvm.fmuladd intrinsic");
assert(((MinVF.isScalar() && isa<VPReplicateRecipe>(CurrentLink)) ||
isa<VPWidenCallRecipe>(CurrentLink)) &&
isa<VPWidenIntrinsicRecipe>(CurrentLink)) &&
CurrentLink->getOperand(2) == PreviousLink &&
"expected a call where the previous link is the added operand");

Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,9 @@ class VPRecipeBuilder {
VPBlendRecipe *tryToBlend(PHINode *Phi, ArrayRef<VPValue *> Operands);

/// Handle call instructions. If \p CI can be widened for \p Range.Start,
/// return a new VPWidenCallRecipe. Range.End may be decreased to ensure same
/// decision from \p Range.Start to \p Range.End.
VPWidenCallRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
/// return a new VPWidenCallRecipe or VPWidenIntrinsicRecipe. Range.End may be
/// decreased to ensure same decision from \p Range.Start to \p Range.End.
VPSingleDefRecipe *tryToWidenCall(CallInst *CI, ArrayRef<VPValue *> Operands,
VFRange &Range);

/// Check if \p I has an opcode that can be widened and return a VPWidenRecipe
Expand Down
69 changes: 55 additions & 14 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -886,6 +886,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenCanonicalIVSC:
case VPRecipeBase::VPWidenCastSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
Expand Down Expand Up @@ -1608,25 +1609,65 @@ class VPScalarCastRecipe : public VPSingleDefRecipe {
}
};

/// A recipe for widening Call instructions.
class VPWidenCallRecipe : public VPRecipeWithIRFlags {
/// ID of the vector intrinsic to call when widening the call. If set the
/// Intrinsic::not_intrinsic, a library call will be used instead.
/// A recipe for widening vector intrinsics.
class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
/// ID of the vector intrinsic to widen.
Intrinsic::ID VectorIntrinsicID;
/// If this recipe represents a library call, Variant stores a pointer to
/// the chosen function. There is a 1:1 mapping between a given VF and the
/// chosen vectorized variant, so there will be a different vplan for each
/// VF with a valid variant.

/// Scalar type of the result produced by the intrinsic.
Type *ResultTy;

public:
template <typename IterT>
VPWidenIntrinsicRecipe(CallInst &CI, Intrinsic::ID VectorIntrinsicID,
iterator_range<IterT> CallArguments, Type *Ty,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about ArrayRef here instead of iterator_range?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated, thanks!

DebugLoc DL = {})
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, CallArguments, CI),
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty) {}

~VPWidenIntrinsicRecipe() override = default;

/// Create a copy of this recipe that wraps the same underlying call and is
/// built from the same intrinsic ID, operands, result type and debug location.
VPWidenIntrinsicRecipe *clone() override {
  auto &UnderlyingCall = *cast<CallInst>(getUnderlyingValue());
  return new VPWidenIntrinsicRecipe(UnderlyingCall, VectorIntrinsicID,
                                    operands(), ResultTy, getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenIntrinsicSC)

/// Produce a widened version of the vector intrinsic.
void execute(VPTransformState &State) override;

/// Return the cost of this vector intrinsic.
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;

/// Return the scalar type of the result produced by the intrinsic.
Type *getResultTy() const { return ResultTy; }

/// Return the name of the intrinsic as a string.
StringRef getIntrinsicName() const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif
};

/// A recipe for widening Call instructions using library calls.
class VPWidenCallRecipe : public VPRecipeWithIRFlags {
/// Variant stores a pointer to the chosen function. There is a 1:1 mapping
/// between a given VF and the chosen vectorized variant, so there will be a
/// different VPlan for each VF with a valid variant.
Function *Variant;

public:
template <typename IterT>
VPWidenCallRecipe(Value *UV, iterator_range<IterT> CallArguments,
Intrinsic::ID VectorIntrinsicID, DebugLoc DL = {},
Function *Variant = nullptr)
VPWidenCallRecipe(Value *UV, Function *Variant,
iterator_range<IterT> CallArguments, DebugLoc DL = {})
: VPRecipeWithIRFlags(VPDef::VPWidenCallSC, CallArguments,
*cast<Instruction>(UV)),
VectorIntrinsicID(VectorIntrinsicID), Variant(Variant) {
Variant(Variant) {
assert(
isa<Function>(getOperand(getNumOperands() - 1)->getLiveInIRValue()) &&
"last operand must be the called function");
Expand All @@ -1635,8 +1676,8 @@ class VPWidenCallRecipe : public VPRecipeWithIRFlags {
~VPWidenCallRecipe() override = default;

VPWidenCallRecipe *clone() override {
return new VPWidenCallRecipe(getUnderlyingValue(), operands(),
VectorIntrinsicID, getDebugLoc(), Variant);
return new VPWidenCallRecipe(getUnderlyingValue(), Variant, operands(),
getDebugLoc());
}

VP_CLASSOF_IMPL(VPDef::VPWidenCallSC)
Expand Down
2 changes: 2 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
VPWidenSelectRecipe>(
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
.Case<VPWidenIntrinsicRecipe>(
[](const VPWidenIntrinsicRecipe *R) { return R->getResultTy(); })
.Case<VPInterleaveRecipe>([V](const VPInterleaveRecipe *R) {
// TODO: Use info from interleave group.
return V->getUnderlyingValue()->getType();
Expand Down
Loading
Loading