Skip to content

Commit 3264b28

Browse files
committed
[VPlan] Use VPWidenIntrinsicRecipe to support binary and unary operations with EVL-vectorization
1 parent b13d40b commit 3264b28

File tree

11 files changed

+94
-173
lines changed

11 files changed

+94
-173
lines changed

llvm/include/llvm/IR/VectorBuilder.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,11 @@ class VectorBuilder {
9999
const Twine &Name = Twine());
100100

101101
/// Emit a VP reduction intrinsic call for recurrence kind.
102-
/// \param RdxID The intrinsic ID of llvm.vector.reduce.*
102+
/// \param ID The ID of the intrinsic for which a VP intrinsic call is emitted
103103
/// \param ValTy The type of operand which the reduction operation is
104104
/// performed.
105105
/// \param VecOpArray The operand list.
106-
Value *createSimpleReduction(Intrinsic::ID RdxID, Type *ValTy,
106+
Value *createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
107107
ArrayRef<Value *> VecOpArray,
108108
const Twine &Name = Twine());
109109
};

llvm/lib/IR/VectorBuilder.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,12 @@ Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy,
6060
return createVectorInstructionImpl(VPID, ReturnTy, InstOpArray, Name);
6161
}
6262

63-
Value *VectorBuilder::createSimpleReduction(Intrinsic::ID RdxID,
64-
Type *ValTy,
63+
Value *VectorBuilder::createSimpleIntrinsic(Intrinsic::ID ID, Type *ValTy,
6564
ArrayRef<Value *> InstOpArray,
6665
const Twine &Name) {
67-
auto VPID = VPIntrinsic::getForIntrinsic(RdxID);
68-
assert(VPReductionIntrinsic::isVPReduction(VPID) &&
69-
"No VPIntrinsic for this reduction");
66+
auto VPID = VPIntrinsic::getForIntrinsic(ID);
67+
assert(VPIntrinsic::isVPIntrinsic(VPID) &&
68+
"No VPIntrinsic for this Intrinsic");
7069
return createVectorInstructionImpl(VPID, ValTy, InstOpArray, Name);
7170
}
7271

llvm/lib/Transforms/Utils/LoopUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1300,7 +1300,7 @@ Value *llvm::createSimpleReduction(VectorBuilder &VBuilder, Value *Src,
13001300
Type *SrcEltTy = SrcTy->getElementType();
13011301
Value *Iden = getRecurrenceIdentity(Kind, SrcEltTy, Desc.getFastMathFlags());
13021302
Value *Ops[] = {Iden, Src};
1303-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1303+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13041304
}
13051305

13061306
Value *llvm::createReduction(IRBuilderBase &B,
@@ -1343,7 +1343,7 @@ Value *llvm::createOrderedReduction(VectorBuilder &VBuilder,
13431343
Intrinsic::ID Id = getReductionIntrinsicID(RecurKind::FAdd);
13441344
auto *SrcTy = cast<VectorType>(Src->getType());
13451345
Value *Ops[] = {Start, Src};
1346-
return VBuilder.createSimpleReduction(Id, SrcTy, Ops);
1346+
return VBuilder.createSimpleIntrinsic(Id, SrcTy, Ops);
13471347
}
13481348

13491349
void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue,

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 13 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -912,7 +912,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
912912
case VPRecipeBase::VPWidenGEPSC:
913913
case VPRecipeBase::VPWidenIntrinsicSC:
914914
case VPRecipeBase::VPWidenSC:
915-
case VPRecipeBase::VPWidenEVLSC:
916915
case VPRecipeBase::VPWidenSelectSC:
917916
case VPRecipeBase::VPBlendSC:
918917
case VPRecipeBase::VPPredInstPHISC:
@@ -1107,7 +1106,6 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
11071106
static inline bool classof(const VPRecipeBase *R) {
11081107
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
11091108
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1110-
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
11111109
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
11121110
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
11131111
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1474,16 +1472,11 @@ class VPIRInstruction : public VPRecipeBase {
14741472
class VPWidenRecipe : public VPRecipeWithIRFlags {
14751473
unsigned Opcode;
14761474

1477-
protected:
1478-
template <typename IterT>
1479-
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1480-
iterator_range<IterT> Operands)
1481-
: VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1482-
14831475
public:
14841476
template <typename IterT>
14851477
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
1486-
: VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
1478+
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1479+
Opcode(I.getOpcode()) {}
14871480

14881481
~VPWidenRecipe() override = default;
14891482

@@ -1493,15 +1486,7 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
14931486
return R;
14941487
}
14951488

1496-
static inline bool classof(const VPRecipeBase *R) {
1497-
return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1498-
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
1499-
}
1500-
1501-
static inline bool classof(const VPUser *U) {
1502-
auto *R = dyn_cast<VPRecipeBase>(U);
1503-
return R && classof(R);
1504-
}
1489+
VP_CLASSOF_IMPL(VPDef::VPWidenSC)
15051490

15061491
/// Produce a widened instruction using the opcode and operands of the recipe,
15071492
/// processing State.VF elements.
@@ -1520,54 +1505,6 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
15201505
#endif
15211506
};
15221507

1523-
/// A recipe for widening operations with vector-predication intrinsics with
1524-
/// explicit vector length (EVL).
1525-
class VPWidenEVLRecipe : public VPWidenRecipe {
1526-
using VPRecipeWithIRFlags::transferFlags;
1527-
1528-
public:
1529-
template <typename IterT>
1530-
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
1531-
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1532-
addOperand(&EVL);
1533-
}
1534-
VPWidenEVLRecipe(VPWidenRecipe &W, VPValue &EVL)
1535-
: VPWidenEVLRecipe(*W.getUnderlyingInstr(), W.operands(), EVL) {
1536-
transferFlags(W);
1537-
}
1538-
1539-
~VPWidenEVLRecipe() override = default;
1540-
1541-
VPWidenRecipe *clone() override final {
1542-
llvm_unreachable("VPWidenEVLRecipe cannot be cloned");
1543-
return nullptr;
1544-
}
1545-
1546-
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1547-
1548-
VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
1549-
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1550-
1551-
/// Produce a vp-intrinsic using the opcode and operands of the recipe,
1552-
/// processing EVL elements.
1553-
void execute(VPTransformState &State) override final;
1554-
1555-
/// Returns true if the recipe only uses the first lane of operand \p Op.
1556-
bool onlyFirstLaneUsed(const VPValue *Op) const override {
1557-
assert(is_contained(operands(), Op) &&
1558-
"Op must be an operand of the recipe");
1559-
// EVL in that recipe is always the last operand, thus any use before means
1560-
// the VPValue should be vectorized.
1561-
return getEVL() == Op;
1562-
}
1563-
1564-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1565-
/// Print the recipe.
1566-
void print(raw_ostream &O, const Twine &Indent,
1567-
VPSlotTracker &SlotTracker) const override final;
1568-
#endif
1569-
};
1570-
15711508
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
15721509
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
15731510
/// Cast instruction opcode.
@@ -1686,6 +1623,16 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
16861623
MayWriteToMemory(CI.mayWriteToMemory()),
16871624
MayHaveSideEffects(CI.mayHaveSideEffects()) {}
16881625

1626+
template <typename IterT>
1627+
VPWidenIntrinsicRecipe(Instruction &I, Intrinsic::ID VectorIntrinsicID,
1628+
iterator_range<IterT> Operands, Type *Ty,
1629+
DebugLoc DL = {})
1630+
: VPRecipeWithIRFlags(VPDef::VPWidenIntrinsicSC, Operands, I),
1631+
VectorIntrinsicID(VectorIntrinsicID), ResultTy(Ty),
1632+
MayReadFromMemory(I.mayReadFromMemory()),
1633+
MayWriteToMemory(I.mayWriteToMemory()),
1634+
MayHaveSideEffects(I.mayHaveSideEffects()) {}
1635+
16891636
VPWidenIntrinsicRecipe(Intrinsic::ID VectorIntrinsicID,
16901637
ArrayRef<VPValue *> CallArguments, Type *Ty,
16911638
DebugLoc DL = {})

llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -267,9 +267,8 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
267267
[this](const VPRecipeBase *R) {
268268
return inferScalarType(R->getOperand(0));
269269
})
270-
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPWidenEVLRecipe,
271-
VPReplicateRecipe, VPWidenCallRecipe, VPWidenMemoryRecipe,
272-
VPWidenSelectRecipe>(
270+
.Case<VPBlendRecipe, VPInstruction, VPWidenRecipe, VPReplicateRecipe,
271+
VPWidenCallRecipe, VPWidenMemoryRecipe, VPWidenSelectRecipe>(
273272
[this](const auto *R) { return inferScalarTypeForRecipe(R); })
274273
.Case<VPWidenIntrinsicRecipe>([](const VPWidenIntrinsicRecipe *R) {
275274
return R->getResultType();

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 57 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,6 @@ bool VPRecipeBase::mayWriteToMemory() const {
9999
case VPWidenLoadSC:
100100
case VPWidenPHISC:
101101
case VPWidenSC:
102-
case VPWidenEVLSC:
103102
case VPWidenSelectSC: {
104103
const Instruction *I =
105104
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -143,7 +142,6 @@ bool VPRecipeBase::mayReadFromMemory() const {
143142
case VPWidenIntOrFpInductionSC:
144143
case VPWidenPHISC:
145144
case VPWidenSC:
146-
case VPWidenEVLSC:
147145
case VPWidenSelectSC: {
148146
const Instruction *I =
149147
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -184,7 +182,6 @@ bool VPRecipeBase::mayHaveSideEffects() const {
184182
case VPWidenPHISC:
185183
case VPWidenPointerInductionSC:
186184
case VPWidenSC:
187-
case VPWidenEVLSC:
188185
case VPWidenSelectSC: {
189186
const Instruction *I =
190187
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -994,24 +991,53 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {
994991
Args.push_back(Arg);
995992
}
996993

997-
// Use vector version of the intrinsic.
998-
Module *M = State.Builder.GetInsertBlock()->getModule();
999-
Function *VectorF =
1000-
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1001-
assert(VectorF && "Can't retrieve vector intrinsic.");
994+
if (VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
995+
VectorIntrinsicID != Intrinsic::vp_select) {
996+
VectorBuilder VBuilder(State.Builder);
997+
Value *Mask =
998+
State.Builder.CreateVectorSplat(State.VF, State.Builder.getTrue());
999+
VBuilder.setMask(Mask).setEVL(Args.back());
1000+
// Remove EVL from Args
1001+
Args.pop_back();
1002+
1003+
if (VectorIntrinsicID == Intrinsic::vp_icmp ||
1004+
VectorIntrinsicID == Intrinsic::vp_fcmp) {
1005+
auto &Ctx = State.Builder.getContext();
1006+
Value *Pred = MetadataAsValue::get(
1007+
Ctx, MDString::get(Ctx, CmpInst::getPredicateName(getPredicate())));
1008+
Args.push_back(Pred);
1009+
}
10021010

1003-
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1004-
SmallVector<OperandBundleDef, 1> OpBundles;
1005-
if (CI)
1006-
CI->getOperandBundlesAsDefs(OpBundles);
1011+
Value *VPInst = VBuilder.createSimpleIntrinsic(
1012+
VectorIntrinsicID, TysForDecl[0], Args, "vp.call");
10071013

1008-
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1014+
if (isa<FPMathOperator>(VPInst))
1015+
setFlags(cast<Instruction>(VPInst));
10091016

1010-
setFlags(V);
1017+
if (!VPInst->getType()->isVoidTy())
1018+
State.set(this, VPInst);
1019+
State.addMetadata(VPInst,
1020+
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1021+
} else {
1022+
// Use vector version of the intrinsic.
1023+
Module *M = State.Builder.GetInsertBlock()->getModule();
1024+
Function *VectorF =
1025+
Intrinsic::getOrInsertDeclaration(M, VectorIntrinsicID, TysForDecl);
1026+
assert(VectorF && "Can't retrieve vector intrinsic.");
10111027

1012-
if (!V->getType()->isVoidTy())
1013-
State.set(this, V);
1014-
State.addMetadata(V, CI);
1028+
auto *CI = cast_or_null<CallInst>(getUnderlyingValue());
1029+
SmallVector<OperandBundleDef, 1> OpBundles;
1030+
if (CI)
1031+
CI->getOperandBundlesAsDefs(OpBundles);
1032+
1033+
CallInst *V = State.Builder.CreateCall(VectorF, Args, OpBundles);
1034+
1035+
setFlags(V);
1036+
1037+
if (!V->getType()->isVoidTy())
1038+
State.set(this, V);
1039+
State.addMetadata(V, CI);
1040+
}
10151041
}
10161042

10171043
InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
@@ -1043,6 +1069,20 @@ InstructionCost VPWidenIntrinsicRecipe::computeCost(ElementCount VF,
10431069
ParamTys.push_back(
10441070
ToVectorTy(Ctx.Types.inferScalarType(getOperand(I)), VF));
10451071

1072+
// TODO: Implement in cost model
1073+
if (std::optional<unsigned> FOp =
1074+
VPIntrinsic::getFunctionalOpcodeForVP(VectorIntrinsicID)) {
1075+
if (FOp == Instruction::FNeg) {
1076+
// Instruction *CtxI =
1077+
dyn_cast_or_null<Instruction>(getUnderlyingValue());
1078+
Type *VectorTy = ToVectorTy(getResultType(), VF);
1079+
return Ctx.TTI.getArithmeticInstrCost(
1080+
FOp.value(), VectorTy, CostKind,
1081+
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
1082+
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None});
1083+
}
1084+
}
1085+
10461086
// TODO: Rework TTI interface to avoid reliance on underlying IntrinsicInst.
10471087
FastMathFlags FMF = hasFastMathFlags() ? getFastMathFlags() : FastMathFlags();
10481088
IntrinsicCostAttributes CostAttrs(
@@ -1454,64 +1494,6 @@ InstructionCost VPWidenRecipe::computeCost(ElementCount VF,
14541494
}
14551495
}
14561496

1457-
void VPWidenEVLRecipe::execute(VPTransformState &State) {
1458-
unsigned Opcode = getOpcode();
1459-
if (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) {
1460-
Value *Op1 = State.get(getOperand(0));
1461-
Value *Op2 = State.get(getOperand(1));
1462-
auto &Ctx = State.Builder.getContext();
1463-
Value *Pred = MetadataAsValue::get(
1464-
Ctx, MDString::get(Ctx, CmpInst::getPredicateName(getPredicate())));
1465-
1466-
IRBuilderBase &BuilderIR = State.Builder;
1467-
VectorBuilder Builder(BuilderIR);
1468-
1469-
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1470-
Builder.setMask(Mask).setEVL(State.get(getEVL(), /*NeedsScalar=*/true));
1471-
VectorType *RetType = VectorType::get(Type::getInt1Ty(Ctx), State.VF);
1472-
Value *VPInst = Builder.createVectorInstruction(Opcode, RetType,
1473-
{Op1, Op2, Pred}, "vp.op");
1474-
if (isa<FPMathOperator>(VPInst))
1475-
setFlags(cast<Instruction>(VPInst));
1476-
1477-
State.set(this, VPInst);
1478-
State.addMetadata(VPInst,
1479-
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1480-
return;
1481-
}
1482-
1483-
if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode)) {
1484-
State.setDebugLocFrom(getDebugLoc());
1485-
1486-
assert(State.get(getOperand(0))->getType()->isVectorTy() &&
1487-
"VPWidenEVLRecipe should not be used for scalars");
1488-
1489-
VPValue *EVL = getEVL();
1490-
Value *EVLArg = State.get(EVL, /*NeedsScalar=*/true);
1491-
IRBuilderBase &BuilderIR = State.Builder;
1492-
VectorBuilder Builder(BuilderIR);
1493-
Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
1494-
1495-
SmallVector<Value *, 4> Ops;
1496-
for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
1497-
VPValue *VPOp = getOperand(I);
1498-
Ops.push_back(State.get(VPOp));
1499-
}
1500-
1501-
Builder.setMask(Mask).setEVL(EVLArg);
1502-
Value *VPInst = Builder.createVectorInstruction(Opcode, Ops[0]->getType(),
1503-
Ops, "vp.op");
1504-
// Currently vp-intrinsics only accept FMF flags.
1505-
// TODO: Enable other flags when support is added.
1506-
if (isa<FPMathOperator>(VPInst))
1507-
setFlags(cast<Instruction>(VPInst));
1508-
1509-
State.set(this, VPInst);
1510-
State.addMetadata(VPInst,
1511-
dyn_cast_or_null<Instruction>(getUnderlyingValue()));
1512-
}
1513-
}
1514-
15151497
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
15161498
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
15171499
VPSlotTracker &SlotTracker) const {
@@ -1521,15 +1503,6 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
15211503
printFlags(O);
15221504
printOperands(O, SlotTracker);
15231505
}
1524-
1525-
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
1526-
VPSlotTracker &SlotTracker) const {
1527-
O << Indent << "WIDEN ";
1528-
printAsOperand(O, SlotTracker);
1529-
O << " = vp." << Instruction::getOpcodeName(getOpcode());
1530-
printFlags(O);
1531-
printOperands(O, SlotTracker);
1532-
}
15331506
#endif
15341507

15351508
void VPWidenCastRecipe::execute(VPTransformState &State) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1475,9 +1475,17 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
14751475
})
14761476
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
14771477
unsigned Opcode = W->getOpcode();
1478-
if (Opcode == Instruction::Freeze)
1478+
// TODO: Support other opcodes
1479+
if (!Instruction::isBinaryOp(Opcode) &&
1480+
!Instruction::isUnaryOp(Opcode))
14791481
return nullptr;
1480-
return new VPWidenEVLRecipe(*W, EVL);
1482+
auto *I = cast<Instruction>(W->getUnderlyingInstr());
1483+
SmallVector<VPValue *> Ops(W->operands());
1484+
Ops.push_back(&EVL);
1485+
Intrinsic::ID VPID = VPIntrinsic::getForOpcode(W->getOpcode());
1486+
return new VPWidenIntrinsicRecipe(
1487+
*I, VPID, make_range(Ops.begin(), Ops.end()), I->getType(),
1488+
I->getDebugLoc());
14811489
})
14821490
.Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
14831491
VPValue *NewMask = GetNewMask(Red->getCondOp());

0 commit comments

Comments
 (0)