Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions llvm/include/llvm/Analysis/VectorUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,11 @@ LLVM_ABI bool
isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx,
const TargetTransformInfo *TTI);

/// Identifies if the vector form of the intrinsic that returns a struct has
/// a scalar element at the struct element index \p RetIdx.
LLVM_ABI bool isVectorIntrinsicWithStructReturnScalarAtField(Intrinsic::ID ID,
int RetIdx);

/// Identifies if the vector form of the intrinsic that returns a struct is
/// overloaded at the struct element index \p RetIdx. \p TTI is used to
/// consider target specific intrinsics, if no target specific intrinsics
Expand Down
18 changes: 18 additions & 0 deletions llvm/lib/Analysis/VectorUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
return (ScalarOpdIdx == 2);
case Intrinsic::experimental_vp_splice:
return ScalarOpdIdx == 2 || ScalarOpdIdx == 4;
case Intrinsic::vp_load_ff:
return ScalarOpdIdx == 0 || ScalarOpdIdx == 2;
default:
return false;
}
Expand Down Expand Up @@ -212,18 +214,34 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(
case Intrinsic::powi:
case Intrinsic::ldexp:
return OpdIdx == -1 || OpdIdx == 1;
case Intrinsic::vp_load_ff:
return OpdIdx == 0;
default:
return OpdIdx == -1;
}
}

bool llvm::isVectorIntrinsicWithStructReturnScalarAtField(Intrinsic::ID ID,
                                                          int RetIdx) {
  // vp.load.ff is currently the only struct-returning intrinsic with a
  // scalar struct field: element 1 (the faulting-lane count, an i32) stays
  // scalar when the intrinsic is widened; element 0 is the loaded vector.
  if (ID == Intrinsic::vp_load_ff)
    return RetIdx == 1;
  // All other struct-returning intrinsics vectorize every field.
  return false;
}

bool llvm::isVectorIntrinsicWithStructReturnOverloadAtField(
Intrinsic::ID ID, int RetIdx, const TargetTransformInfo *TTI) {

if (TTI && Intrinsic::isTargetIntrinsic(ID))
return TTI->isTargetIntrinsicWithStructReturnOverloadAtField(ID, RetIdx);

switch (ID) {
case Intrinsic::modf:
case Intrinsic::sincos:
case Intrinsic::sincospi:
return false;
case Intrinsic::frexp:
return RetIdx == 0 || RetIdx == 1;
default:
Expand Down
21 changes: 21 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24909,6 +24909,27 @@ bool RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType,
if (!Subtarget.hasVInstructions())
return false;

// Only support fixed vectors if we know the minimum vector size.
if (DataType.isFixedLengthVector() &&
!Subtarget.useRVVForFixedLengthVectors())
return false;

EVT ScalarType = DataType.getScalarType();
if (!isLegalElementTypeForRVV(ScalarType))
return false;

if (!Subtarget.enableUnalignedVectorMem() &&
Alignment < ScalarType.getStoreSize())
return false;

return true;
}

bool RISCVTargetLowering::isLegalFirstFaultLoad(EVT DataType,
Align Alignment) const {
if (!Subtarget.hasVInstructions())
return false;

// Only support fixed vectors if we know the minimum vector size.
if (DataType.isFixedLengthVector() && !Subtarget.useRVVForFixedLengthVectors())
return false;
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,10 @@ class RISCVTargetLowering : public TargetLowering {
/// alignment is legal.
bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const;

/// Return true if a fault-only-first load of the given result type and
/// alignment is legal.
bool isLegalFirstFaultLoad(EVT DataType, Align Alignment) const;

unsigned getMaxSupportedInterleaveFactor() const override { return 8; }

bool fallBackToDAGISel(const Instruction &Inst) const override;
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1589,6 +1589,17 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
CmpInst::FCMP_UNO, CostKind);
return Cost;
}
case Intrinsic::vp_load_ff: {
Type *DataTy = RetTy->getStructElementType(0);
EVT DataTypeVT = TLI->getValueType(DL, DataTy);
// TODO: Extend IntrinsicCostAttributes to accept Align parameter.
Align Alignment;
if (!TLI->isLegalFirstFaultLoad(DataTypeVT, Alignment))
return InstructionCost::getInvalid();

return getMemoryOpCost(Instruction::Load, DataTy, Alignment, 0, CostKind,
{TTI::OK_AnyValue, TTI::OP_None}, nullptr);
}
}

if (ST->hasVInstructions() && RetTy->isVectorTy()) {
Expand Down
73 changes: 68 additions & 5 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,12 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));

// Off-by-default guard for vectorizing early-exit loops whose loads may
// fault, by emitting fault-only-first (vp.load.ff) loads. Currently limited
// elsewhere to at most one potentially faulting load and no interleaving.
static cl::opt<bool>
    EnableEarlyExitWithFFLoads("enable-early-exit-with-ffload", cl::init(false),
                               cl::Hidden,
                               cl::desc("Enable vectorization of early-exit "
                                        "loops with fault-only-first loads."));

static cl::opt<bool> ConsiderRegPressure(
"vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
cl::desc("Discard VFs if their register pressure is too high."));
Expand Down Expand Up @@ -3491,6 +3497,15 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
return FixedScalableVFPair::getNone();
}

if (!Legal->getPotentiallyFaultingLoads().empty() && UserIC > 1) {
reportVectorizationFailure("Auto-vectorization of loops with potentially "
"faulting loads is not supported when the "
"interleave count is more than 1",
"CantInterleaveLoopWithPotentiallyFaultingLoads",
ORE, TheLoop);
return FixedScalableVFPair::getNone();
}

ScalarEvolution *SE = PSE.getSE();
ElementCount TC = getSmallConstantTripCount(SE, TheLoop);
unsigned MaxTC = PSE.getSmallConstantMaxTripCount();
Expand Down Expand Up @@ -4100,7 +4115,23 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
Type *ScalarTy = TypeInfo.inferScalarType(ToCheck);
if (!Visited.insert({ScalarTy}).second)
continue;
Type *WideTy = toVectorizedTy(ScalarTy, VF);

Type *WideTy;
if (auto *WI = dyn_cast<VPWidenIntrinsicRecipe>(&R);
WI && ScalarTy->isStructTy()) {
auto *StructTy = cast<StructType>(ScalarTy);
SmallVector<Type *, 2> Tys;
for (unsigned I = 0, E = StructTy->getNumElements(); I != E; ++I) {
Type *ElementTy = StructTy->getStructElementType(I);
if (!isVectorIntrinsicWithStructReturnScalarAtField(
WI->getVectorIntrinsicID(), I))
ElementTy = toVectorizedTy(ElementTy, VF);
Tys.push_back(ElementTy);
}
WideTy = StructType::create(Tys);
} else
WideTy = toVectorizedTy(ScalarTy, VF);

if (any_of(getContainedTypes(WideTy), WillGenerateTargetVectors))
return true;
}
Expand Down Expand Up @@ -4549,6 +4580,10 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
if (!Legal->isSafeForAnyVectorWidth())
return 1;

// No interleaving for potentially faulting loads.
if (!Legal->getPotentiallyFaultingLoads().empty())
return 1;

// We don't attempt to perform interleaving for loops with uncountable early
// exits because the VPInstruction::AnyOf code cannot currently handle
// multiple parts.
Expand Down Expand Up @@ -7253,6 +7288,9 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// Regions are dissolved after optimizing for VF and UF, which completely
// removes unneeded loop regions first.
VPlanTransforms::dissolveLoopRegions(BestVPlan);

VPlanTransforms::convertFFLoadEarlyExitToVLStepping(BestVPlan);

// Canonicalize EVL loops after regions are dissolved.
VPlanTransforms::canonicalizeEVLLoops(BestVPlan);
VPlanTransforms::materializeBackedgeTakenCount(BestVPlan, VectorPH);
Expand Down Expand Up @@ -7481,9 +7519,9 @@ void EpilogueVectorizerEpilogueLoop::printDebugTracesAtEnd() {
});
}

VPWidenMemoryRecipe *
VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range) {
VPRecipeBase *VPRecipeBuilder::tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range) {
assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
"Must be called with either a load or store");

Expand Down Expand Up @@ -7541,6 +7579,22 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
Builder.insert(VectorPtr);
Ptr = VectorPtr;
}

if (Legal->getPotentiallyFaultingLoads().contains(I)) {
auto *I32Ty = IntegerType::getInt32Ty(Plan.getContext());
auto *RetTy = StructType::create({I->getType(), I32Ty});
DebugLoc DL = I->getDebugLoc();
if (!Mask)
Mask = Plan.getOrAddLiveIn(
ConstantInt::getTrue(IntegerType::getInt1Ty(Plan.getContext())));
auto *FFLoad = new VPWidenIntrinsicRecipe(
Intrinsic::vp_load_ff, {Ptr, Mask, &Plan.getVF()}, RetTy, DL);
Builder.insert(FFLoad);
VPValue *Zero = Plan.getOrAddLiveIn(ConstantInt::get(I32Ty, 0));
return new VPWidenRecipe(Instruction::ExtractValue, {FFLoad, Zero}, {}, {},
DL);
}

if (LoadInst *Load = dyn_cast<LoadInst>(I))
return new VPWidenLoadRecipe(*Load, Ptr, Mask, Consecutive, Reverse,
VPIRMetadata(*Load, LVer), I->getDebugLoc());
Expand Down Expand Up @@ -8427,6 +8481,8 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// Adjust the recipes for any inloop reductions.
adjustRecipesForReductions(Plan, RecipeBuilder, Range.Start);

VPlanTransforms::adjustFFLoadEarlyExitForPoisonSafety(*Plan);

// Apply mandatory transformation to handle FP maxnum/minnum reduction with
// NaNs if possible, bail out otherwise.
if (!VPlanTransforms::runPass(VPlanTransforms::handleMaxMinNumReductions,
Expand Down Expand Up @@ -9747,7 +9803,14 @@ bool LoopVectorizePass::processLoop(Loop *L) {
return false;
}

if (!LVL.getPotentiallyFaultingLoads().empty()) {
if (EnableEarlyExitWithFFLoads) {
if (LVL.getPotentiallyFaultingLoads().size() > 1) {
reportVectorizationFailure("Auto-vectorization of loops with more than 1 "
"potentially faulting load is not enabled",
"MoreThanOnePotentiallyFaultingLoad", ORE, L);
return false;
}
} else if (!LVL.getPotentiallyFaultingLoads().empty()) {
reportVectorizationFailure("Auto-vectorization of loops with potentially "
"faulting load is not supported",
"PotentiallyFaultingLoadsNotSupported", ORE, L);
Expand Down
5 changes: 2 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,9 +96,8 @@ class VPRecipeBuilder {
/// Check if the load or store instruction \p I should widened for \p
/// Range.Start and potentially masked. Such instructions are handled by a
/// recipe that takes an additional VPInstruction for the mask.
VPWidenMemoryRecipe *tryToWidenMemory(Instruction *I,
ArrayRef<VPValue *> Operands,
VFRange &Range);
VPRecipeBase *tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
VFRange &Range);

/// Check if an induction recipe should be constructed for \p Phi. If so build
/// and return it. If not, return null.
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,12 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::BranchOnCond:
case VPInstruction::BranchOnCount:
return Type::getVoidTy(Ctx);
case Instruction::ExtractValue: {
assert(R->getNumOperands() == 2 && "expected single level extractvalue");
auto *StructTy = cast<StructType>(inferScalarType(R->getOperand(0)));
auto *CI = cast<ConstantInt>(R->getOperand(1)->getLiveInIRValue());
return StructTy->getTypeAtIndex(CI->getZExtValue());
}
default:
break;
}
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlanHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,8 @@ struct VPTransformState {
set(Def, V, VPLane(0));
return;
}
assert((VF.isScalar() || isVectorizedTy(V->getType())) &&
assert((VF.isScalar() || isVectorizedTy(V->getType()) ||
V->getType()->isStructTy()) &&
"scalar values must be stored as (0, 0)");
Data.VPV2Vector[Def] = V;
}
Expand Down
33 changes: 30 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,12 @@ Value *VPInstruction::generate(VPTransformState &State) {
Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
return Builder.CreateNot(A, Name);
}
case Instruction::ExtractValue: {
assert(getNumOperands() == 2 && "expected single level extractvalue");
Value *Op = State.get(getOperand(0));
auto *CI = cast<ConstantInt>(getOperand(1)->getLiveInIRValue());
return Builder.CreateExtractValue(Op, CI->getZExtValue());
}
case Instruction::ExtractElement: {
assert(State.VF.isVector() && "Only extract elements from vectors");
if (getOperand(1)->isLiveIn()) {
Expand Down Expand Up @@ -1194,6 +1200,7 @@ bool VPInstruction::isVectorToScalar() const {
bool VPInstruction::isSingleScalar() const {
switch (getOpcode()) {
case Instruction::PHI:
case Instruction::ExtractValue:
case VPInstruction::ExplicitVectorLength:
case VPInstruction::ResumeForEpilogue:
case VPInstruction::VScale:
Expand Down Expand Up @@ -1738,7 +1745,16 @@ void VPWidenIntrinsicRecipe::execute(VPTransformState &State) {

SmallVector<Type *, 2> TysForDecl;
// Add return type if intrinsic is overloaded on it.
if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1, State.TTI))
if (ResultTy->isStructTy()) {
auto *StructTy = cast<StructType>(ResultTy);
for (unsigned I = 0, E = StructTy->getNumElements(); I != E; ++I) {
if (isVectorIntrinsicWithStructReturnOverloadAtField(VectorIntrinsicID, I,
State.TTI))
TysForDecl.push_back(
toVectorizedTy(StructTy->getStructElementType(I), State.VF));
}
} else if (isVectorIntrinsicWithOverloadTypeAtArg(VectorIntrinsicID, -1,
State.TTI))
TysForDecl.push_back(VectorType::get(getResultType(), State.VF));
SmallVector<Value *, 4> Args;
for (const auto &I : enumerate(operands())) {
Expand Down Expand Up @@ -1802,8 +1818,19 @@ static InstructionCost getCostForIntrinsics(Intrinsic::ID ID,
Arguments.push_back(V);
}

Type *ScalarRetTy = Ctx.Types.inferScalarType(&R);
Type *RetTy = VF.isVector() ? toVectorizedTy(ScalarRetTy, VF) : ScalarRetTy;
Type *RetTy = Ctx.Types.inferScalarType(&R);
if (RetTy->isStructTy()) {
auto *StructTy = cast<StructType>(RetTy);
SmallVector<Type *> Tys;
for (unsigned I = 0, E = StructTy->getNumElements(); I != E; ++I) {
Type *ElementTy = StructTy->getStructElementType(I);
if (!isVectorIntrinsicWithStructReturnScalarAtField(ID, I))
ElementTy = toVectorizedTy(ElementTy, VF);
Tys.push_back(ElementTy);
}
RetTy = StructType::get(StructTy->getContext(), Tys);
} else if (VF.isVector())
RetTy = toVectorizedTy(RetTy, VF);
SmallVector<Type *> ParamTys;
for (const VPValue *Op : Operands) {
ParamTys.push_back(VF.isVector()
Expand Down
Loading