-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[LV] Emit all remarks for unvectorizable instructions #153833
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LV] Emit all remarks for unvectorizable instructions #153833
Conversation
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Tobias Stadler (tobias-stadler) ChangesIf ExtraAnalysis is requested, emit all remarks caused by unvectorizable instructions - instead of only the first. This causes a lot of indent churn, so the PR is split into 2 commits, the first contains the actual changes, the second one is just formatting. Patch is 27.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153833.diff 3 Files Affected:
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index 43ff084816d18..48ee93acbe008 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -493,6 +493,9 @@ class LoopVectorizationLegality {
/// and we only need to check individual instructions.
bool canVectorizeInstrs();
+ /// Check if an individual instruction is vectorizable.
+ bool canVectorizeInstr(Instruction &I);
+
/// When we vectorize loops we may change the order in which
/// we read and write from memory. This method checks if it is
/// legal to vectorize the code, considering only memory constrains.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index c47fd9421fddd..789047a2a28e7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -793,280 +793,296 @@ static bool canWidenCallReturnType(Type *Ty) {
}
bool LoopVectorizationLegality::canVectorizeInstrs() {
- BasicBlock *Header = TheLoop->getHeader();
+ bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
+ bool Result = true;
// For each block in the loop.
for (BasicBlock *BB : TheLoop->blocks()) {
// Scan the instructions in the block and look for hazards.
for (Instruction &I : *BB) {
- if (auto *Phi = dyn_cast<PHINode>(&I)) {
- Type *PhiTy = Phi->getType();
- // Check that this PHI type is allowed.
- if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
- !PhiTy->isPointerTy()) {
- reportVectorizationFailure("Found a non-int non-pointer PHI",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop);
- return false;
- }
+ Result &= canVectorizeInstr(I);
+ if (!DoExtraAnalysis && !Result)
+ return false;
+ }
+ }
- // If this PHINode is not in the header block, then we know that we
- // can convert it to select during if-conversion. No need to check if
- // the PHIs in this block are induction or reduction variables.
- if (BB != Header) {
- // Non-header phi nodes that have outside uses can be vectorized. Add
- // them to the list of allowed exits.
- // Unsafe cyclic dependencies with header phis are identified during
- // legalization for reduction, induction and fixed order
- // recurrences.
- AllowedExit.insert(&I);
- continue;
- }
+ if (!PrimaryInduction) {
+ if (Inductions.empty()) {
+ reportVectorizationFailure(
+ "Did not find one integer induction var",
+ "loop induction variable could not be identified",
+ "NoInductionVariable", ORE, TheLoop);
+ return false;
+ }
+ if (!WidestIndTy) {
+ reportVectorizationFailure(
+ "Did not find one integer induction var",
+ "integer loop induction variable could not be identified",
+ "NoIntegerInductionVariable", ORE, TheLoop);
+ return false;
+ }
+ LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
+ }
- // We only allow if-converted PHIs with exactly two incoming values.
- if (Phi->getNumIncomingValues() != 2) {
- reportVectorizationFailure("Found an invalid PHI",
- "loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", ORE, TheLoop, Phi);
- return false;
- }
+ // Now we know the widest induction type, check if our found induction
+ // is the same size. If it's not, unset it here and InnerLoopVectorizer
+ // will create another.
+ if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
+ PrimaryInduction = nullptr;
- RecurrenceDescriptor RedDes;
- if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
- DT, PSE.getSE())) {
- Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
- AllowedExit.insert(RedDes.getLoopExitInstr());
- Reductions[Phi] = RedDes;
- continue;
- }
+ return Result;
+}
- // We prevent matching non-constant strided pointer IVS to preserve
- // historical vectorizer behavior after a generalization of the
- // IVDescriptor code. The intent is to remove this check, but we
- // have to fix issues around code quality for such loops first.
- auto IsDisallowedStridedPointerInduction =
- [](const InductionDescriptor &ID) {
- if (AllowStridedPointerIVs)
- return false;
- return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
- ID.getConstIntStepValue() == nullptr;
- };
-
- // TODO: Instead of recording the AllowedExit, it would be good to
- // record the complementary set: NotAllowedExit. These include (but may
- // not be limited to):
- // 1. Reduction phis as they represent the one-before-last value, which
- // is not available when vectorized
- // 2. Induction phis and increment when SCEV predicates cannot be used
- // outside the loop - see addInductionPhi
- // 3. Non-Phis with outside uses when SCEV predicates cannot be used
- // outside the loop - see call to hasOutsideLoopUser in the non-phi
- // handling below
- // 4. FixedOrderRecurrence phis that can possibly be handled by
- // extraction.
- // By recording these, we can then reason about ways to vectorize each
- // of these NotAllowedExit.
- InductionDescriptor ID;
- if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
- !IsDisallowedStridedPointerInduction(ID)) {
- addInductionPhi(Phi, ID, AllowedExit);
- Requirements->addExactFPMathInst(ID.getExactFPMathInst());
- continue;
- }
+bool LoopVectorizationLegality::canVectorizeInstr(Instruction &I) {
+ BasicBlock *BB = I.getParent();
+ BasicBlock *Header = TheLoop->getHeader();
- if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
- AllowedExit.insert(Phi);
- FixedOrderRecurrences.insert(Phi);
- continue;
- }
+ if (auto *Phi = dyn_cast<PHINode>(&I)) {
+ Type *PhiTy = Phi->getType();
+ // Check that this PHI type is allowed.
+ if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
+ !PhiTy->isPointerTy()) {
+ reportVectorizationFailure(
+ "Found a non-int non-pointer PHI",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood", ORE, TheLoop);
+ return false;
+ }
- // As a last resort, coerce the PHI to a AddRec expression
- // and re-try classifying it a an induction PHI.
- if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
- !IsDisallowedStridedPointerInduction(ID)) {
- addInductionPhi(Phi, ID, AllowedExit);
- continue;
- }
+ // If this PHINode is not in the header block, then we know that we
+ // can convert it to select during if-conversion. No need to check if
+ // the PHIs in this block are induction or reduction variables.
+ if (BB != Header) {
+ // Non-header phi nodes that have outside uses can be vectorized. Add
+ // them to the list of allowed exits.
+ // Unsafe cyclic dependencies with header phis are identified during
+ // legalization for reduction, induction and fixed order
+ // recurrences.
+ AllowedExit.insert(&I);
+ return true;
+ }
- reportVectorizationFailure("Found an unidentified PHI",
- "value that could not be identified as "
- "reduction is used outside the loop",
- "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
- return false;
- } // end of PHI handling
-
- // We handle calls that:
- // * Have a mapping to an IR intrinsic.
- // * Have a vector version available.
- auto *CI = dyn_cast<CallInst>(&I);
-
- if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
- !(CI->getCalledFunction() && TLI &&
- (!VFDatabase::getMappings(*CI).empty() ||
- isTLIScalarize(*TLI, *CI)))) {
- // If the call is a recognized math libary call, it is likely that
- // we can vectorize it given loosened floating-point constraints.
- LibFunc Func;
- bool IsMathLibCall =
- TLI && CI->getCalledFunction() &&
- CI->getType()->isFloatingPointTy() &&
- TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
- TLI->hasOptimizedCodeGen(Func);
-
- if (IsMathLibCall) {
- // TODO: Ideally, we should not use clang-specific language here,
- // but it's hard to provide meaningful yet generic advice.
- // Also, should this be guarded by allowExtraAnalysis() and/or be part
- // of the returned info from isFunctionVectorizable()?
- reportVectorizationFailure(
- "Found a non-intrinsic callsite",
- "library call cannot be vectorized. "
- "Try compiling with -fno-math-errno, -ffast-math, "
- "or similar flags",
- "CantVectorizeLibcall", ORE, TheLoop, CI);
- } else {
- reportVectorizationFailure("Found a non-intrinsic callsite",
- "call instruction cannot be vectorized",
- "CantVectorizeLibcall", ORE, TheLoop, CI);
- }
- return false;
- }
+ // We only allow if-converted PHIs with exactly two incoming values.
+ if (Phi->getNumIncomingValues() != 2) {
+ reportVectorizationFailure(
+ "Found an invalid PHI",
+ "loop control flow is not understood by vectorizer",
+ "CFGNotUnderstood", ORE, TheLoop, Phi);
+ return false;
+ }
- // Some intrinsics have scalar arguments and should be same in order for
- // them to be vectorized (i.e. loop invariant).
- if (CI) {
- auto *SE = PSE.getSE();
- Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
- for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
- if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
- if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)),
- TheLoop)) {
- reportVectorizationFailure("Found unvectorizable intrinsic",
- "intrinsic instruction cannot be vectorized",
- "CantVectorizeIntrinsic", ORE, TheLoop, CI);
- return false;
- }
- }
- }
+ RecurrenceDescriptor RedDes;
+ if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, DT,
+ PSE.getSE())) {
+ Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
+ AllowedExit.insert(RedDes.getLoopExitInstr());
+ Reductions[Phi] = RedDes;
+ return true;
+ }
- // If we found a vectorized variant of a function, note that so LV can
- // make better decisions about maximum VF.
- if (CI && !VFDatabase::getMappings(*CI).empty())
- VecCallVariantsFound = true;
-
- auto CanWidenInstructionTy = [](Instruction const &Inst) {
- Type *InstTy = Inst.getType();
- if (!isa<StructType>(InstTy))
- return canVectorizeTy(InstTy);
-
- // For now, we only recognize struct values returned from calls where
- // all users are extractvalue as vectorizable. All element types of the
- // struct must be types that can be widened.
- return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
- all_of(Inst.users(), IsaPred<ExtractValueInst>);
- };
+ // We prevent matching non-constant strided pointer IVS to preserve
+ // historical vectorizer behavior after a generalization of the
+ // IVDescriptor code. The intent is to remove this check, but we
+ // have to fix issues around code quality for such loops first.
+ auto IsDisallowedStridedPointerInduction =
+ [](const InductionDescriptor &ID) {
+ if (AllowStridedPointerIVs)
+ return false;
+ return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
+ ID.getConstIntStepValue() == nullptr;
+ };
+
+ // TODO: Instead of recording the AllowedExit, it would be good to
+ // record the complementary set: NotAllowedExit. These include (but may
+ // not be limited to):
+ // 1. Reduction phis as they represent the one-before-last value, which
+ // is not available when vectorized
+ // 2. Induction phis and increment when SCEV predicates cannot be used
+ // outside the loop - see addInductionPhi
+ // 3. Non-Phis with outside uses when SCEV predicates cannot be used
+ // outside the loop - see call to hasOutsideLoopUser in the non-phi
+ // handling below
+ // 4. FixedOrderRecurrence phis that can possibly be handled by
+ // extraction.
+ // By recording these, we can then reason about ways to vectorize each
+ // of these NotAllowedExit.
+ InductionDescriptor ID;
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
+ !IsDisallowedStridedPointerInduction(ID)) {
+ addInductionPhi(Phi, ID, AllowedExit);
+ Requirements->addExactFPMathInst(ID.getExactFPMathInst());
+ return true;
+ }
- // Check that the instruction return type is vectorizable.
- // We can't vectorize casts from vector type to scalar type.
- // Also, we can't vectorize extractelement instructions.
- if (!CanWidenInstructionTy(I) ||
- (isa<CastInst>(I) &&
- !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
- isa<ExtractElementInst>(I)) {
- reportVectorizationFailure("Found unvectorizable type",
- "instruction return type cannot be vectorized",
- "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
- return false;
- }
+ if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
+ AllowedExit.insert(Phi);
+ FixedOrderRecurrences.insert(Phi);
+ return true;
+ }
+
+ // As a last resort, coerce the PHI to a AddRec expression
+ // and re-try classifying it a an induction PHI.
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
+ !IsDisallowedStridedPointerInduction(ID)) {
+ addInductionPhi(Phi, ID, AllowedExit);
+ return true;
+ }
- // Check that the stored type is vectorizable.
- if (auto *ST = dyn_cast<StoreInst>(&I)) {
- Type *T = ST->getValueOperand()->getType();
- if (!VectorType::isValidElementType(T)) {
- reportVectorizationFailure("Store instruction cannot be vectorized",
- "CantVectorizeStore", ORE, TheLoop, ST);
+ reportVectorizationFailure("Found an unidentified PHI",
+ "value that could not be identified as "
+ "reduction is used outside the loop",
+ "NonReductionValueUsedOutsideLoop", ORE, TheLoop,
+ Phi);
+ return false;
+ } // end of PHI handling
+
+ // We handle calls that:
+ // * Have a mapping to an IR intrinsic.
+ // * Have a vector version available.
+ auto *CI = dyn_cast<CallInst>(&I);
+
+ if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
+ !(CI->getCalledFunction() && TLI &&
+ (!VFDatabase::getMappings(*CI).empty() || isTLIScalarize(*TLI, *CI)))) {
+ // If the call is a recognized math libary call, it is likely that
+ // we can vectorize it given loosened floating-point constraints.
+ LibFunc Func;
+ bool IsMathLibCall =
+ TLI && CI->getCalledFunction() && CI->getType()->isFloatingPointTy() &&
+ TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
+ TLI->hasOptimizedCodeGen(Func);
+
+ if (IsMathLibCall) {
+ // TODO: Ideally, we should not use clang-specific language here,
+ // but it's hard to provide meaningful yet generic advice.
+ // Also, should this be guarded by allowExtraAnalysis() and/or be part
+ // of the returned info from isFunctionVectorizable()?
+ reportVectorizationFailure(
+ "Found a non-intrinsic callsite",
+ "library call cannot be vectorized. "
+ "Try compiling with -fno-math-errno, -ffast-math, "
+ "or similar flags",
+ "CantVectorizeLibcall", ORE, TheLoop, CI);
+ } else {
+ reportVectorizationFailure("Found a non-intrinsic callsite",
+ "call instruction cannot be vectorized",
+ "CantVectorizeLibcall", ORE, TheLoop, CI);
+ }
+ return false;
+ }
+
+ // Some intrinsics have scalar arguments and should be same in order for
+ // them to be vectorized (i.e. loop invariant).
+ if (CI) {
+ auto *SE = PSE.getSE();
+ Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
+ for (unsigned Idx = 0; Idx < CI->arg_size(); ++Idx)
+ if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, Idx, TTI)) {
+ if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(Idx)), TheLoop)) {
+ reportVectorizationFailure(
+ "Found unvectorizable intrinsic",
+ "intrinsic instruction cannot be vectorized",
+ "CantVectorizeIntrinsic", ORE, TheLoop, CI);
return false;
}
+ }
+ }
- // For nontemporal stores, check that a nontemporal vector version is
- // supported on the target.
- if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
- // Arbitrarily try a vector of 2 elements.
- auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
- assert(VecTy && "did not find vectorized version of stored type");
- if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
- reportVectorizationFailure(
- "nontemporal store instruction cannot be vectorized",
- "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
- return false;
- }
- }
+ // If we found a vectorized variant of a function, note that so LV can
+ // make better decisions about maximum VF.
+ if (CI && !VFDatabase::getMappings(*CI).empty())
+ VecCallVariantsFound = true;
+
+ auto CanWidenInstructionTy = [](Instruction const &Inst) {
+ Type *InstTy = Inst.getType();
+ if (!isa<StructType>(InstTy))
+ return canVectorizeTy(InstTy);
+
+ // For now, we only recognize struct values returned from calls where
+ // all users are extractvalue as vectorizable. All element types of the
+ // struct must be types that can be widened.
+ return isa<CallInst>(Inst) && canWidenCallReturnType(InstTy) &&
+ all_of(Inst.users(), IsaPred<ExtractValueInst>);
+ };
- } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
- if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
- // For nontemporal loads, check that a nontemporal vector version is
- // supported on the target (arbitrarily try a vector of 2 elements).
- auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
- assert(VecTy && "did not find vectorized version of load type");
- if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
- reportVectorizationFailure(
- "nontemporal load instruction cannot be vectorized",
- "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
- return false;
- }
- }
+ // Check that the instruction return type is vectorizable.
+ // We can't vectorize casts from vector type to scalar type.
+ // Also, we can't vectorize extractelement instructions.
+ if (!CanWidenInstructionTy(I) ||
+ (isa<CastInst>(I) &&
+ !VectorType::isValidElementType(I.getOperand(0)->getType())) ||
...
[truncated]
|
fhahn
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks.
This is in line with how other places handle DoExtraAnalysis and it can be quite helpful to get info about all instructions in a loop that prevent vectorization
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/22791 Here is the relevant piece of the build log for the reference |
If ExtraAnalysis is requested, emit all remarks caused by unvectorizable instructions - instead of only the first.
This causes a lot of indent churn, so the PR is split into 2 commits, the first contains the actual changes, the second one is just formatting.