-
Couldn't load subscription status.
- Fork 15k
[SLP]Initial support for copyable elements (non-schedulable only) #140279
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SLP]Initial support for copyable elements (non-schedulable only) #140279
Conversation
Created using spr 1.3.5
|
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers Author: Alexey Bataev (alexey-bataev) Changes: Adds initial support for copyable elements. This patch only models adds and models copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Patch is 37.79 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140279.diff 6 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index c63f80675fef4..97d6068571918 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -206,6 +206,12 @@ static cl::opt<bool> VectorizeNonPowerOf2(
"slp-vectorize-non-power-of-2", cl::init(false), cl::Hidden,
cl::desc("Try to vectorize with non-power-of-2 number of elements."));
+/// Enables vectorization of copyable elements.
+static cl::opt<bool> VectorizeCopyableElements(
+ "slp-copyable-elements", cl::init(true), cl::Hidden,
+ cl::desc("Try to replace values with the idempotent instructions for "
+ "better vectorization."));
+
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
@@ -835,6 +841,13 @@ static std::optional<unsigned> getExtractIndex(const Instruction *E) {
return *EI->idx_begin();
}
+namespace llvm {
+/// Checks if the specified value does not require scheduling. It does not
+/// require scheduling if all operands and all users do not need to be scheduled
+/// in the current basic block.
+static bool doesNotNeedToBeScheduled(Value *V);
+} // namespace llvm
+
namespace {
/// \returns true if \p Opcode is allowed as part of the main/alternate
/// instruction for SLP vectorization.
@@ -1170,9 +1183,11 @@ class InstructionsState {
if (!I->isBinaryOp())
return nullptr;
BinOpSameOpcodeHelper Converter(MainOp);
- if (Converter.add(I) && Converter.add(MainOp) && !Converter.hasAltOp())
- return MainOp;
- return AltOp;
+ if (!Converter.add(I) || !Converter.add(MainOp))
+ return nullptr;
+ if (Converter.hasAltOp() && !isAltShuffle())
+ return nullptr;
+ return Converter.hasAltOp() ? AltOp : MainOp;
}
/// Checks if main/alt instructions are shift operations.
@@ -1220,6 +1235,48 @@ class InstructionsState {
InstructionsState(Instruction *MainOp, Instruction *AltOp)
: MainOp(MainOp), AltOp(AltOp) {}
static InstructionsState invalid() { return {nullptr, nullptr}; }
+
+ bool isCopyableElement(Value *V) const {
+ assert(valid() && "InstructionsState is invalid.");
+ if (isAltShuffle() || getOpcode() == Instruction::GetElementPtr)
+ return false;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I && isa<PoisonValue>(V))
+ return false;
+ // FIXME: remove doesNotNeedToBeScheduled() and isa<PHINode>() check once
+ // scheduling is supported.
+ return !I ||
+ (I->getParent() != MainOp->getParent() &&
+ (!isVectorLikeInstWithConstOps(I) ||
+ !isVectorLikeInstWithConstOps(MainOp))) ||
+ (I->getOpcode() != MainOp->getOpcode() &&
+ (isa<PHINode>(I) || doesNotNeedToBeScheduled(I)) &&
+ (!I->isBinaryOp() || getMatchingMainOpOrAltOp(I) != MainOp));
+ }
+
+ bool areInstructionsWithCopyableElements(ArrayRef<Value *> VL) const {
+ assert(valid() && "InstructionsState is invalid.");
+ bool HasAtLeastOneCopyableElement = false;
+ auto IsCopyableElement = [&](Value *V) {
+ bool IsCopyable = isCopyableElement(V);
+ HasAtLeastOneCopyableElement |= IsCopyable;
+ return IsCopyable;
+ };
+ return !isAltShuffle() && all_of(VL, [&](Value *V) {
+ if (V == MainOp || isa<PoisonValue>(V))
+ return true;
+ if (IsCopyableElement(V))
+ return true;
+ auto *I = dyn_cast<Instruction>(V);
+ if (getOpcode() == Instruction::GetElementPtr && !I)
+ return true;
+ return I->getType() == MainOp->getType() &&
+ (I->getParent() == MainOp->getParent() ||
+ (isVectorLikeInstWithConstOps(I) &&
+ isVectorLikeInstWithConstOps(MainOp))) &&
+ getMatchingMainOpOrAltOp(cast<Instruction>(V)) == MainOp;
+ }) && HasAtLeastOneCopyableElement;
+ }
};
std::pair<Instruction *, SmallVector<Value *>>
@@ -2878,9 +2935,6 @@ class BoUpSLP {
for (OperandDataVec &Ops : OpsVec)
Ops.resize(NumLanes);
for (unsigned Lane : seq<unsigned>(NumLanes)) {
- Value *V = VL[Lane];
- assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
- "Expected instruction or poison value");
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of V and whether the operand at OpIdx is the LHS or RHS
@@ -2891,13 +2945,20 @@ class BoUpSLP {
// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely tell
// the inverse operations by checking commutativity.
- if (isa<PoisonValue>(V)) {
+ auto *I = dyn_cast<Instruction>(VL[Lane]);
+ if (!I && isa<PoisonValue>(VL[Lane])) {
for (unsigned OpIdx : seq<unsigned>(NumOperands))
OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], true, false};
continue;
}
- auto [SelectedOp, Ops] = convertTo(cast<Instruction>(V), S);
- bool IsInverseOperation = !isCommutative(SelectedOp);
+ bool IsInverseOperation = false;
+ if (S.isCopyableElement(VL[Lane])) {
+ // The value is a copyable element.
+ IsInverseOperation = !isCommutative(MainOp);
+ } else {
+ auto [SelectedOp, Ops] = convertTo(I, S);
+ IsInverseOperation = !isCommutative(SelectedOp);
+ }
for (unsigned OpIdx : seq<unsigned>(ArgSize)) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {Operands[OpIdx][Lane], APO, false};
@@ -3905,6 +3966,14 @@ class BoUpSLP {
bool hasState() const { return S.valid(); }
+ /// Returns true if \p V is a copyable element.
+ bool isCopyableElement(Value *V) const { return S.isCopyableElement(V); }
+
+ /// Returns true if any scalar in the list is a copyable element.
+ bool hasCopyableElements() const {
+ return S.areInstructionsWithCopyableElements(Scalars);
+ }
+
/// When ReuseReorderShuffleIndices is empty it just returns position of \p
/// V within vector of Scalars. Otherwise, try to remap on its reuse index.
int findLaneForValue(Value *V) const {
@@ -4153,7 +4222,7 @@ class BoUpSLP {
} else if (!Last->isGather()) {
SmallPtrSet<Value *, 4> Processed;
for (Value *V : VL) {
- if (isa<PoisonValue>(V))
+ if (isa<PoisonValue>(V) || S.isCopyableElement(V))
continue;
auto It = ScalarToTreeEntries.find(V);
if (It == ScalarToTreeEntries.end()) {
@@ -4168,14 +4237,20 @@ class BoUpSLP {
// Update the scheduler bundle to point to this TreeEntry.
assert((!Bundle.getBundle().empty() || isa<PHINode>(S.getMainOp()) ||
isVectorLikeInstWithConstOps(S.getMainOp()) ||
- doesNotNeedToSchedule(VL)) &&
+ doesNotNeedToSchedule(VL) ||
+ all_of(VL,
+ [&](Value *V) {
+ return S.isCopyableElement(V) ||
+ doesNotNeedToBeScheduled(V);
+ })) &&
"Bundle and VL out of sync");
if (!Bundle.getBundle().empty()) {
#if !defined(NDEBUG) || defined(EXPENSIVE_CHECKS)
auto *BundleMember = Bundle.getBundle().begin();
SmallPtrSet<Value *, 4> Processed;
for (Value *V : VL) {
- if (doesNotNeedToBeScheduled(V) || !Processed.insert(V).second)
+ if (doesNotNeedToBeScheduled(V) || S.isCopyableElement(V) ||
+ !Processed.insert(V).second)
continue;
++BundleMember;
}
@@ -4284,7 +4359,8 @@ class BoUpSLP {
/// in general.
ScalarsVectorizationLegality
getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
- const EdgeInfo &UserTreeIdx) const;
+ const EdgeInfo &UserTreeIdx,
+ bool TryCopyableElementsVectorization) const;
/// Checks if the specified list of the instructions/values can be vectorized
/// and fills required data before actual scheduling of the instructions.
@@ -4996,7 +5072,8 @@ class BoUpSLP {
/// Build a bundle from the ScheduleData nodes corresponding to the
/// scalar instruction for each lane.
- ScheduleBundle &buildBundle(ArrayRef<Value *> VL);
+ ScheduleBundle &buildBundle(ArrayRef<Value *> VL,
+ const InstructionsState &S);
/// Checks if a bundle of instructions can be scheduled, i.e. has no
/// cyclic dependencies. This is only a dry-run, no instructions are
@@ -7893,7 +7970,7 @@ void BoUpSLP::buildExternalUses(
// For each lane:
for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
Value *Scalar = Entry->Scalars[Lane];
- if (!isa<Instruction>(Scalar))
+ if (!isa<Instruction>(Scalar) || Entry->isCopyableElement(Scalar))
continue;
// All uses must be replaced already? No need to do it again.
auto It = ScalarToExtUses.find(Scalar);
@@ -9617,7 +9694,8 @@ static bool tryToFindDuplicates(SmallVectorImpl<Value *> &VL,
PoisonValue::get(UniqueValues.front()->getType()));
// Check that extended with poisons operations are still valid for
// vectorization (div/rem are not allowed).
- if (!getSameOpcode(PaddedUniqueValues, TLI).valid()) {
+ if (!S.areInstructionsWithCopyableElements(PaddedUniqueValues) &&
+ !getSameOpcode(PaddedUniqueValues, TLI).valid()) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
ReuseShuffleIndices.clear();
return false;
@@ -9766,13 +9844,112 @@ bool BoUpSLP::canBuildSplitNode(ArrayRef<Value *> VL,
}
namespace {
-/// Class accepts incoming list of values and generates the list of values
-/// for scheduling and list of operands for the new nodes.
+/// Class accepts incoming list of values, checks if it is able to model
+/// "copyable" values as compatible operations, and generates the list of values
+/// for scheduling and list of operands for the new nodes.
class InstructionsCompatibilityAnalysis {
DominatorTree &DT;
const DataLayout &DL;
const TargetTransformInfo &TTI;
const TargetLibraryInfo &TLI;
+ unsigned MainOpcode = 0;
+ Instruction *MainOp = nullptr;
+
+ /// Identifies the best candidate value, which represents main opcode
+ /// operation.
+ /// Currently the best candidate is the Add instruction with the parent
+ /// block with the highest DFS incoming number (block, that dominates other).
+ void findAndSetMainInstruction(ArrayRef<Value *> VL) {
+ BasicBlock *Parent = nullptr;
+ // Checks if the instruction has supported opcode.
+ auto IsSupportedOpcode = [](Instruction *I) {
+ return I && I->getOpcode() == Instruction::Add;
+ };
+ for (Value *V : VL) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ continue;
+ if (!DT.isReachableFromEntry(I->getParent()))
+ continue;
+ if (!MainOp) {
+ MainOp = I;
+ Parent = I->getParent();
+ continue;
+ }
+ if (Parent == I->getParent()) {
+ if (!IsSupportedOpcode(MainOp))
+ MainOp = I;
+ if (MainOp->getOpcode() == I->getOpcode() &&
+ doesNotNeedToBeScheduled(MainOp) && !doesNotNeedToBeScheduled(I))
+ MainOp = I;
+ continue;
+ }
+ auto *NodeA = DT.getNode(Parent);
+ auto *NodeB = DT.getNode(I->getParent());
+ assert(NodeA && "Should only process reachable instructions");
+ assert(NodeB && "Should only process reachable instructions");
+ assert((NodeA == NodeB) ==
+ (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
+ "Different nodes should have different DFS numbers");
+ if (NodeA->getDFSNumIn() < NodeB->getDFSNumIn()) {
+ MainOp = I;
+ Parent = I->getParent();
+ }
+ }
+ // FIXME: remove second part of the check, once the scheduling support
+ // for copyable instructions is landed.
+ if (!IsSupportedOpcode(MainOp) || any_of(VL, [&](Value *V) {
+ auto *I = dyn_cast<Instruction>(V);
+ return I && I->getOpcode() != MainOp->getOpcode() &&
+ I->getParent() == MainOp->getParent() && !isa<PHINode>(I) &&
+ !doesNotNeedToBeScheduled(I);
+ })) {
+ MainOp = nullptr;
+ return;
+ }
+ MainOpcode = MainOp->getOpcode();
+ }
+
+ /// Returns the idempotent value for the \p MainOp with the detected \p
+ /// MainOpcode. For Add, returns 0. For Or, it should choose between false and
+ /// the operand itself, since V or V == V.
+ Value *selectBestIdempotentValue() const {
+ switch (MainOpcode) {
+ case Instruction::Add:
+ return ConstantInt::getNullValue(MainOp->getType());
+ default:
+ break;
+ }
+ llvm_unreachable("Unsupported opcode");
+ }
+
+ unsigned getNumberOfOperands() const {
+ switch (MainOpcode) {
+ case Instruction::Add:
+ return 2;
+ default:
+ break;
+ }
+ llvm_unreachable("Unsupported opcode");
+ }
+
+ /// Returns the value and operands for the \p V, considering if it is original
+ /// instruction and its actual operands should be returned, or it is a
+ /// copyable element and it should be represented as an idempotent instruction.
+ SmallVector<Value *> getOperands(const InstructionsState &S, Value *V) const {
+ bool MatchesMainOp = !S.isCopyableElement(V);
+ switch (MainOpcode) {
+ case Instruction::Add:
+ if (isa<PoisonValue>(V))
+ return {V, V};
+ if (MatchesMainOp)
+ return SmallVector<Value *>(cast<Instruction>(V)->operands());
+ return {V, selectBestIdempotentValue()};
+ default:
+ break;
+ }
+ llvm_unreachable("Unsupported opcode");
+ }
/// Builds operands for the original instructions.
void
@@ -9933,22 +10110,122 @@ class InstructionsCompatibilityAnalysis {
const TargetLibraryInfo &TLI)
: DT(DT), DL(DL), TTI(TTI), TLI(TLI) {}
+ InstructionsState
+ buildInstructionsState(ArrayRef<Value *> VL, const BoUpSLP &R,
+ bool TryCopyableElementsVectorization,
+ bool WithProfitabilityCheck = false) {
+ InstructionsState S = getSameOpcode(VL, TLI);
+ if (S)
+ return S;
+ if (!VectorizeCopyableElements || !TryCopyableElementsVectorization)
+ return S;
+ findAndSetMainInstruction(VL);
+ if (!MainOp)
+ return InstructionsState::invalid();
+ S = InstructionsState(MainOp, MainOp);
+ if (!WithProfitabilityCheck)
+ return S;
+ // Check if it is profitable to vectorize the instruction.
+ SmallVector<BoUpSLP::ValueList> Operands = buildOperands(S, VL);
+ if (VL.size() == 2) {
+ // Check if the operands allow better vectorization.
+ SmallVector<std::pair<Value *, Value *>, 4> Candidates;
+ Candidates.emplace_back(Operands[0][0], Operands[0][1]);
+ Candidates.emplace_back(Operands[1][0], Operands[1][1]);
+ if (isCommutative(MainOp)) {
+ Candidates.emplace_back(Operands[0][0], Operands[1][1]);
+ Candidates.emplace_back(Operands[1][0], Operands[0][1]);
+ }
+ // No good candidates - not profitable.
+ if (!R.findBestRootPair(Candidates,
+ BoUpSLP::LookAheadHeuristics::ScoreSplat)) {
+ // Deeper analysis for 2 splats/constants.
+ SmallVector<std::pair<Value *, Value *>, 4> Candidates1, Candidates2;
+ Candidates1.emplace_back(Operands[0][0], Operands[0][1]);
+ Candidates2.emplace_back(Operands[1][0], Operands[1][1]);
+ bool Res = R.findBestRootPair(Candidates1) &&
+ R.findBestRootPair(Candidates2);
+ if (!Res && isCommutative(MainOp)) {
+ Candidates1.clear();
+ Candidates2.clear();
+ Candidates1.emplace_back(Operands[0][0], Operands[1][1]);
+ Candidates2.emplace_back(Operands[1][0], Operands[0][1]);
+ Res = R.findBestRootPair(Candidates1) &&
+ R.findBestRootPair(Candidates2);
+ }
+ if (!Res)
+ return InstructionsState::invalid();
+ }
+ }
+ assert(Operands.size() == 2 && "Unexpected number of operands!");
+ unsigned CopyableNum =
+ count_if(VL, [&](Value *V) { return S.isCopyableElement(V); });
+ if (CopyableNum <= VL.size() / 2)
+ return S;
+ // Check profitability if number of copyables > VL.size() / 2.
+ // 1. Reorder operands for better matching.
+ if (isCommutative(MainOp)) {
+ for (auto &Ops : Operands) {
+ // Make instructions the first operands.
+ if (isa<Instruction>(Ops.back())) {
+ std::swap(Ops.front(), Ops.back());
+ continue;
+ }
+ // Make constants the second operands.
+ if (isa<Constant>(Ops.front())) {
+ std::swap(Ops.front(), Ops.back());
+ continue;
+ }
+ }
+ }
+ // 2. Check, if operands can be vectorized.
+ if (!allConstant(Operands.back()))
+ return InstructionsState::invalid();
+ bool Res = allConstant(Operands.front()) || isSplat(Operands.front());
+ if (!Res) {
+ // First operand not a constant or splat? Last attempt - check for
+ // potential vectorization.
+ InstructionsCompatibilityAnalysis Analysis(DT, DL, TTI, TLI);
+ if (!Analysis.buildInstructionsState(
+ Operands.front(), R,
+ /*TryCopyableElementsVectorization=*/true))
+ return InstructionsState::invalid();
+ }
+
+ return S;
+ }
+
SmallVector<BoUpSLP::ValueList> buildOperands(const InstructionsState &S,
ArrayRef<Value *> VL) {
assert(S && "Invalid state!");
SmallVector<BoUpSLP::ValueList> Operands;
- buildOriginalOperands(S, VL, Operands);
+ if (S.areInstructionsWithCopyableElements(VL)) {
+ MainOp = S.getMainOp();
+ MainOpcode = S.getOpcode();
+ Operands.assign(getNumberOfOperands(),
+ BoUpSLP::ValueList(VL.size(), nullptr));
+ for (auto [Idx, V] : enumerate(VL)) {
+ SmallVector<Value *> OperandsForValue = getOperands(S, V);
+ for (auto [OperandIdx, Operand] : enumerate(OperandsForValue))
+ Operands[OperandIdx][Idx] = Operand;
+ }
+ } else {
+ buildOriginalOperands(S, VL, Operands);
+ }
return Operands;
}
};
} // namespace
-BoUpSLP::ScalarsVectorizationLegality
-BoUpSLP::getScalarsVectorizationLegality(ArrayRef<Value *> VL, unsigned Depth,
- const EdgeInfo &UserTreeIdx) const {
+BoUpSLP::ScalarsVectorizationLegality BoUpSLP::getScalarsVectorizationLegality(
+ ArrayRef<Value *> VL, unsigned Depth, const EdgeInfo &UserTreeIdx,
+ bool TryCopyableElementsVectorization) const {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
- InstructionsState S = getSameOpcode(VL, *TLI);
+ InstructionsCompatibilityAnalysis Analysis(*DT, *DL, *TTI, *TLI);
+ InstructionsState S = Analysis.buildInstructionsState(
+ VL, *this, TryCopyableElementsVectorization,
+ /*WithProfitabilityCheck=*/true);
// Don't go into catchswitch blocks, which can happen with PHIs.
// Such blocks can only have PHIs and the catchswitch. There is no
@@ -10247,9 +10524,9 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
return true;
};
- ScalarsVectorizationLegality Legality =
- getScalarsVectorizationLegality(VL, Depth, UserTreeIdx);
- const InstructionsState &S = Legality.getInstructionsState();
+ ScalarsVectorizationLegality Legality = getScalarsVectorizationLegality(
+ VL, Depth, UserTreeIdx, /*TryCopyableElementsVectorization=*/false);
+ InstructionsState S = Legality.getInstructionsState();
if (!Legality.isLegal()) {
if (Legality.trySplitVectorize()) {
auto [MainOp, AltOp] = getMainAltOpsNoStateVL(VL);
@@ -10257,11 +10534,18 @@ void BoUpSLP::buildTreeRec(ArrayRef<Value *> VLRef, unsigned Depth,
if (MainOp && AltOp && TrySplitNode(InstructionsState(MainOp, AltOp)))
return;
}
- if (Legality.tryToFindD...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Created using spr 1.3.5
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
llvm\test\Transforms\SLPVectorizer\X86\vect_copyable_in_binops.ll has a number of add tests - how come none of them matched?
These tests require support for schedulable instructions. This is planned for the next patch. Splitting the patches to limit the number of changes |
Created using spr 1.3.5
|
Hi @alexey-bataev |
Your patch solves the problem by inserting a new instruction. It breaks the design of the vectorizer - “model first, then modify”. No need to insert new instructions, it leads to issues with compile time, analysis state, etc. Instead, need to model such values as values, which actually represent “virtual” identity instructions without emitting such instructions. |
Thanks for the feedback. |
|
Yes, this is just the initial patch, the first in a series
Terminology wise, this would be much clearer if you described this as using identity constants for missing lanes. The key bit in the above is the 0 is the identity value for add. |
Created using spr 1.3.5
Created using spr 1.3.5
Created using spr 1.3.5
You can test this locally with the following command: git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef[^a-zA-Z0-9_-]|UndefValue::get)' 'HEAD~1' HEAD llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/SLPVectorizer/X86/buildvector-schedule-for-subvector.ll llvm/test/Transforms/SLPVectorizer/X86/full-match-with-poison-scalar.ll llvm/test/Transforms/SLPVectorizer/X86/node-outside-used-only.ll llvm/test/Transforms/SLPVectorizer/X86/non-schedulable-instructions-become-schedulable.ll llvm/test/Transforms/SLPVectorizer/X86/pr47642.ll llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll The following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields `undef`. In tests, avoid using `undef`. For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
} Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
} Please refer to the Undefined Behavior Manual for more information. |
Created using spr 1.3.5
Created using spr 1.3.5
|
Ping! |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
…e only) Adds initial support for copyable elements. This patch only models adds and model copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Only support for elements, which do not require scheduling, is added to reduce size of the patch. Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: llvm/llvm-project#140279
|
FYI this has some compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=36089e5d983fe9ae00f497c2d500f37227f82db1&to=e202dba288edd47f1b370cc43aa8cd36a924e7c1&stat=instructions:u Some files with large impact are libclamav_nsis_LZMADecode.c (+8%) and zlib_inflate.c (+4%). |
Reverted for now in a415d68, will try to fix |
Adds initial support for copyable elements. This patch only models adds and model copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Only support for elements, which do not require scheduling, is added to reduce size of the patch. Fixed compile time regressions, updated release notes Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: #140279
…e only) Adds initial support for copyable elements. This patch only models adds and model copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Only support for elements, which do not require scheduling, is added to reduce size of the patch. Fixed compile time regressions, updated release notes Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: llvm/llvm-project#140279
Adds initial support for copyable elements. This patch only models adds and model copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Only support for elements, which do not require scheduling, is added to reduce size of the patch. Fixed compile time regressions, updated release notes Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: llvm/llvm-project#140279
|
This change makes building LLVM with frontend instrumentation to hang on compiling To reproduce this hang, download https://martin.st/temp/instcount-preproc.cpp and compile it with |
|
This caused clang to crash on our Mac build: Attached repro. Please revert if it takes a while to fix it. |
…nly)" This reverts commit 898bba3. This change caused hangs and crashes, see #140279 (comment).
|
I pushed a revert now. I had to revert c9cea24 as well as the problematic commit didn't revert cleanly initially. |
…hedulable only)" This reverts commit 898bba3. This change caused hangs and crashes, see llvm/llvm-project#140279 (comment).
Adds initial support for copyable elements. This patch only models adds and model copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Only support for elements, which do not require scheduling, is added to reduce size of the patch. Fixed compile time regressions, reported crashes, updated release notes Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: #140279
…e only) Adds initial support for copyable elements. This patch only models adds and model copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Only support for elements, which do not require scheduling, is added to reduce size of the patch. Fixed compile time regressions, reported crashes, updated release notes Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: llvm/llvm-project#140279
Adds initial support for copyable elements. This patch only models adds and model copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Only support for elements, which do not require scheduling, is added to reduce size of the patch. Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: llvm#140279
Adds initial support for copyable elements. This patch only models adds and model copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Only support for elements, which do not require scheduling, is added to reduce size of the patch. Fixed compile time regressions, updated release notes Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: llvm#140279
…nly)" This reverts commit 898bba3. This change caused hangs and crashes, see llvm#140279 (comment).
Adds initial support for copyable elements. This patch only models adds and model copyable elements as add <element>, 0, i.e. uses identity constants for missing lanes. Only support for elements, which do not require scheduling, is added to reduce size of the patch. Fixed compile time regressions, reported crashes, updated release notes Reviewers: RKSimon, hiraditya Reviewed By: RKSimon Pull Request: llvm#140279
We're hitting a similar issue which bisects to the re-land (ef98e24). Compiling the attached source with frontend instrumentation hangs -- or hits an assert when asserts are enabled: I'll work on getting a smaller reproducer as well. |
|
A reduced C++ repro: reduced.ii.gz An IR reproducer based on the above: reduced.ll.gz Could you take a look? |
I checked the original reproducer and the reduced one, used the debug version of the compiler. It does not look like SLP vectorizer is an issue here, even with the debug version of the compiler it passed fast. Looks like a backend issue, llc crashes after peephole-opt pass. |
But it asserts in the backend, right? "Invalid SubReg for physical register" does sound like a backend problem. Could it be caused by the vectorizer somehow generating bad IR, or do you think it just uncovered a pre-existing backend bug? |
|
Yes, looks like you run into a new bug in the backend (codegen), revealed by the changes in SLP Vectorizer (middle-end optimization). |
Adds initial support for copyable elements. This patch only models adds
and models copyable elements as add <element>, 0, i.e. uses identity
constants for missing lanes.
Only support for elements, which do not require scheduling, is added to
reduce size of the patch.