Skip to content

Commit 20bf7ad

Browse files
committed
Clean up comments and simplify some logic
1 parent 0a0aa2e commit 20bf7ad

File tree

1 file changed

+32
-33
lines changed

1 file changed

+32
-33
lines changed

llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp

Lines changed: 32 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -834,8 +834,8 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
834834
unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth(AS) / 8;
835835

836836
// For compile time reasons, we cache whether or not the superset
837-
// of all candidate chains contains any extra stores from earlier gap
838837
// of all candidate chains contains any extra loads/stores from earlier gap
838+
// filling.
839839
bool CandidateChainsMayContainExtraLoadsStores = any_of(
840840
C, [this](const ChainElem &E) { return ExtraElements.contains(E.Inst); });
841841

@@ -913,6 +913,7 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
913913
}
914914
}
915915

916+
// Attempt to extend non-power-of-2 chains to the next power of 2.
916917
Chain ExtendingLoadsStores;
917918
if (NumVecElems < TargetVF && NumVecElems % 2 != 0 && VecElemBits >= 8) {
918919
// TargetVF may be a lot higher than NumVecElems,
@@ -928,18 +929,17 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
928929
<< NumVecElems << " "
929930
<< (IsLoadChain ? "loads" : "stores") << " to "
930931
<< NewNumVecElems << " elements\n");
931-
// Do not artificially increase the chain if it becomes misaligned,
932-
// otherwise we may unnecessary split the chain when the target actually
933-
// supports non-pow2 VF.
932+
// Do not artificially increase the chain if it becomes misaligned or if
933+
// the associated masked load/store is not legal, otherwise we may
934+
// unnecessarily split the chain when the target actually supports
935+
// non-pow2 VF.
934936
if (accessIsAllowedAndFast(NewSizeBytes, AS, Alignment, VecElemBits) &&
935-
((IsLoadChain &&
936-
TTI.isLegalMaskedLoad(
937-
FixedVectorType::get(VecElemTy, NewNumVecElems), Alignment,
938-
AS, TTI::MaskKind::ConstantMask)) ||
939-
(!IsLoadChain &&
940-
TTI.isLegalMaskedStore(
941-
FixedVectorType::get(VecElemTy, NewNumVecElems), Alignment,
942-
AS, TTI::MaskKind::ConstantMask)))) {
937+
(IsLoadChain ? TTI.isLegalMaskedLoad(
938+
FixedVectorType::get(VecElemTy, NewNumVecElems),
939+
Alignment, AS, TTI::MaskKind::ConstantMask)
940+
: TTI.isLegalMaskedStore(
941+
FixedVectorType::get(VecElemTy, NewNumVecElems),
942+
Alignment, AS, TTI::MaskKind::ConstantMask))) {
943943
LLVM_DEBUG(dbgs()
944944
<< "LSV: extending " << (IsLoadChain ? "load" : "store")
945945
<< " chain of " << NumVecElems << " "
@@ -950,13 +950,14 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
950950
<< " with total byte size of " << NewSizeBytes
951951
<< ", TargetVF=" << TargetVF << " \n");
952952

953+
// Create (NewNumVecElems - NumVecElems) extra elements.
953954
unsigned ASPtrBits = DL.getIndexSizeInBits(AS);
954955
ChainElem Prev = C[CEnd];
955-
for (unsigned i = 0; i < (NewNumVecElems - NumVecElems); i++) {
956+
for (unsigned I = (NewNumVecElems - NumVecElems); I != 0; --I) {
956957
ChainElem NewElem = createExtraElementAfter(
957958
Prev, APInt(ASPtrBits, VecElemBytes), "Extend");
958959
ExtendingLoadsStores.push_back(NewElem);
959-
Prev = ExtendingLoadsStores.back();
960+
Prev = NewElem;
960961
}
961962

962963
// Update the size and number of elements for upcoming checks.
@@ -984,30 +985,28 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
984985
}
985986

986987
if (CandidateChainsMayContainExtraLoadsStores) {
987-
// The legality of adding extra loads/stores to ExtendingLoadsStores has
988-
// already been checked, but if the candidate chain contains extra
989-
// loads/stores from an earlier optimization, confirm legality now.
990-
// This filter is essential because, when filling gaps in
991-
// splitChainByContinuity, we queried the API to check that (for a given
992-
// element type and address space) there *may* be a legal masked
993-
// load/store we can aspire to create. Now, we need to check if the
994-
// actual chain we ended up with is legal to turn into a masked
995-
// load/store. This is relevant for NVPTX, for example, where a masked
996-
// store is only legal if we have ended up with a 256-bit vector.
997-
bool CandidateChainContainsExtraLoadsStores = llvm::any_of(
988+
// If the candidate chain contains extra loads/stores from an earlier
989+
// optimization, confirm legality now. This filter is essential because
990+
// when filling gaps in splitChainByContiguity, we queried the API to
991+
// check that (for a given element type and address space) there *may*
992+
// have been a legal masked load/store we could possibly create. Now, we
993+
// need to check if the actual chain we ended up with is legal to turn
994+
// into a masked load/store. This is relevant for NVPTX, for example,
995+
// where a masked store is only legal if we have ended up with a 256-bit
996+
// vector.
997+
bool CurrCandContainsExtraLoadsStores = llvm::any_of(
998998
ArrayRef<ChainElem>(C).slice(CBegin, CEnd - CBegin + 1),
999999
[this](const ChainElem &E) {
10001000
return ExtraElements.contains(E.Inst);
10011001
});
10021002

1003-
if (CandidateChainContainsExtraLoadsStores &&
1004-
((IsLoadChain && !TTI.isLegalMaskedLoad(
1005-
FixedVectorType::get(VecElemTy, NumVecElems),
1006-
Alignment, AS, TTI::MaskKind::ConstantMask)) ||
1007-
(!IsLoadChain &&
1008-
!TTI.isLegalMaskedStore(
1009-
FixedVectorType::get(VecElemTy, NumVecElems), Alignment, AS,
1010-
TTI::MaskKind::ConstantMask)))) {
1003+
if (CurrCandContainsExtraLoadsStores &&
1004+
(IsLoadChain ? !TTI.isLegalMaskedLoad(
1005+
FixedVectorType::get(VecElemTy, NumVecElems),
1006+
Alignment, AS, TTI::MaskKind::ConstantMask)
1007+
: !TTI.isLegalMaskedStore(
1008+
FixedVectorType::get(VecElemTy, NumVecElems),
1009+
Alignment, AS, TTI::MaskKind::ConstantMask))) {
10111010
LLVM_DEBUG(dbgs()
10121011
<< "LSV: splitChainByAlignment discarding candidate chain "
10131012
"because it contains extra loads/stores that we cannot "

0 commit comments

Comments
 (0)