@@ -834,8 +834,8 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
834834 unsigned VecRegBytes = TTI.getLoadStoreVecRegBitWidth (AS) / 8 ;
835835
836836 // For compile time reasons, we cache whether or not the superset
837- // of all candidate chains contains any extra stores from earlier gap
838837 // of all candidate chains contains any extra loads/stores from earlier gap
838+ // filling.
839839 bool CandidateChainsMayContainExtraLoadsStores = any_of (
840840 C, [this ](const ChainElem &E) { return ExtraElements.contains (E.Inst ); });
841841
@@ -913,6 +913,7 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
913913 }
914914 }
915915
916+ // Attempt to extend non-power-of-2 chains to the next power of 2.
916917 Chain ExtendingLoadsStores;
917918 if (NumVecElems < TargetVF && NumVecElems % 2 != 0 && VecElemBits >= 8 ) {
918919 // TargetVF may be a lot higher than NumVecElems,
@@ -928,18 +929,17 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
928929 << NumVecElems << " "
929930 << (IsLoadChain ? " loads" : " stores" ) << " to "
930931 << NewNumVecElems << " elements\n " );
931- // Do not artificially increase the chain if it becomes misaligned,
932- // otherwise we may unnecessary split the chain when the target actually
933- // supports non-pow2 VF.
932+ // Do not artificially increase the chain if it becomes misaligned or if
933+ // the associated masked load/store is not legal, otherwise we may
934+ // unnecessarily split the chain when the target actually supports
935+ // non-pow2 VF.
934936 if (accessIsAllowedAndFast (NewSizeBytes, AS, Alignment, VecElemBits) &&
935- ((IsLoadChain &&
936- TTI.isLegalMaskedLoad (
937- FixedVectorType::get (VecElemTy, NewNumVecElems), Alignment,
938- AS, TTI::MaskKind::ConstantMask)) ||
939- (!IsLoadChain &&
940- TTI.isLegalMaskedStore (
941- FixedVectorType::get (VecElemTy, NewNumVecElems), Alignment,
942- AS, TTI::MaskKind::ConstantMask)))) {
937+ (IsLoadChain ? TTI.isLegalMaskedLoad (
938+ FixedVectorType::get (VecElemTy, NewNumVecElems),
939+ Alignment, AS, TTI::MaskKind::ConstantMask)
940+ : TTI.isLegalMaskedStore (
941+ FixedVectorType::get (VecElemTy, NewNumVecElems),
942+ Alignment, AS, TTI::MaskKind::ConstantMask))) {
943943 LLVM_DEBUG (dbgs ()
944944 << " LSV: extending " << (IsLoadChain ? " load" : " store" )
945945 << " chain of " << NumVecElems << " "
@@ -950,13 +950,14 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
950950 << " with total byte size of " << NewSizeBytes
951951 << " , TargetVF=" << TargetVF << " \n " );
952952
953+ // Create (NewNumVecElems - NumVecElems) extra elements.
953954 unsigned ASPtrBits = DL.getIndexSizeInBits (AS);
954955 ChainElem Prev = C[CEnd];
955- for (unsigned i = 0 ; i < (NewNumVecElems - NumVecElems); i++ ) {
956+ for (unsigned I = (NewNumVecElems - NumVecElems); I != 0 ; --I ) {
956957 ChainElem NewElem = createExtraElementAfter (
957958 Prev, APInt (ASPtrBits, VecElemBytes), " Extend" );
958959 ExtendingLoadsStores.push_back (NewElem);
959- Prev = ExtendingLoadsStores. back () ;
960+ Prev = NewElem ;
960961 }
961962
962963 // Update the size and number of elements for upcoming checks.
@@ -984,30 +985,28 @@ std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &C) {
984985 }
985986
986987 if (CandidateChainsMayContainExtraLoadsStores) {
987- // The legality of adding extra loads/stores to ExtendingLoadsStores has
988- // already been checked, but if the candidate chain contains extra
989- // loads/stores from an earlier optimization, confirm legality now.
990- // This filter is essential because, when filling gaps in
991- // splitChainByContinuity, we queried the API to check that (for a given
992- // element type and address space) there *may* be a legal masked
993- // load/store we can aspire to create. Now, we need to check if the
994- // actual chain we ended up with is legal to turn into a masked
995- // load/store. This is relevant for NVPTX, for example, where a masked
996- // store is only legal if we have ended up with a 256-bit vector.
997- bool CandidateChainContainsExtraLoadsStores = llvm::any_of (
988+ // If the candidate chain contains extra loads/stores from an earlier
989+ // optimization, confirm legality now. This filter is essential because
990+ // when filling gaps in splitChainByContiguity, we queried the API to
991+ // check that (for a given element type and address space) there *may*
992+ // have been a legal masked load/store we could possibly create. Now, we
993+ // need to check if the actual chain we ended up with is legal to turn
994+ // into a masked load/store. This is relevant for NVPTX, for example,
995+ // where a masked store is only legal if we have ended up with a 256-bit
996+ // vector.
997+ bool CurrCandContainsExtraLoadsStores = llvm::any_of (
998998 ArrayRef<ChainElem>(C).slice (CBegin, CEnd - CBegin + 1 ),
999999 [this ](const ChainElem &E) {
10001000 return ExtraElements.contains (E.Inst );
10011001 });
10021002
1003- if (CandidateChainContainsExtraLoadsStores &&
1004- ((IsLoadChain && !TTI.isLegalMaskedLoad (
1005- FixedVectorType::get (VecElemTy, NumVecElems),
1006- Alignment, AS, TTI::MaskKind::ConstantMask)) ||
1007- (!IsLoadChain &&
1008- !TTI.isLegalMaskedStore (
1009- FixedVectorType::get (VecElemTy, NumVecElems), Alignment, AS,
1010- TTI::MaskKind::ConstantMask)))) {
1003+ if (CurrCandContainsExtraLoadsStores &&
1004+ (IsLoadChain ? !TTI.isLegalMaskedLoad (
1005+ FixedVectorType::get (VecElemTy, NumVecElems),
1006+ Alignment, AS, TTI::MaskKind::ConstantMask)
1007+ : !TTI.isLegalMaskedStore (
1008+ FixedVectorType::get (VecElemTy, NumVecElems),
1009+ Alignment, AS, TTI::MaskKind::ConstantMask))) {
10111010 LLVM_DEBUG (dbgs ()
10121011 << " LSV: splitChainByAlignment discarding candidate chain "
10131012 " because it contains extra loads/stores that we cannot "
0 commit comments