@@ -490,6 +490,10 @@ static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
490490 for_each (DVRAssignMarkerRange, MigrateDbgAssign);
491491}
492492
493+ static Type *getTypePartition (const DataLayout &DL, Type *Ty, uint64_t Offset,
494+ uint64_t Size);
495+ static Type *getTypePartition (const AllocaInst &AI, const Partition &P);
496+
493497namespace {
494498
495499// / A custom IRBuilder inserter which prefixes all names, but only in
@@ -1011,37 +1015,33 @@ static Value *foldPHINodeOrSelectInst(Instruction &I) {
10111015 return foldSelectInst (cast<SelectInst>(I));
10121016}
10131017
1014- static constexpr size_t getMaxNumFixedVectorElements () {
1015- // FIXME: hack. Do we have a named constant for this?
1016- // SDAG SDNode can't have more than 65535 operands.
1017- return std::numeric_limits<unsigned short >::max ();
1018- }
1019-
10201018// / Returns a fixed vector type equivalent to the memory set by II or nullptr if
1021- // / unable to do so.
1022- static FixedVectorType *getVectorTypeFor (const MemSetInst &II,
1023- const DataLayout &DL) {
1019+ // / not viable.
1020+ static FixedVectorType *getVectorTypeFor (const DataLayout &DL, Type *PartTy,
1021+ const MemSetInst &II) {
1022+ auto *PartVecTy = dyn_cast_or_null<FixedVectorType>(PartTy);
1023+ if (!PartVecTy)
1024+ return nullptr ;
1025+
1026+ const uint64_t PartVecSize = DL.getTypeStoreSize (PartVecTy).getFixedValue ();
1027+
10241028 const ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength ());
10251029 if (!Length)
10261030 return nullptr ;
10271031
10281032 const APInt &Val = Length->getValue ();
1029- if (Val.ugt (getMaxNumFixedVectorElements () ))
1033+ if (Val.ugt (PartVecSize ))
10301034 return nullptr ;
10311035
10321036 // Element type will always be i8. TODO: Support
10331037 // llvm.experimental.memset.pattern?
1034- uint64_t MemSetLen = Val.getZExtValue ();
1035- auto *VTy = FixedVectorType::get (II.getValue ()->getType (), MemSetLen);
1036-
1037- // FIXME: This is a workaround. Vector promotion sometimes inhibits our
1038- // ability to merge constant stores. It seems to be related to the presence of
1039- // alignment bytes. See
1040- // test/Transforms/PhaseOrdering/X86/store-constant-merge.ll
1041- if (MemSetLen != DL.getTypeAllocSize (VTy).getFixedValue ())
1042- return nullptr ;
1038+ return FixedVectorType::get (II.getValue ()->getType (), Val.getZExtValue ());
1039+ }
10431040
1044- return VTy;
1041+ static FixedVectorType *getVectorTypeFor (const AllocaInst &AI,
1042+ const Partition &P,
1043+ const MemSetInst &II) {
1044+ return getVectorTypeFor (AI.getDataLayout (), getTypePartition (AI, P), II);
10451045}
10461046
10471047// / Builder for the alloca slices.
@@ -1055,6 +1055,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
10551055 using Base = PtrUseVisitor<SliceBuilder>;
10561056
10571057 const uint64_t AllocSize;
1058+ const AllocaInst &AI;
10581059 AllocaSlices &AS;
10591060
10601061 SmallDenseMap<Instruction *, unsigned > MemTransferSliceMap;
@@ -1067,7 +1068,7 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
10671068 SliceBuilder (const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
10681069 : PtrUseVisitor<SliceBuilder>(DL),
10691070 AllocSize (DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1070- AS (AS) {}
1071+ AI (AI), AS(AS) {}
10711072
10721073private:
10731074 void markAsDead (Instruction &I) {
@@ -1132,16 +1133,15 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
11321133 return Base::visitGetElementPtrInst (GEPI);
11331134 }
11341135
1135- bool isSplittableMemOp (Type *Ty, bool IsVolatile) {
1136- return Ty->isIntegerTy () && !IsVolatile && DL.typeSizeEqualsStoreSize (Ty);
1137- }
1138-
11391136 void handleLoadOrStore (Type *Ty, Instruction &I, const APInt &Offset,
11401137 uint64_t Size, bool IsVolatile) {
11411138 // We allow splitting of non-volatile loads and stores where the type is an
11421139 // integer type. These may be used to implement 'memcpy' or other "transfer
11431140 // of bits" patterns.
1144- insertUse (I, Offset, Size, isSplittableMemOp (Ty, IsVolatile));
1141+ bool IsSplittable =
1142+ Ty->isIntegerTy () && !IsVolatile && DL.typeSizeEqualsStoreSize (Ty);
1143+
1144+ insertUse (I, Offset, Size, IsSplittable);
11451145 }
11461146
11471147 void visitLoadInst (LoadInst &LI) {
@@ -1216,17 +1216,17 @@ class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
12161216 if (!IsOffsetKnown)
12171217 return PI.setAborted (&II);
12181218
1219- bool Splittable;
1220-
1221- if ( getVectorTypeFor (II, DL))
1222- Splittable = isSplittableMemOp (AS. AI . getAllocatedType (), II. isVolatile ());
1223- else
1224- Splittable = ( bool )Length;
1225-
1226- insertUse (II, Offset,
1227- Length ? Length-> getLimitedValue ()
1228- : AllocSize - Offset. getLimitedValue (),
1229- Splittable);
1219+ uint64_t Size = Length ? Length-> getLimitedValue ()
1220+ : AllocSize - Offset. getLimitedValue ();
1221+ bool Splittable = ( bool )Length;
1222+ if (Splittable) {
1223+ // Encourage the use of vector types by making this non-splittable if the
1224+ // memset corresponds to viable vector type.
1225+ Type *PartTy = getTypePartition (DL, AI. getAllocatedType (),
1226+ Offset. getLimitedValue (), Size);
1227+ Splittable = ! getVectorTypeFor (DL, PartTy, II);
1228+ }
1229+ insertUse (II, Offset, Size, Splittable);
12301230 }
12311231
12321232 void visitMemTransferInst (MemTransferInst &II) {
@@ -2159,11 +2159,12 @@ static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
21592159// /
21602160// / This function is called to test each entry in a partition which is slated
21612161// / for a single slice.
2162- static bool isVectorPromotionViableForSlice (Partition &P, const Slice &S ,
2163- VectorType *Ty,
2162+ static bool isVectorPromotionViableForSlice (const AllocaInst &AI, Partition &P ,
2163+ const Slice &S, VectorType *Ty,
21642164 uint64_t ElementSize,
2165- const DataLayout &DL,
21662165 unsigned VScale) {
2166+ const DataLayout &DL = AI.getDataLayout ();
2167+
21672168 // First validate the slice offsets.
21682169 uint64_t BeginOffset =
21692170 std::max (S.beginOffset (), P.beginOffset ()) - P.beginOffset ();
@@ -2192,14 +2193,14 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
21922193 if (MI->isVolatile ())
21932194 return false ;
21942195
2195- auto *II = dyn_cast<MemSetInst>(U->getUser ());
2196- if (!II && !S.isSplittable ()) {
2196+ if (!S.isSplittable ()) {
21972197 // Skip any non-memset unsplittable intrinsics.
2198- return false ;
2199- }
2200- if (II) {
2201- // For memset, allow if we have a suitable vector type
2202- Type *VTy = getVectorTypeFor (*II, DL);
2198+ auto *II = dyn_cast<MemSetInst>(U->getUser ());
2199+ if (!II)
2200+ return false ;
2201+
2202+ // For memset, allow if we have a viable vector type
2203+ Type *VTy = getVectorTypeFor (AI, P, *II);
22032204 if (!VTy)
22042205 return false ;
22052206 if (!canConvertValue (DL, SliceTy, VTy))
@@ -2246,8 +2247,9 @@ static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
22462247// / This implements the necessary checking for \c checkVectorTypesForPromotion
22472248// / (and thus isVectorPromotionViable) over all slices of the alloca for the
22482249// / given VectorType.
2249- static bool checkVectorTypeForPromotion (Partition &P, VectorType *VTy,
2250- const DataLayout &DL, unsigned VScale) {
2250+ static bool checkVectorTypeForPromotion (const AllocaInst &AI, Partition &P,
2251+ VectorType *VTy, unsigned VScale) {
2252+ const DataLayout &DL = AI.getDataLayout ();
22512253 uint64_t ElementSize =
22522254 DL.getTypeSizeInBits (VTy->getElementType ()).getFixedValue ();
22532255
@@ -2260,11 +2262,11 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
22602262 ElementSize /= 8 ;
22612263
22622264 for (const Slice &S : P)
2263- if (!isVectorPromotionViableForSlice (P, S, VTy, ElementSize, DL , VScale))
2265+ if (!isVectorPromotionViableForSlice (AI, P, S, VTy, ElementSize, VScale))
22642266 return false ;
22652267
22662268 for (const Slice *S : P.splitSliceTails ())
2267- if (!isVectorPromotionViableForSlice (P, *S, VTy, ElementSize, DL , VScale))
2269+ if (!isVectorPromotionViableForSlice (AI, P, *S, VTy, ElementSize, VScale))
22682270 return false ;
22692271
22702272 return true ;
@@ -2275,11 +2277,12 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
22752277// / This implements the necessary checking for \c isVectorPromotionViable over
22762278// / all slices of the alloca for the given VectorType.
22772279static VectorType *
2278- checkVectorTypesForPromotion (Partition &P, const DataLayout &DL ,
2280+ checkVectorTypesForPromotion (const AllocaInst &AI, Partition &P ,
22792281 SmallVectorImpl<VectorType *> &CandidateTys,
22802282 bool HaveCommonEltTy, Type *CommonEltTy,
22812283 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
22822284 VectorType *CommonVecPtrTy, unsigned VScale) {
2285+ const DataLayout &DL = AI.getDataLayout ();
22832286 // If we didn't find a vector type, nothing to do here.
22842287 if (CandidateTys.empty ())
22852288 return nullptr ;
@@ -2347,24 +2350,27 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
23472350 CandidateTys.resize (1 );
23482351 }
23492352
2353+ // FIXME: hack. Do we have a named constant for this?
2354+ // SDAG SDNode can't have more than 65535 operands.
23502355 llvm::erase_if (CandidateTys, [](VectorType *VTy) {
23512356 return cast<FixedVectorType>(VTy)->getNumElements () >
2352- getMaxNumFixedVectorElements ();
2357+ std::numeric_limits< unsigned short >:: max ();
23532358 });
23542359
23552360 for (VectorType *VTy : CandidateTys)
2356- if (checkVectorTypeForPromotion (P, VTy, DL , VScale))
2361+ if (checkVectorTypeForPromotion (AI, P, VTy , VScale))
23572362 return VTy;
23582363
23592364 return nullptr ;
23602365}
23612366
23622367static VectorType *createAndCheckVectorTypesForPromotion (
23632368 SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2364- function_ref<void (Type *)> CheckCandidateType, Partition &P ,
2365- const DataLayout &DL , SmallVectorImpl<VectorType *> &CandidateTys,
2369+ function_ref<void (Type *)> CheckCandidateType, const AllocaInst &AI ,
2370+ Partition &P , SmallVectorImpl<VectorType *> &CandidateTys,
23662371 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
23672372 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
2373+ const DataLayout &DL = AI.getDataLayout ();
23682374 [[maybe_unused]] VectorType *OriginalElt =
23692375 CandidateTysCopy.size () ? CandidateTysCopy[0 ] : nullptr ;
23702376 // Consider additional vector types where the element type size is a
@@ -2390,7 +2396,7 @@ static VectorType *createAndCheckVectorTypesForPromotion(
23902396 }
23912397
23922398 return checkVectorTypesForPromotion (
2393- P, DL , CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2399+ AI, P , CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
23942400 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
23952401}
23962402
@@ -2403,10 +2409,11 @@ static VectorType *createAndCheckVectorTypesForPromotion(
24032409// / SSA value. We only can ensure this for a limited set of operations, and we
24042410// / don't want to do the rewrites unless we are confident that the result will
24052411// / be promotable, so we have an early test here.
2406- static VectorType *isVectorPromotionViable (Partition &P, const DataLayout &DL ,
2412+ static VectorType *isVectorPromotionViable (const AllocaInst &AI, Partition &P ,
24072413 unsigned VScale) {
24082414 // Collect the candidate types for vector-based promotion. Also track whether
24092415 // we have different element types.
2416+ const DataLayout &DL = AI.getDataLayout ();
24102417 SmallVector<VectorType *, 4 > CandidateTys;
24112418 SetVector<Type *> LoadStoreTys;
24122419 SetVector<Type *> DeferredTys;
@@ -2452,7 +2459,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
24522459 else if (auto *SI = dyn_cast<StoreInst>(S.getUse ()->getUser ()))
24532460 Ty = SI->getValueOperand ()->getType ();
24542461 else if (auto *II = dyn_cast<MemSetInst>(S.getUse ()->getUser ())) {
2455- Ty = getVectorTypeFor (*II, DL );
2462+ Ty = getVectorTypeFor (AI, P, *II );
24562463 if (!Ty)
24572464 continue ;
24582465 } else
@@ -2473,14 +2480,14 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
24732480
24742481 SmallVector<VectorType *, 4 > CandidateTysCopy = CandidateTys;
24752482 if (auto *VTy = createAndCheckVectorTypesForPromotion (
2476- LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL ,
2483+ LoadStoreTys, CandidateTysCopy, CheckCandidateType, AI, P ,
24772484 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
24782485 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
24792486 return VTy;
24802487
24812488 CandidateTys.clear ();
24822489 return createAndCheckVectorTypesForPromotion (
2483- DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL , CandidateTys,
2490+ DeferredTys, CandidateTysCopy, CheckCandidateType, AI, P , CandidateTys,
24842491 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
24852492 CommonVecPtrTy, VScale);
24862493}
@@ -4465,6 +4472,13 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
44654472 return SubTy;
44664473}
44674474
4475+ static Type *getTypePartition (const AllocaInst &AI, const Partition &P) {
4476+ if (P.empty ())
4477+ return nullptr ;
4478+ return getTypePartition (AI.getDataLayout (), AI.getAllocatedType (),
4479+ P.beginOffset (), P.size ());
4480+ }
4481+
44684482// / Pre-split loads and stores to simplify rewriting.
44694483// /
44704484// / We want to break up the splittable load+store pairs as much as
@@ -5012,12 +5026,12 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
50125026
50135027 // If the common use types are not viable for promotion then attempt to find
50145028 // another type that is viable.
5015- if (SliceVecTy && !checkVectorTypeForPromotion (P, SliceVecTy, DL , VScale))
5029+ if (SliceVecTy && !checkVectorTypeForPromotion (AI, P, SliceVecTy , VScale))
50165030 if (Type *TypePartitionTy = getTypePartition (DL, AI.getAllocatedType (),
50175031 P.beginOffset (), P.size ())) {
50185032 VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
50195033 if (TypePartitionVecTy &&
5020- checkVectorTypeForPromotion (P, TypePartitionVecTy, DL , VScale))
5034+ checkVectorTypeForPromotion (AI, P, TypePartitionVecTy , VScale))
50215035 SliceTy = TypePartitionTy;
50225036 }
50235037
@@ -5028,7 +5042,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
50285042 bool IsIntegerPromotable = isIntegerWideningViable (P, SliceTy, DL);
50295043
50305044 VectorType *VecTy =
5031- IsIntegerPromotable ? nullptr : isVectorPromotionViable (P, DL , VScale);
5045+ IsIntegerPromotable ? nullptr : isVectorPromotionViable (AI, P , VScale);
50325046 if (VecTy)
50335047 SliceTy = VecTy;
50345048
0 commit comments