@@ -13,32 +13,52 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
1313 return Ty->isStructTy () || Ty->isArrayTy () || isa<ScalableVectorType>(Ty);
1414}
1515
/// Return the vscale value of \p F when it is pinned to a single value, i.e.
/// when the minimum and maximum of the vscale range read from F's function
/// attributes are equal. Returns std::nullopt otherwise.
16+ static std::optional<unsigned > getKnownVScale (Function *F) {
17+ const auto &Attrs = F->getAttributes ().getFnAttrs ();
18+ unsigned MinVScale = Attrs.getVScaleRangeMin ();
19+ if (Attrs.getVScaleRangeMax () == MinVScale)
20+ return MinVScale;
21+ return std::nullopt ;
22+ }
23+
1624// / Return true if coerceAvailableValueToLoadType will succeed.
1725bool canCoerceMustAliasedValueToLoad (Value *StoredVal, Type *LoadTy,
18- const DataLayout &DL ) {
26+ Function *F ) {
1927 Type *StoredTy = StoredVal->getType ();
20-
2128 if (StoredTy == LoadTy)
2229 return true ;
2330
31+ const DataLayout &DL = F->getDataLayout ();
32+ TypeSize StoreSize = DL.getTypeSizeInBits (StoredTy);
33+ TypeSize LoadSize = DL.getTypeSizeInBits (LoadTy);
2434 if (isa<ScalableVectorType>(StoredTy) && isa<ScalableVectorType>(LoadTy) &&
25- DL. getTypeSizeInBits (StoredTy) == DL. getTypeSizeInBits (LoadTy) )
35+ StoreSize == LoadSize )
2636 return true ;
2737
28- // If the loaded/stored value is a first class array/struct, or scalable type,
29- // don't try to transform them. We need to be able to bitcast to integer.
30- if (isFirstClassAggregateOrScalableType (LoadTy) ||
31- isFirstClassAggregateOrScalableType (StoredTy))
38+ // If the loaded/stored value is a first class array/struct, don't try to
39+ // transform them. We need to be able to bitcast to integer. For scalable
40+ // vectors forwarded to fixed-sized vectors @llvm.vector.extract is used.
41+ if (isa<ScalableVectorType>(StoredTy) && isa<FixedVectorType>(LoadTy)) {
42+ if (StoredTy->getScalarType () != LoadTy->getScalarType ())
43+ return false ;
44+
45+ // If the VScale is known at compile-time, use that information to
46+ // allow for wider loads.
47+ std::optional<unsigned > VScale = getKnownVScale (F);
48+ if (VScale)
49+ StoreSize =
50+ TypeSize::getFixed (StoreSize.getKnownMinValue () * VScale.value ());
51+ } else if (isFirstClassAggregateOrScalableType (LoadTy) ||
52+ isFirstClassAggregateOrScalableType (StoredTy)) {
3253 return false ;
33-
34- uint64_t StoreSize = DL.getTypeSizeInBits (StoredTy).getFixedValue ();
54+ }
3555
3656 // The store size must be byte-aligned to support future type casts.
3757 if (llvm::alignTo (StoreSize, 8 ) != StoreSize)
3858 return false ;
3959
4060 // The store has to be at least as big as the load.
41- if (StoreSize < DL. getTypeSizeInBits (LoadTy). getFixedValue ( ))
61+ if (! TypeSize::isKnownGE ( StoreSize, LoadSize ))
4262 return false ;
4363
4464 bool StoredNI = DL.isNonIntegralPointerType (StoredTy->getScalarType ());
@@ -57,11 +77,10 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
5777 return false ;
5878 }
5979
60-
6180 // The implementation below uses inttoptr for vectors of unequal size; we
6281 // can't allow this for non integral pointers. We could teach it to extract
6382 // exact subvectors if desired.
64- if (StoredNI && StoreSize != DL. getTypeSizeInBits (LoadTy). getFixedValue () )
83+ if (StoredNI && StoreSize != LoadSize )
6584 return false ;
6685
6786 if (StoredTy->isTargetExtTy () || LoadTy->isTargetExtTy ())
@@ -77,16 +96,24 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
7796// /
7897// / If we can't do it, return null.
7998Value *coerceAvailableValueToLoadType (Value *StoredVal, Type *LoadedTy,
80- IRBuilderBase &Helper,
81- const DataLayout &DL) {
82- assert (canCoerceMustAliasedValueToLoad (StoredVal, LoadedTy, DL) &&
99+ IRBuilderBase &Helper, Function *F) {
100+ assert (canCoerceMustAliasedValueToLoad (StoredVal, LoadedTy, F) &&
83101 " precondition violation - materialization can't fail" );
102+ const DataLayout &DL = F->getDataLayout ();
84103 if (auto *C = dyn_cast<Constant>(StoredVal))
85104 StoredVal = ConstantFoldConstant (C, DL);
86105
87106 // If this is already the right type, just return it.
88107 Type *StoredValTy = StoredVal->getType ();
89108
109+ // If this is a scalable vector forwarded to a fixed vector load, create
110+ // a @llvm.vector.extract instead of bitcasts.
111+ if (isa<ScalableVectorType>(StoredVal->getType ()) &&
112+ isa<FixedVectorType>(LoadedTy)) {
113+ return Helper.CreateIntrinsic (LoadedTy, Intrinsic::vector_extract,
114+ {StoredVal, Helper.getInt64 (0 )});
115+ }
116+
90117 TypeSize StoredValSize = DL.getTypeSizeInBits (StoredValTy);
91118 TypeSize LoadedValSize = DL.getTypeSizeInBits (LoadedTy);
92119
@@ -220,7 +247,7 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
220247 if (isFirstClassAggregateOrScalableType (StoredVal->getType ()))
221248 return -1 ;
222249
223- if (!canCoerceMustAliasedValueToLoad (StoredVal, LoadTy, DL ))
250+ if (!canCoerceMustAliasedValueToLoad (StoredVal, LoadTy, DepSI-> getFunction () ))
224251 return -1 ;
225252
226253 Value *StorePtr = DepSI->getPointerOperand ();
@@ -235,11 +262,11 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
235262// / the other load can feed into the second load.
236263int analyzeLoadFromClobberingLoad (Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
237264 const DataLayout &DL) {
238- // Cannot handle reading from store of first-class aggregate yet .
239- if (DepLI-> getType ()-> isStructTy () || DepLI->getType ()-> isArrayTy ( ))
265+ // Cannot handle reading from load of first-class aggregate or scalable type .
266+ if (isFirstClassAggregateOrScalableType ( DepLI->getType ()))
240267 return -1 ;
241268
242- if (!canCoerceMustAliasedValueToLoad (DepLI, LoadTy, DL ))
269+ if (!canCoerceMustAliasedValueToLoad (DepLI, LoadTy, DepLI-> getFunction () ))
243270 return -1 ;
244271
245272 Value *DepPtr = DepLI->getPointerOperand ();
@@ -315,6 +342,16 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
315342 return SrcVal;
316343 }
317344
345+ // For the case of a scalable vector being forwarded to a fixed-sized load,
346+ // only equal element types are allowed and a @llvm.vector.extract will be
347+ // used instead of bitcasts.
348+ if (isa<ScalableVectorType>(SrcVal->getType ()) &&
349+ isa<FixedVectorType>(LoadTy)) {
350+ assert (Offset == 0 &&
351+ SrcVal->getType ()->getScalarType () == LoadTy->getScalarType ());
352+ return SrcVal;
353+ }
354+
318355 uint64_t StoreSize =
319356 (DL.getTypeSizeInBits (SrcVal->getType ()).getFixedValue () + 7 ) / 8 ;
320357 uint64_t LoadSize = (DL.getTypeSizeInBits (LoadTy).getFixedValue () + 7 ) / 8 ;
@@ -344,20 +381,24 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
344381}
345382
/// Produce a value of type \p LoadTy from \p SrcVal for a load at \p Offset.
/// An IRBuilder positioned at \p InsertPt is used to run
/// getStoreValueForLoadHelper on SrcVal, and the result is then handed to
/// coerceAvailableValueToLoadType. In the updated signature the DataLayout is
/// obtained from \p F rather than passed in directly.
///
/// In builds without NDEBUG the sizes are sanity-checked first: a non-scalable
/// source must satisfy Offset + LoadSize <= SrcValSize, while a scalable
/// source requires Offset == 0 and a LoadSize known to be <= SrcValSize.
346383Value *getValueForLoad (Value *SrcVal, unsigned Offset, Type *LoadTy,
347- Instruction *InsertPt, const DataLayout &DL) {
384+ Instruction *InsertPt, Function *F) {
385+ const DataLayout &DL = F->getDataLayout ();
348386#ifndef NDEBUG
349387 TypeSize SrcValSize = DL.getTypeStoreSize (SrcVal->getType ());
350388 TypeSize LoadSize = DL.getTypeStoreSize (LoadTy);
351- assert (SrcValSize.isScalable () == LoadSize.isScalable ());
// When vscale is known and a scalable source feeds a non-scalable load,
// re-express the source size as a fixed size (known-min size * vscale) so the
// fixed-size check below applies to it.
389+ if (auto VScale = getKnownVScale (InsertPt->getFunction ());
390+ VScale && SrcValSize.isScalable () && !LoadSize.isScalable ())
391+ SrcValSize =
392+ TypeSize::getFixed (SrcValSize.getKnownMinValue () * VScale.value ());
352393 assert ((SrcValSize.isScalable () || Offset + LoadSize <= SrcValSize) &&
353394 " Expected Offset + LoadSize <= SrcValSize" );
354- assert (
355- (!SrcValSize. isScalable () || (Offset == 0 && LoadSize == SrcValSize)) &&
356- " Expected scalable type sizes to match " );
395+ assert ((!SrcValSize. isScalable () ||
396+ (Offset == 0 && TypeSize::isKnownLE ( LoadSize, SrcValSize) )) &&
397+ " Expected offset of zero and LoadSize <= SrcValSize " );
357398#endif
358399 IRBuilder<> Builder (InsertPt);
359400 SrcVal = getStoreValueForLoadHelper (SrcVal, Offset, LoadTy, Builder, DL);
360- return coerceAvailableValueToLoadType (SrcVal, LoadTy, Builder, DL );
401+ return coerceAvailableValueToLoadType (SrcVal, LoadTy, Builder, F );
361404}
362403
363404Constant *getConstantValueForLoad (Constant *SrcVal, unsigned Offset,
@@ -408,7 +449,8 @@ Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
408449 ++NumBytesSet;
409450 }
410451
411- return coerceAvailableValueToLoadType (Val, LoadTy, Builder, DL);
452+ return coerceAvailableValueToLoadType (Val, LoadTy, Builder,
453+ InsertPt->getFunction ());
412454 }
413455
414456 // Otherwise, this is a memcpy/memmove from a constant global.
0 commit comments