@@ -13,32 +13,54 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
1313 return Ty->isStructTy () || Ty->isArrayTy () || isa<ScalableVectorType>(Ty);
1414}
1515
16+ static std::optional<unsigned > getKnownVScale (Function *F) {
17+ const auto &Attrs = F->getAttributes ().getFnAttrs ();
18+ unsigned MinVScale = Attrs.getVScaleRangeMin ();
19+ if (Attrs.getVScaleRangeMax () == MinVScale)
20+ return MinVScale;
21+ return std::nullopt ;
22+ }
23+
1624// / Return true if coerceAvailableValueToLoadType will succeed.
1725bool canCoerceMustAliasedValueToLoad (Value *StoredVal, Type *LoadTy,
18- const DataLayout &DL ) {
26+ Function *F ) {
1927 Type *StoredTy = StoredVal->getType ();
20-
2128 if (StoredTy == LoadTy)
2229 return true ;
2330
31+ const DataLayout &DL = F->getDataLayout ();
2432 if (isa<ScalableVectorType>(StoredTy) && isa<ScalableVectorType>(LoadTy) &&
2533 DL.getTypeSizeInBits (StoredTy) == DL.getTypeSizeInBits (LoadTy))
2634 return true ;
2735
28- // If the loaded/stored value is a first class array/struct, or scalable type,
29- // don't try to transform them. We need to be able to bitcast to integer.
30- if (isFirstClassAggregateOrScalableType (LoadTy) ||
31- isFirstClassAggregateOrScalableType (StoredTy))
32- return false ;
33-
34- uint64_t StoreSize = DL.getTypeSizeInBits (StoredTy).getFixedValue ();
36+ // If the loaded/stored value is a first class array/struct, don't try to
37+ // transform them. We need to be able to bitcast to integer. For scalable
38+ // vectors forwarded to fixed-sized vectors with a compile-time known
39+ // vscale, @llvm.vector.extract is used.
40+ uint64_t StoreSize, LoadSize;
41+ if (isa<ScalableVectorType>(StoredTy) && isa<FixedVectorType>(LoadTy)) {
42+ std::optional<unsigned > VScale = getKnownVScale (F);
43+ if (!VScale || StoredTy->getScalarType () != LoadTy->getScalarType ())
44+ return false ;
45+
46+ StoreSize =
47+ DL.getTypeSizeInBits (StoredTy).getKnownMinValue () * VScale.value ();
48+ LoadSize = DL.getTypeSizeInBits (LoadTy).getFixedValue ();
49+ } else {
50+ if (isFirstClassAggregateOrScalableType (LoadTy) ||
51+ isFirstClassAggregateOrScalableType (StoredTy))
52+ return false ;
53+
54+ StoreSize = DL.getTypeSizeInBits (StoredTy).getFixedValue ();
55+ LoadSize = DL.getTypeSizeInBits (LoadTy).getFixedValue ();
56+ }
3557
3658 // The store size must be byte-aligned to support future type casts.
3759 if (llvm::alignTo (StoreSize, 8 ) != StoreSize)
3860 return false ;
3961
4062 // The store has to be at least as big as the load.
41- if (StoreSize < DL. getTypeSizeInBits (LoadTy). getFixedValue () )
63+ if (StoreSize < LoadSize )
4264 return false ;
4365
4466 bool StoredNI = DL.isNonIntegralPointerType (StoredTy->getScalarType ());
@@ -57,11 +79,10 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
5779 return false ;
5880 }
5981
60-
6182 // The implementation below uses inttoptr for vectors of unequal size; we
6283 // can't allow this for non integral pointers. We could teach it to extract
6384 // exact subvectors if desired.
64- if (StoredNI && StoreSize != DL. getTypeSizeInBits (LoadTy). getFixedValue () )
85+ if (StoredNI && StoreSize != LoadSize )
6586 return false ;
6687
6788 if (StoredTy->isTargetExtTy () || LoadTy->isTargetExtTy ())
@@ -79,14 +100,23 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
79100Value *coerceAvailableValueToLoadType (Value *StoredVal, Type *LoadedTy,
80101 IRBuilderBase &Helper,
81102 const DataLayout &DL) {
82- assert (canCoerceMustAliasedValueToLoad (StoredVal, LoadedTy, DL) &&
103+ assert (canCoerceMustAliasedValueToLoad (
104+ StoredVal, LoadedTy, Helper.GetInsertBlock ()->getParent ()) &&
83105 " precondition violation - materialization can't fail" );
84106 if (auto *C = dyn_cast<Constant>(StoredVal))
85107 StoredVal = ConstantFoldConstant (C, DL);
86108
87109 // If this is already the right type, just return it.
88110 Type *StoredValTy = StoredVal->getType ();
89111
112+ // If this is a scalable vector forwarded to a fixed vector load, create
113+ // a @llvm.vector.extract instead of bitcasts.
114+ if (isa<ScalableVectorType>(StoredVal->getType ()) &&
115+ isa<FixedVectorType>(LoadedTy)) {
116+ return Helper.CreateIntrinsic (LoadedTy, Intrinsic::vector_extract,
117+ {StoredVal, Helper.getInt64 (0 )});
118+ }
119+
90120 TypeSize StoredValSize = DL.getTypeSizeInBits (StoredValTy);
91121 TypeSize LoadedValSize = DL.getTypeSizeInBits (LoadedTy);
92122
@@ -220,7 +250,7 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
220250 if (isFirstClassAggregateOrScalableType (StoredVal->getType ()))
221251 return -1 ;
222252
223- if (!canCoerceMustAliasedValueToLoad (StoredVal, LoadTy, DL ))
253+ if (!canCoerceMustAliasedValueToLoad (StoredVal, LoadTy, DepSI-> getFunction () ))
224254 return -1 ;
225255
226256 Value *StorePtr = DepSI->getPointerOperand ();
@@ -235,11 +265,11 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
235265// / the other load can feed into the second load.
236266int analyzeLoadFromClobberingLoad (Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
237267 const DataLayout &DL) {
238- // Cannot handle reading from store of first-class aggregate yet .
239- if (DepLI-> getType ()-> isStructTy () || DepLI->getType ()-> isArrayTy ( ))
268+ // Cannot handle reading from store of first-class aggregate or scalable type .
269+ if (isFirstClassAggregateOrScalableType ( DepLI->getType ()))
240270 return -1 ;
241271
242- if (!canCoerceMustAliasedValueToLoad (DepLI, LoadTy, DL ))
272+ if (!canCoerceMustAliasedValueToLoad (DepLI, LoadTy, DepLI-> getFunction () ))
243273 return -1 ;
244274
245275 Value *DepPtr = DepLI->getPointerOperand ();
@@ -315,6 +345,16 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
315345 return SrcVal;
316346 }
317347
348+ // For the case of a scalable vector beeing forwarded to a fixed-sized load,
349+ // only equal element types are allowed and a @llvm.vector.extract will be
350+ // used instead of bitcasts.
351+ if (isa<ScalableVectorType>(SrcVal->getType ()) &&
352+ isa<FixedVectorType>(LoadTy)) {
353+ assert (Offset == 0 &&
354+ SrcVal->getType ()->getScalarType () == LoadTy->getScalarType ());
355+ return SrcVal;
356+ }
357+
318358 uint64_t StoreSize =
319359 (DL.getTypeSizeInBits (SrcVal->getType ()).getFixedValue () + 7 ) / 8 ;
320360 uint64_t LoadSize = (DL.getTypeSizeInBits (LoadTy).getFixedValue () + 7 ) / 8 ;
@@ -348,6 +388,10 @@ Value *getValueForLoad(Value *SrcVal, unsigned Offset, Type *LoadTy,
348388#ifndef NDEBUG
349389 TypeSize SrcValSize = DL.getTypeStoreSize (SrcVal->getType ());
350390 TypeSize LoadSize = DL.getTypeStoreSize (LoadTy);
391+ if (SrcValSize.isScalable () && !LoadSize.isScalable ())
392+ SrcValSize =
393+ TypeSize::getFixed (SrcValSize.getKnownMinValue () *
394+ getKnownVScale (InsertPt->getFunction ()).value ());
351395 assert (SrcValSize.isScalable () == LoadSize.isScalable ());
352396 assert ((SrcValSize.isScalable () || Offset + LoadSize <= SrcValSize) &&
353397 " Expected Offset + LoadSize <= SrcValSize" );
0 commit comments