@@ -15,30 +15,42 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
1515
1616/// Return true if coerceAvailableValueToLoadType will succeed.
1717bool canCoerceMustAliasedValueToLoad (Value *StoredVal, Type *LoadTy,
18- const DataLayout &DL ) {
18+ Function *F ) {
1919 Type *StoredTy = StoredVal->getType ();
20-
2120 if (StoredTy == LoadTy)
2221 return true ;
2322
23+ const DataLayout &DL = F->getDataLayout ();
24+ TypeSize MinStoreSize = DL.getTypeSizeInBits (StoredTy);
25+ TypeSize LoadSize = DL.getTypeSizeInBits (LoadTy);
2426 if (isa<ScalableVectorType>(StoredTy) && isa<ScalableVectorType>(LoadTy) &&
25- DL. getTypeSizeInBits (StoredTy) == DL. getTypeSizeInBits (LoadTy) )
27+ MinStoreSize == LoadSize )
2628 return true ;
2729
28- // If the loaded/stored value is a first class array/struct, or scalable type,
29- // don't try to transform them. We need to be able to bitcast to integer.
30- if (isFirstClassAggregateOrScalableType (LoadTy) ||
31- isFirstClassAggregateOrScalableType (StoredTy))
30+ // If the loaded/stored value is a first class array/struct, don't try to
31+ // transform them. We need to be able to bitcast to integer. For scalable
32+ // vectors forwarded to fixed-sized vectors @llvm.vector.extract is used.
33+ if (isa<ScalableVectorType>(StoredTy) && isa<FixedVectorType>(LoadTy)) {
34+ if (StoredTy->getScalarType () != LoadTy->getScalarType ())
35+ return false ;
36+
37+ // If it is known at compile-time that the VScale is larger than one,
38+ // use that information to allow for wider loads.
39+ const auto &Attrs = F->getAttributes ().getFnAttrs ();
40+ unsigned MinVScale = Attrs.getVScaleRangeMin ();
41+ MinStoreSize =
42+ TypeSize::getFixed (MinStoreSize.getKnownMinValue () * MinVScale);
43+ } else if (isFirstClassAggregateOrScalableType (LoadTy) ||
44+ isFirstClassAggregateOrScalableType (StoredTy)) {
3245 return false ;
33-
34- uint64_t StoreSize = DL.getTypeSizeInBits (StoredTy).getFixedValue ();
46+ }
3547
3648 // The store size must be byte-aligned to support future type casts.
37- if (llvm::alignTo (StoreSize , 8 ) != StoreSize )
49+ if (llvm::alignTo (MinStoreSize , 8 ) != MinStoreSize )
3850 return false ;
3951
4052 // The store has to be at least as big as the load.
41- if (StoreSize < DL. getTypeSizeInBits (LoadTy). getFixedValue ( ))
53+ if (! TypeSize::isKnownGE (MinStoreSize, LoadSize ))
4254 return false ;
4355
4456 bool StoredNI = DL.isNonIntegralPointerType (StoredTy->getScalarType ());
@@ -57,11 +69,10 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
5769 return false ;
5870 }
5971
60-
6172 // The implementation below uses inttoptr for vectors of unequal size; we
6273 // can't allow this for non integral pointers. We could teach it to extract
6374 // exact subvectors if desired.
64- if (StoredNI && StoreSize != DL. getTypeSizeInBits (LoadTy). getFixedValue ( ))
75+ if (StoredNI && (StoredTy-> isScalableTy () || MinStoreSize != LoadSize ))
6576 return false ;
6677
6778 if (StoredTy->isTargetExtTy () || LoadTy->isTargetExtTy ())
@@ -77,16 +88,24 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
7788///
7889/// If we can't do it, return null.
7990Value *coerceAvailableValueToLoadType (Value *StoredVal, Type *LoadedTy,
80- IRBuilderBase &Helper,
81- const DataLayout &DL) {
82- assert (canCoerceMustAliasedValueToLoad (StoredVal, LoadedTy, DL) &&
91+ IRBuilderBase &Helper, Function *F) {
92+ assert (canCoerceMustAliasedValueToLoad (StoredVal, LoadedTy, F) &&
8393 " precondition violation - materialization can't fail" );
94+ const DataLayout &DL = F->getDataLayout ();
8495 if (auto *C = dyn_cast<Constant>(StoredVal))
8596 StoredVal = ConstantFoldConstant (C, DL);
8697
8798 // If this is already the right type, just return it.
8899 Type *StoredValTy = StoredVal->getType ();
89100
101+ // If this is a scalable vector forwarded to a fixed vector load, create
102+ // a @llvm.vector.extract instead of bitcasts.
103+ if (isa<ScalableVectorType>(StoredVal->getType ()) &&
104+ isa<FixedVectorType>(LoadedTy)) {
105+ return Helper.CreateIntrinsic (LoadedTy, Intrinsic::vector_extract,
106+ {StoredVal, Helper.getInt64 (0 )});
107+ }
108+
90109 TypeSize StoredValSize = DL.getTypeSizeInBits (StoredValTy);
91110 TypeSize LoadedValSize = DL.getTypeSizeInBits (LoadedTy);
92111
@@ -220,7 +239,7 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
220239 if (isFirstClassAggregateOrScalableType (StoredVal->getType ()))
221240 return -1 ;
222241
223- if (!canCoerceMustAliasedValueToLoad (StoredVal, LoadTy, DL ))
242+ if (!canCoerceMustAliasedValueToLoad (StoredVal, LoadTy, DepSI-> getFunction () ))
224243 return -1 ;
225244
226245 Value *StorePtr = DepSI->getPointerOperand ();
@@ -235,11 +254,11 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
235254/// the other load can feed into the second load.
236255int analyzeLoadFromClobberingLoad (Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
237256 const DataLayout &DL) {
238- // Cannot handle reading from store of first-class aggregate yet .
239- if (DepLI-> getType ()-> isStructTy () || DepLI->getType ()-> isArrayTy ( ))
257+ // Cannot handle reading from a load of a first-class aggregate or scalable type.
258+ if (isFirstClassAggregateOrScalableType ( DepLI->getType ()))
240259 return -1 ;
241260
242- if (!canCoerceMustAliasedValueToLoad (DepLI, LoadTy, DL ))
261+ if (!canCoerceMustAliasedValueToLoad (DepLI, LoadTy, DepLI-> getFunction () ))
243262 return -1 ;
244263
245264 Value *DepPtr = DepLI->getPointerOperand ();
@@ -315,6 +334,16 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
315334 return SrcVal;
316335 }
317336
337+ // For the case of a scalable vector being forwarded to a fixed-sized load,
338+ // only equal element types are allowed and a @llvm.vector.extract will be
339+ // used instead of bitcasts.
340+ if (isa<ScalableVectorType>(SrcVal->getType ()) &&
341+ isa<FixedVectorType>(LoadTy)) {
342+ assert (Offset == 0 &&
343+ SrcVal->getType ()->getScalarType () == LoadTy->getScalarType ());
344+ return SrcVal;
345+ }
346+
318347 uint64_t StoreSize =
319348 (DL.getTypeSizeInBits (SrcVal->getType ()).getFixedValue () + 7 ) / 8 ;
320349 uint64_t LoadSize = (DL.getTypeSizeInBits (LoadTy).getFixedValue () + 7 ) / 8 ;
@@ -344,20 +373,24 @@ static Value *getStoreValueForLoadHelper(Value *SrcVal, unsigned Offset,
344373}
345374
346375Value *getValueForLoad (Value *SrcVal, unsigned Offset, Type *LoadTy,
347- Instruction *InsertPt, const DataLayout &DL) {
376+ Instruction *InsertPt, Function *F) {
377+ const DataLayout &DL = F->getDataLayout ();
348378#ifndef NDEBUG
349- TypeSize SrcValSize = DL.getTypeStoreSize (SrcVal->getType ());
379+ TypeSize MinSrcValSize = DL.getTypeStoreSize (SrcVal->getType ());
350380 TypeSize LoadSize = DL.getTypeStoreSize (LoadTy);
351- assert (SrcValSize.isScalable () == LoadSize.isScalable ());
352- assert ((SrcValSize.isScalable () || Offset + LoadSize <= SrcValSize) &&
381+ if (MinSrcValSize.isScalable () && !LoadSize.isScalable ())
382+ MinSrcValSize =
383+ TypeSize::getFixed (MinSrcValSize.getKnownMinValue () *
384+ F->getAttributes ().getFnAttrs ().getVScaleRangeMin ());
385+ assert ((MinSrcValSize.isScalable () || Offset + LoadSize <= MinSrcValSize) &&
353386 " Expected Offset + LoadSize <= SrcValSize" );
354- assert (
355- (!SrcValSize. isScalable () || (Offset == 0 && LoadSize == SrcValSize )) &&
356- " Expected scalable type sizes to match " );
387+ assert ((!MinSrcValSize. isScalable () ||
388+ (Offset == 0 && TypeSize::isKnownLE ( LoadSize, MinSrcValSize) )) &&
389+ " Expected offset of zero and LoadSize <= SrcValSize " );
357390#endif
358391 IRBuilder<> Builder (InsertPt);
359392 SrcVal = getStoreValueForLoadHelper (SrcVal, Offset, LoadTy, Builder, DL);
360- return coerceAvailableValueToLoadType (SrcVal, LoadTy, Builder, DL );
393+ return coerceAvailableValueToLoadType (SrcVal, LoadTy, Builder, F );
361394}
362395
363396Constant *getConstantValueForLoad (Constant *SrcVal, unsigned Offset,
@@ -408,7 +441,8 @@ Value *getMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
408441 ++NumBytesSet;
409442 }
410443
411- return coerceAvailableValueToLoadType (Val, LoadTy, Builder, DL);
444+ return coerceAvailableValueToLoadType (Val, LoadTy, Builder,
445+ InsertPt->getFunction ());
412446 }
413447
414448 // Otherwise, this is a memcpy/memmove from a constant global.
0 commit comments