@@ -5191,67 +5191,57 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
51915191// / promoted.
51925192AllocaInst *SROA::rewritePartition (AllocaInst &AI, AllocaSlices &AS,
51935193 Partition &P) {
5194- // Try to compute a friendly type for this partition of the alloca. This
5195- // won't always succeed, in which case we fall back to a legal integer type
5196- // or an i8 array of an appropriate size.
5197- Type *SliceTy = nullptr ;
51985194 const DataLayout &DL = AI.getDataLayout ();
5199- unsigned VScale = AI.getFunction ()->getVScaleValue ();
5200-
5201- std::pair<Type *, IntegerType *> CommonUseTy =
5202- findCommonType (P.begin (), P.end (), P.endOffset ());
5203- // Do all uses operate on the same type?
5204- if (CommonUseTy.first ) {
5205- TypeSize CommonUseSize = DL.getTypeAllocSize (CommonUseTy.first );
5206- if (CommonUseSize.isFixed () && CommonUseSize.getFixedValue () >= P.size ())
5207- SliceTy = CommonUseTy.first ;
5208- }
5209- // If not, can we find an appropriate subtype in the original allocated type?
5210- if (!SliceTy)
5211- if (Type *TypePartitionTy = getTypePartition (DL, AI.getAllocatedType (),
5212- P.beginOffset (), P.size ()))
5213- SliceTy = TypePartitionTy;
5214-
5215- // If still not, can we use the largest bitwidth integer type used?
5216- // If SliceTy is a non-promotable aggregate, prefer to represent as an integer
5217- // type because it's more likely to be promotable.
5218- if ((!SliceTy || !SliceTy->isSingleValueType ()) && CommonUseTy.second )
5219- if (DL.getTypeAllocSize (CommonUseTy.second ).getFixedValue () >= P.size ()) {
5220- SliceTy = CommonUseTy.second ;
5221- SliceVecTy = dyn_cast<VectorType>(SliceTy);
5195+ auto ComputePartitionTy = [&]() -> std::tuple<Type *, bool , VectorType *> {
5196+ // First check if the partition is viable for vector promotion. If it is
5197+ // via a floating-point vector, we are done because we would never prefer integer widening.
5198+ VectorType *VecTy = isVectorPromotionViable (P, DL, AI.getFunction ()->getVScaleValue ());
5199+ if (VecTy) {
5200+ if (VecTy->getElementType ()->isFloatingPointTy ()) {
5201+ return {VecTy, false , VecTy};
5202+ }
52225203 }
5223- // Try representing the partition as a legal integer type of the same size as
5224- // the alloca.
5225- if ((!SliceTy || SliceTy->isArrayTy ()) && DL.isLegalInteger (P.size () * 8 )) {
5226- SliceTy = Type::getIntNTy (*C, P.size () * 8 );
5227- }
5228-
5229- if (!SliceTy)
5230- SliceTy = ArrayType::get (Type::getInt8Ty (*C), P.size ());
5231- assert (DL.getTypeAllocSize (SliceTy).getFixedValue () >= P.size ());
5232-
5233- // Prefer vector promotion over integer widening for floating-point vectors
5234- // because it is more likely the user is just accessing whole vector elements
5235- // and not doing bitsise arithmetic.
5236- bool PreferVectorPromotion = false ;
5237- if (auto *FixedVecSliceTy = dyn_cast<FixedVectorType>(SliceTy))
5238- PreferVectorPromotion = FixedVecSliceTy->getElementType ()->isFloatingPointTy ();
5239-
5240- bool IsIntegerPromotable = false ;
5241- VectorType *VecTy = nullptr ;
5242-
5243- if (PreferVectorPromotion) {
5244- // For float vectors, try vector promotion first
5245- VecTy = isVectorPromotionViable (P, DL, VScale);
5246- if (!VecTy)
5247- IsIntegerPromotable = isIntegerWideningViable (P, SliceTy, DL);
5248- } else {
5249- // For integer vectors (especially small integers like i8), try integer
5250- // widening first as InstCombine can optimize the resulting operations
5251- IsIntegerPromotable = isIntegerWideningViable (P, SliceTy, DL);
5252- if (!IsIntegerPromotable)
5253- VecTy = isVectorPromotionViable (P, DL, VScale);
5254- }
5204+
5205+ // Otherwise, check if there is a common type that all slices of the
5206+ // partition use. Collect the largest integer type used as a backup.
5207+ auto CommonUseTy = findCommonType (P.begin (), P.end (), P.endOffset ());
5208+ // If there is a common type that spans the partition, use it.
5209+ if (CommonUseTy.first ) {
5210+ TypeSize CommonUseSize = DL.getTypeAllocSize (CommonUseTy.first );
5211+ if (CommonUseSize.isFixed () &&
5212+ CommonUseSize.getFixedValue () >= P.size ()) {
5213+
5214+ if (VecTy)
5215+ return {VecTy, false , VecTy};
5216+ return {CommonUseTy.first , isIntegerWideningViable (P, CommonUseTy.first , DL), nullptr };
5217+ }
5218+ }
5219+
5220+ // If not, can we find an appropriate subtype in the original allocated type?
5221+ if (Type *TypePartitionTy = getTypePartition (DL, AI.getAllocatedType (), P.beginOffset (), P.size ())) {
5222+ if (TypePartitionTy->isArrayTy () && TypePartitionTy->getArrayElementType ()->isIntegerTy () && DL.isLegalInteger (P.size () * 8 ))
5223+ TypePartitionTy = Type::getIntNTy (*C, P.size () * 8 );
5224+
5225+ if (isIntegerWideningViable (P, TypePartitionTy, DL))
5226+ return {TypePartitionTy, true , nullptr };
5227+ if (VecTy)
5228+ return {VecTy, false , VecTy};
5229+ if (CommonUseTy.second && DL.getTypeAllocSize (CommonUseTy.second ).getFixedValue () >= P.size () && isIntegerWideningViable (P, CommonUseTy.second , DL))
5230+ return {CommonUseTy.second , true , nullptr };
5231+ return {TypePartitionTy, false , nullptr };
5232+ }
5233+
5234+ // If still not, can we use the largest bitwidth integer type used?
5235+ if (CommonUseTy.second && DL.getTypeAllocSize (CommonUseTy.second ).getFixedValue () >= P.size ())
5236+ return {CommonUseTy.second , false , nullptr };
5237+
5238+ if (DL.isLegalInteger (P.size () * 8 ))
5239+ return {Type::getIntNTy (*C, P.size () * 8 ), false , nullptr };
5240+
5241+ return {ArrayType::get (Type::getInt8Ty (*C), P.size ()), false , nullptr };
5242+ };
5243+
5244+ auto [PartitionTy, IsIntegerPromotable, VecTy] = ComputePartitionTy ();
52555245
52565246 // Check for the case where we're going to rewrite to a new alloca of the
52575247 // exact same type as the original, and with the same access offsets. In that
@@ -5260,7 +5250,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
52605250 // P.beginOffset() can be non-zero even with the same type in a case with
52615251 // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
52625252 AllocaInst *NewAI;
5263- if (SliceTy == AI.getAllocatedType () && P.beginOffset () == 0 ) {
5253+ if (PartitionTy == AI.getAllocatedType () && P.beginOffset () == 0 ) {
52645254 NewAI = &AI;
52655255 // FIXME: We should be able to bail at this point with "nothing changed".
52665256 // FIXME: We might want to defer PHI speculation until after here.
@@ -5270,10 +5260,10 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
52705260 const Align Alignment = commonAlignment (AI.getAlign (), P.beginOffset ());
52715261 // If we will get at least this much alignment from the type alone, leave
52725262 // the alloca's alignment unconstrained.
5273- const bool IsUnconstrained = Alignment <= DL.getABITypeAlign (SliceTy );
5263+ const bool IsUnconstrained = Alignment <= DL.getABITypeAlign (PartitionTy );
52745264 NewAI = new AllocaInst (
5275- SliceTy , AI.getAddressSpace (), nullptr ,
5276- IsUnconstrained ? DL.getPrefTypeAlign (SliceTy ) : Alignment,
5265+ PartitionTy , AI.getAddressSpace (), nullptr ,
5266+ IsUnconstrained ? DL.getPrefTypeAlign (PartitionTy ) : Alignment,
52775267 AI.getName () + " .sroa." + Twine (P.begin () - AS.begin ()),
52785268 AI.getIterator ());
52795269 // Copy the old AI debug location over to the new one.
0 commit comments