Skip to content

Commit e7a5130

Browse files
committed
updated to fix regression
1 parent 9143867 commit e7a5130

File tree

1 file changed

+53
-63
lines changed

1 file changed

+53
-63
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 53 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -5191,67 +5191,57 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
51915191
/// promoted.
51925192
AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
51935193
Partition &P) {
5194-
// Try to compute a friendly type for this partition of the alloca. This
5195-
// won't always succeed, in which case we fall back to a legal integer type
5196-
// or an i8 array of an appropriate size.
5197-
Type *SliceTy = nullptr;
51985194
const DataLayout &DL = AI.getDataLayout();
5199-
unsigned VScale = AI.getFunction()->getVScaleValue();
5200-
5201-
std::pair<Type *, IntegerType *> CommonUseTy =
5202-
findCommonType(P.begin(), P.end(), P.endOffset());
5203-
// Do all uses operate on the same type?
5204-
if (CommonUseTy.first) {
5205-
TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
5206-
if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size())
5207-
SliceTy = CommonUseTy.first;
5208-
}
5209-
// If not, can we find an appropriate subtype in the original allocated type?
5210-
if (!SliceTy)
5211-
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
5212-
P.beginOffset(), P.size()))
5213-
SliceTy = TypePartitionTy;
5214-
5215-
// If still not, can we use the largest bitwidth integer type used?
5216-
// If SliceTy is a non-promotable aggregate, prefer to represent as an integer
5217-
// type because it's more likely to be promotable.
5218-
if ((!SliceTy || !SliceTy->isSingleValueType()) && CommonUseTy.second)
5219-
if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) {
5220-
SliceTy = CommonUseTy.second;
5221-
SliceVecTy = dyn_cast<VectorType>(SliceTy);
5195+
auto ComputePartitionTy = [&]() -> std::tuple<Type *, bool, VectorType *> {
5196+
// First check if the partition is viable for vector promotion. If it is,
5197+
// and the vector has floating-point elements, we are done because we would never prefer integer widening.
5198+
VectorType *VecTy = isVectorPromotionViable(P, DL, AI.getFunction()->getVScaleValue());
5199+
if (VecTy) {
5200+
if (VecTy->getElementType()->isFloatingPointTy()) {
5201+
return {VecTy, false, VecTy};
5202+
}
52225203
}
5223-
// Try representing the partition as a legal integer type of the same size as
5224-
// the alloca.
5225-
if ((!SliceTy || SliceTy->isArrayTy()) && DL.isLegalInteger(P.size() * 8)) {
5226-
SliceTy = Type::getIntNTy(*C, P.size() * 8);
5227-
}
5228-
5229-
if (!SliceTy)
5230-
SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
5231-
assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
5232-
5233-
// Prefer vector promotion over integer widening for floating-point vectors
5234-
// because it is more likely the user is just accessing whole vector elements
5235-
// and not doing bitwise arithmetic.
5236-
bool PreferVectorPromotion = false;
5237-
if (auto *FixedVecSliceTy = dyn_cast<FixedVectorType>(SliceTy))
5238-
PreferVectorPromotion = FixedVecSliceTy->getElementType()->isFloatingPointTy();
5239-
5240-
bool IsIntegerPromotable = false;
5241-
VectorType *VecTy = nullptr;
5242-
5243-
if (PreferVectorPromotion) {
5244-
// For float vectors, try vector promotion first
5245-
VecTy = isVectorPromotionViable(P, DL, VScale);
5246-
if (!VecTy)
5247-
IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
5248-
} else {
5249-
// For integer vectors (especially small integers like i8), try integer
5250-
// widening first as InstCombine can optimize the resulting operations
5251-
IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
5252-
if (!IsIntegerPromotable)
5253-
VecTy = isVectorPromotionViable(P, DL, VScale);
5254-
}
5204+
5205+
// Otherwise, check if there is a common type that all slices of the
5206+
// partition use. Collect the largest integer type used as a backup.
5207+
auto CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset());
5208+
// If there is a common type that spans the partition, use it.
5209+
if (CommonUseTy.first) {
5210+
TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
5211+
if (CommonUseSize.isFixed() &&
5212+
CommonUseSize.getFixedValue() >= P.size()) {
5213+
5214+
if (VecTy)
5215+
return {VecTy, false, VecTy};
5216+
return {CommonUseTy.first, isIntegerWideningViable(P, CommonUseTy.first, DL), nullptr};
5217+
}
5218+
}
5219+
5220+
// If not, can we find an appropriate subtype in the original allocated type?
5221+
if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(), P.beginOffset(), P.size())) {
5222+
if (TypePartitionTy->isArrayTy() && TypePartitionTy->getArrayElementType()->isIntegerTy() && DL.isLegalInteger(P.size() * 8))
5223+
TypePartitionTy = Type::getIntNTy(*C, P.size() * 8);
5224+
5225+
if (isIntegerWideningViable(P, TypePartitionTy, DL))
5226+
return {TypePartitionTy, true, nullptr};
5227+
if (VecTy)
5228+
return {VecTy, false, VecTy};
5229+
if (CommonUseTy.second && DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size() && isIntegerWideningViable(P, CommonUseTy.second, DL))
5230+
return {CommonUseTy.second, true, nullptr};
5231+
return {TypePartitionTy, false, nullptr};
5232+
}
5233+
5234+
// If still not, can we use the largest bitwidth integer type used?
5235+
if (CommonUseTy.second && DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size())
5236+
return {CommonUseTy.second, false, nullptr};
5237+
5238+
if (DL.isLegalInteger(P.size() * 8))
5239+
return {Type::getIntNTy(*C, P.size() * 8), false, nullptr};
5240+
5241+
return {ArrayType::get(Type::getInt8Ty(*C), P.size()), false, nullptr};
5242+
};
5243+
5244+
auto [PartitionTy, IsIntegerPromotable, VecTy] = ComputePartitionTy();
52555245

52565246
// Check for the case where we're going to rewrite to a new alloca of the
52575247
// exact same type as the original, and with the same access offsets. In that
@@ -5260,7 +5250,7 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
52605250
// P.beginOffset() can be non-zero even with the same type in a case with
52615251
// out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
52625252
AllocaInst *NewAI;
5263-
if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
5253+
if (PartitionTy == AI.getAllocatedType() && P.beginOffset() == 0) {
52645254
NewAI = &AI;
52655255
// FIXME: We should be able to bail at this point with "nothing changed".
52665256
// FIXME: We might want to defer PHI speculation until after here.
@@ -5270,10 +5260,10 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
52705260
const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
52715261
// If we will get at least this much alignment from the type alone, leave
52725262
// the alloca's alignment unconstrained.
5273-
const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
5263+
const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(PartitionTy);
52745264
NewAI = new AllocaInst(
5275-
SliceTy, AI.getAddressSpace(), nullptr,
5276-
IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
5265+
PartitionTy, AI.getAddressSpace(), nullptr,
5266+
IsUnconstrained ? DL.getPrefTypeAlign(PartitionTy) : Alignment,
52775267
AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
52785268
AI.getIterator());
52795269
// Copy the old AI debug location over to the new one.

0 commit comments

Comments
 (0)