Skip to content

Commit c5daeb1

Browse files
jrbyrnes authored and bcahoon committed
[SROA] Only try LoadStoreTys when needed
Change-Id: If7ba4999565122327081dfe7fcaa044afe2e4d0f
1 parent 89eaefb commit c5daeb1

File tree

2 files changed

+136
-80
lines changed

2 files changed

+136
-80
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 98 additions & 80 deletions
Original file line number | Diff line number | Diff line change
@@ -2134,6 +2134,94 @@ static bool checkVectorTypeForPromotion(Partition &P, VectorType *VTy,
21342134
return true;
21352135
}
21362136

2137+
/// Pick a vector type from \p CandidateTys that partition \p P can be promoted
/// to, or return nullptr if there is none.
///
/// The candidate list is narrowed in place (it is taken by reference and is
/// cleared, rewritten, sorted, de-duplicated and filtered below), so the caller
/// observes the modified list after the call:
///  - An empty list yields nullptr.
///  - If \p HaveVecPtrTy is set but \p HaveCommonVecPtrTy is not, yields
///    nullptr.
///  - With no common element type and a pointer vector present, the list is
///    replaced by the single entry \p CommonVecPtrTy.
///  - With no common element type and no pointer vector, every candidate with
///    a non-integer element type is rewritten to an integer element type of the
///    same scalar bit width; the list is then sorted by ascending element
///    count and candidates with equal element counts are collapsed to one.
///  - Otherwise (common element type), the list is truncated to its first
///    entry; assert-enabled builds verify all entries were identical.
///  - Candidates with more than 65535 elements are dropped.
/// The first remaining candidate accepted by checkVectorTypeForPromotion is
/// returned.
///
/// \param P  Partition under test; forwarded to checkVectorTypeForPromotion.
/// \param DL Data layout; used for the size assertions and forwarded to
///           checkVectorTypeForPromotion.
/// \param CandidateTys Candidate vector types; modified in place.
/// \param HaveCommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy Flags selecting
///           which narrowing strategy above is applied. NOTE(review): they are
///           presumably computed by the caller while collecting candidates --
///           confirm against isVectorPromotionViable.
/// \param CommonEltTy Only read by the assertions in the common-element-type
///           branch.
/// \param CommonVecPtrTy The sole candidate used when a pointer vector is
///           present and there is no common element type.
static VectorType *
2138+
testVectorTyForPromotion(Partition &P, const DataLayout &DL,
2139+
SmallVectorImpl<VectorType *> &CandidateTys,
2140+
bool HaveCommonEltTy, Type *CommonEltTy,
2141+
bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2142+
VectorType *CommonVecPtrTy) {
2143+
// If we didn't find a vector type, nothing to do here.
2144+
if (CandidateTys.empty())
2145+
return nullptr;
2146+
2147+
// Pointer-ness is sticky, if we had a vector-of-pointers candidate type,
2148+
// then we should choose it, not some other alternative.
2149+
// But, we can't perform a no-op pointer address space change via bitcast,
2150+
// so if we didn't have a common pointer element type, bail.
2151+
if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2152+
return nullptr;
2153+
2154+
// Try to pick the "best" element type out of the choices.
2155+
if (!HaveCommonEltTy && HaveVecPtrTy) {
2156+
// If there was a pointer element type, there's really only one choice.
2157+
CandidateTys.clear();
2158+
CandidateTys.push_back(CommonVecPtrTy);
2159+
} else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2160+
// Integer-ify vector types.
2161+
for (VectorType *&VTy : CandidateTys) {
2162+
if (!VTy->getElementType()->isIntegerTy())
2163+
VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2164+
VTy->getContext(), VTy->getScalarSizeInBits())));
2165+
}
2166+
2167+
// Rank the remaining candidate vector types. This is easy because we know
2168+
// they're all integer vectors. We sort by ascending number of elements.
2169+
// Ordering predicate for llvm::sort: true when the first argument has fewer
// elements than the second. The RHSTy/LHSTy parameter names are in swapped
// order, which does not affect the result.
auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2170+
// DL is only read inside the asserts below. NOTE(review): the void cast
// presumably silences an unused-capture warning when asserts compile out.
(void)DL;
2171+
assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2172+
DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2173+
"Cannot have vector types of different sizes!");
2174+
assert(RHSTy->getElementType()->isIntegerTy() &&
2175+
"All non-integer types eliminated!");
2176+
assert(LHSTy->getElementType()->isIntegerTy() &&
2177+
"All non-integer types eliminated!");
2178+
return cast<FixedVectorType>(RHSTy)->getNumElements() <
2179+
cast<FixedVectorType>(LHSTy)->getNumElements();
2180+
};
2181+
// Equality predicate for std::unique: two candidates are treated as
// duplicates when their element counts match.
auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2182+
(void)DL;
2183+
assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2184+
DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2185+
"Cannot have vector types of different sizes!");
2186+
assert(RHSTy->getElementType()->isIntegerTy() &&
2187+
"All non-integer types eliminated!");
2188+
assert(LHSTy->getElementType()->isIntegerTy() &&
2189+
"All non-integer types eliminated!");
2190+
return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2191+
cast<FixedVectorType>(LHSTy)->getNumElements();
2192+
};
2193+
// Sort first so that candidates with equal element counts become adjacent,
// then erase the adjacent duplicates.
llvm::sort(CandidateTys, RankVectorTypesComp);
2194+
CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(),
2195+
RankVectorTypesEq),
2196+
CandidateTys.end());
2197+
} else {
2198+
// The only way to have the same element type in every vector type is to
2199+
// have the same vector type. Check that and remove all but one.
2200+
#ifndef NDEBUG
2201+
for (VectorType *VTy : CandidateTys) {
2202+
assert(VTy->getElementType() == CommonEltTy &&
2203+
"Unaccounted for element type!");
2204+
assert(VTy == CandidateTys[0] &&
2205+
"Different vector types with the same element type!");
2206+
}
2207+
#endif
2208+
// Keep only the first candidate.
CandidateTys.resize(1);
2209+
}
2210+
2211+
// FIXME: hack. Do we have a named constant for this?
2212+
// SDAG SDNode can't have more than 65535 operands.
2213+
llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2214+
return cast<FixedVectorType>(VTy)->getNumElements() >
2215+
std::numeric_limits<unsigned short>::max();
2216+
});
2217+
2218+
// Return the first surviving candidate that checkVectorTypeForPromotion
// accepts for this partition.
for (VectorType *VTy : CandidateTys)
2219+
if (checkVectorTypeForPromotion(P, VTy, DL))
2220+
return VTy;
2221+
2222+
return nullptr;
2223+
}
2224+
21372225
/// Test whether the given alloca partitioning and range of slices can be
21382226
/// promoted to a vector.
21392227
///
@@ -2195,6 +2283,12 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
21952283
if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
21962284
CheckCandidateType(Ty);
21972285
}
2286+
2287+
if (auto *VTy = testVectorTyForPromotion(
2288+
P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2289+
HaveCommonVecPtrTy, CommonVecPtrTy))
2290+
return VTy;
2291+
21982292
// Consider additional vector types where the element type size is a
21992293
// multiple of load/store element size.
22002294
for (Type *Ty : LoadStoreTys) {
@@ -2204,6 +2298,7 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
22042298
// Make a copy of CandidateTys and iterate through it, because we might
22052299
// append to CandidateTys in the loop.
22062300
SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2301+
CandidateTys.clear();
22072302
for (VectorType *&VTy : CandidateTysCopy) {
22082303
unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
22092304
unsigned ElementSize =
@@ -2216,86 +2311,9 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
22162311
}
22172312
}
22182313

2219-
// If we didn't find a vector type, nothing to do here.
2220-
if (CandidateTys.empty())
2221-
return nullptr;
2222-
2223-
// Pointer-ness is sticky, if we had a vector-of-pointers candidate type,
2224-
// then we should choose it, not some other alternative.
2225-
// But, we can't perform a no-op pointer address space change via bitcast,
2226-
// so if we didn't have a common pointer element type, bail.
2227-
if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2228-
return nullptr;
2229-
2230-
// Try to pick the "best" element type out of the choices.
2231-
if (!HaveCommonEltTy && HaveVecPtrTy) {
2232-
// If there was a pointer element type, there's really only one choice.
2233-
CandidateTys.clear();
2234-
CandidateTys.push_back(CommonVecPtrTy);
2235-
} else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2236-
// Integer-ify vector types.
2237-
for (VectorType *&VTy : CandidateTys) {
2238-
if (!VTy->getElementType()->isIntegerTy())
2239-
VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2240-
VTy->getContext(), VTy->getScalarSizeInBits())));
2241-
}
2242-
2243-
// Rank the remaining candidate vector types. This is easy because we know
2244-
// they're all integer vectors. We sort by ascending number of elements.
2245-
auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2246-
(void)DL;
2247-
assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2248-
DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2249-
"Cannot have vector types of different sizes!");
2250-
assert(RHSTy->getElementType()->isIntegerTy() &&
2251-
"All non-integer types eliminated!");
2252-
assert(LHSTy->getElementType()->isIntegerTy() &&
2253-
"All non-integer types eliminated!");
2254-
return cast<FixedVectorType>(RHSTy)->getNumElements() <
2255-
cast<FixedVectorType>(LHSTy)->getNumElements();
2256-
};
2257-
auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2258-
(void)DL;
2259-
assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2260-
DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2261-
"Cannot have vector types of different sizes!");
2262-
assert(RHSTy->getElementType()->isIntegerTy() &&
2263-
"All non-integer types eliminated!");
2264-
assert(LHSTy->getElementType()->isIntegerTy() &&
2265-
"All non-integer types eliminated!");
2266-
return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2267-
cast<FixedVectorType>(LHSTy)->getNumElements();
2268-
};
2269-
llvm::sort(CandidateTys, RankVectorTypesComp);
2270-
CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(),
2271-
RankVectorTypesEq),
2272-
CandidateTys.end());
2273-
} else {
2274-
// The only way to have the same element type in every vector type is to
2275-
// have the same vector type. Check that and remove all but one.
2276-
#ifndef NDEBUG
2277-
for (VectorType *VTy : CandidateTys) {
2278-
assert(VTy->getElementType() == CommonEltTy &&
2279-
"Unaccounted for element type!");
2280-
assert(VTy == CandidateTys[0] &&
2281-
"Different vector types with the same element type!");
2282-
}
2283-
#endif
2284-
CandidateTys.resize(1);
2285-
}
2286-
2287-
// FIXME: hack. Do we have a named constant for this?
2288-
// SDAG SDNode can't have more than 65535 operands.
2289-
llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2290-
return cast<FixedVectorType>(VTy)->getNumElements() >
2291-
std::numeric_limits<unsigned short>::max();
2292-
});
2293-
2294-
for (VectorType *VTy : CandidateTys)
2295-
if (checkVectorTypeForPromotion(P, VTy, DL))
2296-
return VTy;
2297-
2298-
return nullptr;
2314+
return testVectorTyForPromotion(P, DL, CandidateTys, HaveCommonEltTy,
2315+
CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2316+
CommonVecPtrTy);
22992317
}
23002318

23012319
/// Test whether a slice of an alloca is valid for integer widening.

llvm/test/Transforms/SROA/vector-promotion.ll

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1227,6 +1227,44 @@ define void @swap-15bytes(ptr %x, ptr %y) {
12271227
ret void
12281228
}
12291229

1230+
; Regression test for vector promotion of an alloca that receives a pointer store.
; A <4 x i32> alloca is zero-initialized, its first 8 bytes are overwritten by a
; `ptr` store, bytes 8-11 and 12-15 are overwritten by i32 stores, and the whole
; vector is then loaded and returned. The expected output below contains no
; alloca: the pointer is converted with ptrtoint + bitcast to <2 x i32> and
; blended into the vector, and the two i32 values are placed with insertelement.
define <4 x i32> @PtrCand(ptr %init, i32 %val2) {
1231+
; CHECK-LABEL: @PtrCand(
1232+
; CHECK-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8
1233+
; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VAL0]] to i64
1234+
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>
1235+
; CHECK-NEXT: [[OBJ_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
1236+
; CHECK-NEXT: [[OBJ_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> [[OBJ_0_VEC_EXPAND]], <4 x i32> zeroinitializer
1237+
; CHECK-NEXT: [[OBJ_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_0_VECBLEND]], i32 [[VAL2:%.*]], i32 2
1238+
; CHECK-NEXT: [[OBJ_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_8_VEC_INSERT]], i32 131072, i32 3
1239+
; CHECK-NEXT: ret <4 x i32> [[OBJ_12_VEC_INSERT]]
1240+
;
1241+
; DEBUG-LABEL: @PtrCand(
1242+
; DEBUG-NEXT: [[VAL0:%.*]] = load ptr, ptr [[INIT:%.*]], align 8, !dbg [[DBG492:![0-9]+]]
1243+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr [[VAL0]], metadata [[META487:![0-9]+]], metadata !DIExpression()), !dbg [[DBG492]]
1244+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META488:![0-9]+]], metadata !DIExpression()), !dbg [[DBG493:![0-9]+]]
1245+
; DEBUG-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[VAL0]] to i64, !dbg [[DBG494:![0-9]+]]
1246+
; DEBUG-NEXT: [[TMP2:%.*]] = bitcast i64 [[TMP1]] to <2 x i32>, !dbg [[DBG494]]
1247+
; DEBUG-NEXT: [[OBJ_0_VEC_EXPAND:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>, !dbg [[DBG494]]
1248+
; DEBUG-NEXT: [[OBJ_0_VECBLEND:%.*]] = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> [[OBJ_0_VEC_EXPAND]], <4 x i32> zeroinitializer, !dbg [[DBG494]]
1249+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META489:![0-9]+]], metadata !DIExpression()), !dbg [[DBG495:![0-9]+]]
1250+
; DEBUG-NEXT: [[OBJ_8_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_0_VECBLEND]], i32 [[VAL2:%.*]], i32 2, !dbg [[DBG496:![0-9]+]]
1251+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata ptr undef, metadata [[META490:![0-9]+]], metadata !DIExpression()), !dbg [[DBG497:![0-9]+]]
1252+
; DEBUG-NEXT: [[OBJ_12_VEC_INSERT:%.*]] = insertelement <4 x i32> [[OBJ_8_VEC_INSERT]], i32 131072, i32 3, !dbg [[DBG498:![0-9]+]]
1253+
; DEBUG-NEXT: call void @llvm.dbg.value(metadata <4 x i32> [[OBJ_12_VEC_INSERT]], metadata [[META491:![0-9]+]], metadata !DIExpression()), !dbg [[DBG499:![0-9]+]]
1254+
; DEBUG-NEXT: ret <4 x i32> [[OBJ_12_VEC_INSERT]], !dbg [[DBG500:![0-9]+]]
1255+
;
1256+
%val0 = load ptr, ptr %init, align 8
1257+
%obj = alloca <4 x i32>, align 16
1258+
store <4 x i32> zeroinitializer, ptr %obj, align 16
1259+
store ptr %val0, ptr %obj, align 8
1260+
%ptr2 = getelementptr inbounds i8, ptr %obj, i64 8
1261+
store i32 %val2, ptr %ptr2, align 4
1262+
%ptr3 = getelementptr inbounds i8, ptr %obj, i64 12
1263+
store i32 131072, ptr %ptr3, align 4
1264+
%sroaval = load <4 x i32>, ptr %obj, align 16
1265+
ret <4 x i32> %sroaval
1266+
}
1267+
12301268
declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
12311269
declare void @llvm.lifetime.end.p0(i64, ptr)
12321270
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:

0 commit comments

Comments
 (0)