Skip to content

Commit f1d777e

Browse files
committed
fixup! [SROA] Vector promote some memsets
1 parent 4f1c6b8 commit f1d777e

File tree

2 files changed

+95
-55
lines changed

2 files changed

+95
-55
lines changed

llvm/lib/Transforms/Scalar/SROA.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1011,6 +1011,12 @@ static Value *foldPHINodeOrSelectInst(Instruction &I) {
10111011
return foldSelectInst(cast<SelectInst>(I));
10121012
}
10131013

1014+
/// Returns the upper bound on the number of elements a fixed vector type may
/// have to be considered in this file: the maximum value of unsigned short.
/// Callers reject memset lengths and candidate vector types above this bound.
static constexpr size_t getMaxNumFixedVectorElements() {
1015+
// FIXME: hack. Do we have a named constant for this?
1016+
// SDAG SDNode can't have more than 65535 operands.
1017+
return std::numeric_limits<unsigned short>::max();
1018+
}
1019+
10141020
/// Returns a fixed vector type equivalent to the memory set by II or nullptr if
10151021
/// unable to do so.
10161022
static FixedVectorType *getVectorTypeFor(const MemSetInst &II,
@@ -1019,10 +1025,12 @@ static FixedVectorType *getVectorTypeFor(const MemSetInst &II,
10191025
if (!Length)
10201026
return nullptr;
10211027

1022-
APInt Val = Length->getValue();
1023-
if (Val.ugt(std::numeric_limits<unsigned>::max()))
1028+
const APInt &Val = Length->getValue();
1029+
if (Val.ugt(getMaxNumFixedVectorElements()))
10241030
return nullptr;
10251031

1032+
// Element type will always be i8. TODO: Support
1033+
// llvm.experimental.memset.pattern?
10261034
uint64_t MemSetLen = Val.getZExtValue();
10271035
auto *VTy = FixedVectorType::get(II.getValue()->getType(), MemSetLen);
10281036

@@ -2261,11 +2269,9 @@ checkVectorTypesForPromotion(Partition &P, const DataLayout &DL,
22612269
CandidateTys.resize(1);
22622270
}
22632271

2264-
// FIXME: hack. Do we have a named constant for this?
2265-
// SDAG SDNode can't have more than 65535 operands.
22662272
llvm::erase_if(CandidateTys, [](VectorType *VTy) {
22672273
return cast<FixedVectorType>(VTy)->getNumElements() >
2268-
std::numeric_limits<unsigned short>::max();
2274+
getMaxNumFixedVectorElements();
22692275
});
22702276

22712277
for (VectorType *VTy : CandidateTys)

llvm/test/Transforms/SROA/vector-promotion-memset.ll

Lines changed: 84 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -5,119 +5,153 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
55
%ptr_pair = type { ptr, ptr }
66

77
%struct.a = type { <32 x i8> }
8-
define void @vector_promote_memset_a(ptr %0) {
8+
define void @vector_promote_memset_a(ptr %arg0) {
99
; CHECK-LABEL: @vector_promote_memset_a(
1010
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
1111
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
1212
; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <32 x i8> zeroinitializer, i8 [[TMP3]], i32 0
1313
; CHECK-NEXT: ret void
1414
;
15-
%2 = alloca %struct.a, align 32
16-
%3 = alloca %ptr_pair, align 8
17-
call void @llvm.memset.p0.i64(ptr align 32 %2, i8 0, i64 32, i1 false)
15+
%a0 = alloca %struct.a, align 32
16+
%a1 = alloca %ptr_pair, align 8
17+
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32, i1 false)
1818

19-
store ptr %2, ptr %3, align 8
19+
store ptr %a0, ptr %a1, align 8
2020

21-
%4 = getelementptr inbounds %ptr_pair, ptr %3, i64 0, i32 1
22-
%5 = load ptr, ptr %0, align 8
23-
store ptr %5, ptr %4, align 8
21+
%p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1
22+
%v0 = load ptr, ptr %arg0, align 8
23+
store ptr %v0, ptr %p1, align 8
2424

25-
%6 = getelementptr inbounds i8, ptr %3, i32 8
26-
%7 = load ptr, ptr %6, align 8
25+
%p2 = getelementptr inbounds i8, ptr %a1, i32 8
26+
%v1 = load ptr, ptr %p2, align 8
2727

28-
%8 = load i8, ptr %7, align 1
29-
store i8 %8, ptr %2, align 32
28+
%v2 = load i8, ptr %v1, align 1
29+
store i8 %v2, ptr %a0, align 32
3030

3131
ret void
3232
}
3333

3434
%struct.b = type { <16 x i16> }
35-
define void @vector_promote_memset_b(ptr %0) {
35+
define void @vector_promote_memset_b(ptr %arg0) {
3636
; CHECK-LABEL: @vector_promote_memset_b(
3737
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
3838
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 1
3939
; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <16 x i16> zeroinitializer, i16 [[TMP3]], i32 0
4040
; CHECK-NEXT: ret void
4141
;
42-
%2 = alloca %struct.b, align 16
43-
%3 = alloca %ptr_pair, align 8
44-
call void @llvm.memset.p0.i64(ptr align 32 %2, i8 0, i64 32, i1 false)
42+
%a0 = alloca %struct.b, align 16
43+
%a1 = alloca %ptr_pair, align 8
44+
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32, i1 false)
4545

46-
store ptr %2, ptr %3, align 8
46+
store ptr %a0, ptr %a1, align 8
4747

48-
%4 = getelementptr inbounds %ptr_pair, ptr %3, i64 0, i32 1
49-
%5 = load ptr, ptr %0, align 8
50-
store ptr %5, ptr %4, align 8
48+
%p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1
49+
%v0 = load ptr, ptr %arg0, align 8
50+
store ptr %v0, ptr %p1, align 8
5151

52-
%6 = getelementptr inbounds i8, ptr %3, i32 8
53-
%7 = load ptr, ptr %6, align 8
52+
%p2 = getelementptr inbounds i8, ptr %a1, i32 8
53+
%v1 = load ptr, ptr %p2, align 8
5454

55-
%8 = load i16, ptr %7, align 1
56-
store i16 %8, ptr %2, align 16
55+
%v2 = load i16, ptr %v1, align 1
56+
store i16 %v2, ptr %a0, align 16
5757

5858
ret void
5959
}
6060

6161
%struct.c = type { <4 x i32> }
62-
define void @vector_promote_memset_c(ptr %0) {
62+
define void @vector_promote_memset_c(ptr %arg0) {
6363
; CHECK-LABEL: @vector_promote_memset_c(
6464
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
6565
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 1
6666
; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[TMP3]], i32 2
6767
; CHECK-NEXT: ret void
6868
;
69-
%2 = alloca %struct.c, align 4
70-
%3 = alloca %ptr_pair, align 8
71-
call void @llvm.memset.p0.i64(ptr align 32 %2, i8 0, i64 16, i1 false)
69+
%a0 = alloca %struct.c, align 4
70+
%a1 = alloca %ptr_pair, align 8
71+
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 16, i1 false)
7272

73-
store ptr %2, ptr %3, align 8
73+
store ptr %a0, ptr %a1, align 8
7474

75-
%4 = getelementptr inbounds %ptr_pair, ptr %3, i64 0, i32 1
76-
%5 = load ptr, ptr %0, align 8
77-
store ptr %5, ptr %4, align 8
75+
%p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1
76+
%v0 = load ptr, ptr %arg0, align 8
77+
store ptr %v0, ptr %p1, align 8
7878

79-
%6 = getelementptr inbounds i8, ptr %3, i32 8
80-
%7 = load ptr, ptr %6, align 8
79+
%p2 = getelementptr inbounds i8, ptr %a1, i32 8
80+
%v1 = load ptr, ptr %p2, align 8
8181

82-
%8 = load i32, ptr %7, align 1
82+
%v2 = load i32, ptr %v1, align 1
8383

84-
%9 = getelementptr inbounds i32, ptr %2, i32 2
85-
store i32 %8, ptr %9, align 4
84+
%p3 = getelementptr inbounds i32, ptr %a0, i32 2
85+
store i32 %v2, ptr %p3, align 4
8686

8787
ret void
8888
}
8989

9090
; We currently prevent promotion if the vector would require padding
9191
%struct.d = type { <6 x i32> }
92-
define void @vector_promote_memset_d(ptr %0) {
92+
define void @vector_promote_memset_d(ptr %arg0) {
9393
; CHECK-LABEL: @vector_promote_memset_d(
9494
; CHECK-NEXT: [[DOTSROA_2:%.*]] = alloca [3 x i32], align 4
9595
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[DOTSROA_2]], i8 0, i64 12, i1 false)
9696
; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
9797
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 1
9898
; CHECK-NEXT: ret void
9999
;
100-
%2 = alloca %struct.d, align 4
101-
%3 = alloca %ptr_pair, align 8
102-
call void @llvm.memset.p0.i64(ptr align 32 %2, i8 0, i64 24, i1 false)
100+
%a0 = alloca %struct.d, align 4
101+
%a1 = alloca %ptr_pair, align 8
102+
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 24, i1 false)
103103

104-
store ptr %2, ptr %3, align 8
104+
store ptr %a0, ptr %a1, align 8
105105

106-
%4 = getelementptr inbounds %ptr_pair, ptr %3, i64 0, i32 1
107-
%5 = load ptr, ptr %0, align 8
108-
store ptr %5, ptr %4, align 8
106+
%p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1
107+
%v0 = load ptr, ptr %arg0, align 8
108+
store ptr %v0, ptr %p1, align 8
109109

110-
%6 = getelementptr inbounds i8, ptr %3, i32 8
111-
%7 = load ptr, ptr %6, align 8
110+
%p2 = getelementptr inbounds i8, ptr %a1, i32 8
111+
%v1 = load ptr, ptr %p2, align 8
112112

113-
%8 = load i32, ptr %7, align 1
113+
%v2 = load i32, ptr %v1, align 1
114114

115-
%9 = getelementptr inbounds i32, ptr %2, i32 2
116-
store i32 %8, ptr %9, align 4
115+
%p3 = getelementptr inbounds i32, ptr %a0, i32 2
116+
store i32 %v2, ptr %p3, align 4
117117

118118
ret void
119119
}
120120

121+
122+
; We shouldn't promote large memsets: a 65536-byte memset would need a <65536 x i8> vector, which exceeds the 65535-element limit.
123+
%struct.e = type { [65536 x i8] }
124+
define void @vector_promote_memset_e(ptr %arg0) {
125+
; CHECK-LABEL: @vector_promote_memset_e(
126+
; CHECK-NEXT: [[A0_SROA_2:%.*]] = alloca [65524 x i8], align 4
127+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_2]], i8 0, i64 65524, i1 false)
128+
; CHECK-NEXT: [[V0:%.*]] = load ptr, ptr [[ARG0:%.*]], align 8
129+
; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[V0]], align 1
130+
; CHECK-NEXT: ret void
131+
;
132+
%a0 = alloca %struct.e, align 4
133+
%a1 = alloca %ptr_pair, align 8
134+
call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 65536, i1 false)
135+
136+
store ptr %a0, ptr %a1, align 8
137+
138+
%p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1
139+
%v0 = load ptr, ptr %arg0, align 8
140+
store ptr %v0, ptr %p1, align 8
141+
142+
%p2 = getelementptr inbounds i8, ptr %a1, i32 8
143+
%v1 = load ptr, ptr %p2, align 8
144+
145+
%v2 = load i32, ptr %v1, align 1
146+
147+
%p3 = getelementptr inbounds i32, ptr %a0, i32 2
148+
store i32 %v2, ptr %p3, align 4
149+
150+
ret void
151+
}
152+
153+
154+
121155
; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
122156
declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) #0
123157

0 commit comments

Comments
 (0)