@@ -5,119 +5,153 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
55%ptr_pair = type { ptr , ptr } ; two pointer fields; with p:64:64:64 in the datalayout, field 1 is at byte offset 8
66
77%struct.a = type { <32 x i8 > } ; 32 bytes of vector data, the same size as the memset below
8- define void @vector_promote_memset_a (ptr %0 ) {
8+ define void @vector_promote_memset_a (ptr %arg0 ) {
99; CHECK-LABEL: @vector_promote_memset_a(
1010; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
1111; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1
1212; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <32 x i8> zeroinitializer, i8 [[TMP3]], i32 0
1313; CHECK-NEXT: ret void
1414;
15- %2 = alloca %struct.a , align 32
16- %3 = alloca %ptr_pair , align 8
17- call void @llvm.memset.p0.i64 (ptr align 32 %2 , i8 0 , i64 32 , i1 false )
15+ %a0 = alloca %struct.a , align 32
16+ %a1 = alloca %ptr_pair , align 8
17+ call void @llvm.memset.p0.i64 (ptr align 32 %a0 , i8 0 , i64 32 , i1 false ) ; zero all 32 bytes; the expected output has no memset call, only a zeroinitializer operand
1818
19- store ptr %2 , ptr %3 , align 8
19+ store ptr %a0 , ptr %a1 , align 8 ; %a0's address is written to slot 0 of %a1, which is never loaded in this function
2020
21- %4 = getelementptr inbounds %ptr_pair , ptr %3 , i64 0 , i32 1
22- %5 = load ptr , ptr %0 , align 8
23- store ptr %5 , ptr %4 , align 8
21+ %p1 = getelementptr inbounds %ptr_pair , ptr %a1 , i64 0 , i32 1
22+ %v0 = load ptr , ptr %arg0 , align 8
23+ store ptr %v0 , ptr %p1 , align 8
2424
25- %6 = getelementptr inbounds i8 , ptr %3 , i32 8
26- %7 = load ptr , ptr %6 , align 8
25+ %p2 = getelementptr inbounds i8 , ptr %a1 , i32 8 ; byte offset 8, i.e. the same slot as %p1 (field 1 of %ptr_pair)
26+ %v1 = load ptr , ptr %p2 , align 8 ; reads back the pointer stored through %p1
2727
28- %8 = load i8 , ptr %7 , align 1
29- store i8 %8 , ptr %2 , align 32
28+ %v2 = load i8 , ptr %v1 , align 1
29+ store i8 %v2 , ptr %a0 , align 32 ; writes byte 0 of the zeroed vector; expected as insertelement at index 0
3030
3131 ret void
3232}
3333
3434%struct.b = type { <16 x i16 > } ; 16 lanes of i16 = 32 bytes, the same size as the memset below
35- define void @vector_promote_memset_b (ptr %0 ) {
35+ define void @vector_promote_memset_b (ptr %arg0 ) {
3636; CHECK-LABEL: @vector_promote_memset_b(
3737; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
3838; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 1
3939; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <16 x i16> zeroinitializer, i16 [[TMP3]], i32 0
4040; CHECK-NEXT: ret void
4141;
42- %2 = alloca %struct.b , align 16
43- %3 = alloca %ptr_pair , align 8
44- call void @llvm.memset.p0.i64 (ptr align 32 %2 , i8 0 , i64 32 , i1 false )
42+ %a0 = alloca %struct.b , align 16
43+ %a1 = alloca %ptr_pair , align 8
44+ call void @llvm.memset.p0.i64 (ptr align 32 %a0 , i8 0 , i64 32 , i1 false ) ; NOTE(review): claims align 32 although %a0 is declared align 16 - presumably intentional for this test; confirm
4545
46- store ptr %2 , ptr %3 , align 8
46+ store ptr %a0 , ptr %a1 , align 8 ; slot 0 of %a1 is written here and never loaded in this function
4747
48- %4 = getelementptr inbounds %ptr_pair , ptr %3 , i64 0 , i32 1
49- %5 = load ptr , ptr %0 , align 8
50- store ptr %5 , ptr %4 , align 8
48+ %p1 = getelementptr inbounds %ptr_pair , ptr %a1 , i64 0 , i32 1
49+ %v0 = load ptr , ptr %arg0 , align 8
50+ store ptr %v0 , ptr %p1 , align 8
5151
52- %6 = getelementptr inbounds i8 , ptr %3 , i32 8
53- %7 = load ptr , ptr %6 , align 8
52+ %p2 = getelementptr inbounds i8 , ptr %a1 , i32 8 ; byte offset 8 addresses the same slot as %p1
53+ %v1 = load ptr , ptr %p2 , align 8
5454
55- %8 = load i16 , ptr %7 , align 1
56- store i16 %8 , ptr %2 , align 16
55+ %v2 = load i16 , ptr %v1 , align 1
56+ store i16 %v2 , ptr %a0 , align 16 ; writes the first i16 of the zeroed vector; expected as insertelement at index 0
5757
5858 ret void
5959}
6060
6161%struct.c = type { <4 x i32 > } ; 4 lanes of i32 = 16 bytes, the same size as the memset below
62- define void @vector_promote_memset_c (ptr %0 ) {
62+ define void @vector_promote_memset_c (ptr %arg0 ) {
6363; CHECK-LABEL: @vector_promote_memset_c(
6464; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
6565; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 1
6666; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[TMP3]], i32 2
6767; CHECK-NEXT: ret void
6868;
69- %2 = alloca %struct.c , align 4
70- %3 = alloca %ptr_pair , align 8
71- call void @llvm.memset.p0.i64 (ptr align 32 %2 , i8 0 , i64 16 , i1 false )
69+ %a0 = alloca %struct.c , align 4
70+ %a1 = alloca %ptr_pair , align 8
71+ call void @llvm.memset.p0.i64 (ptr align 32 %a0 , i8 0 , i64 16 , i1 false ) ; NOTE(review): claims align 32 although %a0 is declared align 4 - presumably intentional for this test; confirm
7272
73- store ptr %2 , ptr %3 , align 8
73+ store ptr %a0 , ptr %a1 , align 8 ; slot 0 of %a1 is written here and never loaded in this function
7474
75- %4 = getelementptr inbounds %ptr_pair , ptr %3 , i64 0 , i32 1
76- %5 = load ptr , ptr %0 , align 8
77- store ptr %5 , ptr %4 , align 8
75+ %p1 = getelementptr inbounds %ptr_pair , ptr %a1 , i64 0 , i32 1
76+ %v0 = load ptr , ptr %arg0 , align 8
77+ store ptr %v0 , ptr %p1 , align 8
7878
79- %6 = getelementptr inbounds i8 , ptr %3 , i32 8
80- %7 = load ptr , ptr %6 , align 8
79+ %p2 = getelementptr inbounds i8 , ptr %a1 , i32 8 ; byte offset 8 addresses the same slot as %p1
80+ %v1 = load ptr , ptr %p2 , align 8
8181
82- %8 = load i32 , ptr %7 , align 1
82+ %v2 = load i32 , ptr %v1 , align 1
8383
84- %9 = getelementptr inbounds i32 , ptr %2 , i32 2
85- store i32 %8 , ptr %9 , align 4
84+ %p3 = getelementptr inbounds i32 , ptr %a0 , i32 2 ; i32 index 2 = byte offset 8, matching lane 2 in the expected insertelement
85+ store i32 %v2 , ptr %p3 , align 4
8686
8787 ret void
8888}
8989
9090; We currently prevent promotion if the vector would require padding: the expected output has no insertelement, and a [3 x i32] alloca with a 12-byte memset remains.
9191%struct.d = type { <6 x i32 > }
92- define void @vector_promote_memset_d (ptr %0 ) {
92+ define void @vector_promote_memset_d (ptr %arg0 ) {
9393; CHECK-LABEL: @vector_promote_memset_d(
9494; CHECK-NEXT: [[DOTSROA_2:%.*]] = alloca [3 x i32], align 4
9595; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[DOTSROA_2]], i8 0, i64 12, i1 false)
9696; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8
9797; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 1
9898; CHECK-NEXT: ret void
9999;
100- %2 = alloca %struct.d , align 4
101- %3 = alloca %ptr_pair , align 8
102- call void @llvm.memset.p0.i64 (ptr align 32 %2 , i8 0 , i64 24 , i1 false )
100+ %a0 = alloca %struct.d , align 4
101+ %a1 = alloca %ptr_pair , align 8
102+ call void @llvm.memset.p0.i64 (ptr align 32 %a0 , i8 0 , i64 24 , i1 false ) ; zeroes 24 bytes (6 x i32); the expected output keeps only a 12-byte memset
103103
104- store ptr %2 , ptr %3 , align 8
104+ store ptr %a0 , ptr %a1 , align 8 ; slot 0 of %a1 is written here and never loaded in this function
105105
106- %4 = getelementptr inbounds %ptr_pair , ptr %3 , i64 0 , i32 1
107- %5 = load ptr , ptr %0 , align 8
108- store ptr %5 , ptr %4 , align 8
106+ %p1 = getelementptr inbounds %ptr_pair , ptr %a1 , i64 0 , i32 1
107+ %v0 = load ptr , ptr %arg0 , align 8
108+ store ptr %v0 , ptr %p1 , align 8
109109
110- %6 = getelementptr inbounds i8 , ptr %3 , i32 8
111- %7 = load ptr , ptr %6 , align 8
110+ %p2 = getelementptr inbounds i8 , ptr %a1 , i32 8 ; byte offset 8 addresses the same slot as %p1
111+ %v1 = load ptr , ptr %p2 , align 8
112112
113- %8 = load i32 , ptr %7 , align 1
113+ %v2 = load i32 , ptr %v1 , align 1
114114
115- %9 = getelementptr inbounds i32 , ptr %2 , i32 2
116- store i32 %8 , ptr %9 , align 4
115+ %p3 = getelementptr inbounds i32 , ptr %a0 , i32 2 ; i32 index 2 = byte offset 8
116+ store i32 %v2 , ptr %p3 , align 4 ; %a0 is never read afterwards, and the expected output contains no store
117117
118118 ret void
119119}
120120
121+
122+ ; We shouldn't promote large memsets: the expected output keeps a [65524 x i8] alloca and its memset instead of building a vector value.
123+ %struct.e = type { [65536 x i8 ] }
124+ define void @vector_promote_memset_e (ptr %arg0 ) {
125+ ; CHECK-LABEL: @vector_promote_memset_e(
126+ ; CHECK-NEXT: [[A0_SROA_2:%.*]] = alloca [65524 x i8], align 4
127+ ; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_2]], i8 0, i64 65524, i1 false)
128+ ; CHECK-NEXT: [[V0:%.*]] = load ptr, ptr [[ARG0:%.*]], align 8
129+ ; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[V0]], align 1
130+ ; CHECK-NEXT: ret void
131+ ;
132+ %a0 = alloca %struct.e , align 4
133+ %a1 = alloca %ptr_pair , align 8
134+ call void @llvm.memset.p0.i64 (ptr align 32 %a0 , i8 0 , i64 65536 , i1 false ) ; zeroes the whole 65536-byte array
135+
136+ store ptr %a0 , ptr %a1 , align 8 ; slot 0 of %a1 is written here and never loaded in this function
137+
138+ %p1 = getelementptr inbounds %ptr_pair , ptr %a1 , i64 0 , i32 1
139+ %v0 = load ptr , ptr %arg0 , align 8
140+ store ptr %v0 , ptr %p1 , align 8
141+
142+ %p2 = getelementptr inbounds i8 , ptr %a1 , i32 8 ; byte offset 8 addresses the same slot as %p1
143+ %v1 = load ptr , ptr %p2 , align 8
144+
145+ %v2 = load i32 , ptr %v1 , align 1
146+
147+ %p3 = getelementptr inbounds i32 , ptr %a0 , i32 2 ; i32 index 2 = byte offset 8
148+ store i32 %v2 , ptr %p3 , align 4 ; covers bytes 8..11; the expected 65524-byte remainder equals 65536 - 12
149+
150+ ret void
151+ }
152+
153+
154+
121155; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write)
122156declare void @llvm.memset.p0.i64 (ptr writeonly captures(none), i8 , i64 , i1 immarg) #0 ; operands: destination pointer, fill byte, byte count, volatile flag
123157
0 commit comments