|
2 | 2 | ; RUN: opt < %s -passes='sroa' -S | FileCheck %s |
3 | 3 | target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64" |
4 | 4 |
|
5 | | -%ptr_pair = type { ptr, ptr } |
6 | | - |
7 | | -%struct.a = type { <32 x i8> } |
8 | | -define void @vector_promote_memset_a(ptr %arg0) { |
9 | | -; CHECK-LABEL: @vector_promote_memset_a( |
10 | | -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8 |
11 | | -; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 1 |
12 | | -; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <32 x i8> zeroinitializer, i8 [[TMP3]], i32 0 |
13 | | -; CHECK-NEXT: ret void |
| 5 | +%struct_a = type { [32 x i8] } |
| 6 | +define i8 @vector_promote_a(ptr %arg0) { |
| 7 | +; CHECK-LABEL: @vector_promote_a( |
| 8 | +; CHECK-NEXT: [[V0:%.*]] = load i8, ptr [[ARG0:%.*]], align 1 |
| 9 | +; CHECK-NEXT: [[A0_SROA_0_0_VEC_INSERT:%.*]] = insertelement <32 x i8> zeroinitializer, i8 [[V0]], i32 0 |
| 10 | +; CHECK-NEXT: [[A0_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <32 x i8> [[A0_SROA_0_0_VEC_INSERT]], i32 4 |
| 11 | +; CHECK-NEXT: ret i8 [[A0_SROA_0_4_VEC_EXTRACT]] |
14 | 12 | ; |
15 | | - %a0 = alloca %struct.a, align 32 |
16 | | - %a1 = alloca %ptr_pair, align 8 |
| 13 | + %a0 = alloca %struct_a, align 32 |
17 | 14 | call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32, i1 false) |
18 | | - |
19 | | - store ptr %a0, ptr %a1, align 8 |
20 | | - |
21 | | - %p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1 |
22 | | - %v0 = load ptr, ptr %arg0, align 8 |
23 | | - store ptr %v0, ptr %p1, align 8 |
24 | | - |
25 | | - %p2 = getelementptr inbounds i8, ptr %a1, i32 8 |
26 | | - %v1 = load ptr, ptr %p2, align 8 |
27 | | - |
28 | | - %v2 = load i8, ptr %v1, align 1 |
29 | | - store i8 %v2, ptr %a0, align 32 |
30 | | - |
31 | | - ret void |
| 15 | + %v0 = load i8, ptr %arg0, align 1 |
| 16 | + store i8 %v0, ptr %a0, align 1 |
| 17 | + %p0 = getelementptr inbounds i8, ptr %a0, i64 4 |
| 18 | + %v1 = load i8, ptr %p0, align 1 |
| 19 | + ret i8 %v1 |
32 | 20 | } |
33 | 21 |
|
34 | | -%struct.b = type { <16 x i16> } |
35 | | -define void @vector_promote_memset_b(ptr %arg0) { |
36 | | -; CHECK-LABEL: @vector_promote_memset_b( |
37 | | -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8 |
38 | | -; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 1 |
39 | | -; CHECK-NEXT: [[DOTSROA_0_0_VEC_INSERT:%.*]] = insertelement <16 x i16> zeroinitializer, i16 [[TMP3]], i32 0 |
40 | | -; CHECK-NEXT: ret void |
| 22 | +%struct_b = type { [16 x i16] } |
| 23 | +define i16 @vector_promote_b(ptr %arg0) { |
| 24 | +; CHECK-LABEL: @vector_promote_b( |
| 25 | +; CHECK-NEXT: [[V0:%.*]] = load i16, ptr [[ARG0:%.*]], align 1 |
| 26 | +; CHECK-NEXT: [[A0_SROA_0_20_VEC_INSERT:%.*]] = insertelement <16 x i16> zeroinitializer, i16 [[V0]], i32 10 |
| 27 | +; CHECK-NEXT: [[A0_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <16 x i16> [[A0_SROA_0_20_VEC_INSERT]], i32 2 |
| 28 | +; CHECK-NEXT: ret i16 [[A0_SROA_0_4_VEC_EXTRACT]] |
41 | 29 | ; |
42 | | - %a0 = alloca %struct.b, align 16 |
43 | | - %a1 = alloca %ptr_pair, align 8 |
| 30 | + %a0 = alloca %struct_b, align 32 |
44 | 31 | call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32, i1 false) |
45 | | - |
46 | | - store ptr %a0, ptr %a1, align 8 |
47 | | - |
48 | | - %p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1 |
49 | | - %v0 = load ptr, ptr %arg0, align 8 |
50 | | - store ptr %v0, ptr %p1, align 8 |
51 | | - |
52 | | - %p2 = getelementptr inbounds i8, ptr %a1, i32 8 |
53 | | - %v1 = load ptr, ptr %p2, align 8 |
54 | | - |
55 | | - %v2 = load i16, ptr %v1, align 1 |
56 | | - store i16 %v2, ptr %a0, align 16 |
57 | | - |
58 | | - ret void |
| 32 | + %v0 = load i16, ptr %arg0, align 1 |
| 33 | + %p0 = getelementptr inbounds i16, ptr %a0, i64 10 |
| 34 | + store i16 %v0, ptr %p0, align 1 |
| 35 | + %p1 = getelementptr inbounds i16, ptr %a0, i64 2 |
| 36 | + %v1 = load i16, ptr %p1, align 1 |
| 37 | + ret i16 %v1 |
59 | 38 | } |
60 | 39 |
|
61 | | -%struct.c = type { <4 x i32> } |
62 | | -define void @vector_promote_memset_c(ptr %arg0) { |
63 | | -; CHECK-LABEL: @vector_promote_memset_c( |
64 | | -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8 |
65 | | -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 1 |
66 | | -; CHECK-NEXT: [[DOTSROA_0_8_VEC_INSERT:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[TMP3]], i32 2 |
67 | | -; CHECK-NEXT: ret void |
| 40 | +%struct_c = type { [4 x i32] } |
| 41 | +define i32 @vector_promote_c(ptr %arg0) { |
| 42 | +; CHECK-LABEL: @vector_promote_c( |
| 43 | +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[ARG0:%.*]], align 1 |
| 44 | +; CHECK-NEXT: [[A0_SROA_0_12_VEC_INSERT:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[V0]], i32 3 |
| 45 | +; CHECK-NEXT: [[A0_SROA_0_8_VEC_EXTRACT:%.*]] = extractelement <4 x i32> [[A0_SROA_0_12_VEC_INSERT]], i32 2 |
| 46 | +; CHECK-NEXT: ret i32 [[A0_SROA_0_8_VEC_EXTRACT]] |
68 | 47 | ; |
69 | | - %a0 = alloca %struct.c, align 4 |
70 | | - %a1 = alloca %ptr_pair, align 8 |
| 48 | + %a0 = alloca %struct_c, align 32 |
71 | 49 | call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 16, i1 false) |
72 | | - |
73 | | - store ptr %a0, ptr %a1, align 8 |
74 | | - |
75 | | - %p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1 |
76 | | - %v0 = load ptr, ptr %arg0, align 8 |
77 | | - store ptr %v0, ptr %p1, align 8 |
78 | | - |
79 | | - %p2 = getelementptr inbounds i8, ptr %a1, i32 8 |
80 | | - %v1 = load ptr, ptr %p2, align 8 |
81 | | - |
82 | | - %v2 = load i32, ptr %v1, align 1 |
83 | | - |
84 | | - %p3 = getelementptr inbounds i32, ptr %a0, i32 2 |
85 | | - store i32 %v2, ptr %p3, align 4 |
86 | | - |
87 | | - ret void |
| 50 | + %v0 = load i32, ptr %arg0, align 1 |
| 51 | + %p0 = getelementptr inbounds i32, ptr %a0, i64 3 |
| 52 | + store i32 %v0, ptr %p0, align 1 |
| 53 | + %p1 = getelementptr inbounds i32, ptr %a0, i64 2 |
| 54 | + %v1 = load i32, ptr %p1, align 1 |
| 55 | + ret i32 %v1 |
88 | 56 | } |
89 | 57 |
|
90 | 58 | ; We currently prevent promotion if the vector would require padding |
91 | | -%struct.d = type { <6 x i32> } |
92 | | -define void @vector_promote_memset_d(ptr %arg0) { |
93 | | -; CHECK-LABEL: @vector_promote_memset_d( |
94 | | -; CHECK-NEXT: [[DOTSROA_2:%.*]] = alloca [3 x i32], align 4 |
95 | | -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[DOTSROA_2]], i8 0, i64 12, i1 false) |
96 | | -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP0:%.*]], align 8 |
97 | | -; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 1 |
98 | | -; CHECK-NEXT: ret void |
| 59 | +%struct_d = type { [6 x i32] } |
| 60 | +define i32 @vector_promote_d(ptr %arg0) { |
| 61 | +; CHECK-LABEL: @vector_promote_d( |
| 62 | +; CHECK-NEXT: [[A0_SROA_3:%.*]] = alloca [3 x i32], align 4 |
| 63 | +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_3]], i8 0, i64 12, i1 false) |
| 64 | +; CHECK-NEXT: [[V0:%.*]] = load i32, ptr [[ARG0:%.*]], align 1 |
| 65 | +; CHECK-NEXT: ret i32 0 |
99 | 66 | ; |
100 | | - %a0 = alloca %struct.d, align 4 |
101 | | - %a1 = alloca %ptr_pair, align 8 |
| 67 | + %a0 = alloca %struct_d, align 32 |
102 | 68 | call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 24, i1 false) |
103 | | - |
104 | | - store ptr %a0, ptr %a1, align 8 |
105 | | - |
106 | | - %p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1 |
107 | | - %v0 = load ptr, ptr %arg0, align 8 |
108 | | - store ptr %v0, ptr %p1, align 8 |
109 | | - |
110 | | - %p2 = getelementptr inbounds i8, ptr %a1, i32 8 |
111 | | - %v1 = load ptr, ptr %p2, align 8 |
112 | | - |
113 | | - %v2 = load i32, ptr %v1, align 1 |
114 | | - |
115 | | - %p3 = getelementptr inbounds i32, ptr %a0, i32 2 |
116 | | - store i32 %v2, ptr %p3, align 4 |
117 | | - |
118 | | - ret void |
| 69 | + %v0 = load i32, ptr %arg0, align 1 |
| 70 | + %p0 = getelementptr inbounds i32, ptr %a0, i64 1 |
| 71 | + store i32 %v0, ptr %p0, align 1 |
| 72 | + %p1 = getelementptr inbounds i32, ptr %a0, i64 2 |
| 73 | + %v1 = load i32, ptr %p1, align 1 |
| 74 | + ret i32 %v1 |
119 | 75 | } |
120 | 76 |
|
121 | | - |
122 | | -; We shouldn't promote large memsets. |
123 | | -%struct.e = type { [65536 x i8] } |
124 | | -define void @vector_promote_memset_e(ptr %arg0) { |
125 | | -; CHECK-LABEL: @vector_promote_memset_e( |
126 | | -; CHECK-NEXT: [[A0_SROA_2:%.*]] = alloca [65524 x i8], align 4 |
127 | | -; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_2]], i8 0, i64 65524, i1 false) |
128 | | -; CHECK-NEXT: [[V0:%.*]] = load ptr, ptr [[ARG0:%.*]], align 8 |
129 | | -; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[V0]], align 1 |
130 | | -; CHECK-NEXT: ret void |
| 77 | +; We shouldn't promote memsets larger than the max value of `unsigned short`. |
| 78 | +; See getMaxNumFixedVectorElements(). |
| 79 | +%struct_e = type { [65536 x i8] } |
| 80 | +define i8 @vector_promote_e(ptr %arg0) { |
| 81 | +; CHECK-LABEL: @vector_promote_e( |
| 82 | +; CHECK-NEXT: [[A0_SROA_3:%.*]] = alloca [65532 x i8], align 4 |
| 83 | +; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 [[A0_SROA_3]], i8 0, i64 65532, i1 false) |
| 84 | +; CHECK-NEXT: [[V0:%.*]] = load i8, ptr [[ARG0:%.*]], align 1 |
| 85 | +; CHECK-NEXT: ret i8 0 |
131 | 86 | ; |
132 | | - %a0 = alloca %struct.e, align 4 |
133 | | - %a1 = alloca %ptr_pair, align 8 |
| 87 | + %a0 = alloca %struct_e, align 32 |
134 | 88 | call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 65536, i1 false) |
135 | | - |
136 | | - store ptr %a0, ptr %a1, align 8 |
137 | | - |
138 | | - %p1 = getelementptr inbounds %ptr_pair, ptr %a1, i64 0, i32 1 |
139 | | - %v0 = load ptr, ptr %arg0, align 8 |
140 | | - store ptr %v0, ptr %p1, align 8 |
141 | | - |
142 | | - %p2 = getelementptr inbounds i8, ptr %a1, i32 8 |
143 | | - %v1 = load ptr, ptr %p2, align 8 |
144 | | - |
145 | | - %v2 = load i32, ptr %v1, align 1 |
146 | | - |
147 | | - %p3 = getelementptr inbounds i32, ptr %a0, i32 2 |
148 | | - store i32 %v2, ptr %p3, align 4 |
149 | | - |
150 | | - ret void |
| 89 | + %v0 = load i8, ptr %arg0, align 1 |
| 90 | + %p0 = getelementptr inbounds i8, ptr %a0, i64 3 |
| 91 | + store i8 %v0, ptr %p0, align 1 |
| 92 | + %p1 = getelementptr inbounds i8, ptr %a0, i64 2 |
| 93 | + %v1 = load i8, ptr %p1, align 1 |
| 94 | + ret i8 %v1 |
151 | 95 | } |
152 | 96 |
|
153 | | - |
| 97 | +; Largest memset we currently promote |
| 98 | +%struct_f = type { [32768 x i8] } |
| 99 | +define i8 @vector_promote_f(ptr %arg0) { |
| 100 | +; CHECK-LABEL: @vector_promote_f( |
| 101 | +; CHECK-NEXT: [[V0:%.*]] = load i8, ptr [[ARG0:%.*]], align 1 |
| 102 | +; CHECK-NEXT: [[A0_SROA_0_12345_VEC_INSERT:%.*]] = insertelement <32768 x i8> zeroinitializer, i8 [[V0]], i32 12345 |
| 103 | +; CHECK-NEXT: [[A0_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <32768 x i8> [[A0_SROA_0_12345_VEC_INSERT]], i32 2 |
| 104 | +; CHECK-NEXT: ret i8 [[A0_SROA_0_2_VEC_EXTRACT]] |
| 105 | +; |
| 106 | + %a0 = alloca %struct_f, align 32 |
| 107 | + call void @llvm.memset.p0.i64(ptr align 32 %a0, i8 0, i64 32768, i1 false) |
| 108 | + %v0 = load i8, ptr %arg0, align 1 |
| 109 | + %p0 = getelementptr inbounds i8, ptr %a0, i64 12345 |
| 110 | + store i8 %v0, ptr %p0, align 1 |
| 111 | + %p1 = getelementptr inbounds i8, ptr %a0, i64 2 |
| 112 | + %v1 = load i8, ptr %p1, align 1 |
| 113 | + ret i8 %v1 |
| 114 | +} |
154 | 115 |
|
155 | 116 | ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) |
156 | 117 | declare void @llvm.memset.p0.i64(ptr writeonly captures(none), i8, i64, i1 immarg) #0 |
|
0 commit comments