Skip to content

Commit 64534d2

Browse files
david-armtstellar
authored and committed
[InstCombine] Bail out of load-store forwarding for scalable vector types
This patch fixes an invalid TypeSize->uint64_t implicit conversion in FoldReinterpretLoadFromConst. If the size of the constant is scalable we bail out of the optimisation for now. Tests added here: Transforms/InstCombine/load-store-forward.ll Differential Revision: https://reviews.llvm.org/D120240 (cherry picked from commit 47eff64)
1 parent 4fe93c0 commit 64534d2

File tree

2 files changed

+175
-3
lines changed

2 files changed

+175
-3
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -589,14 +589,17 @@ Constant *FoldReinterpretLoadFromConst(Constant *C, Type *LoadTy,
589589
if (BytesLoaded > 32 || BytesLoaded == 0)
590590
return nullptr;
591591

592-
int64_t InitializerSize = DL.getTypeAllocSize(C->getType()).getFixedSize();
593-
594592
// If we're not accessing anything in this constant, the result is undefined.
595593
if (Offset <= -1 * static_cast<int64_t>(BytesLoaded))
596594
return UndefValue::get(IntType);
597595

596+
// TODO: We should be able to support scalable types.
597+
TypeSize InitializerSize = DL.getTypeAllocSize(C->getType());
598+
if (InitializerSize.isScalable())
599+
return nullptr;
600+
598601
// If we're not accessing anything in this constant, the result is undefined.
599-
if (Offset >= InitializerSize)
602+
if (Offset >= InitializerSize.getFixedValue())
600603
return UndefValue::get(IntType);
601604

602605
unsigned char RawBytes[32] = {0};

llvm/test/Transforms/InstCombine/load-store-forward.ll

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,3 +120,172 @@ define i32 @vec_store_load_overlap(i32* %p) {
120120
%load = load i32, i32* %p5, align 2
121121
ret i32 %load
122122
}
123+
124+
define i32 @load_i32_store_nxv4i32(i32* %a) {
125+
; CHECK-LABEL: @load_i32_store_nxv4i32(
126+
; CHECK-NEXT: entry:
127+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
128+
; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
129+
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[A]], align 4
130+
; CHECK-NEXT: ret i32 [[TMP1]]
131+
;
132+
entry:
133+
%0 = bitcast i32* %a to <vscale x 4 x i32>*
134+
store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
135+
%1 = load i32, i32* %a, align 4
136+
ret i32 %1
137+
}
138+
139+
define i64 @load_i64_store_nxv8i8(i8* %a) {
140+
; CHECK-LABEL: @load_i64_store_nxv8i8(
141+
; CHECK-NEXT: entry:
142+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[A:%.*]] to <vscale x 8 x i8>*
143+
; CHECK-NEXT: store <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8>* [[TMP0]], align 16
144+
; CHECK-NEXT: [[A2:%.*]] = bitcast i8* [[A]] to i64*
145+
; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64* [[A2]], align 8
146+
; CHECK-NEXT: ret i64 [[LOAD]]
147+
;
148+
entry:
149+
%0 = bitcast i8* %a to <vscale x 8 x i8>*
150+
store <vscale x 8 x i8> shufflevector (<vscale x 8 x i8> insertelement (<vscale x 8 x i8> poison, i8 1, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i8>* %0, align 16
151+
%a2 = bitcast i8* %a to i64*
152+
%load = load i64, i64* %a2, align 8
153+
ret i64 %load
154+
}
155+
156+
define i64 @load_i64_store_nxv4i32(i32* %a) {
157+
; CHECK-LABEL: @load_i64_store_nxv4i32(
158+
; CHECK-NEXT: entry:
159+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
160+
; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
161+
; CHECK-NEXT: [[A2:%.*]] = bitcast i32* [[A]] to i64*
162+
; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64* [[A2]], align 8
163+
; CHECK-NEXT: ret i64 [[LOAD]]
164+
;
165+
entry:
166+
%0 = bitcast i32* %a to <vscale x 4 x i32>*
167+
store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
168+
%a2 = bitcast i32* %a to i64*
169+
%load = load i64, i64* %a2, align 8
170+
ret i64 %load
171+
}
172+
173+
define i8 @load_i8_store_nxv4i32(i32* %a) {
174+
; CHECK-LABEL: @load_i8_store_nxv4i32(
175+
; CHECK-NEXT: entry:
176+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
177+
; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
178+
; CHECK-NEXT: [[A2:%.*]] = bitcast i32* [[A]] to i8*
179+
; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8* [[A2]], align 1
180+
; CHECK-NEXT: ret i8 [[LOAD]]
181+
;
182+
entry:
183+
%0 = bitcast i32* %a to <vscale x 4 x i32>*
184+
store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
185+
%a2 = bitcast i32* %a to i8*
186+
%load = load i8, i8* %a2, align 1
187+
ret i8 %load
188+
}
189+
190+
define float @load_f32_store_nxv4f32(float* %a) {
191+
; CHECK-LABEL: @load_f32_store_nxv4f32(
192+
; CHECK-NEXT: entry:
193+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <vscale x 4 x float>*
194+
; CHECK-NEXT: store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* [[TMP0]], align 16
195+
; CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[A]], align 4
196+
; CHECK-NEXT: ret float [[TMP1]]
197+
;
198+
entry:
199+
%0 = bitcast float* %a to <vscale x 4 x float>*
200+
store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.0, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* %0, align 16
201+
%1 = load float, float* %a, align 4
202+
ret float %1
203+
}
204+
205+
define i32 @load_i32_store_nxv4f32(float* %a) {
206+
; CHECK-LABEL: @load_i32_store_nxv4f32(
207+
; CHECK-NEXT: entry:
208+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast float* [[A:%.*]] to <vscale x 4 x float>*
209+
; CHECK-NEXT: store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* [[TMP0]], align 16
210+
; CHECK-NEXT: [[A2:%.*]] = bitcast float* [[A]] to i32*
211+
; CHECK-NEXT: [[LOAD:%.*]] = load i32, i32* [[A2]], align 4
212+
; CHECK-NEXT: ret i32 [[LOAD]]
213+
;
214+
entry:
215+
%0 = bitcast float* %a to <vscale x 4 x float>*
216+
store <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 1.0, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x float>* %0, align 16
217+
%a2 = bitcast float* %a to i32*
218+
%load = load i32, i32* %a2, align 4
219+
ret i32 %load
220+
}
221+
222+
define <4 x i32> @load_v4i32_store_nxv4i32(i32* %a) {
223+
; CHECK-LABEL: @load_v4i32_store_nxv4i32(
224+
; CHECK-NEXT: entry:
225+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
226+
; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
227+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to <4 x i32>*
228+
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 16
229+
; CHECK-NEXT: ret <4 x i32> [[TMP2]]
230+
;
231+
entry:
232+
%0 = bitcast i32* %a to <vscale x 4 x i32>*
233+
store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
234+
%1 = bitcast i32* %a to <4 x i32>*
235+
%2 = load <4 x i32>, <4 x i32>* %1, align 16
236+
ret <4 x i32> %2
237+
}
238+
239+
define <4 x i16> @load_v4i16_store_nxv4i32(i32* %a) {
240+
; CHECK-LABEL: @load_v4i16_store_nxv4i32(
241+
; CHECK-NEXT: entry:
242+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
243+
; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
244+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to <4 x i16>*
245+
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, <4 x i16>* [[TMP1]], align 16
246+
; CHECK-NEXT: ret <4 x i16> [[TMP2]]
247+
;
248+
entry:
249+
%0 = bitcast i32* %a to <vscale x 4 x i32>*
250+
store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
251+
%1 = bitcast i32* %a to <4 x i16>*
252+
%2 = load <4 x i16>, <4 x i16>* %1, align 16
253+
ret <4 x i16> %2
254+
}
255+
256+
; Loaded data type exceeds the known minimum size of the store.
257+
define i64 @load_i64_store_nxv4i8(i8* %a) {
258+
; CHECK-LABEL: @load_i64_store_nxv4i8(
259+
; CHECK-NEXT: entry:
260+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* [[A:%.*]] to <vscale x 4 x i8>*
261+
; CHECK-NEXT: store <vscale x 4 x i8> shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i32 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i8>* [[TMP0]], align 16
262+
; CHECK-NEXT: [[A2:%.*]] = bitcast i8* [[A]] to i64*
263+
; CHECK-NEXT: [[LOAD:%.*]] = load i64, i64* [[A2]], align 8
264+
; CHECK-NEXT: ret i64 [[LOAD]]
265+
;
266+
entry:
267+
%0 = bitcast i8* %a to <vscale x 4 x i8>*
268+
store <vscale x 4 x i8> shufflevector (<vscale x 4 x i8> insertelement (<vscale x 4 x i8> poison, i8 1, i32 0), <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i8>* %0, align 16
269+
%a2 = bitcast i8* %a to i64*
270+
%load = load i64, i64* %a2, align 8
271+
ret i64 %load
272+
}
273+
274+
; Loaded data size is unknown - we cannot guarantee it won't
275+
; exceed the store size.
276+
define <vscale x 4 x i8> @load_nxv4i8_store_nxv4i32(i32* %a) {
277+
; CHECK-LABEL: @load_nxv4i8_store_nxv4i32(
278+
; CHECK-NEXT: entry:
279+
; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A:%.*]] to <vscale x 4 x i32>*
280+
; CHECK-NEXT: store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* [[TMP0]], align 16
281+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A]] to <vscale x 4 x i8>*
282+
; CHECK-NEXT: [[TMP2:%.*]] = load <vscale x 4 x i8>, <vscale x 4 x i8>* [[TMP1]], align 16
283+
; CHECK-NEXT: ret <vscale x 4 x i8> [[TMP2]]
284+
;
285+
entry:
286+
%0 = bitcast i32* %a to <vscale x 4 x i32>*
287+
store <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32>* %0, align 16
288+
%1 = bitcast i32* %a to <vscale x 4 x i8>*
289+
%2 = load <vscale x 4 x i8>, <vscale x 4 x i8>* %1, align 16
290+
ret <vscale x 4 x i8> %2
291+
}

0 commit comments

Comments (0)