@@ -5,12 +5,61 @@ define void @test(ptr %p, ptr noalias %s) {
55; CHECK-LABEL: @test(
66; CHECK-NEXT: entry:
77; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
8+ ; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
89; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
10+ ; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
11+ ; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
912; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
10- ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
11- ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
12- ; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]]
13- ; CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4
13+ ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
14+ ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4
15+ ; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
16+ ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
17+ ; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
18+ ; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
19+ ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
20+ ; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
21+ ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8
22+ ; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
23+ ; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
24+ ; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
25+ ; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
26+ ; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
27+ ; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
28+ ; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12
29+ ; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
30+ ; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
31+ ; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
32+ ; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
33+ ; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
34+ ; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
35+ ; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16
36+ ; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
37+ ; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
38+ ; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
39+ ; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
40+ ; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
41+ ; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
42+ ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20
43+ ; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
44+ ; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
45+ ; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
46+ ; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
47+ ; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
48+ ; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
49+ ; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24
50+ ; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
51+ ; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
52+ ; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
53+ ; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
54+ ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
55+ ; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
56+ ; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28
57+ ; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
58+ ; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 2
59+ ; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
60+ ; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
61+ ; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
62+ ; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
1463; CHECK-NEXT: ret void
1564;
1665entry:
@@ -213,40 +262,67 @@ define void @test2(ptr %p, ptr noalias %s, i32 %stride) {
213262; CHECK-NEXT: entry:
214263; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
215264; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
265+ ; CHECK-NEXT: [[I:%.*]] = load float, ptr [[ARRAYIDX]], align 4
216266; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7
217267; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
218268; CHECK-NEXT: [[I1:%.*]] = load float, ptr [[ARRAYIDX1]], align 4
269+ ; CHECK-NEXT: [[ADD:%.*]] = fsub fast float [[I1]], [[I]]
219270; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
271+ ; CHECK-NEXT: store float [[ADD]], ptr [[ARRAYIDX2]], align 4
272+ ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 6
273+ ; CHECK-NEXT: [[I2:%.*]] = load float, ptr [[ARRAYIDX4]], align 4
220274; CHECK-NEXT: [[ST5:%.*]] = mul i64 [[STR]], 6
221275; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST5]]
222276; CHECK-NEXT: [[I3:%.*]] = load float, ptr [[ARRAYIDX6]], align 4
277+ ; CHECK-NEXT: [[ADD7:%.*]] = fsub fast float [[I3]], [[I2]]
278+ ; CHECK-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[S]], i64 1
279+ ; CHECK-NEXT: store float [[ADD7]], ptr [[ARRAYIDX9]], align 4
280+ ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 10
281+ ; CHECK-NEXT: [[I4:%.*]] = load float, ptr [[ARRAYIDX11]], align 4
223282; CHECK-NEXT: [[ST4:%.*]] = mul i64 [[STR]], 5
224283; CHECK-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST4]]
225284; CHECK-NEXT: [[I5:%.*]] = load float, ptr [[ARRAYIDX13]], align 4
285+ ; CHECK-NEXT: [[ADD14:%.*]] = fsub fast float [[I5]], [[I4]]
286+ ; CHECK-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds float, ptr [[S]], i64 2
287+ ; CHECK-NEXT: store float [[ADD14]], ptr [[ARRAYIDX16]], align 4
288+ ; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 14
289+ ; CHECK-NEXT: [[I6:%.*]] = load float, ptr [[ARRAYIDX18]], align 4
226290; CHECK-NEXT: [[ST3:%.*]] = mul i64 [[STR]], 4
227291; CHECK-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST3]]
228292; CHECK-NEXT: [[I7:%.*]] = load float, ptr [[ARRAYIDX20]], align 4
293+ ; CHECK-NEXT: [[ADD21:%.*]] = fsub fast float [[I7]], [[I6]]
294+ ; CHECK-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds float, ptr [[S]], i64 3
295+ ; CHECK-NEXT: store float [[ADD21]], ptr [[ARRAYIDX23]], align 4
296+ ; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 18
229297; CHECK-NEXT: [[ST2:%.*]] = mul i64 [[STR]], 3
298+ ; CHECK-NEXT: [[I8:%.*]] = load float, ptr [[ARRAYIDX25]], align 4
230299; CHECK-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST2]]
231300; CHECK-NEXT: [[I9:%.*]] = load float, ptr [[ARRAYIDX27]], align 4
301+ ; CHECK-NEXT: [[ADD28:%.*]] = fsub fast float [[I9]], [[I8]]
302+ ; CHECK-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds float, ptr [[S]], i64 4
303+ ; CHECK-NEXT: store float [[ADD28]], ptr [[ARRAYIDX30]], align 4
304+ ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 22
305+ ; CHECK-NEXT: [[I10:%.*]] = load float, ptr [[ARRAYIDX32]], align 4
232306; CHECK-NEXT: [[ST1:%.*]] = mul i64 [[STR]], 2
233307; CHECK-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST1]]
234308; CHECK-NEXT: [[I11:%.*]] = load float, ptr [[ARRAYIDX34]], align 4
309+ ; CHECK-NEXT: [[ADD35:%.*]] = fsub fast float [[I11]], [[I10]]
310+ ; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds float, ptr [[S]], i64 5
311+ ; CHECK-NEXT: store float [[ADD35]], ptr [[ARRAYIDX37]], align 4
312+ ; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 26
313+ ; CHECK-NEXT: [[I12:%.*]] = load float, ptr [[ARRAYIDX39]], align 4
235314; CHECK-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[STR]]
236315; CHECK-NEXT: [[I13:%.*]] = load float, ptr [[ARRAYIDX41]], align 4
316+ ; CHECK-NEXT: [[ADD42:%.*]] = fsub fast float [[I13]], [[I12]]
317+ ; CHECK-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds float, ptr [[S]], i64 6
318+ ; CHECK-NEXT: store float [[ADD42]], ptr [[ARRAYIDX44]], align 4
319+ ; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
320+ ; CHECK-NEXT: [[I14:%.*]] = load float, ptr [[ARRAYIDX46]], align 4
237321; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 0
238322; CHECK-NEXT: [[I15:%.*]] = load float, ptr [[ARRAYIDX48]], align 4
239- ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
240- ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x float> poison, float [[I1]], i32 0
241- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x float> [[TMP1]], float [[I3]], i32 1
242- ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x float> [[TMP2]], float [[I5]], i32 2
243- ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x float> [[TMP3]], float [[I7]], i32 3
244- ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x float> [[TMP4]], float [[I9]], i32 4
245- ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x float> [[TMP5]], float [[I11]], i32 5
246- ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x float> [[TMP6]], float [[I13]], i32 6
247- ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <8 x float> [[TMP7]], float [[I15]], i32 7
248- ; CHECK-NEXT: [[TMP9:%.*]] = fsub fast <8 x float> [[TMP8]], [[TMP0]]
249- ; CHECK-NEXT: store <8 x float> [[TMP9]], ptr [[ARRAYIDX2]], align 4
323+ ; CHECK-NEXT: [[ADD49:%.*]] = fsub fast float [[I15]], [[I14]]
324+ ; CHECK-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds float, ptr [[S]], i64 7
325+ ; CHECK-NEXT: store float [[ADD49]], ptr [[ARRAYIDX51]], align 4
250326; CHECK-NEXT: ret void
251327;
252328entry:
@@ -321,12 +397,27 @@ define void @test3(ptr %p, ptr noalias %s) {
321397; CHECK-NEXT: entry:
322398; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
323399; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
400+ ; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 4
401+ ; CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 8
402+ ; CHECK-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 12
403+ ; CHECK-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 16
404+ ; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 20
405+ ; CHECK-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 24
406+ ; CHECK-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 28
324407; CHECK-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 23
325- ; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
326- ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4
327- ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
328- ; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
329- ; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
408+ ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <8 x ptr> poison, ptr [[ARRAYIDX]], i32 0
409+ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x ptr> [[TMP0]], ptr [[ARRAYIDX4]], i32 1
410+ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x ptr> [[TMP1]], ptr [[ARRAYIDX11]], i32 2
411+ ; CHECK-NEXT: [[TMP3:%.*]] = insertelement <8 x ptr> [[TMP2]], ptr [[ARRAYIDX18]], i32 3
412+ ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x ptr> [[TMP3]], ptr [[ARRAYIDX25]], i32 4
413+ ; CHECK-NEXT: [[TMP5:%.*]] = insertelement <8 x ptr> [[TMP4]], ptr [[ARRAYIDX32]], i32 5
414+ ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x ptr> [[TMP5]], ptr [[ARRAYIDX39]], i32 6
415+ ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <8 x ptr> [[TMP6]], ptr [[ARRAYIDX46]], i32 7
416+ ; CHECK-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> poison)
417+ ; CHECK-NEXT: [[TMP9:%.*]] = load <8 x float>, ptr [[ARRAYIDX48]], align 4
418+ ; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x float> [[TMP9]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
419+ ; CHECK-NEXT: [[TMP11:%.*]] = fsub fast <8 x float> [[TMP10]], [[TMP8]]
420+ ; CHECK-NEXT: store <8 x float> [[TMP11]], ptr [[ARRAYIDX2]], align 4
330421; CHECK-NEXT: ret void
331422;
332423entry:
0 commit comments