@@ -45,9 +45,25 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
4545; I64-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], 14
4646; I64-NEXT: [[TMP19:%.*]] = add i32 [[INDEX]], 15
4747; I64-NEXT: [[TMP20:%.*]] = uitofp <4 x i32> [[VEC_IND]] to <4 x double>
48+ ; I64-NEXT: [[CONV:%.*]] = extractelement <4 x double> [[TMP20]], i32 0
49+ ; I64-NEXT: [[TMP57:%.*]] = extractelement <4 x double> [[TMP20]], i32 1
50+ ; I64-NEXT: [[TMP58:%.*]] = extractelement <4 x double> [[TMP20]], i32 2
51+ ; I64-NEXT: [[TMP59:%.*]] = extractelement <4 x double> [[TMP20]], i32 3
4852; I64-NEXT: [[TMP21:%.*]] = uitofp <4 x i32> [[STEP_ADD]] to <4 x double>
53+ ; I64-NEXT: [[TMP60:%.*]] = extractelement <4 x double> [[TMP21]], i32 0
54+ ; I64-NEXT: [[TMP61:%.*]] = extractelement <4 x double> [[TMP21]], i32 1
55+ ; I64-NEXT: [[TMP62:%.*]] = extractelement <4 x double> [[TMP21]], i32 2
56+ ; I64-NEXT: [[TMP63:%.*]] = extractelement <4 x double> [[TMP21]], i32 3
4957; I64-NEXT: [[TMP22:%.*]] = uitofp <4 x i32> [[STEP_ADD_2]] to <4 x double>
58+ ; I64-NEXT: [[TMP64:%.*]] = extractelement <4 x double> [[TMP22]], i32 0
59+ ; I64-NEXT: [[TMP65:%.*]] = extractelement <4 x double> [[TMP22]], i32 1
60+ ; I64-NEXT: [[TMP66:%.*]] = extractelement <4 x double> [[TMP22]], i32 2
61+ ; I64-NEXT: [[TMP67:%.*]] = extractelement <4 x double> [[TMP22]], i32 3
5062; I64-NEXT: [[TMP23:%.*]] = uitofp <4 x i32> [[STEP_ADD_3]] to <4 x double>
63+ ; I64-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP23]], i32 0
64+ ; I64-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP23]], i32 1
65+ ; I64-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP23]], i32 2
66+ ; I64-NEXT: [[TMP71:%.*]] = extractelement <4 x double> [[TMP23]], i32 3
5167; I64-NEXT: [[ADD_PTR_I:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[IV]]
5268; I64-NEXT: [[TMP25:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]]
5369; I64-NEXT: [[TMP26:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP6]]
@@ -80,37 +96,21 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
8096; I64-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP37]], align 4
8197; I64-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP38]], align 4
8298; I64-NEXT: [[TMP55:%.*]] = load ptr, ptr [[TMP39]], align 4
83- ; I64-NEXT: [[CONV:%.*]] = extractelement <4 x double> [[TMP20]], i32 0
8499; I64-NEXT: store double [[CONV]], ptr [[TMP0]], align 4
85- ; I64-NEXT: [[TMP57:%.*]] = extractelement <4 x double> [[TMP20]], i32 1
86100; I64-NEXT: store double [[TMP57]], ptr [[TMP41]], align 4
87- ; I64-NEXT: [[TMP58:%.*]] = extractelement <4 x double> [[TMP20]], i32 2
88101; I64-NEXT: store double [[TMP58]], ptr [[TMP42]], align 4
89- ; I64-NEXT: [[TMP59:%.*]] = extractelement <4 x double> [[TMP20]], i32 3
90102; I64-NEXT: store double [[TMP59]], ptr [[TMP43]], align 4
91- ; I64-NEXT: [[TMP60:%.*]] = extractelement <4 x double> [[TMP21]], i32 0
92103; I64-NEXT: store double [[TMP60]], ptr [[TMP44]], align 4
93- ; I64-NEXT: [[TMP61:%.*]] = extractelement <4 x double> [[TMP21]], i32 1
94104; I64-NEXT: store double [[TMP61]], ptr [[TMP45]], align 4
95- ; I64-NEXT: [[TMP62:%.*]] = extractelement <4 x double> [[TMP21]], i32 2
96105; I64-NEXT: store double [[TMP62]], ptr [[TMP46]], align 4
97- ; I64-NEXT: [[TMP63:%.*]] = extractelement <4 x double> [[TMP21]], i32 3
98106; I64-NEXT: store double [[TMP63]], ptr [[TMP47]], align 4
99- ; I64-NEXT: [[TMP64:%.*]] = extractelement <4 x double> [[TMP22]], i32 0
100107; I64-NEXT: store double [[TMP64]], ptr [[TMP48]], align 4
101- ; I64-NEXT: [[TMP65:%.*]] = extractelement <4 x double> [[TMP22]], i32 1
102108; I64-NEXT: store double [[TMP65]], ptr [[TMP49]], align 4
103- ; I64-NEXT: [[TMP66:%.*]] = extractelement <4 x double> [[TMP22]], i32 2
104109; I64-NEXT: store double [[TMP66]], ptr [[TMP50]], align 4
105- ; I64-NEXT: [[TMP67:%.*]] = extractelement <4 x double> [[TMP22]], i32 3
106110; I64-NEXT: store double [[TMP67]], ptr [[TMP51]], align 4
107- ; I64-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP23]], i32 0
108111; I64-NEXT: store double [[TMP68]], ptr [[TMP52]], align 4
109- ; I64-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP23]], i32 1
110112; I64-NEXT: store double [[TMP69]], ptr [[TMP53]], align 4
111- ; I64-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP23]], i32 2
112113; I64-NEXT: store double [[TMP70]], ptr [[TMP54]], align 4
113- ; I64-NEXT: [[TMP71:%.*]] = extractelement <4 x double> [[TMP23]], i32 3
114114; I64-NEXT: store double [[TMP71]], ptr [[TMP55]], align 4
115115; I64-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
116116; I64-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
@@ -139,21 +139,21 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
139139; I64-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 2
140140; I64-NEXT: [[TMP78:%.*]] = add i32 [[INDEX4]], 3
141141; I64-NEXT: [[TMP79:%.*]] = uitofp <4 x i32> [[VEC_IND5]] to <4 x double>
142- ; I64-NEXT: [[TMP80:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
143- ; I64-NEXT: [[TMP81:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
144- ; I64-NEXT: [[TMP82:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
145- ; I64-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP78]]
146- ; I64-NEXT: [[TMP84:%.*]] = load ptr, ptr [[TMP80]], align 4
147- ; I64-NEXT: [[TMP85:%.*]] = load ptr, ptr [[TMP81]], align 4
148- ; I64-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TMP82]], align 4
149- ; I64-NEXT: [[TMP87:%.*]] = load ptr, ptr [[TMP83]], align 4
150142; I64-NEXT: [[TMP88:%.*]] = extractelement <4 x double> [[TMP79]], i32 0
151- ; I64-NEXT: store double [[TMP88]], ptr [[TMP84]], align 4
152143; I64-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP79]], i32 1
153- ; I64-NEXT: store double [[TMP89]], ptr [[TMP85]], align 4
154144; I64-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP79]], i32 2
155- ; I64-NEXT: store double [[TMP90]], ptr [[TMP86]], align 4
156145; I64-NEXT: [[TMP91:%.*]] = extractelement <4 x double> [[TMP79]], i32 3
146+ ; I64-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
147+ ; I64-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
148+ ; I64-NEXT: [[TMP86:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
149+ ; I64-NEXT: [[TMP93:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP78]]
150+ ; I64-NEXT: [[TMP94:%.*]] = load ptr, ptr [[TMP84]], align 4
151+ ; I64-NEXT: [[TMP95:%.*]] = load ptr, ptr [[TMP85]], align 4
152+ ; I64-NEXT: [[TMP96:%.*]] = load ptr, ptr [[TMP86]], align 4
153+ ; I64-NEXT: [[TMP87:%.*]] = load ptr, ptr [[TMP93]], align 4
154+ ; I64-NEXT: store double [[TMP88]], ptr [[TMP94]], align 4
155+ ; I64-NEXT: store double [[TMP89]], ptr [[TMP95]], align 4
156+ ; I64-NEXT: store double [[TMP90]], ptr [[TMP96]], align 4
157157; I64-NEXT: store double [[TMP91]], ptr [[TMP87]], align 4
158158; I64-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX4]], 4
159159; I64-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND5]], splat (i32 4)
@@ -201,9 +201,25 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
201201; I32-NEXT: [[TMP42:%.*]] = add i32 [[INDEX]], 14
202202; I32-NEXT: [[TMP43:%.*]] = add i32 [[INDEX]], 15
203203; I32-NEXT: [[TMP44:%.*]] = uitofp <4 x i32> [[VEC_IND]] to <4 x double>
204+ ; I32-NEXT: [[TMP31:%.*]] = extractelement <4 x double> [[TMP44]], i32 0
205+ ; I32-NEXT: [[TMP32:%.*]] = extractelement <4 x double> [[TMP44]], i32 1
206+ ; I32-NEXT: [[TMP33:%.*]] = extractelement <4 x double> [[TMP44]], i32 2
207+ ; I32-NEXT: [[TMP34:%.*]] = extractelement <4 x double> [[TMP44]], i32 3
204208; I32-NEXT: [[TMP45:%.*]] = uitofp <4 x i32> [[STEP_ADD]] to <4 x double>
209+ ; I32-NEXT: [[TMP35:%.*]] = extractelement <4 x double> [[TMP45]], i32 0
210+ ; I32-NEXT: [[TMP36:%.*]] = extractelement <4 x double> [[TMP45]], i32 1
211+ ; I32-NEXT: [[TMP37:%.*]] = extractelement <4 x double> [[TMP45]], i32 2
212+ ; I32-NEXT: [[TMP38:%.*]] = extractelement <4 x double> [[TMP45]], i32 3
205213; I32-NEXT: [[TMP46:%.*]] = uitofp <4 x i32> [[STEP_ADD_2]] to <4 x double>
214+ ; I32-NEXT: [[TMP63:%.*]] = extractelement <4 x double> [[TMP46]], i32 0
215+ ; I32-NEXT: [[TMP64:%.*]] = extractelement <4 x double> [[TMP46]], i32 1
216+ ; I32-NEXT: [[TMP65:%.*]] = extractelement <4 x double> [[TMP46]], i32 2
217+ ; I32-NEXT: [[TMP66:%.*]] = extractelement <4 x double> [[TMP46]], i32 3
206218; I32-NEXT: [[TMP55:%.*]] = uitofp <4 x i32> [[STEP_ADD_3]] to <4 x double>
219+ ; I32-NEXT: [[TMP67:%.*]] = extractelement <4 x double> [[TMP55]], i32 0
220+ ; I32-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP55]], i32 1
221+ ; I32-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP55]], i32 2
222+ ; I32-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP55]], i32 3
207223; I32-NEXT: [[TMP15:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP3]]
208224; I32-NEXT: [[TMP16:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP4]]
209225; I32-NEXT: [[TMP17:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP5]]
@@ -236,37 +252,21 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
236252; I32-NEXT: [[TMP52:%.*]] = load ptr, ptr [[TMP61]], align 4
237253; I32-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP62]], align 4
238254; I32-NEXT: [[TMP54:%.*]] = load ptr, ptr [[TMP71]], align 4
239- ; I32-NEXT: [[TMP31:%.*]] = extractelement <4 x double> [[TMP44]], i32 0
240255; I32-NEXT: store double [[TMP31]], ptr [[TMP23]], align 4
241- ; I32-NEXT: [[TMP32:%.*]] = extractelement <4 x double> [[TMP44]], i32 1
242256; I32-NEXT: store double [[TMP32]], ptr [[TMP24]], align 4
243- ; I32-NEXT: [[TMP33:%.*]] = extractelement <4 x double> [[TMP44]], i32 2
244257; I32-NEXT: store double [[TMP33]], ptr [[TMP25]], align 4
245- ; I32-NEXT: [[TMP34:%.*]] = extractelement <4 x double> [[TMP44]], i32 3
246258; I32-NEXT: store double [[TMP34]], ptr [[TMP26]], align 4
247- ; I32-NEXT: [[TMP35:%.*]] = extractelement <4 x double> [[TMP45]], i32 0
248259; I32-NEXT: store double [[TMP35]], ptr [[TMP27]], align 4
249- ; I32-NEXT: [[TMP36:%.*]] = extractelement <4 x double> [[TMP45]], i32 1
250260; I32-NEXT: store double [[TMP36]], ptr [[TMP28]], align 4
251- ; I32-NEXT: [[TMP37:%.*]] = extractelement <4 x double> [[TMP45]], i32 2
252261; I32-NEXT: store double [[TMP37]], ptr [[TMP29]], align 4
253- ; I32-NEXT: [[TMP38:%.*]] = extractelement <4 x double> [[TMP45]], i32 3
254262; I32-NEXT: store double [[TMP38]], ptr [[TMP30]], align 4
255- ; I32-NEXT: [[TMP63:%.*]] = extractelement <4 x double> [[TMP46]], i32 0
256263; I32-NEXT: store double [[TMP63]], ptr [[TMP47]], align 4
257- ; I32-NEXT: [[TMP64:%.*]] = extractelement <4 x double> [[TMP46]], i32 1
258264; I32-NEXT: store double [[TMP64]], ptr [[TMP48]], align 4
259- ; I32-NEXT: [[TMP65:%.*]] = extractelement <4 x double> [[TMP46]], i32 2
260265; I32-NEXT: store double [[TMP65]], ptr [[TMP49]], align 4
261- ; I32-NEXT: [[TMP66:%.*]] = extractelement <4 x double> [[TMP46]], i32 3
262266; I32-NEXT: store double [[TMP66]], ptr [[TMP50]], align 4
263- ; I32-NEXT: [[TMP67:%.*]] = extractelement <4 x double> [[TMP55]], i32 0
264267; I32-NEXT: store double [[TMP67]], ptr [[TMP51]], align 4
265- ; I32-NEXT: [[TMP68:%.*]] = extractelement <4 x double> [[TMP55]], i32 1
266268; I32-NEXT: store double [[TMP68]], ptr [[TMP52]], align 4
267- ; I32-NEXT: [[TMP69:%.*]] = extractelement <4 x double> [[TMP55]], i32 2
268269; I32-NEXT: store double [[TMP69]], ptr [[TMP53]], align 4
269- ; I32-NEXT: [[TMP70:%.*]] = extractelement <4 x double> [[TMP55]], i32 3
270270; I32-NEXT: store double [[TMP70]], ptr [[TMP54]], align 4
271271; I32-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
272272; I32-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
@@ -295,21 +295,21 @@ define void @test_store_initially_interleave(i32 %n, ptr noalias %src) #0 {
295295; I32-NEXT: [[TMP76:%.*]] = add i32 [[INDEX4]], 2
296296; I32-NEXT: [[TMP77:%.*]] = add i32 [[INDEX4]], 3
297297; I32-NEXT: [[TMP78:%.*]] = uitofp <4 x i32> [[VEC_IND5]] to <4 x double>
298- ; I32-NEXT: [[TMP79:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP74]]
299- ; I32-NEXT: [[TMP80:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
300- ; I32-NEXT: [[TMP81:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
301- ; I32-NEXT: [[TMP82:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
302- ; I32-NEXT: [[TMP83:%.*]] = load ptr, ptr [[TMP79]], align 4
303- ; I32-NEXT: [[TMP84:%.*]] = load ptr, ptr [[TMP80]], align 4
304- ; I32-NEXT: [[TMP85:%.*]] = load ptr, ptr [[TMP81]], align 4
305- ; I32-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TMP82]], align 4
306298; I32-NEXT: [[TMP87:%.*]] = extractelement <4 x double> [[TMP78]], i32 0
307- ; I32-NEXT: store double [[TMP87]], ptr [[TMP83]], align 4
308299; I32-NEXT: [[TMP88:%.*]] = extractelement <4 x double> [[TMP78]], i32 1
309- ; I32-NEXT: store double [[TMP88]], ptr [[TMP84]], align 4
310300; I32-NEXT: [[TMP89:%.*]] = extractelement <4 x double> [[TMP78]], i32 2
311- ; I32-NEXT: store double [[TMP89]], ptr [[TMP85]], align 4
312301; I32-NEXT: [[TMP90:%.*]] = extractelement <4 x double> [[TMP78]], i32 3
302+ ; I32-NEXT: [[TMP83:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP74]]
303+ ; I32-NEXT: [[TMP84:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP75]]
304+ ; I32-NEXT: [[TMP85:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP76]]
305+ ; I32-NEXT: [[TMP92:%.*]] = getelementptr nusw { ptr, ptr, ptr }, ptr null, i32 [[TMP77]]
306+ ; I32-NEXT: [[TMP93:%.*]] = load ptr, ptr [[TMP83]], align 4
307+ ; I32-NEXT: [[TMP94:%.*]] = load ptr, ptr [[TMP84]], align 4
308+ ; I32-NEXT: [[TMP95:%.*]] = load ptr, ptr [[TMP85]], align 4
309+ ; I32-NEXT: [[TMP86:%.*]] = load ptr, ptr [[TMP92]], align 4
310+ ; I32-NEXT: store double [[TMP87]], ptr [[TMP93]], align 4
311+ ; I32-NEXT: store double [[TMP88]], ptr [[TMP94]], align 4
312+ ; I32-NEXT: store double [[TMP89]], ptr [[TMP95]], align 4
313313; I32-NEXT: store double [[TMP90]], ptr [[TMP86]], align 4
314314; I32-NEXT: [[INDEX_NEXT6]] = add nuw i32 [[INDEX4]], 4
315315; I32-NEXT: [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND5]], splat (i32 4)
0 commit comments