@@ -25,14 +25,13 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
2525; LA32-NEXT: move $s1, $a2
2626; LA32-NEXT: slli.w $a1, $a0, 4
2727; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
28- ; LA32-NEXT: add.w $a0, $a4, $a0
2928; LA32-NEXT: sltui $a1, $a3, 1
3029; LA32-NEXT: slti $a2, $a3, 0
3130; LA32-NEXT: masknez $a2, $a2, $a1
3231; LA32-NEXT: sltui $a3, $s1, 1
3332; LA32-NEXT: maskeqz $a1, $a3, $a1
3433; LA32-NEXT: or $a1, $a1, $a2
35- ; LA32-NEXT: addi.w $s2, $a0, 8
34+ ; LA32-NEXT: add.w $s2, $a4, $a0
3635; LA32-NEXT: bnez $a1, .LBB0_3
3736; LA32-NEXT: # %bb.1: # %for.body.preheader
3837; LA32-NEXT: move $fp, $a4
@@ -45,8 +44,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
4544; LA32-NEXT: # =>This Inner Loop Header: Depth=1
4645; LA32-NEXT: move $a0, $fp
4746; LA32-NEXT: bl f
48- ; LA32-NEXT: ld.w $a0, $s2, 4
49- ; LA32-NEXT: ld.w $a1, $s2, 0
47+ ; LA32-NEXT: ld.w $a0, $s2, 12
48+ ; LA32-NEXT: ld.w $a1, $s2, 8
5049; LA32-NEXT: add.w $a0, $a0, $s6
5150; LA32-NEXT: add.w $s3, $a1, $s3
5251; LA32-NEXT: sltu $a1, $s3, $a1
@@ -63,8 +62,8 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
6362; LA32-NEXT: move $s3, $zero
6463; LA32-NEXT: move $s6, $zero
6564; LA32-NEXT: .LBB0_4: # %for.cond.cleanup
66- ; LA32-NEXT: st.w $s3, $s2, 0
67- ; LA32-NEXT: st.w $s6, $s2, 4
65+ ; LA32-NEXT: st.w $s3, $s2, 8
66+ ; LA32-NEXT: st.w $s6, $s2, 12
6867; LA32-NEXT: ld.w $s6, $sp, 12 # 4-byte Folded Reload
6968; LA32-NEXT: ld.w $s5, $sp, 16 # 4-byte Folded Reload
7069; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
@@ -88,8 +87,7 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
8887; LA64-NEXT: move $s0, $a1
8988; LA64-NEXT: slli.d $a1, $a0, 4
9089; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
91- ; LA64-NEXT: add.d $a0, $a2, $a0
92- ; LA64-NEXT: addi.d $s1, $a0, 8
90+ ; LA64-NEXT: add.d $s1, $a2, $a0
9391; LA64-NEXT: blez $s0, .LBB0_3
9492; LA64-NEXT: # %bb.1: # %for.body.preheader
9593; LA64-NEXT: move $fp, $a2
@@ -100,15 +98,15 @@ define void @sink_fold_i64(i64 %k, i64 %n, ptr %a) nounwind {
10098; LA64-NEXT: move $a0, $fp
10199; LA64-NEXT: pcaddu18i $ra, %call36(f)
102100; LA64-NEXT: jirl $ra, $ra, 0
103- ; LA64-NEXT: ld.d $a0, $s1, 0
101+ ; LA64-NEXT: ld.d $a0, $s1, 8
104102; LA64-NEXT: addi.d $s0, $s0, -1
105103; LA64-NEXT: add.d $s2, $a0, $s2
106104; LA64-NEXT: bnez $s0, .LBB0_2
107105; LA64-NEXT: b .LBB0_4
108106; LA64-NEXT: .LBB0_3:
109107; LA64-NEXT: move $s2, $zero
110108; LA64-NEXT: .LBB0_4: # %for.cond.cleanup
111- ; LA64-NEXT: st.d $s2, $s1, 0
109+ ; LA64-NEXT: st.d $s2, $s1, 8
112110; LA64-NEXT: ld.d $s2, $sp, 8 # 8-byte Folded Reload
113111; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
114112; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
@@ -153,14 +151,13 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
153151; LA32-NEXT: move $s1, $a2
154152; LA32-NEXT: slli.w $a1, $a0, 4
155153; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
156- ; LA32-NEXT: add.w $a0, $a4, $a0
157154; LA32-NEXT: sltui $a1, $a3, 1
158155; LA32-NEXT: slti $a2, $a3, 0
159156; LA32-NEXT: masknez $a2, $a2, $a1
160157; LA32-NEXT: sltui $a3, $s1, 1
161158; LA32-NEXT: maskeqz $a1, $a3, $a1
162159; LA32-NEXT: or $a1, $a1, $a2
163- ; LA32-NEXT: addi.w $s2, $a0, 16
160+ ; LA32-NEXT: add.w $s2, $a4, $a0
164161; LA32-NEXT: bnez $a1, .LBB1_3
165162; LA32-NEXT: # %bb.1: # %for.body.preheader
166163; LA32-NEXT: move $fp, $a4
@@ -172,7 +169,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
172169; LA32-NEXT: # =>This Inner Loop Header: Depth=1
173170; LA32-NEXT: move $a0, $fp
174171; LA32-NEXT: bl f
175- ; LA32-NEXT: fld.s $fa0, $s2, 0
172+ ; LA32-NEXT: fld.s $fa0, $s2, 16
176173; LA32-NEXT: addi.w $s3, $s3, 1
177174; LA32-NEXT: sltui $a0, $s3, 1
178175; LA32-NEXT: add.w $s4, $s4, $a0
@@ -185,7 +182,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
185182; LA32-NEXT: .LBB1_3:
186183; LA32-NEXT: movgr2fr.w $fs0, $zero
187184; LA32-NEXT: .LBB1_4: # %for.cond.cleanup
188- ; LA32-NEXT: fst.s $fs0, $s2, 0
185+ ; LA32-NEXT: fst.s $fs0, $s2, 16
189186; LA32-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
190187; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
191188; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
@@ -208,8 +205,7 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
208205; LA64-NEXT: move $s0, $a1
209206; LA64-NEXT: slli.d $a1, $a0, 4
210207; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
211- ; LA64-NEXT: add.d $a0, $a2, $a0
212- ; LA64-NEXT: addi.d $s1, $a0, 16
208+ ; LA64-NEXT: add.d $s1, $a2, $a0
213209; LA64-NEXT: blez $s0, .LBB1_3
214210; LA64-NEXT: # %bb.1: # %for.body.preheader
215211; LA64-NEXT: move $fp, $a2
@@ -220,15 +216,15 @@ define void @sink_fold_f32(i64 %k, i64 %n, ptr %a) nounwind {
220216; LA64-NEXT: move $a0, $fp
221217; LA64-NEXT: pcaddu18i $ra, %call36(f)
222218; LA64-NEXT: jirl $ra, $ra, 0
223- ; LA64-NEXT: fld.s $fa0, $s1, 0
219+ ; LA64-NEXT: fld.s $fa0, $s1, 16
224220; LA64-NEXT: addi.d $s0, $s0, -1
225221; LA64-NEXT: fadd.s $fs0, $fa0, $fs0
226222; LA64-NEXT: bnez $s0, .LBB1_2
227223; LA64-NEXT: b .LBB1_4
228224; LA64-NEXT: .LBB1_3:
229225; LA64-NEXT: movgr2fr.w $fs0, $zero
230226; LA64-NEXT: .LBB1_4: # %for.cond.cleanup
231- ; LA64-NEXT: fst.s $fs0, $s1, 0
227+ ; LA64-NEXT: fst.s $fs0, $s1, 16
232228; LA64-NEXT: fld.d $fs0, $sp, 8 # 8-byte Folded Reload
233229; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
234230; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
@@ -271,14 +267,13 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
271267; LA32-NEXT: move $s0, $a3
272268; LA32-NEXT: move $s1, $a2
273269; LA32-NEXT: slli.w $a0, $a0, 6
274- ; LA32-NEXT: add.w $a0, $a4, $a0
275270; LA32-NEXT: sltui $a1, $a3, 1
276271; LA32-NEXT: slti $a2, $a3, 0
277272; LA32-NEXT: masknez $a2, $a2, $a1
278273; LA32-NEXT: sltui $a3, $s1, 1
279274; LA32-NEXT: maskeqz $a1, $a3, $a1
280275; LA32-NEXT: or $a1, $a1, $a2
281- ; LA32-NEXT: addi.w $s2, $a0, 16
276+ ; LA32-NEXT: add.w $s2, $a4, $a0
282277; LA32-NEXT: bnez $a1, .LBB2_3
283278; LA32-NEXT: # %bb.1: # %for.body.preheader
284279; LA32-NEXT: move $fp, $a4
@@ -291,7 +286,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
291286; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
292287; LA32-NEXT: move $a0, $fp
293288; LA32-NEXT: bl f
294- ; LA32-NEXT: vld $vr0, $s2, 0
289+ ; LA32-NEXT: vld $vr0, $s2, 16
295290; LA32-NEXT: addi.w $s3, $s3, 1
296291; LA32-NEXT: sltui $a0, $s3, 1
297292; LA32-NEXT: add.w $s4, $s4, $a0
@@ -307,7 +302,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
307302; LA32-NEXT: .LBB2_3:
308303; LA32-NEXT: vrepli.b $vr0, 0
309304; LA32-NEXT: .LBB2_4: # %for.cond.cleanup
310- ; LA32-NEXT: vst $vr0, $s2, 0
305+ ; LA32-NEXT: vst $vr0, $s2, 16
311306; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
312307; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
313308; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
@@ -326,8 +321,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
326321; LA64-NEXT: st.d $s0, $sp, 24 # 8-byte Folded Spill
327322; LA64-NEXT: st.d $s1, $sp, 16 # 8-byte Folded Spill
328323; LA64-NEXT: slli.d $a0, $a0, 6
329- ; LA64-NEXT: add.d $a0, $a2, $a0
330- ; LA64-NEXT: addi.d $s1, $a0, 16
324+ ; LA64-NEXT: add.d $s1, $a2, $a0
331325; LA64-NEXT: blez $a1, .LBB2_3
332326; LA64-NEXT: # %bb.1: # %for.body.preheader
333327; LA64-NEXT: move $fp, $a2
@@ -340,7 +334,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
340334; LA64-NEXT: move $a0, $fp
341335; LA64-NEXT: pcaddu18i $ra, %call36(f)
342336; LA64-NEXT: jirl $ra, $ra, 0
343- ; LA64-NEXT: vld $vr0, $s1, 0
337+ ; LA64-NEXT: vld $vr0, $s1, 16
344338; LA64-NEXT: addi.d $s0, $s0, -1
345339; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
346340; LA64-NEXT: vadd.w $vr1, $vr0, $vr1
@@ -351,7 +345,7 @@ define void @sink_fold_v4i32(i64 %k, i64 %n, ptr %a) nounwind {
351345; LA64-NEXT: .LBB2_3:
352346; LA64-NEXT: vrepli.b $vr0, 0
353347; LA64-NEXT: .LBB2_4: # %for.cond.cleanup
354- ; LA64-NEXT: vst $vr0, $s1, 0
348+ ; LA64-NEXT: vst $vr0, $s1, 16
355349; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
356350; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
357351; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -393,14 +387,13 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
393387; LA32-NEXT: move $s0, $a3
394388; LA32-NEXT: move $s1, $a2
395389; LA32-NEXT: slli.w $a0, $a0, 6
396- ; LA32-NEXT: add.w $a0, $a4, $a0
397390; LA32-NEXT: sltui $a1, $a3, 1
398391; LA32-NEXT: slti $a2, $a3, 0
399392; LA32-NEXT: masknez $a2, $a2, $a1
400393; LA32-NEXT: sltui $a3, $s1, 1
401394; LA32-NEXT: maskeqz $a1, $a3, $a1
402395; LA32-NEXT: or $a1, $a1, $a2
403- ; LA32-NEXT: addi.w $s2, $a0, 32
396+ ; LA32-NEXT: add.w $s2, $a4, $a0
404397; LA32-NEXT: bnez $a1, .LBB3_3
405398; LA32-NEXT: # %bb.1: # %for.body.preheader
406399; LA32-NEXT: move $fp, $a4
@@ -413,7 +406,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
413406; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
414407; LA32-NEXT: move $a0, $fp
415408; LA32-NEXT: bl f
416- ; LA32-NEXT: xvld $xr0, $s2, 0
409+ ; LA32-NEXT: xvld $xr0, $s2, 32
417410; LA32-NEXT: addi.w $s3, $s3, 1
418411; LA32-NEXT: sltui $a0, $s3, 1
419412; LA32-NEXT: add.w $s4, $s4, $a0
@@ -429,7 +422,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
429422; LA32-NEXT: .LBB3_3:
430423; LA32-NEXT: xvrepli.b $xr0, 0
431424; LA32-NEXT: .LBB3_4: # %for.cond.cleanup
432- ; LA32-NEXT: xvst $xr0, $s2, 0
425+ ; LA32-NEXT: xvst $xr0, $s2, 32
433426; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload
434427; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload
435428; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload
@@ -448,8 +441,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
448441; LA64-NEXT: st.d $s0, $sp, 56 # 8-byte Folded Spill
449442; LA64-NEXT: st.d $s1, $sp, 48 # 8-byte Folded Spill
450443; LA64-NEXT: slli.d $a0, $a0, 6
451- ; LA64-NEXT: add.d $a0, $a2, $a0
452- ; LA64-NEXT: addi.d $s1, $a0, 32
444+ ; LA64-NEXT: add.d $s1, $a2, $a0
453445; LA64-NEXT: blez $a1, .LBB3_3
454446; LA64-NEXT: # %bb.1: # %for.body.preheader
455447; LA64-NEXT: move $fp, $a2
@@ -462,7 +454,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
462454; LA64-NEXT: move $a0, $fp
463455; LA64-NEXT: pcaddu18i $ra, %call36(f)
464456; LA64-NEXT: jirl $ra, $ra, 0
465- ; LA64-NEXT: xvld $xr0, $s1, 0
457+ ; LA64-NEXT: xvld $xr0, $s1, 32
466458; LA64-NEXT: addi.d $s0, $s0, -1
467459; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
468460; LA64-NEXT: xvadd.h $xr1, $xr0, $xr1
@@ -473,7 +465,7 @@ define void @sink_fold_v16i16(i64 %k, i64 %n, ptr %a) nounwind {
473465; LA64-NEXT: .LBB3_3:
474466; LA64-NEXT: xvrepli.b $xr0, 0
475467; LA64-NEXT: .LBB3_4: # %for.cond.cleanup
476- ; LA64-NEXT: xvst $xr0, $s1, 0
468+ ; LA64-NEXT: xvst $xr0, $s1, 32
477469; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
478470; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
479471; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
@@ -516,14 +508,13 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
516508; LA32-NEXT: move $s1, $a2
517509; LA32-NEXT: slli.w $a1, $a0, 4
518510; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
519- ; LA32-NEXT: add.w $a0, $a4, $a0
520511; LA32-NEXT: sltui $a1, $a3, 1
521512; LA32-NEXT: slti $a2, $a3, 0
522513; LA32-NEXT: masknez $a2, $a2, $a1
523514; LA32-NEXT: sltui $a3, $s1, 1
524515; LA32-NEXT: maskeqz $a1, $a3, $a1
525516; LA32-NEXT: or $a1, $a1, $a2
526- ; LA32-NEXT: addi.w $s2, $a0, 16
517+ ; LA32-NEXT: add.w $s2, $a4, $a0
527518; LA32-NEXT: bnez $a1, .LBB4_3
528519; LA32-NEXT: # %bb.1: # %for.body.preheader
529520; LA32-NEXT: move $fp, $a4
@@ -536,7 +527,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
536527; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill
537528; LA32-NEXT: move $a0, $fp
538529; LA32-NEXT: bl f
539- ; LA32-NEXT: vldrepl.b $vr0, $s2, 0
530+ ; LA32-NEXT: vldrepl.b $vr0, $s2, 16
540531; LA32-NEXT: addi.w $s3, $s3, 1
541532; LA32-NEXT: sltui $a0, $s3, 1
542533; LA32-NEXT: add.w $s4, $s4, $a0
@@ -552,7 +543,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
552543; LA32-NEXT: .LBB4_3:
553544; LA32-NEXT: vrepli.b $vr0, 0
554545; LA32-NEXT: .LBB4_4: # %for.cond.cleanup
555- ; LA32-NEXT: vstelm.b $vr0, $s2, 0, 1
546+ ; LA32-NEXT: vstelm.b $vr0, $s2, 16, 1
556547; LA32-NEXT: ld.w $s4, $sp, 20 # 4-byte Folded Reload
557548; LA32-NEXT: ld.w $s3, $sp, 24 # 4-byte Folded Reload
558549; LA32-NEXT: ld.w $s2, $sp, 28 # 4-byte Folded Reload
@@ -573,8 +564,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
573564; LA64-NEXT: move $s0, $a1
574565; LA64-NEXT: slli.d $a1, $a0, 4
575566; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
576- ; LA64-NEXT: add.d $a0, $a2, $a0
577- ; LA64-NEXT: addi.d $s1, $a0, 16
567+ ; LA64-NEXT: add.d $s1, $a2, $a0
578568; LA64-NEXT: blez $s0, .LBB4_3
579569; LA64-NEXT: # %bb.1: # %for.body.preheader
580570; LA64-NEXT: move $fp, $a2
@@ -586,7 +576,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
586576; LA64-NEXT: move $a0, $fp
587577; LA64-NEXT: pcaddu18i $ra, %call36(f)
588578; LA64-NEXT: jirl $ra, $ra, 0
589- ; LA64-NEXT: vldrepl.b $vr0, $s1, 0
579+ ; LA64-NEXT: vldrepl.b $vr0, $s1, 16
590580; LA64-NEXT: addi.d $s0, $s0, -1
591581; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload
592582; LA64-NEXT: vadd.b $vr1, $vr0, $vr1
@@ -597,7 +587,7 @@ define void @sink_fold_extracti8(i64 %k, i64 %n, ptr %a) nounwind {
597587; LA64-NEXT: .LBB4_3:
598588; LA64-NEXT: vrepli.b $vr0, 0
599589; LA64-NEXT: .LBB4_4: # %for.cond.cleanup
600- ; LA64-NEXT: vstelm.b $vr0, $s1, 0, 1
590+ ; LA64-NEXT: vstelm.b $vr0, $s1, 16, 1
601591; LA64-NEXT: ld.d $s1, $sp, 16 # 8-byte Folded Reload
602592; LA64-NEXT: ld.d $s0, $sp, 24 # 8-byte Folded Reload
603593; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload
@@ -643,14 +633,13 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
643633; LA32-NEXT: move $s1, $a2
644634; LA32-NEXT: slli.w $a1, $a0, 4
645635; LA32-NEXT: alsl.w $a0, $a0, $a1, 3
646- ; LA32-NEXT: add.w $a0, $a4, $a0
647636; LA32-NEXT: sltui $a1, $a3, 1
648637; LA32-NEXT: slti $a2, $a3, 0
649638; LA32-NEXT: masknez $a2, $a2, $a1
650639; LA32-NEXT: sltui $a3, $s1, 1
651640; LA32-NEXT: maskeqz $a1, $a3, $a1
652641; LA32-NEXT: or $a1, $a1, $a2
653- ; LA32-NEXT: addi.w $s2, $a0, 8
642+ ; LA32-NEXT: add.w $s2, $a4, $a0
654643; LA32-NEXT: bnez $a1, .LBB5_3
655644; LA32-NEXT: # %bb.1: # %for.body.preheader
656645; LA32-NEXT: move $fp, $a4
@@ -663,7 +652,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
663652; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill
664653; LA32-NEXT: move $a0, $fp
665654; LA32-NEXT: bl f
666- ; LA32-NEXT: xvldrepl.d $xr0, $s2, 0
655+ ; LA32-NEXT: xvldrepl.d $xr0, $s2, 8
667656; LA32-NEXT: addi.w $s3, $s3, 1
668657; LA32-NEXT: sltui $a0, $s3, 1
669658; LA32-NEXT: add.w $s4, $s4, $a0
@@ -679,7 +668,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
679668; LA32-NEXT: .LBB5_3:
680669; LA32-NEXT: xvrepli.b $xr0, 0
681670; LA32-NEXT: .LBB5_4: # %for.cond.cleanup
682- ; LA32-NEXT: xvstelm.d $xr0, $s2, 0, 1
671+ ; LA32-NEXT: xvstelm.d $xr0, $s2, 8, 1
683672; LA32-NEXT: ld.w $s4, $sp, 52 # 4-byte Folded Reload
684673; LA32-NEXT: ld.w $s3, $sp, 56 # 4-byte Folded Reload
685674; LA32-NEXT: ld.w $s2, $sp, 60 # 4-byte Folded Reload
@@ -700,8 +689,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
700689; LA64-NEXT: move $s0, $a1
701690; LA64-NEXT: slli.d $a1, $a0, 4
702691; LA64-NEXT: alsl.d $a0, $a0, $a1, 3
703- ; LA64-NEXT: add.d $a0, $a2, $a0
704- ; LA64-NEXT: addi.d $s1, $a0, 8
692+ ; LA64-NEXT: add.d $s1, $a2, $a0
705693; LA64-NEXT: blez $s0, .LBB5_3
706694; LA64-NEXT: # %bb.1: # %for.body.preheader
707695; LA64-NEXT: move $fp, $a2
@@ -713,7 +701,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
713701; LA64-NEXT: move $a0, $fp
714702; LA64-NEXT: pcaddu18i $ra, %call36(f)
715703; LA64-NEXT: jirl $ra, $ra, 0
716- ; LA64-NEXT: xvldrepl.d $xr0, $s1, 0
704+ ; LA64-NEXT: xvldrepl.d $xr0, $s1, 8
717705; LA64-NEXT: addi.d $s0, $s0, -1
718706; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload
719707; LA64-NEXT: xvfadd.d $xr1, $xr0, $xr1
@@ -724,7 +712,7 @@ define void @sink_fold_extractf64(i64 %k, i64 %n, ptr %a) nounwind {
724712; LA64-NEXT: .LBB5_3:
725713; LA64-NEXT: xvrepli.b $xr0, 0
726714; LA64-NEXT: .LBB5_4: # %for.cond.cleanup
727- ; LA64-NEXT: xvstelm.d $xr0, $s1, 0, 1
715+ ; LA64-NEXT: xvstelm.d $xr0, $s1, 8, 1
728716; LA64-NEXT: ld.d $s1, $sp, 48 # 8-byte Folded Reload
729717; LA64-NEXT: ld.d $s0, $sp, 56 # 8-byte Folded Reload
730718; LA64-NEXT: ld.d $fp, $sp, 64 # 8-byte Folded Reload
0 commit comments