@@ -332,24 +332,25 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
332332; CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
333333; CHECK-NEXT: ptrue pn8.b
334334; CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
335- ; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4 , mul vl] // 32-byte Folded Spill
336- ; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8 , mul vl] // 32-byte Folded Spill
335+ ; CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2 , mul vl] // 32-byte Folded Spill
336+ ; CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4 , mul vl] // 32-byte Folded Spill
337337; CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
338- ; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12 , mul vl] // 32-byte Folded Spill
339- ; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16 , mul vl] // 32-byte Folded Spill
338+ ; CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6 , mul vl] // 32-byte Folded Spill
339+ ; CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8 , mul vl] // 32-byte Folded Spill
340340; CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
341- ; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20 , mul vl] // 32-byte Folded Spill
342- ; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24 , mul vl] // 32-byte Folded Spill
341+ ; CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10 , mul vl] // 32-byte Folded Spill
342+ ; CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12 , mul vl] // 32-byte Folded Spill
343343; CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
344- ; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28 , mul vl] // 32-byte Folded Spill
344+ ; CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14 , mul vl] // 32-byte Folded Spill
345345; CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
346346; CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
347347; CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
348348; CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
349349; CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
350350; CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
351351; CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
352- ; CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
352+ ; CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
353+ ; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
353354; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 32 - 8 * VG
354355; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 32 - 16 * VG
355356; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x60, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 32 - 24 * VG
@@ -372,15 +373,16 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
372373; CHECK-NEXT: addvl sp, sp, #1
373374; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x90, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 144 * VG
374375; CHECK-NEXT: ptrue pn8.b
376+ ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
377+ ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
378+ ; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
379+ ; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
380+ ; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
381+ ; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
382+ ; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
383+ ; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
384+ ; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
375385; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
376- ; CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
377- ; CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
378- ; CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
379- ; CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
380- ; CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
381- ; CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
382- ; CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
383- ; CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
384386; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
385387; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
386388; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
@@ -427,24 +429,25 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
427429; FP-CHECK-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill
428430; FP-CHECK-NEXT: ptrue pn8.b
429431; FP-CHECK-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill
430- ; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #4 , mul vl] // 32-byte Folded Spill
431- ; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #8 , mul vl] // 32-byte Folded Spill
432+ ; FP-CHECK-NEXT: st1b { z22.b, z23.b }, pn8, [sp, #2 , mul vl] // 32-byte Folded Spill
433+ ; FP-CHECK-NEXT: st1b { z20.b, z21.b }, pn8, [sp, #4 , mul vl] // 32-byte Folded Spill
432434; FP-CHECK-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill
433- ; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #12 , mul vl] // 32-byte Folded Spill
434- ; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #16 , mul vl] // 32-byte Folded Spill
435+ ; FP-CHECK-NEXT: st1b { z18.b, z19.b }, pn8, [sp, #6 , mul vl] // 32-byte Folded Spill
436+ ; FP-CHECK-NEXT: st1b { z16.b, z17.b }, pn8, [sp, #8 , mul vl] // 32-byte Folded Spill
435437; FP-CHECK-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill
436- ; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #20 , mul vl] // 32-byte Folded Spill
437- ; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #24 , mul vl] // 32-byte Folded Spill
438+ ; FP-CHECK-NEXT: st1b { z14.b, z15.b }, pn8, [sp, #10 , mul vl] // 32-byte Folded Spill
439+ ; FP-CHECK-NEXT: st1b { z12.b, z13.b }, pn8, [sp, #12 , mul vl] // 32-byte Folded Spill
438440; FP-CHECK-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill
439- ; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #28 , mul vl] // 32-byte Folded Spill
441+ ; FP-CHECK-NEXT: st1b { z10.b, z11.b }, pn8, [sp, #14 , mul vl] // 32-byte Folded Spill
440442; FP-CHECK-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill
441443; FP-CHECK-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill
442444; FP-CHECK-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill
443445; FP-CHECK-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill
444446; FP-CHECK-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill
445447; FP-CHECK-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill
446448; FP-CHECK-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill
447- ; FP-CHECK-NEXT: st1b { z8.b, z9.b }, pn8, [sp, #32, mul vl] // 32-byte Folded Spill
449+ ; FP-CHECK-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill
450+ ; FP-CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
448451; FP-CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x78, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d8 @ cfa - 48 - 8 * VG
449452; FP-CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x70, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d9 @ cfa - 48 - 16 * VG
450453; FP-CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0a, 0x11, 0x50, 0x22, 0x11, 0x68, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d10 @ cfa - 48 - 24 * VG
@@ -465,15 +468,16 @@ define void @vg_unwind_with_sve_args(<vscale x 2 x i64> %x) #0 {
465468; FP-CHECK-NEXT: .cfi_restore vg
466469; FP-CHECK-NEXT: addvl sp, sp, #1
467470; FP-CHECK-NEXT: ptrue pn8.b
471+ ; FP-CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
472+ ; FP-CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
473+ ; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #2, mul vl] // 32-byte Folded Reload
474+ ; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
475+ ; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #6, mul vl] // 32-byte Folded Reload
476+ ; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
477+ ; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #10, mul vl] // 32-byte Folded Reload
478+ ; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
479+ ; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #14, mul vl] // 32-byte Folded Reload
468480; FP-CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
469- ; FP-CHECK-NEXT: ld1b { z22.b, z23.b }, pn8/z, [sp, #4, mul vl] // 32-byte Folded Reload
470- ; FP-CHECK-NEXT: ld1b { z20.b, z21.b }, pn8/z, [sp, #8, mul vl] // 32-byte Folded Reload
471- ; FP-CHECK-NEXT: ld1b { z18.b, z19.b }, pn8/z, [sp, #12, mul vl] // 32-byte Folded Reload
472- ; FP-CHECK-NEXT: ld1b { z16.b, z17.b }, pn8/z, [sp, #16, mul vl] // 32-byte Folded Reload
473- ; FP-CHECK-NEXT: ld1b { z14.b, z15.b }, pn8/z, [sp, #20, mul vl] // 32-byte Folded Reload
474- ; FP-CHECK-NEXT: ld1b { z12.b, z13.b }, pn8/z, [sp, #24, mul vl] // 32-byte Folded Reload
475- ; FP-CHECK-NEXT: ld1b { z10.b, z11.b }, pn8/z, [sp, #28, mul vl] // 32-byte Folded Reload
476- ; FP-CHECK-NEXT: ld1b { z8.b, z9.b }, pn8/z, [sp, #32, mul vl] // 32-byte Folded Reload
477481; FP-CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
478482; FP-CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
479483; FP-CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
0 commit comments