@@ -337,19 +337,18 @@ define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible"
337337; CHECK64-LABEL: csr_d8_allocd_framepointer:
338338; CHECK64: // %bb.0: // %entry
339339; CHECK64-NEXT: sub sp, sp, #176
340- ; CHECK64-NEXT: str d8, [sp, #80 ] // 8-byte Folded Spill
340+ ; CHECK64-NEXT: stp d0, d8, [sp, #72 ] // 8-byte Folded Spill
341341; CHECK64-NEXT: stp x29, x30, [sp, #152] // 16-byte Folded Spill
342- ; CHECK64-NEXT: add x29, sp, #80
343- ; CHECK64-NEXT: .cfi_def_cfa w29, 96
342+ ; CHECK64-NEXT: add x29, sp, #152
343+ ; CHECK64-NEXT: .cfi_def_cfa w29, 24
344344; CHECK64-NEXT: .cfi_offset w30, -16
345345; CHECK64-NEXT: .cfi_offset w29, -24
346346; CHECK64-NEXT: .cfi_offset b8, -96
347347; CHECK64-NEXT: //APP
348348; CHECK64-NEXT: //NO_APP
349- ; CHECK64-NEXT: stur d0, [x29, #-8]
350349; CHECK64-NEXT: ldr x29, [sp, #152] // 8-byte Folded Reload
351- ; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload
352350; CHECK64-NEXT: mov w0, wzr
351+ ; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload
353352; CHECK64-NEXT: add sp, sp, #176
354353; CHECK64-NEXT: ret
355354;
@@ -358,17 +357,17 @@ define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible"
358357; CHECK1024-NEXT: sub sp, sp, #1056
359358; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill
360359; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill
361- ; CHECK1024-NEXT: mov x29, sp
360+ ; CHECK1024-NEXT: add x29, sp, #1032
362361; CHECK1024-NEXT: str x30, [sp, #1040] // 8-byte Folded Spill
363362; CHECK1024-NEXT: sub sp, sp, #1040
364- ; CHECK1024-NEXT: .cfi_def_cfa w29, 1056
363+ ; CHECK1024-NEXT: .cfi_def_cfa w29, 24
365364; CHECK1024-NEXT: .cfi_offset w30, -16
366365; CHECK1024-NEXT: .cfi_offset w29, -24
367366; CHECK1024-NEXT: .cfi_offset b8, -1056
368367; CHECK1024-NEXT: mov w0, wzr
369368; CHECK1024-NEXT: //APP
370369; CHECK1024-NEXT: //NO_APP
371- ; CHECK1024-NEXT: stur d0, [x29 , #-8 ]
370+ ; CHECK1024-NEXT: str d0, [sp , #1032 ]
372371; CHECK1024-NEXT: add sp, sp, #1040
373372; CHECK1024-NEXT: ldr x30, [sp, #1040] // 8-byte Folded Reload
374373; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload
@@ -2893,8 +2892,8 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
28932892; CHECK64-NEXT: stp x29, x30, [sp, #128] // 16-byte Folded Spill
28942893; CHECK64-NEXT: stp x9, x20, [sp, #144] // 16-byte Folded Spill
28952894; CHECK64-NEXT: str x19, [sp, #160] // 8-byte Folded Spill
2896- ; CHECK64-NEXT: mov x29, sp
2897- ; CHECK64-NEXT: .cfi_def_cfa w29, 176
2895+ ; CHECK64-NEXT: add x29, sp, #128
2896+ ; CHECK64-NEXT: .cfi_def_cfa w29, 48
28982897; CHECK64-NEXT: .cfi_offset w19, -16
28992898; CHECK64-NEXT: .cfi_offset w20, -24
29002899; CHECK64-NEXT: .cfi_offset w30, -40
@@ -2913,11 +2912,11 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
29132912; CHECK64-NEXT: mov w20, w0
29142913; CHECK64-NEXT: msub x9, x8, x8, x9
29152914; CHECK64-NEXT: mov sp, x9
2916- ; CHECK64-NEXT: stur x9, [x29, #-80 ]
2917- ; CHECK64-NEXT: sub x9, x29, #80
2918- ; CHECK64-NEXT: sturh wzr, [x29, #-70 ]
2919- ; CHECK64-NEXT: stur wzr, [x29, #-68 ]
2920- ; CHECK64-NEXT: sturh w8, [x29, #-72 ]
2915+ ; CHECK64-NEXT: stur x9, [x29, #-208 ]
2916+ ; CHECK64-NEXT: sub x9, x29, #208
2917+ ; CHECK64-NEXT: sturh wzr, [x29, #-198 ]
2918+ ; CHECK64-NEXT: stur wzr, [x29, #-196 ]
2919+ ; CHECK64-NEXT: sturh w8, [x29, #-200 ]
29212920; CHECK64-NEXT: msr TPIDR2_EL0, x9
29222921; CHECK64-NEXT: .cfi_offset vg, -32
29232922; CHECK64-NEXT: smstop sm
@@ -2926,14 +2925,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
29262925; CHECK64-NEXT: .cfi_restore vg
29272926; CHECK64-NEXT: smstart za
29282927; CHECK64-NEXT: mrs x8, TPIDR2_EL0
2929- ; CHECK64-NEXT: sub x0, x29, #80
2928+ ; CHECK64-NEXT: sub x0, x29, #208
29302929; CHECK64-NEXT: cbnz x8, .LBB33_2
29312930; CHECK64-NEXT: // %bb.1: // %entry
29322931; CHECK64-NEXT: bl __arm_tpidr2_restore
29332932; CHECK64-NEXT: .LBB33_2: // %entry
29342933; CHECK64-NEXT: mov w0, w20
29352934; CHECK64-NEXT: msr TPIDR2_EL0, xzr
2936- ; CHECK64-NEXT: mov sp, x29
2935+ ; CHECK64-NEXT: sub sp, x29, #128
29372936; CHECK64-NEXT: .cfi_def_cfa wsp, 176
29382937; CHECK64-NEXT: ldp x20, x19, [sp, #152] // 16-byte Folded Reload
29392938; CHECK64-NEXT: ldr d14, [sp, #8] // 8-byte Folded Reload
@@ -2972,8 +2971,8 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
29722971; CHECK1024-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
29732972; CHECK1024-NEXT: str x20, [sp, #1120] // 8-byte Folded Spill
29742973; CHECK1024-NEXT: str x19, [sp, #1128] // 8-byte Folded Spill
2975- ; CHECK1024-NEXT: mov x29, sp
2976- ; CHECK1024-NEXT: .cfi_def_cfa w29, 1136
2974+ ; CHECK1024-NEXT: add x29, sp, #1088
2975+ ; CHECK1024-NEXT: .cfi_def_cfa w29, 48
29772976; CHECK1024-NEXT: .cfi_offset w19, -8
29782977; CHECK1024-NEXT: .cfi_offset w20, -16
29792978; CHECK1024-NEXT: .cfi_offset w28, -24
@@ -2993,14 +2992,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
29932992; CHECK1024-NEXT: mov w20, w0
29942993; CHECK1024-NEXT: msub x9, x8, x8, x9
29952994; CHECK1024-NEXT: mov sp, x9
2996- ; CHECK1024-NEXT: sub x10, x29, #784
2995+ ; CHECK1024-NEXT: sub x10, x29, #1872
29972996; CHECK1024-NEXT: stur x9, [x10, #-256]
2998- ; CHECK1024-NEXT: sub x9, x29, #774
2999- ; CHECK1024-NEXT: sub x10, x29, #772
2997+ ; CHECK1024-NEXT: sub x9, x29, #1862
2998+ ; CHECK1024-NEXT: sub x10, x29, #1860
30002999; CHECK1024-NEXT: sturh wzr, [x9, #-256]
3001- ; CHECK1024-NEXT: sub x9, x29, #1040
3000+ ; CHECK1024-NEXT: sub x9, x29, #2128
30023001; CHECK1024-NEXT: stur wzr, [x10, #-256]
3003- ; CHECK1024-NEXT: sub x10, x29, #776
3002+ ; CHECK1024-NEXT: sub x10, x29, #1864
30043003; CHECK1024-NEXT: sturh w8, [x10, #-256]
30053004; CHECK1024-NEXT: msr TPIDR2_EL0, x9
30063005; CHECK1024-NEXT: .cfi_offset vg, -32
@@ -3010,14 +3009,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
30103009; CHECK1024-NEXT: .cfi_restore vg
30113010; CHECK1024-NEXT: smstart za
30123011; CHECK1024-NEXT: mrs x8, TPIDR2_EL0
3013- ; CHECK1024-NEXT: sub x0, x29, #1040
3012+ ; CHECK1024-NEXT: sub x0, x29, #2128
30143013; CHECK1024-NEXT: cbnz x8, .LBB33_2
30153014; CHECK1024-NEXT: // %bb.1: // %entry
30163015; CHECK1024-NEXT: bl __arm_tpidr2_restore
30173016; CHECK1024-NEXT: .LBB33_2: // %entry
30183017; CHECK1024-NEXT: mov w0, w20
30193018; CHECK1024-NEXT: msr TPIDR2_EL0, xzr
3020- ; CHECK1024-NEXT: mov sp, x29
3019+ ; CHECK1024-NEXT: sub sp, x29, #1088
30213020; CHECK1024-NEXT: .cfi_def_cfa wsp, 1136
30223021; CHECK1024-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
30233022; CHECK1024-NEXT: ldr x19, [sp, #1128] // 8-byte Folded Reload
@@ -3049,3 +3048,109 @@ entry:
30493048 ret i32 %x
30503049}
30513050declare void @other ()
3051+
; External callee used by the test below; it receives a pointer argument and
; carries the "aarch64_pstate_sm_compatible" attribute.
3052+ declare void @bar (ptr noundef) "aarch64_pstate_sm_compatible"
3053+
; Test: a function with both a scalable-vector stack object (%a) and a
; variable-length alloca (%b), built with "frame-pointer"="all".
;
; For every check prefix the expected code sets up x29 and x19 in the
; prologue, stores the zero vector to the scalable object through an
; x29-relative address (st1w ... #-1, mul vl), and recomputes sp from x29 in
; the epilogue.
;
; In the CHECK0 output x29 equals sp at the point it is set, so the scalable
; object is addressed directly off x29 and the epilogue is "mov sp, x29".
; In the CHECK64 and CHECK1024 outputs x29 is set to sp + 64 / sp + 1024 (the
; slot where x29 is saved), and that same constant is subtracted from x29 both
; to form the base for the scalable store and to restore sp in the epilogue.
;
; NOTE(review): the three prefixes presumably correspond to different stack
; hazard padding sizes selected by RUN lines outside this view - confirm.
3054+ define i32 @sve_stack_object_and_vla (double %d , i64 %sz ) "aarch64_pstate_sm_compatible" "frame-pointer" ="all" {
3055+ ; CHECK0-LABEL: sve_stack_object_and_vla:
3056+ ; CHECK0: // %bb.0: // %entry
3057+ ; CHECK0-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
3058+ ; CHECK0-NEXT: stp x28, x19, [sp, #16] // 16-byte Folded Spill
3059+ ; CHECK0-NEXT: mov x29, sp
3060+ ; CHECK0-NEXT: addvl sp, sp, #-1
3061+ ; CHECK0-NEXT: mov x19, sp
3062+ ; CHECK0-NEXT: .cfi_def_cfa w29, 32
3063+ ; CHECK0-NEXT: .cfi_offset w19, -8
3064+ ; CHECK0-NEXT: .cfi_offset w28, -16
3065+ ; CHECK0-NEXT: .cfi_offset w30, -24
3066+ ; CHECK0-NEXT: .cfi_offset w29, -32
3067+ ; CHECK0-NEXT: lsl x9, x0, #2
3068+ ; CHECK0-NEXT: mov x8, sp
3069+ ; CHECK0-NEXT: add x9, x9, #15
3070+ ; CHECK0-NEXT: and x9, x9, #0xfffffffffffffff0
3071+ ; CHECK0-NEXT: sub x0, x8, x9
3072+ ; CHECK0-NEXT: mov sp, x0
3073+ ; CHECK0-NEXT: mov z0.s, #0 // =0x0
3074+ ; CHECK0-NEXT: ptrue p0.s
3075+ ; CHECK0-NEXT: st1w { z0.s }, p0, [x29, #-1, mul vl]
3076+ ; CHECK0-NEXT: bl bar
3077+ ; CHECK0-NEXT: mov w0, wzr
3078+ ; CHECK0-NEXT: mov sp, x29
3079+ ; CHECK0-NEXT: ldp x28, x19, [sp, #16] // 16-byte Folded Reload
3080+ ; CHECK0-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
3081+ ; CHECK0-NEXT: ret
3082+ ;
3083+ ; CHECK64-LABEL: sve_stack_object_and_vla:
3084+ ; CHECK64: // %bb.0: // %entry
3085+ ; CHECK64-NEXT: sub sp, sp, #96
3086+ ; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
3087+ ; CHECK64-NEXT: add x29, sp, #64
3088+ ; CHECK64-NEXT: stp x28, x19, [sp, #80] // 16-byte Folded Spill
3089+ ; CHECK64-NEXT: sub sp, sp, #64
3090+ ; CHECK64-NEXT: addvl sp, sp, #-1
3091+ ; CHECK64-NEXT: mov x19, sp
3092+ ; CHECK64-NEXT: .cfi_def_cfa w29, 32
3093+ ; CHECK64-NEXT: .cfi_offset w19, -8
3094+ ; CHECK64-NEXT: .cfi_offset w28, -16
3095+ ; CHECK64-NEXT: .cfi_offset w30, -24
3096+ ; CHECK64-NEXT: .cfi_offset w29, -32
3097+ ; CHECK64-NEXT: lsl x9, x0, #2
3098+ ; CHECK64-NEXT: mov x8, sp
3099+ ; CHECK64-NEXT: add x9, x9, #15
3100+ ; CHECK64-NEXT: and x9, x9, #0xfffffffffffffff0
3101+ ; CHECK64-NEXT: sub x0, x8, x9
3102+ ; CHECK64-NEXT: mov sp, x0
3103+ ; CHECK64-NEXT: mov z0.s, #0 // =0x0
3104+ ; CHECK64-NEXT: ptrue p0.s
3105+ ; CHECK64-NEXT: sub x8, x29, #64
3106+ ; CHECK64-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
3107+ ; CHECK64-NEXT: bl bar
3108+ ; CHECK64-NEXT: mov w0, wzr
3109+ ; CHECK64-NEXT: sub sp, x29, #64
3110+ ; CHECK64-NEXT: ldp x28, x19, [sp, #80] // 16-byte Folded Reload
3111+ ; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
3112+ ; CHECK64-NEXT: add sp, sp, #96
3113+ ; CHECK64-NEXT: ret
3114+ ;
3115+ ; CHECK1024-LABEL: sve_stack_object_and_vla:
3116+ ; CHECK1024: // %bb.0: // %entry
3117+ ; CHECK1024-NEXT: sub sp, sp, #1056
3118+ ; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
3119+ ; CHECK1024-NEXT: add x29, sp, #1024
3120+ ; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
3121+ ; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill
3122+ ; CHECK1024-NEXT: str x19, [sp, #1048] // 8-byte Folded Spill
3123+ ; CHECK1024-NEXT: sub sp, sp, #1024
3124+ ; CHECK1024-NEXT: addvl sp, sp, #-1
3125+ ; CHECK1024-NEXT: mov x19, sp
3126+ ; CHECK1024-NEXT: .cfi_def_cfa w29, 32
3127+ ; CHECK1024-NEXT: .cfi_offset w19, -8
3128+ ; CHECK1024-NEXT: .cfi_offset w28, -16
3129+ ; CHECK1024-NEXT: .cfi_offset w30, -24
3130+ ; CHECK1024-NEXT: .cfi_offset w29, -32
3131+ ; CHECK1024-NEXT: lsl x9, x0, #2
3132+ ; CHECK1024-NEXT: mov x8, sp
3133+ ; CHECK1024-NEXT: add x9, x9, #15
3134+ ; CHECK1024-NEXT: and x9, x9, #0xfffffffffffffff0
3135+ ; CHECK1024-NEXT: sub x0, x8, x9
3136+ ; CHECK1024-NEXT: mov sp, x0
3137+ ; CHECK1024-NEXT: mov z0.s, #0 // =0x0
3138+ ; CHECK1024-NEXT: ptrue p0.s
3139+ ; CHECK1024-NEXT: sub x8, x29, #1024
3140+ ; CHECK1024-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
3141+ ; CHECK1024-NEXT: bl bar
3142+ ; CHECK1024-NEXT: mov w0, wzr
3143+ ; CHECK1024-NEXT: sub sp, x29, #1024
3144+ ; CHECK1024-NEXT: ldr x19, [sp, #1048] // 8-byte Folded Reload
3145+ ; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload
3146+ ; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
3147+ ; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
3148+ ; CHECK1024-NEXT: add sp, sp, #1056
3149+ ; CHECK1024-NEXT: ret
3150+ entry:
; %a is the scalable-vector stack object; %b is the dynamically sized
; allocation of %sz i32 elements.
3151+ %a = alloca <vscale x 4 x i32 >
3152+ %b = alloca i32 , i64 %sz , align 4
; Write to %a so the scalable object is actually accessed, then pass %b to the
; external callee.
3153+ store <vscale x 4 x i32 > zeroinitializer , ptr %a
3154+ call void @bar (ptr noundef nonnull %b )
3155+ ret i32 0
3156+ }
0 commit comments