|
| 1 | +// SPDX-License-Identifier: GPL-2.0-only |
| 2 | +// Copyright (C) 2021-3 ARM Limited. |
| 3 | +// |
| 4 | +// Assembly portion of the FP ptrace test |
| 5 | + |
| 6 | +// |
| 7 | +// Load values from memory into registers, break on a breakpoint, then |
| 8 | +// break on a further breakpoint |
| 9 | +// |
| 10 | + |
| 11 | +#include "fp-ptrace.h" |
| 12 | +#include "sme-inst.h" |
| 13 | + |
| 14 | +.arch_extension sve |
| 15 | + |
// Load and save register values with pauses for ptrace
//
// Arguments (tested as zero / non-zero flags, never modified here):
//   x0 - SVE in use
//   x1 - SME in use
//   x2 - SME2 in use
//   x3 - FA64 supported
//
// Flow:
//   1. Load V0-V31 from v_in.  Depending on the flags and on svcr_in, also
//      write SVCR and load ZA, ZT0, Z0-Z31, FFR and P0-P15 from za_in,
//      zt_in, z_in, ffr_in and p_in.
//   2. brk #0
//   3. Store V0-V31 to v_out and, depending on the flags and the SVCR value
//      read back, store the vector lengths, SVCR, ZA, ZT0, Z0-Z31, P0-P15
//      and FFR to the matching *_out symbols.
//   4. brk #0
//   5. If x1 is set, write zero to SVCR, then return.
//
// Scratch registers: x4, x6, x7, x11, x12.  x11/x12 are pushed on entry and
// popped again before returning; sp is the same on exit as on entry.
//
// rdsvl, _ldr_za, _str_za, _ldr_zt and _str_zt are macros that are not
// defined in this file (presumably provided by sme-inst.h).

.globl load_and_save
load_and_save:
	// Push x11/x12: pre-indexed store, sp -= 16 then store at the new sp
	stp	x11, x12, [sp, #-0x10]!

	// This should be redundant in the SVE case
	ldr	x7, =v_in
	ldp	q0, q1, [x7]
	ldp	q2, q3, [x7, #16 * 2]
	ldp	q4, q5, [x7, #16 * 4]
	ldp	q6, q7, [x7, #16 * 6]
	ldp	q8, q9, [x7, #16 * 8]
	ldp	q10, q11, [x7, #16 * 10]
	ldp	q12, q13, [x7, #16 * 12]
	ldp	q14, q15, [x7, #16 * 14]
	ldp	q16, q17, [x7, #16 * 16]
	ldp	q18, q19, [x7, #16 * 18]
	ldp	q20, q21, [x7, #16 * 20]
	ldp	q22, q23, [x7, #16 * 22]
	ldp	q24, q25, [x7, #16 * 24]
	ldp	q26, q27, [x7, #16 * 26]
	ldp	q28, q29, [x7, #16 * 28]
	ldp	q30, q31, [x7, #16 * 30]

	// SME?
	cbz	x1, check_sve_in

	adrp	x7, svcr_in
	ldr	x7, [x7, :lo12:svcr_in]
	// SVCR is 0 by default, avoid triggering SME if not in use
	cbz	x7, check_sve_in
	msr	S3_3_C4_C2_2, x7		// SVCR = svcr_in

	// ZA?
	// x7 keeps the svcr_in value until check_sm_in; the ZA/ZT code below
	// only touches x6, x11 and x12.
	tbz	x7, #SVCR_ZA_SHIFT, check_sm_in
	rdsvl	11, 1				// x11 = streaming VL in bytes
	mov	w12, #0				// x12 = ZA row index
	ldr	x6, =za_in			// x6 = current row in za_in
	// One row per iteration: x6 advances by x11 bytes, loop ends when the
	// index reaches x11.
1:	_ldr_za 12, 6
	add	x6, x6, x11
	add	x12, x12, #1
	cmp	x11, x12
	bne	1b

	// ZT?
	cbz	x2, check_sm_in
	adrp	x6, zt_in
	add	x6, x6, :lo12:zt_in
	_ldr_zt 6

	// In streaming mode?
check_sm_in:
	tbz	x7, #SVCR_SM_SHIFT, check_sve_in
	mov	x4, x3	// Load FFR if we have FA64
	b	load_sve

	// SVE?
check_sve_in:
	cbz	x0, wait_for_writes
	mov	x4, #1				// non-streaming SVE: always load FFR

	// Reached with x4 != 0 when FFR should be loaded
load_sve:
	ldr	x7, =z_in
	ldr	z0, [x7, #0, MUL VL]
	ldr	z1, [x7, #1, MUL VL]
	ldr	z2, [x7, #2, MUL VL]
	ldr	z3, [x7, #3, MUL VL]
	ldr	z4, [x7, #4, MUL VL]
	ldr	z5, [x7, #5, MUL VL]
	ldr	z6, [x7, #6, MUL VL]
	ldr	z7, [x7, #7, MUL VL]
	ldr	z8, [x7, #8, MUL VL]
	ldr	z9, [x7, #9, MUL VL]
	ldr	z10, [x7, #10, MUL VL]
	ldr	z11, [x7, #11, MUL VL]
	ldr	z12, [x7, #12, MUL VL]
	ldr	z13, [x7, #13, MUL VL]
	ldr	z14, [x7, #14, MUL VL]
	ldr	z15, [x7, #15, MUL VL]
	ldr	z16, [x7, #16, MUL VL]
	ldr	z17, [x7, #17, MUL VL]
	ldr	z18, [x7, #18, MUL VL]
	ldr	z19, [x7, #19, MUL VL]
	ldr	z20, [x7, #20, MUL VL]
	ldr	z21, [x7, #21, MUL VL]
	ldr	z22, [x7, #22, MUL VL]
	ldr	z23, [x7, #23, MUL VL]
	ldr	z24, [x7, #24, MUL VL]
	ldr	z25, [x7, #25, MUL VL]
	ldr	z26, [x7, #26, MUL VL]
	ldr	z27, [x7, #27, MUL VL]
	ldr	z28, [x7, #28, MUL VL]
	ldr	z29, [x7, #29, MUL VL]
	ldr	z30, [x7, #30, MUL VL]
	ldr	z31, [x7, #31, MUL VL]

	// FFR is not present in base SME
	cbz	x4, 1f
	ldr	x7, =ffr_in
	ldr	p0, [x7]			// stage ffr_in in p0 (p0 is reloaded from p_in below)
	ldr	x7, [x7, #0]			// first 64 bits of ffr_in
	cbz	x7, 1f				// skip the FFR write when they are zero
	wrffr	p0.b
1:

	ldr	x7, =p_in
	ldr	p0, [x7, #0, MUL VL]
	ldr	p1, [x7, #1, MUL VL]
	ldr	p2, [x7, #2, MUL VL]
	ldr	p3, [x7, #3, MUL VL]
	ldr	p4, [x7, #4, MUL VL]
	ldr	p5, [x7, #5, MUL VL]
	ldr	p6, [x7, #6, MUL VL]
	ldr	p7, [x7, #7, MUL VL]
	ldr	p8, [x7, #8, MUL VL]
	ldr	p9, [x7, #9, MUL VL]
	ldr	p10, [x7, #10, MUL VL]
	ldr	p11, [x7, #11, MUL VL]
	ldr	p12, [x7, #12, MUL VL]
	ldr	p13, [x7, #13, MUL VL]
	ldr	p14, [x7, #14, MUL VL]
	ldr	p15, [x7, #15, MUL VL]

wait_for_writes:
	// Wait for the parent
	brk	#0

	// Save values
	ldr	x7, =v_out
	stp	q0, q1, [x7]
	stp	q2, q3, [x7, #16 * 2]
	stp	q4, q5, [x7, #16 * 4]
	stp	q6, q7, [x7, #16 * 6]
	stp	q8, q9, [x7, #16 * 8]
	stp	q10, q11, [x7, #16 * 10]
	stp	q12, q13, [x7, #16 * 12]
	stp	q14, q15, [x7, #16 * 14]
	stp	q16, q17, [x7, #16 * 16]
	stp	q18, q19, [x7, #16 * 18]
	stp	q20, q21, [x7, #16 * 20]
	stp	q22, q23, [x7, #16 * 22]
	stp	q24, q25, [x7, #16 * 24]
	stp	q26, q27, [x7, #16 * 26]
	stp	q28, q29, [x7, #16 * 28]
	stp	q30, q31, [x7, #16 * 30]

	// SME?
	cbz	x1, check_sve_out

	rdsvl	11, 1				// x11 = streaming VL in bytes
	adrp	x6, sme_vl_out
	str	x11, [x6, :lo12:sme_vl_out]

	// Read SVCR back rather than reusing svcr_in; x7 holds it until
	// check_sm_out.
	mrs	x7, S3_3_C4_C2_2
	adrp	x6, svcr_out
	str	x7, [x6, :lo12:svcr_out]

	// ZA?
	tbz	x7, #SVCR_ZA_SHIFT, check_sm_out
	mov	w12, #0				// x12 = ZA row index
	ldr	x6, =za_out			// x6 = current row in za_out
	// Same row walk as the load loop, x11 rows of x11 bytes each
1:	_str_za 12, 6
	add	x6, x6, x11
	add	x12, x12, #1
	cmp	x11, x12
	bne	1b

	// ZT?
	cbz	x2, check_sm_out
	adrp	x6, zt_out
	add	x6, x6, :lo12:zt_out
	_str_zt 6

	// In streaming mode?
check_sm_out:
	tbz	x7, #SVCR_SM_SHIFT, check_sve_out
	mov	x4, x3	// FFR?
	b	read_sve			// note: sve_vl_out is not written on this path

	// SVE?
check_sve_out:
	cbz	x0, wait_for_reads
	mov	x4, #1				// non-streaming SVE: always save FFR

	rdvl	x7, #1				// x7 = current SVE VL in bytes
	adrp	x6, sve_vl_out
	str	x7, [x6, :lo12:sve_vl_out]

	// Reached with x4 != 0 when FFR should be saved
read_sve:
	ldr	x7, =z_out
	str	z0, [x7, #0, MUL VL]
	str	z1, [x7, #1, MUL VL]
	str	z2, [x7, #2, MUL VL]
	str	z3, [x7, #3, MUL VL]
	str	z4, [x7, #4, MUL VL]
	str	z5, [x7, #5, MUL VL]
	str	z6, [x7, #6, MUL VL]
	str	z7, [x7, #7, MUL VL]
	str	z8, [x7, #8, MUL VL]
	str	z9, [x7, #9, MUL VL]
	str	z10, [x7, #10, MUL VL]
	str	z11, [x7, #11, MUL VL]
	str	z12, [x7, #12, MUL VL]
	str	z13, [x7, #13, MUL VL]
	str	z14, [x7, #14, MUL VL]
	str	z15, [x7, #15, MUL VL]
	str	z16, [x7, #16, MUL VL]
	str	z17, [x7, #17, MUL VL]
	str	z18, [x7, #18, MUL VL]
	str	z19, [x7, #19, MUL VL]
	str	z20, [x7, #20, MUL VL]
	str	z21, [x7, #21, MUL VL]
	str	z22, [x7, #22, MUL VL]
	str	z23, [x7, #23, MUL VL]
	str	z24, [x7, #24, MUL VL]
	str	z25, [x7, #25, MUL VL]
	str	z26, [x7, #26, MUL VL]
	str	z27, [x7, #27, MUL VL]
	str	z28, [x7, #28, MUL VL]
	str	z29, [x7, #29, MUL VL]
	str	z30, [x7, #30, MUL VL]
	str	z31, [x7, #31, MUL VL]

	ldr	x7, =p_out
	str	p0, [x7, #0, MUL VL]
	str	p1, [x7, #1, MUL VL]
	str	p2, [x7, #2, MUL VL]
	str	p3, [x7, #3, MUL VL]
	str	p4, [x7, #4, MUL VL]
	str	p5, [x7, #5, MUL VL]
	str	p6, [x7, #6, MUL VL]
	str	p7, [x7, #7, MUL VL]
	str	p8, [x7, #8, MUL VL]
	str	p9, [x7, #9, MUL VL]
	str	p10, [x7, #10, MUL VL]
	str	p11, [x7, #11, MUL VL]
	str	p12, [x7, #12, MUL VL]
	str	p13, [x7, #13, MUL VL]
	str	p14, [x7, #14, MUL VL]
	str	p15, [x7, #15, MUL VL]

	// Only save FFR if it exists
	cbz	x4, wait_for_reads
	ldr	x7, =ffr_out
	rdffr	p0.b				// overwrites p0, which is already in p_out
	str	p0, [x7]

wait_for_reads:
	// Wait for the parent
	brk	#0

	// Ensure we don't leave ourselves in streaming mode
	cbz	x1, out
	msr	S3_3_C4_C2_2, xzr

out:
	// Pop x11/x12: post-indexed load from [sp], then sp += 16.  This must
	// mirror the pre-indexed push at entry so the caller gets its stack
	// pointer back; loading from [sp, #-0x10] without writeback would read
	// below the saved pair and leave sp 16 bytes too low.
	ldp	x11, x12, [sp], #0x10
	ret
0 commit comments