|
| 1 | +#!/usr/bin/env python3 |
| 2 | +# SPDX-License-Identifier: MIT |
| 3 | +import sys, pathlib, time |
| 4 | +sys.path.append(str(pathlib.Path(__file__).resolve().parents[1])) |
| 5 | + |
| 6 | +from m1n1.setup import * |
| 7 | +from m1n1 import asm |
| 8 | + |
# Number of timed measurements accumulated by one test_cpu() call.
REPETITIONS = 64

# Alignment passed to u.memalign() and size of the pointer buffer.
PAGE_SIZE = 16384

# CPU indices handed to the SMP proxy calls; presumably an efficiency core
# and a performance core on the target SoC (init_core() checks MPIDR_EL1
# rather than trusting these names).
TEST_ECORE = 1
TEST_PCORE = 4

# Stride between candidate test addresses and length of the per-iteration
# cache flush; presumably the L2 cache line size of the target — TODO confirm.
L2_LINE_SIZE = 128

# Parameters of the multiplicative congruential generator in prng().
PRNG_a = 75
PRNG_m = 31337
# Backward-compatible alias for the original, misspelled constant name.
PNRG_a = PRNG_a
# Current generator state; test_cpu() advances it after every measurement.
rnd_idx = 8


def prng(x):
    """Return the successor of ``x`` in the pseudo-random index sequence.

    Computes ``(PRNG_a * x) % PRNG_m``, so the result is always in
    ``range(PRNG_m)``. Note that 0 maps to 0, so the state must be seeded
    with a non-zero value.
    """
    return (PRNG_a * x) % PRNG_m


# One L2_LINE_SIZE-sized slot for every possible prng() output.
# NOTE(review): the assembled test routine also reads one byte at
# test_addr + 512, which for the highest few indices lies past the end of a
# buffer of this size — confirm that over-read is harmless.
SIZE_DATA_ARRAY = (PRNG_m * L2_LINE_SIZE)
| 25 | + |
# Data buffer that the test addresses point into, pre-filled with a 0x55
# byte pattern (arguments to u.memalign are presumably (alignment, size)).
data_buf_addr = u.memalign(PAGE_SIZE, SIZE_DATA_ARRAY)
p.memset64(data_buf_addr, 0x5555555555555555, SIZE_DATA_ARRAY)
# One-page buffer filled with the same pattern; test_cpu() overwrites its
# first 8 bytes with a (possibly masked) pointer into the data buffer.
aop_addr = u.memalign(PAGE_SIZE, PAGE_SIZE)
p.memset64(aop_addr, 0x5555555555555555, PAGE_SIZE)

# NOTE(review): freq is not referenced anywhere else in the visible source.
freq = u.mrs(CNTFRQ_EL0)
# Target-side memory that receives the assembled test routine below.
code = u.malloc(0x1000)
| 33 | + |
# Timing routine executed on the core under test. test_cpu() invokes it with
# (aop_addr, test_addr, 7 << 60); these presumably arrive in x0, x1 and x2
# — TODO confirm against the proxy's calling convention.
#
#   1. `dc civac` the lines at x0 and x1 so both start out uncached.
#   2. Spin 0x8000 iterations of a dependent add/mul chain on x2, then mask
#      x2 down to bits 63:60.
#   3. Add x2 to x1 and load one byte from [x1 + 512]; the masked result is
#      added to x0.
#   4. Read PMC0 before and after `ldr x2, [x0, x2]`; the delta goes to x5.
#   5. Spin another 0x4000 iterations and mask x2 again.
#   6. Read PMC0 before and after `ldr x2, [x1, x2]`; the delta goes to x0.
#   7. Return (x5 << 32) | x0, i.e. first delta in the upper 32 bits and the
#      second delta in the lower 32 bits.
#
# x2 is threaded through every step, so each load address depends on the
# result of the preceding computation.
util = asm.ARMAsm("""
test:
 dc civac, x0
 dc civac, x1
 isb sy

 mov x7, #0x8000
1:
 add x2, x2, #1
 mul x2, x2, x2
 sub x7, x7, #1
 cbnz x7, 1b
 and x2, x2, #(15 << 60)

 add x1, x1, x2
 ldrb w2, [x1, #512]
 and x2, x2, #(15 << 60)

 add x0, x0, x2

 dsb sy
 isb
 mrs x9, S3_2_c15_c0_0 // PMC0_EL1
 isb
 ldr x2, [x0, x2]
 isb
 mrs x10, S3_2_c15_c0_0
 sub x5, x10, x9

 and x2, x2, #(15 << 60)
 mov x7, #0x4000
1:
 add x2, x2, #1
 mul x2, x2, x2
 sub x7, x7, #1
 cbnz x7, 1b

 and x2, x2, #(15 << 60)

 dsb sy
 isb
 mrs x9, S3_2_c15_c0_0
 isb
 ldr x2, [x1, x2]
 isb
 mrs x10, S3_2_c15_c0_0
 sub x0, x10, x9

 isb sy

 lsl x5, x5, #32
 orr x0, x0, x5
 ret
""", code)
# Print the disassembly of the assembled routine for inspection.
for i in util.disassemble():
    print(i)
# Upload the machine code to the target, then run cache maintenance over the
# range (presumably D-cache clean followed by I-cache invalidate, going by
# the helper names) so the new instructions are what gets executed.
iface.writemem(code, util.data)
p.dc_cvau(code, len(util.data))
p.ic_ivau(code, len(util.data))
| 93 | + |
# Set higher cpufreq pstate on all clusters
p.cpufreq_init()
# Start the secondary cores so the test routine can be dispatched to them
# through the SMP proxy calls used below.
p.smp_start_secondaries()
# NOTE(review): exact effect of WFE mode is defined by the proxy, not
# visible here.
p.smp_set_wfe_mode(True);
| 98 | + |
def cpu_call(cpu, x, *args):
    """Synchronously call address ``x`` on core ``cpu`` and return the result.

    The address is OR-ed with ``REGION_RX_EL1`` before being handed to
    ``p.smp_call_sync``; any extra positional arguments are forwarded
    unchanged.
    """
    entry = x | REGION_RX_EL1
    return p.smp_call_sync(cpu, entry, *args)
| 101 | + |
def init_core(cpu):
    """Configure core ``cpu`` for the timing experiment.

    After initialising the MMU on that core, every system-register access
    is executed on the core itself by routing ``u.mrs`` / ``u.msr`` through
    ``cpu_call``. The register writes are, in order: clear bit 11 of
    (E)HID4, set bits 0 and 30 of PMCR0, write all-ones to PMCR1, set bit 37
    of TCR_EL1 and set bit 26 of SCTLR_EL1.
    """
    p.mmu_init_secondary(cpu)

    def run_on_core(addr, *call_args):
        # Trampoline handed to u.mrs/u.msr so the access runs on `cpu`.
        return cpu_call(cpu, addr, *call_args)

    def mrs(reg):
        return u.mrs(reg, call=run_on_core)

    def msr(reg, value):
        u.msr(reg, value, call=run_on_core)

    def clear_bits(reg, bits):
        # Read-modify-write: drop `bits` from `reg`.
        msr(reg, mrs(reg) & ~bits)

    def set_bits(reg, bits):
        # Read-modify-write: raise `bits` in `reg`.
        msr(reg, mrs(reg) | bits)

    # MPIDR_EL1 bit 16 clear means this is treated as an e-core, which uses
    # the EHID4 register instead of HID4.
    is_ecore = not (mrs(MPIDR_EL1) & (1 << 16))
    hid4 = EHID4_EL1 if is_ecore else HID4_EL1

    # Enable DC MVA ops
    clear_bits(hid4, 1 << 11)

    # Enable PMU
    set_bits(PMCR0_EL1, 1 | (1<<30))
    msr(PMCR1_EL1, 0xffffffffffffffff)

    # Enable TBI
    set_bits(TCR_EL1, 1 << 37)

    # Enable user cache ops
    set_bits(SCTLR_EL1, 1 << 26)
| 131 | + |
# Apply the per-core setup to both cores the experiment runs on.
init_core(TEST_ECORE)
init_core(TEST_PCORE)

# Enable DC MVA ops
# Same bit-11 clear as in init_core(), but issued without a `call=` override,
# so it presumably executes on the CPU running the proxy. EHID4 is used
# unconditionally here — assumes that CPU is an e-core; TODO confirm.
v = u.mrs(EHID4_EL1)
v &= ~(1 << 11)
u.msr(EHID4_EL1, v)
| 139 | + |
def test_cpu(cpu, mask):
    """Run ``REPETITIONS`` timed measurements on core ``cpu``.

    Both buffers are refilled with the 0x55 pattern first. Each iteration
    picks the data-buffer slot selected by the global ``rnd_idx``, stores
    its address OR-ed with ``mask`` (and ``REGION_RWX_EL0``) at
    ``aop_addr``, flushes that line, runs the assembled test routine at EL0
    and then advances ``rnd_idx`` with ``prng``.

    Returns a ``(total_aop, total_ptr)`` tuple: the sums of the upper and
    lower 32 bits of the routine's return value, i.e. the accumulated
    counter deltas for the load through ``aop_addr`` and the load through
    the test address respectively.
    """
    global rnd_idx

    fill_pattern = 0x5555555555555555
    p.memset64(data_buf_addr, fill_pattern, SIZE_DATA_ARRAY)
    p.memset64(aop_addr, fill_pattern, PAGE_SIZE)

    aop_sum = 0
    ptr_sum = 0
    for _ in range(REPETITIONS):
        target = data_buf_addr + L2_LINE_SIZE * rnd_idx

        # Plant the (optionally masked) pointer and evict its cache line.
        p.write64(aop_addr, target | mask | REGION_RWX_EL0)
        p.dc_civac(aop_addr, L2_LINE_SIZE)
        # p.dc_civac(data_buf_addr, SIZE_DATA_ARRAY)

        packed = p.smp_call_sync_el0(
            cpu,
            util.test | REGION_RWX_EL0,
            aop_addr | REGION_RWX_EL0,
            target | REGION_RWX_EL0,
            7 << 60,
        )
        # Upper word: first timed load; lower word: second timed load.
        aop_sum += packed >> 32
        ptr_sum += packed & 0xffffffff

        rnd_idx = prng(rnd_idx)

    return aop_sum, ptr_sum
| 163 | + |
| 164 | + |
# Baseline measurements on both cores. Each line prints the
# (total_aop, total_ptr) tuple returned by test_cpu(): "plain" stores the
# unmodified pointer, "mask" ORs 0xaaaaaaaa00000000 into it before storing.
print("ECore plain:", test_cpu(TEST_ECORE, 0))
print("ECore mask: ", test_cpu(TEST_ECORE, 0xaaaaaaaa00000000))
print("PCore plain:", test_cpu(TEST_PCORE, 0))
print("PCore mask: ", test_cpu(TEST_PCORE, 0xaaaaaaaa00000000))
| 169 | + |
# Sweep selected HID register bits on the P-core: flip one bit, re-run the
# plain and masked measurements, report the outcome, then put the register
# back to the value it had before the sweep.
cpu = TEST_PCORE


def _call_on_test_cpu(x, *args):
    """Run a proxy call on the core selected by the module-level ``cpu``."""
    return cpu_call(cpu, x, *args)


for reg in (
    # Uncomment entries to include more registers in the sweep.
    # "HID0_EL1",
    # "HID1_EL1",
    # "HID2_EL1",
    # "HID3_EL1",
    "HID4_EL1",
    # "HID5_EL1",
    # "HID6_EL1",
    # "HID7_EL1",
    # "HID8_EL1",
    # "HID9_EL1",
    # "HID10_EL1",
    "HID11_EL1",
    # "HID13_EL1",
    # "HID14_EL1",
    # "HID16_EL1",
    # "HID17_EL1",
    # "HID18_EL1",
    "HID21_EL1",
    # "HID26_EL1",
    # "HID27_EL1",
):

    # Value to restore once the register has been tested.
    hid = u.mrs(reg, call=_call_on_test_cpu)

    for i in range(64):
        # Only the listed (register, bit) pairs are exercised.
        if (reg, i) not in (
            ("HID4_EL1", 4),
            ("HID11_EL1", 30),
            ("HID21_EL1", 40),
        ):
            continue

        bit = (1 << i)
        print(f"Test {reg} bit {i}:", end=" ")

        try:
            u.msr(reg, hid ^ bit, call=_call_on_test_cpu)

            # [1] is total_ptr: the accumulated delta of the second timed
            # load, with a plain pointer stored vs. a masked one.
            tval = test_cpu(cpu, 0)[1]
            control = test_cpu(cpu, 0xaaaaaaaa00000000)[1]

            # The plain run being clearly faster than the masked control is
            # reported as the DMP still being active with this bit flipped.
            if tval < (0.75 * control):
                print(f"DMP active {tval} {control}")
            else:
                print(f"DMP INACTIVE {tval} {control}")
        finally:
            # Restore the original value even if a measurement raised, so
            # the core is never left running with a flipped HID bit.
            u.msr(reg, hid, call=_call_on_test_cpu)
| 219 | + |
0 commit comments