|
| 1 | +# Copyright lowRISC contributors. |
| 2 | +# Licensed under the Apache License, Version 2.0, see LICENSE for details. |
| 3 | +# SPDX-License-Identifier: Apache-2.0 |
| 4 | + |
| 5 | +// A set of memory initialisation functions using different access sizes, to check that |
| 6 | +// the write transactions are properly coalesced into write bursts to the HyperBus |
| 7 | +// Memory Controller. |
| 8 | +// |
| 9 | +// Each routine is expected to write a single, defined byte value to every address that |
| 10 | +// it modifies. By first initialising the entire target buffer to a different value, |
| 11 | +// the set of modified addresses may be ascertained. |
| 12 | + |
| 13 | + .option norvc // assemble every instruction in its uncompressed form |
| 14 | + |
| 15 | + .section .text, "ax", @progbits // allocatable, executable code section |
| 16 | + |
| 17 | +// Fill a buffer using byte stores only, eight per loop iteration. |
| 18 | +// |
| 19 | +// entry ca0 -> destination buffer; any byte alignment |
| 20 | +// a1 = fill byte |
| 21 | +// a2 = byte count; a remainder of 1..7 is completed by memset_b_tail |
| 22 | +// exit - (ca0 and a2 are modified) |
| 23 | + .global hyperram_memset_b |
| 24 | + .balign 32 |
| 25 | +hyperram_memset_b: |
| 26 | + addi a2, a2, -8 // bias the count: negative means fewer than 8 bytes remain |
| 27 | + blt a2, zero, memset_b_rem |
| 28 | +memset_b_loop: |
| 29 | + csb a1, 0(ca0) |
| 30 | + csb a1, 1(ca0) |
| 31 | + csb a1, 2(ca0) |
| 32 | + csb a1, 3(ca0) |
| 33 | + csb a1, 4(ca0) |
| 34 | + csb a1, 5(ca0) |
| 35 | + csb a1, 6(ca0) |
| 36 | + csb a1, 7(ca0) |
| 37 | + cincoffset ca0, ca0, 8 // step past the 8 bytes just written |
| 38 | + addi a2, a2, -8 |
| 39 | + bge a2, zero, memset_b_loop // another full group of 8 is available |
| 40 | +memset_b_rem: |
| 41 | + addi a2, a2, 8 // undo the bias; a2 = bytes left over |
| 42 | + blt zero, a2, memset_b_tail // leftovers: branch (not call), the tail returns for us |
| 43 | + cret |
| 44 | + |
| 45 | +// Shared epilogue for every _ascending_ routine: finish the request one byte at a time. |
| 46 | +// Speed is unimportant here, although keeping the byte writes close together in time is |
| 47 | +// preferable because it gives a better chance of provoking races. |
| 48 | +// |
| 49 | +// ca0 -> next byte of the destination buffer to be written |
| 50 | +// a1 = fill byte |
| 51 | +// a2 = bytes still to be written; must be non-zero |
| 52 | +memset_b_tail: |
| 53 | + add a2, a2, a0 // a2 = address just beyond the final byte to write |
| 54 | +memset_b_tail_loop: |
| 55 | + csb a1, 0(ca0) |
| 56 | + cincoffset ca0, ca0, 1 |
| 57 | + bgtu a2, a0, memset_b_tail_loop // repeat while the address is below the end |
| 58 | + cret |
| 59 | + |
| 60 | +// Shared epilogue for the two _descending_ routines: byte stores with a pre-decremented address. |
| 61 | +// |
| 62 | +// ca0 -> one byte beyond the next address to be written; it is decremented before use. |
| 63 | +// a1 = fill byte |
| 64 | +// a2 = bytes still to be written; must be non-zero |
| 65 | +memset_b_desc_tail: |
| 66 | + sub a2, a0, a2 // a2 = lowest address that is to be written |
| 67 | +memset_b_desc_tail_loop: |
| 68 | + csb a1, -1(ca0) |
| 69 | + cincoffset ca0, ca0, -1 |
| 70 | + bltu a2, a0, memset_b_desc_tail_loop // repeat while the address is above the start |
| 71 | + cret |
| 72 | + |
| 73 | +// Fill a buffer with a mix of access sizes: every 4-byte word receives two |
| 74 | +// byte stores followed by one half-word store. |
| 75 | +// |
| 76 | +// entry ca0 -> destination buffer, word-aligned |
| 77 | +// a1 = fill byte, replicated throughout the word |
| 78 | +// a2 = byte count; a remainder of 1..3 is completed by memset_b_tail |
| 79 | +// exit - (ca0 and a2 are modified) |
| 80 | + .global hyperram_memset_hb |
| 81 | + .balign 32 |
| 82 | +hyperram_memset_hb: |
| 83 | + addi a2, a2, -4 // bias the count: negative means fewer than 4 bytes remain |
| 84 | + blt a2, zero, memset_hb_rem |
| 85 | +memset_hb_loop: |
| 86 | + csb a1, 0(ca0) // byte 0 |
| 87 | + csb a1, 1(ca0) // byte 1 |
| 88 | + csh a1, 2(ca0) // bytes 2 and 3 |
| 89 | + cincoffset ca0, ca0, 4 // step past the word just written |
| 90 | + addi a2, a2, -4 |
| 91 | + bge a2, zero, memset_hb_loop // another full word is available |
| 92 | +memset_hb_rem: |
| 93 | + addi a2, a2, 4 // undo the bias; a2 = bytes left over |
| 94 | + blt zero, a2, memset_b_tail // leftovers: branch (not call), the tail returns for us |
| 95 | + cret |
| 96 | + |
| 97 | +// Fill a buffer using half-word stores, eight per loop iteration. |
| 98 | +// |
| 99 | +// entry ca0 -> destination buffer, half-word aligned |
| 100 | +// a1 = fill byte, replicated throughout the half-word |
| 101 | +// a2 = byte count; a remainder of 1..15 is completed by memset_b_tail |
| 102 | +// exit - (ca0 and a2 are modified) |
| 103 | + .global hyperram_memset_h |
| 104 | + .balign 32 |
| 105 | +hyperram_memset_h: |
| 106 | + addi a2, a2, -16 // bias the count: negative means fewer than 16 bytes remain |
| 107 | + blt a2, zero, memset_h_rem |
| 108 | +memset_h_loop: |
| 109 | + csh a1, 0(ca0) |
| 110 | + csh a1, 2(ca0) |
| 111 | + csh a1, 4(ca0) |
| 112 | + csh a1, 6(ca0) |
| 113 | + csh a1, 8(ca0) |
| 114 | + csh a1, 10(ca0) |
| 115 | + csh a1, 12(ca0) |
| 116 | + csh a1, 14(ca0) |
| 117 | + cincoffset ca0, ca0, 16 // step past the 16 bytes just written |
| 118 | + addi a2, a2, -16 |
| 119 | + bge a2, zero, memset_h_loop // another full group of 16 is available |
| 120 | +memset_h_rem: |
| 121 | + addi a2, a2, 16 // undo the bias; a2 = bytes left over |
| 122 | + blt zero, a2, memset_b_tail // leftovers: branch (not call), the tail returns for us |
| 123 | + cret |
| 124 | + |
| 125 | +// Fill a buffer using word stores, eight per loop iteration. |
| 126 | +// |
| 127 | +// entry ca0 -> destination buffer, word-aligned |
| 128 | +// a1 = fill byte, replicated throughout the word |
| 129 | +// a2 = byte count; a remainder of 1..31 is completed by memset_b_tail |
| 130 | + .global hyperram_memset_w |
| 131 | + .balign 32 |
| 132 | +hyperram_memset_w: |
| 133 | + addi a2, a2, -32 // bias the count: negative means fewer than 32 bytes remain |
| 134 | + blt a2, zero, memset_w_rem |
| 135 | +memset_w_loop: |
| 136 | + csw a1, 0(ca0) |
| 137 | + csw a1, 4(ca0) |
| 138 | + csw a1, 8(ca0) |
| 139 | + csw a1, 12(ca0) |
| 140 | + csw a1, 16(ca0) |
| 141 | + csw a1, 20(ca0) |
| 142 | + csw a1, 24(ca0) |
| 143 | + csw a1, 28(ca0) |
| 144 | + cincoffset ca0, ca0, 32 // step past the 32 bytes just written |
| 145 | + addi a2, a2, -32 |
| 146 | + bge a2, zero, memset_w_loop // another full group of 32 is available |
| 147 | +memset_w_rem: |
| 148 | + addi a2, a2, 32 // undo the bias; a2 = bytes left over |
| 149 | + blt zero, a2, memset_b_tail // leftovers: branch (not call), the tail returns for us |
| 150 | + cret |
| 151 | + |
| 152 | +// Fill a buffer by writing every word twice. Speed is irrelevant for this routine; |
| 153 | +// its only purpose is to check that the data which ends up in memory is correct. |
| 154 | +// |
| 155 | +// entry ca0 -> destination buffer, word-aligned |
| 156 | +// a1 = fill byte, replicated throughout the word |
| 157 | +// a2 = byte count; a remainder of 1..3 is completed by memset_b_tail |
| 158 | + .global hyperram_memset_wr |
| 159 | + .balign 32 |
| 160 | +hyperram_memset_wr: |
| 161 | + addi a2, a2, -4 // bias the count: negative means fewer than 4 bytes remain |
| 162 | + blt a2, zero, memset_wr_rem |
| 163 | + not a3, a1 // a3 = bitwise inverse of the fill word |
| 164 | +memset_wr_loop: |
| 165 | + csw a3, 0(ca0) // The inverted word is written first... |
| 166 | + csw a1, 0(ca0) // ...and must then be replaced by the real fill word. |
| 167 | + cincoffset ca0, ca0, 4 // step past the word just written |
| 168 | + addi a2, a2, -4 |
| 169 | + bge a2, zero, memset_wr_loop // another full word is available |
| 170 | +memset_wr_rem: |
| 171 | + addi a2, a2, 4 // undo the bias; a2 = bytes left over |
| 172 | + blt zero, a2, memset_b_tail // leftovers: branch (not call), the tail returns for us |
| 173 | + cret |
| 174 | + |
| 175 | +// Fill a buffer using word stores, working downwards from its end; two per loop iteration. |
| 176 | +// |
| 177 | +// entry ca0 -> end of the destination buffer (exclusive), word-aligned |
| 178 | +// a1 = fill byte, replicated throughout the word |
| 179 | +// a2 = byte count; a remainder of 1..7 is completed by memset_b_desc_tail |
| 180 | + .global hyperram_memset_wd |
| 181 | + .balign 32 |
| 182 | +hyperram_memset_wd: |
| 183 | + addi a2, a2, -8 // bias the count: negative means fewer than 8 bytes remain |
| 184 | + blt a2, zero, memset_wd_rem |
| 185 | +memset_wd_loop: |
| 186 | + csw a1, -4(ca0) // higher-addressed word goes first |
| 187 | + csw a1, -8(ca0) |
| 188 | + cincoffset ca0, ca0, -8 // step down past the 8 bytes just written |
| 189 | + addi a2, a2, -8 |
| 190 | + bge a2, zero, memset_wd_loop // another full group of 8 is available |
| 191 | +memset_wd_rem: |
| 192 | + addi a2, a2, 8 // undo the bias; a2 = bytes left over |
| 193 | + blt zero, a2, memset_b_desc_tail // leftovers: branch (not call), the tail returns for us |
| 194 | + cret |
| 195 | + |
| 196 | +// Fill a buffer using capability stores, working upwards. |
| 197 | +// |
| 198 | +// A capability store is issued as two back-to-back word writes, so it serves |
| 199 | +// here purely as a means of producing 64-bit writes; no attempt is made to |
| 200 | +// construct sensible/valid capabilities. |
| 201 | +// |
| 202 | +// entry ca0 -> destination buffer, double-word aligned |
| 203 | +// a1 = fill byte, replicated throughout the word |
| 204 | +// a2 = byte count; a remainder of 1..7 is completed by memset_b_tail |
| 205 | + .global hyperram_memset_c |
| 206 | + .balign 32 |
| 207 | +hyperram_memset_c: |
| 208 | + // Form a double word from two copies of the data word, via a temporary stack slot. |
| 209 | + cincoffset csp, csp, -8 // claim 8 bytes of stack |
| 210 | + csw a1, 0(csp) |
| 211 | + csw a1, 4(csp) |
| 212 | + clc ca1, 0(csp) // reload both words as one 64-bit value |
| 213 | + cincoffset csp, csp, 8 // release the stack slot |
| 214 | + addi a2, a2, -8 // bias the count: negative means fewer than 8 bytes remain |
| 215 | + blt a2, zero, memset_c_rem |
| 216 | +memset_c_loop: |
| 217 | + csc ca1, 0(ca0) |
| 218 | + cincoffset ca0, ca0, 8 // step past the double word just written |
| 219 | + addi a2, a2, -8 |
| 220 | + bge a2, zero, memset_c_loop // another full double word is available |
| 221 | +memset_c_rem: |
| 222 | + addi a2, a2, 8 // undo the bias; a2 = bytes left over |
| 223 | + blt zero, a2, memset_b_tail // leftovers: branch (not call), the tail returns for us |
| 224 | + cret |
| 225 | + |
| 226 | +// Fill a buffer using capability stores, working downwards; used only to issue 64-bit writes. |
| 227 | +// |
| 228 | +// entry ca0 -> end of the destination buffer (exclusive), double-word aligned |
| 229 | +// a1 = fill byte, replicated throughout the word |
| 230 | +// a2 = byte count; a remainder of 1..7 is completed by memset_b_desc_tail |
| 231 | + .global hyperram_memset_cd |
| 232 | + .balign 32 |
| 233 | +hyperram_memset_cd: |
| 234 | + // Form a double word from two copies of the data word, via a temporary stack slot. |
| 235 | + cincoffset csp, csp, -8 // claim 8 bytes of stack |
| 236 | + csw a1, 0(csp) |
| 237 | + csw a1, 4(csp) |
| 238 | + clc ca1, 0(csp) // reload both words as one 64-bit value |
| 239 | + cincoffset csp, csp, 8 // release the stack slot |
| 240 | + addi a2, a2, -8 // bias the count: negative means fewer than 8 bytes remain |
| 241 | + blt a2, zero, memset_cd_rem |
| 242 | +memset_cd_loop: |
| 243 | + csc ca1, -8(ca0) |
| 244 | + cincoffset ca0, ca0, -8 // step down past the double word just written |
| 245 | + addi a2, a2, -8 |
| 246 | + bge a2, zero, memset_cd_loop // another full double word is available |
| 247 | +memset_cd_rem: |
| 248 | + addi a2, a2, 8 // undo the bias; a2 = bytes left over |
| 249 | + blt zero, a2, memset_b_desc_tail // leftovers: branch (not call), the tail returns for us |
| 250 | + cret |
| 251 | + |
0 commit comments