1414#%# decode + decode_BRANCH + end_decode
1515#% set branch_decode = 23 + 16 + 33
1616
17- #%# ... + check_interrupts + load_ram_word_unchecked + store_ram_word_unchecked + instret overflow + a bit extra to be safe
18- #% set MAX_INSTRUCTION_COST = (base_instruction_cost + amo_decode + amo_cost + branch_decode + 8 + 32 + 23 + 3 + 10)|round(method="ceil")|int
17+ #%# ... + store_ram_word_unchecked + instret overflow + a bit extra to be safe
18+ #% set base_max_cost = base_instruction_cost + amo_cost + branch_decode + 23 + 3 + 10
19+
20+ #%# ... + check_interrupts + access_icache
21+ #% set MAX_FAST_INSTRUCTION_COST = base_max_cost + 8 + 25
22+
23+ #%# ... + amo_decode + load_ram_word_unchecked
24+ #% set MAX_SLOW_INSTRUCTION_COST = base_max_cost + amo_decode + 32
1925
2026#%# linked buildings
2127#%# IMPORTANT: the buildings must be linked in the order shown here
@@ -128,9 +134,10 @@ check_interrupts__machine:
128134 jump trap_without_mtval notEqual mie.mtie 0
129135
130136# cost:
131- # base: 17
132- # fast path: 42 (base + 9 + 16)
133- # slow path: 35 (base + 13 + 5) + dynamic
137+ # base: 14
138+ # read_icache: 18 (base + 4)
139+ # access_icache: 43 (base + 13 + 16)
140+ # slow_fetch: 36 (base + 17 + 5) + dynamic
134141main:
135142 # store the pc for the following instruction in a separate variable, so jumps and traps don't need to account for the pc being incremented at the end of an instruction
136143 op add next_pc pc 4
@@ -145,9 +152,11 @@ main:
145152 jump main__access_icache lessThan pc ICACHE_SIZE
146153
147154main__slow_instruction_fetch:
155+ jump next_tick lessThanEq accumulator {{MAX_SLOW_INSTRUCTION_COST}}
156+
148157 # update accumulator in case we take a trap while loading
149- # 13 (instructions between main and the below comment) + 5 (load_word static cost)
150- op sub accumulator accumulator 18
158+ # 14 (instructions between main and the below comment) + 5 (load_word static cost)
159+ op sub accumulator accumulator 19
151160
152161 jump default_mtvec_handler equal pc {{SYSCON}}
153162
@@ -162,20 +171,23 @@ main__slow_instruction_fetch:
162171
163172 # a trap cannot occur between now and the lookup table fetch
164173 # so modify accumulator to take base_instruction_cost into account
165- # base_instruction_cost assumes we take the fast path
166- # at this point, we've double-counted 5 instructions on the base path
167- # also, the static cost of the slow path is 7 instructions fewer than the fast path
168- # so we can safely add 12 instructions back into the accumulator
169- op add accumulator accumulator 12
174+ # base_instruction_cost assumes we take read_icache
175+ # the static cost of slow_fetch is 18 instructions more than that of read_icache (36 vs. 18)
176+ # however, we subtracted 19 above
177+ # so we can safely add 1 instruction back into the accumulator
178+ op add accumulator accumulator 1
170179
171180 set instruction result
172181 set decode_address null
173182
174183 # IMPORTANT: remember to update this value if anything in access_icache or read_icache is changed
175- op add decode_ret @counter 10
184+ op add decode_ret @counter 11
176185 jump decode always
177186
178187main__access_icache:
188+ # base_instruction_cost assumes read_icache
189+ # access_icache costs 25 more instructions than read_icache (43 vs. 18)
190+ op sub accumulator accumulator 25
179191 # the icache is stored in RAM cells immediately after the end of RAM with the same density as regular memory
180192 # icache address = RAM_END + pc = RAM_START + RAM_SIZE + pc
181193 # _address @ access_ram_raw = address - RAM_START
@@ -254,7 +266,7 @@ end_instruction_trap:
254266 jump state->pause equal pc breakpoint_address
255267 jump state->pause equal single_step_enabled true
256268
257- jump next_tick lessThanEq accumulator {{MAX_INSTRUCTION_COST }}
269+ jump next_tick lessThanEq accumulator {{MAX_FAST_INSTRUCTION_COST }}
258270 jump main notEqual interrupts_pending true
259271 jump check_interrupts always
260272
@@ -375,6 +387,7 @@ unsigned_to_signed__done_2:
375387access_ram:
376388 op sub _address address {{RAM_START}}
377389
390+ # cost: 16
378391access_ram_raw:
379392 # we store 4 bytes in each value
380393 op idiv _address _address 4
@@ -2296,7 +2309,8 @@ set {{UART_RX_WRITE}} null
22962309set {{UART_TX_START}} null
22972310set {{UART_TX_READ}} null
22982311set {{UART_TX_WRITE}} null
2299- set {{MAX_INSTRUCTION_COST}} null
2312+ set {{MAX_FAST_INSTRUCTION_COST}} null
2313+ set {{MAX_SLOW_INSTRUCTION_COST}} null
23002314# local preprocessor constants
23012315set {{UART_START_LINK}} null
23022316set {{LABELS}} null
0 commit comments