Skip to content

Commit d8b7d6a

Browse files
committed
Refactor instruction cost checking to actually take advantage of read_icache
1 parent 8e923eb commit d8b7d6a

File tree

3 files changed

+32
-17
lines changed

3 files changed

+32
-17
lines changed

src/cpu/config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ schematics:
77
ram: ../../schematics/ram_proc.msch
88

99
inputs:
10-
# 42 (main) + 8 (end)
11-
base_instruction_cost: 50
10+
# 18 (main__read_icache) + 8 (end)
11+
base_instruction_cost: 26
1212

1313
instructions:
1414
- label: BEQ

src/cpu/controller.mlog.jinja

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ end_pause:
280280
set last_cycle_update @tick
281281

282282
# check if interrupts should be pending
283+
# FIXME: we probably need to check this in the workers
283284

284285
# timer interrupt
285286
op greaterThan high_gt csr_mtimeh csr_mtimecmph

src/cpu/worker.mlog.jinja

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,14 @@
1414
#%# decode + decode_BRANCH + end_decode
1515
#% set branch_decode = 23 + 16 + 33
1616

17-
#%# ... + check_interrupts + load_ram_word_unchecked + store_ram_word_unchecked + instret overflow + a bit extra to be safe
18-
#% set MAX_INSTRUCTION_COST = (base_instruction_cost + amo_decode + amo_cost + branch_decode + 8 + 32 + 23 + 3 + 10)|round(method="ceil")|int
17+
#%# ... + store_ram_word_unchecked + instret overflow + a bit extra to be safe
18+
#% set base_max_cost = base_instruction_cost + amo_cost + branch_decode + 23 + 3 + 10
19+
20+
#%# ... + check_interrupts + access_icache
21+
#% set MAX_FAST_INSTRUCTION_COST = base_max_cost + 8 + 25
22+
23+
#%# ... + amo_decode + load_ram_word_unchecked
24+
#% set MAX_SLOW_INSTRUCTION_COST = base_max_cost + amo_decode + 32
1925

2026
#%# linked buildings
2127
#%# IMPORTANT: the buildings must be linked in the order shown here
@@ -128,9 +134,10 @@ check_interrupts__machine:
128134
jump trap_without_mtval notEqual mie.mtie 0
129135

130136
# cost:
131-
# base: 17
132-
# fast path: 42 (base + 9 + 16)
133-
# slow path: 35 (base + 13 + 5) + dynamic
137+
# base: 14
138+
# read_icache: 18 (base + 4)
139+
# access_icache: 43 (base + 13 + 16)
140+
# slow_fetch: 36 (base + 17 + 5) + dynamic
134141
main:
135142
# store the pc for the following instruction in a separate variable, so jumps and traps don't need to account for the pc being incremented at the end of an instruction
136143
op add next_pc pc 4
@@ -145,9 +152,11 @@ main:
145152
jump main__access_icache lessThan pc ICACHE_SIZE
146153

147154
main__slow_instruction_fetch:
155+
jump next_tick lessThanEq accumulator {{MAX_SLOW_INSTRUCTION_COST}}
156+
148157
# update accumulator in case we take a trap while loading
149-
# 13 (instructions between main and the below comment) + 5 (load_word static cost)
150-
op sub accumulator accumulator 18
158+
# 14 (instructions between main and the below comment) + 5 (load_word static cost)
159+
op sub accumulator accumulator 19
151160

152161
jump default_mtvec_handler equal pc {{SYSCON}}
153162

@@ -162,20 +171,23 @@ main__slow_instruction_fetch:
162171

163172
# a trap cannot occur between now and the lookup table fetch
164173
# so modify accumulator to take base_instruction_cost into account
165-
# base_instruction_cost assumes we take the fast path
166-
# at this point, we've double-counted 5 instructions on the base path
167-
# also, the static cost of the slow path is 7 instructions fewer than the fast path
168-
# so we can safely add 12 instructions back into the accumulator
169-
op add accumulator accumulator 12
174+
# base_instruction_cost assumes we take read_icache
175+
# the static cost of slow_fetch is 18 instructions higher than that of read_icache (36 vs 18)
176+
# however, we subtracted 19 above
177+
# so we can safely add 1 instruction back into the accumulator
178+
op add accumulator accumulator 1
170179

171180
set instruction result
172181
set decode_address null
173182

174183
# IMPORTANT: remember to update this value if anything in access_icache or read_icache is changed
175-
op add decode_ret @counter 10
184+
op add decode_ret @counter 11
176185
jump decode always
177186

178187
main__access_icache:
188+
# base_instruction_cost assumes read_icache
189+
# access_icache costs 25 more instructions than read_icache (43 vs 18)
190+
op sub accumulator accumulator 25
179191
# the icache is stored in RAM cells immediately after the end of RAM with the same density as regular memory
180192
# icache address = RAM_END + pc = RAM_START + RAM_SIZE + pc
181193
# _address @ access_ram_raw = address - RAM_START
@@ -254,7 +266,7 @@ end_instruction_trap:
254266
jump state->pause equal pc breakpoint_address
255267
jump state->pause equal single_step_enabled true
256268

257-
jump next_tick lessThanEq accumulator {{MAX_INSTRUCTION_COST}}
269+
jump next_tick lessThanEq accumulator {{MAX_FAST_INSTRUCTION_COST}}
258270
jump main notEqual interrupts_pending true
259271
jump check_interrupts always
260272

@@ -375,6 +387,7 @@ unsigned_to_signed__done_2:
375387
access_ram:
376388
op sub _address address {{RAM_START}}
377389

390+
# cost: 16
378391
access_ram_raw:
379392
# we store 4 bytes in each value
380393
op idiv _address _address 4
@@ -2296,7 +2309,8 @@ set {{UART_RX_WRITE}} null
22962309
set {{UART_TX_START}} null
22972310
set {{UART_TX_READ}} null
22982311
set {{UART_TX_WRITE}} null
2299-
set {{MAX_INSTRUCTION_COST}} null
2312+
set {{MAX_FAST_INSTRUCTION_COST}} null
2313+
set {{MAX_SLOW_INSTRUCTION_COST}} null
23002314
# local preprocessor constants
23012315
set {{UART_START_LINK}} null
23022316
set {{LABELS}} null

0 commit comments

Comments
 (0)