Skip to content

Commit 4088578

Browse files
committed
Parallelize decode instruction instead of cheating with setrate 500000
1 parent d655a0d commit 4088578

File tree

6 files changed

+288
-171
lines changed

6 files changed

+288
-171
lines changed

pyproject.toml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,22 @@ dependencies = [
1515
"pyyaml>=5.2",
1616
"riscof",
1717
"riscv-isac",
18+
"riscv-config>=3.18.3",
1819
"tqdm>=4.67.1",
1920
"typer>=0.15.4",
2021
]
2122

2223
[dependency-groups]
2324
dev = [
25+
"poethepoet>=0.35.0",
2426
"ruff>=0.11.12",
2527
]
2628

29+
[tool.uv]
30+
override-dependencies = [
31+
"pyyaml>=6.0.2",
32+
]
33+
2734
[tool.uv.sources]
2835
riscof = { git = "https://github.com/riscv/riscof", rev = "9fe3597d75" }
2936
riscv-isac = { git = "https://github.com/riscv-software-src/riscv-isac", rev = "777d2b4762" }
@@ -33,6 +40,10 @@ packages = [
3340
"python/src/mlogv32",
3441
]
3542

43+
[tool.poe.tasks]
44+
build = "python -m mlogv32.preprocessor build src/cpu/config.yaml"
45+
build-cpu = "python -m mlogv32.preprocessor build src/cpu/config.yaml --cpu-only"
46+
3647
[tool.ruff]
3748
extend-exclude = [
3849
"**/venv",

python/src/mlogv32/preprocessor/app.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,8 @@ def build(
253253
)
254254
)
255255

256+
schem.set_tag("name", "mlogv32")
257+
256258
if output:
257259
schem.write_file(str(output))
258260
else:

src/cpu/controller.mlog.jinja

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -184,14 +184,19 @@ init_incr:
184184
write 0 {{CSRS}} "{{ 'mscratch'|csr }}"
185185
write 0 {{CSRS}} "{{ 'mtval'|csr }}"
186186

187-
# finally, tick-align again to be safe, finish any last-second initialization, then start the workers
188-
187+
# get the worker count
188+
# this is necessary for distributing parallel tasks, e.g. the MLOGSYS.icache instruction
189+
set prev_proc @this
190+
set worker_id -1 # the first worker will have an id of -1 + 1 = 0
189191
wait 1e-5
192+
read worker_count prev_proc "worker_id"
193+
op add worker_count worker_count 1
190194

191-
# mtime/mcycle reference points
195+
# set mtime/mcycle reference points
192196
set last_time_update @time
193197
set last_cycle_update @tick
194198

199+
# start the workers
195200
set prev_proc @this
196201
set state "running"
197202

@@ -203,8 +208,6 @@ next_tick:
203208
read pc prev_proc "pc"
204209
read icache_ram prev_proc "icache_ram"
205210
read icache_var prev_proc "icache_var"
206-
read decode_ram prev_proc "decode_ram"
207-
read decode_var prev_proc "decode_var"
208211
read __etext prev_proc "__etext"
209212
read privilege_mode prev_proc "privilege_mode"
210213
read reservation_set prev_proc "reservation_set"
@@ -228,6 +231,7 @@ next_tick:
228231
jump halt equal state "halt"
229232
jump reset equal state "reset"
230233
jump pause equal state "pause"
234+
jump decode equal state "decoding"
231235
end_pause:
232236

233237
# poll hardware
@@ -302,8 +306,26 @@ pause__loop:
302306
sensor enabled {{PAUSE_SWITCH}} @enabled
303307
jump pause__loop equal enabled true
304308

309+
set prev_proc @this
310+
set state "running"
305311
jump end_pause always
306312

313+
decode:
314+
set workers_done 0
315+
316+
decode__loop:
317+
wait 1e-5
318+
319+
sensor enabled {{POWER_SWITCH}} @enabled
320+
jump halt equal enabled false
321+
322+
jump decode__loop lessThan workers_done worker_count
323+
324+
# signal that all workers are finished, then wait for the first decoding worker to increment pc
325+
set prev_proc @this
326+
set state "running"
327+
jump next_tick always
328+
307329
# given a value 0 <= address < RAM_PROC_SIZE, resolve that variable in the lookup table
308330
# address -> variable
309331
lookup_variable:
@@ -358,12 +380,11 @@ set _ {{UART_TX_START}}
358380
set _ {{MAX_INSTRUCTION_ACCUMULATOR_USAGE}}
359381
set _ UART_FIFO_MODULO
360382
set _ RAM_END
383+
set _ worker_id
361384
set _ state
362385
set _ pc
363386
set _ icache_ram
364387
set _ icache_var
365-
set _ decode_ram
366-
set _ decode_var
367388
set _ __etext
368389
set _ privilege_mode
369390
set _ reservation_set

src/cpu/worker.mlog.jinja

Lines changed: 62 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
reset:
2727
setrate 1000
2828

29+
# define this variable FIRST so that we can read other variables over it
30+
set _rom null
31+
2932
# load config
3033
read MEMORY_X {{CONFIG}} "MEMORY_X"
3134
read MEMORY_Y {{CONFIG}} "MEMORY_Y"
@@ -48,12 +51,14 @@ next_tick:
4851

4952
# fetch current hart state
5053
read prev_proc {{CONTROLLER}} "prev_proc"
54+
55+
read worker_id prev_proc "worker_id"
56+
op add worker_id worker_id 1
57+
5158
read state prev_proc "state"
5259
read pc prev_proc "pc"
5360
read icache_ram prev_proc "icache_ram"
5461
read icache_var prev_proc "icache_var"
55-
read decode_ram prev_proc "decode_ram"
56-
read decode_var prev_proc "decode_var"
5762
read __etext prev_proc "__etext"
5863
read privilege_mode prev_proc "privilege_mode"
5964
read reservation_set prev_proc "reservation_set"
@@ -70,12 +75,14 @@ next_tick:
7075
read csr_mie prev_proc "csr_mie"
7176
read interrupts_pending prev_proc "interrupts_pending"
7277

73-
jump reset notEqual state "running"
74-
75-
# if we're executing instructions, tell the next proc to fetch the hart state from us
78+
# tell the next proc to fetch the hart state from us
79+
# we do this even if not running so that the worker id is accurate
7680
write @this {{CONTROLLER}} "prev_proc"
7781

78-
op sub accumulator @ipt 29
82+
jump parallel_decode equal state "decoding"
83+
jump reset notEqual state "running"
84+
85+
op sub accumulator @ipt 30
7986
jump main notEqual interrupts_pending true
8087

8188
check_interrupts:
@@ -1952,7 +1959,7 @@ MLOGSYS:
19521959
op add @counter @counter jump
19531960

19541961
# init icache
1955-
op min __etext rs1 ROM_SIZE
1962+
set is_first_decoding_worker true
19561963
jump MLOGSYS__init_icache always
19571964
# printchar
19581965
printchar rs1
@@ -1965,22 +1972,38 @@ MLOGSYS:
19651972
jump end_instruction always
19661973

19671974
MLOGSYS__init_icache:
1968-
# FIXME: hack
1969-
setrate 500000
19701975
set state "decoding"
1976+
op min __etext rs1 ROM_SIZE
1977+
op min __etext __etext ICACHE_SIZE
1978+
# continue into parallel_decode
19711979

1972-
print "decoding @ {0} {1}"
1973-
format @thisx
1974-
format @thisy
1975-
printflush {{ERROR_OUTPUT}}
1980+
# we jump here from next_tick if state is "decoding"
1981+
parallel_decode:
1982+
# wait one tick to allow the controller to calculate worker_count, in case this is the very first instruction executed
1983+
wait 1e-5
19761984

1977-
op min __etext __etext ICACHE_SIZE
1985+
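# split the n words to decode into worker_count contiguous chunks whose sizes differ by at most one word; this worker takes chunk number worker_id (see https://stackoverflow.com/a/37414115)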
1986+
1987+
read worker_count {{CONTROLLER}} "worker_count"
1988+
1989+
op idiv n __etext 4
1990+
1991+
op idiv n/k n worker_count
1992+
op mod n%k n worker_count
1993+
1994+
op mul decode_address worker_id n/k
1995+
op min offset worker_id n%k
1996+
op add decode_address decode_address offset
1997+
op mul decode_address decode_address 4
1998+
1999+
op lessThan chunk_size worker_id n%k
2000+
op add chunk_size chunk_size n/k
2001+
op mul chunk_size chunk_size 4
2002+
op add end_decode_address decode_address chunk_size
19782003

1979-
# decode .text
1980-
set decode_address 0
19812004
set decode_var null
19822005

1983-
MLOGSYS__init_icache__loop:
2006+
parallel_decode__loop:
19842007
set address decode_address
19852008
op add ret @counter 1
19862009
jump load_rom_word_unchecked always
@@ -1989,20 +2012,33 @@ MLOGSYS__init_icache__loop:
19892012
op add decode_ret @counter 1
19902013
jump decode always
19912014

2015+
# bail out if the decode operation was cancelled
2016+
read state {{CONTROLLER}} "state"
2017+
jump next_tick notEqual state "decoding"
2018+
19922019
op add decode_address decode_address 4
1993-
jump MLOGSYS__init_icache__loop lessThan decode_address __etext
2020+
jump parallel_decode__loop lessThan decode_address end_decode_address
19942021

1995-
# FIXME: hack 2
1996-
setrate 1000
2022+
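# atomic increment of the controller's workers_done counter (NOTE(review): presumably the wait below aligns the read-modify-write so no other worker interleaves with it - confirm)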
19972023
wait 1e-5
2024+
read workers_done {{CONTROLLER}} "workers_done"
2025+
op add workers_done workers_done 1
2026+
write workers_done {{CONTROLLER}} "workers_done"
19982027

1999-
read controller_state {{CONTROLLER}} "state"
2000-
jump state->halt equal controller_state "halt"
2028+
# wait for the controller to signal that all workers are finished decoding
2029+
parallel_decode__wait:
2030+
wait 1e-5
2031+
read state {{CONTROLLER}} "state"
2032+
jump parallel_decode__wait equal state "decoding"
20012033

2002-
read prev_proc {{CONTROLLER}} "prev_proc"
2003-
write "running" prev_proc "state"
2004-
printflush {{ERROR_OUTPUT}}
2005-
jump next_tick always
2034+
# if we're not the worker that initially encountered the decode instruction, go to the next tick
2035+
jump next_tick notEqual is_first_decoding_worker true
2036+
2037+
# otherwise, finish this instruction and then go to the next tick
2038+
set is_first_decoding_worker null
2039+
write @this {{CONTROLLER}} "prev_proc"
2040+
set accumulator 0
2041+
jump end_instruction always
20062042

20072043
MLOGDRAW:
20082044
# I-type: rs1, imm=funct12, rd_id

0 commit comments

Comments
 (0)