Skip to content

Commit dda98de

Browse files
authored
[cpu] minor logic optimizations (#1381)
2 parents 81c1ea7 + d77a7ac commit dda98de

File tree

8 files changed

+30
-31
lines changed

8 files changed

+30
-31
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12
2929

3030
| Date | Version | Comment | Ticket |
3131
|:----:|:-------:|:--------|:------:|
32+
| 17.09.2025 | 1.12.1.9 | minor CPU logic optimizations | [#1381](https://github.com/stnolting/neorv32/pull/1381) |
3233
| 14.09.2025 | 1.12.1.8 | :warning: remove CFU CSRs (`cfureg[0..3]`) | [#1377](https://github.com/stnolting/neorv32/pull/1377) |
3334
| 13.09.2025 | 1.12.1.7 | :bug: fix unaligned instruction fetch bus error; do not trigger co-processors if pending instruction-related exception | [#1376](https://github.com/stnolting/neorv32/pull/1376) |
3435
| 13.09.2025 | 1.12.1.6 | minor RTL edits; add `Zca` ISA extension flag to `mxisa` CSR | [#1375](https://github.com/stnolting/neorv32/pull/1375) |

docs/datasheet/software.adoc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ be altered. However, experienced users can modify them to further tune compilati
261261
| `-mstrict-align` | Unaligned memory accesses cannot be resolved by the hardware and require emulation.
262262
| `-mbranch-cost=10` | Branching costs a lot of cycles.
263263
| `-Wl,--gc-sections` | Make the linker perform dead code elimination.
264-
| `-ffp-contract=off` | Disable floating-point expression contraction.
264+
| `-ffp-contract=off` | Disable floating-point expression contraction (fused multiply-add/sub; not supported by the NEORV32 FPU).
265265
| `-g` | Add (simple) debug information.
266266
|=======================
267267

rtl/core/neorv32_cpu.vhd

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,11 @@ end neorv32_cpu;
9595
architecture neorv32_cpu_rtl of neorv32_cpu is
9696

9797
-- auto-configuration --
98-
constant rf_rs3_en_c : boolean := RISCV_ISA_Zxcfu or RISCV_ISA_Zfinx; -- 3rd register file read port
98+
constant rf_rs3_en_c : boolean := RISCV_ISA_Zxcfu; -- 3rd register file read port
9999
constant riscv_a_c : boolean := RISCV_ISA_Zaamo and RISCV_ISA_Zalrsc; -- A: atomic memory operations
100100
constant riscv_b_c : boolean := RISCV_ISA_Zba and RISCV_ISA_Zbb and RISCV_ISA_Zbs; -- B: bit manipulation
101101
constant riscv_zcb_c : boolean := RISCV_ISA_C and RISCV_ISA_Zcb; -- Zcb: additional compressed instructions
102-
constant riscv_zkt_c : boolean := CPU_FAST_SHIFT_EN; -- Zkt: data-independent execution time for cryptographic operations
102+
constant riscv_zkt_c : boolean := CPU_FAST_SHIFT_EN; -- Zkt: data-independent execution time for cryptography operations
103103
constant riscv_zkn_c : boolean := RISCV_ISA_Zbkb and RISCV_ISA_Zbkc and RISCV_ISA_Zbkx and
104104
RISCV_ISA_Zkne and RISCV_ISA_Zknd and RISCV_ISA_Zknh; -- Zkn: NIST suite
105105
constant riscv_zks_c : boolean := RISCV_ISA_Zbkb and RISCV_ISA_Zbkc and RISCV_ISA_Zbkx and
@@ -245,7 +245,7 @@ begin
245245
RISCV_ISA_Zks => riscv_zks_c, -- ShangMi algorithm suite available
246246
RISCV_ISA_Zksed => RISCV_ISA_Zksed, -- implement ShangMi block cipher extension
247247
RISCV_ISA_Zksh => RISCV_ISA_Zksh, -- implement ShangMi hash extension
248-
RISCV_ISA_Zkt => riscv_zkt_c, -- data-independent execution time available (for cryptographic operations)
248+
RISCV_ISA_Zkt => riscv_zkt_c, -- data-independent execution time for cryptography operations available
249249
RISCV_ISA_Zmmul => RISCV_ISA_Zmmul, -- implement multiply-only M sub-extension
250250
RISCV_ISA_Zxcfu => RISCV_ISA_Zxcfu, -- implement custom (instr.) functions unit
251251
RISCV_ISA_Sdext => RISCV_ISA_Sdext, -- implement external debug mode extension
@@ -390,7 +390,7 @@ begin
390390
RISCV_ISA_Zknd => RISCV_ISA_Zknd, -- implement cryptography NIST AES decryption extension
391391
RISCV_ISA_Zkne => RISCV_ISA_Zkne, -- implement cryptography NIST AES encryption extension
392392
RISCV_ISA_Zknh => RISCV_ISA_Zknh, -- implement cryptography NIST hash extension
393-
RISCV_ISA_Zksed => RISCV_ISA_Zksed, -- implement ShangMi block cypher extension
393+
RISCV_ISA_Zksed => RISCV_ISA_Zksed, -- implement ShangMi block cipher extension
394394
RISCV_ISA_Zksh => RISCV_ISA_Zksh, -- implement ShangMi hash extension
395395
RISCV_ISA_Zmmul => RISCV_ISA_Zmmul, -- implement multiply-only M sub-extension
396396
RISCV_ISA_Zxcfu => RISCV_ISA_Zxcfu, -- implement custom (instr.) functions unit

rtl/core/neorv32_cpu_alu.vhd

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,6 @@ begin
255255
cmp_i => cmp, -- comparator status
256256
rs1_i => rs1_i, -- rf source 1
257257
rs2_i => rs2_i, -- rf source 2
258-
rs3_i => rs3_i, -- rf source 3
259258
-- result and status --
260259
res_o => cp_result(3), -- operation result
261260
valid_o => cp_valid(3) -- data output valid

rtl/core/neorv32_cpu_control.vhd

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,6 @@ architecture neorv32_cpu_control_rtl of neorv32_cpu_control is
185185
signal debug_ctrl : debug_ctrl_t;
186186

187187
-- misc/helpers --
188-
signal if_ack : std_ulogic; -- acknowledge instruction data from instruction fetch (front-end)
189188
signal if_reset : std_ulogic; -- reset instruction fetch (front-end)
190189
signal branch_taken : std_ulogic; -- fulfilled branch condition or unconditional jump
191190
signal monitor_cnt : std_ulogic_vector(monitor_mc_tmo_c downto 0); -- execution monitor cycle counter
@@ -292,7 +291,6 @@ begin
292291
exe_engine_nxt.pc <= exe_engine.pc;
293292
exe_engine_nxt.pc2 <= exe_engine.pc2;
294293
exe_engine_nxt.ra <= (others => '0'); -- output zero if not a branch instruction
295-
if_ack <= '0';
296294
if_reset <= '0';
297295
trap_ctrl.env_enter <= '0';
298296
trap_ctrl.env_exit <= '0';
@@ -358,8 +356,7 @@ begin
358356
--
359357
if (trap_ctrl.env_pending = '1') or (trap_ctrl.exc_fire = '1') then -- pending trap or pending exception (fast)
360358
exe_engine_nxt.state <= EX_TRAP_ENTER;
361-
elsif (frontend_i.valid = '1') and (hwtrig_i = '0') then -- new instruction word available and no pending HW trigger
362-
if_ack <= '1'; -- instruction data is about to be consumed
359+
elsif (frontend_i.valid = '1') and (hwtrig_i = '0') then -- new instruction word available and no pending HW trigger
363360
trap_ctrl.instr_be <= frontend_i.fault or pmp_fault_i; -- access fault during instruction fetch
364361
exe_engine_nxt.ci <= frontend_i.compr; -- this is a de-compressed instruction
365362
exe_engine_nxt.ir <= frontend_i.instr; -- instruction word
@@ -461,7 +458,7 @@ begin
461458
else -- instruction fence
462459
ctrl_nxt.if_fence <= '1';
463460
end if;
464-
exe_engine_nxt.state <= EX_RESTART; -- reset instruction fetch + IPB (actually only required for fence.i)
461+
exe_engine_nxt.state <= EX_RESTART; -- reset instruction fetch + IPB via branch to PC+4 (actually only required for fence.i)
465462

466463
-- FPU: floating-point operations --
467464
when opcode_fpu_c =>
@@ -567,7 +564,7 @@ begin
567564
-- instruction fetch --
568565
ctrl_o.if_fence <= ctrl.if_fence;
569566
ctrl_o.if_reset <= if_reset;
570-
ctrl_o.if_ack <= if_ack;
567+
ctrl_o.if_ready <= '1' when (exe_engine.state = EX_DISPATCH) else '0';
571568
-- program counter --
572569
ctrl_o.pc_cur <= exe_engine.pc(XLEN-1 downto 1) & '0';
573570
ctrl_o.pc_nxt <= exe_engine.pc2(XLEN-1 downto 1) & '0';

rtl/core/neorv32_cpu_cp_fpu.vhd

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ entity neorv32_cpu_cp_fpu is
4949
cmp_i : in std_ulogic_vector(1 downto 0); -- comparator status
5050
rs1_i : in std_ulogic_vector(XLEN-1 downto 0); -- rf source 1
5151
rs2_i : in std_ulogic_vector(XLEN-1 downto 0); -- rf source 2
52-
rs3_i : in std_ulogic_vector(XLEN-1 downto 0); -- rf source 3
5352
-- result and status --
5453
res_o : out std_ulogic_vector(XLEN-1 downto 0); -- operation result
5554
valid_o : out std_ulogic -- data output valid

rtl/core/neorv32_cpu_frontend.vhd

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ architecture neorv32_cpu_frontend_rtl of neorv32_cpu_frontend is
7979

8080
-- instruction issue engine --
8181
signal align_q, align_set, align_clr : std_ulogic;
82-
signal ipb_ack : std_ulogic_vector(1 downto 0);
82+
signal issue_valid : std_ulogic_vector(1 downto 0);
8383
signal cmd16 : std_ulogic_vector(15 downto 0);
8484
signal cmd32 : std_ulogic_vector(31 downto 0);
8585

@@ -219,7 +219,7 @@ begin
219219
elsif rising_edge(clk_i) then
220220
if (fetch.restart = '1') then
221221
align_q <= ctrl_i.pc_nxt(1); -- branch to unaligned address?
222-
elsif (ctrl_i.if_ack = '1') then
222+
elsif (ipb.re(0) = '1') or (ipb.re(1) = '1') then
223223
align_q <= (align_q and (not align_clr)) or align_set; -- alignment "RS flip-flop"
224224
end if;
225225
end if;
@@ -234,14 +234,14 @@ begin
234234
if (align_q = '0') then
235235
if (ipb.rdata(0)(1 downto 0) /= "11") then -- compressed, consume IPB(0) entry
236236
align_set <= ipb.avail(0); -- start of next instruction word is NOT 32-bit-aligned
237-
ipb_ack <= "01";
238-
frontend_o.valid <= ipb.avail(0);
237+
issue_valid(0) <= ipb.avail(0);
238+
issue_valid(1) <= '0';
239239
frontend_o.fault <= ipb.rdata(0)(16);
240240
frontend_o.instr <= cmd32;
241241
frontend_o.compr <= '1';
242242
else -- aligned uncompressed, consume both IPB entries
243-
ipb_ack <= "11";
244-
frontend_o.valid <= ipb.avail(1) and ipb.avail(0);
243+
issue_valid(0) <= ipb.avail(1) and ipb.avail(0);
244+
issue_valid(1) <= ipb.avail(1) and ipb.avail(0);
245245
frontend_o.fault <= ipb.rdata(1)(16) or ipb.rdata(0)(16);
246246
frontend_o.instr <= ipb.rdata(1)(15 downto 0) & ipb.rdata(0)(15 downto 0);
247247
frontend_o.compr <= '0';
@@ -250,24 +250,27 @@ begin
250250
else
251251
if (ipb.rdata(1)(1 downto 0) /= "11") then -- compressed, consume IPB(1) entry
252252
align_clr <= ipb.avail(1); -- start of next instruction word is 32-bit-aligned again
253-
ipb_ack <= "10";
254-
frontend_o.valid <= ipb.avail(1);
253+
issue_valid(0) <= '0';
254+
issue_valid(1) <= ipb.avail(1);
255255
frontend_o.fault <= ipb.rdata(1)(16);
256256
frontend_o.instr <= cmd32;
257257
frontend_o.compr <= '1';
258258
else -- unaligned uncompressed, consume both IPB entries
259-
ipb_ack <= "11";
260-
frontend_o.valid <= ipb.avail(0) and ipb.avail(1);
259+
issue_valid(0) <= ipb.avail(0) and ipb.avail(1);
260+
issue_valid(1) <= ipb.avail(0) and ipb.avail(1);
261261
frontend_o.fault <= ipb.rdata(0)(16) or ipb.rdata(1)(16);
262262
frontend_o.instr <= ipb.rdata(0)(15 downto 0) & ipb.rdata(1)(15 downto 0);
263263
frontend_o.compr <= '0';
264264
end if;
265265
end if;
266266
end process issue_fsm_comb;
267267

268+
-- issue valid instruction word to execution stage --
269+
frontend_o.valid <= issue_valid(1) or issue_valid(0);
270+
268271
-- IPB read access --
269-
ipb.re(0) <= ipb_ack(0) and ctrl_i.if_ack;
270-
ipb.re(1) <= ipb_ack(1) and ctrl_i.if_ack;
272+
ipb.re(0) <= issue_valid(0) and ctrl_i.if_ready;
273+
ipb.re(1) <= issue_valid(1) and ctrl_i.if_ready;
271274

272275
end generate; -- /issue_enabled
273276

@@ -277,10 +280,10 @@ begin
277280
align_q <= '0';
278281
align_set <= '0';
279282
align_clr <= '0';
280-
ipb_ack <= (others => '0');
283+
issue_valid <= (others => '0');
281284
cmd16 <= (others => '0');
282285
cmd32 <= (others => '0');
283-
ipb.re <= (others => ctrl_i.if_ack);
286+
ipb.re <= (others => (ctrl_i.if_ready and ipb.avail(0)));
284287
frontend_o.valid <= ipb.avail(0);
285288
frontend_o.instr <= ipb.rdata(1)(15 downto 0) & ipb.rdata(0)(15 downto 0);
286289
frontend_o.compr <= '0';

rtl/core/neorv32_package.vhd

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ package neorv32_package is
2828

2929
-- Architecture Constants -----------------------------------------------------------------
3030
-- -------------------------------------------------------------------------------------------
31-
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01120108"; -- hardware version
31+
constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01120109"; -- hardware version
3232
constant archid_c : natural := 19; -- official RISC-V architecture ID
3333
constant XLEN : natural := 32; -- native data path width
3434

@@ -532,7 +532,7 @@ package neorv32_package is
532532
-- instruction fetch --
533533
if_fence : std_ulogic; -- fence.i operation
534534
if_reset : std_ulogic; -- restart instruction fetch
535-
if_ack : std_ulogic; -- consume data from instruction fetch
535+
if_ready : std_ulogic; -- ready for next instruction
536536
-- program counter --
537537
pc_cur : std_ulogic_vector(31 downto 0); -- address of current instruction
538538
pc_nxt : std_ulogic_vector(31 downto 0); -- address of next instruction
@@ -579,11 +579,11 @@ package neorv32_package is
579579
cpu_debug : std_ulogic; -- set when CPU is in debug mode
580580
end record;
581581

582-
-- control bus reset initializer --
582+
-- control bus reset initializer --
583583
constant ctrl_bus_zero_c : ctrl_bus_t := (
584584
if_fence => '0',
585585
if_reset => '0',
586-
if_ack => '0',
586+
if_ready => '0',
587587
pc_cur => (others => '0'),
588588
pc_nxt => (others => '0'),
589589
pc_ret => (others => '0'),

0 commit comments

Comments
 (0)