diff --git a/.gitignore b/.gitignore index ebc50ae..ee63a48 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,5 @@ synth-ips.log build-hw.log profile-ips.log magia_venv/ +modelsim.ini +sw/tests/*/ diff --git a/Bender.local b/Bender.local index c7a6a13..65b4237 100644 --- a/Bender.local +++ b/Bender.local @@ -1,9 +1,10 @@ overrides: fpnew : { git: "https://github.com/pulp-platform/cvfpu.git" , rev: a8e0cba6dd50f357ece73c2c955d96efc3c6c315 } hci : { git: "https://github.com/pulp-platform/hci.git" , rev: 5a48a854573fca5bbabc1cfd4110fa4530a50ed7 } - cv32e40p : { git: "https://github.com/pulp-platform/cv32e40p.git" , rev: 1a93f340e9dadb9f7c8c471f27a40932c8b1c62e } - cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: a90101211048ba1a16cedbe4db963ab6e12569d7 } - axi : { git: "https://github.com/pulp-platform/axi.git" , version: 0.39.5 } + cv32e40p : { git: "https://github.com/pulp-platform/cv32e40p.git" , rev: 37a82d337ba60129c333d104c29e816d0698b53b } + cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: e51af131252027374d083a745ab6727fb9959daa } + axi : { git: "https://github.com/pulp-platform/axi.git" , version: 0.39.5 } + obi : { git: "https://github.com/pulp-platform/obi.git" , rev: 6a724da5c8d6412b88b6948746e04c1adf39d017 } register_interface : { git: "https://github.com/pulp-platform/register_interface.git", rev: e25b36670ff7aab3402f40efcc2b11ee0f31cf19 } idma : { git: "https://github.com/pulp-platform/iDMA.git" , rev: c12caf59bb482fe44b27361f6924ad346b2d22fe } tech_cells_generic : { git: "https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.13 } diff --git a/Bender.yml b/Bender.yml index 33fb6e3..a781a35 100644 --- a/Bender.yml +++ b/Bender.yml @@ -20,25 +20,25 @@ package: name: magia authors: - "Victor Isachi (victor.isachi@unibo.it)" + - "Luca Balboni (luca.balboni10@studio.unibo.it)" dependencies: redmule : { git: "https://github.com/pulp-platform/redmule.git" , rev: 
9a1aa14be0b23f0ade84bab57e7e434397ac9876 } # branch: vi/scale_up - cv32e40x : { git: "https://github.com/pulp-platform/cv32e40x.git" , rev: a90101211048ba1a16cedbe4db963ab6e12569d7 } # branch: vi/redmule_scaleup + cv32e40p : { git: "https://github.com/pulp-platform/cv32e40p.git" , rev: 37a82d337ba60129c333d104c29e816d0698b53b } idma : { git: "https://github.com/pulp-platform/iDMA.git" , rev: a6b190c7991331432afa9a2899d032bc1b176830 } # branch: vi/redmule_scaleup hwpe-stream : { git: "https://github.com/pulp-platform/hwpe-stream.git" , version: 1.6 } hwpe-ctrl : { git: "https://github.com/pulp-platform/hwpe-ctrl.git" , rev: c35d5b0886ab549fb9144c3c14a4682112330e21 } # branch: yt/reqrsp hci : { git: "https://github.com/pulp-platform/hci.git" , rev: 5a48a854573fca5bbabc1cfd4110fa4530a50ed7 } # branch: vi/redmule_scaleup cluster_icache : { git: "https://github.com/pulp-platform/cluster_icache.git" , rev: 917ecbf908bdaa22c5713bbcff277d142506bb16 } # branch: michaero/astral - fpnew : { git: "https://github.com/pulp-platform/cvfpu.git" , rev: "pulp-v0.1.3" } - fpu_ss : { git: "https://github.com/pulp-platform/fpu_ss.git" , rev: 8e2eff774d9d38a1e17a46bd56a0936dac9522f0 } # branch: vi/bender_manifest - obi : { git: "https://github.com/pulp-platform/obi.git" , version: 0.1.6 } + obi : { git: "https://github.com/pulp-platform/obi.git" , rev: 6a724da5c8d6412b88b6948746e04c1adf39d017 } axi : { git: "https://github.com/pulp-platform/axi.git" , version: 0.39.5 } register_interface: { git: "https://github.com/pulp-platform/register_interface.git", version: 0.4.4 } - safety_island : { git: "https://github.com/pulp-platform/safety_island.git" , rev: 2273db6c780ab7c582feaf0c9645ad644c35aa11 } # branch: vi/redmule_scaleup + axi_obi : { git: "https://github.com/pulp-platform/axi_obi.git" , rev: 84f21a6524bedcf17a569a62ac01b8a5610819c8 } common_cells : { git: "https://github.com/pulp-platform/common_cells.git" , version: 1.21.0 } tech_cells_generic: { git: 
"https://github.com/pulp-platform/tech_cells_generic.git", version: 0.2.11 } fractal_sync : { git: "https://github.com/VictorIsachi/fractal_sync" , rev: fdb619f40f99d769cfceb20ac2117ff8d99e98a3 } # branch: main floo_noc : { git: "https://github.com/pulp-platform/FlooNoC.git" , rev: f4a36265cda8b56faee45692afb20ddfffba6dee } # branch: main + event_unit_flex : { git: "https://github.com/pulp-platform/event_unit_flex.git" , rev: 763c3b9977970f656326c70a96debfb2ac0f85b2 } export_include_dirs: - hw/include @@ -55,6 +55,9 @@ sources: - hw/mesh/magia_pkg.sv - hw/tile/magia_tile_pkg.sv # MAGIA Tile + - hw/include/xbar_periph_bus_if.sv + - hw/tile/cluster_event_map.sv + - hw/tile/magia_event_unit.sv - hw/tile/converters/data2obi.sv - hw/tile/converters/obi2data.sv - hw/tile/converters/instr2obi.sv @@ -63,14 +66,14 @@ sources: - hw/tile/converters/cache2instr.sv - hw/tile/converters/obi2hci.sv - hw/tile/converters/hci2obi.sv - - hw/tile/converters/xif_if2struct.sv + - hw/tile/converters/obi2hwpe_ctrl.sv - hw/tile/obi_demux_addr.sv - hw/tile/l1_spm.sv - - hw/tile/idma_xif_inst_decoder.sv - - hw/tile/xif_inst_dispatcher.sv - hw/tile/idma_axi_obi_transfer_ch.sv - - hw/tile/idma_ctrl.sv - - hw/tile/fractal_sync_xif_inst_decoder.sv + - hw/tile/idma_obi_ctrl_decoder.sv + - hw/tile/idma_ctrl_mm.sv + - hw/tile/obi_slave_fsync.sv + - hw/tile/core_data_demux_eu_direct.sv - hw/tile/magia_tile.sv # MAGIA DV - target/sim/src/tile/magia_tile_tb_pkg.sv @@ -95,6 +98,9 @@ sources: - hw/tile/magia_tile_pkg.sv - hw/mesh/noc/magia_noc_pkg.sv # MAGIA Tile + - hw/include/xbar_periph_bus_if.sv + - hw/tile/cluster_event_map.sv + - hw/tile/magia_event_unit.sv - hw/tile/converters/data2obi.sv - hw/tile/converters/obi2data.sv - hw/tile/converters/instr2obi.sv @@ -103,14 +109,14 @@ sources: - hw/tile/converters/cache2instr.sv - hw/tile/converters/obi2hci.sv - hw/tile/converters/hci2obi.sv - - hw/tile/converters/xif_if2struct.sv + - hw/tile/converters/obi2hwpe_ctrl.sv - hw/tile/obi_demux_addr.sv - 
hw/tile/l1_spm.sv - - hw/tile/idma_xif_inst_decoder.sv - - hw/tile/xif_inst_dispatcher.sv - hw/tile/idma_axi_obi_transfer_ch.sv - - hw/tile/idma_ctrl.sv - - hw/tile/fractal_sync_xif_inst_decoder.sv + - hw/tile/idma_obi_ctrl_decoder.sv + - hw/tile/idma_ctrl_mm.sv + - hw/tile/obi_slave_fsync.sv + - hw/tile/core_data_demux_eu_direct.sv - hw/tile/magia_tile.sv # MAGIA - hw/mesh/magia.sv @@ -121,7 +127,6 @@ sources: - target/sim/src/mesh/magia_vip.sv - target/sim/src/mesh/magia_fixture.sv - target/sim/src/mesh/magia_tb.sv - - target: all(not(magia_dv), asic) files: # NoC @@ -135,6 +140,9 @@ sources: - hw/tile/magia_tile_pkg.sv - hw/mesh/noc/magia_noc_pkg.sv # MAGIA Tile + - hw/include/xbar_periph_bus_if.sv + - hw/tile/cluster_event_map.sv + - hw/tile/magia_event_unit.sv - hw/tile/converters/data2obi.sv - hw/tile/converters/obi2data.sv - hw/tile/converters/instr2obi.sv @@ -143,14 +151,14 @@ sources: - hw/tile/converters/cache2instr.sv - hw/tile/converters/obi2hci.sv - hw/tile/converters/hci2obi.sv - - hw/tile/converters/xif_if2struct.sv + - hw/tile/converters/obi2hwpe_ctrl.sv - hw/tile/obi_demux_addr.sv - hw/tile/l1_spm.sv - - hw/tile/idma_xif_inst_decoder.sv - - hw/tile/xif_inst_dispatcher.sv - hw/tile/idma_axi_obi_transfer_ch.sv - - hw/tile/idma_ctrl.sv - - hw/tile/fractal_sync_xif_inst_decoder.sv + - hw/tile/idma_obi_ctrl_decoder.sv + - hw/tile/idma_ctrl_mm.sv + - hw/tile/obi_slave_fsync.sv + - hw/tile/core_data_demux_eu_direct.sv - hw/tile/magia_tile.sv # MAGIA - hw/mesh/noc/floo_axi_mesh_2x2_noc.sv diff --git a/Makefile b/Makefile index 81b7b0b..0f629e2 100644 --- a/Makefile +++ b/Makefile @@ -33,7 +33,7 @@ BENDER_DIR ?= . 
ISA ?= riscv ARCH ?= rv XLEN ?= 32 -XTEN ?= imafc +XTEN ?= imfcxpulpv2 ABI ?= ilp XABI ?= f @@ -202,11 +202,12 @@ include bender_sim.mk include bender_synth.mk include bender_profile.mk -bender_defs += -D COREV_ASSERT_OFF bender_targs += -t rtl bender_targs += -t test -bender_targs += -t cv32e40p_exclude_tracer +bender_targs += -t cv32e40p_include_tracer + + # Targets needed to avoid error even though the module is not used bender_targs += -t snitch_cluster bender_targs += -t idma_test @@ -226,9 +227,9 @@ ifeq ($(mesh_dv),1) else tb := magia_tile_tb endif -WAVES := ./wave.do -bender_targs += -t redmule_complex -bender_targs += -t cv32e40x_bhv +WAVES := $(mkfile_path)/wave.do +bender_targs += -t redmule_hwpe + update-ips: $(BENDER) update diff --git a/hw/include/xbar_periph_bus_if.sv b/hw/include/xbar_periph_bus_if.sv new file mode 100644 index 0000000..6cdb289 --- /dev/null +++ b/hw/include/xbar_periph_bus_if.sv @@ -0,0 +1,40 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
// Peripheral crossbar bus interface.
//
// Bundles a request channel (req/gnt handshake plus address, write-enable,
// write data, byte enables and a transaction ID) with a response channel
// (valid, opcode, returned ID and read data). Address and data are fixed at
// 32 bits, byte enables at 4 bits; only the ID width is configurable.
interface XBAR_PERIPH_BUS #(
  parameter int ID_WIDTH = 2  // typically number of cores plus one
);

  // ---------------------------------------------------------------------
  // Request channel
  // ---------------------------------------------------------------------
  logic                req;
  logic                gnt;
  logic                wen;
  logic [3:0]          be;
  logic [31:0]         add;
  logic [31:0]         wdata;
  logic [ID_WIDTH-1:0] id;

  // ---------------------------------------------------------------------
  // Response channel
  // ---------------------------------------------------------------------
  logic                r_valid;
  logic                r_opc;
  logic [ID_WIDTH-1:0] r_id;
  logic [31:0]         r_rdata;

  // Initiator view: drives the request payload, samples grant and response.
  modport Master (
    input  gnt,
    input  r_valid, r_opc, r_id, r_rdata,
    output req,
    output add, wen, wdata, be, id
  );

  // Target view: mirror image of the Master modport.
  modport Slave (
    output gnt,
    output r_valid, r_opc, r_id, r_rdata,
    input  req,
    input  add, wen, wdata, be, id
  );

endinterface : XBAR_PERIPH_BUS
// Cluster event map.
//
// Packs the per-core event sources into one 32-bit event word per core:
//
//   [31:16] cluster_events_i[core][31:16]
//   [15:12] reserved (0)
//   [11:8]  acc_events_i[core]
//   [7:6]   reserved (0)
//   [5:4]   timer_events_i[core]
//   [3:2]   dma_events_i[core]
//   [1]     dispatch_events_i[core]
//   [0]     barrier_events_i[core] | mutex_events_i[core] | periph_fifo_event_i
//
// sw_events_i and the lower half of cluster_events_i are accepted on the
// port list but do not contribute to the output word.
module cluster_event_map #(
  parameter int unsigned NB_CORES = 1
)(
  // Input events from various sources
  input  logic [NB_CORES-1:0] [7:0] sw_events_i,          // Software events
  input  logic [NB_CORES-1:0]       barrier_events_i,     // Barrier events (reduced)
  input  logic [NB_CORES-1:0]       mutex_events_i,       // Mutex events (reduced)
  input  logic [NB_CORES-1:0]       dispatch_events_i,    // Dispatch events
  input  logic                      periph_fifo_event_i,  // Peripheral FIFO event

  // Hardware events from accelerators, DMA, timers, etc.
  input  logic [NB_CORES-1:0] [3:0] acc_events_i,         // Accelerator events (4 bits per core)
  input  logic [NB_CORES-1:0] [1:0] dma_events_i,         // DMA events (2 bits per core)
  input  logic [NB_CORES-1:0] [1:0] timer_events_i,       // Timer events (2 bits per core)
  input  logic [NB_CORES-1:0][31:0] cluster_events_i,     // Custom cluster events (32 bits per core)

  // Output: mapped events for each core
  output logic [NB_CORES-1:0][31:0] events_mapped_o
);

  // Purely combinational: every field of every core's word is written on
  // each evaluation, so no state is inferred.
  always_comb begin : pack_event_words
    for (int unsigned core = 0; core < NB_CORES; core++) begin
      events_mapped_o[core][31:16] = cluster_events_i[core][31:16];
      events_mapped_o[core][15:12] = 4'h0;
      events_mapped_o[core][11:8]  = acc_events_i[core];
      events_mapped_o[core][7:6]   = 2'b00;
      events_mapped_o[core][5:4]   = timer_events_i[core];
      events_mapped_o[core][3:2]   = dma_events_i[core];
      events_mapped_o[core][1]     = dispatch_events_i[core];
      // The peripheral FIFO event is shared: it is OR-ed into bit 0 of
      // every core together with that core's barrier and mutex events.
      events_mapped_o[core][0]     = |{barrier_events_i[core],
                                       mutex_events_i[core],
                                       periph_fifo_event_i};
    end
  end

endmodule : cluster_event_map
a/hw/tile/converters/data2obi.sv +++ b/hw/tile/converters/data2obi.sv @@ -34,11 +34,11 @@ module data2obi_req assign obi_req_o.a.aid = 'b0; assign obi_req_o.a.a_optional.auser = 'b0; assign obi_req_o.a.a_optional.wuser = 'b0; - assign obi_req_o.a.a_optional.atop = data_req_i.atop; - assign obi_req_o.a.a_optional.memtype = data_req_i.memtype; + assign obi_req_o.a.a_optional.atop = 6'b0; + assign obi_req_o.a.a_optional.memtype = 2'b0; assign obi_req_o.a.a_optional.mid = 'b0; - assign obi_req_o.a.a_optional.prot = data_req_i.prot; - assign obi_req_o.a.a_optional.dbg = data_req_i.dbg; + assign obi_req_o.a.a_optional.prot = 3'b0; + assign obi_req_o.a.a_optional.dbg = 1'b0; assign obi_req_o.a.a_optional.achk = 'b0; endmodule: data2obi_req diff --git a/hw/tile/converters/obi2data.sv b/hw/tile/converters/obi2data.sv index 3b5e705..b3f5717 100644 --- a/hw/tile/converters/obi2data.sv +++ b/hw/tile/converters/obi2data.sv @@ -30,6 +30,6 @@ module obi2data_rsp assign data_rsp_o.rvalid = obi_rsp_i.rvalid; assign data_rsp_o.rdata = obi_rsp_i.r.rdata; assign data_rsp_o.err = obi_rsp_i.r.err; - assign data_rsp_o.exokay = obi_rsp_i.r.r_optional.exokay; + // cv32e40p doesn't support exclusive access - exokay field removed endmodule: obi2data_rsp \ No newline at end of file diff --git a/hw/tile/converters/obi2hwpe_ctrl.sv b/hw/tile/converters/obi2hwpe_ctrl.sv new file mode 100644 index 0000000..21c35e3 --- /dev/null +++ b/hw/tile/converters/obi2hwpe_ctrl.sv @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Solderpad Hardware License, Version 0.51 + * (the "License"); you may not use this file except in compliance + * with the License. 
/*
 * Authors: Luca Balboni
 *
 * OBI to HWPE-ctrl bridge - memory-mapped control interface for RedMulE
 *
 * Purely combinational adapter: the OBI address phase is forwarded as an
 * HWPE-ctrl request and the HWPE-ctrl response is returned as the OBI
 * response phase. No buffering, no address translation.
 */

module obi2hwpe_ctrl
  import magia_tile_pkg::*;
(
  // OBI side
  input  core_obi_data_req_t obi_req_i,
  output core_obi_data_rsp_t obi_rsp_o,

  // HWPE-ctrl (RedMulE) side
  output redmule_ctrl_req_t  ctrl_req_o,
  input  redmule_ctrl_rsp_t  ctrl_rsp_i
);

  // ------------------------
  // Address channel mapping
  // ------------------------
  assign ctrl_req_o.req  = obi_req_i.req;
  assign ctrl_req_o.add  = obi_req_i.a.addr;   // address is passed through unmodified
  assign ctrl_req_o.data = obi_req_i.a.wdata;
  assign ctrl_req_o.be   = obi_req_i.a.be;
  assign ctrl_req_o.wen  = ~obi_req_i.a.we;    // polarity is inverted between OBI `we` and HWPE-ctrl `wen`
  assign ctrl_req_o.id   = '0;                 // OBI doesn't have ID in this config

  // ------------------------
  // Response channel mapping
  // ------------------------
  // Start from an all-zero response so that every field of the OBI response
  // struct is driven. The fields below are the only ones this bridge maps;
  // anything else the struct carries would otherwise be left undriven and
  // show up as X in simulation.
  // NOTE(review): presumably core_obi_data_rsp_t also carries r.rid and
  // r.r_optional - confirm against magia_tile_pkg.
  always_comb begin : obi_response
    obi_rsp_o         = '{default: '0};
    obi_rsp_o.gnt     = ctrl_rsp_i.gnt;        // 1:1 handshake
    obi_rsp_o.rvalid  = ctrl_rsp_i.r_valid;
    obi_rsp_o.r.rdata = ctrl_rsp_i.r_data;
    obi_rsp_o.r.err   = 1'b0;                  // no error is ever reported on this path
  end

endmodule : obi2hwpe_ctrl
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * SPDX-License-Identifier: SHL-0.51 - * - * Authors: Victor Isachi - * - * Xif Interface - Struct Converter - */ - -module xif_if2struct - import cv32e40x_pkg::*; - import fpu_ss_pkg::*; -( - cv32e40x_if_xif.coproc_compressed xif_compressed_if_i, - cv32e40x_if_xif.coproc_issue xif_issue_if_i, - cv32e40x_if_xif.coproc_commit xif_commit_if_i, - cv32e40x_if_xif.coproc_mem xif_mem_if_o, - cv32e40x_if_xif.coproc_mem_result xif_mem_result_if_i, - cv32e40x_if_xif.coproc_result xif_result_if_o, - - output logic x_compressed_valid_o, - input logic x_compressed_ready_i, - output fpu_ss_pkg::x_compressed_req_t x_compressed_req_o, - input fpu_ss_pkg::x_compressed_resp_t x_compressed_resp_i, - - output logic x_issue_valid_o, - input logic x_issue_ready_i, - output fpu_ss_pkg::x_issue_req_t x_issue_req_o, - input fpu_ss_pkg::x_issue_resp_t x_issue_resp_i, - - output logic x_commit_valid_o, - output fpu_ss_pkg::x_commit_t x_commit_o, - - input logic x_mem_valid_i, - output logic x_mem_ready_o, - input fpu_ss_pkg::x_mem_req_t x_mem_req_i, - output fpu_ss_pkg::x_mem_resp_t x_mem_resp_o, - - output logic x_mem_result_valid_o, - output fpu_ss_pkg::x_mem_result_t x_mem_result_o, - - input logic x_result_valid_i, - output logic x_result_ready_o, - input fpu_ss_pkg::x_result_t x_result_i -); - - assign x_compressed_valid_o = xif_compressed_if_i.compressed_valid; - assign xif_compressed_if_i.compressed_ready = x_compressed_ready_i; - assign x_compressed_req_o = xif_compressed_if_i.compressed_req; - assign xif_compressed_if_i.compressed_resp = x_compressed_resp_i; - - 
assign x_issue_valid_o = xif_issue_if_i.issue_valid; - assign xif_issue_if_i.issue_ready = x_issue_ready_i; - assign x_issue_req_o = xif_issue_if_i.issue_req; - assign xif_issue_if_i.issue_resp = x_issue_resp_i; - - assign x_commit_valid_o = xif_commit_if_i.commit_valid; - assign x_commit_o = xif_commit_if_i.commit; - - assign xif_mem_if_o.mem_valid = x_mem_valid_i; - assign x_mem_ready_o = xif_mem_if_o.mem_ready; - assign xif_mem_if_o.mem_req = x_mem_req_i; - assign x_mem_resp_o = xif_mem_if_o.mem_resp; - - assign x_mem_result_valid_o = xif_mem_result_if_i.mem_result_valid; - assign x_mem_result_o = xif_mem_result_if_i.mem_result; - - assign xif_result_if_o.result_valid = x_result_valid_i; - assign x_result_ready_o = xif_result_if_o.result_ready; - assign xif_result_if_o.result = x_result_i; - -endmodule: xif_if2struct \ No newline at end of file diff --git a/hw/tile/core_data_demux_eu_direct.sv b/hw/tile/core_data_demux_eu_direct.sv new file mode 100644 index 0000000..7e906d2 --- /dev/null +++ b/hw/tile/core_data_demux_eu_direct.sv @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Solderpad Hardware License, Version 0.51 + * (the "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
/*
 * Authors: Luca Balboni
 *
 * Core Data Demux EU Direct Link
 *
 * Splits the core data requests between:
 *   - the regular crossbar (general memory/peripheral access)
 *   - the EU direct link (low-latency Event Unit access, WFE control)
 *
 * Routing is by address: requests with an address inside
 * [EVENT_UNIT_ADDR_START, EVENT_UNIT_ADDR_END] (both inclusive) go to the
 * EU direct link, everything else goes to the crossbar.
 *
 * Responses carry no routing information, so the response mux follows the
 * destination of the most recently granted request. To keep that valid when
 * more than one request is in flight, the demux counts granted-but-unanswered
 * requests and holds back (neither forwards nor grants) a request that
 * targets the other path until the selected path has returned all of its
 * responses. A request that switches path in the very cycle the last
 * pending response arrives is let through without an extra wait cycle.
 *
 * NOTE(review): the attached core is expected to allow more than one
 * outstanding data transaction - confirm against the core manual.
 *
 * Parameters:
 *   EVENT_UNIT_ADDR_START  first address routed to the EU direct link
 *   EVENT_UNIT_ADDR_END    last address routed to the EU direct link
 *   MAX_OUTSTANDING        upper bound on tracked in-flight requests (>= 1);
 *                          once reached, further requests are held back
 *                          until a response returns
 */

module core_data_demux_eu_direct
  import magia_tile_pkg::*;
  import magia_pkg::*;
#(
  parameter logic [magia_pkg::ADDR_W-1:0] EVENT_UNIT_ADDR_START = magia_tile_pkg::EVENT_UNIT_ADDR_START,
  parameter logic [magia_pkg::ADDR_W-1:0] EVENT_UNIT_ADDR_END   = magia_tile_pkg::EVENT_UNIT_ADDR_END,
  parameter int unsigned                  MAX_OUTSTANDING       = 4
)(
  input  logic                             clk_i,
  input  logic                             rst_ni,

  // Core data interface (input from cv32e40p)
  input  magia_tile_pkg::core_data_req_t   core_data_req_i,
  output magia_tile_pkg::core_data_rsp_t   core_data_rsp_o,

  // Regular crossbar interface (for general memory/peripheral access)
  output magia_tile_pkg::core_data_req_t   xbar_data_req_o,
  input  magia_tile_pkg::core_data_rsp_t   xbar_data_rsp_i,

  // EU direct link interface (abstract types)
  output magia_tile_pkg::eu_direct_req_t   eu_direct_req_o,
  input  magia_tile_pkg::eu_direct_rsp_t   eu_direct_rsp_i
);

  // Wide enough to hold the value MAX_OUTSTANDING itself.
  localparam int unsigned CNT_W = $clog2(MAX_OUTSTANDING + 1);

  typedef enum logic {XBAR, EU} destination_e;

  // Path that owes the pending responses / path targeted by the current request.
  destination_e request_destination, request_destination_next;

  logic             use_eu_direct;
  logic             request_granted;
  logic             selected_gnt;
  logic             rsp_valid;
  logic             rsp_retires;
  logic             hold_request;
  logic [CNT_W-1:0] outstanding_q;
  logic [CNT_W-1:0] outstanding_after_rsp;
  logic [CNT_W-1:0] outstanding_d;

  // ---------------------------------------------------------------------
  // Request decoding (pure combinational)
  // ---------------------------------------------------------------------
  assign use_eu_direct = core_data_req_i.req &&
                         (core_data_req_i.addr >= EVENT_UNIT_ADDR_START) &&
                         (core_data_req_i.addr <= EVENT_UNIT_ADDR_END);

  assign request_destination_next = use_eu_direct ? EU : XBAR;

  // ---------------------------------------------------------------------
  // Outstanding-transaction tracking
  // ---------------------------------------------------------------------
  // Only the currently selected path is allowed to retire a transaction.
  assign rsp_valid   = (request_destination == EU) ? eu_direct_rsp_i.rvalid
                                                   : xbar_data_rsp_i.rvalid;
  assign rsp_retires = rsp_valid && (outstanding_q != '0);  // never underflow

  assign outstanding_after_rsp = rsp_retires ? (outstanding_q - 1'b1) : outstanding_q;

  // Hold the request back when
  //  - it targets the other path while the selected one still owes
  //    responses after this cycle, or
  //  - the counter cannot take one more in-flight request.
  // Gated with req so that the outputs are unchanged while the core is idle.
  // NOTE(review): this makes the downstream req depend combinationally on
  // the downstream rvalid - confirm neither target derives rvalid from req
  // in the same cycle.
  assign hold_request = core_data_req_i.req &&
                        (((outstanding_after_rsp != '0) &&
                          (request_destination_next != request_destination)) ||
                         (outstanding_after_rsp == MAX_OUTSTANDING));

  // Grant of the addressed path; reaches the core unless the request is held.
  assign selected_gnt    = use_eu_direct ? eu_direct_rsp_i.gnt : xbar_data_rsp_i.gnt;
  assign request_granted = core_data_req_i.req && core_data_rsp_o.gnt;

  assign outstanding_d   = request_granted ? (outstanding_after_rsp + 1'b1)
                                           : outstanding_after_rsp;

  always_ff @(posedge clk_i, negedge rst_ni) begin : _UPDATE_RESPONSE_DESTINATION_
    if (!rst_ni) begin
      request_destination <= XBAR;
      outstanding_q       <= '0;
    end else begin
      outstanding_q <= outstanding_d;
      // The response mux follows the path of the last GRANTED request.
      if (request_granted) begin
        request_destination <= request_destination_next;
      end
    end
  end

  // ---------------------------------------------------------------------
  // To regular crossbar
  // ---------------------------------------------------------------------
  assign xbar_data_req_o.req   = core_data_req_i.req && !use_eu_direct && !hold_request;
  assign xbar_data_req_o.addr  = core_data_req_i.addr;
  assign xbar_data_req_o.be    = core_data_req_i.be;
  assign xbar_data_req_o.wdata = core_data_req_i.wdata;
  assign xbar_data_req_o.we    = core_data_req_i.we;

  // ---------------------------------------------------------------------
  // To EU direct link (abstract interface)
  // ---------------------------------------------------------------------
  // The Event Unit receives the offset relative to EVENT_UNIT_ADDR_START,
  // not the absolute address.
  assign eu_direct_req_o.req   = core_data_req_i.req && use_eu_direct && !hold_request;
  assign eu_direct_req_o.addr  = core_data_req_i.addr - EVENT_UNIT_ADDR_START;
  assign eu_direct_req_o.wen   = ~core_data_req_i.we;  // EU expects wen (write enable negated)
  assign eu_direct_req_o.wdata = core_data_req_i.wdata;
  assign eu_direct_req_o.be    = core_data_req_i.be;

  // ---------------------------------------------------------------------
  // Response routing - uses the stored destination
  // ---------------------------------------------------------------------
  always_comb begin : _HANDLE_RESP_
    if (request_destination == EU) begin
      core_data_rsp_o.rvalid = eu_direct_rsp_i.rvalid;
      core_data_rsp_o.rdata  = eu_direct_rsp_i.rdata;
      core_data_rsp_o.err    = eu_direct_rsp_i.err;
    end else begin
      core_data_rsp_o.rvalid = xbar_data_rsp_i.rvalid;
      core_data_rsp_o.rdata  = xbar_data_rsp_i.rdata;
      core_data_rsp_o.err    = xbar_data_rsp_i.err;
    end
  end

  // GNT is combinational; it is suppressed while the request is held back
  // so the core keeps the request asserted.
  assign core_data_rsp_o.gnt = selected_gnt && !hold_request;

endmodule : core_data_demux_eu_direct
unsigned NBR_ID_W = magia_tile_pkg::FSYNC_NBR_ID_W, - parameter bit STALL = magia_tile_pkg::FSYNC_STALL -)( - input logic clk_i, - input logic rst_ni, - input logic clear_i, - - cv32e40x_if_xif.coproc_issue xif_issue_if_i, - - fractal_sync_if.mst_port ht_fsync_if_o, - fractal_sync_if.mst_port hn_fsync_if_o, - fractal_sync_if.mst_port vt_fsync_if_o, - fractal_sync_if.mst_port vn_fsync_if_o, - - output logic done_o, - output logic error_o -); - -/*******************************************************/ -/** Internal Signal Definitions Beginning **/ -/*******************************************************/ - - logic clk_dec_en, clk_sync_en; - logic clk_dec_g, clk_sync_g; - - logic[OPCODE_W-1:0] opcode; - logic[ FUNC3_W-1:0] func3; - - logic sync; - logic done; - - logic[N_CFG_REG-1:0][DATA_W-1:0] cfg_reg_d, cfg_reg_q; - - typedef enum logic[1:0] { - IDLE, - SYNC, - WAIT, - DONE - } sync_state_e; - - sync_state_e c_sync_state, n_sync_state; - -/*******************************************************/ -/** Internal Signal Definitions End **/ -/*******************************************************/ -/** Hardwired Signals Beginning **/ -/*******************************************************/ - - assign opcode = xif_issue_if_i.issue_req.instr[OPCODE_OFF+:OPCODE_W]; - assign func3 = xif_issue_if_i.issue_req.instr[ FUNC3_OFF+: FUNC3_W]; - - assign done_o = done; - assign error_o = ht_fsync_if_o.error | hn_fsync_if_o.error | vt_fsync_if_o.error | vn_fsync_if_o.error; - -/*******************************************************/ -/** Hardwired Signals End **/ -/*******************************************************/ -/** Clock gating Beginning **/ -/*******************************************************/ - - tc_clk_gating i_dec_clock_gating ( - .clk_i , - .en_i ( clk_dec_en ), - .test_en_i ( '0 ), - .clk_o ( clk_dec_g ) - ); - - tc_clk_gating i_sync_clock_gating ( - .clk_i , - .en_i ( clk_sync_en ), - .test_en_i ( '0 ), - .clk_o ( clk_sync_g ) - ); - 
-/*******************************************************/ -/** Clock gating End **/ -/*******************************************************/ -/** Decoder FSM Beginning **/ -/*******************************************************/ - - generate if (STALL) begin: gen_stalling_decoder - always_comb begin: instruction_decoder - clk_dec_en = 1'b0; - cfg_reg_d = cfg_reg_q; - sync = 1'b0; - - if (xif_issue_if_i.issue_valid) begin - case (opcode) - FSYNC_OPCODE: begin - clk_dec_en = 1'b1; - if ((func3 == FSYNC_FUNC3) && (xif_issue_if_i.issue_req.rs_valid)) begin - cfg_reg_d[magia_tile_pkg::FSYNC_AGGR_IDX] = xif_issue_if_i.issue_req.rs[0]; - cfg_reg_d[magia_tile_pkg::FSYNC_ID_IDX] = xif_issue_if_i.issue_req.rs[1]; - sync = 1'b1; - end - end - endcase - end - end - end else begin: gen_non_stalling_decoder - always_comb begin: instruction_decoder - clk_dec_en = 1'b0; - cfg_reg_d = cfg_reg_q; - sync = 1'b0; - xif_issue_if_i.issue_ready = 1'b0; - xif_issue_if_i.issue_resp = '0; - - if (xif_issue_if_i.issue_valid) begin - case (opcode) - FSYNC_OPCODE: begin - xif_issue_if_i.issue_ready = 1'b1; - xif_issue_if_i.issue_resp.accept = 1'b1; - clk_dec_en = 1'b1; - if ((func3 == FSYNC_FUNC3) && (xif_issue_if_i.issue_req.rs_valid)) begin - cfg_reg_d[magia_tile_pkg::FSYNC_AGGR_IDX] = xif_issue_if_i.issue_req.rs[0]; - cfg_reg_d[magia_tile_pkg::FSYNC_ID_IDX] = xif_issue_if_i.issue_req.rs[1]; - sync = 1'b1; - end - end - endcase - end - end - end endgenerate - - always_ff @(posedge clk_dec_g, negedge rst_ni) begin: configuration_register - if (~rst_ni) cfg_reg_q <= '0; - else begin - if (clear_i) cfg_reg_q <= '0; - else cfg_reg_q <= cfg_reg_d; - end - end - -/*******************************************************/ -/** Decoder FSM End **/ -/*******************************************************/ -/** Synchronization FSM Beginning **/ -/*******************************************************/ - - generate if (STALL) begin: gen_stalling_sync - always_comb begin: sync_logic - n_sync_state 
= c_sync_state; - clk_sync_en = 1'b1; - done = 1'b0; - ht_fsync_if_o.sync = 1'b0; - ht_fsync_if_o.aggr = '0; - ht_fsync_if_o.id_req = '0; - hn_fsync_if_o.sync = 1'b0; - hn_fsync_if_o.aggr = '0; - hn_fsync_if_o.id_req = '0; - vt_fsync_if_o.sync = 1'b0; - vt_fsync_if_o.aggr = '0; - vt_fsync_if_o.id_req = '0; - vn_fsync_if_o.sync = 1'b0; - vn_fsync_if_o.aggr = '0; - vn_fsync_if_o.id_req = '0; - xif_issue_if_i.issue_ready = 1'b0; - xif_issue_if_i.issue_resp = '0; - - case (c_sync_state) - IDLE: if (sync) n_sync_state = SYNC; else clk_sync_en = 1'b0; - SYNC: begin - n_sync_state = WAIT; - if (cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX] != 1) begin // Tree (level > 1) request - case (cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][0]) - 1'b0: begin // Horizontal tree node request - ht_fsync_if_o.sync = 1'b1; - ht_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][AGGR_W-1:0]; - ht_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ ID_W-1:0]; - end - 1'b1: begin // Vertical tree node request - vt_fsync_if_o.sync = 1'b1; - vt_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][AGGR_W-1:0]; - vt_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ ID_W-1:0]; - end - endcase - end else begin // Neighbor (level = 1) request - case (cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][1:0]) - 2'b00: begin // Horizontal tree node request - ht_fsync_if_o.sync = 1'b1; - ht_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][AGGR_W-1:0]; - ht_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ ID_W-1:0]; - end - 2'b01: begin // Vertical tree node request - vt_fsync_if_o.sync = 1'b1; - vt_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][AGGR_W-1:0]; - vt_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ ID_W-1:0]; - end - 2'b10: begin // Horizontal neighbor node request - hn_fsync_if_o.sync = 1'b1; - hn_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][NBR_AGGR_W-1:0]; - hn_fsync_if_o.id_req = 
cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ NBR_ID_W-1:0]; - end - 2'b11: begin // Vertical neighbor node request - vn_fsync_if_o.sync = 1'b1; - vn_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][NBR_AGGR_W-1:0]; - vn_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ NBR_ID_W-1:0]; - end - endcase - end - end - WAIT: begin - n_sync_state = (ht_fsync_if_o.wake | hn_fsync_if_o.wake | vt_fsync_if_o.wake | vn_fsync_if_o.wake) ? DONE : WAIT; - end - DONE: begin - n_sync_state = IDLE; - done = 1'b1; - xif_issue_if_i.issue_ready = 1'b1; - xif_issue_if_i.issue_resp.accept = 1'b1; - end - endcase - end - end else begin: gen_non_stalling_sync - always_comb begin: sync_logic - n_sync_state = c_sync_state; - clk_sync_en = 1'b1; - done = 1'b0; - ht_fsync_if_o.sync = 1'b0; - ht_fsync_if_o.aggr = '0; - ht_fsync_if_o.id_req = '0; - hn_fsync_if_o.sync = 1'b0; - hn_fsync_if_o.aggr = '0; - hn_fsync_if_o.id_req = '0; - vt_fsync_if_o.sync = 1'b0; - vt_fsync_if_o.aggr = '0; - vt_fsync_if_o.id_req = '0; - vn_fsync_if_o.sync = 1'b0; - vn_fsync_if_o.aggr = '0; - vn_fsync_if_o.id_req = '0; - - case (c_sync_state) - IDLE: if (sync) n_sync_state = SYNC; else clk_sync_en = 1'b0; - SYNC: begin - n_sync_state = WAIT; - if (cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX] != 1) begin // Tree (level > 1) request - case (cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][0]) - 1'b0: begin // Horizontal tree node request - ht_fsync_if_o.sync = 1'b1; - ht_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][AGGR_W-1:0]; - ht_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ ID_W-1:0]; - end - 1'b1: begin // Vertical tree node request - vt_fsync_if_o.sync = 1'b1; - vt_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][AGGR_W-1:0]; - vt_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ ID_W-1:0]; - end - endcase - end else begin // Neighbor (level = 1) request - case (cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][1:0]) - 2'b00: begin // Horizontal tree node request - 
ht_fsync_if_o.sync = 1'b1; - ht_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][AGGR_W-1:0]; - ht_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ ID_W-1:0]; - end - 2'b01: begin // Vertical tree node request - vt_fsync_if_o.sync = 1'b1; - vt_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][AGGR_W-1:0]; - vt_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ ID_W-1:0]; - end - 2'b10: begin // Horizontal neighbor node request - hn_fsync_if_o.sync = 1'b1; - hn_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][NBR_AGGR_W-1:0]; - hn_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ NBR_ID_W-1:0]; - end - 2'b11: begin // Vertical neighbor node request - vn_fsync_if_o.sync = 1'b1; - vn_fsync_if_o.aggr = cfg_reg_q[magia_tile_pkg::FSYNC_AGGR_IDX][NBR_AGGR_W-1:0]; - vn_fsync_if_o.id_req = cfg_reg_q[magia_tile_pkg::FSYNC_ID_IDX][ NBR_ID_W-1:0]; - end - endcase - end - end - WAIT: begin - n_sync_state = (ht_fsync_if_o.wake | hn_fsync_if_o.wake | vt_fsync_if_o.wake | vn_fsync_if_o.wake) ? 
DONE : WAIT; - end - DONE: begin - n_sync_state = IDLE; - done = 1'b1; - end - endcase - end - end endgenerate - - always_ff @(posedge clk_sync_g, negedge rst_ni) begin: sync_state - if (~rst_ni) c_sync_state <= IDLE; - else begin - if (clear_i) c_sync_state <= IDLE; - else c_sync_state <= n_sync_state; - end - end - -/*******************************************************/ -/** Synchronization FSM End **/ -/*******************************************************/ - -endmodule: fractal_sync_xif_inst_decoder \ No newline at end of file diff --git a/hw/tile/idma_axi_obi_transfer_ch.sv b/hw/tile/idma_axi_obi_transfer_ch.sv index 8e7a5b6..a51474e 100644 --- a/hw/tile/idma_axi_obi_transfer_ch.sv +++ b/hw/tile/idma_axi_obi_transfer_ch.sv @@ -45,7 +45,13 @@ module idma_axi_obi_transfer_ch input axi_rsp_t axi_rsp_i, output obi_req_t obi_req_o, - input obi_rsp_t obi_rsp_i + input obi_rsp_t obi_rsp_i, + + // IRQ-related outputs + output logic transfer_busy_o, + output logic transfer_start_o, + output logic transfer_done_o, + output logic transfer_error_o ); /*******************************************************/ @@ -321,5 +327,13 @@ module idma_axi_obi_transfer_ch /*******************************************************/ /** Back-end End **/ /*******************************************************/ +/** IRQ Signal Generation **/ +/*******************************************************/ + + // Generate IRQ signals from internal transfer state + assign transfer_busy_o = |busy; // Any busy indication from iDMA + assign transfer_start_o = issue_id; // Transfer started (ID issued) + assign transfer_done_o = retire_id; // Transfer completed (ID retired) + assign transfer_error_o = eh_req_valid; // Error handling request indicates error endmodule: idma_axi_obi_transfer_ch \ No newline at end of file diff --git a/hw/tile/idma_ctrl.sv b/hw/tile/idma_ctrl.sv deleted file mode 100644 index 58e2e3c..0000000 --- a/hw/tile/idma_ctrl.sv +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Copyright 
(C) 2023-2024 ETH Zurich and University of Bologna - * - * Licensed under the Solderpad Hardware License, Version 0.51 - * (the "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * SPDX-License-Identifier: SHL-0.51 - * - * Authors: Victor Isachi - * - * iDMA Controller - */ - -module idma_ctrl - import magia_tile_pkg::*; - import cv32e40x_pkg::*; - import idma_pkg::*; -#( - parameter idma_pkg::error_cap_e ERROR_CAP = idma_pkg::NO_ERROR_HANDLING, - localparam int unsigned DIRECTION_W = magia_tile_pkg::DMA_DIRECTION_W, - localparam int unsigned DIRECTION_OFF = magia_tile_pkg::DMA_DIRECTION_OFF, - localparam type idma_fe_reg_req_t = magia_tile_pkg::idma_fe_reg_req_t, - localparam type idma_fe_reg_rsp_t = magia_tile_pkg::idma_fe_reg_rsp_t, - parameter type axi_req_t = magia_tile_pkg::idma_axi_req_t, - parameter type axi_rsp_t = magia_tile_pkg::idma_axi_rsp_t, - parameter type obi_req_t = magia_tile_pkg::idma_obi_req_t, - parameter type obi_rsp_t = magia_tile_pkg::idma_obi_rsp_t -)( - input logic clk_i, - input logic rst_ni, - input logic testmode_i, - input logic clear_i, - - cv32e40x_if_xif.coproc_issue xif_issue_if_i, - - output axi_req_t axi_read_req_o, - input axi_rsp_t axi_read_rsp_i, - - output axi_req_t axi_write_req_o, - input axi_rsp_t axi_write_rsp_i, - - output obi_req_t obi_read_req_o, - input obi_rsp_t obi_read_rsp_i, - - output obi_req_t obi_write_req_o, - input obi_rsp_t obi_write_rsp_i, - - output logic axi2obi_start_o, // Started L2 to L1 iDMA transfer - output logic axi2obi_busy_o, // Performing L2 to L1 
iDMA transfer - output logic axi2obi_done_o, // Finished L2 to L1 iDMA transfer - output logic axi2obi_error_o, // Detected L2 to L1 transfer error - - output logic obi2axi_start_o, // Started L1 to L2 iDMA transfer - output logic obi2axi_busy_o, // Performing L1 to L2 iDMA transfer - output logic obi2axi_done_o, // Finished L1 to L2 iDMA transfer - output logic obi2axi_error_o // Detected L1 to L2 transfer error -); - -/*******************************************************/ -/** Internal Signal Definitions Beginning **/ -/*******************************************************/ - - logic direction; // Direction of the iDMA transfer: 0 -> AXI2OBI; 1 -> OBI2AXI - - idma_fe_reg_req_t idma_fe_reg_axi2obi_req; - idma_fe_reg_rsp_t idma_fe_reg_axi2obi_rsp; - idma_fe_reg_req_t idma_fe_reg_obi2axi_req; - idma_fe_reg_rsp_t idma_fe_reg_obi2axi_rsp; - -/*******************************************************/ -/** Internal Signal Definitions End **/ -/*******************************************************/ -/** Interface Definitions Beginning **/ -/*******************************************************/ - - cv32e40x_if_xif #( - .X_NUM_RS ( magia_tile_pkg::X_NUM_RS ), - .X_ID_WIDTH ( magia_tile_pkg::X_ID_W ), - .X_MEM_WIDTH ( magia_tile_pkg::X_MEM_W ), - .X_RFR_WIDTH ( magia_tile_pkg::X_RFR_W ), - .X_RFW_WIDTH ( magia_tile_pkg::X_RFW_W ), - .X_MISA ( magia_tile_pkg::X_MISA ), - .X_ECS_XS ( magia_tile_pkg::X_ECS_XS ) - ) xif_axi2obi_issue_if (); - - cv32e40x_if_xif #( - .X_NUM_RS ( magia_tile_pkg::X_NUM_RS ), - .X_ID_WIDTH ( magia_tile_pkg::X_ID_W ), - .X_MEM_WIDTH ( magia_tile_pkg::X_MEM_W ), - .X_RFR_WIDTH ( magia_tile_pkg::X_RFR_W ), - .X_RFW_WIDTH ( magia_tile_pkg::X_RFW_W ), - .X_MISA ( magia_tile_pkg::X_MISA ), - .X_ECS_XS ( magia_tile_pkg::X_ECS_XS ) - ) xif_obi2axi_issue_if (); - -/*******************************************************/ -/** Interface Definitions End **/ -/*******************************************************/ -/** Hardwired Signals Beginning **/ 
-/*******************************************************/ - - assign direction = xif_issue_if_i.issue_req.instr[DIRECTION_OFF+:DIRECTION_W]; - -/*******************************************************/ -/** Hardwired Signals End **/ -/*******************************************************/ -/** Xif Issue DEMUX Beginning **/ -/*******************************************************/ - - always_comb begin: xif_issue_demux - if (direction) begin // OBI2AXI - xif_obi2axi_issue_if.issue_valid = xif_issue_if_i.issue_valid; - xif_obi2axi_issue_if.issue_req = xif_issue_if_i.issue_req; - xif_issue_if_i.issue_ready = xif_obi2axi_issue_if.issue_ready; - xif_issue_if_i.issue_resp = xif_obi2axi_issue_if.issue_resp; - - xif_axi2obi_issue_if.issue_valid = 1'b0; - xif_axi2obi_issue_if.issue_req = '0; - end else begin // AXI2OBI - xif_axi2obi_issue_if.issue_valid = xif_issue_if_i.issue_valid; - xif_axi2obi_issue_if.issue_req = xif_issue_if_i.issue_req; - xif_issue_if_i.issue_ready = xif_axi2obi_issue_if.issue_ready; - xif_issue_if_i.issue_resp = xif_axi2obi_issue_if.issue_resp; - - xif_obi2axi_issue_if.issue_valid = 1'b0; - xif_obi2axi_issue_if.issue_req = '0; - end - end - -/*******************************************************/ -/** Xif Issue DEMUX End **/ -/*******************************************************/ -/** AXI2OBI Xif Instruction Decoder Beginning **/ -/*******************************************************/ - - idma_xif_inst_decoder #( - .INSTR_W ( magia_tile_pkg::DMA_INSTR_W ), - .DATA_W ( magia_tile_pkg::DMA_DATA_W ), - .N_RF_PORTS ( magia_tile_pkg::DMA_N_RF_PORTS ), - .OPCODE_W ( magia_tile_pkg::DMA_OPCODE_W ), - .FUNC3_W ( magia_tile_pkg::DMA_FUNC3_W ), - .ND_EN_W ( magia_tile_pkg::DMA_ND_EN_W ), - .DST_MAX_LOG_LEN_W ( magia_tile_pkg::DMA_DST_MAX_LOG_LEN_W ), - .SRC_MAX_LOG_LEN_W ( magia_tile_pkg::DMA_SRC_MAX_LOG_LEN_W ), - .DST_REDUCE_LEN_W ( magia_tile_pkg::DMA_DST_REDUCE_LEN_W ), - .SRC_REDUCE_LEN_W ( magia_tile_pkg::DMA_SRC_REDUCE_LEN_W ), - 
.DECOUPLE_R_W_W ( magia_tile_pkg::DMA_DECOUPLE_R_W_W ), - .DECOUPLE_R_AW_W ( magia_tile_pkg::DMA_DECOUPLE_R_AW_W ), - .OPCODE_OFF ( magia_tile_pkg::DMA_OPCODE_OFF ), - .FUNC3_OFF ( magia_tile_pkg::DMA_FUNC3_OFF ), - .ND_EN_OFF ( magia_tile_pkg::DMA_ND_EN_OFF ), - .DST_MAX_LOG_LEN_OFF ( magia_tile_pkg::DMA_DST_MAX_LOG_LEN_OFF ), - .SRC_MAX_LOG_LEN_OFF ( magia_tile_pkg::DMA_SRC_MAX_LOG_LEN_OFF ), - .DST_REDUCE_LEN_OFF ( magia_tile_pkg::DMA_DST_REDUCE_LEN_OFF ), - .SRC_REDUCE_LEN_OFF ( magia_tile_pkg::DMA_SRC_REDUCE_LEN_OFF ), - .DECOUPLE_R_W_OFF ( magia_tile_pkg::DMA_DECOUPLE_R_W_OFF ), - .DECOUPLE_R_AW_OFF ( magia_tile_pkg::DMA_DECOUPLE_R_AW_OFF ), - .N_CFG_REG ( magia_tile_pkg::DMA_N_CFG_REG ), - .idma_fe_req_t ( idma_fe_reg_req_t ), - .idma_fe_rsp_t ( idma_fe_reg_rsp_t ) - ) i_idma_axi2obi_inst_decoder ( - .clk_i , - .rst_ni , - .clear_i , - - .xif_issue_if_i ( xif_axi2obi_issue_if.coproc_issue ), - - .cfg_req_o ( idma_fe_reg_axi2obi_req ), - .cfg_rsp_i ( idma_fe_reg_axi2obi_rsp ), - - .start_o ( axi2obi_start_o ), - .busy_o ( axi2obi_busy_o ), - .done_o ( axi2obi_done_o ), - .error_o ( axi2obi_error_o ) - ); - -/*******************************************************/ -/** AXI2OBI Xif Instruction Decoder End **/ -/*******************************************************/ -/** OBI2AXI Xif Instruction Decoder Beginning **/ -/*******************************************************/ - - idma_xif_inst_decoder #( - .INSTR_W ( magia_tile_pkg::DMA_INSTR_W ), - .DATA_W ( magia_tile_pkg::DMA_DATA_W ), - .N_RF_PORTS ( magia_tile_pkg::DMA_N_RF_PORTS ), - .OPCODE_W ( magia_tile_pkg::DMA_OPCODE_W ), - .FUNC3_W ( magia_tile_pkg::DMA_FUNC3_W ), - .ND_EN_W ( magia_tile_pkg::DMA_ND_EN_W ), - .DST_MAX_LOG_LEN_W ( magia_tile_pkg::DMA_DST_MAX_LOG_LEN_W ), - .SRC_MAX_LOG_LEN_W ( magia_tile_pkg::DMA_SRC_MAX_LOG_LEN_W ), - .DST_REDUCE_LEN_W ( magia_tile_pkg::DMA_DST_REDUCE_LEN_W ), - .SRC_REDUCE_LEN_W ( magia_tile_pkg::DMA_SRC_REDUCE_LEN_W ), - .DECOUPLE_R_W_W ( 
magia_tile_pkg::DMA_DECOUPLE_R_W_W ), - .DECOUPLE_R_AW_W ( magia_tile_pkg::DMA_DECOUPLE_R_AW_W ), - .OPCODE_OFF ( magia_tile_pkg::DMA_OPCODE_OFF ), - .FUNC3_OFF ( magia_tile_pkg::DMA_FUNC3_OFF ), - .ND_EN_OFF ( magia_tile_pkg::DMA_ND_EN_OFF ), - .DST_MAX_LOG_LEN_OFF ( magia_tile_pkg::DMA_DST_MAX_LOG_LEN_OFF ), - .SRC_MAX_LOG_LEN_OFF ( magia_tile_pkg::DMA_SRC_MAX_LOG_LEN_OFF ), - .DST_REDUCE_LEN_OFF ( magia_tile_pkg::DMA_DST_REDUCE_LEN_OFF ), - .SRC_REDUCE_LEN_OFF ( magia_tile_pkg::DMA_SRC_REDUCE_LEN_OFF ), - .DECOUPLE_R_W_OFF ( magia_tile_pkg::DMA_DECOUPLE_R_W_OFF ), - .DECOUPLE_R_AW_OFF ( magia_tile_pkg::DMA_DECOUPLE_R_AW_OFF ), - .N_CFG_REG ( magia_tile_pkg::DMA_N_CFG_REG ), - .idma_fe_req_t ( idma_fe_reg_req_t ), - .idma_fe_rsp_t ( idma_fe_reg_rsp_t ) - ) i_idma_obi2axi_inst_decoder ( - .clk_i , - .rst_ni , - .clear_i , - - .xif_issue_if_i ( xif_obi2axi_issue_if.coproc_issue ), - - .cfg_req_o ( idma_fe_reg_obi2axi_req ), - .cfg_rsp_i ( idma_fe_reg_obi2axi_rsp ), - - .start_o ( obi2axi_start_o ), - .busy_o ( obi2axi_busy_o ), - .done_o ( obi2axi_done_o ), - .error_o ( obi2axi_error_o ) - ); - -/*******************************************************/ -/** OBI2AXI Xif Instruction Decoder End **/ -/*******************************************************/ -/** AXI2OBI (L2 to L1) Transfer Channel Beginning **/ -/*******************************************************/ - - idma_axi_obi_transfer_ch #( - .CHANNEL_T ( magia_tile_pkg::AXI2OBI ), - .ERROR_CAP ( ERROR_CAP ), - .idma_fe_reg_req_t ( idma_fe_reg_req_t ), - .idma_fe_reg_rsp_t ( idma_fe_reg_rsp_t ), - .axi_req_t ( axi_req_t ), - .axi_rsp_t ( axi_rsp_t ), - .obi_req_t ( obi_req_t ), - .obi_rsp_t ( obi_rsp_t ) - ) i_l2_to_l1_ch ( - .clk_i , - .rst_ni , - .testmode_i , - .clear_i , - .cfg_req_i ( idma_fe_reg_axi2obi_req ), - .cfg_rsp_o ( idma_fe_reg_axi2obi_rsp ), - .axi_req_o ( axi_read_req_o ), - .axi_rsp_i ( axi_read_rsp_i ), - .obi_req_o ( obi_write_req_o ), - .obi_rsp_i ( obi_write_rsp_i ) - ); - 
-/*******************************************************/ -/** AXI2OBI (L2 to L1) Transfer Channel End **/ -/*******************************************************/ -/** OBI2AXI (L1 to L2) Transfer Channel Beginning **/ -/*******************************************************/ - - idma_axi_obi_transfer_ch #( - .CHANNEL_T ( magia_tile_pkg::OBI2AXI ), - .ERROR_CAP ( ERROR_CAP ), - .idma_fe_reg_req_t ( idma_fe_reg_req_t ), - .idma_fe_reg_rsp_t ( idma_fe_reg_rsp_t ), - .axi_req_t ( axi_req_t ), - .axi_rsp_t ( axi_rsp_t ), - .obi_req_t ( obi_req_t ), - .obi_rsp_t ( obi_rsp_t ) - ) i_l1_to_l2_ch ( - .clk_i , - .rst_ni , - .testmode_i , - .clear_i , - .cfg_req_i ( idma_fe_reg_obi2axi_req ), - .cfg_rsp_o ( idma_fe_reg_obi2axi_rsp ), - .axi_req_o ( axi_write_req_o ), - .axi_rsp_i ( axi_write_rsp_i ), - .obi_req_o ( obi_read_req_o ), - .obi_rsp_i ( obi_read_rsp_i ) - ); - -/*******************************************************/ -/** OBI2AXI (L1 to L2) Transfer Channel End **/ -/*******************************************************/ - -endmodule: idma_ctrl \ No newline at end of file diff --git a/hw/tile/idma_ctrl_mm.sv b/hw/tile/idma_ctrl_mm.sv new file mode 100644 index 0000000..a1a8293 --- /dev/null +++ b/hw/tile/idma_ctrl_mm.sv @@ -0,0 +1,182 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Solderpad Hardware License, Version 0.51 + * (the "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * SPDX-License-Identifier: SHL-0.51
+ *
+ * Authors: Luca Balboni
+ *          Based on idma_ctrl by Victor Isachi
+ *
+ * iDMA Memory-Mapped Controller
+ *
+ * This module provides a memory-mapped control interface for iDMA transfers
+ * with interrupt support. It wraps both the AXI2OBI and OBI2AXI transfer channels
+ * along with the OBI register decoder, providing equivalent functionality
+ * to idma_ctrl but using memory-mapped register access instead of ISA extensions.
+ */
+
+module idma_ctrl_mm
+  import magia_tile_pkg::*;
+  import idma_pkg::*;
+#(
+  parameter int unsigned ERROR_CAP         = 3, // Forwarded unchanged to both channels. NOTE(review): idma_ctrl typed this idma_pkg::error_cap_e - confirm 3 is a legal value
+  parameter type         obi_req_t         = magia_tile_pkg::core_obi_data_req_t,
+  parameter type         obi_rsp_t         = magia_tile_pkg::core_obi_data_rsp_t,
+  parameter type         idma_fe_reg_req_t = magia_tile_pkg::idma_fe_reg_req_t,
+  parameter type         idma_fe_reg_rsp_t = magia_tile_pkg::idma_fe_reg_rsp_t,
+  parameter type         axi_req_t         = magia_tile_pkg::idma_axi_req_t,
+  parameter type         axi_rsp_t         = magia_tile_pkg::idma_axi_rsp_t,
+  parameter type         idma_obi_req_t    = magia_tile_pkg::idma_obi_req_t,
+  parameter type         idma_obi_rsp_t    = magia_tile_pkg::idma_obi_rsp_t
+)(
+  input  logic          clk_i,
+  input  logic          rst_ni,
+  input  logic          test_en_i,
+  input  logic          clear_i,
+
+  // OBI Slave Interface (CPU memory-mapped access to the iDMA registers)
+  input  obi_req_t      obi_req_i,
+  output obi_rsp_t      obi_rsp_o,
+
+  // AXI Master Interfaces (to L2 memory)
+  output axi_req_t      axi_read_req_o,   // AXI2OBI: L2 read
+  input  axi_rsp_t      axi_read_rsp_i,
+  output axi_req_t      axi_write_req_o,  // OBI2AXI: L2 write
+  input  axi_rsp_t      axi_write_rsp_i,
+
+  // OBI Master Interfaces (to L1 memory)
+  output idma_obi_req_t obi_read_req_o,   // OBI2AXI: L1 read
+  input  idma_obi_rsp_t obi_read_rsp_i,
+  output idma_obi_req_t obi_write_req_o,  // AXI2OBI: L1 write
+  input  idma_obi_rsp_t obi_write_rsp_i,
+
+  // IRQ outputs: unmodified copies of each transfer channel's status outputs (a2o = AXI2OBI, o2a = OBI2AXI)
+  output logic          irq_a2o_busy_o,
+  output logic          irq_a2o_start_o,
+  output logic          irq_a2o_done_o,
+  output logic          irq_a2o_error_o,
+  output logic          irq_o2a_busy_o,
+  output logic          irq_o2a_start_o,
+  output logic          irq_o2a_done_o,
+  output logic          irq_o2a_error_o
+);
+
+/*******************************************************/
+/**            Internal Signal Definitions            **/
+/*******************************************************/
+
+  // Register front-end request/response pairs: driven by the OBI decoder, consumed by the channels
+  idma_fe_reg_req_t idma_fe_reg_axi2obi_req;
+  idma_fe_reg_rsp_t idma_fe_reg_axi2obi_rsp;
+  idma_fe_reg_req_t idma_fe_reg_obi2axi_req;
+  idma_fe_reg_rsp_t idma_fe_reg_obi2axi_rsp;
+
+  // Per-channel status outputs of the transfer channels (forwarded to the IRQ outputs below)
+  logic a2o_transfer_busy;
+  logic a2o_transfer_start;
+  logic a2o_transfer_done;
+  logic a2o_transfer_error;
+  logic o2a_transfer_busy;
+  logic o2a_transfer_start;
+  logic o2a_transfer_done;
+  logic o2a_transfer_error;
+
+/*******************************************************/
+/**          Transfer Channels Instantiation          **/
+/*******************************************************/
+
+  // AXI2OBI Transfer Channel (L2 to L1); types come from this module's parameters so overrides stay consistent
+  idma_axi_obi_transfer_ch #(
+    .CHANNEL_T         ( magia_tile_pkg::AXI2OBI ),
+    .ERROR_CAP         ( ERROR_CAP               ),
+    .idma_fe_reg_req_t ( idma_fe_reg_req_t       ),
+    .idma_fe_reg_rsp_t ( idma_fe_reg_rsp_t       ),
+    .axi_req_t         ( axi_req_t               ),
+    .axi_rsp_t         ( axi_rsp_t               ),
+    .obi_req_t         ( idma_obi_req_t          ),
+    .obi_rsp_t         ( idma_obi_rsp_t          )
+  ) i_l2_to_l1_ch (
+    .clk_i            ( clk_i                   ),
+    .rst_ni           ( rst_ni                  ),
+    .testmode_i       ( test_en_i               ),
+    .clear_i          ( clear_i                 ),
+    .cfg_req_i        ( idma_fe_reg_axi2obi_req ),
+    .cfg_rsp_o        ( idma_fe_reg_axi2obi_rsp ),
+    .axi_req_o        ( axi_read_req_o          ),
+    .axi_rsp_i        ( axi_read_rsp_i          ),
+    .obi_req_o        ( obi_write_req_o         ),
+    .obi_rsp_i        ( obi_write_rsp_i         ),
+    .transfer_busy_o  ( a2o_transfer_busy       ),
+    .transfer_start_o ( a2o_transfer_start      ),
+    .transfer_done_o  ( a2o_transfer_done       ),
+    .transfer_error_o ( a2o_transfer_error      )
+  );
+
+  // OBI2AXI Transfer Channel (L1 to L2); types come from this module's parameters so overrides stay consistent
+  idma_axi_obi_transfer_ch #(
+    .CHANNEL_T         ( magia_tile_pkg::OBI2AXI ),
+    .ERROR_CAP         ( ERROR_CAP               ),
+    .idma_fe_reg_req_t ( idma_fe_reg_req_t       ),
+    .idma_fe_reg_rsp_t ( idma_fe_reg_rsp_t       ),
+    .axi_req_t         ( axi_req_t               ),
+    .axi_rsp_t         ( axi_rsp_t               ),
+    .obi_req_t         ( idma_obi_req_t          ),
+    .obi_rsp_t         ( idma_obi_rsp_t          )
+  ) i_l1_to_l2_ch (
+    .clk_i            ( clk_i                   ),
+    .rst_ni           ( rst_ni                  ),
+    .testmode_i       ( test_en_i               ),
+    .clear_i          ( clear_i                 ),
+    .cfg_req_i        ( idma_fe_reg_obi2axi_req ),
+    .cfg_rsp_o        ( idma_fe_reg_obi2axi_rsp ),
+    .axi_req_o        ( axi_write_req_o         ),
+    .axi_rsp_i        ( axi_write_rsp_i         ),
+    .obi_req_o        ( obi_read_req_o          ),
+    .obi_rsp_i        ( obi_read_rsp_i          ),
+    .transfer_busy_o  ( o2a_transfer_busy       ),
+    .transfer_start_o ( o2a_transfer_start      ),
+    .transfer_done_o  ( o2a_transfer_done       ),
+    .transfer_error_o ( o2a_transfer_error      )
+  );
+
+/*******************************************************/
+/**          Memory-Mapped Register Decoder           **/
+/*******************************************************/
+
+  idma_obi_ctrl_decoder #(
+    .obi_req_t         ( obi_req_t         ),
+    .obi_rsp_t         ( obi_rsp_t         ),
+    .idma_fe_reg_req_t ( idma_fe_reg_req_t ),
+    .idma_fe_reg_rsp_t ( idma_fe_reg_rsp_t )
+  ) i_idma_obi_ctrl_decoder (
+    .obi_req_i          ( obi_req_i               ),
+    .obi_rsp_o          ( obi_rsp_o               ),
+
+    .idma_axi2obi_req_o ( idma_fe_reg_axi2obi_req ),
+    .idma_axi2obi_rsp_i ( idma_fe_reg_axi2obi_rsp ),
+    .idma_obi2axi_req_o ( idma_fe_reg_obi2axi_req ),
+    .idma_obi2axi_rsp_i ( idma_fe_reg_obi2axi_rsp )
+  );
+
+  // IRQ pass-through: each channel's status outputs are forwarded unmodified (no latching or serialization here)
+  assign irq_a2o_start_o = a2o_transfer_start;
+  assign irq_a2o_busy_o  = a2o_transfer_busy;
+  assign irq_a2o_done_o  = a2o_transfer_done;
+  assign irq_a2o_error_o = a2o_transfer_error;
+
+  assign irq_o2a_start_o = o2a_transfer_start;
+  assign irq_o2a_busy_o  = o2a_transfer_busy;
+  assign irq_o2a_done_o  = o2a_transfer_done;
+  assign irq_o2a_error_o = o2a_transfer_error;
+
+endmodule: idma_ctrl_mm
\ No
newline at end of file diff --git a/hw/tile/idma_obi_ctrl_decoder.sv b/hw/tile/idma_obi_ctrl_decoder.sv new file mode 100644 index 0000000..9939659 --- /dev/null +++ b/hw/tile/idma_obi_ctrl_decoder.sv @@ -0,0 +1,186 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Solderpad Hardware License, Version 0.51 + * (the "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * SPDX-License-Identifier: SHL-0.51 + * + * Authors: Luca Balboni + * Based on idma_ctrl by Victor Isachi + * + * OBI to iDMA Bridge - Memory-mapped control interface for iDMA + * + */ + +module idma_obi_ctrl_decoder + import magia_tile_pkg::*; + import magia_pkg::*; + #( + parameter type obi_req_t = magia_tile_pkg::core_obi_data_req_t, + parameter type obi_rsp_t = magia_tile_pkg::core_obi_data_rsp_t, + parameter type idma_fe_reg_req_t = magia_tile_pkg::idma_fe_reg_req_t, + parameter type idma_fe_reg_rsp_t = magia_tile_pkg::idma_fe_reg_rsp_t +)( + // OBI Slave Interface (CPU access) + input obi_req_t obi_req_i, + output obi_rsp_t obi_rsp_o, + + // iDMA Register Frontend Interface + output idma_fe_reg_req_t idma_axi2obi_req_o, + input idma_fe_reg_rsp_t idma_axi2obi_rsp_i, + + output idma_fe_reg_req_t idma_obi2axi_req_o, + input idma_fe_reg_rsp_t idma_obi2axi_rsp_i +); + +/*******************************************************/ +/** Internal Signal Definitions Beginning **/ +/*******************************************************/ + + // Address decode parameters - use parametric
base address and size + localparam logic [magia_pkg::ADDR_W-1:0] IDMA_BASE_ADDR = magia_tile_pkg::IDMA_CTRL_ADDR_START; + localparam logic [magia_pkg::ADDR_W-1:0] IDMA_SIZE = magia_tile_pkg::IDMA_CTRL_SIZE; + localparam logic [magia_pkg::ADDR_W-1:0] IDMA_END_ADDR = magia_tile_pkg::IDMA_CTRL_ADDR_END; + + localparam int unsigned ADDR_WIDTH = 32; + localparam int unsigned DIRECTION_OFFSET = 12'h200; // +0x200 for direction change + + // Register offset definitions based on official reg32_3d spec + localparam logic [11:0] IDMA_CONFIG_OFFSET = 12'h0; + + localparam logic [11:0] IDMA_STATUS_0_OFFSET = 12'h4; + localparam logic [11:0] IDMA_STATUS_1_OFFSET = 12'h8; + localparam logic [11:0] IDMA_STATUS_2_OFFSET = 12'hc; + localparam logic [11:0] IDMA_STATUS_3_OFFSET = 12'h10; + + localparam logic [11:0] IDMA_NEXT_ID_0_OFFSET = 12'h44; + localparam logic [11:0] IDMA_NEXT_ID_1_OFFSET = 12'h48; + + localparam logic [11:0] IDMA_DONE_ID_0_OFFSET = 12'h84; + + localparam logic [11:0] IDMA_DST_ADDR_LOW_OFFSET = 12'hd0; + localparam logic [11:0] IDMA_SRC_ADDR_LOW_OFFSET = 12'hd8; + localparam logic [11:0] IDMA_LENGTH_LOW_OFFSET = 12'he0; + localparam logic [11:0] IDMA_DST_STRIDE_2_LOW_OFFSET = 12'he8; + localparam logic [11:0] IDMA_SRC_STRIDE_2_LOW_OFFSET = 12'hf0; + localparam logic [11:0] IDMA_REPS_2_LOW_OFFSET = 12'hf8; + localparam logic [11:0] IDMA_DST_STRIDE_3_LOW_OFFSET = 12'h100; + localparam logic [11:0] IDMA_SRC_STRIDE_3_LOW_OFFSET = 12'h108; + localparam logic [11:0] IDMA_REPS_3_LOW_OFFSET = 12'h110; + + logic direction; // Direction of the iDMA channel: 0 -> AXI2OBI; 1 -> OBI2AXI + logic [11:0] reg_offset; + logic is_valid_access; + logic is_address_in_range; + + idma_fe_reg_req_t selected_idma_req; + idma_fe_reg_rsp_t selected_idma_rsp; + +/*******************************************************/ +/** Internal Signal Definitions End **/ +/*******************************************************/ +/** Address Decoding Beginning **/ 
+/*******************************************************/ + + // Address range validation - check if address is within iDMA control space + assign is_address_in_range = (obi_req_i.a.addr >= IDMA_BASE_ADDR) && + (obi_req_i.a.addr <= IDMA_END_ADDR); + + // Address decoding - check if address is in OBI2AXI range (+0x200 offset) + assign direction = (obi_req_i.a.addr >= (IDMA_BASE_ADDR + DIRECTION_OFFSET)) ? 1'b1 : 1'b0; + assign reg_offset = direction ? + (obi_req_i.a.addr[11:0] - IDMA_BASE_ADDR[11:0] - DIRECTION_OFFSET[11:0]) : + (obi_req_i.a.addr[11:0] - IDMA_BASE_ADDR[11:0]); + + // Validate access: must be in address range AND at known register offset + assign is_valid_access = is_address_in_range && ( + (reg_offset == IDMA_CONFIG_OFFSET) || + // Status registers (multireg 0x4-0x40, step 4) + ((reg_offset >= 12'h4) && (reg_offset <= 12'h40) && ((reg_offset & 12'h3) == 12'h0)) || + // Next ID registers (multireg 0x44-0x80, step 4) + ((reg_offset >= 12'h44) && (reg_offset <= 12'h80) && ((reg_offset & 12'h3) == 12'h0)) || + // Done ID registers (multireg 0x84-0xc0, step 4) + ((reg_offset >= 12'h84) && (reg_offset <= 12'hc0) && ((reg_offset & 12'h3) == 12'h0)) || + // Configuration registers at specific skipto addresses + (reg_offset == IDMA_DST_ADDR_LOW_OFFSET) || + (reg_offset == IDMA_SRC_ADDR_LOW_OFFSET) || + (reg_offset == IDMA_LENGTH_LOW_OFFSET) || + (reg_offset == IDMA_DST_STRIDE_2_LOW_OFFSET) || + (reg_offset == IDMA_SRC_STRIDE_2_LOW_OFFSET) || + (reg_offset == IDMA_REPS_2_LOW_OFFSET) || + (reg_offset == IDMA_DST_STRIDE_3_LOW_OFFSET) || + (reg_offset == IDMA_SRC_STRIDE_3_LOW_OFFSET) || + (reg_offset == IDMA_REPS_3_LOW_OFFSET) + ); + +/*******************************************************/ +/** Address Decoding End **/ +/*******************************************************/ +/** Channel Selection Beginning **/ +/*******************************************************/ + + // Channel demultiplexer + always_comb begin: channel_demux + // Default assignments 
+ idma_axi2obi_req_o = '0; + idma_obi2axi_req_o = '0; + selected_idma_rsp = '0; + + if (is_valid_access && obi_req_i.req) begin + if (direction) begin // OBI2AXI channel (L1->L2) + idma_obi2axi_req_o = selected_idma_req; + selected_idma_rsp = idma_obi2axi_rsp_i; + end else begin // AXI2OBI channel (L2->L1) + idma_axi2obi_req_o = selected_idma_req; + selected_idma_rsp = idma_axi2obi_rsp_i; + end + end + end + +/*******************************************************/ +/** Channel Selection End **/ +/*******************************************************/ +/** OBI Protocol Handling Beginning **/ +/*******************************************************/ + + // Convert OBI transaction to iDMA register access + always_comb begin: obi_to_idma_reg + selected_idma_req.addr = {20'h0, reg_offset}; // Use only offset for iDMA frontend + selected_idma_req.write = obi_req_i.a.we; + selected_idma_req.wdata = obi_req_i.a.wdata; + selected_idma_req.wstrb = obi_req_i.a.be; + selected_idma_req.valid = obi_req_i.req && is_valid_access; + end + + // OBI response - purely combinatorial like XIF interface + always_comb begin: idma_reg_to_obi + // Grant immediately for valid requests (OBI protocol requirement) + obi_rsp_o.gnt = obi_req_i.req && is_valid_access; + + // Response valid when iDMA is ready to respond (both read and write) + obi_rsp_o.rvalid = selected_idma_rsp.ready && is_valid_access; + + // Read data directly from iDMA response (writes return 0) + obi_rsp_o.r.rdata = selected_idma_rsp.rdata; + obi_rsp_o.r.r_optional = '0; + obi_rsp_o.r.err = selected_idma_rsp.error || !is_valid_access; + obi_rsp_o.r.rid = '0; + end + +/*******************************************************/ +/** OBI Protocol Handling End **/ +/*******************************************************/ +/** Debug Display Statements **/ +/*******************************************************/ + + +endmodule: idma_obi_ctrl_decoder \ No newline at end of file diff --git a/hw/tile/idma_xif_inst_decoder.sv 
b/hw/tile/idma_xif_inst_decoder.sv deleted file mode 100644 index d37cf58..0000000 --- a/hw/tile/idma_xif_inst_decoder.sv +++ /dev/null @@ -1,445 +0,0 @@ -/* - * Copyright (C) 2023-2024 ETH Zurich and University of Bologna - * - * Licensed under the Solderpad Hardware License, Version 0.51 - * (the "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * SPDX-License-Identifier: SHL-0.51 - * - * Authors: Victor Isachi - * - * iDMA Xif Instruction Decoder - */ - -module idma_xif_inst_decoder - import magia_tile_pkg::*; - import cv32e40x_pkg::*; - import idma_pkg::*; -#( - parameter int unsigned INSTR_W = magia_tile_pkg::DMA_INSTR_W, - parameter int unsigned DATA_W = magia_tile_pkg::DMA_DATA_W, - parameter int unsigned ADDR_W = magia_tile_pkg::DMA_ADDR_W, - parameter int unsigned N_RF_PORTS = magia_tile_pkg::DMA_N_RF_PORTS, - parameter int unsigned OPCODE_W = magia_tile_pkg::DMA_OPCODE_W, - parameter int unsigned FUNC3_W = magia_tile_pkg::DMA_FUNC3_W, - parameter int unsigned ND_EN_W = magia_tile_pkg::DMA_ND_EN_W, - parameter int unsigned DST_MAX_LOG_LEN_W = magia_tile_pkg::DMA_DST_MAX_LOG_LEN_W, - parameter int unsigned SRC_MAX_LOG_LEN_W = magia_tile_pkg::DMA_SRC_MAX_LOG_LEN_W, - parameter int unsigned DST_REDUCE_LEN_W = magia_tile_pkg::DMA_DST_REDUCE_LEN_W, - parameter int unsigned SRC_REDUCE_LEN_W = magia_tile_pkg::DMA_SRC_REDUCE_LEN_W, - parameter int unsigned DECOUPLE_R_W_W = magia_tile_pkg::DMA_DECOUPLE_R_W_W, - parameter int unsigned DECOUPLE_R_AW_W = magia_tile_pkg::DMA_DECOUPLE_R_AW_W, - localparam int unsigned CONF_W 
= ND_EN_W + - DST_MAX_LOG_LEN_W + - SRC_MAX_LOG_LEN_W + - DST_REDUCE_LEN_W + - SRC_REDUCE_LEN_W + - DECOUPLE_R_W_W + - DECOUPLE_R_AW_W, - parameter int unsigned OPCODE_OFF = magia_tile_pkg::DMA_OPCODE_OFF, - parameter int unsigned FUNC3_OFF = magia_tile_pkg::DMA_FUNC3_OFF, - parameter int unsigned ND_EN_OFF = magia_tile_pkg::DMA_ND_EN_OFF, - parameter int unsigned DST_MAX_LOG_LEN_OFF = magia_tile_pkg::DMA_DST_MAX_LOG_LEN_OFF, - parameter int unsigned SRC_MAX_LOG_LEN_OFF = magia_tile_pkg::DMA_SRC_MAX_LOG_LEN_OFF, - parameter int unsigned DST_REDUCE_LEN_OFF = magia_tile_pkg::DMA_DST_REDUCE_LEN_OFF, - parameter int unsigned SRC_REDUCE_LEN_OFF = magia_tile_pkg::DMA_SRC_REDUCE_LEN_OFF, - parameter int unsigned DECOUPLE_R_W_OFF = magia_tile_pkg::DMA_DECOUPLE_R_W_OFF, - parameter int unsigned DECOUPLE_R_AW_OFF = magia_tile_pkg::DMA_DECOUPLE_R_AW_OFF, - parameter int unsigned N_CFG_REG = magia_tile_pkg::DMA_N_CFG_REG, - parameter type idma_fe_req_t = magia_tile_pkg::idma_fe_reg_req_t, - parameter type idma_fe_rsp_t = magia_tile_pkg::idma_fe_reg_rsp_t -)( - input logic clk_i, - input logic rst_ni, - input logic clear_i, - - cv32e40x_if_xif.coproc_issue xif_issue_if_i, - - output idma_fe_req_t cfg_req_o, - input idma_fe_rsp_t cfg_rsp_i, - - output logic start_o, // Started iDMA transfer - output logic busy_o, // Performing iDMA transfer - output logic done_o, // Finished iDMA transfer - output logic error_o // Detected error -); - -/*******************************************************/ -/** Internal Signal Definitions Beginning **/ -/*******************************************************/ - - logic dec_clk_req; - logic cfg_clk_req; - logic clk_dc_en, clk_tfr_en; - logic clk_dc_g, clk_tfr_g; - - logic[ OPCODE_W-1:0] opcode; - logic[ FUNC3_W-1:0] func3; - logic[ ND_EN_W-1:0] nd_en; - logic[DST_MAX_LOG_LEN_W-1:0] dst_max_log_len; - logic[SRC_MAX_LOG_LEN_W-1:0] src_max_log_len; - logic[ DST_REDUCE_LEN_W-1:0] dst_reduce_len; - logic[ SRC_REDUCE_LEN_W-1:0] src_reduce_len; - 
logic[ DECOUPLE_R_W_W-1:0] decouple_r_w; - logic[ DECOUPLE_R_AW_W-1:0] decouple_r_aw; - - logic[N_CFG_REG-1:0][DATA_W-1:0] cfg_reg_d, cfg_reg_q; - logic[N_CFG_REG-1:0] cfg_reg_update_d, cfg_reg_update_q; - logic[N_CFG_REG-1:0] cfg_reg_update_clr; - - idma_fe_req_t cfg_configurer_req; - idma_fe_rsp_t cfg_configurer_rsp; - idma_fe_req_t cfg_transferer_req; - idma_fe_rsp_t cfg_transferer_rsp; - - logic free_cfg; - logic free_tfr; - - logic start_transfer; - - logic start_dma; - logic busy_dma; - logic done_dma; - - logic transfer_not_set_properly; - logic reg_error_cfg, reg_error_tfr; - - logic rw_valid_cfg, rw_valid_tfr; - - logic[DATA_W-1:0] next_id_d, next_id_q; - logic[DATA_W-1:0] done_id; - - typedef enum logic[1:0] { - IDLE, - START, - BUSY, - DONE - } idma_state_e; - - idma_state_e c_idma_state, n_idma_state; - -/*******************************************************/ -/** Internal Signal Definitions End **/ -/*******************************************************/ -/** Function Definitions Beginning **/ -/*******************************************************/ - - /* Function that writes the data argument to the addr argument of the iDMA FE register - * OUTPUT: - * req - iDMA FE register request channel - * reg_error - indicates that the req/rsp of the iDMA FE asserted the error signal - * INPUT: - * rsp - iDMA FE register response channel - * addr - iDMA FE register address - * data - iDMA FE register data - * RETURN: - * 1'b1 if the write was acknowledged, 1'b0 otherwise - */ - function automatic logic write_idma_reg(output idma_fe_req_t req, input idma_fe_rsp_t rsp, - input logic[ADDR_W-1:0] addr, input logic[DATA_W-1:0] data, - output logic reg_error); - req.addr = addr; - req.write = 1'b1; - req.wdata = data; - req.wstrb = '1; - req.valid = 1'b1; - - reg_error = rsp.error ? 1'b1 : 1'b0; - write_idma_reg = cfg_rsp_i.ready ? 
1'b1 : 1'b0; - endfunction: write_idma_reg - - /* - * Function that reads the data argument of the iDMA FE register - * OUTPUT: - * req - iDMA FE register request channel - * reg_error - indicates that the req/rsp of the iDMA FE asserted the error signal - * data - iDMA FE register read data - * INPUT: - * rsp - iDMA FE register response channel - * addr - iDMA FE register address - * RETURN: - * 1'b1 if the read data is valid, 1'b0 otherwise - */ - function automatic logic read_idma_reg(output idma_fe_req_t req, input idma_fe_rsp_t rsp, - input logic[ADDR_W-1:0] addr, output logic[DATA_W-1:0] data, - output logic reg_error); - req.addr = addr; - req.write = 1'b0; - req.wdata = '0; - req.wstrb = '0; - req.valid = 1'b1; - - data = cfg_rsp_i.rdata; - - reg_error = rsp.error ? 1'b1 : 1'b0; - read_idma_reg = cfg_rsp_i.ready ? 1'b1 : 1'b0; - endfunction: read_idma_reg - -/*******************************************************/ -/** Function Definitions End **/ -/*******************************************************/ -/** Hardwired Signals Beginning **/ -/*******************************************************/ - - assign clk_dc_en = dec_clk_req | cfg_clk_req; - - assign opcode = xif_issue_if_i.issue_req.instr[ OPCODE_OFF+: OPCODE_W]; - assign func3 = xif_issue_if_i.issue_req.instr[ FUNC3_OFF+: FUNC3_W]; - assign nd_en = xif_issue_if_i.issue_req.instr[ ND_EN_OFF+: ND_EN_W]; - assign dst_max_log_len = xif_issue_if_i.issue_req.instr[DST_MAX_LOG_LEN_OFF+:DST_MAX_LOG_LEN_W]; - assign src_max_log_len = xif_issue_if_i.issue_req.instr[SRC_MAX_LOG_LEN_OFF+:SRC_MAX_LOG_LEN_W]; - assign dst_reduce_len = xif_issue_if_i.issue_req.instr[ DST_REDUCE_LEN_OFF+: DST_REDUCE_LEN_W]; - assign src_reduce_len = xif_issue_if_i.issue_req.instr[ SRC_REDUCE_LEN_OFF+: SRC_REDUCE_LEN_W]; - assign decouple_r_w = xif_issue_if_i.issue_req.instr[ DECOUPLE_R_W_OFF+: DECOUPLE_R_W_W]; - assign decouple_r_aw = xif_issue_if_i.issue_req.instr[ DECOUPLE_R_AW_OFF+: DECOUPLE_R_AW_W]; - - assign free_cfg = 
~(|cfg_reg_update_q); - assign free_tfr = ~(start_dma | busy_dma); - - assign cfg_req_o = ~free_tfr ? cfg_transferer_req : - ~free_cfg ? cfg_configurer_req : '0; - assign cfg_transferer_rsp = ~free_tfr ? cfg_rsp_i : '0; - assign cfg_configurer_rsp = ~free_tfr ? '0 : - ~free_cfg ? cfg_rsp_i : '0; - - assign error_o = transfer_not_set_properly | reg_error_cfg | reg_error_tfr; - - assign start_o = start_dma; - assign busy_o = busy_dma; - assign done_o = done_dma; - -/*******************************************************/ -/** Hardwired Signals End **/ -/*******************************************************/ -/** Clock gating Beginning **/ -/*******************************************************/ - - tc_clk_gating dc_clock_gating ( - .clk_i , - .en_i ( clk_dc_en ), - .test_en_i ( '0 ), - .clk_o ( clk_dc_g ) - ); - - tc_clk_gating tfr_clock_gating ( - .clk_i , - .en_i ( clk_tfr_en ), - .test_en_i ( '0 ), - .clk_o ( clk_tfr_g ) - ); - -/*******************************************************/ -/** Clock gating End **/ -/*******************************************************/ -/** Decoder FSM Beginning **/ -/*******************************************************/ - - always_comb begin: instr_decoder - dec_clk_req = 1'b0; - start_transfer = 1'b0; - cfg_reg_d = cfg_reg_q; - cfg_reg_update_d = cfg_reg_update_q; - xif_issue_if_i.issue_ready = 1'b0; - xif_issue_if_i.issue_resp = '0; - - if (xif_issue_if_i.issue_valid) begin - case (opcode) - CONF_OPCODE: if (func3 == CONF_FUNC3) begin - xif_issue_if_i.issue_ready = 1'b1; - xif_issue_if_i.issue_resp.accept = 1'b1; - dec_clk_req = 1'b1; - cfg_reg_d [magia_tile_pkg::DMA_CONF_IDX] = {nd_en, dst_max_log_len, src_max_log_len, dst_reduce_len, src_reduce_len, decouple_r_w, decouple_r_aw}; - cfg_reg_update_d[magia_tile_pkg::DMA_CONF_IDX] = 1'b1; - end - SET_OPCODE: begin - xif_issue_if_i.issue_ready = 1'b1; - xif_issue_if_i.issue_resp.accept = 1'b1; - dec_clk_req = 1'b1; - case (func3) - SET_AL_FUNC3: if 
(xif_issue_if_i.issue_req.rs_valid) begin - cfg_reg_d [magia_tile_pkg::DMA_LENGTH_IDX] = xif_issue_if_i.issue_req.rs[0]; - cfg_reg_d [magia_tile_pkg::DMA_SRC_ADDR_IDX] = xif_issue_if_i.issue_req.rs[1]; - cfg_reg_d [magia_tile_pkg::DMA_DST_ADDR_IDX] = xif_issue_if_i.issue_req.rs[2]; - cfg_reg_update_d[magia_tile_pkg::DMA_LENGTH_IDX] = 1'b1; - cfg_reg_update_d[magia_tile_pkg::DMA_SRC_ADDR_IDX] = 1'b1; - cfg_reg_update_d[magia_tile_pkg::DMA_DST_ADDR_IDX] = 1'b1; - end - SET_SR2_FUNC3: if (xif_issue_if_i.issue_req.rs_valid) begin - cfg_reg_d [magia_tile_pkg::DMA_REPS_2_IDX] = xif_issue_if_i.issue_req.rs[0]; - cfg_reg_d [magia_tile_pkg::DMA_SRC_STRIDE_2_IDX] = xif_issue_if_i.issue_req.rs[1]; - cfg_reg_d [magia_tile_pkg::DMA_DST_STRIDE_2_IDX] = xif_issue_if_i.issue_req.rs[2]; - cfg_reg_update_d[magia_tile_pkg::DMA_REPS_2_IDX] = 1'b1; - cfg_reg_update_d[magia_tile_pkg::DMA_SRC_STRIDE_2_IDX] = 1'b1; - cfg_reg_update_d[magia_tile_pkg::DMA_DST_STRIDE_2_IDX] = 1'b1; - end - SET_SR3_FUNC3: if (xif_issue_if_i.issue_req.rs_valid) begin - cfg_reg_d [magia_tile_pkg::DMA_REPS_3_IDX] = xif_issue_if_i.issue_req.rs[0]; - cfg_reg_d [magia_tile_pkg::DMA_SRC_STRIDE_3_IDX] = xif_issue_if_i.issue_req.rs[1]; - cfg_reg_d [magia_tile_pkg::DMA_DST_STRIDE_3_IDX] = xif_issue_if_i.issue_req.rs[2]; - cfg_reg_update_d[magia_tile_pkg::DMA_REPS_3_IDX] = 1'b1; - cfg_reg_update_d[magia_tile_pkg::DMA_SRC_STRIDE_3_IDX] = 1'b1; - cfg_reg_update_d[magia_tile_pkg::DMA_DST_STRIDE_3_IDX] = 1'b1; - end - SET_S_FUNC3: start_transfer = 1'b1; - endcase - end - endcase - end - end - - always_ff @(posedge clk_dc_g, negedge rst_ni) begin: configuration_register - if (~rst_ni) cfg_reg_q <= '0; - else begin - if (clear_i) cfg_reg_q <= '0; - else cfg_reg_q <= cfg_reg_d; - end - end - -/*******************************************************/ -/** Decoder FSM End **/ -/*******************************************************/ -/** iDMA FE Configuration FSM Beginning **/ 
-/*******************************************************/ - - always_comb begin: idma_configurerer_next_state_output_logic - cfg_clk_req = 1'b0; - reg_error_cfg = 1'b0; - rw_valid_cfg = 1'b0; - cfg_reg_update_clr = '0; - cfg_configurer_req.addr = '0; - cfg_configurer_req.write = 1'b0; - cfg_configurer_req.wdata = '0; - cfg_configurer_req.wstrb = '0; - cfg_configurer_req.valid = 1'b0; - - if (free_tfr) begin - if (~free_cfg) begin - cfg_clk_req = 1'b1; - case (1'b1) - cfg_reg_update_q[magia_tile_pkg::DMA_CONF_IDX]: begin - rw_valid_cfg = write_idma_reg(.req(cfg_configurer_req), .rsp(cfg_configurer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_CONF_OFFSET), .data(cfg_reg_d[magia_tile_pkg::DMA_CONF_IDX][CONF_W-1:0]), .reg_error(reg_error_cfg)); - cfg_reg_update_clr[magia_tile_pkg::DMA_CONF_IDX] = reg_error_cfg ? 1'b0 : (rw_valid_cfg ? 1'b1 : 1'b0); - end - cfg_reg_update_q[magia_tile_pkg::DMA_DST_ADDR_IDX]: begin - rw_valid_cfg = write_idma_reg(.req(cfg_configurer_req), .rsp(cfg_configurer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_DST_ADDR_LOW_OFFSET), .data(cfg_reg_d[magia_tile_pkg::DMA_DST_ADDR_IDX]), .reg_error(reg_error_cfg)); - cfg_reg_update_clr[magia_tile_pkg::DMA_DST_ADDR_IDX] = reg_error_cfg ? 1'b0 : (rw_valid_cfg ? 1'b1 : 1'b0); - end - cfg_reg_update_q[magia_tile_pkg::DMA_SRC_ADDR_IDX]: begin - rw_valid_cfg = write_idma_reg(.req(cfg_configurer_req), .rsp(cfg_configurer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_SRC_ADDR_LOW_OFFSET), .data(cfg_reg_d[magia_tile_pkg::DMA_SRC_ADDR_IDX]), .reg_error(reg_error_cfg)); - cfg_reg_update_clr[magia_tile_pkg::DMA_SRC_ADDR_IDX] = reg_error_cfg ? 1'b0 : (rw_valid_cfg ? 
1'b1 : 1'b0); - end - cfg_reg_update_q[magia_tile_pkg::DMA_LENGTH_IDX]: begin - rw_valid_cfg = write_idma_reg(.req(cfg_configurer_req), .rsp(cfg_configurer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_LENGTH_LOW_OFFSET), .data(cfg_reg_d[magia_tile_pkg::DMA_LENGTH_IDX]), .reg_error(reg_error_cfg)); - cfg_reg_update_clr[magia_tile_pkg::DMA_LENGTH_IDX] = reg_error_cfg ? 1'b0 : (rw_valid_cfg ? 1'b1 : 1'b0); - end - cfg_reg_update_q[magia_tile_pkg::DMA_DST_STRIDE_2_IDX]: begin - rw_valid_cfg = write_idma_reg(.req(cfg_configurer_req), .rsp(cfg_configurer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_DST_STRIDE_2_LOW_OFFSET), .data(cfg_reg_d[magia_tile_pkg::DMA_DST_STRIDE_2_IDX]), .reg_error(reg_error_cfg)); - cfg_reg_update_clr[magia_tile_pkg::DMA_DST_STRIDE_2_IDX] = reg_error_cfg ? 1'b0 : (rw_valid_cfg ? 1'b1 : 1'b0); - end - cfg_reg_update_q[magia_tile_pkg::DMA_SRC_STRIDE_2_IDX]: begin - rw_valid_cfg = write_idma_reg(.req(cfg_configurer_req), .rsp(cfg_configurer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_SRC_STRIDE_2_LOW_OFFSET), .data(cfg_reg_d[magia_tile_pkg::DMA_SRC_STRIDE_2_IDX]), .reg_error(reg_error_cfg)); - cfg_reg_update_clr[magia_tile_pkg::DMA_SRC_STRIDE_2_IDX] = reg_error_cfg ? 1'b0 : (rw_valid_cfg ? 1'b1 : 1'b0); - end - cfg_reg_update_q[magia_tile_pkg::DMA_REPS_2_IDX]: begin - rw_valid_cfg = write_idma_reg(.req(cfg_configurer_req), .rsp(cfg_configurer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_REPS_2_LOW_OFFSET), .data(cfg_reg_d[magia_tile_pkg::DMA_REPS_2_IDX]), .reg_error(reg_error_cfg)); - cfg_reg_update_clr[magia_tile_pkg::DMA_REPS_2_IDX] = reg_error_cfg ? 1'b0 : (rw_valid_cfg ? 
1'b1 : 1'b0); - end - cfg_reg_update_q[magia_tile_pkg::DMA_DST_STRIDE_3_IDX]: begin - rw_valid_cfg = write_idma_reg(.req(cfg_configurer_req), .rsp(cfg_configurer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_DST_STRIDE_3_LOW_OFFSET), .data(cfg_reg_d[magia_tile_pkg::DMA_DST_STRIDE_3_IDX]), .reg_error(reg_error_cfg)); - cfg_reg_update_clr[magia_tile_pkg::DMA_DST_STRIDE_3_IDX] = reg_error_cfg ? 1'b0 : (rw_valid_cfg ? 1'b1 : 1'b0); - end - cfg_reg_update_q[magia_tile_pkg::DMA_SRC_STRIDE_3_IDX]: begin - rw_valid_cfg = write_idma_reg(.req(cfg_configurer_req), .rsp(cfg_configurer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_SRC_STRIDE_3_LOW_OFFSET), .data(cfg_reg_d[magia_tile_pkg::DMA_SRC_STRIDE_3_IDX]), .reg_error(reg_error_cfg)); - cfg_reg_update_clr[magia_tile_pkg::DMA_SRC_STRIDE_3_IDX] = reg_error_cfg ? 1'b0 : (rw_valid_cfg ? 1'b1 : 1'b0); - end - cfg_reg_update_q[magia_tile_pkg::DMA_REPS_3_IDX]: begin - rw_valid_cfg = write_idma_reg(.req(cfg_configurer_req), .rsp(cfg_configurer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_REPS_3_LOW_OFFSET), .data(cfg_reg_d[magia_tile_pkg::DMA_REPS_3_IDX]), .reg_error(reg_error_cfg)); - cfg_reg_update_clr[magia_tile_pkg::DMA_REPS_3_IDX] = reg_error_cfg ? 1'b0 : (rw_valid_cfg ? 
1'b1 : 1'b0); - end - endcase - end - end - end - - for (genvar i = 0; i < N_CFG_REG; i++) begin: gen_configuration_update_register - always_ff @(posedge clk_dc_g, negedge rst_ni) begin: configuration_update_register - if (~rst_ni) cfg_reg_update_q[i] <= 1'b0; - else begin - if (clear_i | cfg_reg_update_clr[i]) cfg_reg_update_q[i] <= 1'b0; - else cfg_reg_update_q[i] <= cfg_reg_update_d[i]; - end - end - end - -/*******************************************************/ -/** iDMA FE Configuration FSM End **/ -/*******************************************************/ -/** iDMA Transfer FSM Beginning **/ -/*******************************************************/ - - always_comb begin: idma_transferer_next_state_output_logic - clk_tfr_en = 1'b1; - n_idma_state = c_idma_state; - start_dma = 1'b0; - busy_dma = 1'b0; - done_dma = 1'b0; - transfer_not_set_properly = 1'b0; - reg_error_tfr = 1'b0; - rw_valid_tfr = 1'b0; - next_id_d = next_id_q; - done_id = '0; - cfg_transferer_req.addr = '0; - cfg_transferer_req.write = 1'b0; - cfg_transferer_req.wdata = '0; - cfg_transferer_req.wstrb = '0; - cfg_transferer_req.valid = 1'b0; - - case (c_idma_state) - IDLE: if (start_transfer) n_idma_state = START; else clk_tfr_en = 1'b0; - START: begin - start_dma = 1'b1; - rw_valid_tfr = read_idma_reg(.req(cfg_transferer_req), .rsp(cfg_transferer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_NEXT_ID_0_OFFSET), .data(next_id_d), .reg_error(reg_error_tfr)); - transfer_not_set_properly = (rw_valid_tfr & (next_id_d == 0)) ? 1'b1 : 1'b0; - n_idma_state = (reg_error_tfr | transfer_not_set_properly) ? IDLE : (~rw_valid_tfr ? c_idma_state: BUSY); - end - BUSY: begin - busy_dma = 1'b1; - rw_valid_tfr = read_idma_reg(.req(cfg_transferer_req), .rsp(cfg_transferer_rsp), .addr(idma_reg32_3d_reg_pkg::IDMA_REG32_3D_DONE_ID_0_OFFSET), .data(done_id), .reg_error(reg_error_tfr)); - n_idma_state = reg_error_tfr ? IDLE : (~rw_valid_tfr ? c_idma_state : (done_id != next_id_q ? 
c_idma_state : DONE)); - end - DONE: begin - done_dma = 1'b1; - n_idma_state = IDLE; - end - endcase - end - - always_ff @(posedge clk_tfr_g, negedge rst_ni) begin: idma_state_register - if (~rst_ni) c_idma_state <= IDLE; - else begin - if (clear_i) c_idma_state <= IDLE; - else c_idma_state <= n_idma_state; - end - end - - always_ff @(posedge clk_tfr_g, negedge rst_ni) begin: next_id_register - if (~rst_ni) next_id_q <= 1; - else begin - if (clear_i) next_id_q <= 1; - else next_id_q <= next_id_d; - end - end - -/*******************************************************/ -/** iDMA Transfer FSM End **/ -/*******************************************************/ - -endmodule: idma_xif_inst_decoder \ No newline at end of file diff --git a/hw/tile/magia_event_unit.sv b/hw/tile/magia_event_unit.sv new file mode 100644 index 0000000..32050a2 --- /dev/null +++ b/hw/tile/magia_event_unit.sv @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Solderpad Hardware License, Version 0.51 + * (the "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * SPDX-License-Identifier: SHL-0.51
+ *
+ * Authors: Luca Balboni
+ *
+ * Wrapper around event_unit_top for single-core tiles: exposes a flat eu_direct_* request/response port set, drives only direct link [0], and ties off the peripheral slave port and the SoC event inputs
+*/
+
+module magia_event_unit
+import magia_tile_pkg::*;
+#(
+  // MAGIA Event Unit Parameters - Optimized for single-core system
+  parameter int unsigned NB_CORES = 1, // Single core system
+  parameter int unsigned NB_SW_EVT = 1, // Minimal SW events for basic functionality
+  parameter int unsigned NB_BARR = 0, // Barrier units disabled (no sync needed)
+  parameter int unsigned NB_HW_MUT = 0, // Hardware mutexes disabled (no contention)
+  parameter int unsigned MUTEX_MSG_W = 32, // Mutex message width (unused but kept for compatibility)
+  parameter int unsigned DISP_FIFO_DEPTH = 0, // Task dispatcher disabled (no distribution)
+  parameter int unsigned EVNT_WIDTH = 8, // SOC event width (external events)
+  parameter int unsigned SOC_FIFO_DEPTH = 8 // SOC event FIFO depth (external events)
+)
+(
+  // clock and reset
+  input logic clk_i,
+  input logic rst_ni,
+  input logic test_mode_i,
+
+  // Event inputs (from accelerators, DMA, etc.)
+  input logic [NB_CORES-1:0] [3:0] acc_events_i,
+  input logic [NB_CORES-1:0] [1:0] dma_events_i,
+  input logic [NB_CORES-1:0] [1:0] timer_events_i,
+  input logic [NB_CORES-1:0][31:0] other_events_i,
+
+  // Core IRQ interface (both directions needed for proper operation)
+  output logic [NB_CORES-1:0] core_irq_req_o,
+  output logic [NB_CORES-1:0] [4:0] core_irq_id_o,
+  input logic [NB_CORES-1:0] core_irq_ack_i,
+  input logic [NB_CORES-1:0] [4:0] core_irq_ack_id_i,
+
+  // Core control interface
+  input logic [NB_CORES-1:0] core_busy_i,
+  output logic [NB_CORES-1:0] core_clock_en_o,
+
+  // Debug interface (bidirectional)
+  input logic [NB_CORES-1:0] dbg_req_i,
+  output logic [NB_CORES-1:0] core_dbg_req_o,
+
+  // EU Direct Link interface
+  input logic eu_direct_req_i,
+  input logic [31:0] eu_direct_addr_i,
+  input logic eu_direct_wen_i,
+  input logic [31:0] eu_direct_wdata_i,
+  input logic [3:0] eu_direct_be_i,
+  output logic eu_direct_gnt_o,
+  output logic eu_direct_rvalid_o,
+  output logic [31:0] eu_direct_rdata_o,
+  output logic eu_direct_err_o
+);
+
+  // Internal XBAR_PERIPH_BUS instances: one direct link per core and one peripheral slave port
+  XBAR_PERIPH_BUS #(.ID_WIDTH(NB_CORES+1)) eu_direct_link[NB_CORES-1:0]();
+  XBAR_PERIPH_BUS #(.ID_WIDTH(NB_CORES+1)) speriph_slave(); // Tied off
+
+  // Receives soc_periph_evt_ready_o from event_unit_top; not read anywhere else in this wrapper
+  logic soc_periph_evt_ready_internal;
+
+  // Convert abstract eu_direct interface to XBAR_PERIPH_BUS (only link [0] is driven, so NB_CORES > 1 would leave the other links undriven)
+  // eu_direct_addr_i is expected to already hold the relative offset (subtracted by the upstream demux) - not checked here
+  assign eu_direct_link[0].req = eu_direct_req_i;
+  assign eu_direct_link[0].add = eu_direct_addr_i;
+  assign eu_direct_link[0].wen = eu_direct_wen_i;
+  assign eu_direct_link[0].wdata = eu_direct_wdata_i;
+  assign eu_direct_link[0].be = eu_direct_be_i;
+  assign eu_direct_link[0].id = '0;
+
+  // Convert XBAR_PERIPH_BUS response to abstract interface
+  // Responses are passed through unmodified; grant/valid timing is whatever event_unit_top produces
+  assign eu_direct_gnt_o = eu_direct_link[0].gnt;
+  assign eu_direct_rvalid_o = eu_direct_link[0].r_valid;
+  assign eu_direct_rdata_o = eu_direct_link[0].r_rdata;
+  assign eu_direct_err_o = eu_direct_link[0].r_opc; // r_opc: 0=OK, 1=ERROR
+
+  // Tie off speriph_slave: req is held low, so this port never issues a request
+  assign speriph_slave.req = 1'b0;
+  assign speriph_slave.add = '0;
+  assign speriph_slave.wen = 1'b1; // presumably active-low write enable (1 = read) - TODO confirm against XBAR_PERIPH_BUS
+  assign speriph_slave.wdata = '0;
+  assign speriph_slave.be = '0;
+  assign speriph_slave.id = '0;
+
+
+
+  // event_unit_top instantiation: other_events_i feeds its cluster_events_i port, SoC peripheral event inputs are tied to zero
+  event_unit_top #(
+    .NB_CORES ( NB_CORES ),
+    .NB_SW_EVT ( NB_SW_EVT ),
+    .NB_BARR ( NB_BARR ),
+    .NB_HW_MUT ( NB_HW_MUT ),
+    .MUTEX_MSG_W ( MUTEX_MSG_W ),
+    .DISP_FIFO_DEPTH ( DISP_FIFO_DEPTH ),
+    .PER_ID_WIDTH ( NB_CORES+1 ),
+    .EVNT_WIDTH ( EVNT_WIDTH ),
+    .SOC_FIFO_DEPTH ( SOC_FIFO_DEPTH )
+  ) i_event_unit_top (
+    .clk_i ( clk_i ),
+    .rst_ni ( rst_ni ),
+    .test_mode_i ( test_mode_i ),
+    .acc_events_i ( acc_events_i ),
+    .dma_events_i ( dma_events_i ),
+    .timer_events_i ( timer_events_i ),
+    .cluster_events_i ( other_events_i ),
+    .core_irq_req_o ( core_irq_req_o ),
+    .core_irq_id_o ( core_irq_id_o ),
+    .core_irq_ack_i ( core_irq_ack_i ),
+    .core_irq_ack_id_i ( core_irq_ack_id_i ),
+    .core_busy_i ( core_busy_i ),
+    .core_clock_en_o ( core_clock_en_o ),
+    .dbg_req_i ( dbg_req_i ),
+    .core_dbg_req_o ( core_dbg_req_o ),
+    .soc_periph_evt_valid_i ( 1'b0 ),
+    .soc_periph_evt_ready_o ( soc_periph_evt_ready_internal ),
+    .soc_periph_evt_data_i ( '0 ),
+    .speriph_slave ( speriph_slave.Slave ),
+    .eu_direct_link ( eu_direct_link )
+  );
+
+endmodule: magia_event_unit
\ No newline at end of file
diff --git a/hw/tile/magia_tile.sv b/hw/tile/magia_tile.sv index 822ada0..932e6be 100644 --- a/hw/tile/magia_tile.sv +++ b/hw/tile/magia_tile.sv @@ -15,6 +15,7 @@ * SPDX-License-Identifier: SHL-0.51 * * Authors: Victor Isachi + * Luca Balboni * * MAGIA Tile */ @@ -27,8 +28,6 @@ module magia_tile import magia_pkg::*; import redmule_pkg::*; import hci_package::*; - import cv32e40x_pkg::*; - import fpu_ss_pkg::*; import snitch_icache_pkg::*; import idma_pkg::*; import
obi_pkg::*; @@ -45,10 +44,6 @@ module magia_tile parameter int unsigned N_WORDS_BANK = magia_pkg::N_WORDS_BANK, // Number of words per memory bank // Parameters used by the core - parameter cv32e40x_pkg::rv32_e CORE_ISA = cv32e40x_pkg::RV32I, // RV32I (default) 32 registers in the RF - RV32E 16 registers in the RF - parameter cv32e40x_pkg::a_ext_e CORE_A = cv32e40x_pkg::A, // Atomic Istruction (A) support (dafault: full support) - parameter cv32e40x_pkg::b_ext_e CORE_B = cv32e40x_pkg::ZBA_ZBB_ZBC_ZBS, // Bit Manipulation support (dafault: full support) - parameter cv32e40x_pkg::m_ext_e CORE_M = cv32e40x_pkg::M, // Multiply and Divide support (dafault: full support) // Parameters used by the iDMA parameter idma_pkg::error_cap_e ERROR_CAP = idma_pkg::NO_ERROR_HANDLING // Error handaling capability of the iDMA @@ -124,6 +119,14 @@ module magia_tile logic[magia_pkg::ADDR_W-1:0] tile_l1_end_addr; logic[magia_pkg::ADDR_W-1:0] tile_reserved_start_addr; logic[magia_pkg::ADDR_W-1:0] tile_reserved_end_addr; + logic[magia_pkg::ADDR_W-1:0] tile_redmule_ctrl_start_addr; + logic[magia_pkg::ADDR_W-1:0] tile_redmule_ctrl_end_addr; + logic[magia_pkg::ADDR_W-1:0] tile_idma_ctrl_start_addr; + logic[magia_pkg::ADDR_W-1:0] tile_idma_ctrl_end_addr; + logic[magia_pkg::ADDR_W-1:0] tile_fsync_ctrl_start_addr; + logic[magia_pkg::ADDR_W-1:0] tile_fsync_ctrl_end_addr; + logic[magia_pkg::ADDR_W-1:0] tile_event_unit_start_addr; + logic[magia_pkg::ADDR_W-1:0] tile_event_unit_end_addr; magia_tile_pkg::redmule_data_req_t redmule_data_req; magia_tile_pkg::redmule_data_rsp_t redmule_data_rsp; @@ -131,14 +134,16 @@ module magia_tile magia_tile_pkg::redmule_ctrl_req_t redmule_ctrl_req; // Can be used to manage RedMulE control at top-level magia_tile_pkg::redmule_ctrl_rsp_t redmule_ctrl_rsp; // Can be used to manage RedMulE control at top-level + // OBI to iDMA Bridge (Memory-mapped interface) - now encapsulated in idma_ctrl_mm + magia_tile_pkg::core_data_req_t core_data_req; 
magia_tile_pkg::core_data_rsp_t core_data_rsp; magia_tile_pkg::core_obi_data_req_t core_obi_data_req; magia_tile_pkg::core_obi_data_rsp_t core_obi_data_rsp; - magia_tile_pkg::core_obi_data_req_t[magia_tile_pkg::N_SBR-1:0] core_mem_data_req; // Index 0 -> L2, Index 1 -> L1SPM - magia_tile_pkg::core_obi_data_rsp_t[magia_tile_pkg::N_SBR-1:0] core_mem_data_rsp; // Index 0 -> L2, Index 1 -> L1SPM + magia_tile_pkg::core_obi_data_req_t[magia_tile_pkg::N_SBR-1:0] core_mem_data_req; // Index 0 -> L2, Index 1 -> L1SPM, Index 2 -> RedMulE_ctrl, Index 3 -> iDMA_ctrl, Index 4 -> FSync_ctrl + magia_tile_pkg::core_obi_data_rsp_t[magia_tile_pkg::N_SBR-1:0] core_mem_data_rsp; // Index 0 -> L2, Index 1 -> L1SPM, Index 2 -> RedMulE_ctrl, Index 3 -> iDMA_ctrl, Index 4 -> FSync_ctrl magia_tile_pkg::core_obi_data_req_t[magia_tile_pkg::N_SBR-1:0] core_mem_data_cut_req; // Index 0 -> L2, Index 1 -> L1SPM magia_tile_pkg::core_obi_data_rsp_t[magia_tile_pkg::N_SBR-1:0] core_mem_data_cut_rsp; // Index 0 -> L2, Index 1 -> L1SPM @@ -229,43 +234,46 @@ module magia_tile logic axi2obi_rsp_r_user; logic idma_clear; // Can be used to manage iDMA clear at top-level - logic idma_axi2obi_start; - logic idma_axi2obi_busy; - logic idma_axi2obi_done; - logic idma_axi2obi_error; - logic idma_obi2axi_start; - logic idma_obi2axi_busy; - logic idma_obi2axi_done; - logic idma_obi2axi_error; - - magia_tile_pkg::xif_inst_rule_t[magia_tile_pkg::N_RULES-1:0] xif_coproc_rules; + + logic sys_clk; logic sys_clk_en; + // Core clock gating signals + logic core_clk; // Clock gated per il core + logic core_clk_en; // Enable dal tile (sempre attivo) + + // Core output signals + logic core_busy_o; + logic[magia_pkg::N_IRQ-1:0] irq; logic redmule_busy; logic[magia_tile_pkg::N_CORE-1:0][1:0] redmule_evt; - logic clic_irq; - logic[magia_tile_pkg::CLIC_ID_W-1:0] clic_irq_id; - logic[7:0] clic_irq_level; - logic[1:0] clic_irq_priv; - logic clic_irq_shv; - - logic fencei_flush_req; - logic fencei_flush_ack; - - logic 
enable_prefetching; - snitch_icache_pkg::icache_l0_events_t[magia_tile_pkg::NR_FETCH_PORTS-1:0] icache_l0_events; // Can be used to implement i$ IRQs - snitch_icache_pkg::icache_l1_events_t icache_l1_events; // Can be used to implement i$ IRQs - logic[magia_tile_pkg::NR_FETCH_PORTS-1:0] flush_valid; - logic[magia_tile_pkg::NR_FETCH_PORTS-1:0] flush_ready; + logic enable_prefetching; + logic[magia_tile_pkg::NR_FETCH_PORTS-1:0] flush_valid; logic fsync_clear; // Can be used to manage iDMA clear at top-level logic fsync_done; logic fsync_error; + // iDMA transfer channel IRQ signals + logic idma_a2o_busy; + logic idma_a2o_start; + logic idma_a2o_done; + logic idma_a2o_error; + logic idma_o2a_busy; + logic idma_o2a_start; + logic idma_o2a_done; + logic idma_o2a_error; + + // Event arrays for Event Unit (need proper 2D array structure) + logic [0:0] [3:0] acc_events_array; + logic [0:0] [1:0] dma_events_array; + logic [0:0] [1:0] timer_events_array; + logic [0:0][31:0] other_events_array; + // FlooNoC connections between NI and router floo_req_t [4:0] floo_router_req_in; floo_rsp_t [4:0] floo_router_rsp_in; @@ -273,26 +281,14 @@ module magia_tile floo_rsp_t [4:0] floo_router_rsp_out; id_t floo_id; - - logic x_compressed_valid; - logic x_compressed_ready; - fpu_ss_pkg::x_compressed_req_t x_compressed_req; - fpu_ss_pkg::x_compressed_resp_t x_compressed_resp; - logic x_issue_valid; - logic x_issue_ready; - fpu_ss_pkg::x_issue_req_t x_issue_req; - fpu_ss_pkg::x_issue_resp_t x_issue_resp; - logic x_commit_valid; - fpu_ss_pkg::x_commit_t x_commit; - logic x_mem_valid; - logic x_mem_ready; - fpu_ss_pkg::x_mem_req_t x_mem_req; - fpu_ss_pkg::x_mem_resp_t x_mem_resp; - logic x_mem_result_valid; - fpu_ss_pkg::x_mem_result_t x_mem_result; - logic x_result_valid; - logic x_result_ready; - fpu_ss_pkg::x_result_t x_result; + + // Event Unit signals - Corrected for single-core array interface + logic [0:0] eu_core_irq_req; // [0:0] array for single core + logic 
[0:0][magia_tile_pkg::EVENT_UNIT_IRQ_WIDTH-1:0] eu_core_irq_id; // [0:0][4:0] array + logic [0:0] eu_core_irq_ack; // [0:0] array + logic [0:0][magia_tile_pkg::EVENT_UNIT_IRQ_WIDTH-1:0] eu_core_irq_ack_id; // [0:0][4:0] array + logic [0:0] eu_core_clk_en; // [0:0] array + logic [0:0] eu_core_dbg_req; // [0:0] array /*******************************************************/ /** Internal Signal Definitions End **/ @@ -304,11 +300,23 @@ module magia_tile assign tile_l1_end_addr = magia_tile_pkg::L1_ADDR_END + mhartid_i*magia_tile_pkg::L1_TILE_OFFSET; assign tile_reserved_start_addr = magia_tile_pkg::RESERVED_ADDR_START + mhartid_i*magia_tile_pkg::L1_TILE_OFFSET; assign tile_reserved_end_addr = magia_tile_pkg::RESERVED_ADDR_END + mhartid_i*magia_tile_pkg::L1_TILE_OFFSET; - + assign tile_redmule_ctrl_start_addr = magia_tile_pkg::REDMULE_CTRL_ADDR_START; + assign tile_redmule_ctrl_end_addr = magia_tile_pkg::REDMULE_CTRL_ADDR_END; + assign tile_idma_ctrl_start_addr = magia_tile_pkg::IDMA_CTRL_ADDR_START; + assign tile_idma_ctrl_end_addr = magia_tile_pkg::IDMA_CTRL_ADDR_END; + assign tile_fsync_ctrl_start_addr = magia_tile_pkg::FSYNC_CTRL_ADDR_START; + assign tile_fsync_ctrl_end_addr = magia_tile_pkg::FSYNC_CTRL_ADDR_END; + assign tile_event_unit_start_addr = magia_tile_pkg::EVENT_UNIT_ADDR_START; + assign tile_event_unit_end_addr = magia_tile_pkg::EVENT_UNIT_ADDR_END; + assign obi_xbar_rule[magia_tile_pkg::L2_IDX] = '{idx: 32'd0, start_addr: magia_tile_pkg::L2_ADDR_START, end_addr: magia_tile_pkg::L2_ADDR_END }; assign obi_xbar_rule[magia_tile_pkg::L1SPM_IDX] = '{idx: 32'd1, start_addr: tile_l1_start_addr, end_addr: tile_l1_end_addr }; assign obi_xbar_rule[magia_tile_pkg::RESERVED_IDX] = '{idx: 32'd1, start_addr: tile_reserved_start_addr, end_addr: tile_reserved_end_addr }; assign obi_xbar_rule[magia_tile_pkg::STACK_IDX] = '{idx: 32'd1, start_addr: magia_tile_pkg::STACK_ADDR_START, end_addr: magia_tile_pkg::STACK_ADDR_END }; + assign 
obi_xbar_rule[magia_tile_pkg::REDMULE_CTRL_IDX] = '{idx: 32'd2, start_addr: tile_redmule_ctrl_start_addr, end_addr: tile_redmule_ctrl_end_addr }; + assign obi_xbar_rule[magia_tile_pkg::IDMA_IDX] = '{idx: 32'd3, start_addr: tile_idma_ctrl_start_addr, end_addr: tile_idma_ctrl_end_addr }; + assign obi_xbar_rule[magia_tile_pkg::FSYNC_CTRL_IDX] = '{idx: 32'd4, start_addr: tile_fsync_ctrl_start_addr, end_addr: tile_fsync_ctrl_end_addr }; + assign axi_xbar_rule[magia_tile_pkg::L2_IDX] = '{idx: 32'd0, start_addr: magia_tile_pkg::L2_ADDR_START, end_addr: magia_tile_pkg::L2_ADDR_END }; assign axi_xbar_rule[magia_tile_pkg::L1SPM_IDX] = '{idx: 32'd1, start_addr: tile_l1_start_addr, end_addr: tile_l1_end_addr }; @@ -350,84 +358,67 @@ module magia_tile assign hci_clear = 1'b0; assign hci_ctrl = '0; - assign redmule_ctrl_req = '0; + assign idma_clear = 1'b0; assign fsync_clear = 1'b0; - assign xif_coproc_rules[magia_tile_pkg::XIF_REDMULE_IDX] = '{sign_list: '{ {{redmule_pkg::MCNFIG, 3'h0}}, - {{redmule_pkg::MARITH, 3'h0}}, {{redmule_pkg::MARITH, 3'h1}}, - {{redmule_pkg::MARITH, 3'h2}}, {{redmule_pkg::MARITH, 3'h3}}, - {{redmule_pkg::MARITH, 3'h4}}, {{redmule_pkg::MARITH, 3'h5}}, - {{redmule_pkg::MARITH, 3'h6}}, {{redmule_pkg::MARITH, 3'h7}} }}; - assign xif_coproc_rules[magia_tile_pkg::XIF_IDMA_IDX] = '{sign_list: '{ {{magia_tile_pkg::CONF_OPCODE, magia_tile_pkg::CONF_FUNC3}}, - {{magia_tile_pkg::SET_OPCODE, magia_tile_pkg::SET_AL_FUNC3}}, - {{magia_tile_pkg::SET_OPCODE, magia_tile_pkg::SET_SR2_FUNC3}}, - {{magia_tile_pkg::SET_OPCODE, magia_tile_pkg::SET_SR3_FUNC3}}, - {{magia_tile_pkg::SET_OPCODE, magia_tile_pkg::SET_S_FUNC3}}, - {{magia_tile_pkg::SET_OPCODE, magia_tile_pkg::SET_S_FUNC3}}, - {{magia_tile_pkg::SET_OPCODE, magia_tile_pkg::SET_S_FUNC3}}, - {{magia_tile_pkg::SET_OPCODE, magia_tile_pkg::SET_S_FUNC3}}, - {{magia_tile_pkg::SET_OPCODE, magia_tile_pkg::SET_S_FUNC3}} }}; - assign xif_coproc_rules[magia_tile_pkg::XIF_FSYNC_IDX] = '{sign_list: '{ default: 
{magia_tile_pkg::FSYNC_OPCODE, magia_tile_pkg::FSYNC_FUNC3} }}; - - assign irq[magia_tile_pkg::IRQ_IDX_REDMULE_EVT_0] = redmule_evt[0][0]; // Only 1 core supported - assign irq[magia_tile_pkg::IRQ_IDX_REDMULE_EVT_1] = redmule_evt[0][1]; // Only 1 core supported - assign irq[magia_tile_pkg::IRQ_IDX_A2O_ERROR] = idma_axi2obi_error; - assign irq[magia_tile_pkg::IRQ_IDX_O2A_ERROR] = idma_obi2axi_error; - assign irq[magia_tile_pkg::IRQ_IDX_A2O_DONE] = idma_axi2obi_done; - assign irq[magia_tile_pkg::IRQ_IDX_O2A_DONE] = idma_obi2axi_done; - assign irq[magia_tile_pkg::IRQ_IDX_A2O_START] = idma_axi2obi_start; - assign irq[magia_tile_pkg::IRQ_IDX_O2A_START] = idma_obi2axi_start; - assign irq[magia_tile_pkg::IRQ_IDX_A2O_BUSY] = idma_axi2obi_busy; - assign irq[magia_tile_pkg::IRQ_IDX_O2A_BUSY] = idma_obi2axi_busy; - assign irq[magia_tile_pkg::IRQ_IDX_REDMULE_BUSY] = redmule_busy; - assign irq[magia_tile_pkg::IRQ_IDX_FSYNC_DONE] = fsync_done; - assign irq[magia_tile_pkg::IRQ_IDX_FSYNC_ERROR] = fsync_error; - assign irq[magia_pkg::N_IRQ-magia_tile_pkg::IRQ_USED-1:16] - = irq_i[magia_pkg::N_IRQ-magia_tile_pkg::IRQ_USED-1:16]; - assign irq[15:12] = '0; - assign irq[11] = irq_i[11]; - assign irq[10:8] = '0; - assign irq[7] = irq_i[7]; - assign irq[6:4] = '0; - assign irq[3] = irq_i[3]; - assign irq[2:0] = '0; - - // CLIC unused - assign clic_irq = 1'b0; - assign clic_irq_id = '0; - assign clic_irq_level = '0; - assign clic_irq_priv = '0; - assign clic_irq_shv = 1'b0; - + // Icache control signals assign enable_prefetching = 1'b0; - assign flush_valid[0] = fencei_flush_req; // Single port i$ - assign fencei_flush_ack = flush_ready[0]; // Signle port i$ - - assign xif_redmule_if.result_ready = 1'b0; - assign xif_redmule_if.compressed_valid = 1'b0; - assign xif_redmule_if.compressed_req = '0; - assign xif_redmule_if.mem_ready = 1'b0; - assign xif_redmule_if.mem_resp = '0; + assign flush_valid = '0; + + // Event Unit provides unified interrupt management + // External interrupts must 
be mapped to bit 11 (MEIE - Machine External Interrupt Enable) + assign irq[magia_pkg::N_IRQ-1:12] = '0; // Clear all high IRQs + assign irq[11] = eu_core_irq_req[0]; // Event Unit IRQ mapped to external interrupt (bit 11) + assign irq[10:8] = '0; // Clear IRQs 8-10 + assign irq[7] = 1'b0; // Timer interrupt (unused) + assign irq[6:4] = '0; // Clear IRQs 4-6 + assign irq[3] = 1'b0; // Software interrupt (unused) + assign irq[2:0] = '0; // Clear IRQs 0-2 assign floo_id = '{x: (x_id_i+1), y: y_id_i, port_id: 0}; /*******************************************************/ /** Hardwired Signals End **/ /*******************************************************/ -/** Type Conversions Beginning **/ +/** Core Data Demux & Type Conversions **/ /*******************************************************/ + // Core data demux signals + magia_tile_pkg::core_data_req_t core_data_req_to_xbar; + magia_tile_pkg::core_data_rsp_t core_data_rsp_from_xbar; + magia_tile_pkg::eu_direct_req_t eu_direct_req; + magia_tile_pkg::eu_direct_rsp_t eu_direct_rsp; + + // Core data demux: splits requests between regular crossbar and EU direct link + core_data_demux_eu_direct i_core_data_demux_eu_direct ( + .clk_i ( sys_clk ), + .rst_ni ( rst_ni ), + + // Core interface + .core_data_req_i ( core_data_req ), + .core_data_rsp_o ( core_data_rsp ), + + // Regular crossbar interface + .xbar_data_req_o ( core_data_req_to_xbar ), + .xbar_data_rsp_i ( core_data_rsp_from_xbar ), + + // EU direct link interface + .eu_direct_req_o ( eu_direct_req ), + .eu_direct_rsp_i ( eu_direct_rsp ) + ); + + // Convert core data interface to OBI for crossbar data2obi_req i_core_data2obi_req ( - .data_req_i ( core_data_req ), - .obi_req_o ( core_obi_data_req ) + .data_req_i ( core_data_req_to_xbar ), + .obi_req_o ( core_obi_data_req ) ); obi2data_rsp i_core_obi2data_rsp ( - .obi_rsp_i ( core_obi_data_rsp ), - .data_rsp_o ( core_data_rsp ) + .obi_rsp_i ( core_obi_data_rsp ), + .data_rsp_o ( core_data_rsp_from_xbar ) ); obi2hci_req 
#( @@ -560,6 +551,13 @@ module magia_tile .rsp_r_user_i ( axi2obi_rsp_r_user ) ); + // RedMule controller OBI-to-HWPE control interface + obi2hwpe_ctrl obi2hwpe_ctrl_inst ( + .obi_req_i ( core_mem_data_req[2] ), + .obi_rsp_o ( core_mem_data_rsp[2] ), + .ctrl_req_o ( redmule_ctrl_req ), + .ctrl_rsp_i ( redmule_ctrl_rsp ) + ); /*******************************************************/ /** Type Conversions End **/ /*******************************************************/ @@ -578,6 +576,17 @@ module magia_tile .clk_o ( sys_clk ) ); + // Core clock gating controlled by Event Unit + assign core_clk_en = eu_core_clk_en[0]; // Event Unit controls core clock + + + tc_clk_gating core_clock_gating ( + .clk_i ( sys_clk ), + .en_i ( core_clk_en ), + .test_en_i ( test_mode_i ), + .clk_o ( core_clk ) + ); + /*******************************************************/ /** Clock gating End **/ /*******************************************************/ @@ -630,38 +639,6 @@ module magia_tile .clk( sys_clk ) ); - cv32e40x_if_xif xif_redmule_if (); - - cv32e40x_if_xif #( - .X_NUM_RS ( magia_tile_pkg::X_NUM_RS ), - .X_ID_WIDTH ( magia_tile_pkg::X_ID_W ), - .X_MEM_WIDTH ( magia_tile_pkg::X_MEM_W ), - .X_RFR_WIDTH ( magia_tile_pkg::X_RFR_W ), - .X_RFW_WIDTH ( magia_tile_pkg::X_RFW_W ), - .X_MISA ( magia_tile_pkg::X_MISA ), - .X_ECS_XS ( magia_tile_pkg::X_ECS_XS ) - ) xif_fpu_if (); - - cv32e40x_if_xif #( - .X_NUM_RS ( magia_tile_pkg::X_NUM_RS ), - .X_ID_WIDTH ( magia_tile_pkg::X_ID_W ), - .X_MEM_WIDTH ( magia_tile_pkg::X_MEM_W ), - .X_RFR_WIDTH ( magia_tile_pkg::X_RFR_W ), - .X_RFW_WIDTH ( magia_tile_pkg::X_RFW_W ), - .X_MISA ( magia_tile_pkg::X_MISA ), - .X_ECS_XS ( magia_tile_pkg::X_ECS_XS ) - ) xif_if (); - - cv32e40x_if_xif #( - .X_NUM_RS ( magia_tile_pkg::X_NUM_RS ), - .X_ID_WIDTH ( magia_tile_pkg::X_ID_W ), - .X_MEM_WIDTH ( magia_tile_pkg::X_MEM_W ), - .X_RFR_WIDTH ( magia_tile_pkg::X_RFR_W ), - .X_RFW_WIDTH ( magia_tile_pkg::X_RFW_W ), - .X_MISA ( magia_tile_pkg::X_MISA ), - .X_ECS_XS ( 
magia_tile_pkg::X_ECS_XS ) - ) xif_coproc_if[magia_tile_pkg::N_COPROC] (); // Index 0 -> RedMulE, Index 1 -> iDMA, Index 2 -> Fractal Sync, Index 3 -> FPU - /*******************************************************/ /** Interface Definitions End **/ /*******************************************************/ @@ -687,7 +664,7 @@ module magia_tile .N_CORES ( magia_tile_pkg::N_CORE ), .DW ( magia_tile_pkg::REDMULE_DW ), .UW ( magia_tile_pkg::REDMULE_UW ), - .X_EXT ( magia_tile_pkg::X_EXT_EN ), + .X_EXT ( 1'b0 ), // RedMulE does not implement the eXtension Interface (X) - using HWPE-CTRL mode .SysInstWidth ( magia_pkg::INSTR_W ), .SysDataWidth ( magia_pkg::DATA_W ), .redmule_data_req_t ( magia_tile_pkg::redmule_data_req_t ), @@ -702,11 +679,6 @@ module magia_tile .busy_o ( redmule_busy ), .evt_o ( redmule_evt ), - .xif_issue_if_i ( xif_coproc_if.coproc_issue[magia_tile_pkg::XIF_REDMULE_IDX] ), - .xif_result_if_o ( xif_redmule_if.coproc_result ), - .xif_compressed_if_i ( xif_redmule_if.coproc_compressed ), - .xif_mem_if_o ( xif_redmule_if.coproc_mem ), - .data_req_o ( redmule_data_req ), .data_rsp_i ( redmule_data_rsp ), @@ -720,115 +692,113 @@ module magia_tile /** Core Beginning **/ /*******************************************************/ - // Documentation of cv32e40x_core's design parameters and interface is available at: - // https://docs.openhwgroup.org/projects/cv32e40x-user-manual/en/latest/integration.html#core-integration - -`ifndef CORE_TRACES - cv32e40x_core #( -`else - cv32e40x_wrapper #( -`endif - .RV32 ( CORE_ISA ), - .A_EXT ( CORE_A ), - .B_EXT ( CORE_B ), - .M_EXT ( CORE_M ), - .X_EXT ( magia_tile_pkg::X_EXT_EN ), // Support for eXtension Interface (X) - .X_NUM_RS ( magia_tile_pkg::X_NUM_RS ), // RF read ports that can be used by the eXtension interface - .X_ID_WIDTH ( magia_tile_pkg::X_ID_W ), // ID width of eXtension interface - .X_MEM_WIDTH ( magia_tile_pkg::X_MEM_W ), // MEM width for loads/stores of eXtension interface - .X_RFR_WIDTH ( 
magia_tile_pkg::X_RFR_W ), // RF read width of eXtension interface - .X_RFW_WIDTH ( magia_tile_pkg::X_RFW_W ), // RF write width of eXtension interface - .X_MISA ( magia_tile_pkg::X_MISA ), // MISA extensions implemented on the eXtension interface - .X_ECS_XS ( magia_tile_pkg::X_ECS_XS ), // Default value for mstatus.XS if X_EXT = 1 - .NUM_MHPMCOUNTERS ( 1 ), // 1 MHPMCOUNTER performance counter - .DEBUG ( 1 ), // Enable debug support - .DM_REGION_START ( magia_tile_pkg::DM_REGION_START ), // Start address of Debug Module region - .DM_REGION_END ( magia_tile_pkg::DM_REGION_END ), // End address of Debug Module region - .DBG_NUM_TRIGGERS ( 1 ), // 1 debug trigger - .PMA_NUM_REGIONS ( 0 ), // No PMA (Physical Memory Attribution) regions - .PMA_CFG ( ), // No array of PMA configurations - .CLIC ( magia_tile_pkg::CLIC_EN ), // Support for Smclic, Smclicshv and Smclicconfig - .CLIC_ID_WIDTH ( magia_tile_pkg::CLIC_ID_W ) // Width of clic_irq_id_i and clic_irq_id_o - ) i_cv32e40x_core ( - // Clock and reset - .clk_i ( sys_clk ), - .rst_ni ( rst_ni ), - .scan_cg_en_i , - - // Configuration - .boot_addr_i , // instead of exposing these outside the tile, they could be managed with a configuration ROM/RAM - .mtvec_addr_i , // instead of exposing these outside the tile, they could be managed with a configuration ROM/RAM - .dm_halt_addr_i , // instead of exposing these outside the tile, they could be managed with a configuration ROM/RAM - .dm_exception_addr_i , // instead of exposing these outside the tile, they could be managed with a configuration ROM/RAM - .mhartid_i , // instead of exposing these outside the tile, they could be managed with a configuration ROM/RAM - .mimpid_patch_i , // instead of exposing these outside the tile, they could be managed with a configuration ROM/RAM + // flex-v core with integrated FPU and tracer + riscv_core #( + .N_EXT_PERF_COUNTERS ( magia_tile_pkg::N_EXT_PERF_COUNTERS ), + .INSTR_RDATA_WIDTH ( magia_tile_pkg::INSTR_RDATA_WIDTH ), + 
.PULP_SECURE ( magia_tile_pkg::PULP_SECURE ), + .N_PMP_ENTRIES ( magia_tile_pkg::N_PMP_ENTRIES ), + .USE_PMP ( magia_tile_pkg::USE_PMP ), + .PULP_CLUSTER ( magia_tile_pkg::PULP_CLUSTER ), + .FPU ( magia_tile_pkg::FPU ), + .Zfinx ( magia_tile_pkg::ZFINX ), + .FP_DIVSQRT ( magia_tile_pkg::FP_DIVSQRT ), + .SHARED_FP ( magia_tile_pkg::SHARED_FP ), + .SHARED_DSP_MULT ( magia_tile_pkg::SHARED_DSP_MULT ), + .SHARED_INT_MULT ( magia_tile_pkg::SHARED_INT_MULT ), + .SHARED_INT_DIV ( magia_tile_pkg::SHARED_INT_DIV ), + .SHARED_FP_DIVSQRT ( magia_tile_pkg::SHARED_FP_DIVSQRT ), + .WAPUTYPE ( magia_tile_pkg::WAPUTYPE ), + .APU_NARGS_CPU ( magia_tile_pkg::APU_NARGS_CPU ), + .APU_WOP_CPU ( magia_tile_pkg::APU_WOP_CPU ), + .APU_NDSFLAGS_CPU ( magia_tile_pkg::APU_NDSFLAGS_CPU ), + .APU_NUSFLAGS_CPU ( magia_tile_pkg::APU_NUSFLAGS_CPU ), + .DM_HaltAddress ( magia_tile_pkg::DM_HALT_ADDR ) + ) i_cv32e40p_core ( + // Clock and Reset + .clk_i ( core_clk ), // Use gated clock for core + .rst_ni ( rst_ni ), + + // Clock enable and test mode + .clock_en_i ( sys_clk_en ), + .test_en_i ( test_mode_i ), + + // Floating-point register file disable (for Zfinx) + .fregfile_disable_i ( 1'b0 ), // FPU enabled, use dedicated FP regfile + + // Boot configuration + .boot_addr_i ( boot_addr_i ), + + // Cluster/Core IDs + .cluster_id_i ( mhartid_i[9:4] ), + .core_id_i ( mhartid_i[3:0] ), // Instruction memory interface - .instr_req_o ( core_instr_req.req ), - .instr_gnt_i ( core_instr_rsp.gnt ), - .instr_addr_o ( core_instr_req.addr ), - .instr_memtype_o ( core_instr_req.memtype ), - .instr_prot_o ( core_instr_req.prot ), - .instr_dbg_o ( core_instr_req.dbg ), - .instr_rvalid_i ( core_instr_rsp.rvalid ), - .instr_rdata_i ( core_instr_rsp.rdata ), - .instr_err_i ( core_instr_rsp.err ), - - // Data memory interface - .data_req_o ( core_data_req.req ), - .data_gnt_i ( core_data_rsp.gnt ), - .data_addr_o ( core_data_req.addr ), - .data_atop_o ( core_data_req.atop ), - .data_be_o ( core_data_req.be ), - 
.data_memtype_o ( core_data_req.memtype ), - .data_prot_o ( core_data_req.prot ), - .data_dbg_o ( core_data_req.dbg ), - .data_wdata_o ( core_data_req.wdata ), - .data_we_o ( core_data_req.we ), - .data_rvalid_i ( core_data_rsp.rvalid ), - .data_rdata_i ( core_data_rsp.rdata ), - .data_err_i ( core_data_rsp.err ), - .data_exokay_i ( core_data_rsp.exokay ), - - // Cycle, Time - .mcycle_o , - .time_i , - - // eXtension interface - .xif_compressed_if ( xif_if.cpu_compressed ), - .xif_issue_if ( xif_if.cpu_issue ), - .xif_commit_if ( xif_if.cpu_commit ), - .xif_mem_if ( xif_if.cpu_mem ), - .xif_mem_result_if ( xif_if.cpu_mem_result ), - .xif_result_if ( xif_if.cpu_result ), - - // Interrupt interface - .irq_i ( irq ), - - .clic_irq_i ( clic_irq ), - .clic_irq_id_i ( clic_irq_id ), - .clic_irq_level_i ( clic_irq_level ), - .clic_irq_priv_i ( clic_irq_priv ), - .clic_irq_shv_i ( clic_irq_shv ), - - // Fence.i flush handshake - .fencei_flush_req_o ( fencei_flush_req ), - .fencei_flush_ack_i ( fencei_flush_ack ), - + .instr_req_o ( core_instr_req.req ), + .instr_gnt_i ( core_instr_rsp.gnt ), + .instr_rvalid_i ( core_instr_rsp.rvalid ), + .instr_addr_o ( core_instr_req.addr ), + .instr_rdata_i ( core_instr_rsp.rdata ), + + // Data memory interface + .data_req_o ( core_data_req.req ), + .data_gnt_i ( core_data_rsp.gnt ), + .data_rvalid_i ( core_data_rsp.rvalid ), + .data_addr_o ( core_data_req.addr ), + .data_be_o ( core_data_req.be ), + .data_wdata_o ( core_data_req.wdata ), + .data_we_o ( core_data_req.we ), + .data_rdata_i ( core_data_rsp.rdata ), + + // APU interface (disabled - not connected) + .apu_master_req_o ( ), + .apu_master_ready_o ( ), + .apu_master_gnt_i ( '0 ), + + .apu_master_operands_o ( ), + .apu_master_op_o ( ), + .apu_master_type_o ( ), + .apu_master_flags_o ( ), + + .apu_master_valid_i ( '0 ), + .apu_master_result_i ( '0 ), + .apu_master_flags_i ( '0 ), + + // Interrupts + .irq_i ( eu_core_irq_req[0] ), + .irq_id_i ( '0 ), + .irq_ack_o ( 
eu_core_irq_ack[0] ), + .irq_id_o ( eu_core_irq_ack_id[0] ), + .irq_sec_i ( '0 ), + + // Security level (unused) + .sec_lvl_o ( ), + // Debug interface - .debug_req_i , - .debug_havereset_o , - .debug_running_o , - .debug_halted_o , - .debug_pc_valid_o , - .debug_pc_o , - - // Special control signals - .fetch_enable_i , - .core_sleep_o , - .wu_wfe_i + .debug_req_i ( debug_req_i ), + + // CPU control + .fetch_enable_i ( fetch_enable_i ), + .core_busy_o ( core_busy_o ), + + + // Performance counters + .ext_perf_counters_i ( '0 ) ); + assign core_sleep_o = !core_busy_o; + + assign core_instr_req.memtype = 2'b00; + assign core_instr_req.prot = 3'b000; + assign core_instr_req.dbg = 1'b0; + + assign mcycle_o = 64'h0; + assign debug_havereset_o = 1'b0; + assign debug_running_o = 1'b0; + assign debug_halted_o = 1'b0; + assign debug_pc_valid_o = 1'b0; + assign debug_pc_o = 32'h0; + /*******************************************************/ /** Core End **/ /*******************************************************/ @@ -856,7 +826,7 @@ module magia_tile .mgr_port_rsp_i ( core_l1_data_amo_rsp ) ); - for (genvar i = 0; i < magia_tile_pkg::N_MGR; i++) begin: gen_obi_xbar_sbr_cut + for (genvar i = 1; i < magia_tile_pkg::N_MGR; i++) begin: gen_obi_xbar_sbr_cut obi_cut #( .ObiCfg ( magia_tile_pkg::obi_amo_cfg ), .obi_a_chan_t ( magia_tile_pkg::core_data_obi_a_chan_t ), @@ -872,6 +842,9 @@ module magia_tile .mgr_port_rsp_i ( obi_xbar_slv_cut_rsp[i] ) ); end + + assign obi_xbar_slv_cut_req[0] = obi_xbar_slv_req[0]; + assign obi_xbar_slv_rsp[0] = obi_xbar_slv_cut_rsp[0]; obi_xbar #( .SbrPortObiCfg ( magia_tile_pkg::obi_amo_cfg ), @@ -983,67 +956,51 @@ module magia_tile /*******************************************************/ /** L1 SPM (TCDM) End **/ /*******************************************************/ -/** Xif Dispatcher Beginning **/ -/*******************************************************/ - - xif_inst_dispatcher #( - .N_COPROC ( magia_tile_pkg::N_COPROC ), - .N_RULES ( 
magia_tile_pkg::N_RULES ), - .DEFAULT_IDX ( magia_tile_pkg::DEFAULT_IDX ), - .OPCODE_OFF ( magia_tile_pkg::OPCODE_OFF ), - .OPCODE_W ( magia_tile_pkg::OPCODE_W ), - .xif_inst_rule_t ( magia_tile_pkg::xif_inst_rule_t ) - ) i_xif_inst_dispatcher ( - .clk_i ( sys_clk ), - .rst_ni ( rst_ni ), - .xif_issue_if_i ( xif_if.coproc_issue ), - .xif_issue_if_o ( xif_coproc_if.cpu_issue ), - .xif_result_if_o ( xif_if.coproc_result ), - .xif_result_if_i ( xif_fpu_if.cpu_result ), - .rules_i ( xif_coproc_rules ) - ); - -/*******************************************************/ -/** Xif Dispatcher End **/ -/*******************************************************/ /** iDMA Beginning **/ /*******************************************************/ - idma_ctrl #( - .ERROR_CAP ( ERROR_CAP ), - .axi_req_t ( magia_tile_pkg::idma_axi_req_t ), - .axi_rsp_t ( magia_tile_pkg::idma_axi_rsp_t ), - .obi_req_t ( magia_tile_pkg::idma_obi_req_t ), - .obi_rsp_t ( magia_tile_pkg::idma_obi_rsp_t ) - ) i_idma_ctrl ( - .clk_i ( sys_clk ), - .rst_ni ( rst_ni ), - .testmode_i ( test_mode_i ), - .clear_i ( idma_clear ), - - .xif_issue_if_i ( xif_coproc_if.coproc_issue[magia_tile_pkg::XIF_IDMA_IDX] ), - .axi_read_req_o ( idma_axi_read_req ), - .axi_read_rsp_i ( idma_axi_read_rsp ), - - .axi_write_req_o ( idma_axi_write_req ), - .axi_write_rsp_i ( idma_axi_write_rsp ), - - .obi_read_req_o ( idma_obi_read_req ), - .obi_read_rsp_i ( idma_obi_read_rsp ), - - .obi_write_req_o ( idma_obi_write_req ), - .obi_write_rsp_i ( idma_obi_write_rsp ), - - .axi2obi_start_o ( idma_axi2obi_start ), - .axi2obi_busy_o ( idma_axi2obi_busy ), - .axi2obi_done_o ( idma_axi2obi_done ), - .axi2obi_error_o ( idma_axi2obi_error ), - - .obi2axi_start_o ( idma_obi2axi_start ), - .obi2axi_busy_o ( idma_obi2axi_busy ), - .obi2axi_done_o ( idma_obi2axi_done ), - .obi2axi_error_o ( idma_obi2axi_error ) + idma_ctrl_mm #( + .ERROR_CAP ( ERROR_CAP ), + .obi_req_t ( magia_tile_pkg::core_obi_data_req_t ), + .obi_rsp_t ( 
magia_tile_pkg::core_obi_data_rsp_t ), + .idma_fe_reg_req_t ( magia_tile_pkg::idma_fe_reg_req_t ), + .idma_fe_reg_rsp_t ( magia_tile_pkg::idma_fe_reg_rsp_t ), + .axi_req_t ( magia_tile_pkg::idma_axi_req_t ), + .axi_rsp_t ( magia_tile_pkg::idma_axi_rsp_t ), + .idma_obi_req_t ( magia_tile_pkg::idma_obi_req_t ), + .idma_obi_rsp_t ( magia_tile_pkg::idma_obi_rsp_t ) + ) i_idma_ctrl_mm ( + .clk_i ( sys_clk ), + .rst_ni ( rst_ni ), + .test_en_i ( test_mode_i ), + .clear_i ( idma_clear ), + + // OBI Slave Interface (CPU memory-mapped access) + .obi_req_i ( core_mem_data_req[3] ), + .obi_rsp_o ( core_mem_data_rsp[3] ), + + // AXI Master Interfaces (to L2 memory) + .axi_read_req_o ( idma_axi_read_req ), + .axi_read_rsp_i ( idma_axi_read_rsp ), + .axi_write_req_o ( idma_axi_write_req ), + .axi_write_rsp_i ( idma_axi_write_rsp ), + + // OBI Master Interfaces (to L1 memory) + .obi_read_req_o ( idma_obi_read_req ), + .obi_read_rsp_i ( idma_obi_read_rsp ), + .obi_write_req_o ( idma_obi_write_req ), + .obi_write_rsp_i ( idma_obi_write_rsp ), + + // Serialized IRQ outputs + .irq_a2o_busy_o ( idma_a2o_busy ), + .irq_a2o_start_o ( idma_a2o_start ), + .irq_a2o_done_o ( idma_a2o_done ), + .irq_a2o_error_o ( idma_a2o_error ), + .irq_o2a_busy_o ( idma_o2a_busy ), + .irq_o2a_start_o ( idma_o2a_start ), + .irq_o2a_done_o ( idma_o2a_done ), + .irq_o2a_error_o ( idma_o2a_error ) ); axi_rw_join #( @@ -1094,10 +1051,10 @@ module magia_tile .fetch_rerror_o ( core_cache_instr_rsp.rerror ), .enable_prefetching_i ( enable_prefetching ), - .icache_l0_events_o ( icache_l0_events ), - .icache_l1_events_o ( icache_l1_events ), + .icache_l0_events_o ( ), + .icache_l1_events_o ( ), .flush_valid_i ( flush_valid ), - .flush_ready_o ( flush_ready ), + .flush_ready_o ( ), .sram_cfg_data_i ( '0 ), .sram_cfg_tag_i ( '0 ), @@ -1237,103 +1194,88 @@ module magia_tile /** Fractal Sync Out Beginning **/ /*******************************************************/ - fractal_sync_xif_inst_decoder #( - .INSTR_W ( 
magia_tile_pkg::FSYNC_INSTR_W ), - .DATA_W ( magia_tile_pkg::FSYNC_DATA_W ), - .ADDR_W ( magia_tile_pkg::FSYNC_ADDR_W ), - .N_RF_PORTS ( magia_tile_pkg::FSYNC_N_RF_PORTS ), - .OPCODE_W ( magia_tile_pkg::FSYNC_OPCODE_W ), - .FUNC3_W ( magia_tile_pkg::FSYNC_FUNC3_W ), - .OPCODE_OFF ( magia_tile_pkg::FSYNC_OPCODE_OFF ), - .FUNC3_OFF ( magia_tile_pkg::FSYNC_FUNC3_OFF ), - .N_CFG_REG ( magia_tile_pkg::FSYNC_N_CFG_REG ), - .AGGR_W ( magia_tile_pkg::FSYNC_AGGR_W ), - .ID_W ( magia_tile_pkg::FSYNC_ID_W ), - .NBR_AGGR_W ( magia_tile_pkg::FSYNC_NBR_AGGR_W ), - .NBR_ID_W ( magia_tile_pkg::FSYNC_NBR_ID_W ), - .STALL ( magia_tile_pkg::FSYNC_STALL ) - ) i_fsync_dec ( - .clk_i ( sys_clk ), - .rst_ni ( rst_ni ), - .clear_i ( fsync_clear ), - .xif_issue_if_i ( xif_coproc_if.coproc_issue[magia_tile_pkg::XIF_FSYNC_IDX] ), - .ht_fsync_if_o ( ht_fsync_if_o ), - .hn_fsync_if_o ( hn_fsync_if_o ), - .vt_fsync_if_o ( vt_fsync_if_o ), - .vn_fsync_if_o ( vn_fsync_if_o ), - .done_o ( fsync_done ), - .error_o ( fsync_error ) + // Fractal Sync OBI Memory-Mapped Slave (replaces XIF interface) + obi_slave_fsync #( + .BASE_ADDR ( magia_tile_pkg::FSYNC_CTRL_ADDR_START ), + .AGGR_W ( magia_tile_pkg::FSYNC_AGGR_W ), + .ID_W ( magia_tile_pkg::FSYNC_ID_W ), + .NBR_AGGR_W ( magia_tile_pkg::FSYNC_NBR_AGGR_W ), + .NBR_ID_W ( magia_tile_pkg::FSYNC_NBR_ID_W ) + ) i_fsync_mm ( + .clk_i ( sys_clk ), + .rst_ni ( rst_ni ), + .clear_i ( fsync_clear ), + .obi_req_i ( core_mem_data_req[4] ), + .obi_rsp_o ( core_mem_data_rsp[4] ), + .ht_fsync_if_o ( ht_fsync_if_o ), + .hn_fsync_if_o ( hn_fsync_if_o ), + .vt_fsync_if_o ( vt_fsync_if_o ), + .vn_fsync_if_o ( vn_fsync_if_o ), + .done_o ( fsync_done ), + .error_o ( fsync_error ) ); /*******************************************************/ /** Fractal Sync Out End **/ /*******************************************************/ -/** Floating-Point Unit Beginning **/ +/** Event Unit Beginning **/ /*******************************************************/ - fpu_ss #( - 
.PULP_ZFINX ( magia_tile_pkg::FPU_ZFINX ), - .INPUT_BUFFER_DEPTH ( magia_tile_pkg::FPU_BUFFER_DEPTH ), - .INPUT_BUFFER_FALL_THROUGH ( magia_tile_pkg::FPU_BUFFER_FT ), - .OUT_OF_ORDER ( magia_tile_pkg::FPU_OOO ), - .FORWARDING ( magia_tile_pkg::FPU_FWD ), - .PulpDivsqrt ( magia_tile_pkg::FPU_DIVSQRT ), - .FPU_FEATURES ( magia_tile_pkg::FPU_FEATURES ), - .FPU_IMPLEMENTATION ( magia_tile_pkg::FPU_IMPLEMENTATION ) - ) i_fpu ( - .clk_i ( sys_clk ), - .rst_ni ( rst_ni ), - .x_compressed_valid_i ( x_compressed_valid ), - .x_compressed_ready_o ( x_compressed_ready ), - .x_compressed_req_i ( x_compressed_req ), - .x_compressed_resp_o ( x_compressed_resp ), - .x_issue_valid_i ( x_issue_valid ), - .x_issue_ready_o ( x_issue_ready ), - .x_issue_req_i ( x_issue_req ), - .x_issue_resp_o ( x_issue_resp ), - .x_commit_valid_i ( x_commit_valid ), - .x_commit_i ( x_commit ), - .x_mem_valid_o ( x_mem_valid ), - .x_mem_ready_i ( x_mem_ready ), - .x_mem_req_o ( x_mem_req ), - .x_mem_resp_i ( x_mem_resp ), - .x_mem_result_valid_i ( x_mem_result_valid ), - .x_mem_result_i ( x_mem_result ), - .x_result_valid_o ( x_result_valid ), - .x_result_ready_i ( x_result_ready ), - .x_result_o ( x_result ) - ); - - xif_if2struct i_xif_if2struct ( - .xif_compressed_if_i ( xif_if.coproc_compressed ), - .xif_issue_if_i ( xif_coproc_if.coproc_issue[magia_tile_pkg::XIF_FPU_IDX] ), - .xif_commit_if_i ( xif_if.coproc_commit ), - .xif_mem_if_o ( xif_if.coproc_mem ), - .xif_mem_result_if_i ( xif_if.coproc_mem_result ), - .xif_result_if_o ( xif_fpu_if.coproc_result ), - .x_compressed_valid_o ( x_compressed_valid ), - .x_compressed_ready_i ( x_compressed_ready ), - .x_compressed_req_o ( x_compressed_req ), - .x_compressed_resp_i ( x_compressed_resp ), - .x_issue_valid_o ( x_issue_valid ), - .x_issue_ready_i ( x_issue_ready ), - .x_issue_req_o ( x_issue_req ), - .x_issue_resp_i ( x_issue_resp ), - .x_commit_valid_o ( x_commit_valid ), - .x_commit_o ( x_commit ), - .x_mem_valid_i ( x_mem_valid ), - 
.x_mem_ready_o ( x_mem_ready ), - .x_mem_req_i ( x_mem_req ), - .x_mem_resp_o ( x_mem_resp ), - .x_mem_result_valid_o ( x_mem_result_valid ), - .x_mem_result_o ( x_mem_result ), - .x_result_valid_i ( x_result_valid ), - .x_result_ready_o ( x_result_ready ), - .x_result_i ( x_result ) + // Event array assignments for proper 2D array structure + assign acc_events_array[0] = {redmule_evt[0][1], redmule_evt[0][0], redmule_busy, 1'b0}; + assign dma_events_array[0] = {idma_o2a_done, idma_a2o_done}; + assign timer_events_array[0] = 2'b00; + assign other_events_array[0] = {idma_o2a_busy, idma_a2o_busy, idma_o2a_start, idma_a2o_start, idma_o2a_error, idma_a2o_error, fsync_error, fsync_done, 24'b0}; // [31:28] iDMA status {o2a_busy, a2o_busy, o2a_start, a2o_start} | [27:26] iDMA errors {o2a_error, a2o_error} | [25:24] FSync {error, done} | [23:0] reserved (tied to 0) - SW events are internal to the Event Unit + + + magia_event_unit #( + .NB_CORES ( 1 ), // Single core system + .NB_SW_EVT ( 1 ), // Minimum 1 SW event to avoid indexing issues (unused but required) + .NB_BARR ( 0 ), // No barriers needed with single core + .NB_HW_MUT ( 0 ), // No mutexes needed with single core + .MUTEX_MSG_W ( 32 ), // Keep default even if unused + .DISP_FIFO_DEPTH ( 0 ), // No task dispatcher needed + .EVNT_WIDTH ( 8 ), // SOC event width (keep default) + .SOC_FIFO_DEPTH ( 8 ) // SOC FIFO depth (keep default) + ) i_magia_event_unit ( + .clk_i ( sys_clk ), + .rst_ni ( rst_ni ), + .test_mode_i ( test_mode_i ), + + // Event inputs - single core arrays + .acc_events_i ( acc_events_array ), // Accelerator events + .dma_events_i ( dma_events_array ), // iDMA completion events + .timer_events_i ( timer_events_array ), + .other_events_i ( other_events_array ), // Combined events + + // Core IRQ interface + .core_irq_req_o ( eu_core_irq_req ), + .core_irq_id_o ( eu_core_irq_id ), + .core_irq_ack_i ( eu_core_irq_ack ), + .core_irq_ack_id_i( eu_core_irq_ack_id ), + + // Core control + .core_busy_i ( core_busy_o ), +
.core_clock_en_o ( eu_core_clk_en ), + + // Debug + .dbg_req_i ( debug_req_i ), + .core_dbg_req_o ( eu_core_dbg_req ), + + // EU Direct Link Interface - abstract types + .eu_direct_req_i ( eu_direct_req.req ), + .eu_direct_addr_i ( eu_direct_req.addr ), + .eu_direct_wen_i ( eu_direct_req.wen ), + .eu_direct_wdata_i ( eu_direct_req.wdata ), + .eu_direct_be_i ( eu_direct_req.be ), + .eu_direct_gnt_o ( eu_direct_rsp.gnt ), + .eu_direct_rvalid_o ( eu_direct_rsp.rvalid ), + .eu_direct_rdata_o ( eu_direct_rsp.rdata ), + .eu_direct_err_o ( eu_direct_rsp.err ) ); /*******************************************************/ -/** Floating-Point Unit End **/ +/** Event Unit End **/ /*******************************************************/ endmodule: magia_tile \ No newline at end of file diff --git a/hw/tile/magia_tile_pkg.sv b/hw/tile/magia_tile_pkg.sv index bc676bd..553315e 100644 --- a/hw/tile/magia_tile_pkg.sv +++ b/hw/tile/magia_tile_pkg.sv @@ -15,6 +15,7 @@ * SPDX-License-Identifier: SHL-0.51 * * Authors: Victor Isachi + * Luca Balboni (luca.balboni10@studio.unibo.it) * * MAGIA Tile Package */ @@ -33,37 +34,37 @@ package magia_tile_pkg; `include "../include/alias.svh" - // IRQ constraints - localparam int unsigned IRQ_IDX_REDMULE_EVT_0 = 31; - localparam int unsigned IRQ_IDX_REDMULE_EVT_1 = 30; - localparam int unsigned IRQ_IDX_A2O_ERROR = 29; - localparam int unsigned IRQ_IDX_O2A_ERROR = 28; - localparam int unsigned IRQ_IDX_A2O_DONE = 27; - localparam int unsigned IRQ_IDX_O2A_DONE = 26; - localparam int unsigned IRQ_IDX_A2O_START = 25; - localparam int unsigned IRQ_IDX_O2A_START = 24; - localparam int unsigned IRQ_IDX_A2O_BUSY = 23; - localparam int unsigned IRQ_IDX_O2A_BUSY = 22; - localparam int unsigned IRQ_IDX_REDMULE_BUSY = 21; - localparam int unsigned IRQ_IDX_FSYNC_DONE = 20; - localparam int unsigned IRQ_IDX_FSYNC_ERROR = 19; - localparam int unsigned IRQ_USED = 13; + // IRQ constraints - Event Unit provides unified interrupt management + // Individual IRQ 
indices no longer needed as Event Unit handles all events internally // Address map - localparam logic[magia_pkg::ADDR_W-1:0] RESERVED_ADDR_START = 32'h0000_0000; - localparam logic[magia_pkg::ADDR_W-1:0] RESERVED_SIZE = 32'h0001_0000; - localparam logic[magia_pkg::ADDR_W-1:0] RESERVED_ADDR_END = RESERVED_ADDR_START + RESERVED_SIZE; - localparam logic[magia_pkg::ADDR_W-1:0] STACK_ADDR_START = RESERVED_ADDR_END; - localparam logic[magia_pkg::ADDR_W-1:0] STACK_SIZE = 32'h0001_0000; - localparam logic[magia_pkg::ADDR_W-1:0] STACK_ADDR_END = STACK_ADDR_START + STACK_SIZE; - localparam logic[magia_pkg::ADDR_W-1:0] L1_ADDR_START = STACK_ADDR_END; - localparam logic[magia_pkg::ADDR_W-1:0] L1_SIZE = 32'h000E_0000; - localparam logic[magia_pkg::ADDR_W-1:0] L1_ADDR_END = L1_ADDR_START + L1_SIZE; - localparam logic[magia_pkg::ADDR_W-1:0] L1_TILE_OFFSET = 32'h0010_0000; - localparam logic[magia_pkg::ADDR_W-1:0] L2_ADDR_START = 32'hC000_0000; - localparam logic[magia_pkg::ADDR_W-1:0] L2_SIZE = 32'h4000_0000; - localparam logic[magia_pkg::ADDR_W-1:0] L2_ADDR_END = L2_ADDR_START + L2_SIZE; + localparam logic [magia_pkg::ADDR_W-1:0] REDMULE_CTRL_ADDR_START = 32'h0000_0100; + localparam logic [magia_pkg::ADDR_W-1:0] REDMULE_CTRL_SIZE = 32'h0000_00FF; + localparam logic [magia_pkg::ADDR_W-1:0] REDMULE_CTRL_ADDR_END = REDMULE_CTRL_ADDR_START + REDMULE_CTRL_SIZE; + localparam logic [magia_pkg::ADDR_W-1:0] IDMA_CTRL_ADDR_START = REDMULE_CTRL_ADDR_END + 1; + localparam logic [magia_pkg::ADDR_W-1:0] IDMA_CTRL_SIZE = 32'h0000_03FF; + localparam logic [magia_pkg::ADDR_W-1:0] IDMA_CTRL_ADDR_END = IDMA_CTRL_ADDR_START + IDMA_CTRL_SIZE; + localparam logic [magia_pkg::ADDR_W-1:0] FSYNC_CTRL_ADDR_START = IDMA_CTRL_ADDR_END + 1; + localparam logic [magia_pkg::ADDR_W-1:0] FSYNC_CTRL_SIZE = 32'h0000_00FF; + localparam logic [magia_pkg::ADDR_W-1:0] FSYNC_CTRL_ADDR_END = FSYNC_CTRL_ADDR_START + FSYNC_CTRL_SIZE; + localparam logic [magia_pkg::ADDR_W-1:0] EVENT_UNIT_ADDR_START = FSYNC_CTRL_ADDR_END + 
1; + localparam logic [magia_pkg::ADDR_W-1:0] EVENT_UNIT_SIZE = 32'h0000_0FFF; + localparam logic [magia_pkg::ADDR_W-1:0] EVENT_UNIT_ADDR_END = EVENT_UNIT_ADDR_START + EVENT_UNIT_SIZE; + localparam logic [magia_pkg::ADDR_W-1:0] RESERVED_ADDR_START = EVENT_UNIT_ADDR_END + 1; + localparam logic [magia_pkg::ADDR_W-1:0] RESERVED_SIZE = 32'h0000_E8FF; // Calculated to make RESERVED_END = 0x0000FFFF + localparam logic [magia_pkg::ADDR_W-1:0] RESERVED_ADDR_END = RESERVED_ADDR_START + RESERVED_SIZE; + localparam logic [magia_pkg::ADDR_W-1:0] STACK_ADDR_START = RESERVED_ADDR_END + 1; + localparam logic [magia_pkg::ADDR_W-1:0] STACK_SIZE = 32'h0000_FFFF; + localparam logic [magia_pkg::ADDR_W-1:0] STACK_ADDR_END = STACK_ADDR_START + STACK_SIZE; + localparam logic [magia_pkg::ADDR_W-1:0] L1_ADDR_START = STACK_ADDR_END + 1; + localparam logic [magia_pkg::ADDR_W-1:0] L1_SIZE = 32'h000D_FFFF; + localparam logic [magia_pkg::ADDR_W-1:0] L1_ADDR_END = L1_ADDR_START + L1_SIZE; + localparam logic [magia_pkg::ADDR_W-1:0] L1_TILE_OFFSET = 32'h0010_0000; + localparam logic [magia_pkg::ADDR_W-1:0] L2_ADDR_START = 32'hC000_0000; + localparam logic [magia_pkg::ADDR_W-1:0] L2_SIZE = 32'h3FFF_FFFF; + localparam logic [magia_pkg::ADDR_W-1:0] L2_ADDR_END = L2_ADDR_START + L2_SIZE; + // Parameters used by the HCI parameter int unsigned N_HWPE = 1; // Number of HWPEs attached to the port parameter int unsigned N_CORE = 1; // Number of Core ports @@ -94,19 +95,37 @@ package magia_tile_pkg; localparam int unsigned SWH = DWH/BWH; // Strobe Width for HWPE Interconnect localparam int unsigned WDH = DWH/WWH; // Number of words per data for HWPE Interconnect - // Parameters used by the core - parameter bit X_EXT_EN = 1; // Enable eXtension Interface (X) support, see eXtension Interface - parameter int unsigned X_NUM_RS = 3; // Number of register file read ports that can be used by the eXtension interface - parameter int unsigned X_ID_W = 4; // Identification width for the eXtension interface - parameter 
int unsigned X_MEM_W = 32; // Memory access width for loads/stores via the eXtension interface - parameter int unsigned X_RFR_W = 32; // Register file read access width for the eXtension interface - parameter int unsigned X_RFW_W = 32; // Register file write access width for the eXtension interface - parameter bit[31:0] X_MISA = 32'h20; // MISA extensions implemented on the eXtension interface, see Machine ISA (misa). X_MISA can only be used to set a subset of the following: {P, V, F, M} - parameter bit[1 :0] X_ECS_XS = 2'b0; // Default value for mstatus.XS if X_EXT = 1, see Machine Status (mstatus) - parameter bit[31:0] DM_REGION_START = 32'hF0000000; // Start address of Debug Module region, see Debug & Trigger - parameter bit[31:0] DM_REGION_END = 32'hF0003FFF; // End address of Debug Module region, see Debug & Trigger - parameter bit CLIC_EN = 1'b0; // Specifies whether Smclic, Smclicshv and Smclicconfig are supported - parameter int unsigned CLIC_ID_W = 1; // Width of clic_irq_id_i and clic_irq_id_o. The maximum number of supported interrupts in CLIC mode is 2^CLIC_ID_WIDTH. 
Trap vector table alignment is restricted as described in Machine Trap Vector Table Base Address (mtvt) + // Parameters used by Event Unit + parameter int unsigned EVENT_UNIT_IRQ_WIDTH = 5; // Width of Event Unit IRQ ID signals (supports up to 32 different event types) + + // Parameters used by cv32e40p core + parameter int unsigned N_EXT_PERF_COUNTERS = 0; // Number of external performance counters + parameter int unsigned INSTR_RDATA_WIDTH = 32; // Instruction data width + parameter bit PULP_SECURE = 1'b0; // PULP security features + parameter int unsigned N_PMP_ENTRIES = 16; // Number of PMP entries + parameter bit USE_PMP = 1'b1; // Enable PMP + parameter bit PULP_CLUSTER = 1'b1; // PULP cluster mode + parameter bit FPU = 1'b1; // Enable FPU (main feature) + parameter bit ZFINX = 1'b0; // Zfinx extension (integer FP in GPR) - Must be 0 for standard FPU + parameter bit FP_DIVSQRT = 1'b1; // FP division and square root + parameter bit SHARED_FP = 1'b0; // Shared FP unit + parameter bit SHARED_DSP_MULT = 1'b0; // Shared DSP multiplier + parameter bit SHARED_INT_MULT = 1'b0; // Shared integer multiplier + parameter bit SHARED_INT_DIV = 1'b0; // Shared integer divider + parameter bit SHARED_FP_DIVSQRT = 1'b0; // Shared FP div/sqrt + parameter int unsigned WAPUTYPE = 0; // APU type width + parameter int unsigned APU_NARGS_CPU = 3; // APU number of arguments + parameter int unsigned APU_WOP_CPU = 6; // APU operation width + parameter int unsigned APU_NDSFLAGS_CPU = 15; // APU data side flags + parameter int unsigned APU_NUSFLAGS_CPU = 5; // APU user side flags + parameter logic [31:0] DM_HALT_ADDR = 32'h1A110800; // Debug module halt address + + parameter int unsigned X_NUM_RS = 2; // Number of register file read ports (R-type instructions have 2 source operands) + parameter int unsigned OPCODE_W = 7; // Opcode field width (7 bits) + parameter int unsigned FUNC3_W = 3; // FUNC3 field width (3 bits) + parameter int unsigned OPCODE_OFF = 0; // Opcode field offset (bits 
6:0) + parameter int unsigned FUNC3_OFF = 12; // FUNC3 field offset (bits 14:12) + parameter int unsigned CLIC_ID_W = 5; // CLIC interrupt ID width (5 bits for 32 interrupts) // Parameters used by RedMulE parameter int unsigned REDMULE_DW = DWH; // RedMulE Data Width @@ -124,10 +143,10 @@ package magia_tile_pkg; parameter int unsigned RID_WIDTH = 1; // Width of the rid signal (response channel identifier, see OBI documentation) parameter int unsigned MID_WIDTH = 1; // Width of the mid signal (manager identifier, see OBI documentation) parameter int unsigned OBI_ID_WIDTH = 1; // Width of the id - configuration - parameter int unsigned N_SBR = 2; // Number of slaves (HCI, AXI XBAR) + parameter int unsigned N_SBR = 5; // Number of slaves (HCI, AXI XBAR, RedMulE_Ctrl, iDMA_Ctrl, FSync_Ctrl) - Event_Unit now via eu_direct_link parameter int unsigned N_MGR = 2; // Number of masters (Core, AXI XBAR) parameter int unsigned N_MAX_TRAN = 1; // Number of maximum outstanding transactions - parameter int unsigned N_ADDR_RULE = 4; // Number of address rules (L2, L1, Stack, Reserved) + parameter int unsigned N_ADDR_RULE = 7; // Number of address rules (L2, L1, Stack, Reserved, RedMulE_Ctrl, iDMA_Ctrl, FSync_Ctrl) - Event_Unit now via eu_direct_link localparam int unsigned N_BIT_SBR = $clog2(N_SBR); // Number of bits required to identify each slave // Parameters used by AXI @@ -169,27 +188,6 @@ package magia_tile_pkg; OBI2AXI = 1'b1 } idma_transfer_ch_e; // iDMA type of transfer channel - // Parameters used by the Xif Instruction Dispatcher - parameter int unsigned N_COPROC = 4; // RedMulE, iDMA, Fractal Sync and FPU - parameter int unsigned N_RULES = N_COPROC-1; // RedMulE, iDMA and Fractal Sync all have custom Xif instructions but not FPU - parameter int unsigned N_REDMULE_SIGN = 9; // Number of signitures (= {opcode, func3}) in the programming model of RedMulE - parameter int unsigned N_IDMA_SIGN = 5; // Number of signitures (= {opcode, func3}) in the programming model of the 
iDMA decoder - parameter int unsigned N_FSYNC_SIGN = 1; // Number of signitures (= {opcode, func3}) in the programming model of Fractal Sync - parameter int unsigned N_SIGN = 9; // Number of opcodes = max{RedMulE_signitures, iDMA_signitures, FractalSync_signitures} - typedef enum logic[1:0]{ - XIF_REDMULE_IDX = 2'b00, - XIF_IDMA_IDX = 2'b01, - XIF_FSYNC_IDX = 2'b10, - XIF_FPU_IDX = 2'b11 - } xif_inst_dispatch_idx_e; - parameter int unsigned DEFAULT_IDX = XIF_FPU_IDX; // FPU will handle the instructions by default - parameter int unsigned OPCODE_W = 7; // ISA OPCODE Width - parameter int unsigned OPCODE_OFF = 0; // ISA OPCODE Offset - parameter int unsigned FUNC3_W = 3; // ISA FUNC3 Width - parameter int unsigned FUNC3_OFF = 12; // ISA FUNC3 Offset - parameter int unsigned SIGN_W = OPCODE_W + FUNC3_W; // Width of the instruction signiture - parameter bit PRIORITY = 0; // Indicates that the dispatcher should rout the instruction to only 1 coprocessor (with highest priority) - // Parameters used by the iDMA instruction decoder parameter int unsigned DMA_INSTR_W = magia_pkg::INSTR_W; // iDMA Decoder instruction width parameter int unsigned DMA_DATA_W = magia_pkg::DATA_W; // iDMA Decoder data width @@ -283,31 +281,6 @@ package magia_tile_pkg; parameter int unsigned FILL_AW = magia_pkg::ADDR_W; // i$ Fill interface address width. Same as FILL_AW; >= 1. parameter int unsigned FILL_DW = magia_pkg::DATA_W; // i$ Fill interface data width. Power of two; >= 8. 
- // Parameters used by the FPU - parameter bit FPU_ZFINX = 0; // FPU use Zfinx extension instead of the F ISA extention - parameter int unsigned FPU_BUFFER_DEPTH = 8; // FPU FIFO depth that buffers instructions coming from core - parameter bit FPU_BUFFER_FT = 0; // FPU FIFO fall through that buffers instructions coming from core - parameter bit FPU_OOO = 1; // FPU enable out-of-order execution - parameter bit FPU_FWD = 1; // FPU enable forwarding from output to input of FPnew - parameter bit FPU_DIVSQRT = 0; // FPU disable FPnew T-head-based DivSqrt unit (supported only for FP32 unit) - parameter fpnew_pkg::fpu_features_t FPU_FEATURES = '{ - Width: 32, - EnableVectors: 1'b0, - EnableNanBox: 1'b1, - FpFmtMask: 6'b100000, - IntFmtMask: 4'b0010 - }; // FPU features: support only for FP32 and INT32 - parameter fpnew_pkg::fpu_implementation_t FPU_IMPLEMENTATION = '{ - PipeRegs: '{default: 2}, - UnitTypes: '{'{default: fpnew_pkg::PARALLEL}, - '{default: fpnew_pkg::MERGED}, - '{default: fpnew_pkg::PARALLEL}, - '{default: fpnew_pkg::MERGED}, - '{default: fpnew_pkg::DISABLED} - }, - PipeConfig: fpnew_pkg::DISTRIBUTED - }; // FPU implementation - typedef struct packed { int unsigned idx; logic[magia_pkg::ADDR_W-1:0] start_addr; @@ -337,11 +310,7 @@ package magia_tile_pkg; typedef struct packed { logic req; logic[magia_pkg::ADDR_W-1:0] addr; - logic[5 :0] atop; logic[3 :0] be; - logic[1 :0] memtype; - logic[2 :0] prot; - logic dbg; logic[magia_pkg::DATA_W-1:0] wdata; logic we; } core_data_req_t; @@ -351,9 +320,24 @@ package magia_tile_pkg; logic rvalid; logic[magia_pkg::DATA_W-1:0] rdata; logic err; - logic exokay; } core_data_rsp_t; + // EU Direct Link interface types + typedef struct packed { + logic req; + logic[magia_pkg::ADDR_W-1:0] addr; + logic wen; // Write enable negated (EU convention) + logic[magia_pkg::DATA_W-1:0] wdata; + logic[3 :0] be; + } eu_direct_req_t; + + typedef struct packed { + logic gnt; + logic rvalid; + logic[magia_pkg::DATA_W-1:0] rdata; + logic 
err; // Error signal (r_opc from XBAR_PERIPH_BUS) + } eu_direct_rsp_t; + typedef struct packed { logic[NR_FETCH_PORTS-1:0] req; logic[NR_FETCH_PORTS-1:0][FETCH_AW-1:0] addr; @@ -366,11 +350,14 @@ package magia_tile_pkg; logic[NR_FETCH_PORTS-1:0] rerror; } core_cache_instr_rsp_t; - typedef enum logic[1:0]{ - STACK_IDX = 3, - RESERVED_IDX = 2, - L1SPM_IDX = 1, - L2_IDX = 0 + typedef enum logic[2:0]{ + FSYNC_CTRL_IDX = 6, + IDMA_IDX = 5, + REDMULE_CTRL_IDX = 4, + STACK_IDX = 3, + RESERVED_IDX = 2, + L1SPM_IDX = 1, + L2_IDX = 0 } mem_array_idx_e; typedef enum logic[1:0]{ @@ -380,10 +367,6 @@ package magia_tile_pkg; AXI_CORE_INSTR_IDX = 0 } axi_xbar_idx_e; - typedef struct packed { - logic[N_SIGN-1:0][SIGN_W-1:0] sign_list; - } xif_inst_rule_t; - typedef logic[iDMA_AddrWidth-1:0] idma_addr_t; `HWPE_CTRL_TYPEDEF_REQ_T(redmule_ctrl_req_t, logic[AWC-1:0], logic[DWH-1:0], logic[SWH-1:0], logic[IW-1:0])
diff --git a/hw/tile/obi_slave_fsync.sv b/hw/tile/obi_slave_fsync.sv
new file mode 100644
index 0000000..e1a0b9f
--- /dev/null
+++ b/hw/tile/obi_slave_fsync.sv
@@ -0,0 +1,318 @@
+/*
+ * Copyright (C) 2023-2024 ETH Zurich and University of Bologna
+ *
+ * Licensed under the Solderpad Hardware License, Version 0.51
+ * (the "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * SPDX-License-Identifier: SHL-0.51
+ *
+ * Authors: Luca Balboni
+ * Based on fractal_sync_xif_inst_decoder by Victor Isachi
+ *
+ * OBI Slave Fractal Sync Memory-Mapped Controller
+ * Replaces XIF interface with memory-mapped register access
+ */
+
+// Memory-mapped controller for the Fractal Sync interfaces.
+//
+// Software writes AGGR_REG and ID_REG, then writes CONTROL_REG to launch one
+// synchronization. The FSM drives `sync` on exactly one of the four master
+// ports for one cycle (SYNC), waits until any port raises `wake` (WAIT) and
+// then asserts done_o for one cycle (DONE) before returning to IDLE.
+//
+// Both the register bank and the FSM run on locally gated clocks, so every
+// condition that must update state also has to raise the matching enable.
+//
+// NOTE(review): gnt and rvalid are asserted combinationally in the cycle of the
+// request; confirm that the attached OBI interconnect accepts a response in the
+// same cycle as the address phase.
+module obi_slave_fsync
+  import magia_tile_pkg::*;
+  import magia_pkg::*;
+#(
+  parameter logic [ADDR_W-1:0] BASE_ADDR  = magia_tile_pkg::FSYNC_CTRL_ADDR_START, // First byte address of the register window
+  parameter int unsigned       AGGR_W     = magia_tile_pkg::FSYNC_AGGR_W,          // aggr_reg bits forwarded to the tree ports
+  parameter int unsigned       ID_W       = magia_tile_pkg::FSYNC_ID_W,            // id_reg bits forwarded to the tree ports
+  parameter int unsigned       NBR_AGGR_W = magia_tile_pkg::FSYNC_NBR_AGGR_W,      // aggr_reg bits forwarded to the neighbor ports
+  parameter int unsigned       NBR_ID_W   = magia_tile_pkg::FSYNC_NBR_ID_W,        // id_reg bits forwarded to the neighbor ports
+  parameter type               obi_req_t  = magia_tile_pkg::core_obi_data_req_t,   // OBI request struct type
+  parameter type               obi_rsp_t  = magia_tile_pkg::core_obi_data_rsp_t    // OBI response struct type
+)(
+  input  logic                 clk_i,          // Ungated clock
+  input  logic                 rst_ni,         // Asynchronous reset, active low
+  input  logic                 clear_i,        // Synchronous clear of the configuration registers and of the FSM
+
+  input  obi_req_t             obi_req_i,      // OBI request from the interconnect
+  output obi_rsp_t             obi_rsp_o,      // OBI response (grant and read data)
+
+  fractal_sync_if.mst_port     ht_fsync_if_o,  // Horizontal tree port
+  fractal_sync_if.mst_port     hn_fsync_if_o,  // Horizontal neighbor port
+  fractal_sync_if.mst_port     vt_fsync_if_o,  // Vertical tree port
+  fractal_sync_if.mst_port     vn_fsync_if_o,  // Vertical neighbor port
+
+  output logic                 done_o,         // High for one cycle while the FSM is in DONE
+  output logic                 error_o         // OR of the error flags of the four ports
+);
+
+/*******************************************************/
+/**        Internal Signal Definitions Beginning      **/
+/*******************************************************/
+
+  logic clk_sync_en, clk_reg_en;
+  logic clk_sync_g, clk_reg_g;
+
+  logic sync_trigger;
+  logic done;
+  logic addr_match;
+
+  logic[DATA_W-1:0] aggr_reg, id_reg, status_reg;
+
+  typedef enum logic[1:0] {
+    IDLE,
+    SYNC,
+    WAIT,
+    DONE
+  } sync_state_e;
+
+  sync_state_e c_sync_state, n_sync_state;
+
+  // Memory Map:
+  // BASE_ADDR + 0x00: AGGR_REG (write-only)
+  // BASE_ADDR + 0x04: ID_REG (write-only)
+  // BASE_ADDR + 0x08: CONTROL_REG (write-only, writing triggers sync)
+  // BASE_ADDR + 0x0C: STATUS_REG (read-only)
+  localparam logic [ADDR_W-1:0] AGGR_REG_OFFSET    = 4'h0;
+  localparam logic [ADDR_W-1:0] ID_REG_OFFSET      = 4'h4;
+  localparam logic [ADDR_W-1:0] CONTROL_REG_OFFSET = 4'h8;
+  localparam logic [ADDR_W-1:0] STATUS_REG_OFFSET  = 4'hC;
+
+/*******************************************************/
+/**           Internal Signal Definitions End         **/
+/*******************************************************/
+/**             Hardwired Signals Beginning           **/
+/*******************************************************/
+
+  // The slave answers inside a 0x100-byte window that starts at BASE_ADDR
+  assign addr_match = (obi_req_i.a.addr >= BASE_ADDR) &&
+                      (obi_req_i.a.addr < BASE_ADDR + 32'h100);
+
+  assign done_o  = done;
+  assign error_o = ht_fsync_if_o.error | hn_fsync_if_o.error |
+                   vt_fsync_if_o.error | vn_fsync_if_o.error;
+
+  // Status register: bit 0 = done, bit 1 = error, bit 2 = busy
+  // For polling: when busy=0, operation is complete
+  assign status_reg = {29'b0, (c_sync_state == SYNC || c_sync_state == WAIT), error_o, done};
+
+/*******************************************************/
+/**                Hardwired Signals End              **/
+/*******************************************************/
+/**               Clock gating Beginning              **/
+/*******************************************************/
+
+  // Gated clock of the configuration registers (open on accesses and on clear).
+  // test_en_i is tied low: no test-mode bypass is wired through this module.
+  tc_clk_gating i_reg_clock_gating (
+    .clk_i                    ,
+    .en_i      ( clk_reg_en  ),
+    .test_en_i ( '0          ),
+    .clk_o     ( clk_reg_g   )
+  );
+
+  // Gated clock of the FSM (closed only while IDLE without a pending trigger).
+  tc_clk_gating i_sync_clock_gating (
+    .clk_i                    ,
+    .en_i      ( clk_sync_en ),
+    .test_en_i ( '0          ),
+    .clk_o     ( clk_sync_g  )
+  );
+
+/*******************************************************/
+/**                  Clock gating End                 **/
+/*******************************************************/
+/**            OBI Interface Logic Beginning          **/
+/*******************************************************/
+
+  always_comb begin: obi_interface
+    obi_rsp_o    = '0;
+    sync_trigger = 1'b0;
+    // clear_i must open the register clock gate by itself, otherwise the
+    // synchronous clear of aggr_reg/id_reg never sees a clock edge.
+    clk_reg_en   = clear_i;
+
+    if (obi_req_i.req && addr_match) begin
+      obi_rsp_o.gnt    = 1'b1;
+      obi_rsp_o.rvalid = 1'b1;
+      clk_reg_en       = 1'b1; // Enable clock for OBI register access
+
+      // OBI protocol: assign response ID and optional fields
+      obi_rsp_o.r.rid        = obi_req_i.a.aid;
+      obi_rsp_o.r.r_optional = '0;
+      obi_rsp_o.r.err        = 1'b0;
+
+      if (obi_req_i.a.we) begin
+        // Write operation
+        case (obi_req_i.a.addr - BASE_ADDR)
+          CONTROL_REG_OFFSET: begin
+            sync_trigger = 1'b1; // Writing to control register triggers sync
+          end
+          default: begin
+            // Writes to AGGR_REG and ID_REG are handled in register logic
+          end
+        endcase
+      end else begin
+        // Read operation
+        case (obi_req_i.a.addr - BASE_ADDR)
+          STATUS_REG_OFFSET: begin
+            obi_rsp_o.r.rdata = status_reg;
+          end
+          default: begin
+            obi_rsp_o.r.rdata = 32'h0; // Return 0 for write-only registers
+          end
+        endcase
+      end
+    end
+  end
+
+/*******************************************************/
+/**               OBI Interface Logic End             **/
+/*******************************************************/
+/**              Register Logic Beginning             **/
+/*******************************************************/
+
+  // Configuration registers; clk_reg_g only toggles while clk_reg_en is high.
+  always_ff @(posedge clk_reg_g, negedge rst_ni) begin: configuration_registers
+    if (~rst_ni) begin
+      aggr_reg <= '0;
+      id_reg   <= '0;
+    end else begin
+      if (clear_i) begin
+        aggr_reg <= '0;
+        id_reg   <= '0;
+      end else if (obi_req_i.req && addr_match && obi_req_i.a.we) begin
+        case (obi_req_i.a.addr - BASE_ADDR)
+          AGGR_REG_OFFSET: begin
+            aggr_reg <= obi_req_i.a.wdata;
+          end
+          ID_REG_OFFSET: begin
+            id_reg <= obi_req_i.a.wdata;
+          end
+        endcase
+      end
+    end
+  end
+
+/*******************************************************/
+/**                 Register Logic End                **/
+/*******************************************************/
+/**           Synchronization FSM Beginning           **/
+/*******************************************************/
+
+  // Next-state and output logic. Every interface output defaults to zero and
+  // is driven only while the FSM is in SYNC.
+  always_comb begin: sync_logic
+    n_sync_state         = c_sync_state;
+    clk_sync_en          = 1'b1;
+    done                 = 1'b0;
+    ht_fsync_if_o.sync   = 1'b0;
+    ht_fsync_if_o.aggr   = '0;
+    ht_fsync_if_o.id_req = '0;
+    hn_fsync_if_o.sync   = 1'b0;
+    hn_fsync_if_o.aggr   = '0;
+    hn_fsync_if_o.id_req = '0;
+    vt_fsync_if_o.sync   = 1'b0;
+    vt_fsync_if_o.aggr   = '0;
+    vt_fsync_if_o.id_req = '0;
+    vn_fsync_if_o.sync   = 1'b0;
+    vn_fsync_if_o.aggr   = '0;
+    vn_fsync_if_o.id_req = '0;
+
+    case (c_sync_state)
+      IDLE: begin
+        if (sync_trigger) begin
+          n_sync_state = SYNC;
+        end else begin
+          clk_sync_en = 1'b0;
+        end
+      end
+
+      SYNC: begin
+        n_sync_state = WAIT;
+        // NOTE(review): the comparison uses the full register, not only the
+        // AGGR_W/NBR_AGGR_W bits forwarded below - confirm this is intended.
+        if (aggr_reg != 1) begin // Tree (level > 1) request
+          case (id_reg[0])
+            1'b0: begin // Horizontal tree node request
+              ht_fsync_if_o.sync   = 1'b1;
+              ht_fsync_if_o.aggr   = aggr_reg[AGGR_W-1:0];
+              ht_fsync_if_o.id_req = id_reg[ID_W-1:0];
+            end
+            1'b1: begin // Vertical tree node request
+              vt_fsync_if_o.sync   = 1'b1;
+              vt_fsync_if_o.aggr   = aggr_reg[AGGR_W-1:0];
+              vt_fsync_if_o.id_req = id_reg[ID_W-1:0];
+            end
+          endcase
+        end else begin // Neighbor (level = 1) request
+          case (id_reg[1:0])
+            2'b00: begin // Horizontal tree node request
+              ht_fsync_if_o.sync   = 1'b1;
+              ht_fsync_if_o.aggr   = aggr_reg[AGGR_W-1:0];
+              ht_fsync_if_o.id_req = id_reg[ID_W-1:0];
+            end
+            2'b01: begin // Vertical tree node request
+              vt_fsync_if_o.sync   = 1'b1;
+              vt_fsync_if_o.aggr   = aggr_reg[AGGR_W-1:0];
+              vt_fsync_if_o.id_req = id_reg[ID_W-1:0];
+            end
+            2'b10: begin // Horizontal neighbor node request
+              hn_fsync_if_o.sync   = 1'b1;
+              hn_fsync_if_o.aggr   = aggr_reg[NBR_AGGR_W-1:0];
+              hn_fsync_if_o.id_req = id_reg[NBR_ID_W-1:0];
+            end
+            2'b11: begin // Vertical neighbor node request
+              vn_fsync_if_o.sync   = 1'b1;
+              vn_fsync_if_o.aggr   = aggr_reg[NBR_AGGR_W-1:0];
+              vn_fsync_if_o.id_req = id_reg[NBR_ID_W-1:0];
+            end
+          endcase
+        end
+      end
+
+      WAIT: begin
+        if (ht_fsync_if_o.wake | hn_fsync_if_o.wake | vt_fsync_if_o.wake | vn_fsync_if_o.wake) begin
+          n_sync_state = DONE;
+        end else begin
+          n_sync_state = WAIT;
+        end
+      end
+
+      DONE: begin
+        n_sync_state = IDLE;
+        done         = 1'b1;
+      end
+    endcase
+  end
+
+  // FSM state register on the gated FSM clock.
+  always_ff @(posedge clk_sync_g, negedge rst_ni) begin: sync_state
+    if (~rst_ni) c_sync_state <= IDLE;
+    else begin
+      if (clear_i) c_sync_state <= IDLE;
+      else         c_sync_state <= n_sync_state;
+    end
+  end
+
+/*******************************************************/
+/**              Synchronization FSM End              **/
+/*******************************************************/
+
+endmodule: obi_slave_fsync
\ No newline at end of file
diff --git
a/hw/tile/xif_inst_dispatcher.sv b/hw/tile/xif_inst_dispatcher.sv deleted file mode 100644 index c82fb6c..0000000 --- a/hw/tile/xif_inst_dispatcher.sv +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Copyright (C) 2023-2024 ETH Zurich and University of Bologna - * - * Licensed under the Solderpad Hardware License, Version 0.51 - * (the "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * SPDX-License-Identifier: SHL-0.51 - * - * Authors: Victor Isachi - * - * Xif Instruction Dispatcher - */ - -module xif_inst_dispatcher - import magia_tile_pkg::*; - import cv32e40x_pkg::*; -#( - parameter int unsigned N_COPROC = magia_tile_pkg::N_COPROC, - parameter int unsigned N_RULES = magia_tile_pkg::N_RULES, - parameter int unsigned N_SIGN = magia_tile_pkg::N_SIGN, - parameter int unsigned DEFAULT_IDX = magia_tile_pkg::DEFAULT_IDX, - parameter int unsigned OPCODE_OFF = magia_tile_pkg::OPCODE_OFF, - parameter int unsigned FUNC3_OFF = magia_tile_pkg::FUNC3_OFF, - parameter int unsigned OPCODE_W = magia_tile_pkg::OPCODE_W, - parameter int unsigned FUNC3_W = magia_tile_pkg::FUNC3_W, - parameter int unsigned SIGN_W = magia_tile_pkg::SIGN_W, - parameter type xif_inst_rule_t = magia_tile_pkg::xif_inst_rule_t -)( - input logic clk_i, - input logic rst_ni, - - cv32e40x_if_xif.coproc_issue xif_issue_if_i, - cv32e40x_if_xif.cpu_issue xif_issue_if_o[N_COPROC], - - cv32e40x_if_xif.coproc_result xif_result_if_o, - cv32e40x_if_xif.cpu_result xif_result_if_i, - - input magia_tile_pkg::xif_inst_rule_t[N_RULES-1:0] rules_i -); - 
-/*******************************************************/ -/** Parameters and Definitions Beginning **/ -/*******************************************************/ - - // IMPORTANT NOTE: must mirror what is found in cv32e40x_if_xif.sv - typedef struct packed { - logic accept; - logic writeback; - logic dualwrite; - logic[2:0] dualread; - logic loadstore; - logic ecswrite ; - logic exc; - } x_issue_resp_t; - - typedef enum logic[1:0] { - IDLE, - WAIT, - PROP - } result_state_e; - -/*******************************************************/ -/** Parameters and Definitions End **/ -/*******************************************************/ -/** Internal Signals Beginning **/ -/*******************************************************/ - - logic[OPCODE_W-1:0] opcode; - logic[ FUNC3_W-1:0] func3; - logic[ SIGN_W-1:0] sign; - logic[N_COPROC-1:0] coproc_sign; // Indicates which coprocessor expects detected signiture - logic[N_COPROC-1:0] coproc_issue; // Indicates to which coprocessor the instruction should be dispatched - logic[N_COPROC-1:0] coproc_issue_pr; // Priority encoded version of the above signal: used to ensure the instruction is dispatched to only 1 coprocessor - logic default_issue; // Indicates that the instruction should be dispatched to the default coprocessor - - logic [N_COPROC-1:0] issue_ready; - x_issue_resp_t[N_COPROC-1:0] issue_resp; - - result_state_e c_result_state, n_result_state; - -/*******************************************************/ -/** Internal Signals End **/ -/*******************************************************/ -/** Hardwired Signals Beginning **/ -/*******************************************************/ - - assign opcode = xif_issue_if_i.issue_req.instr[OPCODE_OFF+OPCODE_W-1:OPCODE_OFF]; - assign func3 = xif_issue_if_i.issue_req.instr[ FUNC3_OFF+FUNC3_W-1:FUNC3_OFF]; - assign sign = {opcode, func3}; - assign default_issue = ~(|coproc_sign); - assign coproc_issue = default_issue ? 
(1 << DEFAULT_IDX) : coproc_sign; - -/*******************************************************/ -/** Hardwired Signals End **/ -/*******************************************************/ -/** IF to Struct Beginning **/ -/*******************************************************/ - - for (genvar i = 0; i < N_COPROC; i++) begin: gen_if2signal - assign issue_ready[i] = xif_issue_if_o[i].issue_ready; - assign issue_resp[i].accept = xif_issue_if_o[i].issue_resp.accept; - assign issue_resp[i].writeback = xif_issue_if_o[i].issue_resp.writeback; - assign issue_resp[i].dualwrite = xif_issue_if_o[i].issue_resp.dualwrite; - assign issue_resp[i].dualread = xif_issue_if_o[i].issue_resp.dualread; - assign issue_resp[i].loadstore = xif_issue_if_o[i].issue_resp.loadstore; - assign issue_resp[i].ecswrite = xif_issue_if_o[i].issue_resp.ecswrite; - assign issue_resp[i].exc = xif_issue_if_o[i].issue_resp.exc; - end - -/*******************************************************/ -/** IF to Struct End **/ -/*******************************************************/ -/** Instruction Dispatcher Beginning **/ -/*******************************************************/ - - always_comb begin: sign_detector - for (int i = 0; i < N_COPROC; i++) begin - coproc_sign[i] = 1'b0; - if (i < N_RULES) begin // Only check first N_RULES coprocessors, the rest do not have associated rule - for (int j = 0; j < N_SIGN; j++) begin - coproc_sign[i] |= (sign == rules_i[i].sign_list[j]) ? 
1'b1 : 1'b0; - end - end - end - end - - always_comb begin: priority_encoder - coproc_issue_pr = '0; - for (int i = 0; i < N_COPROC; i++) begin - if (coproc_issue[i]) begin - coproc_issue_pr = 1 << i; - break; - end - end - end - - for (genvar i = 0; i < N_COPROC; i++) begin: gen_issue_out - always_comb begin - if (coproc_issue_pr[i]) begin - xif_issue_if_o[i].issue_valid = xif_issue_if_i.issue_valid; - xif_issue_if_o[i].issue_req = xif_issue_if_i.issue_req; - end else begin - xif_issue_if_o[i].issue_valid = '0; - xif_issue_if_o[i].issue_req = '0; - end - end - end - - always_comb begin: issue_in - xif_issue_if_i.issue_ready = '0; - xif_issue_if_i.issue_resp = '0; - for (int i = 0; i < N_COPROC; i++) begin - if (coproc_issue_pr[i]) begin - xif_issue_if_i.issue_ready = issue_ready[i]; - xif_issue_if_i.issue_resp.accept = issue_resp[i].accept; - xif_issue_if_i.issue_resp.writeback = issue_resp[i].writeback; - xif_issue_if_i.issue_resp.dualwrite = issue_resp[i].dualwrite; - xif_issue_if_i.issue_resp.dualread = issue_resp[i].dualread; - xif_issue_if_i.issue_resp.loadstore = issue_resp[i].loadstore; - xif_issue_if_i.issue_resp.ecswrite = issue_resp[i].ecswrite; - xif_issue_if_i.issue_resp.exc = issue_resp[i].exc; - break; - end - end - end - - always_ff @(posedge clk_i, negedge rst_ni) begin: result_state_register - if (!rst_ni) c_result_state <= IDLE; - else c_result_state <= n_result_state; - end - - always_comb begin: result_state_logic - n_result_state = c_result_state; - case (c_result_state) - IDLE: if (!default_issue) n_result_state = WAIT; - WAIT: if (xif_result_if_o.result_ready) n_result_state = PROP; - PROP: if (!xif_result_if_o.result_ready) n_result_state = default_issue ? 
IDLE : WAIT; - endcase - end - - always_comb begin: result_handler - xif_result_if_o.result_valid = xif_result_if_i.result_valid; - xif_result_if_i.result_ready = xif_result_if_o.result_ready; - xif_result_if_o.result = xif_result_if_i.result; - if (c_result_state == PROP) begin - xif_result_if_o.result_valid = xif_result_if_o.result_ready; - xif_result_if_i.result_ready = 1'b0; - xif_result_if_o.result = '0; - end - end - -/*******************************************************/ -/** Instruction Dispatcher End **/ -/*******************************************************/ - -endmodule: xif_inst_dispatcher \ No newline at end of file diff --git a/setup_env.sh b/setup_env.sh index f19f95e..caba5cd 100644 --- a/setup_env.sh +++ b/setup_env.sh @@ -11,7 +11,7 @@ export PATH=/usr/pack/gcc-5.2.0-af/x86_64-rhe6-linux/bin:$PATH export PATH=/usr/local/anaconda3-2023.07/condabin:$PATH export PATH=/home/visachi/.local/bin:$PATH export XLEN=32 -export XTEN=imafc +export XTEN=imfcxpulpv2 echo "Sourcing python virtual environment" source ./magia_venv/bin/activate echo "Finished setting up the environment" \ No newline at end of file diff --git a/sw/.gitignore b/sw/.gitignore deleted file mode 100644 index 26abe44..0000000 --- a/sw/.gitignore +++ /dev/null @@ -1,13 +0,0 @@ -tests/amo_test/* -tests/boot_test/* -tests/fpu_test/* -tests/fsync_extended_test/* -tests/fsync_test/* -tests/hello_mesh/* -tests/hello_world/* -tests/idma_test/* -tests/inter_l1_test/* -tests/l1_test/* -tests/mesh_test/* -tests/redmule_test/* -tests/tile_test/* \ No newline at end of file diff --git a/sw/tests/amo_test.c b/sw/tests/amo_test.c deleted file mode 100644 index de6f16f..0000000 --- a/sw/tests/amo_test.c +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Copyright (C) 2023-2024 ETH Zurich and University of Bologna - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * SPDX-License-Identifier: Apache-2.0 - * - * Authors: Victor Isachi - * - * MAGIA AMO Test - */ - -#include "magia_tile_utils.h" -#include "magia_utils.h" - -#define VERBOSE (0) - -#define NUM_ITER (1000) -#define INITIAL_VAL (0) -#define AMO_TILES (NUM_HARTS) -#define EXPECTED_VAL (INITIAL_VAL + NUM_ITER*AMO_TILES) - -int main(void) { - uint32_t hartid = get_hartid(); - - printf("Starting AMO test...\n"); - - for (int i = 0; i < NUM_ITER; i++){ - wait_nop(get_hartid()); - for (int i = 0; i < AMO_TILES; i++){ - asm volatile("addi t0, %0, 0" ::"r"((uint32_t)(SYNC_BASE + ((hartid+i)%NUM_HARTS)*L1_TILE_OFFSET))); - asm volatile("li t1, 1" ::); - asm volatile("amoadd.w t2, t1, (t0)" ::); - } - } - - printf("Waiting for counter to reach expected value...\n"); - while (mmio32(SYNC_BASE + hartid*L1_TILE_OFFSET) != EXPECTED_VAL){ -#if VERBOSE > 10 - printf("Read Synch Value: %0d - Expected: %0d\n", mmio32(SYNC_BASE + hartid*L1_TILE_OFFSET), EXPECTED_VAL); -#endif - } - - printf("Test PASSED: counter reached\n"); - - return 0; -} - diff --git a/sw/tests/event_unit_test.c b/sw/tests/event_unit_test.c new file mode 100644 index 0000000..c93a34a --- /dev/null +++ b/sw/tests/event_unit_test.c @@ -0,0 +1,200 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + * + * Authors: Luca Balboni + * + * MAGIA Event Unit Tile Stress Test - Event Unit WFE API Version + * Uses event_unit_utils.h for Event Unit control and WFE/polling + * Tests concurrent RedMulE and IDMA operations with out-of-order completions + * + */ + +#include +#include "magia_tile_utils.h" +#include "redmule_mm_utils.h" +#include "idma_mm_utils.h" +#include "event_unit_utils.h" + +#include "x_input.h" +#include "w_input.h" +#include "y_input.h" +#include "z_output.h" + +#define X_BASE_1 (L1_BASE + 0x00012048) +#define W_BASE_1 (L1_BASE + 0x00016048) +#define Y_BASE_1 (L1_BASE + 0x0001A048) +#define X_BASE_2 (L1_BASE + 0x0001E048) +#define W_BASE_2 (L1_BASE + 0x00022048) +#define Y_BASE_2 (L1_BASE + 0x00026048) + +#define Z_BASE_1 (L2_BASE + 0x00001000) +#define Z_BASE_2 (L2_BASE + 0x00005000) +#define Z_BASE_4 (L2_BASE + 0x0000D000) + +#define DMA_BUFFER_1 (L1_BASE + 0x00036048) +#define DMA_BUFFER_2 (L1_BASE + 0x0003A048) + +#define M_SIZE (96) +#define N_SIZE (64) +#define K_SIZE (64) + +#define VERBOSE (1) + +#define USE_WFE (1) + +#define WAIT_CYCLES (10) + +#define DIFF_TH (0x0011) + +#define DMA_CHUNK_SIZE (M_SIZE * N_SIZE * 2) + +int main(void) { + uint32_t dst_addr; + uint32_t src_addr; + uint32_t len; + + uint32_t redmule_completed = 0; + uint32_t idma_a2o_completed = 0; + uint32_t idma_o2a_completed = 0; + + // Initialize Event Unit once + eu_init(); + + // Setup test data + printf("Setting up test data...\n"); + + // X matrix for RedMulE + for (int i = 0; i < M_SIZE*N_SIZE; i++) + 
mmio16(X_BASE_1 + 2*i) = x_inp[i]; + + // W matrix for RedMulE + for (int i = 0; i < N_SIZE*K_SIZE; i++) + mmio16(W_BASE_1 + 2*i) = w_inp[i]; + + // Y matrix (accumulator) for RedMulE + for (int i = 0; i < M_SIZE*K_SIZE; i++) + mmio16(Y_BASE_1 + 2*i) = y_inp[i]; + + // Z - golden (reference) for RedMulE + for (int i = 0; i < M_SIZE*K_SIZE; i++) + mmio16(Z_BASE_1 + 2*i) = z_oup[i]; + + // Initialize IDMA test data + for (int i = 0; i < DMA_CHUNK_SIZE/2; i++) { + uint16_t test_pattern = (uint16_t)(0x1000 + (i & 0xFFF)); + mmio16(Z_BASE_4 + 2*i) = test_pattern; + } + +#if VERBOSE > 10 + printf("Test data setup complete\n"); +#endif + + printf("Testing concurrent RedMulE and IDMA operations...\n"); + + // Initialize Event Unit BEFORE launching operations + eu_multi_init(1, 1, 1, 0); // Enable RedMulE, IDMA A2O, IDMA O2A, disable FSync + + // Launch RedMulE operation + printf("Launching RedMulE operation...\n"); + hwpe_cg_enable(); + hwpe_soft_clear(); + + int offload_id_tmp; + while ((offload_id_tmp = hwpe_acquire_job()) < 0) + ; + + redmule_cfg((unsigned int)X_BASE_1, (unsigned int)W_BASE_1, (unsigned int)Y_BASE_1, + M_SIZE, N_SIZE, K_SIZE, (uint8_t)gemm_ops, (uint8_t)Float16); + + // Launch IDMA operations + printf("Launching IDMA operations...\n"); + + // First IDMA transfer: L2 to L1 + dst_addr = (uint32_t)DMA_BUFFER_1; + src_addr = (uint32_t)Z_BASE_4; + len = (uint32_t)DMA_CHUNK_SIZE; + + uint32_t transfer_id_1 = idma_L2ToL1(src_addr, dst_addr, len); + + + // Second IDMA transfer: L1 to L2 + dst_addr = (uint32_t)Z_BASE_2; + src_addr = (uint32_t)DMA_BUFFER_1; + len = (uint32_t)DMA_CHUNK_SIZE; + + uint32_t transfer_id_2 = idma_L1ToL2(src_addr, dst_addr, len); + printf("iDMA transfer 2 (L1->L2) started, ID: %d\n", transfer_id_2); + + // Trigger RedMulE after IDMA to create concurrency + hwpe_trigger_job(); + + // Wait for ALL accelerators with a single eu_multi_wait_all call
+ printf("Waiting for ALL accelerators completion (RedMulE + IDMA A2O + IDMA O2A)...\n"); + + eu_wait_mode_t wait_mode = USE_WFE ? EU_WAIT_MODE_WFE : EU_WAIT_MODE_POLLING; + uint32_t all_events = eu_multi_wait_all(1, 1, 1, 0, wait_mode); + + // eu_multi_wait_all returns only when ALL events are present (or 0 on timeout) + if (all_events) { + redmule_completed = 1; + idma_a2o_completed = 1; + idma_o2a_completed = 1; + } + // If all_events == 0, it means timeout occurred + + // Check for timeout + if (!(redmule_completed && idma_a2o_completed && idma_o2a_completed)) { + mmio16(TEST_END_ADDR) = FAIL_EXIT_CODE; + return 1; + } + + // Disable RedMulE + hwpe_cg_disable(); + + unsigned int num_errors = 0; + + // Verify RedMulE results + uint16_t computed, expected, diff; + for(int i = 0; i < M_SIZE*K_SIZE; i++){ + computed = mmio16(Y_BASE_1 + 2*i); + expected = mmio16(Z_BASE_1 + 2*i); + diff = (computed > expected) ? (computed - expected) : (expected - computed); + if(diff > DIFF_TH){ + num_errors++; + } + } + + // Verify IDMA results (basic integrity check) + uint32_t idma_errors = 0; + for(int i = 0; i < 100; i++) { // Check first 100 elements + uint16_t source_data = mmio16(Z_BASE_4 + 2*i); + uint16_t copied_data = mmio16(DMA_BUFFER_1 + 2*i); + if(source_data != copied_data) { + idma_errors++; + } + } + + num_errors += idma_errors; + + // Event Unit integrity check + if (!(redmule_completed && idma_a2o_completed && idma_o2a_completed)) { + num_errors++; + } + + printf("Finished test with %0d errors\n", num_errors); + + return num_errors; +} \ No newline at end of file diff --git a/sw/tests/fpu_test.c b/sw/tests/fpu_test.c index e37042d..776faed 100644 --- a/sw/tests/fpu_test.c +++ b/sw/tests/fpu_test.c @@ -39,30 +39,25 @@ inline uint32_t f_add(volatile uint32_t op_a, volatile uint32_t op_b){ } int main(void) { - // uint32_t exit_code; + uint32_t exit_code; - // volatile float a, b, c; - // a = A_VAL; - // b = B_VAL; - // c = a+b; + volatile float a, b, c; + a = A_VAL; 
+ b = B_VAL; + c = a+b; - // if (abs_diff(c, C_EXP) > FP_TH){ - // exit_code = FAIL_EXIT_CODE; - // printf("Test FAILED\n"); - // }else{ - // exit_code = PASS_EXIT_CODE; - // printf("Test PASSED\n"); - // } - - // mmio16(TEST_END_ADDR) = exit_code; + if (abs_diff(c, C_EXP) > FP_TH){ + printf("Test FAILED\n"); + }else{ + printf("Test PASSED\n"); + } +/* uint32_t a, b, c; a = 0x414570A4; // Binary for 12.34f b = 0x42631EB8; // Binary for 56.78f c = f_add(a, b); printf("Float operation result: 0x%0x [expected: 0x428A3D71(69.12f)]\n", c); - - mmio16(TEST_END_ADDR) = DEFAULT_EXIT_CODE; - +*/ return 0; } diff --git a/sw/tests/fsync_extended_test_event_unit.c b/sw/tests/fsync_extended_test_event_unit.c new file mode 100644 index 0000000..a3125b5 --- /dev/null +++ b/sw/tests/fsync_extended_test_event_unit.c @@ -0,0 +1,217 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Authors: Luca Balboni + * Based on fsync_extended_test.c by Victor Isachi + * + * MAGIA FractalSync Memory-Mapped Synchronization Test - Event Unit Version + * Uses event_unit_utils.h for Event Unit control and WFE/polling + * + */ + +#include "magia_tile_utils.h" +#include "magia_utils.h" +#include "fsync_mm_utils.h" +#include "fsync_mm_api.h" +#include "event_unit_utils.h" +#include "cache_fill.h" + +#define VERBOSE (0) + +#define USE_WFE (1) + +int main(void) { + uint32_t aggregates[NUM_HARTS]; + uint32_t ids[NUM_HARTS]; + + // Initialize Event Unit once + eu_init(); + +#if NUM_HARTS == 16 + /// Custom 4x4 synch. + switch (get_hartid()){ + case 0: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 1: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 2: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 3: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 4: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 5: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 6: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 7: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 8: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 9: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 10: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 11: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 12: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 13: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 14: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + case 15: aggregates[get_hartid()] = 0b1111; ids[get_hartid()] = 7; break; + } + + // h_pprintf("FractalSync aggregate: 0b"); pprintf(bs(aggregates[get_hartid()])); 
pprintf(", id: "); pprintf(ds(ids[get_hartid()])); n_pprintf("..."); + printf("FractalSync aggregate: 0x%0x, id: %0d...\n", aggregates[get_hartid()], ids[get_hartid()]); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm(ids[get_hartid()], aggregates[get_hartid()]); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + sentinel_instr_id(); +#endif + + printf("[FractalSync MM] Horizontal neighbor test starting\n"); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_hnbr(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + + sentinel_instr_id(); + printf("[FractalSync MM] Horizontal neighbor test ending\n"); + + printf("[FractalSync MM] Horizontal ring neighbor test starting\n"); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_hring(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + + sentinel_instr_id(); + printf("[FractalSync MM] Horizontal ring neighbor test ending\n"); + + printf("[FractalSync MM] Vertical neighbor test starting\n"); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_vnbr(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + sentinel_instr_id(); + printf("[FractalSync MM] Vertical neighbor test ending\n"); + + printf("[FractalSync MM] 
Vertical ring neighbor test starting\n"); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_vring(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + sentinel_instr_id(); + printf("[FractalSync MM] Vertical ring neighbor test ending\n"); + + printf("[FractalSync MM] Row test starting\n"); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_rows(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + sentinel_instr_id(); + printf("[FractalSync MM] Row test ending\n"); + + printf("[FractalSync MM] Column test starting\n"); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_cols(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + sentinel_instr_id(); + printf("[FractalSync MM] Column test ending\n"); + + printf("[FractalSync MM] Global test starting\n"); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_global(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + sentinel_instr_id(); + printf("[FractalSync MM] Global test ending\n"); + + // h_pprintf("FractalSync test finished...\n"); + printf("FractalSync MM test finished...\n"); + + + + return 0; +} \ No newline at end of file diff --git a/sw/tests/fsync_extended_test.c b/sw/tests/fsync_extended_test_mm.c 
similarity index 54% rename from sw/tests/fsync_extended_test.c rename to sw/tests/fsync_extended_test_mm.c index 675ff06..ded77fc 100644 --- a/sw/tests/fsync_extended_test.c +++ b/sw/tests/fsync_extended_test_mm.c @@ -14,28 +14,26 @@ * limitations under the License. * SPDX-License-Identifier: Apache-2.0 * - * Authors: Victor Isachi + * Authors: Luca Balboni + * Based on fsync_extended_test.c by Victor Isachi * - * MAGIA Extended FractalSync Synchronization Test + * MAGIA FractalSync Memory-Mapped Synchronization Test + * WARNING: //STALLING MODE = POLLING, //NONSTALLING = WFI currently not working for race conditions */ #include "magia_tile_utils.h" #include "magia_utils.h" -#include "fsync_isa_utils.h" -#include "fsync_api.h" +#include "fsync_mm_utils.h" +#include "fsync_mm_api.h" #include "cache_fill.h" #define VERBOSE (0) -#define STALLING int main(void) { uint32_t aggregates[NUM_HARTS]; uint32_t ids[NUM_HARTS]; -#ifndef STALLING - irq_en(1< + * Based on fsync_test_mm.c by Victor Isachi + * + * MAGIA FractalSync Memory-Mapped Synchronization Test - Event Unit Version + * Uses event_unit_utils.h for Event Unit control and WFE/polling + * + */ + +#include "magia_tile_utils.h" +#include "magia_utils.h" +#include "fsync_mm_utils.h" +#include "fsync_mm_api.h" +#include "event_unit_utils.h" +#include "cache_fill.h" + +#define VERBOSE (0) + +#define USE_WFE (1) + +#define CLIB_FS_MM_TEST +// #define GLOBAL_FS_MM_TEST +// #define ROW_FS_MM_TEST +// #define COL_FS_MM_TEST +// #define HNBR_FS_MM_TEST +// #define VNBR_FS_MM_TEST +// #define HRING_FS_MM_TEST +// #define VRING_FS_MM_TEST + +#define NUM_LEVELS (31-__builtin_clz(NUM_HARTS)) + + +#define CACHE_HEAT_CYCLES (3) + +int main(void) { + uint32_t tile_hartid = get_hartid(); + uint32_t tile_xhartid = GET_X_ID(tile_hartid); + uint32_t tile_yhartid = GET_Y_ID(tile_hartid); + + // Initialize Event Unit once + eu_init(); + + printf("Starting Fractal Sync Memory-Mapped test...\n"); + + // Filling up the cache + 
fill_icache(); + + // Execute synchronization multiple times to pre-heat the cache + for (int i = 0; i < CACHE_HEAT_CYCLES; i++) { +#ifdef CLIB_FS_MM_TEST + // Climb FS tree test using memory-mapped interface + for (int i = 0; i < NUM_LEVELS; i++){ + printf("Fractal Sync at level %0d...\n", i+1); + + uint32_t aggregates = (1 << (i+1))-1; + uint32_t ids = 0; +#if VERBOSE > 10 + printf("aggregate: 0x%0x\n", aggregates); + printf("id: 0x%0x\n", ids); +#endif + + // Instruction immediately preceding synchronization: indicates start of the synchronization region + sentinel_start(); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm(ids, aggregates); + + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + + // Instruction immediately following synchronization: indicates end of the synchronization region + sentinel_end(); + + printf("Synchronized...\n"); + } +#endif + +#ifdef GLOBAL_FS_MM_TEST +#if VERBOSE > 1 + printf("Fractal Sync global synchronization test...\n"); +#endif + +#if VERBOSE > 10 + printf("aggregate: 0x%0x\n", _FS_MM_GLOBAL_AGGR); + printf("id: 0x%0x\n", _FS_MM_GLOBAL_ID); +#endif + + // Instruction immediately preceding synchronization: indicates start of the synchronization region + sentinel_start(); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_global(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + // Instruction immediately following synchronization: indicates end of the synchronization region + sentinel_end(); + +#if VERBOSE > 1 + printf("Synchronized...\n"); +#endif +#endif + +#ifdef HNBR_FS_MM_TEST +#if VERBOSE > 1 + printf("Fractal Sync 
horizontal neighbor synchronization test...\n"); +#endif + +#if VERBOSE > 10 + printf("aggregate: 0x%0x\n", _FS_MM_HNBR_AGGR); + printf("id: 0x%0x\n", _FS_MM_HNBR_ID); +#endif + + // Instruction immediately preceding synchronization: indicates start of the synchronization region + sentinel_start(); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_hnbr(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + // Instruction immediately following synchronization: indicates end of the synchronization region + sentinel_end(); + +#if VERBOSE > 1 + printf("Synchronized...\n"); +#endif +#endif + +#ifdef VNBR_FS_MM_TEST +#if VERBOSE > 1 + printf("Fractal Sync vertical neighbor synchronization test...\n"); +#endif + +#if VERBOSE > 10 + printf("aggregate: 0x%0x\n", _FS_MM_VNBR_AGGR); + printf("id: 0x%0x\n", _FS_MM_VNBR_ID); +#endif + + // Instruction immediately preceding synchronization: indicates start of the synchronization region + sentinel_start(); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_vnbr(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + // Instruction immediately following synchronization: indicates end of the synchronization region + sentinel_end(); + +#if VERBOSE > 1 + printf("Synchronized...\n"); +#endif +#endif + +#ifdef HRING_FS_MM_TEST +#if VERBOSE > 1 + printf("Fractal Sync horizontal ring synchronization test...\n"); +#endif + +#if VERBOSE > 10 + if ((tile_xhartid == 0) || (tile_xhartid == MESH_X_TILES-1)){ + uint32_t id = row_id_lookup_mm(tile_yhartid); + printf("aggregate: 0x%0x\n", _FS_MM_RC_LVL); + printf("id: 0x%0x\n", id); + } else { + 
printf("aggregate: 0x%0x\n", _FS_MM_HRING_AGGR); + printf("id: 0x%0x\n", _FS_MM_HRING_ID); + } +#endif + + // Instruction immediately preceding synchronization: indicates start of the synchronization region + sentinel_start(); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_hring(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + // Instruction immediately following synchronization: indicates end of the synchronization region + sentinel_end(); + +#if VERBOSE > 1 + printf("Synchronized...\n"); +#endif +#endif + +#ifdef VRING_FS_MM_TEST +#if VERBOSE > 1 + printf("Fractal Sync vertical ring synchronization test...\n"); +#endif + +#if VERBOSE > 10 + if ((tile_yhartid == 0) || (tile_yhartid == MESH_Y_TILES-1)){ + uint32_t id = col_id_lookup_mm(tile_xhartid); + printf("aggregate: 0x%0x\n", _FS_MM_RC_LVL); + printf("id: 0x%0x\n", id); + } else { + printf("aggregate: 0x%0x\n", _FS_MM_VRING_AGGR); + printf("id: 0x%0x\n", _FS_MM_VRING_ID); + } +#endif + + // Instruction immediately preceding synchronization: indicates start of the synchronization region + sentinel_start(); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_vring(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + // Instruction immediately following synchronization: indicates end of the synchronization region + sentinel_end(); + +#if VERBOSE > 1 + printf("Synchronized...\n"); +#endif +#endif + +#ifdef ROW_FS_MM_TEST +#if VERBOSE > 1 + printf("Fractal Sync row synchronization test...\n"); +#endif + +#if VERBOSE > 10 + uint32_t id = row_id_lookup_mm(tile_yhartid); + printf("aggregate: 0x%0x\n", 
_FS_MM_RC_AGGR); + printf("id: 0x%0x\n", id); +#endif + + // Instruction immediately preceding synchronization: indicates start of the synchronization region + sentinel_start(); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_rows(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + // Instruction immediately following synchronization: indicates end of the synchronization region + sentinel_end(); + +#if VERBOSE > 1 + printf("Synchronized...\n"); +#endif +#endif + +#ifdef COL_FS_MM_TEST +#if VERBOSE > 1 + printf("Fractal Sync column synchronization test...\n"); +#endif + +#if VERBOSE > 10 + uint32_t id = col_id_lookup_mm(tile_xhartid); + printf("aggregate: 0x%0x\n", _FS_MM_RC_AGGR); + printf("id: 0x%0x\n", id); +#endif + + // Instruction immediately preceding synchronization: indicates start of the synchronization region + sentinel_start(); + + // Clear Event Unit and ensure FSync mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_FSYNC_DONE_MASK); + + fsync_mm_cols(); + + if (USE_WFE) { + eu_fsync_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_fsync_wait_completion(EU_WAIT_MODE_POLLING); + } + + // Instruction immediately following synchronization: indicates end of the synchronization region + sentinel_end(); + +#if VERBOSE > 1 + printf("Synchronized...\n"); +#endif +#endif + } + + printf("Fractal Sync Memory-Mapped test finished...\n"); + + + return 0; +} \ No newline at end of file diff --git a/sw/tests/fsync_test.c b/sw/tests/fsync_test_mm.c similarity index 57% rename from sw/tests/fsync_test.c rename to sw/tests/fsync_test_mm.c index a7fe82e..aae62a7 100644 --- a/sw/tests/fsync_test.c +++ b/sw/tests/fsync_test_mm.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * 
Copyright (C) 2023-2024 ETH Zurich and University of Bologna * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,31 +14,33 @@ * limitations under the License. * SPDX-License-Identifier: Apache-2.0 * - * Authors: Victor Isachi + * Authors: Luca Balboni + * Based on fsync_test.c by Victor Isachi * - * MAGIA FractalSync Synchronization Test + * MAGIA FractalSync Memory-Mapped Synchronization Test + * WARNING: //STALLING = POLLING, //NONSTALLING = WFI not working in this new version of the tile with event unit + * WARNING: Make sure to undefine EVENT_UNIT in fsync_mm_utils.h, otherwise test will proceed without correctly waiting for sync completion */ #include "magia_tile_utils.h" #include "magia_utils.h" -#include "fsync_isa_utils.h" -#include "fsync_api.h" +#include "fsync_mm_utils.h" +#include "fsync_mm_api.h" #include "cache_fill.h" #define VERBOSE (0) -#define CLIB_FS_TEST -// #define GLOBAL_FS_TEST -// #define ROW_FS_TEST -// #define COL_FS_TEST -// #define HNBR_FS_TEST -// #define VNBR_FS_TEST -// #define HRING_FS_TEST -// #define VRING_FS_TEST +#define CLIB_FS_MM_TEST +// #define GLOBAL_FS_MM_TEST +// #define ROW_FS_MM_TEST +// #define COL_FS_MM_TEST +// #define HNBR_FS_MM_TEST +// #define VNBR_FS_MM_TEST +// #define HRING_FS_MM_TEST +// #define VRING_FS_MM_TEST #define NUM_LEVELS (31-__builtin_clz(NUM_HARTS)) -#define STALLING #define CACHE_HEAT_CYCLES (3) @@ -47,21 +49,18 @@ int main(void) { uint32_t tile_xhartid = GET_X_ID(tile_hartid); uint32_t tile_yhartid = GET_Y_ID(tile_hartid); - printf("Starting Fractal Sync test...\n"); + printf("Starting Fractal Sync Memory-Mapped test...\n"); // Filling up the cache fill_icache(); // Execute synchronization multiple times to pre-heat the cache for (int i = 0; i < CACHE_HEAT_CYCLES; i++) { -#ifdef CLIB_FS_TEST - // Climb FS tree test +#ifdef CLIB_FS_MM_TEST + // Climb FS tree test using
memory-mapped interface for (int i = 0; i < NUM_LEVELS; i++){ printf("Fractal Sync at level %0d...\n", i+1); -#ifndef STALLING - irq_en(1< 1 - printf("Fractal Sync global synchrnonization test...\n"); + printf("Fractal Sync global synchronization test...\n"); #endif -#ifndef STALLING - irq_en(1< 10 - printf("aggregate: 0x%0x\n", _FS_GLOBAL_AGGR); - printf("id: 0x%0x\n", _FS_GLOBAL_ID); + printf("aggregate: 0x%0x\n", _FS_MM_GLOBAL_AGGR); + printf("id: 0x%0x\n", _FS_MM_GLOBAL_ID); #endif // Instruction immediately preceding synchronization: indicates start of the synchronization region sentinel_start(); - fsync_global(); -#ifndef STALLING - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); -#endif + fsync_mm_global(); // Instruction immediately following synchronization: indicates end of the synchronization region sentinel_end(); @@ -117,28 +105,21 @@ int main(void) { #endif #endif -#ifdef HNBR_FS_TEST +#ifdef HNBR_FS_MM_TEST #if VERBOSE > 1 - printf("Fractal Sync horizontal neighbor synchrnonization test...\n"); + printf("Fractal Sync horizontal neighbor synchronization test...\n"); #endif -#ifndef STALLING - irq_en(1< 10 - printf("aggregate: 0x%0x\n", _FS_HNBR_AGGR); - printf("id: 0x%0x\n", _FS_HNBR_ID); + printf("aggregate: 0x%0x\n", _FS_MM_HNBR_AGGR); + printf("id: 0x%0x\n", _FS_MM_HNBR_ID); #endif // Instruction immediately preceding synchronization: indicates start of the synchronization region sentinel_start(); - fsync_hnbr(); -#ifndef STALLING - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); -#endif + fsync_mm_hnbr(); // Instruction immediately following synchronization: indicates end of the synchronization region sentinel_end(); @@ -148,28 +129,21 @@ int main(void) { #endif #endif -#ifdef VNBR_FS_TEST +#ifdef VNBR_FS_MM_TEST #if VERBOSE > 1 - printf("Fractal Sync vertical neighbor synchrnonization test...\n"); + printf("Fractal Sync vertical neighbor synchronization test...\n"); #endif -#ifndef STALLING - irq_en(1< 10 - 
printf("aggregate: 0x%0x\n", _FS_VNBR_AGGR); - printf("id: 0x%0x\n", _FS_VNBR_ID); + printf("aggregate: 0x%0x\n", _FS_MM_VNBR_AGGR); + printf("id: 0x%0x\n", _FS_MM_VNBR_ID); #endif // Instruction immediately preceding synchronization: indicates start of the synchronization region sentinel_start(); - fsync_vnbr(); -#ifndef STALLING - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); -#endif + fsync_mm_vnbr(); // Instruction immediately following synchronization: indicates end of the synchronization region sentinel_end(); @@ -179,34 +153,27 @@ int main(void) { #endif #endif -#ifdef HRING_FS_TEST +#ifdef HRING_FS_MM_TEST #if VERBOSE > 1 - printf("Fractal Sync horizontal ring synchrnonization test...\n"); + printf("Fractal Sync horizontal ring synchronization test...\n"); #endif -#ifndef STALLING - irq_en(1< 10 if ((tile_xhartid == 0) || (tile_xhartid == MESH_X_TILES-1)){ - uint32_t id = row_id_lookup(tile_yhartid); - printf("aggregate: 0x%0x\n", _FS_RC_LVL); + uint32_t id = row_id_lookup_mm(tile_yhartid); + printf("aggregate: 0x%0x\n", _FS_MM_RC_LVL); printf("id: 0x%0x\n", id); } else { - printf("aggregate: 0x%0x\n", _FS_HRING_AGGR); - printf("id: 0x%0x\n", _FS_HRING_ID); + printf("aggregate: 0x%0x\n", _FS_MM_HRING_AGGR); + printf("id: 0x%0x\n", _FS_MM_HRING_ID); } #endif // Instruction immediately preceding synchronization: indicates start of the synchronization region sentinel_start(); - fsync_hring(); -#ifndef STALLING - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); -#endif + fsync_mm_hring(); // Instruction immediately following synchronization: indicates end of the synchronization region sentinel_end(); @@ -216,34 +183,27 @@ int main(void) { #endif #endif -#ifdef VRING_FS_TEST +#ifdef VRING_FS_MM_TEST #if VERBOSE > 1 - printf("Fractal Sync vertical ring synchrnonization test...\n"); + printf("Fractal Sync vertical ring synchronization test...\n"); #endif -#ifndef STALLING - irq_en(1< 10 if ((tile_yhartid == 0) || (tile_yhartid == 
MESH_Y_TILES-1)){ - uint32_t id = col_id_lookup(tile_xhartid); - printf("aggregate: 0x%0x\n", _FS_RC_LVL); + uint32_t id = col_id_lookup_mm(tile_xhartid); + printf("aggregate: 0x%0x\n", _FS_MM_RC_LVL); printf("id: 0x%0x\n", id); } else { - printf("aggregate: 0x%0x\n", _FS_VRING_AGGR); - printf("id: 0x%0x\n", _FS_VRING_ID); + printf("aggregate: 0x%0x\n", _FS_MM_VRING_AGGR); + printf("id: 0x%0x\n", _FS_MM_VRING_ID); } #endif // Instruction immediately preceding synchronization: indicates start of the synchronization region sentinel_start(); - fsync_vring(); -#ifndef STALLING - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); -#endif + fsync_mm_vring(); // Instruction immediately following synchronization: indicates end of the synchronization region sentinel_end(); @@ -253,29 +213,22 @@ int main(void) { #endif #endif -#ifdef ROW_FS_TEST +#ifdef ROW_FS_MM_TEST #if VERBOSE > 1 - printf("Fractal Sync row synchrnonization test...\n"); + printf("Fractal Sync row synchronization test...\n"); #endif -#ifndef STALLING - irq_en(1< 10 - uint32_t id = row_id_lookup(tile_yhartid); - printf("aggregate: 0x%0x\n", _FS_RC_AGGR); + uint32_t id = row_id_lookup_mm(tile_yhartid); + printf("aggregate: 0x%0x\n", _FS_MM_RC_AGGR); printf("id: 0x%0x\n", id); #endif // Instruction immediately preceding synchronization: indicates start of the synchronization region sentinel_start(); - fsync_rows(); -#ifndef STALLING - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); -#endif + fsync_mm_rows(); // Instruction immediately following synchronization: indicates end of the synchronization region sentinel_end(); @@ -285,29 +238,22 @@ int main(void) { #endif #endif -#ifdef COL_FS_TEST +#ifdef COL_FS_MM_TEST #if VERBOSE > 1 - printf("Fractal Sync column synchrnonization test...\n"); + printf("Fractal Sync column synchronization test...\n"); #endif -#ifndef STALLING - irq_en(1< 10 - uint32_t id = col_id_lookup(tile_xhartid); - printf("aggregate: 0x%0x\n", _FS_RC_AGGR); + 
uint32_t id = col_id_lookup_mm(tile_xhartid); + printf("aggregate: 0x%0x\n", _FS_MM_RC_AGGR); printf("id: 0x%0x\n", id); #endif // Instruction immediately preceding synchronization: indicates start of the synchronization region sentinel_start(); - fsync_cols(); -#ifndef STALLING - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); -#endif + fsync_mm_cols(); // Instruction immediately following synchronization: indicates end of the synchronization region sentinel_end(); @@ -318,7 +264,8 @@ int main(void) { #endif } - printf("Fractal Sync test finished...\n"); + printf("Fractal Sync Memory-Mapped test finished...\n"); + return 0; -} +} \ No newline at end of file diff --git a/sw/tests/hello_mesh.c b/sw/tests/hello_mesh.c index f1ff565..48e5c5d 100644 --- a/sw/tests/hello_mesh.c +++ b/sw/tests/hello_mesh.c @@ -24,7 +24,7 @@ int main(void) { // h_pprintf("Hello World! it is hartid "); pprintf(ds(get_hartid())); pprintln; - printf("Hello World! it is hartid %0d\n", get_hartid()); + printf("Hello World! it is tile/hart %0d\n", get_hartid()); return 0; } diff --git a/sw/tests/idma_test_event_unit.c b/sw/tests/idma_test_event_unit.c new file mode 100644 index 0000000..988d8d9 --- /dev/null +++ b/sw/tests/idma_test_event_unit.c @@ -0,0 +1,204 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Authors: Luca Balboni + * Based on idma_test.c by Victor Isachi + * + * MAGIA iDMA Test - Event Unit WFE API Version + * Uses event_unit_utils.h for Event Unit control and WFE/polling + * + */ + +#include "magia_tile_utils.h" +#include "idma_mm_utils.h" +#include "event_unit_utils.h" + +#include "x_input.h" + +#define X_BASE (L1_BASE + 0x00012048) +#define Y_BASE (L1_BASE + 0x00016048) +#define Z_BASE (L2_BASE + 0x00001000) +#define W_BASE (L2_BASE + 0x00005000) + +#define M_SIZE (96) +#define N_SIZE (64) + +#define VERBOSE (0) + +#define USE_WFE (1) + +#define WAIT_CYCLES (10) + +#define CONCURRENT + +int main(void) { + uint32_t dst_addr; + uint32_t src_addr; + uint32_t len; + + uint32_t dst_std_2; + uint32_t src_std_2; + uint32_t reps_2; + + uint32_t dst_std_3; + uint32_t src_std_3; + uint32_t reps_3; + + // Initialize Event Unit once + eu_init(); + + // Z - golden (reference) + for (int i = 0; i < M_SIZE*N_SIZE; i++) + mmio16(Z_BASE + 2*i) = x_inp[i]; +#if VERBOSE > 100 + for (int i = 0; i < M_SIZE*N_SIZE; i++) + printf("Z[%8x]: 0x%4x\n", Z_BASE + 2*i, mmio16(Z_BASE + 2*i)); +#endif + + dst_addr = (uint32_t)X_BASE; + src_addr = (uint32_t)Z_BASE; + len = (uint32_t)(M_SIZE*N_SIZE*2); // 2 Bytes per element +#if VERBOSE > 10 + printf("dst_addr: 0x%8x (X_BASE)\n", dst_addr); + printf("src_addr: 0x%8x (Z_BASE)\n", src_addr); + printf("len: %0d\n", len); +#endif + + dst_std_2 = 0; + src_std_2 = 0; + reps_2 = 1; +#if VERBOSE > 10 + printf("dst_std_2: 0x%8x\n", dst_std_2); + printf("src_std_2: 0x%8x\n", src_std_2); + printf("reps_2: 0x%8x\n", reps_2); +#endif + + dst_std_3 = 0; + src_std_3 = 0; + reps_3 = 1; +#if VERBOSE > 10 + printf("dst_std_3: 0x%8x\n", dst_std_3); + printf("src_std_3: 0x%8x\n", src_std_3); + printf("reps_3: 0x%8x\n", reps_3); +#endif + + uint32_t transfer_id_1 = idma_L2ToL1(src_addr, dst_addr, len); + printf("iDMA moving data from L2 to L1...\n"); + + // Clear Event Unit and ensure A2O mask is enabled 
+ eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_IDMA_A2O_DONE_MASK); + + if (USE_WFE) { + eu_idma_wait_a2o_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_idma_wait_a2o_completion(EU_WAIT_MODE_POLLING); + } + + dst_addr = (uint32_t)W_BASE; + src_addr = (uint32_t)X_BASE; + len = (uint32_t)(M_SIZE*N_SIZE*2); // 2 Bytes per element +#if VERBOSE > 10 + printf("dst_addr: 0x%8x (W_BASE)\n", dst_addr); + printf("src_addr: 0x%8x (X_BASE)\n", src_addr); + printf("len: %0d\n", len); +#endif + + dst_std_2 = 0; + src_std_2 = 0; + reps_2 = 1; +#if VERBOSE > 10 + printf("dst_std_2: 0x%8x\n", dst_std_2); + printf("src_std_2: 0x%8x\n", src_std_2); + printf("reps_2: 0x%8x\n", reps_2); +#endif + + dst_std_3 = 0; + src_std_3 = 0; + reps_3 = 1; +#if VERBOSE > 10 + printf("dst_std_3: 0x%8x\n", dst_std_3); + printf("src_std_3: 0x%8x\n", src_std_3); + printf("reps_3: 0x%8x\n", reps_3); +#endif + + uint32_t transfer_id_2 = idma_L1ToL2(src_addr, dst_addr, len); + + printf("iDMA moving data from L1 to L2...\n"); + + // Clear Event Unit and ensure O2A mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_IDMA_O2A_DONE_MASK); + + if (USE_WFE) { + eu_idma_wait_o2a_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_idma_wait_o2a_completion(EU_WAIT_MODE_POLLING); + } + +#ifdef CONCURRENT + // Setup concurrent transfer L2->L1 to Y_BASE + dst_addr = (uint32_t)Y_BASE; + src_addr = (uint32_t)Z_BASE; + len = (uint32_t)(M_SIZE*N_SIZE*2); // 2 Bytes per element +#if VERBOSE > 10 + printf("dst_addr: 0x%8x (Y_BASE)\n", dst_addr); + printf("src_addr: 0x%8x (Z_BASE)\n", src_addr); + printf("len: %0d\n", len); +#endif + + // Start both transfers concurrently + uint32_t transfer_id_o2a = transfer_id_2; // OBI2AXI (L1->L2) already started + uint32_t transfer_id_a2o = idma_L2ToL1(src_addr, dst_addr, len); // Start AXI2OBI (L2->L1) + + printf("iDMA moving concurrently data from L1 to L2 and from L2 to L1...\n"); + + // Clear Event Unit 
and ensure both masks are enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_IDMA_ALL_DONE_MASK); + + if (USE_WFE) { + eu_idma_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_idma_wait_completion(EU_WAIT_MODE_POLLING); + } +#endif + + printf("Verifying results...\n"); + + unsigned int num_errors = 0; + + uint16_t detected_l1, detected_l2, expected; + for(int i = 0; i < M_SIZE*N_SIZE; i++){ + detected_l2 = mmio16(W_BASE + 2*i); +#ifdef CONCURRENT + detected_l1 = mmio16(Y_BASE + 2*i); +#else + detected_l1 = mmio16(X_BASE + 2*i); +#endif + expected = mmio16(Z_BASE + 2*i); + if((detected_l2 != expected) || (detected_l1 != expected)){ + num_errors++; + printf("**ERROR**: DETECTED L2[%0d](=0x%4x) || DETECTED L1[%0d](=0x%4x) != EXPECTED[%0d](=0x%4x)\n", i, detected_l2, i, detected_l1, i, expected); + } + } + printf("Finished test with %0d errors\n", num_errors); + + return num_errors; +} \ No newline at end of file diff --git a/sw/tests/idma_test.c b/sw/tests/idma_test_mm.c similarity index 71% rename from sw/tests/idma_test.c rename to sw/tests/idma_test_mm.c index 0811fbb..07554c9 100644 --- a/sw/tests/idma_test.c +++ b/sw/tests/idma_test_mm.c @@ -14,13 +14,14 @@ * limitations under the License. 
* SPDX-License-Identifier: Apache-2.0 * - * Authors: Victor Isachi + * Authors: Luca Balboni + * Based on idma_test.c by Victor Isachi * - * MAGIA iDMA Test + * MAGIA iDMA Test using Memory-Mapped Control */ #include "magia_tile_utils.h" -#include "idma_isa_utils.h" +#include "idma_mm_utils.h" #include "x_input.h" @@ -34,8 +35,6 @@ #define VERBOSE (0) -#define IRQ_EN - #define WAIT_CYCLES (10) #define CONCURRENT @@ -61,14 +60,6 @@ int main(void) { printf("Z[%8x]: 0x%4x\n", Z_BASE + 2*i, mmio16(Z_BASE + 2*i)); #endif -#ifdef IRQ_EN - // Enable IRQs - uint32_t index = (1<L1 to Y_BASE dst_addr = (uint32_t)Y_BASE; src_addr = (uint32_t)Z_BASE; len = (uint32_t)(M_SIZE*N_SIZE*2); // 2 Bytes per element @@ -151,49 +137,21 @@ int main(void) { printf("src_addr: 0x%8x (Z_BASE)\n", src_addr); printf("len: %0d\n", len); #endif - idma_set_addr_len_in(dst_addr, src_addr, len); - - dst_std_2 = 0; - src_std_2 = 0; - reps_2 = 1; -#if VERBOSE > 10 - printf("dst_std_2: 0x%8x\n", dst_std_2); - printf("src_std_2: 0x%8x\n", src_std_2); - printf("reps_2: 0x%8x\n", reps_2); -#endif - idma_set_std2_rep2_in(dst_std_2, src_std_2, reps_2); - - dst_std_3 = 0; - src_std_3 = 0; - reps_3 = 1; -#if VERBOSE > 10 - printf("dst_std_3: 0x%8x\n", dst_std_3); - printf("src_std_3: 0x%8x\n", src_std_3); - printf("reps_3: 0x%8x\n", reps_3); -#endif - idma_set_std3_rep3_in(dst_std_3, src_std_3, reps_3); - - idma_start_out(); - idma_start_in(); + // Start both transfers concurrently + uint32_t transfer_id_o2a = transfer_id_2; // OBI2AXI (L1->L2) already started + uint32_t transfer_id_a2o = idma_L2ToL1(src_addr, dst_addr, len); // Start AXI2OBI (L2->L1) printf("iDMA moving concurrently data from L1 to L2 and from L2 to L1...\n"); -#ifdef IRQ_EN - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); -#else - wait_print(2*WAIT_CYCLES); -#endif -#else - idma_start_out(); - printf("iDMA moving data from L1 to L2...\n"); -#ifdef IRQ_EN - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); + 
// Use polling to wait for both transfers completion + dma_wait(transfer_id_o2a); + dma_wait(transfer_id_a2o); #else - wait_print(WAIT_CYCLES); -#endif + // Single transfer mode + + // Use polling to wait for completion + dma_wait(transfer_id_2); #endif printf("Verifying results...\n"); @@ -217,4 +175,4 @@ int main(void) { printf("Finished test with %0d errors\n", num_errors); return num_errors; -} +} \ No newline at end of file diff --git a/sw/tests/inter_l1_test.c b/sw/tests/inter_l1_test.c index 41eec0c..d8d2b40 100644 --- a/sw/tests/inter_l1_test.c +++ b/sw/tests/inter_l1_test.c @@ -27,29 +27,34 @@ int main() { uint32_t error[NUM_HARTS]; - uint32_t total_errors; + uint32_t total_errors = 0; + + // Initialize error array + for(int i = 0; i < NUM_HARTS; i++) { + error[i] = 0; + } // Write the tiles ID to different L1 memory locations in other tiles for(int i = 0; i < NUM_HARTS; i++) { - if(get_hartid() != i) { - *(volatile int*) (L1_BASE + i*L1_TILE_OFFSET + MEM_OFFSET + get_hartid()*4) = (int) get_hartid(); + if(get_mhartid() != i) { + *(volatile int*) (L1_BASE + i*L1_TILE_OFFSET + MEM_OFFSET + get_mhartid()*4) = (int) get_mhartid(); } } // Read back the values for (int i = 0; i < NUM_HARTS; i++) { - if(get_hartid() != i) { - if (*(volatile int *) (L1_BASE + i*L1_TILE_OFFSET + MEM_OFFSET + get_hartid()*4) != get_hartid()) { + if(get_mhartid() != i) { + if (*(volatile int *) (L1_BASE + i*L1_TILE_OFFSET + MEM_OFFSET + get_mhartid()*4) != get_mhartid()) { // h_pprintf("Read wrong value, expected "); pprintf(ds(get_hartid())); pprintln; - printf("Read wrong value, expected %0d\n", get_hartid()); - error[get_hartid()]++; + printf("Read wrong value, expected %0d\n", get_mhartid()); + error[get_mhartid()]++; } } } wait_nop(SETTLE_CYCLE); - if (get_hartid() == 0) { + if (get_mhartid() == 0) { for (int i = 0; i < NUM_HARTS; i++) if (error[i]) total_errors++; if (total_errors) { /*h_pprintf("TEST FAILED!!"); pprintln;*/ printf("TEST FAILED!!"); } diff --git 
a/sw/tests/mesh_test.c b/sw/tests/mesh_test.c deleted file mode 100644 index 1d622f2..0000000 --- a/sw/tests/mesh_test.c +++ /dev/null @@ -1,317 +0,0 @@ -/* - * Copyright (C) 2023-2024 ETH Zurich and University of Bologna - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * SPDX-License-Identifier: Apache-2.0 - * - * Authors: Victor Isachi - * - * MAGIA Mesh Test - */ - -#include "magia_tile_utils.h" -#include "magia_utils.h" -#include "redmule_isa_utils.h" -#include "idma_isa_utils.h" - -#include "x_input.h" -#include "w_input.h" -#include "y_input.h" -#include "z_output.h" - -#define X_BASE (L1_BASE + 0x00012048) -#define W_BASE (L1_BASE + 0x00016048) -#define Y_BASE (L1_BASE + 0x0001A048) -#define Z_BASE (L2_BASE + 0x00042000) // Note: for a large number of tiles (e.g. 64x64 mesh) we might exceed memory range of L2 -#define V_BASE (L2_BASE + 0x00046000) // Note: for a large number of tiles (e.g. 64x64 mesh) we might exceed memory range of L2 -#define T_BASE (L2_BASE + 0x0004A000) // Note: for a large number of tiles (e.g. 
64x64 mesh) we might exceed memory range of L2 - -#define MHARTID_OFFSET (0x00010000) - -#define M_SIZE (96) -#define N_SIZE (64) -#define K_SIZE (64) - -#define VERBOSE (0) - -#define WAIT_CYCLES (10) - -#define DIFF_TH (0x0011) - -#define CONCURRENT - -#define IRQ_EN - -void idma_mv_in(unsigned int x_dim, unsigned int y_dim, uint16_t src_data[], uint32_t dst_address){ - uint32_t dst_addr; - uint32_t src_addr; - uint32_t len; - - uint32_t dst_std_2; - uint32_t src_std_2; - uint32_t reps_2; - - uint32_t dst_std_3; - uint32_t src_std_3; - uint32_t reps_3; - -#ifdef IRQ_EN - irq_en(1< 10 - // h_pprintf("dst_addr: 0x"); n_pprintf(hs(dst_addr)); - // h_pprintf("src_addr: 0x"); n_pprintf(hs(src_addr)); - // h_pprintf("len: "); n_pprintf(ds(len)); - printf("dst_addr: 0x%0x\n", dst_addr); - printf("src_addr: 0x%0x\n", src_addr); - printf("len: %0d\n", len); -#endif - idma_set_addr_len_in(dst_addr, src_addr, len); - - dst_std_2 = 0; - src_std_2 = 0; - reps_2 = 1; -#if VERBOSE > 100 - // h_pprintf("dst_std_2: 0x"); n_pprintf(hs(dst_std_2)); - // h_pprintf("src_std_2: 0x"); n_pprintf(hs(src_std_2)); - // h_pprintf("reps_2: 0x"); n_pprintf(hs(reps_2)); - printf("dst_std_2: 0x%0x\n", dst_std_2); - printf("src_std_2: 0x%0x\n", src_std_2); - printf("reps_2: 0x%0x\n", reps_2); -#endif - idma_set_std2_rep2_in(dst_std_2, src_std_2, reps_2); - - dst_std_3 = 0; - src_std_3 = 0; - reps_3 = 1; -#if VERBOSE > 100 - // h_pprintf("dst_std_3: 0x"); n_pprintf(hs(dst_std_3)); - // h_pprintf("src_std_3: 0x"); n_pprintf(hs(src_std_3)); - // h_pprintf("reps_3: 0x"); n_pprintf(hs(reps_3)); - printf("dst_std_3: 0x%0x\n", dst_std_3); - printf("src_std_3: 0x%0x\n", src_std_3); - printf("reps_3: 0x%0x\n", reps_3); -#endif - idma_set_std3_rep3_in(dst_std_3, src_std_3, reps_3); - - idma_start_in(); - -#ifdef IRQ_EN - asm volatile("wfi" ::: "memory"); - // h_pprintf("Detected IRQ...\n"); - printf("Detected IRQ...\n"); -#else - wait_print(WAIT_CYCLES); -#endif - -#if VERBOSE > 100 - for (int i = 0; i < 
x_dim*y_dim; i++){ - // h_pprintf("DST[0x"); pprintf(hs(dst_addr + 2*i)); pprintf("]: 0x"); n_pprintf(hs(mmio16(dst_addr + 2*i))); - printf("DST[0x%0x]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i)); - } -#endif - -#if VERBOSE > 10 - unsigned int num_errors; - num_errors = 0; - for (int i = 0; i < x_dim*y_dim; i++) { - if (mmio16(dst_addr + 2*i) != src_data[i]) { - num_errors++; - // h_pprintf("DST[0x"); pprintf(hs(dst_addr + 2*i)); pprintf("]: 0x"); pprintf(hs(mmio16(dst_addr + 2*i))); - // pprintf(" != SRC["); pprintf(ds(i)); pprintf("]: 0x"); n_pprintf(ds(src_data[i])); - printf("DST[0x%0x]: 0x%0x != SRC[%0d]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i), i, src_data[i]); - } - } - // h_pprintf("Detected "); pprintf(ds(num_errors)); n_pprintf(" error(s) in the transfer..."); - printf("Detected %0d error(s) in the transfer...\n", num_errors); -#endif -} - -void idma_mv_out(unsigned int x_dim, unsigned int y_dim, uint32_t src_address, uint32_t dst_address){ - uint32_t dst_addr; - uint32_t src_addr; - uint32_t len; - - uint32_t dst_std_2; - uint32_t src_std_2; - uint32_t reps_2; - - uint32_t dst_std_3; - uint32_t src_std_3; - uint32_t reps_3; - -#ifdef IRQ_EN - irq_en(1< 10 - // h_pprintf("dst_addr: 0x"); n_pprintf(hs(dst_addr)); - // h_pprintf("src_addr: 0x"); n_pprintf(hs(src_addr)); - // h_pprintf("len: "); n_pprintf(ds(len)); - printf("dst_addr: 0x%0x\n", dst_addr); - printf("src_addr: 0x%0x\n", src_addr); - printf("len: %0d\n", len); -#endif - idma_set_addr_len_out(dst_addr, src_addr, len); - - dst_std_2 = 0; - src_std_2 = 0; - reps_2 = 1; -#if VERBOSE > 100 - // h_pprintf("dst_std_2: 0x"); n_pprintf(hs(dst_std_2)); - // h_pprintf("src_std_2: 0x"); n_pprintf(hs(src_std_2)); - // h_pprintf("reps_2: 0x"); n_pprintf(hs(reps_2)); - printf("dst_std_2: 0x%0x\n", dst_std_2); - printf("src_std_2: 0x%0x\n", src_std_2); - printf("reps_2: 0x%0x\n", reps_2); -#endif - idma_set_std2_rep2_out(dst_std_2, src_std_2, reps_2); - - dst_std_3 = 0; - src_std_3 = 0; - 
reps_3 = 1; -#if VERBOSE > 100 - // h_pprintf("dst_std_3: 0x"); n_pprintf(hs(dst_std_3)); - // h_pprintf("src_std_3: 0x"); n_pprintf(hs(src_std_3)); - // h_pprintf("reps_3: 0x"); n_pprintf(hs(reps_3)); - printf("dst_std_3: 0x%0x\n", dst_std_3); - printf("src_std_3: 0x%0x\n", src_std_3); - printf("reps_3: 0x%0x\n", reps_3); -#endif - idma_set_std3_rep3_out(dst_std_3, src_std_3, reps_3); - - idma_start_out(); - -#ifdef IRQ_EN - asm volatile("wfi" ::: "memory"); - // h_pprintf("Detected IRQ...\n"); - printf("Detected IRQ...\n"); -#else - wait_print(WAIT_CYCLES); -#endif - -#if VERBOSE > 100 - for (int i = 0; i < x_dim*y_dim; i++){ - // h_pprintf("DST[0x"); pprintf(hs(dst_addr + 2*i)); pprintf("]: 0x"); n_pprintf(hs(mmio16(dst_addr + 2*i))); - printf("DST[0x%0x]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i)); - } -#endif - -#if VERBOSE > 10 - unsigned int num_errors; - num_errors = 0; - for (int i = 0; i < x_dim*y_dim; i++) { - if (mmio16(dst_addr + 2*i) != mmio16(src_addr + 2*i)) { - num_errors++; - // h_pprintf("DST[0x"); pprintf(hs(dst_addr + 2*i)); pprintf("]: 0x"); pprintf(hs(mmio16(dst_addr + 2*i))); - // pprintf(" != SRC[0x"); pprintf(hs(src_addr + 2*i)); pprintf("]: 0x"); n_pprintf(hs(mmio16(src_addr + 2*i))); - printf("DST[0x%0x]: 0x%0x != SRC[%0d]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i), i, mmio16(src_addr + 2*i)); - } - } - // h_pprintf("Detected "); pprintf(ds(num_errors)); n_pprintf(" error(s) in the transfer..."); - printf("Detected %0d error(s) in the transfer...\n", num_errors); -#endif -} - -int main(void) { - // X - // h_pprintf("Initializing X through iDMA...\n"); - printf("Initializing X through iDMA...\n"); - idma_mv_in(M_SIZE, N_SIZE, x_inp, (X_BASE + get_hartid()*L1_TILE_OFFSET)); - - // W - // h_pprintf("Initializing W through iDMA...\n"); - printf("Initializing W through iDMA...\n"); - idma_mv_in(N_SIZE, K_SIZE, w_inp, (W_BASE + get_hartid()*L1_TILE_OFFSET)); - - // Y - // h_pprintf("Initializing Y through iDMA...\n"); - 
printf("Initializing Y through iDMA...\n"); - idma_mv_in(M_SIZE, K_SIZE, y_inp, (Y_BASE + get_hartid()*L1_TILE_OFFSET)); - -#if VERBOSE > 10 - // h_pprintf("K_SIZE: 0x"); n_pprintf(hs(K_SIZE)); - // h_pprintf("M_SIZE: 0x"); n_pprintf(hs(M_SIZE)); - // h_pprintf("N_SIZE: 0x"); n_pprintf(hs(N_SIZE)); - printf("K_SIZE: 0x%0x\n", K_SIZE); - printf("M_SIZE: 0x%0x\n", M_SIZE); - printf("N_SIZE: 0x%0x\n", N_SIZE); -#endif - -#ifdef IRQ_EN - irq_en(1< expected[get_hartid()]) ? (computed[get_hartid()] - expected[get_hartid()]) : (expected[get_hartid()] - computed[get_hartid()]); - if(diff[get_hartid()] > DIFF_TH){ - num_errors[get_hartid()]++; - // h_pprintf("**ERROR**: V[0x"); pprintf(hs(V_BASE + get_hartid()*MHARTID_OFFSET + 2*i)); pprintf("](=0x"); pprintf(hs(computed[get_hartid()])); - // pprintf(") != Z["); pprintf(ds(i)); pprintf("](=0x"); pprintf(hs(expected[get_hartid()])); n_pprintf(")"); - printf("**ERROR**: V[0x%0x](=0x%0x) != Z[%0d](=0x%0x)\n", V_BASE + get_hartid()*MHARTID_OFFSET + 2*i, computed[get_hartid()], i, expected[get_hartid()]); - } - } - // h_pprintf("Finished test with "); pprintf(ds(num_errors[get_hartid()])); n_pprintf(" error(s)"); - printf("Finished test with %0d error(s)\n", num_errors[get_hartid()]); - - uint32_t exit_code[NUM_HARTS]; - if(num_errors[get_hartid()]) - exit_code[get_hartid()] = FAIL_EXIT_CODE; - else - exit_code[get_hartid()] = PASS_EXIT_CODE; - - return exit_code; -} diff --git a/sw/tests/mesh_test_event_unit.c b/sw/tests/mesh_test_event_unit.c new file mode 100644 index 0000000..aecc049 --- /dev/null +++ b/sw/tests/mesh_test_event_unit.c @@ -0,0 +1,228 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + * + * Authors: Luca Balboni + * Based on mesh_test.c by Victor Isachi + * + * MAGIA Mesh Test - Pure Event Unit API Version + * Uses ONLY event_unit_utils for WFE/polling + * + * Configuration: + * - Set USE_WFE to 1 for WFE (Wait-For-Event) mode + * - Set USE_WFE to 0 for Event Unit polling mode + */ + +#include "magia_tile_utils.h" +#include "magia_utils.h" +#include "redmule_mm_utils.h" +#include "idma_mm_utils.h" +#include "event_unit_utils.h" + +#include "x_input.h" +#include "w_input.h" +#include "y_input.h" +#include "z_output.h" + +#define X_BASE (L1_BASE + 0x00012048) +#define W_BASE (L1_BASE + 0x00016048) +#define Y_BASE (L1_BASE + 0x0001A048) +#define Z_BASE (L2_BASE + 0x00042000) // Note: for a large number of tiles (e.g. 64x64 mesh) we might exceed memory range of L2 +#define V_BASE (L2_BASE + 0x00046000) // Note: for a large number of tiles (e.g. 64x64 mesh) we might exceed memory range of L2 +#define T_BASE (L2_BASE + 0x0004A000) // Note: for a large number of tiles (e.g. 
64x64 mesh) we might exceed memory range of L2 + +#define MHARTID_OFFSET (0x00010000) + +#define M_SIZE (96) +#define N_SIZE (64) +#define K_SIZE (64) + +#define VERBOSE (0) + +#define WAIT_CYCLES (10) + +#define DIFF_TH (0x0011) + +#define USE_WFE (0) + +void idma_mv_in_pure_eu(unsigned int x_dim, unsigned int y_dim, uint16_t src_data[], uint32_t dst_address) { + uint32_t dst_addr; + uint32_t src_addr; + uint32_t len; + + // Initialize Event Unit once + static int eu_initialized = 0; + if (!eu_initialized) { + eu_init(); + eu_initialized = 1; + } + + for (int i = 0; i < x_dim*y_dim; i++) + mmio16(T_BASE + get_hartid()*MHARTID_OFFSET + 2*i) = src_data[i]; + + dst_addr = (uint32_t)dst_address; + src_addr = (uint32_t)(T_BASE + get_hartid()*MHARTID_OFFSET); + len = (uint32_t)(x_dim*y_dim*2); // 2 Bytes per element +#if VERBOSE > 10 + printf("dst_addr: 0x%0x\n", dst_addr); + printf("src_addr: 0x%0x\n", src_addr); + printf("len: %0d\n", len); +#endif + + idma_L2ToL1(src_addr, dst_addr, len); + + // Clear Event Unit and ensure A2O mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_IDMA_A2O_DONE_MASK); + + // Use PURE Event Unit + eu_wait_mode_t wait_mode = USE_WFE ? 
EU_WAIT_MODE_WFE : EU_WAIT_MODE_POLLING; + + // Use direction-specific wait for L2->L1 (A2O, direction = 0) + eu_idma_wait_direction_completion(0, wait_mode); + +#if VERBOSE > 100 + for (int i = 0; i < x_dim*y_dim; i++){ + printf("DST[0x%0x]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i)); + } +#endif + +#if VERBOSE > 10 + unsigned int num_errors; + num_errors = 0; + for (int i = 0; i < x_dim*y_dim; i++) { + if (mmio16(dst_addr + 2*i) != src_data[i]) { + num_errors++; + printf("DST[0x%0x]: 0x%0x != SRC[%0d]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i), i, src_data[i]); + } + } + printf("Detected %0d error(s) in the transfer...\n", num_errors); +#endif +} + +void idma_mv_out_pure_eu(unsigned int x_dim, unsigned int y_dim, uint32_t src_address, uint32_t dst_address) { + uint32_t dst_addr; + uint32_t src_addr; + uint32_t len; + + dst_addr = (uint32_t)dst_address; + src_addr = (uint32_t)src_address; + len = (uint32_t)(x_dim*y_dim*2); // 2 Bytes per element +#if VERBOSE > 10 + printf("dst_addr: 0x%0x\n", dst_addr); + printf("src_addr: 0x%0x\n", src_addr); + printf("len: %0d\n", len); +#endif + + idma_L1ToL2(src_addr, dst_addr, len); + + // Clear Event Unit and ensure O2A mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_IDMA_O2A_DONE_MASK); + + // Use PURE Event Unit + eu_wait_mode_t wait_mode = USE_WFE ? 
EU_WAIT_MODE_WFE : EU_WAIT_MODE_POLLING; + + // Use direction-specific wait for L1->L2 (O2A, direction = 1) + eu_idma_wait_direction_completion(1, wait_mode); + +#if VERBOSE > 100 + for (int i = 0; i < x_dim*y_dim; i++){ + printf("DST[0x%0x]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i)); + } +#endif + +#if VERBOSE > 10 + unsigned int num_errors; + num_errors = 0; + for (int i = 0; i < x_dim*y_dim; i++) { + if (mmio16(dst_addr + 2*i) != mmio16(src_addr + 2*i)) { + num_errors++; + printf("DST[0x%0x]: 0x%0x != SRC[%0d]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i), i, mmio16(src_addr + 2*i)); + } + } + printf("Detected %0d error(s) in the transfer...\n", num_errors); +#endif +} + +int main(void) { + + // X + printf("Initializing X through iDMA...\n"); + idma_mv_in_pure_eu(M_SIZE, N_SIZE, x_inp, (X_BASE + get_hartid()*L1_TILE_OFFSET)); + + // W + printf("Initializing W through iDMA...\n"); + idma_mv_in_pure_eu(N_SIZE, K_SIZE, w_inp, (W_BASE + get_hartid()*L1_TILE_OFFSET)); + + // Y + printf("Initializing Y through iDMA...\n"); + idma_mv_in_pure_eu(M_SIZE, K_SIZE, y_inp, (Y_BASE + get_hartid()*L1_TILE_OFFSET)); + +#if VERBOSE > 10 + printf("K_SIZE: 0x%0x\n", K_SIZE); + printf("M_SIZE: 0x%0x\n", M_SIZE); + printf("N_SIZE: 0x%0x\n", N_SIZE); +#endif + + printf("Testing matrix multiplication with RedMulE...\n"); + + // Initialize and configure RedMulE using MM approach + hwpe_cg_enable(); + hwpe_soft_clear(); + + int offload_id_tmp; + while ((offload_id_tmp = hwpe_acquire_job()) < 0) + ; + + redmule_cfg((unsigned int)(X_BASE + get_hartid()*L1_TILE_OFFSET), + (unsigned int)(W_BASE + get_hartid()*L1_TILE_OFFSET), + (unsigned int)(Y_BASE + get_hartid()*L1_TILE_OFFSET), + M_SIZE, N_SIZE, K_SIZE, (uint8_t)gemm_ops, (uint8_t)Float16); + + hwpe_trigger_job(); + + // Clear Event Unit and ensure RedMulE mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_REDMULE_DONE_MASK); + + // Use PURE Event Unit + eu_wait_mode_t wait_mode = USE_WFE ? 
EU_WAIT_MODE_WFE : EU_WAIT_MODE_POLLING; + + // Wait for HWPE completion + eu_redmule_wait_completion(wait_mode); + + printf("Moving results through iDMA...\n"); + idma_mv_out_pure_eu(M_SIZE, K_SIZE, Y_BASE + get_hartid()*L1_TILE_OFFSET, V_BASE + get_hartid()*MHARTID_OFFSET); + + printf("Verifying results...\n"); + + unsigned int num_errors[NUM_HARTS]; + num_errors[get_hartid()] = 0; + + volatile uint16_t computed[NUM_HARTS], expected[NUM_HARTS], diff[NUM_HARTS]; + for(int i = 0; i < M_SIZE*K_SIZE; i++){ + computed[get_hartid()] = mmio16(V_BASE + get_hartid()*MHARTID_OFFSET + 2*i); + expected[get_hartid()] = z_oup[i]; + diff[get_hartid()] = (computed[get_hartid()] > expected[get_hartid()]) ? (computed[get_hartid()] - expected[get_hartid()]) : (expected[get_hartid()] - computed[get_hartid()]); + if(diff[get_hartid()] > DIFF_TH){ + num_errors[get_hartid()]++; + printf("**ERROR**: V[0x%0x](=0x%0x) != Z[%0d](=0x%0x)\n", V_BASE + get_hartid()*MHARTID_OFFSET + 2*i, computed[get_hartid()], i, expected[get_hartid()]); + } + } + printf("Finished test with %0d error(s)\n", num_errors[get_hartid()]); + + return num_errors[get_hartid()]; +} \ No newline at end of file diff --git a/sw/tests/mesh_test_mm.c b/sw/tests/mesh_test_mm.c new file mode 100644 index 0000000..919bf95 --- /dev/null +++ b/sw/tests/mesh_test_mm.c @@ -0,0 +1,191 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Authors: Luca Balboni + * Based on mesh_test.c by Victor Isachi + * + * MAGIA Mesh Test - Memory Mapped Version + */ + +#include "magia_tile_utils.h" +#include "magia_utils.h" +#include "redmule_mm_utils.h" +#include "idma_mm_utils.h" + +#include "x_input.h" +#include "w_input.h" +#include "y_input.h" +#include "z_output.h" + +#define X_BASE (L1_BASE + 0x00012048) +#define W_BASE (L1_BASE + 0x00016048) +#define Y_BASE (L1_BASE + 0x0001A048) +#define Z_BASE (L2_BASE + 0x00042000) // Note: for a large number of tiles (e.g. 64x64 mesh) we might exceed memory range of L2 +#define V_BASE (L2_BASE + 0x00046000) // Note: for a large number of tiles (e.g. 64x64 mesh) we might exceed memory range of L2 +#define T_BASE (L2_BASE + 0x0004A000) // Note: for a large number of tiles (e.g. 64x64 mesh) we might exceed memory range of L2 + +#define MHARTID_OFFSET (0x00010000) + +#define M_SIZE (96) +#define N_SIZE (64) +#define K_SIZE (64) + +#define VERBOSE (0) + +#define WAIT_CYCLES (10) + +#define DIFF_TH (0x0011) + +#define CONCURRENT + +void idma_mv_in(unsigned int x_dim, unsigned int y_dim, uint16_t src_data[], uint32_t dst_address){ + uint32_t dst_addr; + uint32_t src_addr; + uint32_t len; + + for (int i = 0; i < x_dim*y_dim; i++) + mmio16(T_BASE + get_hartid()*MHARTID_OFFSET + 2*i) = src_data[i]; + + dst_addr = (uint32_t)dst_address; + src_addr = (uint32_t)(T_BASE + get_hartid()*MHARTID_OFFSET); + len = (uint32_t)(x_dim*y_dim*2); // 2 Bytes per element +#if VERBOSE > 10 + printf("dst_addr: 0x%0x\n", dst_addr); + printf("src_addr: 0x%0x\n", src_addr); + printf("len: %0d\n", len); +#endif + + uint32_t transfer_id = idma_L2ToL1(src_addr, dst_addr, len); + + dma_wait(transfer_id); + +#if VERBOSE > 100 + for (int i = 0; i < x_dim*y_dim; i++){ + printf("DST[0x%0x]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i)); + } +#endif + +#if VERBOSE > 10 + unsigned int num_errors; + num_errors = 0; + for (int i = 0; i < x_dim*y_dim; i++) { + 
if (mmio16(dst_addr + 2*i) != src_data[i]) { + num_errors++; + printf("DST[0x%0x]: 0x%0x != SRC[%0d]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i), i, src_data[i]); + } + } + printf("Detected %0d error(s) in the transfer...\n", num_errors); +#endif +} + +void idma_mv_out(unsigned int x_dim, unsigned int y_dim, uint32_t src_address, uint32_t dst_address){ + uint32_t dst_addr; + uint32_t src_addr; + uint32_t len; + + dst_addr = (uint32_t)dst_address; + src_addr = (uint32_t)src_address; + len = (uint32_t)(x_dim*y_dim*2); // 2 Bytes per element +#if VERBOSE > 10 + printf("dst_addr: 0x%0x\n", dst_addr); + printf("src_addr: 0x%0x\n", src_addr); + printf("len: %0d\n", len); +#endif + + uint32_t transfer_id = idma_L1ToL2(src_addr, dst_addr, len); + + dma_wait(transfer_id); + +#if VERBOSE > 100 + for (int i = 0; i < x_dim*y_dim; i++){ + printf("DST[0x%0x]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i)); + } +#endif + +#if VERBOSE > 10 + unsigned int num_errors; + num_errors = 0; + for (int i = 0; i < x_dim*y_dim; i++) { + if (mmio16(dst_addr + 2*i) != mmio16(src_addr + 2*i)) { + num_errors++; + printf("DST[0x%0x]: 0x%0x != SRC[%0d]: 0x%0x\n", dst_addr + 2*i, mmio16(dst_addr + 2*i), i, mmio16(src_addr + 2*i)); + } + } + printf("Detected %0d error(s) in the transfer...\n", num_errors); +#endif +} + +int main(void) { + // X + printf("Initializing X through iDMA...\n"); + idma_mv_in(M_SIZE, N_SIZE, x_inp, (X_BASE + get_hartid()*L1_TILE_OFFSET)); + + // W + printf("Initializing W through iDMA...\n"); + idma_mv_in(N_SIZE, K_SIZE, w_inp, (W_BASE + get_hartid()*L1_TILE_OFFSET)); + + // Y + printf("Initializing Y through iDMA...\n"); + idma_mv_in(M_SIZE, K_SIZE, y_inp, (Y_BASE + get_hartid()*L1_TILE_OFFSET)); + +#if VERBOSE > 10 + printf("K_SIZE: 0x%0x\n", K_SIZE); + printf("M_SIZE: 0x%0x\n", M_SIZE); + printf("N_SIZE: 0x%0x\n", N_SIZE); +#endif + + printf("Testing matrix multiplication with RedMulE...\n"); + + // Initialize and configure RedMulE using MM approach + 
hwpe_cg_enable(); + hwpe_soft_clear(); + + int offload_id_tmp; + while ((offload_id_tmp = hwpe_acquire_job()) < 0) + ; + + redmule_cfg((unsigned int)(X_BASE + get_hartid()*L1_TILE_OFFSET), + (unsigned int)(W_BASE + get_hartid()*L1_TILE_OFFSET), + (unsigned int)(Y_BASE + get_hartid()*L1_TILE_OFFSET), + M_SIZE, N_SIZE, K_SIZE, (uint8_t)gemm_ops, (uint8_t)Float16); + + hwpe_trigger_job(); + + // Wait for HWPE completion + hwpe_wait_for_completion(); + + printf("Moving results through iDMA...\n"); + idma_mv_out(M_SIZE, K_SIZE, Y_BASE + get_hartid()*L1_TILE_OFFSET, V_BASE + get_hartid()*MHARTID_OFFSET); + + printf("Verifying results...\n"); + + unsigned int num_errors[NUM_HARTS]; + num_errors[get_hartid()] = 0; + + uint16_t computed[NUM_HARTS], expected[NUM_HARTS], diff[NUM_HARTS]; + for(int i = 0; i < M_SIZE*K_SIZE; i++){ + computed[get_hartid()] = mmio16(V_BASE + get_hartid()*MHARTID_OFFSET + 2*i); + expected[get_hartid()] = z_oup[i]; + diff[get_hartid()] = (computed[get_hartid()] > expected[get_hartid()]) ? (computed[get_hartid()] - expected[get_hartid()]) : (expected[get_hartid()] - computed[get_hartid()]); + if(diff[get_hartid()] > DIFF_TH){ + num_errors[get_hartid()]++; + printf("**ERROR**: V[0x%0x](=0x%0x) != Z[%0d](=0x%0x)\n", V_BASE + get_hartid()*MHARTID_OFFSET + 2*i, computed[get_hartid()], i, expected[get_hartid()]); + } + } + printf("Finished test with %0d error(s)\n", num_errors[get_hartid()]); + + return num_errors[get_hartid()]; +} \ No newline at end of file diff --git a/sw/tests/redmule_test_event_unit.c b/sw/tests/redmule_test_event_unit.c new file mode 100644 index 0000000..5efe2d0 --- /dev/null +++ b/sw/tests/redmule_test_event_unit.c @@ -0,0 +1,138 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + * + * Authors: Luca Balboni + * Based on redmule_test.c by Victor Isachi + * + * RedMulE Matrix Multiplication Test with Event Unit WFE API + * Uses event_unit_utils.h for Event Unit control and WFE/polling + * + */ + +#include +#include "magia_tile_utils.h" +#include "redmule_mm_utils.h" +#include "event_unit_utils.h" + +#include "x_input.h" +#include "w_input.h" +#include "y_input.h" +#include "z_output.h" + +#define X_BASE (L1_BASE + 0x00012048) +#define W_BASE (L1_BASE + 0x00016048) +#define Y_BASE (L1_BASE + 0x0001A048) +#define Z_BASE (L2_BASE + 0x00042000) + +#define M_SIZE (96) +#define N_SIZE (64) +#define K_SIZE (64) + +#define VERBOSE (0) + +#define USE_WFE (1) + +#define WAIT_CYCLES (10) + +#define DIFF_TH (0x0011) + +int main(void) { + // X + for (int i = 0; i < M_SIZE*N_SIZE; i++) + mmio16(X_BASE + 2*i) = x_inp[i]; +#if VERBOSE > 10 + for (int i = 0; i < M_SIZE*N_SIZE; i++) + printf("X[%8x]: 0x%4x\n", X_BASE + 2*i, mmio16(X_BASE + 2*i)); +#endif + + // W + for (int i = 0; i < N_SIZE*K_SIZE; i++) + mmio16(W_BASE + 2*i) = w_inp[i]; +#if VERBOSE > 10 + for (int i = 0; i < N_SIZE*K_SIZE; i++) + printf("W[%8x]: 0x%4x\n", W_BASE + 2*i, mmio16(W_BASE + 2*i)); +#endif + +// Y + for (int i = 0; i < M_SIZE*K_SIZE; i++) + mmio16(Y_BASE + 2*i) = y_inp[i]; +#if VERBOSE > 10 + for (int i = 0; i < M_SIZE*K_SIZE; i++) + printf("Y[%8x]: 0x%4x\n", Y_BASE + 2*i, mmio16(Y_BASE + 2*i)); +#endif + + // Z - golden (reference) + for (int i = 0; i < M_SIZE*K_SIZE; i++) + mmio16(Z_BASE + 2*i) = z_oup[i]; +#if VERBOSE > 
10 + for (int i = 0; i < M_SIZE*K_SIZE; i++) + printf("Z[%8x]: 0x%4x\n", Z_BASE + 2*i, mmio16(Z_BASE + 2*i)); +#endif + +#if VERBOSE > 10 + printf("K_SIZE: %4x\n", K_SIZE); + printf("M_SIZE: %4x\n", M_SIZE); + printf("N_SIZE: %4x\n", N_SIZE); +#endif + + // Initialize and configure RedMulE + hwpe_cg_enable(); + hwpe_soft_clear(); + + int offload_id_tmp; + while ((offload_id_tmp = hwpe_acquire_job()) < 0) + ; + + redmule_cfg((unsigned int)X_BASE, (unsigned int)W_BASE, (unsigned int)Y_BASE, + M_SIZE, N_SIZE, K_SIZE, (uint8_t)gemm_ops, (uint8_t)Float16); + + // Initialize Event Unit for RedMulE + eu_redmule_init(); + + // Wait for end of computation + printf("Testing matrix multiplication with RedMulE...\n"); + hwpe_trigger_job(); + + // Wait for HWPE completion using Event Unit + if (USE_WFE) { + eu_redmule_wait_completion(EU_WAIT_MODE_WFE); + printf("Detected WFE...\n"); + } else { + eu_redmule_wait_completion(EU_WAIT_MODE_POLLING); + printf("Detected polling completion...\n"); + } + printf("Verifying results...\n"); + + // Disable RedMulE + hwpe_cg_disable(); + + unsigned int num_errors = 0; + + uint16_t computed, expected, diff; + for(int i = 0; i < M_SIZE*K_SIZE; i++){ + computed = mmio16(Y_BASE + 2*i); + expected = mmio16(Z_BASE + 2*i); + diff = (computed > expected) ? (computed - expected) : (expected - computed); + if(diff > DIFF_TH){ + num_errors++; + printf("**ERROR**: Y[%8x](=0x%4x) != Z[%8x](=0x%4x)\n", Y_BASE + 2*i, computed, Z_BASE + 2*i, expected); + } + } + printf("Finished test with %0d errors\n", num_errors); + + + return num_errors; +} \ No newline at end of file diff --git a/sw/tests/redmule_test.c b/sw/tests/redmule_test_mm.c similarity index 77% rename from sw/tests/redmule_test.c rename to sw/tests/redmule_test_mm.c index ecaef3f..eea3abf 100644 --- a/sw/tests/redmule_test.c +++ b/sw/tests/redmule_test_mm.c @@ -14,13 +14,18 @@ * limitations under the License. 
* SPDX-License-Identifier: Apache-2.0 * - * Authors: Victor Isachi + * Authors: Luca Balboni + * Based on redmule_test.c by Victor Isachi * - * MAGIA RedMulE Test + * RedMulE Matrix Multiplication Test with MMIO HWPE Control Functions + * + * This test uses MMIO functions for HWPE control and simplified IRQ management + * copied from redmule_test.c for better reliability. */ +#include #include "magia_tile_utils.h" -#include "redmule_isa_utils.h" +#include "redmule_mm_utils.h" #include "x_input.h" #include "w_input.h" @@ -38,8 +43,6 @@ #define VERBOSE (0) -#define IRQ_EN - #define WAIT_CYCLES (10) #define DIFF_TH (0x0011) @@ -61,7 +64,7 @@ int main(void) { printf("W[%8x]: 0x%4x\n", W_BASE + 2*i, mmio16(W_BASE + 2*i)); #endif - // Y +// Y for (int i = 0; i < M_SIZE*K_SIZE; i++) mmio16(Y_BASE + 2*i) = y_inp[i]; #if VERBOSE > 10 @@ -83,25 +86,28 @@ int main(void) { printf("N_SIZE: %4x\n", N_SIZE); #endif - redmule_mcnfig(K_SIZE, M_SIZE, N_SIZE); + // Initialize and configure RedMulE + hwpe_cg_enable(); + hwpe_soft_clear(); - redmule_marith(Y_BASE, W_BASE, X_BASE); + int offload_id_tmp; + while ((offload_id_tmp = hwpe_acquire_job()) < 0) + ; -#ifdef IRQ_EN - // Enable IRQs - uint32_t index = (1< + * Authors: Luca Balboni + * Based on tile_test.c by Victor Isachi + * + * MAGIA Tile Test - Event Unit WFE API Version + * Uses event_unit_utils.h for Event Unit control and WFE/polling * - * MAGIA Tile Test */ #include "magia_tile_utils.h" -#include "redmule_isa_utils.h" -#include "idma_isa_utils.h" +#include "redmule_mm_utils.h" +#include "idma_mm_utils.h" +#include "event_unit_utils.h" #include "x_input.h" #include "w_input.h" @@ -41,36 +45,29 @@ #define VERBOSE (0) +#define USE_WFE (0) + #define WAIT_CYCLES (10) #define DIFF_TH (0x0011) #define CONCURRENT -#define IRQ_EN - void idma_mv_in(unsigned int x_dim, unsigned int y_dim, uint16_t src_data[], uint32_t dst_address){ uint32_t dst_addr; uint32_t src_addr; uint32_t len; - uint32_t dst_std_2; - uint32_t src_std_2; - 
uint32_t reps_2; - - uint32_t dst_std_3; - uint32_t src_std_3; - uint32_t reps_3; - -#ifdef IRQ_EN - irq_en(1< 100 - printf("dst_std_2: 0x%8x\n", dst_std_2); - printf("src_std_2: 0x%8x\n", src_std_2); - printf("reps_2: 0x%8x\n", reps_2); -#endif - idma_set_std2_rep2_in(dst_std_2, src_std_2, reps_2); - - dst_std_3 = 0; - src_std_3 = 0; - reps_3 = 1; -#if VERBOSE > 100 - printf("dst_std_3: 0x%8x\n", dst_std_3); - printf("src_std_3: 0x%8x\n", src_std_3); - printf("reps_3: 0x%8x\n", reps_3); -#endif - idma_set_std3_rep3_in(dst_std_3, src_std_3, reps_3); + uint32_t transfer_id = idma_L2ToL1(src_addr, dst_addr, len); - idma_start_in(); + // Clear Event Unit and ensure A2O mask is enabled + eu_clear_events(0xFFFFFFFF); + eu_enable_events(EU_IDMA_A2O_DONE_MASK); -#ifdef IRQ_EN - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); -#else - wait_print(WAIT_CYCLES); -#endif + if (USE_WFE) { + eu_idma_wait_a2o_completion(EU_WAIT_MODE_WFE); + } else { + eu_idma_wait_a2o_completion(EU_WAIT_MODE_POLLING); + } #if VERBOSE > 100 for (int i = 0; i < x_dim*y_dim; i++) @@ -133,20 +112,6 @@ void idma_mv_out(unsigned int x_dim, unsigned int y_dim, uint32_t src_address, u uint32_t src_addr; uint32_t len; - uint32_t dst_std_2; - uint32_t src_std_2; - uint32_t reps_2; - - uint32_t dst_std_3; - uint32_t src_std_3; - uint32_t reps_3; - -#ifdef IRQ_EN - irq_en(1< 100 - printf("dst_std_2: 0x%8x\n", dst_std_2); - printf("src_std_2: 0x%8x\n", src_std_2); - printf("reps_2: 0x%8x\n", reps_2); -#endif - idma_set_std2_rep2_out(dst_std_2, src_std_2, reps_2); - dst_std_3 = 0; - src_std_3 = 0; - reps_3 = 1; -#if VERBOSE > 100 - printf("dst_std_3: 0x%8x\n", dst_std_3); - printf("src_std_3: 0x%8x\n", src_std_3); - printf("reps_3: 0x%8x\n", reps_3); -#endif - idma_set_std3_rep3_out(dst_std_3, src_std_3, reps_3); + uint32_t transfer_id = idma_L1ToL2(src_addr, dst_addr, len); - idma_start_out(); + // Clear Event Unit and ensure O2A mask is enabled + eu_clear_events(0xFFFFFFFF); + 
eu_enable_events(EU_IDMA_O2A_DONE_MASK); -#ifdef IRQ_EN - asm volatile("wfi" ::: "memory"); - printf("Detected IRQ...\n"); -#else - wait_print(WAIT_CYCLES); -#endif + if (USE_WFE) { + eu_idma_wait_o2a_completion(EU_WAIT_MODE_WFE); + } else { + eu_idma_wait_o2a_completion(EU_WAIT_MODE_POLLING); + } #if VERBOSE > 100 for (int i = 0; i < x_dim*y_dim; i++) @@ -232,23 +179,29 @@ int main(void) { printf("N_SIZE: %4x\n", N_SIZE); #endif - redmule_mcnfig(K_SIZE, M_SIZE, N_SIZE); + // Initialize and configure RedMulE using MM approach + hwpe_cg_enable(); + hwpe_soft_clear(); - redmule_marith(Y_BASE, W_BASE, X_BASE); + int offload_id_tmp; + while ((offload_id_tmp = hwpe_acquire_job()) < 0) + ; -#ifdef IRQ_EN - irq_en(1< + * Based on tile_test.c by Victor Isachi + * + * MAGIA Tile Test - Memory Mapped Version + */ + +#include "magia_tile_utils.h" +#include "redmule_mm_utils.h" +#include "idma_mm_utils.h" + +#include "x_input.h" +#include "w_input.h" +#include "y_input.h" +#include "z_output.h" + +#define X_BASE (L1_BASE + 0x00012048) +#define W_BASE (L1_BASE + 0x00016048) +#define Y_BASE (L1_BASE + 0x0001A048) +#define Z_BASE (L2_BASE + 0x00042000) +#define V_BASE (L2_BASE + 0x00046000) +#define T_BASE (L2_BASE + 0x0004A000) + +#define M_SIZE (96) +#define N_SIZE (64) +#define K_SIZE (64) + +#define VERBOSE (0) + +#define WAIT_CYCLES (10) + +#define DIFF_TH (0x0011) + +#define CONCURRENT + + +void idma_mv_in(unsigned int x_dim, unsigned int y_dim, uint16_t src_data[], uint32_t dst_address){ + uint32_t dst_addr; + uint32_t src_addr; + uint32_t len; + + for (int i = 0; i < x_dim*y_dim; i++) + mmio16(T_BASE + 2*i) = src_data[i]; + + dst_addr = (uint32_t)dst_address; + src_addr = (uint32_t)T_BASE; + len = (uint32_t)(x_dim*y_dim*2); // 2 Bytes per element +#if VERBOSE > 10 + printf("dst_addr: 0x%8x\n", dst_addr); + printf("src_addr: 0x%8x\n", src_addr); + printf("len: %0d\n", len); +#endif + + uint32_t transfer_id = idma_L2ToL1(src_addr, dst_addr, len); + + dma_wait(transfer_id); 
+ +#if VERBOSE > 100 + for (int i = 0; i < x_dim*y_dim; i++) + printf("DST[%8x]: 0x%4x\n", dst_address + 2*i, mmio16(dst_address + 2*i)); +#endif + +#if VERBOSE > 10 + unsigned int num_errors; + num_errors = 0; + for (int i = 0; i < x_dim*y_dim; i++) { + if (mmio16(dst_address + 2*i) != src_data[i]) { + num_errors++; + printf("DST[%8x]: 0x%4x != SRC[%0d]: 0x%4x\n", dst_address + 2*i, mmio16(dst_address + 2*i), i, src_data[i]); + } + } + printf("Detected %0d error(s) in the transfer...\n", num_errors); +#endif +} + +void idma_mv_out(unsigned int x_dim, unsigned int y_dim, uint32_t src_address, uint32_t dst_address){ + uint32_t dst_addr; + uint32_t src_addr; + uint32_t len; + + dst_addr = (uint32_t)dst_address; + src_addr = (uint32_t)src_address; + len = (uint32_t)(x_dim*y_dim*2); // 2 Bytes per element +#if VERBOSE > 10 + printf("dst_addr: 0x%8x\n", dst_addr); + printf("src_addr: 0x%8x\n", src_addr); + printf("len: %0d\n", len); +#endif + + uint32_t transfer_id = idma_L1ToL2(src_addr, dst_addr, len); + + dma_wait(transfer_id); + +#if VERBOSE > 100 + for (int i = 0; i < x_dim*y_dim; i++) + printf("DST[%8x]: 0x%4x\n", dst_address + 2*i, mmio16(dst_address + 2*i)); +#endif + +#if VERBOSE > 10 + unsigned int num_errors; + num_errors = 0; + for (int i = 0; i < x_dim*y_dim; i++) { + if (mmio16(dst_address + 2*i) != mmio16(src_address + 2*i)) { + num_errors++; + printf("DST[%8x]: 0x%4x != SRC[%8x]: 0x%4x\n", dst_address + 2*i, mmio16(dst_address + 2*i), src_address + 2*i, mmio16(src_address + 2*i)); + } + } + printf("Detected %0d error(s) in the transfer...\n", num_errors); +#endif +} + +int main(void) { + // X + printf("Initializing X through iDMA...\n"); + idma_mv_in(M_SIZE, N_SIZE, x_inp, X_BASE); + + // W + printf("Initializing W through iDMA...\n"); + idma_mv_in(N_SIZE, K_SIZE, w_inp, W_BASE); + + // Y + printf("Initializing Y through iDMA...\n"); + idma_mv_in(M_SIZE, K_SIZE, y_inp, Y_BASE); + + // Z - golden (reference) + printf("Initializing Z - golden...\n"); + for 
(int i = 0; i < M_SIZE*K_SIZE; i++) + mmio16(Z_BASE + 2*i) = z_oup[i]; +#if VERBOSE > 100 + for (int i = 0; i < M_SIZE*K_SIZE; i++) + printf("Z[%8x]: 0x%4x\n", Z_BASE + 2*i, mmio16(Z_BASE + 2*i)); +#endif + +#if VERBOSE > 10 + printf("K_SIZE: %4x\n", K_SIZE); + printf("M_SIZE: %4x\n", M_SIZE); + printf("N_SIZE: %4x\n", N_SIZE); +#endif + + // Initialize and configure RedMulE using MM approach + hwpe_cg_enable(); + hwpe_soft_clear(); + + int offload_id_tmp; + while ((offload_id_tmp = hwpe_acquire_job()) < 0) + ; + + redmule_cfg((unsigned int)X_BASE, (unsigned int)W_BASE, (unsigned int)Y_BASE, + M_SIZE, N_SIZE, K_SIZE, (uint8_t)gemm_ops, (uint8_t)Float16); + + printf("Testing matrix multiplication with RedMulE...\n"); + hwpe_trigger_job(); + + // Wait for HWPE completion + hwpe_wait_for_completion(); + + printf("Moving results through iDMA...\n"); + idma_mv_out(M_SIZE, K_SIZE, Y_BASE, V_BASE); + + printf("Verifying results...\n"); + + unsigned int num_errors = 0; + + uint16_t computed, expected, diff; + for(int i = 0; i < M_SIZE*K_SIZE; i++){ + computed = mmio16(V_BASE + 2*i); + expected = mmio16(Z_BASE + 2*i); + diff = (computed > expected) ? (computed - expected) : (expected - computed); + if(diff > DIFF_TH){ + num_errors++; + printf("**ERROR**: V[%8x](=0x%4x) != Z[%8x](=0x%4x)\n", V_BASE + 2*i, computed, Z_BASE + 2*i, expected); + } + } + printf("Finished test with %0d errors\n", num_errors); + + return num_errors; +} \ No newline at end of file diff --git a/sw/utils/event_unit_utils.h b/sw/utils/event_unit_utils.h new file mode 100644 index 0000000..10ee2ea --- /dev/null +++ b/sw/utils/event_unit_utils.h @@ -0,0 +1,380 @@ +/* + * Copyright (C) 2024 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Authors: Luca Balboni
+ *
+ * MAGIA Event Unit Utilities
+ * Two wait modes: POLLING (busy-wait, the core keeps running) and WFE (the core sleeps on a p.elw load)
+ */
+
+#ifndef EVENT_UNIT_UTILS_H
+#define EVENT_UNIT_UTILS_H
+
+#include <stdint.h> // uint32_t, used by every helper in this header
+#include "magia_tile_utils.h"
+
+//=============================================================================
+// REGISTER DEFINITIONS AND CONSTANTS
+//=============================================================================
+
+#define EU_BASE EVENT_UNIT_BASE // EVENT_UNIT_BASE is not defined in this header (presumably magia_tile_utils.h)
+
+// Control and status registers (byte offsets from EU_BASE)
+#define EU_CORE_MASK (EU_BASE + 0x00)
+#define EU_CORE_MASK_AND (EU_BASE + 0x04)
+#define EU_CORE_MASK_OR (EU_BASE + 0x08)
+#define EU_CORE_IRQ_MASK (EU_BASE + 0x0C)
+#define EU_CORE_IRQ_MASK_AND (EU_BASE + 0x10)
+#define EU_CORE_IRQ_MASK_OR (EU_BASE + 0x14)
+#define EU_CORE_STATUS (EU_BASE + 0x18)
+#define EU_CORE_BUFFER (EU_BASE + 0x1C)
+#define EU_CORE_BUFFER_MASKED (EU_BASE + 0x20)
+#define EU_CORE_BUFFER_IRQ_MASKED (EU_BASE + 0x24)
+#define EU_CORE_BUFFER_CLEAR (EU_BASE + 0x28)
+
+// Wait registers (blocking with p.elw)
+#define EU_CORE_EVENT_WAIT (EU_BASE + 0x38)
+#define EU_CORE_EVENT_WAIT_CLEAR (EU_BASE + 0x3C)
+
+// Hardware mutex registers (0x04 * mutex_id offset)
+#define EU_CORE_HW_MUTEX (EU_BASE + 0x0C0) // R/W: HW mutex management
+
+// Hardware barrier registers (0x20 * barr_id offset)
+#define HW_BARR_TRIGGER_MASK (EU_BASE + 0x400) // R/W: Barrier trigger mask
+#define HW_BARR_STATUS (EU_BASE + 0x404) // R: Barrier status
+#define HW_BARR_TARGET_MASK (EU_BASE + 0x40C) // R/W: Barrier 
target mask
+#define HW_BARR_TRIGGER (EU_BASE + 0x410) // W: Manual barrier trigger
+#define HW_BARR_TRIGGER_SELF (EU_BASE + 0x414) // R: Automatic trigger
+#define HW_BARR_TRIGGER_WAIT (EU_BASE + 0x418) // R: Trigger + sleep
+#define HW_BARR_TRIGGER_WAIT_CLEAR (EU_BASE + 0x41C) // R: Trigger + sleep + clear
+
+// Software event trigger registers (0x04 * sw_event_id offset)
+#define EU_CORE_TRIGG_SW_EVENT (EU_BASE + 0x600) // W: Generate SW event
+#define EU_CORE_TRIGG_SW_EVENT_WAIT (EU_BASE + 0x640) // R: Generate event + sleep
+#define EU_CORE_TRIGG_SW_EVENT_WAIT_CLEAR (EU_BASE + 0x680) // R: Generate event + sleep + clear
+
+// SoC event FIFO register
+#define EU_CORE_CURRENT_EVENT (EU_BASE + 0x700) // R: SoC event FIFO
+
+// Event bit mapping: bit positions used to build the masks written to / compared against the EU mask and buffer registers
+#define EU_DMA_EVT_0_BIT 2
+#define EU_DMA_EVT_1_BIT 3
+#define EU_TIMER_EVT_0_BIT 4
+#define EU_TIMER_EVT_1_BIT 5
+
+#define EU_REDMULE_UNUSED_BIT 8
+#define EU_REDMULE_BUSY_BIT 9
+#define EU_REDMULE_DONE_BIT 10
+#define EU_REDMULE_EVT1_BIT 11
+
+// RedMulE event masks
+#define EU_REDMULE_DONE_MASK (1 << EU_REDMULE_DONE_BIT)
+#define EU_REDMULE_BUSY_MASK (1 << EU_REDMULE_BUSY_BIT)
+#define EU_REDMULE_ALL_MASK 0x0F00 // bits 11:8, i.e. UNUSED | BUSY | DONE | EVT1
+
+// iDMA events (DMA events [3:2] + extended [31:26])
+#define EU_IDMA_A2O_DONE_BIT 2 // same bit index as EU_DMA_EVT_0_BIT
+#define EU_IDMA_O2A_DONE_BIT 3 // same bit index as EU_DMA_EVT_1_BIT
+#define EU_IDMA_A2O_DONE_MASK (1 << EU_IDMA_A2O_DONE_BIT)
+#define EU_IDMA_O2A_DONE_MASK (1 << EU_IDMA_O2A_DONE_BIT)
+#define EU_IDMA_ALL_DONE_MASK (EU_IDMA_A2O_DONE_MASK | EU_IDMA_O2A_DONE_MASK) // either direction done
+#define EU_IDMA_A2O_ERROR_BIT 26
+#define EU_IDMA_O2A_ERROR_BIT 27
+#define EU_IDMA_A2O_START_BIT 28
+#define EU_IDMA_O2A_START_BIT 29
+#define EU_IDMA_A2O_BUSY_BIT 30
+#define EU_IDMA_O2A_BUSY_BIT 31 // NOTE(review): top bit of a 32-bit word; a mask built with a signed "1 <<" shifts into the sign bit
+#define EU_IDMA_A2O_ERROR_MASK (1 << EU_IDMA_A2O_ERROR_BIT)
+#define EU_IDMA_O2A_ERROR_MASK (1 << EU_IDMA_O2A_ERROR_BIT)
+#define EU_IDMA_A2O_START_MASK (1 << EU_IDMA_A2O_START_BIT)
+#define EU_IDMA_O2A_START_MASK (1 << EU_IDMA_O2A_START_BIT)
+#define EU_IDMA_A2O_BUSY_MASK (1 << 
EU_IDMA_A2O_BUSY_BIT)
+#define EU_IDMA_O2A_BUSY_MASK (1u << EU_IDMA_O2A_BUSY_BIT) // bit 31: must be an unsigned shift, "1 << 31" overflows a signed int (undefined behaviour)
+
+// FSync events (cluster events [25:24])
+#define EU_FSYNC_DONE_BIT 24
+#define EU_FSYNC_ERROR_BIT 25
+#define EU_FSYNC_DONE_MASK (1 << EU_FSYNC_DONE_BIT)
+#define EU_FSYNC_ERROR_MASK (1 << EU_FSYNC_ERROR_BIT)
+#define EU_FSYNC_ALL_MASK (EU_FSYNC_DONE_MASK | EU_FSYNC_ERROR_MASK)
+
+// Wait modes accepted by the eu_*_wait_* helpers
+typedef enum {
+  EU_WAIT_MODE_POLLING = 0, // busy-wait on the event buffer
+  EU_WAIT_MODE_WFE = 1      // sleep on a p.elw load until an event arrives
+} eu_wait_mode_t;
+
+//=============================================================================
+// LOW-LEVEL HAL (PULP-compatible evt_read32)
+//=============================================================================
+
+// evt_read32: blocking read with p.elw instruction
+// Loads the 32-bit word at (base + offset) with p.elw and returns it.
+static inline unsigned int evt_read32(unsigned int base, unsigned int offset) {
+  unsigned int value;
+  unsigned int addr = base + offset;
+  // Direct p.elw inline assembly for PULP cores (RI5CY, CV32E40P)
+  // The "memory" clobber keeps the compiler from moving memory accesses across the wait.
+  __asm__ __volatile__ (
+    "p.elw %0, 0(%1)"
+    : "=r" (value)
+    : "r" (addr)
+    : "memory"
+  );
+  return value;
+}
+
+//=============================================================================
+// BASIC CONTROL FUNCTIONS
+//=============================================================================
+
+// Reset the unit for this core: clear all buffered events, then zero the event mask and the IRQ mask.
+static inline void eu_init(void) {
+  mmio32(EU_CORE_BUFFER_CLEAR) = 0xFFFFFFFF;
+  mmio32(EU_CORE_MASK) = 0x00000000;
+  mmio32(EU_CORE_IRQ_MASK) = 0x00000000;
+}
+
+// Write event_mask to EU_CORE_MASK_OR (presumably ORs the bits into the event mask - confirm against the EU spec).
+static inline void eu_enable_events(uint32_t event_mask) {
+  mmio32(EU_CORE_MASK_OR) = event_mask;
+}
+
+// Write event_mask to EU_CORE_MASK_AND (presumably removes those bits from the event mask - confirm against the EU spec).
+static inline void eu_disable_events(uint32_t event_mask) {
+  mmio32(EU_CORE_MASK_AND) = event_mask;
+}
+
+// Same as eu_enable_events(), but for the IRQ mask (EU_CORE_IRQ_MASK_OR).
+static inline void eu_enable_irq(uint32_t irq_mask) {
+  mmio32(EU_CORE_IRQ_MASK_OR) = irq_mask;
+}
+
+// Same as eu_disable_events(), but for the IRQ mask (EU_CORE_IRQ_MASK_AND).
+static inline void eu_disable_irq(uint32_t irq_mask) {
+  mmio32(EU_CORE_IRQ_MASK_AND) = irq_mask;
+}
+
+// Write event_mask to EU_CORE_BUFFER_CLEAR to drop the selected buffered events.
+static inline void eu_clear_events(uint32_t event_mask) {
+  mmio32(EU_CORE_BUFFER_CLEAR) = event_mask;
+}
+
+//=============================================================================
+// STATUS READ FUNCTIONS (non-blocking)
+//=============================================================================
+
+// Raw contents of EU_CORE_BUFFER (not filtered by the event mask).
+static inline uint32_t eu_get_events(void) {
+  return mmio32(EU_CORE_BUFFER);
+}
+
+// Contents of EU_CORE_BUFFER_MASKED.
+static inline uint32_t eu_get_events_masked(void) {
+  return mmio32(EU_CORE_BUFFER_MASKED);
+}
+
+// Bits of event_mask that are set in EU_CORE_BUFFER_MASKED (0 when none is pending).
+static inline uint32_t eu_check_events(uint32_t event_mask) {
+  return mmio32(EU_CORE_BUFFER_MASKED) & event_mask;
+}
+
+//=============================================================================
+// WAIT FUNCTIONS (polling and blocking)
+//=============================================================================
+
+// POLLING mode: busy-wait, the core never sleeps.
+// Returns the detected bits of event_mask (and clears them), or 0 on timeout.
+// timeout_cycles is counted in steps of 10 (one wait_nop(10) per iteration); 0 means wait forever.
+static inline uint32_t eu_wait_events_polling(uint32_t event_mask, uint32_t timeout_cycles) {
+  uint32_t cycles = 0;
+  uint32_t detected_events;
+  // eu_check_events() reads the masked buffer, so the events must be enabled
+  // first (the WFE path and eu_multi_wait_all() already do this).
+  eu_enable_events(event_mask);
+  do {
+    detected_events = eu_check_events(event_mask);
+    if (detected_events){
+      eu_clear_events(detected_events);
+      return detected_events;
+    }
+    wait_nop(10);
+    cycles += 10;
+  } while (timeout_cycles == 0 || cycles < timeout_cycles);
+  return 0;
+}
+
+// WFE mode: blocking sleep with p.elw
+// Other events may already be enabled in the mask and wake the core first, so
+// keep sleeping until at least one bit of event_mask has been reported.
+// Returns every event bit returned by the wait register while waiting.
+// With event_mask == 0 a single wait is performed, as before.
+static inline uint32_t eu_wait_events_wfe(uint32_t event_mask) {
+  uint32_t woken = 0;
+  eu_enable_events(event_mask);
+  do {
+    woken |= evt_read32(EU_BASE, EU_CORE_EVENT_WAIT_CLEAR - EU_BASE);
+  } while (event_mask != 0 && (woken & event_mask) == 0);
+  return woken;
+}
+
+// Generic wait with mode selection (timeout_cycles is only used in polling mode)
+static inline uint32_t eu_wait_events(uint32_t event_mask, eu_wait_mode_t mode, uint32_t timeout_cycles) {
+  if (mode == EU_WAIT_MODE_WFE)
+    return eu_wait_events_wfe(event_mask);
+  else
+    return eu_wait_events_polling(event_mask, timeout_cycles);
+}
+
+// PULP HAL compatible functions
+// Blocking p.elw read of EU_CORE_EVENT_WAIT.
+static inline unsigned int eu_evt_wait(void) {
+  return evt_read32(EU_BASE, EU_CORE_EVENT_WAIT - EU_BASE);
+}
+
+// Blocking p.elw read of EU_CORE_EVENT_WAIT_CLEAR.
+static inline unsigned int eu_evt_waitAndClr(void) {
+  return evt_read32(EU_BASE, EU_CORE_EVENT_WAIT_CLEAR - EU_BASE);
+}
+
+// Enable evtMask, wait-and-clear once, then disable evtMask again.
+static inline unsigned int eu_evt_maskWaitAndClr(unsigned int evtMask) 
{ // eu_evt_maskWaitAndClr: evtMask is only enabled for the duration of the wait
+  eu_enable_events(evtMask);
+  unsigned int result = eu_evt_waitAndClr();
+  eu_disable_events(evtMask); // NOTE(review): also disables bits of evtMask that a caller had enabled earlier
+  return result;
+}
+
+//=============================================================================
+// REDMULE FUNCTIONS
+//=============================================================================
+
+// Clear every buffered event, then enable only the RedMulE DONE event.
+static inline void eu_redmule_init(void) {
+  eu_clear_events(0xFFFFFFFF);
+  eu_enable_events(EU_REDMULE_DONE_MASK);
+}
+
+// Wait for RedMulE DONE; polling mode gives up after 1000000 counted cycles and returns 0.
+static inline uint32_t eu_redmule_wait_completion(eu_wait_mode_t mode) {
+  return eu_wait_events(EU_REDMULE_DONE_MASK, mode, 1000000);
+}
+
+// Nonzero while the BUSY bit is set in the event buffer.
+// eu_redmule_init() enables only DONE, so the masked buffer would always read 0
+// here; read the raw buffer instead, as eu_idma_is_busy() does.
+static inline uint32_t eu_redmule_is_busy(void) {
+  return eu_get_events() & EU_REDMULE_BUSY_MASK;
+}
+
+// Nonzero when a DONE event is pending in the masked buffer (not cleared by this call).
+static inline uint32_t eu_redmule_is_done(void) {
+  return eu_check_events(EU_REDMULE_DONE_MASK);
+}
+
+//=============================================================================
+// IDMA FUNCTIONS
+//=============================================================================
+
+// Clear every buffered event, then enable both iDMA DONE events (A2O and O2A).
+static inline void eu_idma_init(void) {
+  eu_clear_events(0xFFFFFFFF);
+  eu_enable_events(EU_IDMA_ALL_DONE_MASK);
+}
+
+// Wait until either iDMA direction reports DONE (the underlying wait returns on the first matching bit).
+static inline uint32_t eu_idma_wait_completion(eu_wait_mode_t mode) {
+  return eu_wait_events(EU_IDMA_ALL_DONE_MASK, mode, 1000000);
+}
+
+// Wait for DONE on one direction: nonzero direction selects O2A, zero selects A2O.
+static inline uint32_t eu_idma_wait_direction_completion(uint32_t direction, eu_wait_mode_t mode) {
+  uint32_t wait_mask = direction ? 
EU_IDMA_O2A_DONE_MASK : EU_IDMA_A2O_DONE_MASK; // nonzero direction selects O2A, zero selects A2O
+  return eu_wait_events(wait_mask, mode, 1000000); // 1000000 is the polling timeout; eu_wait_events() ignores it in WFE mode
+}
+
+static inline uint32_t eu_idma_wait_a2o_completion(eu_wait_mode_t mode) { // wait for the A2O DONE event (bit 2)
+  return eu_wait_events(EU_IDMA_A2O_DONE_MASK, mode, 1000000);
+}
+
+static inline uint32_t eu_idma_wait_o2a_completion(eu_wait_mode_t mode) { // wait for the O2A DONE event (bit 3)
+  return eu_wait_events(EU_IDMA_O2A_DONE_MASK, mode, 1000000);
+}
+
+static inline uint32_t eu_idma_is_done(void) { // nonzero if either DONE bit is pending in the masked buffer
+  return eu_check_events(EU_IDMA_ALL_DONE_MASK);
+}
+
+static inline uint32_t eu_idma_a2o_is_done(void) { // nonzero if A2O DONE is pending in the masked buffer
+  return eu_check_events(EU_IDMA_A2O_DONE_MASK);
+}
+
+static inline uint32_t eu_idma_o2a_is_done(void) { // nonzero if O2A DONE is pending in the masked buffer
+  return eu_check_events(EU_IDMA_O2A_DONE_MASK);
+}
+
+static inline uint32_t eu_idma_is_busy(void) { // reads the raw buffer, so the BUSY bits need not be enabled in the mask
+  uint32_t events = eu_get_events();
+  return events & (EU_IDMA_A2O_BUSY_MASK | EU_IDMA_O2A_BUSY_MASK);
+}
+
+static inline uint32_t eu_idma_has_error(void) { // reads the raw buffer, so the ERROR bits need not be enabled in the mask
+  uint32_t events = eu_get_events();
+  return events & (EU_IDMA_A2O_ERROR_MASK | EU_IDMA_O2A_ERROR_MASK);
+}
+
+//=============================================================================
+// FSYNC FUNCTIONS
+//=============================================================================
+
+static inline void eu_fsync_init(void) { // clear all buffered events, then enable FSync DONE and ERROR
+  eu_clear_events(0xFFFFFFFF);
+  eu_enable_events(EU_FSYNC_ALL_MASK);
+}
+
+static inline uint32_t eu_fsync_wait_completion(eu_wait_mode_t mode) { // waits for DONE only; ERROR is not part of the wait mask
+  return eu_wait_events(EU_FSYNC_DONE_MASK, mode, 1000000);
+}
+
+static inline uint32_t eu_fsync_is_done(void) { // nonzero if FSync DONE is pending in the masked buffer
+  return eu_check_events(EU_FSYNC_DONE_MASK);
+}
+
+static inline uint32_t eu_fsync_has_error(void) { // masked read: only reports ERROR when that bit is enabled (eu_fsync_init() enables it)
+  return eu_check_events(EU_FSYNC_ERROR_MASK);
+}
+
+//=============================================================================
+// MULTI-ACCELERATOR FUNCTIONS
+//=============================================================================
+
+static inline void eu_multi_init(uint32_t redmule_en, uint32_t idma_a2o_en, // each nonzero flag adds that unit's events to the mask
+                                 uint32_t idma_o2a_en, uint32_t fsync_en) {
+  
eu_clear_events(0xFFFFFFFF); // eu_multi_init body: drop every buffered event before building the mask
+  uint32_t event_mask = 0;
+
+  if (redmule_en) {
+    event_mask |= EU_REDMULE_ALL_MASK; // all four RedMulE bits (11:8), not just DONE
+  }
+  if (idma_a2o_en) {
+    event_mask |= EU_IDMA_A2O_DONE_MASK;
+  }
+  if (idma_o2a_en) {
+    event_mask |= EU_IDMA_O2A_DONE_MASK;
+  }
+  if (fsync_en) {
+    event_mask |= EU_FSYNC_ALL_MASK; // DONE and ERROR
+  }
+
+  if (event_mask) eu_enable_events(event_mask);
+}
+
+// Wait until the DONE event of every selected unit has been seen.
+// Each nonzero wait_* flag adds that unit's DONE bit to the required set.
+// Returns the accumulated event bits. In polling mode the wait gives up after
+// 1000000 counted cycles, so the caller must check that all required bits are
+// present in the result.
+static inline uint32_t eu_multi_wait_all(uint32_t wait_redmule, uint32_t wait_idma_a2o,
+                                         uint32_t wait_idma_o2a, uint32_t wait_fsync,
+                                         eu_wait_mode_t mode) {
+  uint32_t required_mask = 0;
+  if (wait_redmule) required_mask |= EU_REDMULE_DONE_MASK;
+  if (wait_idma_a2o) required_mask |= EU_IDMA_A2O_DONE_MASK;
+  if (wait_idma_o2a) required_mask |= EU_IDMA_O2A_DONE_MASK;
+  if (wait_fsync) required_mask |= EU_FSYNC_DONE_MASK;
+
+  eu_enable_events(required_mask);
+
+  if (mode == EU_WAIT_MODE_WFE) {
+    uint32_t accumulated = 0;
+    // Events can arrive one at a time: go back to sleep until all are collected.
+    while ((accumulated & required_mask) != required_mask) {
+      uint32_t new_events = evt_read32(EU_BASE, EU_CORE_EVENT_WAIT_CLEAR - EU_BASE);
+      accumulated |= new_events;
+    }
+    return accumulated;
+  } else {
+    uint32_t timeout = 1000000;
+    uint32_t cycles = 0;
+    uint32_t accumulated = 0;
+    while (cycles < timeout && (accumulated & required_mask) != required_mask) {
+      uint32_t detected = eu_check_events(required_mask);
+      if (detected) {
+        // Consume the events, as eu_wait_events_polling() and the WFE branch
+        // do; otherwise stale DONE bits satisfy the next wait immediately.
+        eu_clear_events(detected);
+        accumulated |= detected;
+      }
+      wait_nop(10);
+      cycles += 10;
+    }
+    return accumulated;
+  }
+}
+
+#endif /* EVENT_UNIT_UTILS_H */
diff --git a/sw/utils/fsync_isa_utils.h b/sw/utils/fsync_isa_utils.h
deleted file mode 100644
index 441954d..0000000
--- a/sw/utils/fsync_isa_utils.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2023-2024 ETH Zurich and University of Bologna
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * SPDX-License-Identifier: Apache-2.0 - * - * Authors: Victor Isachi - * - * MAGIA FractalSync ISA Utils - */ - -#ifndef FSYNC_ISA_UTILS_H -#define FSYNC_ISA_UTILS_H - -/* synch instruction */ - // asm volatile( - // ".word (0x0 << 25) | \ /* Reserved - 0x0 */ - // (0b00110 << 20) | \ /* R2 - t1 */ - // (0b00101 << 15) | \ /* R1 - t0 */ - // (0b010 << 12) | \ /* FUNC3 */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1011011 << 0) \n"); /* OPCODE */ -static inline void fsync(volatile uint32_t id, volatile uint32_t aggregate){ - asm volatile("addi t1, %0, 0" ::"r"(id)); - asm volatile("addi t0, %0, 0" ::"r"(aggregate)); - asm volatile( - ".word (0x0 << 25) | \ - (0b00110 << 20) | \ - (0b00101 << 15) | \ - (0b010 << 12) | \ - (0x0 << 7) | \ - (0b1011011 << 0) \n"); -} - -#endif /*FSYNC_ISA_UTILS_H*/ diff --git a/sw/utils/fsync_api.h b/sw/utils/fsync_mm_api.h similarity index 50% rename from sw/utils/fsync_api.h rename to sw/utils/fsync_mm_api.h index 18807e8..93f82b6 100644 --- a/sw/utils/fsync_api.h +++ b/sw/utils/fsync_mm_api.h @@ -14,91 +14,94 @@ * limitations under the License. 
* SPDX-License-Identifier: Apache-2.0 * - * Authors: Victor Isachi + * Authors: Luca Balboni + * Based on fsync_api.h by Victor Isachi * - * MAGIA FractalSync common synchronization patterns API + * MAGIA FractalSync Memory-Mapped Synchronization Patterns API */ -#ifndef FSYNC_API_H -#define FSYNC_API_H +#ifndef FSYNC_MM_API_H +#define FSYNC_MM_API_H - #include "fsync_isa_utils.h" + #include "fsync_mm_utils.h" #include "magia_tile_utils.h" #include "magia_utils.h" - #define _FS_GLOBAL_AGGR (0xFFFFFFFF >> (1+__builtin_clz(NUM_HARTS))) - #define _FS_GLOBAL_ID (-1) - #define _FS_HNBR_AGGR (0x1) - #define _FS_HNBR_ID (0) - #define _FS_VNBR_AGGR (0x1) - #define _FS_VNBR_ID (1) - #define _FS_HRING_AGGR (0x1) - #define _FS_HRING_ID (2) - #define _FS_VRING_AGGR (0x1) - #define _FS_VRING_ID (3) - #define _FS_RC_LVL (0x1 << (29-__builtin_clz(NUM_HARTS))) - #define _FS_RC_AGGR (0x155 >> (__builtin_clz(NUM_HARTS)-21)) + #define _FS_MM_GLOBAL_AGGR (0xFFFFFFFF >> (1+__builtin_clz(NUM_HARTS))) + #define _FS_MM_GLOBAL_ID (-1) + #define _FS_MM_HNBR_AGGR (0x1) + #define _FS_MM_HNBR_ID (0) + #define _FS_MM_VNBR_AGGR (0x1) + #define _FS_MM_VNBR_ID (1) + #define _FS_MM_HRING_AGGR (0x1) + #define _FS_MM_HRING_ID (2) + #define _FS_MM_VRING_AGGR (0x1) + #define _FS_MM_VRING_ID (3) + #define _FS_MM_RC_LVL (0x1 << (29-__builtin_clz(NUM_HARTS))) + #define _FS_MM_RC_AGGR (0x155 >> (__builtin_clz(NUM_HARTS)-21)) // Lookup table indicating the id of row synchronization - uint32_t row_id_lookup(volatile uint32_t hartid_y){ + uint32_t row_id_lookup_mm(volatile uint32_t hartid_y){ if (hartid_y < MESH_Y_TILES/2) return 2*hartid_y; else return 2*(hartid_y-MESH_Y_TILES/2); } // Lookup table indicating the id of column synchronization - uint32_t col_id_lookup(volatile uint32_t hartid_x){ + uint32_t col_id_lookup_mm(volatile uint32_t hartid_x){ if (hartid_x < MESH_X_TILES/2) return 2*hartid_x+1; else return 2*(hartid_x-MESH_X_TILES/2)+1; } - static inline void fsync_hnbr(){ - fsync(_FS_HNBR_ID, 
_FS_HNBR_AGGR); + static inline void fsync_mm_hnbr(){ + fsync_mm(_FS_MM_HNBR_ID, _FS_MM_HNBR_AGGR); } - static inline void fsync_vnbr(){ - fsync(_FS_VNBR_ID, _FS_VNBR_AGGR); + static inline void fsync_mm_vnbr(){ + fsync_mm(_FS_MM_VNBR_ID, _FS_MM_VNBR_AGGR); } - void fsync_hring(){ + void fsync_mm_hring(){ uint32_t hartid = get_hartid(); uint32_t hartid_x = GET_X_ID(hartid); uint32_t hartid_y = GET_Y_ID(hartid); if ((hartid_x == 0) || (hartid_x == MESH_X_TILES-1)){ - uint32_t id = row_id_lookup(hartid_y); - fsync(id, _FS_RC_LVL); + uint32_t id = row_id_lookup_mm(hartid_y); + fsync_mm(id, _FS_MM_RC_LVL); } else { - fsync(_FS_HRING_ID, _FS_HRING_AGGR); + fsync_mm(_FS_MM_HRING_ID, _FS_MM_HRING_AGGR); } } - void fsync_vring(){ + void fsync_mm_vring(){ uint32_t hartid = get_hartid(); uint32_t hartid_x = GET_X_ID(hartid); uint32_t hartid_y = GET_Y_ID(hartid); if ((hartid_y == 0) || (hartid_y == MESH_Y_TILES-1)){ - uint32_t id = col_id_lookup(hartid_x); - fsync(id, _FS_RC_LVL); + uint32_t id = col_id_lookup_mm(hartid_x); + fsync_mm(id, _FS_MM_RC_LVL); } else { - fsync(_FS_VRING_ID, _FS_VRING_AGGR); + fsync_mm(_FS_MM_VRING_ID, _FS_MM_VRING_AGGR); } } - void fsync_rows(){ + void fsync_mm_rows(){ uint32_t hartid = get_hartid(); uint32_t hartid_y = GET_Y_ID(hartid); - uint32_t id = row_id_lookup(hartid_y); - fsync(id, _FS_RC_AGGR); + uint32_t id = row_id_lookup_mm(hartid_y); + fsync_mm(id, _FS_MM_RC_AGGR); } - void fsync_cols(){ + void fsync_mm_cols(){ uint32_t hartid = get_hartid(); uint32_t hartid_x = GET_X_ID(hartid); - uint32_t id = col_id_lookup(hartid_x); - fsync(id, _FS_RC_AGGR); + uint32_t id = col_id_lookup_mm(hartid_x); + fsync_mm(id, _FS_MM_RC_AGGR); } - static inline void fsync_global(){ - fsync(_FS_GLOBAL_ID, _FS_GLOBAL_AGGR); + static inline void fsync_mm_global(){ + fsync_mm(_FS_MM_GLOBAL_ID, _FS_MM_GLOBAL_AGGR); } -#endif /*FSYNC_API_H*/ + + +#endif /*FSYNC_MM_API_H*/ \ No newline at end of file diff --git a/sw/utils/fsync_mm_utils.h b/sw/utils/fsync_mm_utils.h 
new file mode 100644 index 0000000..8398f98 --- /dev/null +++ b/sw/utils/fsync_mm_utils.h @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + * + * Authors: Luca Balboni + * Based on fsync_isa_utils.h by Victor Isachi + * + * MAGIA FractalSync Memory-Mapped Utils + * WARNING: EVENT_UNIT is defined unconditionally below. Remove that #define if polling of the memory-mapped status register is desired; otherwise the polling loop in fsync_mm() is compiled out. + */ + +#ifndef FSYNC_MM_UTILS_H +#define FSYNC_MM_UTILS_H +#define EVENT_UNIT + +#include "magia_tile_utils.h" + +/* Memory-mapped FractalSync register offsets */ +#define FSYNC_MM_AGGR_REG_OFFSET (0x00) +#define FSYNC_MM_ID_REG_OFFSET (0x04) +#define FSYNC_MM_CONTROL_REG_OFFSET (0x08) +#define FSYNC_MM_STATUS_REG_OFFSET (0x0C) + +/* Status register bits */ +#define FSYNC_MM_STATUS_BUSY_MASK (1 << 2) + +/* Memory-mapped sync function */ +static inline void fsync_mm(volatile uint32_t id, volatile uint32_t aggregate){ + volatile char *fsync_base = (volatile char *)(FSYNC_BASE); + + *(volatile uint32_t *)(fsync_base + FSYNC_MM_AGGR_REG_OFFSET) = aggregate; + *(volatile uint32_t *)(fsync_base + FSYNC_MM_ID_REG_OFFSET) = id; + *(volatile uint32_t *)(fsync_base + FSYNC_MM_CONTROL_REG_OFFSET) = 1; + +#ifndef EVENT_UNIT + // Polling mode - wait for completion + volatile uint32_t status; + do { + status = *(volatile uint32_t *)(fsync_base + 
FSYNC_MM_STATUS_REG_OFFSET); + if (status & FSYNC_MM_STATUS_BUSY_MASK) { + printf("FSYNC_MM still busy...\n"); + // Still busy, optionally add a small delay here + } + } while (status & FSYNC_MM_STATUS_BUSY_MASK); +#endif + // In non-stalling mode, the function returns immediately + // and the caller should do wfi to wait for interrupt +} + +#endif /*FSYNC_MM_UTILS_H*/ \ No newline at end of file diff --git a/sw/utils/idma_isa_utils.h b/sw/utils/idma_isa_utils.h deleted file mode 100644 index c7bf862..0000000 --- a/sw/utils/idma_isa_utils.h +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (C) 2023-2024 ETH Zurich and University of Bologna - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * SPDX-License-Identifier: Apache-2.0 - * - * Authors: Victor Isachi - * - * MAGIA iDMA ISA Utils - */ - -#ifndef IDMA_ISA_UTILS_H -#define IDMA_ISA_UTILS_H - -/* conf instruction */ - // asm volatile( - // ".word (0x0 << 27) | \ /* Reserved - 0x0 */ - // (0b11 << 26) | \ /* Enable ND extension - see iDMA documentation */ - // (0b0 << 25) | \ /* Direction - 0 for AXI2OBI (L2 to L1), 1 for OBI2AXI (L1 to L2) */ - // (0b000 << 22) | \ /* Destination maximum logarithmic length - see iDMA documentation */ - // (0b000 << 19) | \ /* Source maximum logarithmic length - see iDMA documentation */ - // (0b0 << 18) | \ /* Destination reduce length - see iDMA documentation */ - // (0b0 << 17) | \ /* Source reduce length - see iDMA documentation */ - // (0b0 << 16) | \ /* Decouple R/W - see iDMA documentation */ - // (0b0 << 15) | \ /* Decouple R/AW - see iDMA documentation */ - // (0b000 << 12) | \ /* FUNC3 */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1011011 << 0) \n"); /* OPCODE */ -static inline void idma_conf_in(){ - asm volatile( - ".word (0x0 << 27) | \ - (0b11 << 26) | \ - (0b0 << 25) | \ - (0b000 << 22) | \ - (0b000 << 19) | \ - (0b0 << 18) | \ - (0b0 << 17) | \ - (0b0 << 16) | \ - (0b0 << 15) | \ - (0b000 << 12) | \ - (0x0 << 7) | \ - (0b1011011 << 0) \n"); -} - -/* conf instruction */ - // asm volatile( - // ".word (0x0 << 27) | \ /* Reserved - 0x0 */ - // (0b11 << 26) | \ /* Enable ND extension - see iDMA documentation */ - // (0b1 << 25) | \ /* Direction - 0 for AXI2OBI (L2 to L1), 1 for OBI2AXI (L1 to L2) */ - // (0b000 << 22) | \ /* Destination maximum logarithmic length - see iDMA documentation */ - // (0b000 << 19) | \ /* Source maximum logarithmic length - see iDMA documentation */ - // (0b0 << 18) | \ /* Destination reduce length - see iDMA documentation */ - // (0b0 << 17) | \ /* Source reduce length - see iDMA documentation */ - // (0b0 << 16) | \ /* Decouple R/W - see iDMA documentation */ - // (0b0 << 15) | \ /* Decouple R/AW - see iDMA 
documentation */ - // (0b000 << 12) | \ /* FUNC3 */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1011011 << 0) \n"); /* OPCODE */ -static inline void idma_conf_out(){ - asm volatile( - ".word (0x0 << 27) | \ - (0b11 << 26) | \ - (0b1 << 25) | \ - (0b000 << 22) | \ - (0b000 << 19) | \ - (0b0 << 18) | \ - (0b0 << 17) | \ - (0b0 << 16) | \ - (0b0 << 15) | \ - (0b000 << 12) | \ - (0x0 << 7) | \ - (0b1011011 << 0) \n"); -} - -/* set instruction */ - // asm volatile( - // ".word (0b00111 << 27) | \ /* R3 - t2 */ - // (0x0 << 26) | \ /* Reserved - 0x0 */ - // (0b0 << 25) | \ /* Direction - 0 for AXI2OBI (L2 to L1), 1 for OBI2AXI (L1 to L2) */ - // (0b00110 << 20) | \ /* R2 - t1 */ - // (0b00101 << 15) | \ /* R1 - t0 */ - // (0b000 << 12) | \ /* FUNC3 - ADDR/LEN */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1111011 << 0) \n"); /* OPCODE */ -static inline void idma_set_addr_len_in(volatile uint32_t dst_addr, volatile uint32_t src_addr, volatile uint32_t len){ - asm volatile ("addi t2, %0, 0" :: "r"(dst_addr)); - asm volatile ("addi t1, %0, 0" :: "r"(src_addr)); - asm volatile ("addi t0, %0, 0" :: "r"(len)); - asm volatile( - ".word (0b00111 << 27) | \ - (0x0 << 26) | \ - (0b0 << 25) | \ - (0b00110 << 20) | \ - (0b00101 << 15) | \ - (0b000 << 12) | \ - (0x0 << 7) | \ - (0b1111011 << 0) \n"); -} - -/* set instruction */ - // asm volatile( - // ".word (0b00111 << 27) | \ /* R3 - t2 */ - // (0x0 << 26) | \ /* Reserved - 0x0 */ - // (0b1 << 25) | \ /* Direction - 0 for AXI2OBI (L2 to L1), 1 for OBI2AXI (L1 to L2) */ - // (0b00110 << 20) | \ /* R2 - t1 */ - // (0b00101 << 15) | \ /* R1 - t0 */ - // (0b000 << 12) | \ /* FUNC3 - ADDR/LEN */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1111011 << 0) \n"); /* OPCODE */ -static inline void idma_set_addr_len_out(volatile uint32_t dst_addr, volatile uint32_t src_addr, volatile uint32_t len){ - asm volatile ("addi t2, %0, 0" :: "r"(dst_addr)); - asm volatile ("addi t1, %0, 0" :: "r"(src_addr)); - asm volatile ("addi t0, %0, 
0" :: "r"(len)); - asm volatile( - ".word (0b00111 << 27) | \ - (0x0 << 26) | \ - (0b1 << 25) | \ - (0b00110 << 20) | \ - (0b00101 << 15) | \ - (0b000 << 12) | \ - (0x0 << 7) | \ - (0b1111011 << 0) \n"); -} - -/* set instruction */ - // asm volatile( - // ".word (0b00111 << 27) | \ /* R3 - t2 */ - // (0x0 << 26) | \ /* Reserved - 0x0 */ - // (0b0 << 25) | \ /* Direction - 0 for AXI2OBI (L2 to L1), 1 for OBI2AXI (L1 to L2) */ - // (0b00110 << 20) | \ /* R2 - t1 */ - // (0b00101 << 15) | \ /* R1 - t0 */ - // (0b001 << 12) | \ /* FUNC3 - STD_2/REP_2 */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1111011 << 0) \n"); /* OPCODE */ -static inline void idma_set_std2_rep2_in(volatile uint32_t dst_std_2, volatile uint32_t src_std_2, volatile uint32_t reps_2){ - asm volatile ("addi t2, %0, 0" :: "r"(dst_std_2)); - asm volatile ("addi t1, %0, 0" :: "r"(src_std_2)); - asm volatile ("addi t0, %0, 0" :: "r"(reps_2)); - asm volatile( - ".word (0b00111 << 27) | \ - (0x0 << 26) | \ - (0b0 << 25) | \ - (0b00110 << 20) | \ - (0b00101 << 15) | \ - (0b001 << 12) | \ - (0x0 << 7) | \ - (0b1111011 << 0) \n"); -} - -/* set instruction */ - // asm volatile( - // ".word (0b00111 << 27) | \ /* R3 - t2 */ - // (0x0 << 26) | \ /* Reserved - 0x0 */ - // (0b1 << 25) | \ /* Direction - 0 for AXI2OBI (L2 to L1), 1 for OBI2AXI (L1 to L2) */ - // (0b00110 << 20) | \ /* R2 - t1 */ - // (0b00101 << 15) | \ /* R1 - t0 */ - // (0b001 << 12) | \ /* FUNC3 - STD_2/REP_2 */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1111011 << 0) \n"); /* OPCODE */ -static inline void idma_set_std2_rep2_out(volatile uint32_t dst_std_2, volatile uint32_t src_std_2, volatile uint32_t reps_2){ - asm volatile ("addi t2, %0, 0" :: "r"(dst_std_2)); - asm volatile ("addi t1, %0, 0" :: "r"(src_std_2)); - asm volatile ("addi t0, %0, 0" :: "r"(reps_2)); - asm volatile( - ".word (0b00111 << 27) | \ - (0x0 << 26) | \ - (0b1 << 25) | \ - (0b00110 << 20) | \ - (0b00101 << 15) | \ - (0b001 << 12) | \ - (0x0 << 7) | \ - 
(0b1111011 << 0) \n"); -} - -/* set instruction */ - // asm volatile( - // ".word (0b00111 << 27) | \ /* R3 - t2 */ - // (0x0 << 26) | \ /* Reserved - 0x0 */ - // (0b0 << 25) | \ /* Direction - 0 for AXI2OBI (L2 to L1), 1 for OBI2AXI (L1 to L2) */ - // (0b00110 << 20) | \ /* R2 - t1 */ - // (0b00101 << 15) | \ /* R1 - t0 */ - // (0b010 << 12) | \ /* FUNC3 - STD_3/REP_3 */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1111011 << 0) \n"); /* OPCODE */ -static inline void idma_set_std3_rep3_in(volatile uint32_t dst_std_3, volatile uint32_t src_std_3, volatile uint32_t reps_3){ - asm volatile ("addi t2, %0, 0" :: "r"(dst_std_3)); - asm volatile ("addi t1, %0, 0" :: "r"(src_std_3)); - asm volatile ("addi t0, %0, 0" :: "r"(reps_3)); - asm volatile( - ".word (0b00111 << 27) | \ - (0x0 << 26) | \ - (0b0 << 25) | \ - (0b00110 << 20) | \ - (0b00101 << 15) | \ - (0b010 << 12) | \ - (0x0 << 7) | \ - (0b1111011 << 0) \n"); -} - -/* set instruction */ - // asm volatile( - // ".word (0b00111 << 27) | \ /* R3 - t2 */ - // (0x0 << 26) | \ /* Reserved - 0x0 */ - // (0b1 << 25) | \ /* Direction - 0 for AXI2OBI (L2 to L1), 1 for OBI2AXI (L1 to L2) */ - // (0b00110 << 20) | \ /* R2 - t1 */ - // (0b00101 << 15) | \ /* R1 - t0 */ - // (0b010 << 12) | \ /* FUNC3 - STD_3/REP_3 */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1111011 << 0) \n"); /* OPCODE */ -static inline void idma_set_std3_rep3_out(volatile uint32_t dst_std_3, volatile uint32_t src_std_3, volatile uint32_t reps_3){ - asm volatile ("addi t2, %0, 0" :: "r"(dst_std_3)); - asm volatile ("addi t1, %0, 0" :: "r"(src_std_3)); - asm volatile ("addi t0, %0, 0" :: "r"(reps_3)); - asm volatile( - ".word (0b00111 << 27) | \ - (0x0 << 26) | \ - (0b1 << 25) | \ - (0b00110 << 20) | \ - (0b00101 << 15) | \ - (0b010 << 12) | \ - (0x0 << 7) | \ - (0b1111011 << 0) \n"); -} - -/* start instruction */ - // asm volatile( - // ".word (0x0 << 26) | \ /* Reserved - 0x0 */ - // (0b0 << 25) | \ /* Direction - 0 for AXI2OBI (L2 to L1), 1 for 
OBI2AXI (L1 to L2) */ - // (0x0 << 15) | \ /* Reserved - 0x0 */ - // (0b111 << 12) | \ /* FUNC3 - START */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1111011 << 0) \n"); /* OPCODE */ -static inline void idma_start_in(){ - asm volatile( - ".word (0x0 << 26) | \ - (0b0 << 25) | \ - (0x0 << 15) | \ - (0b111 << 12) | \ - (0x0 << 7) | \ - (0b1111011 << 0) \n"); -} - -/* start instruction */ - // asm volatile( - // ".word (0x0 << 26) | \ /* Reserved - 0x0 */ - // (0b1 << 25) | \ /* Direction - 0 for AXI2OBI (L2 to L1), 1 for OBI2AXI (L1 to L2) */ - // (0x0 << 15) | \ /* Reserved - 0x0 */ - // (0b111 << 12) | \ /* FUNC3 - START */ - // (0x0 << 7) | \ /* Reserved - 0x0 */ - // (0b1111011 << 0) \n"); /* OPCODE */ -static inline void idma_start_out(){ - asm volatile( - ".word (0x0 << 26) | \ - (0b1 << 25) | \ - (0x0 << 15) | \ - (0b111 << 12) | \ - (0x0 << 7) | \ - (0b1111011 << 0) \n"); -} - -#endif /*IDMA_ISA_UTILS_H*/ diff --git a/sw/utils/idma_mm_utils.h b/sw/utils/idma_mm_utils.h new file mode 100644 index 0000000..558a024 --- /dev/null +++ b/sw/utils/idma_mm_utils.h @@ -0,0 +1,355 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * SPDX-License-Identifier: Apache-2.0 + * + * Authors: Luca Balboni + * Based on idma_utils.h by Victor Isachi + * + * MAGIA iDMA Memory-Mapped I/O Utils + */ + +#ifndef IDMA_MM_UTILS_H +#define IDMA_MM_UTILS_H + +#include +#include "magia_tile_utils.h" + +//============================================================================= +// Register Definitions and Constants +//============================================================================= + +// iDMA Memory-Mapped Register Base Addresses +#define IDMA_MM_DIRECTION_OFFSET (0x200) +#define IDMA_MM_BASE_AXI2OBI (IDMA_BASE) // direction=0, L2 to L1 +#define IDMA_MM_BASE_OBI2AXI (IDMA_BASE + IDMA_MM_DIRECTION_OFFSET) // direction=1, L1 to L2 + +#define IDMA_CONF_OFFSET (0x00) +#define IDMA_STATUS_OFFSET (0x04) +#define IDMA_NEXT_ID_OFFSET (0x44) +#define IDMA_DONE_ID_OFFSET (0x84) +#define IDMA_DST_ADDR_LOW_OFFSET (0xD0) +#define IDMA_SRC_ADDR_LOW_OFFSET (0xD8) +#define IDMA_LENGTH_LOW_OFFSET (0xE0) +#define IDMA_DST_STRIDE_2_LOW_OFFSET (0xE8) +#define IDMA_SRC_STRIDE_2_LOW_OFFSET (0xF0) +#define IDMA_REPS_2_LOW_OFFSET (0xF8) +#define IDMA_DST_STRIDE_3_LOW_OFFSET (0x100) +#define IDMA_SRC_STRIDE_3_LOW_OFFSET (0x108) +#define IDMA_REPS_3_LOW_OFFSET (0x110) + +// Register Addresses - now direction-aware +#define IDMA_CONF_ADDR(is_l1_to_l2) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_CONF_OFFSET) : (IDMA_MM_BASE_AXI2OBI + IDMA_CONF_OFFSET)) +#define IDMA_STATUS_ADDR(is_l1_to_l2, id) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_STATUS_OFFSET + ((id) * 4)) : (IDMA_MM_BASE_AXI2OBI + IDMA_STATUS_OFFSET + ((id) * 4))) +#define IDMA_NEXT_ID_ADDR(is_l1_to_l2, id) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_NEXT_ID_OFFSET + ((id) * 4)) : (IDMA_MM_BASE_AXI2OBI + IDMA_NEXT_ID_OFFSET + ((id) * 4))) +#define IDMA_DONE_ID_ADDR(is_l1_to_l2, id) ((is_l1_to_l2) ? 
(IDMA_MM_BASE_OBI2AXI + IDMA_DONE_ID_OFFSET + ((id) * 4)) : (IDMA_MM_BASE_AXI2OBI + IDMA_DONE_ID_OFFSET + ((id) * 4))) +#define IDMA_DST_ADDR_LOW_ADDR(is_l1_to_l2) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_DST_ADDR_LOW_OFFSET) : (IDMA_MM_BASE_AXI2OBI + IDMA_DST_ADDR_LOW_OFFSET)) +#define IDMA_SRC_ADDR_LOW_ADDR(is_l1_to_l2) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_SRC_ADDR_LOW_OFFSET) : (IDMA_MM_BASE_AXI2OBI + IDMA_SRC_ADDR_LOW_OFFSET)) +#define IDMA_LENGTH_LOW_ADDR(is_l1_to_l2) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_LENGTH_LOW_OFFSET) : (IDMA_MM_BASE_AXI2OBI + IDMA_LENGTH_LOW_OFFSET)) +#define IDMA_DST_STRIDE_2_LOW_ADDR(is_l1_to_l2) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_DST_STRIDE_2_LOW_OFFSET) : (IDMA_MM_BASE_AXI2OBI + IDMA_DST_STRIDE_2_LOW_OFFSET)) +#define IDMA_SRC_STRIDE_2_LOW_ADDR(is_l1_to_l2) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_SRC_STRIDE_2_LOW_OFFSET) : (IDMA_MM_BASE_AXI2OBI + IDMA_SRC_STRIDE_2_LOW_OFFSET)) +#define IDMA_REPS_2_LOW_ADDR(is_l1_to_l2) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_REPS_2_LOW_OFFSET) : (IDMA_MM_BASE_AXI2OBI + IDMA_REPS_2_LOW_OFFSET)) +#define IDMA_DST_STRIDE_3_LOW_ADDR(is_l1_to_l2) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_DST_STRIDE_3_LOW_OFFSET) : (IDMA_MM_BASE_AXI2OBI + IDMA_DST_STRIDE_3_LOW_OFFSET)) +#define IDMA_SRC_STRIDE_3_LOW_ADDR(is_l1_to_l2) ((is_l1_to_l2) ? (IDMA_MM_BASE_OBI2AXI + IDMA_SRC_STRIDE_3_LOW_OFFSET) : (IDMA_MM_BASE_AXI2OBI + IDMA_SRC_STRIDE_3_LOW_OFFSET)) +#define IDMA_REPS_3_LOW_ADDR(is_l1_to_l2) ((is_l1_to_l2) ? 
(IDMA_MM_BASE_OBI2AXI + IDMA_REPS_3_LOW_OFFSET) : (IDMA_MM_BASE_AXI2OBI + IDMA_REPS_3_LOW_OFFSET)) + +// Configuration Register Bit Fields +#define IDMA_CONF_DECOUPLE_AW_BIT (0) +#define IDMA_CONF_DECOUPLE_RW_BIT (1) +#define IDMA_CONF_SRC_REDUCE_LEN_BIT (2) +#define IDMA_CONF_DST_REDUCE_LEN_BIT (3) +#define IDMA_CONF_SRC_MAX_LLEN_MASK (0x70) // bits 6:4 +#define IDMA_CONF_SRC_MAX_LLEN_SHIFT (4) +#define IDMA_CONF_DST_MAX_LLEN_MASK (0x380) // bits 9:7 +#define IDMA_CONF_DST_MAX_LLEN_SHIFT (7) +#define IDMA_CONF_ENABLE_ND_MASK (0xC00) // bits 11:10 +#define IDMA_CONF_ENABLE_ND_SHIFT (10) + +// Status Register Bit Fields +#define IDMA_STATUS_BUSY_MASK (0x3FF) // bits 9:0 + +// Transfer Direction Constants +#define IDMA_DIR_L2_TO_L1 (0) // AXI2OBI direction +#define IDMA_DIR_L1_TO_L2 (1) // OBI2AXI direction + +// Direction aliases +#define IDMA_EXT2LOC 0 // L2 to L1 (AXI2OBI) +#define IDMA_LOC2EXT 1 // L1 to L2 (OBI2AXI) + +// Transfer dimensions +#define IDMA_1D 0 +#define IDMA_2D 1 +#define IDMA_3D 2 + +// Protocol definitions +typedef enum { + IDMA_PROT_AXI = 0, // AXI protocol: L2 memory + IDMA_PROT_OBI = 1 // OBI protocol: L1 memory +} idma_prot_t; + +typedef unsigned int dma_ext_t; + +// Configuration macros +#define IDMA_DEFAULT_CONFIG 0x0 + +//============================================================================= +// Low-Level Register Access Functions +//============================================================================= + +static inline void idma_mm_conf_dir(uint32_t is_l1_to_l2, uint32_t decouple_aw, uint32_t decouple_rw, + uint32_t src_reduce_len, uint32_t dst_reduce_len, + uint32_t src_max_llen, uint32_t dst_max_llen, + uint32_t enable_nd) { + uint32_t conf_val = 0; + + if (decouple_aw) conf_val |= (1 << IDMA_CONF_DECOUPLE_AW_BIT); + if (decouple_rw) conf_val |= (1 << IDMA_CONF_DECOUPLE_RW_BIT); + if (src_reduce_len) conf_val |= (1 << IDMA_CONF_SRC_REDUCE_LEN_BIT); + if (dst_reduce_len) conf_val |= (1 << IDMA_CONF_DST_REDUCE_LEN_BIT); + 
+ conf_val |= ((src_max_llen & 0x7) << IDMA_CONF_SRC_MAX_LLEN_SHIFT); + conf_val |= ((dst_max_llen & 0x7) << IDMA_CONF_DST_MAX_LLEN_SHIFT); + conf_val |= ((enable_nd & 0x3) << IDMA_CONF_ENABLE_ND_SHIFT); + + mmio32(IDMA_CONF_ADDR(is_l1_to_l2)) = conf_val; +} + +static inline void idma_mm_conf_default_dir(uint32_t is_l1_to_l2) { + idma_mm_conf_dir(is_l1_to_l2, 0, 0, 0, 0, 0, 0, 3); +} + +static inline uint32_t idma_mm_is_busy_dir(uint32_t is_l1_to_l2, uint32_t stream_id) { + if (stream_id >= 16) return 0; + uint32_t status = mmio32(IDMA_STATUS_ADDR(is_l1_to_l2, stream_id)); + return (status & IDMA_STATUS_BUSY_MASK) ? 1 : 0; +} + +static inline uint32_t idma_mm_start_transfer_dir(uint32_t is_l1_to_l2, uint32_t stream_id) { + if (stream_id >= 16) return 0; + uint32_t transfer_id = mmio32(IDMA_NEXT_ID_ADDR(is_l1_to_l2, stream_id)); + return transfer_id; +} + +static inline uint32_t idma_mm_get_done_id_dir(uint32_t is_l1_to_l2, uint32_t stream_id) { + if (stream_id >= 16) return 0; + return mmio32(IDMA_DONE_ID_ADDR(is_l1_to_l2, stream_id)); +} + +static inline void idma_mm_set_addr_len_dir(uint32_t is_l1_to_l2, uint32_t dst_addr, uint32_t src_addr, uint32_t length) { + mmio32(IDMA_DST_ADDR_LOW_ADDR(is_l1_to_l2)) = dst_addr; + mmio32(IDMA_SRC_ADDR_LOW_ADDR(is_l1_to_l2)) = src_addr; + mmio32(IDMA_LENGTH_LOW_ADDR(is_l1_to_l2)) = length; +} + +static inline void idma_mm_set_2d_params_dir(uint32_t is_l1_to_l2, uint32_t dst_stride_2, uint32_t src_stride_2, uint32_t reps_2) { + mmio32(IDMA_DST_STRIDE_2_LOW_ADDR(is_l1_to_l2)) = dst_stride_2; + mmio32(IDMA_SRC_STRIDE_2_LOW_ADDR(is_l1_to_l2)) = src_stride_2; + mmio32(IDMA_REPS_2_LOW_ADDR(is_l1_to_l2)) = reps_2; +} + +static inline void idma_mm_set_3d_params_dir(uint32_t is_l1_to_l2, uint32_t dst_stride_3, uint32_t src_stride_3, uint32_t reps_3) { + mmio32(IDMA_DST_STRIDE_3_LOW_ADDR(is_l1_to_l2)) = dst_stride_3; + mmio32(IDMA_SRC_STRIDE_3_LOW_ADDR(is_l1_to_l2)) = src_stride_3; + mmio32(IDMA_REPS_3_LOW_ADDR(is_l1_to_l2)) = reps_3; 
+} + +static inline uint32_t idma_mm_wait_for_completion(uint32_t direction, uint32_t transfer_id) { + if (transfer_id == 0) return 0; + + uint32_t is_l1_to_l2 = (direction == IDMA_DIR_L1_TO_L2) ? 1 : 0; + uint32_t stream_id = 0; + uint32_t timeout = 1000000; + + while (timeout-- > 0) { + uint32_t is_busy = idma_mm_is_busy_dir(is_l1_to_l2, stream_id); + + if (!is_busy) { + uint32_t done_id = idma_mm_get_done_id_dir(is_l1_to_l2, stream_id); + if (done_id == transfer_id) { + return 1; + } + } + + wait_nop(10); + } + + return 0; +} + +//============================================================================= +// High-Level DMA API - 1D Transfers +//============================================================================= + +// Forward declarations +static inline int idma_L1ToL2(unsigned int src, unsigned int dst, unsigned short size); +static inline int idma_L2ToL1(unsigned int src, unsigned int dst, unsigned short size); +static inline int idma_L1ToL1(unsigned int src, unsigned int dst, unsigned short size); +static inline int idma_L1ToL2_2d(unsigned int src, unsigned int dst, unsigned short size, + unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); +static inline int idma_L2ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, + unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); +static inline int idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, + unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps); + +static inline int dma_memcpy(dma_ext_t ext, unsigned int loc, unsigned short size, int ext2loc) { + if (ext2loc) + return idma_L2ToL1(ext, loc, size); + else + return idma_L1ToL2(loc, ext, size); +} + +static inline int dma_l1ToExt(dma_ext_t ext, unsigned int loc, unsigned short size) { + return idma_L1ToL2(loc, ext, size); +} + +static inline int dma_extToL1(unsigned int loc, dma_ext_t ext, unsigned short size) { + return idma_L2ToL1(ext, loc, size); +} + 
+// Dispatches a 1D copy according to the protocol of each endpoint:
+//   OBI -> AXI : idma_L1ToL2
+//   AXI -> OBI : idma_L2ToL1
+//   OBI -> OBI : idma_L1ToL1 (legacy wrapper, defined at the end of this group)
+// Any other combination (AXI -> AXI) is not handled and returns 0.
+// Returns whatever the selected wrapper returns, i.e. the result of
+// idma_mm_start_transfer_dir().
+// NOTE(review): size is unsigned int here, but every 1D wrapper takes unsigned short,
+// so a size above 65535 is narrowed by the call - confirm whether larger copies are needed.
+static inline int idma_memcpy(unsigned int src, unsigned int dst, unsigned int size,
+                              idma_prot_t src_prot, idma_prot_t dst_prot) {
+    if (src_prot == IDMA_PROT_OBI && dst_prot == IDMA_PROT_AXI) {
+        return idma_L1ToL2(src, dst, size);
+    } else if (src_prot == IDMA_PROT_AXI && dst_prot == IDMA_PROT_OBI) {
+        return idma_L2ToL1(src, dst, size);
+    } else if (src_prot == IDMA_PROT_OBI && dst_prot == IDMA_PROT_OBI) {
+        return idma_L1ToL1(src, dst, size);
+    }
+    return 0;
+}
+
+// Private helper shared by all 1D wrappers below.
+// Programs the register bank selected by is_l1_to_l2 (0 = AXI2OBI, 1 = OBI2AXI) with the
+// default configuration, the destination/source/length triple, zero 2D/3D strides and a
+// repetition count of 1 for both outer dimensions, then calls
+// idma_mm_start_transfer_dir() on stream 0 and returns its result.
+static inline int idma_mm_launch_1d(uint32_t is_l1_to_l2, unsigned int dst, unsigned int src,
+                                    unsigned short size) {
+    idma_mm_conf_default_dir(is_l1_to_l2);
+    idma_mm_set_addr_len_dir(is_l1_to_l2, dst, src, size);
+    idma_mm_set_2d_params_dir(is_l1_to_l2, 0, 0, 1);
+    idma_mm_set_3d_params_dir(is_l1_to_l2, 0, 0, 1);
+    return idma_mm_start_transfer_dir(is_l1_to_l2, 0);
+}
+
+// L1 to L2 transfer (OBI2AXI register bank).
+static inline int idma_L1ToL2(unsigned int src, unsigned int dst, unsigned short size) {
+    return idma_mm_launch_1d(1, dst, src, size);
+}
+
+// L2 to L1 transfer (AXI2OBI register bank).
+static inline int idma_L2ToL1(unsigned int src, unsigned int dst, unsigned short size) {
+    return idma_mm_launch_1d(0, dst, src, size);
+}
+
+// L1-to-L1 transfers: Remote to Local (PULL)
+// AXI2OBI: read from remote (AXI), write to local (OBI)
+static inline int idma_L1ToL1_pull(unsigned int remote_src, unsigned int local_dst, unsigned short size) {
+    return idma_mm_launch_1d(0, local_dst, remote_src, size);
+}
+
+// L1-to-L1 transfers: Local to Remote (PUSH)
+// OBI2AXI: read from local (OBI), write to remote (AXI)
+static inline int idma_L1ToL1_push(unsigned int local_src, unsigned int remote_dst, unsigned short size) {
+    return idma_mm_launch_1d(1, remote_dst, local_src, size);
+}
+
+// Legacy L1-to-L1 function (assumes remote->local, the same choice idma_L1ToL1_2d makes).
+// It is forward-declared earlier in this header and called by idma_memcpy() for OBI -> OBI
+// copies, but previously had no definition, so that path could not link.
+static inline int idma_L1ToL1(unsigned int src, unsigned int dst, unsigned short size) {
+    return idma_L1ToL1_pull(src, dst, size);
+}
+
+
+//============================================================================= +// High-Level DMA API - 2D Transfers +//============================================================================= + +static inline int idma_memcpy_2d(unsigned int src, unsigned int dst, unsigned int size, + unsigned int src_stride, unsigned int dst_stride, + unsigned int num_reps, idma_prot_t src_prot, idma_prot_t dst_prot) { + if (src_prot == IDMA_PROT_OBI && dst_prot == IDMA_PROT_AXI) { + return idma_L1ToL2_2d(src, dst, size, src_stride, dst_stride, num_reps); + } else if (src_prot == IDMA_PROT_AXI && dst_prot == IDMA_PROT_OBI) { + return idma_L2ToL1_2d(src, dst, size, src_stride, dst_stride, num_reps); + } else if (src_prot == IDMA_PROT_OBI && dst_prot == IDMA_PROT_OBI) { + return idma_L1ToL1_2d(src, dst, size, src_stride, dst_stride, num_reps); + } + return 0; +} + +static inline int idma_L1ToL2_2d(unsigned int src, unsigned int dst, unsigned short size, + unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + idma_mm_conf_default_dir(1); + idma_mm_set_addr_len_dir(1, dst, src, size); + idma_mm_set_2d_params_dir(1, dst_stride, src_stride, num_reps); + idma_mm_set_3d_params_dir(1, 0, 0, 1); + return idma_mm_start_transfer_dir(1, 0); +} + +static inline int idma_L2ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, + unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + idma_mm_conf_default_dir(0); + idma_mm_set_addr_len_dir(0, dst, src, size); + idma_mm_set_2d_params_dir(0, dst_stride, src_stride, num_reps); + idma_mm_set_3d_params_dir(0, 0, 0, 1); + return idma_mm_start_transfer_dir(0, 0); +} + +// L1-to-L1 2D transfers: Remote to Local (PULL) +static inline int idma_L1ToL1_pull_2d(unsigned int remote_src, unsigned int local_dst, unsigned short size, + unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + idma_mm_conf_default_dir(0); // AXI2OBI + idma_mm_set_addr_len_dir(0, local_dst, remote_src, 
size); + idma_mm_set_2d_params_dir(0, dst_stride, src_stride, num_reps); + idma_mm_set_3d_params_dir(0, 0, 0, 1); + return idma_mm_start_transfer_dir(0, 0); +} + +// L1-to-L1 2D transfers: Local to Remote (PUSH) +static inline int idma_L1ToL1_push_2d(unsigned int local_src, unsigned int remote_dst, unsigned short size, + unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + idma_mm_conf_default_dir(1); // OBI2AXI + idma_mm_set_addr_len_dir(1, remote_dst, local_src, size); + idma_mm_set_2d_params_dir(1, dst_stride, src_stride, num_reps); + idma_mm_set_3d_params_dir(1, 0, 0, 1); + return idma_mm_start_transfer_dir(1, 0); +} + +// Legacy L1-to-L1 2D function (assumes remote->local for backward compatibility) +static inline int idma_L1ToL1_2d(unsigned int src, unsigned int dst, unsigned short size, + unsigned int src_stride, unsigned int dst_stride, unsigned int num_reps) { + return idma_L1ToL1_pull_2d(src, dst, size, src_stride, dst_stride, num_reps); +} + +//============================================================================= +// Status and Wait Functions +//============================================================================= + +static inline unsigned int idma_tx_cplt(unsigned int dma_tx_id) { + uint32_t done_id_axi2obi = idma_mm_get_done_id_dir(0, 0); + uint32_t done_id_obi2axi = idma_mm_get_done_id_dir(1, 0); + + return (done_id_axi2obi == dma_tx_id) || (done_id_obi2axi == dma_tx_id); +} + +static inline unsigned int dma_status() { + return idma_mm_is_busy_dir(0, 0) || idma_mm_is_busy_dir(1, 0); +} + +static inline void dma_wait(unsigned int dma_tx_id) { + while(!idma_tx_cplt(dma_tx_id)) { + wait_nop(1); + } +} + +static inline void dma_barrier() { + while(dma_status()) { + wait_nop(1); + } +} + +#endif /*IDMA_MM_UTILS_H*/ \ No newline at end of file diff --git a/sw/utils/magia_tile_utils.h b/sw/utils/magia_tile_utils.h index f5c8006..38b8d82 100644 --- a/sw/utils/magia_tile_utils.h +++ b/sw/utils/magia_tile_utils.h @@ 
-22,19 +22,29 @@ #ifndef MAGIA_TILE_UTILS_H #define MAGIA_TILE_UTILS_H +#include #include "tinyprintf.h" + #define NUM_L1_BANKS (32) #define WORDS_BANK (8192) #define BITS_WORD (32) #define BITS_BYTE (8) -#define RESERVED_START (0x00000000) -#define RESERVED_END (0x0000FFFF) +#define REDMULE_BASE (0x00000100) +#define REDMULE_END (0x000001FF) +#define IDMA_BASE (0x00000200) +#define IDMA_END (0x000005FF) +#define FSYNC_BASE (0x00000600) +#define FSYNC_END (0x000006FF) +#define EVENT_UNIT_BASE (0x00000700) +#define EVENT_UNIT_END (0x000016FF) +#define RESERVED_START (0x00001700) +#define RESERVED_END (0x0000FFFF) #define STACK_START (0x00010000) #define STACK_END (0x0001FFFF) #define L1_BASE (0x00020000) -#define L1_SIZE (0x000E0000) +#define L1_SIZE (0x000DFFFF) #define L1_TILE_OFFSET (0x00100000) #define L2_BASE (0xCC000000) #define TEST_END_ADDR (0xCC030000) @@ -43,19 +53,8 @@ #define PASS_EXIT_CODE (0xAAAA) #define FAIL_EXIT_CODE (0xFFFF) -#define IRQ_REDMULE_EVT_0 (31) -#define IRQ_REDMULE_EVT_1 (30) -#define IRQ_A2O_ERROR (29) -#define IRQ_O2A_ERROR (28) -#define IRQ_A2O_DONE (27) -#define IRQ_O2A_DONE (26) -#define IRQ_A2O_START (25) -#define IRQ_O2A_START (24) -#define IRQ_A2O_BUSY (23) -#define IRQ_O2A_BUSY (22) -#define IRQ_REDMULE_BUSY (21) -#define IRQ_FSYNC_DONE (20) -#define IRQ_FSYNC_ERROR (19) +// Individual IRQ indices removed - Event Unit provides unified interrupt management +// Use Event Unit API (event_unit_utils.h) for event handling #define mmio64(x) (*(volatile uint64_t *)(x)) #define mmio32(x) (*(volatile uint32_t *)(x)) @@ -112,16 +111,18 @@ static inline void sentinel_end(){ } static inline void ccount_en(){ - asm volatile("csrrci zero, 0x320, 0x1" ::); + uint32_t pcmr = 1; + asm volatile("csrw 0x7e1, %0" ::"r"(pcmr)); } static inline void ccount_dis(){ - asm volatile("csrrsi zero, 0x320, 0x1" ::); + uint32_t pcmr = 0; + asm volatile("csrw 0x7e1, %0" ::"r"(pcmr)); } static inline uint32_t get_cyclel(){ uint32_t cyclel; - asm volatile("csrr 
%0, cycle" + asm volatile("csrr %0, 0x780" :"=r"(cyclel):); return cyclel; } @@ -142,13 +143,14 @@ uint32_t get_cycle(){ static inline uint32_t get_timel(){ uint32_t timel; - asm volatile("csrr %0, time" + asm volatile("csrr %0, 0x781" :"=r"(timel):); return timel; } static inline uint32_t get_timeh(){ uint32_t timeh; + // NOTE(review): RI5CY reportedly has no separate timeh CSR, yet the statement below still reads timeh instead of returning 0 - confirm intended behavior asm volatile("csrr %0, timeh" :"=r"(timeh):); return timeh; @@ -161,4 +163,90 @@ uint32_t get_time(){ return timel; } +static inline uint32_t get_mhartid(){ + uint32_t mhartid; + asm volatile("csrr %0, mhartid" + :"=r"(mhartid):); + return mhartid; +} + +static inline uint32_t get_cluster_id(){ + // In MAGIA: cluster_id comes from bits [9:4] of mhartid (to match hardware mapping) + uint32_t mhartid = get_mhartid(); + return (mhartid >> 4) & 0x3F; // Extract mhartid[9:4] - 6 bits for cluster_id +} + +static inline uint32_t get_core_id(){ + // In MAGIA: core_id comes from lower 4 bits of mhartid (tile/hart ID) + uint32_t mhartid = get_mhartid(); + return mhartid & 0xF; // Extract mhartid[3:0] - 4 bits for core_id +} + +static inline uint32_t get_tile_id(){ + // In MAGIA: tile ID = hart ID (full mhartid value) + return get_mhartid(); +} + +// Additional Flex-V CSR access functions based on CSR table +static inline uint32_t get_mstatus(){ + uint32_t mstatus; + asm volatile("csrr %0, 0x300" :"=r"(mstatus):); // MSTATUS (0x300) + return mstatus; +} + +static inline void set_mstatus(uint32_t value){ + asm volatile("csrw 0x300, %0" ::"r"(value)); // MSTATUS (0x300) +} + +static inline uint32_t get_mtvec(){ + uint32_t mtvec; + asm volatile("csrr %0, 0x305" :"=r"(mtvec):); // MTVEC (0x305) + return mtvec; +} + +static inline void set_mtvec(uint32_t value){ + asm volatile("csrw 0x305, %0" ::"r"(value)); // MTVEC (0x305) +} + +static inline uint32_t get_mepc(){ + uint32_t mepc; + asm volatile("csrr %0, 0x341" :"=r"(mepc):); // MEPC (0x341) + return mepc; +} + +static inline void set_mepc(uint32_t value){ + asm 
volatile("csrw 0x341, %0" ::"r"(value)); // MEPC (0x341) +} + +static inline uint32_t get_mcause(){ + uint32_t mcause; + asm volatile("csrr %0, 0x342" :"=r"(mcause):); // MCAUSE (0x342) + return mcause; +} + +static inline uint32_t get_privlv(){ + uint32_t privlv; + asm volatile("csrr %0, 0xc10" :"=r"(privlv):); // PRIVLV (0xC10) + return privlv; +} + +static inline uint32_t get_uhartid(){ + uint32_t uhartid; + asm volatile("csrr %0, 0x014" :"=r"(uhartid):); // UHARTID (0x014) + return uhartid; +} + +// Flex-V performance counter control +static inline void perf_counter_enable(){ + uint32_t pcer = 3; // Enable cycles (bit 0) and instruction count (bit 1) + uint32_t pcmr = 1; // Enable global performance counter + asm volatile("csrw 0x7e0, %0" ::"r"(pcer)); // PCER_MACHINE (0x7E0) + asm volatile("csrw 0x7e1, %0" ::"r"(pcmr)); // PCMR_MACHINE (0x7E1) +} + +static inline void perf_counter_disable(){ + uint32_t pcmr = 0; // Disable global performance counter + asm volatile("csrw 0x7e1, %0" ::"r"(pcmr)); // PCMR_MACHINE (0x7E1) +} + #endif /*MAGIA_TILE_UTILS_H*/ diff --git a/sw/utils/magia_utils.h b/sw/utils/magia_utils.h index fc21b05..fac3864 100644 --- a/sw/utils/magia_utils.h +++ b/sw/utils/magia_utils.h @@ -45,9 +45,11 @@ static inline uint32_t get_hartid(){ uint32_t hartid; + // In MAGIA: hartid = tile_id (each tile contains exactly one Flex-V core) + // mhartid_i is passed to each magia_tile and represents both tile and hart ID asm volatile("csrr %0, mhartid" :"=r"(hartid):); - return hartid; + return hartid; // Hart ID = Tile ID in MAGIA architecture } static inline void amo_increment(volatile uint32_t addr, volatile uint32_t amnt){ diff --git a/sw/utils/redmule_isa_utils.h b/sw/utils/redmule_isa_utils.h deleted file mode 100644 index e4c91f4..0000000 --- a/sw/utils/redmule_isa_utils.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright (C) 2023-2024 ETH Zurich and University of Bologna - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you 
may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * SPDX-License-Identifier: Apache-2.0 - * - * Authors: Victor Isachi - * - * MAGIA RedMulE ISA Utils - */ - -#ifndef REDMULE_ISA_UTILS_H -#define REDMULE_ISA_UTILS_H - -/* mcnfig instruction */ - // asm volatile( - // ".word (0x0 << 25) | \ /* Empty */ - // (0b00110 << 20) | \ /* Rs2 - t1 */ - // (0b00101 << 15) | \ /* Rs1 - t0 */ - // (0x00 << 7) | \ /* Empty */ - // (0b0001011 << 0) \n"); /* OpCode */ -static inline void redmule_mcnfig(volatile uint16_t k_size, volatile uint16_t m_size, volatile uint16_t n_size){ - uint32_t cfg_reg0 = (k_size << 16) | (m_size << 0); - uint32_t cfg_reg1 = n_size << 0; - asm volatile("addi t0, %0, 0" ::"r"(cfg_reg0)); - asm volatile("addi t1, %0, 0" ::"r"(cfg_reg1)); - asm volatile( - ".word (0x0 << 25) | \ - (0b00110 << 20) | \ - (0b00101 << 15) | \ - (0x00 << 7) | \ - (0b0001011 << 0) \n"); -} - -/* marith instruction */ - // asm volatile( - // ".word (0b00111 << 27) | \ /* Rs3 - t2 */ - // (0b00 << 25) | \ /* Empty */ - // (0b00110 << 20) | \ /* Rs2 - t1 */ - // (0b00101 << 15) | \ /* Rs1 - t0 */ - // (0b0 << 14) | \ /* Custom format enable/disable */ - // (0b0 << 13) | \ /* Widening enable/disable */ - // (0b001 << 10) | \ /* Operation selection */ - // (0b001 << 7) | \ /* Data format */ - // (0b0101011 << 0) \n"); /* OpCode */ -static inline void redmule_marith(volatile uint32_t y_base, volatile uint32_t w_base, volatile uint32_t x_base){ - asm volatile("addi t2, %0, 0" ::"r"(y_base)); - asm volatile("addi t1, %0, 0" ::"r"(w_base)); - asm 
volatile("addi t0, %0, 0" ::"r"(x_base)); - asm volatile( - ".word (0b00111 << 27) | \ - (0b00 << 25) | \ - (0b00110 << 20) | \ - (0b00101 << 15) | \ - (0b0 << 14) | \ - (0b0 << 13) | \ - (0b001 << 10) | \ - (0b001 << 7) | \ - (0b0101011 << 0) \n"); -} - -#endif /*REDMULE_ISA_UTILS_H*/ diff --git a/sw/utils/redmule_mm_utils.h b/sw/utils/redmule_mm_utils.h new file mode 100644 index 0000000..b39468c --- /dev/null +++ b/sw/utils/redmule_mm_utils.h @@ -0,0 +1,146 @@ +/* + * Copyright (C) 2023-2024 ETH Zurich and University of Bologna + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * SPDX-License-Identifier: Apache-2.0 + * + * + * RedMulE MM Utilities + * + * This header contains MM-related definitions and functions for RedMulE control + * using MMIO-based register access. 
+ */ + +#ifndef REDMULE_MM_UTILS_H +#define REDMULE_MM_UTILS_H + +#include +#include "magia_tile_utils.h" + +/* OBI2HWPE Protocol Stamps */ +#define REDMULE_ADDR_BASE REDMULE_BASE + +#define HWPE_WRITE(value, offset) *(volatile int *)(REDMULE_ADDR_BASE + offset) = value +#define HWPE_READ(offset) *(volatile int *)(REDMULE_ADDR_BASE + offset) + +/* Register offsets (RedMulE hwpe-ctrl) */ +#define REDMULE_REG_OFFS 0x00 +#define REDMULE_TRIGGER 0x00 +#define REDMULE_ACQUIRE 0x04 +#define REDMULE_EVT_ENABLE 0x08 +#define REDMULE_STATUS 0x0C +#define REDMULE_RUNNING_JOB 0x10 +#define REDMULE_SOFT_CLEAR 0x14 + +/* RedMulE configuration registers */ +#define REDMULE_REG_X_PTR 0x40 +#define REDMULE_REG_W_PTR 0x44 +#define REDMULE_REG_Z_PTR 0x48 +#define REDMULE_MCFG0_PTR 0x4C +#define REDMULE_MCFG1_PTR 0x50 +#define REDMULE_ARITH_PTR 0x54 + +/* Operations and formats */ +#define gemm_ops 0x1 +#define Float16 0x1 +#define Float16Alt 0x2 +#define Float8 0x3 +#define Float8Alt 0x4 + +/* HWPE Register Access Functions */ +static inline void redmule_x_add_set(unsigned int value) { + HWPE_WRITE(value, REDMULE_REG_OFFS + REDMULE_REG_X_PTR); +} + +static inline void redmule_w_add_set(unsigned int value) { + HWPE_WRITE(value, REDMULE_REG_OFFS + REDMULE_REG_W_PTR); +} + +static inline void redmule_z_add_set(unsigned int value) { + HWPE_WRITE(value, REDMULE_REG_OFFS + REDMULE_REG_Z_PTR); +} + +static inline void redmule_mcfg_set(uint32_t mcfg0, uint32_t mcfg1) { + HWPE_WRITE(mcfg0, REDMULE_REG_OFFS + REDMULE_MCFG0_PTR); + HWPE_WRITE(mcfg1, REDMULE_REG_OFFS + REDMULE_MCFG1_PTR); +} + +static inline void redmule_arith_set(uint32_t arith) { + HWPE_WRITE(arith, REDMULE_REG_OFFS + REDMULE_ARITH_PTR); +} + +static inline void hwpe_trigger_job() { + HWPE_WRITE(0, REDMULE_TRIGGER); +} + +static inline int hwpe_acquire_job() { + int result = HWPE_READ(REDMULE_ACQUIRE); + return result; +} + +static inline unsigned int hwpe_get_status() { + unsigned int result = HWPE_READ(REDMULE_STATUS); + 
return result; +} + +static inline void hwpe_soft_clear() { + HWPE_WRITE(0, REDMULE_SOFT_CLEAR); +} + +static inline void hwpe_cg_enable() { + return; +} + +static inline void hwpe_cg_disable() { + return; +} + +static inline void hwpe_wait_for_completion() { + // Polling-based completion detection + unsigned int status; + unsigned int poll_count = 0; + unsigned int max_polls = 100000; + + do { + status = hwpe_get_status(); + poll_count++; + + // Small pause to not overload system + if (poll_count % 50 == 0) { + wait_nop(10); + } + + // Exit conditions: idle status (0) or timeout + if (status == 0 || poll_count >= max_polls) { + break; + } + + } while (1); +} + +/* RedMulE Configuration Function */ +static inline void redmule_cfg(unsigned int x, unsigned int w, unsigned int z, uint16_t m_size, uint16_t n_size, + uint16_t k_size, uint8_t gemm_op, uint8_t gemm_fmt) { + + uint32_t mcfg_reg0 = (k_size << 16) | (m_size << 0); + uint32_t mcfg_reg1 = n_size << 0; + uint32_t arith_reg = (gemm_op << 10) | (gemm_fmt << 7); + + redmule_x_add_set((unsigned int)x); + redmule_w_add_set((unsigned int)w); + redmule_z_add_set((unsigned int)z); + redmule_mcfg_set((unsigned int)mcfg_reg0, (unsigned int)mcfg_reg1); + redmule_arith_set((unsigned int)arith_reg); +} + +#endif /* REDMULE_MM_UTILS_H */ \ No newline at end of file diff --git a/target/sim/src/mesh/magia_vip.sv b/target/sim/src/mesh/magia_vip.sv index b430d17..05f53f9 100644 --- a/target/sim/src/mesh/magia_vip.sv +++ b/target/sim/src/mesh/magia_vip.sv @@ -258,7 +258,7 @@ module magia_vip int unsigned completed_syncs_id = 0; for (genvar i = 0; i < magia_tb_pkg::N_TILES_Y; i++) begin: gen_tile_instr_monitor_y for (genvar j = 0; j < magia_tb_pkg::N_TILES_X; j++) begin: gen_tile_instr_monitor_x - assign curr_instr_ex[i*magia_tb_pkg::N_TILES_X+j] = i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40x_core.core_i.id_stage_i.id_ex_pipe_o.instr.bus_resp.rdata; + assign curr_instr_ex[i*magia_tb_pkg::N_TILES_X+j] = 
i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40p_core.ex_valid ? i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40p_core.id_stage_i.instr_rdata_i : '0; always @(curr_instr_ex[i*magia_tb_pkg::N_TILES_X+j]) begin: instr_ex_reporter if (curr_instr_ex[i*magia_tb_pkg::N_TILES_X+j] == 32'h50500013) $display("[TB][mhartid %0d - Tile (%0d, %0d)] detected sentinel instruction in EX stage at time %0dns", i*magia_tb_pkg::N_TILES_X+j, i, j, time_var); @@ -274,7 +274,7 @@ module magia_vip completed_syncs_ex++; end end - assign curr_instr_id[i*magia_tb_pkg::N_TILES_X+j] = i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40x_core.core_i.id_stage_i.if_id_pipe_i.instr.bus_resp.rdata; + assign curr_instr_id[i*magia_tb_pkg::N_TILES_X+j] = i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40p_core.id_stage_i.instr_rdata_i; always @(curr_instr_id[i*magia_tb_pkg::N_TILES_X+j]) begin: instr_id_reporter if (curr_instr_id[i*magia_tb_pkg::N_TILES_X+j] == 32'h40400013) $display("[TB][mhartid %0d - Tile (%0d, %0d)] detected sentinel instruction in ID stage at time %0dns", i*magia_tb_pkg::N_TILES_X+j, i, j, time_var); @@ -320,8 +320,8 @@ module magia_vip int unsigned sync_iteration = 0; for (genvar i = 0; i < magia_tb_pkg::N_TILES_Y; i++) begin: gen_tile_instr_monitor_y for (genvar j = 0; j < magia_tb_pkg::N_TILES_X; j++) begin: gen_tile_instr_monitor_x - assign curr_instr_wb[i*magia_tb_pkg::N_TILES_X+j] = i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40x_core.core_i.wb_stage_i.ex_wb_pipe_i.instr_valid ? - i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40x_core.core_i.wb_stage_i.ex_wb_pipe_i.instr.bus_resp.rdata : '0; + assign curr_instr_wb[i*magia_tb_pkg::N_TILES_X+j] = i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40p_core.wb_valid ? 
+ i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40p_core.regfile_wdata : '0; always @(curr_instr_wb[i*magia_tb_pkg::N_TILES_X+j]) begin: instr_wb_reporter if (curr_instr_wb[i*magia_tb_pkg::N_TILES_X+j] == 32'h5AA00013) begin start_sentinel[i*magia_tb_pkg::N_TILES_X+j].push_back($time); @@ -475,8 +475,8 @@ module magia_vip time sentinel_latency[magia_tb_pkg::N_TILES]; for (genvar i = 0; i < magia_tb_pkg::N_TILES_Y; i++) begin: gen_tile_instr_monitor_y for (genvar j = 0; j < magia_tb_pkg::N_TILES_X; j++) begin: gen_tile_instr_monitor_x - assign curr_instr_wb[i*magia_tb_pkg::N_TILES_X+j] = i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40x_core.core_i.wb_stage_i.ex_wb_pipe_i.instr_valid ? - i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40x_core.core_i.wb_stage_i.ex_wb_pipe_i.instr.bus_resp.rdata : '0; + assign curr_instr_wb[i*magia_tb_pkg::N_TILES_X+j] = i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40p_core.wb_valid ? + i_magia.gen_y_tile[i].gen_x_tile[j].i_magia_tile.i_cv32e40p_core.regfile_wdata : '0; always @(curr_instr_wb[i*magia_tb_pkg::N_TILES_X+j]) begin: instr_wb_reporter if (curr_instr_wb[i*magia_tb_pkg::N_TILES_X+j] == 32'h5AA00013) begin start_sentinel[i*magia_tb_pkg::N_TILES_X+j].push_back($time); diff --git a/target/sim/src/tile/magia_tile_fixture.sv b/target/sim/src/tile/magia_tile_fixture.sv index ec782c7..af647fa 100644 --- a/target/sim/src/tile/magia_tile_fixture.sv +++ b/target/sim/src/tile/magia_tile_fixture.sv @@ -159,10 +159,7 @@ module magia_tile_fixture; .core_sleep_o ( core_sleep ), .wu_wfe_i ( wu_wfe ) ); - `ifdef CORE_TRACES - localparam string core_trace_file_name = "log_file_0"; - defparam i_magia_tile.i_cv32e40x_core.rvfi_i.tracer_i.LOGFILE_PATH_PLUSARG = core_trace_file_name; - `endif + // Note: cv32e40p tracer generates its own filename: trace_core_{cluster_id}_{core_id}.log /*******************************************************/ /** DUT End **/ diff --git a/target/sim/src/tile/magia_tile_vip.sv 
b/target/sim/src/tile/magia_tile_vip.sv index d30e36c..f19eb07 100644 --- a/target/sim/src/tile/magia_tile_vip.sv +++ b/target/sim/src/tile/magia_tile_vip.sv @@ -246,7 +246,7 @@ end `ifdef PROFILE_DETAILED bit[31:0] curr_instr; - assign curr_instr = i_magia_tile.i_cv32e40x_core.core_i.if_stage_i.if_id_pipe_o.instr.bus_resp.rdata; + assign curr_instr = i_magia_tile.i_cv32e40p_core.id_stage_i.instr_rdata_i; always @(curr_instr) begin: instr_reporter if (curr_instr == 32'h50500013) $display("[TB] detected sentinel instruction at time %0dns", time_var); end @@ -258,8 +258,8 @@ end time start_sentinel[$]; time end_sentinel[$]; time sentinel_latency; - assign curr_instr_wb = i_magia_tile.i_cv32e40x_core.core_i.wb_stage_i.ex_wb_pipe_i.instr_valid ? - i_magia_tile.i_cv32e40x_core.core_i.wb_stage_i.ex_wb_pipe_i.instr.bus_resp.rdata : '0; + assign curr_instr_wb = i_magia_tile.i_cv32e40p_core.wb_valid ? + i_magia_tile.i_cv32e40p_core.regfile_wdata : '0; always @(curr_instr_wb) begin: instr_wb_reporter if (curr_instr_wb == 32'h5AA00013) begin start_sentinel.push_back($time);