-
Notifications
You must be signed in to change notification settings - Fork 484
Vivado accelerator axi master interface #683
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
f42ffe2
10b515b
f775a0c
db442b6
17c0fa5
a3f134f
4d44359
7b425c5
5c6c356
109773c
8c204ba
e417e63
ed47f1e
5b7873c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,76 @@ | ||
| from pynq import DefaultHierarchy, DefaultIP, allocate | ||
| from pynq import Overlay | ||
| from datetime import datetime | ||
| import pynq.lib.dma | ||
| import numpy as np | ||
|
|
||
|
|
||
class NeuralNetworkOverlay(Overlay):
    """PYNQ overlay that drives the ``myproject_axi_0`` accelerator through its AXI master interface.

    Input and output buffers are allocated once at construction time with ``pynq.allocate`` and
    reused by every call to :meth:`predict`.

    Args:
        bitfile_name: Bitstream file forwarded to ``pynq.Overlay``.
        x_shape: Shape of the input buffer to allocate.
        y_shape: Shape of the output buffer to allocate.
        dtype: Element type of both buffers. It has to match the interface of the accelerator:
            if the accelerator uses ``float`` data, ``np.float32`` is the correct choice. If it
            uses ``ap_fixed<A,B>``, ``np.intA`` is the correct choice (note that A cannot be any
            integer value, only one of {..., 8, 16, 32, ...}; check the ``numpy`` documentation
            for more information). In that case the encoding/decoding has to be computed by the
            PS, see :meth:`predict`.
        dtbo: Forwarded to ``pynq.Overlay``.
        download: Forwarded to ``pynq.Overlay``.
        ignore_version: Forwarded to ``pynq.Overlay``.
        device: Forwarded to ``pynq.Overlay``.
    """

    def __init__(self, bitfile_name, x_shape, y_shape, dtype=np.float32, dtbo=None, download=True,
                 ignore_version=False, device=None):
        # Forward the caller's options instead of hard-coded defaults, otherwise
        # e.g. ``download=False`` or a custom ``device`` would be silently ignored.
        super().__init__(bitfile_name, dtbo=dtbo, download=download, ignore_version=ignore_version,
                         device=device)
        self.regin = self.myproject_axi_0.register_map.in_r.address
        self.regout = self.myproject_axi_0.register_map.out_r.address
        self.ctrl = self.myproject_axi_0.register_map.CTRL
        self.input_buffer = allocate(shape=x_shape, dtype=dtype)
        self.output_buffer = allocate(shape=y_shape, dtype=dtype)

    def _print_dt(self, timea, timeb, N):
        """Print and return the elapsed time and the inference rate.

        Args:
            timea: ``datetime`` taken before the inference.
            timeb: ``datetime`` taken after the inference.
            N: Number of classified samples.

        Returns:
            tuple: ``(dts, rate)``, the elapsed seconds and the inferences per second.
        """
        # total_seconds() also accounts for the ``days`` component of the timedelta.
        dts = (timeb - timea).total_seconds()
        rate = N / dts
        print("Classified {} samples in {} seconds ({} inferences / s)".format(N, dts, rate))
        return dts, rate

    def predict(self, X, debug=False, profile=False, encode=None, decode=None):
        """Obtain the predictions of the NN implemented in the FPGA.

        When the accelerator uses a fixed-point type, the conversion from/to floating point has
        to be done on the PS side through ``encode``/``decode``. For example, for
        ``ap_fixed<16,6>`` the following two functions are the correct ones to use:

        ```
        def encode(xi):
            return np.int16(round(xi * 2**10))  # note 2**10 = 2**(A-B)
        def decode(yi):
            return yi * 2**-10
        encode_v = np.vectorize(encode)  # to apply them element-wise
        decode_v = np.vectorize(decode)
        ```

        Args:
            X: The input vector. Should be a numpy ndarray.
            debug: Set it to ``True`` to print progress messages while configuring and polling
                the accelerator.
            profile: Set it to ``True`` to print the performance of the algorithm in terms of
                inferences/s.
            encode: Optional function applied to ``X`` before it is copied to the input buffer.
            decode: Optional function applied to the output buffer before it is returned.

        Returns:
            The output buffer (or the result of ``decode`` applied to it). When ``profile`` is
            ``True`` a tuple ``(output, dts, rate)`` is returned instead, where ``dts`` is the
            elapsed time in seconds and ``rate`` the number of inferences per second.
        """
        if profile:
            timea = datetime.now()
        if encode is not None:
            X = encode(X)
        self.input_buffer[:] = X
        self.myproject_axi_0.write(self.regin, self.input_buffer.physical_address)
        self.myproject_axi_0.write(self.regout, self.output_buffer.physical_address)
        # Set the start bit through the register-map field. The previous code passed the
        # *value* of AP_START as the write offset, which only worked while that bit read 0.
        self.ctrl.AP_START = 1
        if debug:
            print("Config OK")
        while not self.ctrl.AP_DONE:
            if debug:
                print("Polling...")
        if debug:
            print("Done OK")
        # Keep the allocated buffer bound to ``self.output_buffer``: rebinding it to the
        # decoded array would drop the DMA buffer needed by the next call.
        result = self.output_buffer
        if decode is not None:
            result = decode(self.output_buffer)

        if profile:
            timeb = datetime.now()
            dts, rate = self._print_dt(timea, timeb, len(X))
            return result, dts, rate
        return result
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,93 @@ | ||
# Vivado block-design script: instantiates the Zynq-7000 PS and the HLS accelerator
# (AXI-Lite control bus plus two AXI master data buses), wires them together, runs
# synthesis/implementation up to the bitstream and exports the hardware description.
set tcldir [file dirname [info script]]
# project.tcl is expected to define ${project_name} (used throughout this script).
source [file join $tcldir project.tcl]

# Project names
set design_name "design_1"
set hls_solution_name "solution1"
set ps_name "processing_system7_0"
set acc_name "${project_name}_axi_0"

# Board and chip part names
create_project ${project_name} ${project_name}_vivado_accelerator -part xc7z020clg400-1 -force
set_property board_part tul.com.tw:pynq-z2:part0:1.0 [current_project]

# Create block design
create_bd_design ${design_name}

# Setup IP repo
#set_property ip_repo_paths ${project_name}_prj [current_project]
set_property ip_repo_paths ${project_name}_prj/${hls_solution_name}/impl/ip [current_project]
update_ip_catalog

# Create and setup PS
create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 ${ps_name}
apply_bd_automation -rule xilinx.com:bd_rule:processing_system7 -config " \
    make_external {FIXED_IO, DDR} \
    apply_board_preset {1} \
    Master {Disable} \
    Slave {Disable} " [get_bd_cells ${ps_name}]
# Enable the S_AXI_GP0 slave port (target of the accelerator's AXI masters)
# and the fabric-to-PS interrupt input.
set_property -dict [list \
    CONFIG.PCW_USE_S_AXI_GP0 {1} \
    CONFIG.PCW_USE_FABRIC_INTERRUPT {1} \
    CONFIG.PCW_IRQ_F2P_INTR {1} \
] [get_bd_cells ${ps_name}]

# Create accelerator
create_bd_cell -type ip -vlnv xilinx.com:hls:myproject_axi:1.0 ${acc_name}

# Wiring
# PS master (M_AXI_GP0) -> accelerator control bus
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config " \
    Clk_master {Auto} \
    Clk_slave {Auto} \
    Clk_xbar {Auto} \
    Master /${ps_name}/M_AXI_GP0 \
    Slave /${acc_name}/s_axi_CTRL_BUS \
    intc_ip {New AXI Interconnect} \
    master_apm {0}" [get_bd_intf_pins ${acc_name}/s_axi_CTRL_BUS]

# Accelerator input master (m_axi_IN_BUS) -> PS slave (S_AXI_GP0)
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config " \
    Clk_master {Auto} \
    Clk_slave {Auto} \
    Clk_xbar {Auto} \
    Master /${acc_name}/m_axi_IN_BUS \
    Slave /${ps_name}/S_AXI_GP0 \
    intc_ip {Auto} \
    master_apm {0}" [get_bd_intf_pins ${ps_name}/S_AXI_GP0]

# Accelerator output master (m_axi_OUT_BUS) -> PS slave (S_AXI_GP0) through /axi_smc
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config " \
    Clk_master /${ps_name}/FCLK_CLK0 (100 MHz) \
    Clk_slave /${ps_name}/FCLK_CLK0 (100 MHz) \
    Clk_xbar /${ps_name}/FCLK_CLK0 (100 MHz) \
    Master /${acc_name}/m_axi_OUT_BUS \
    Slave /${ps_name}/S_AXI_GP0 \
    intc_ip {/axi_smc} \
    master_apm {0}" [get_bd_intf_pins ${acc_name}/m_axi_OUT_BUS]

# Wiring interrupt signal
connect_bd_net [get_bd_pins ${acc_name}/interrupt] [get_bd_pins ${ps_name}/IRQ_F2P]

# Top level wrapper
make_wrapper -files [get_files ./${project_name}_vivado_accelerator/${project_name}.srcs/sources_1/bd/${design_name}/${design_name}.bd] -top
add_files -norecurse ./${project_name}_vivado_accelerator/${project_name}.srcs/sources_1/bd/${design_name}/hdl/${design_name}_wrapper.v

# Memory mapping
# The accelerator cell is named ${acc_name} (= ${project_name}_axi_0); the first
# line previously used ${project_name}, which does not name any cell in the design.
delete_bd_objs [get_bd_addr_segs ${acc_name}/Data_m_axi_IN_BUS/SEG_${ps_name}_GP0_QSPI_LINEAR]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_IN_BUS/SEG_${ps_name}_GP0_IOP]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_IN_BUS/SEG_${ps_name}_GP0_M_AXI_GP0]
delete_bd_objs [get_bd_addr_segs ${acc_name}/Data_m_axi_OUT_BUS/SEG_${ps_name}_GP0_QSPI_LINEAR]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_OUT_BUS/SEG_${ps_name}_GP0_IOP]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_OUT_BUS/SEG_${ps_name}_GP0_M_AXI_GP0]

# Run synthesis and implementation
reset_run impl_1
reset_run synth_1
launch_runs impl_1 -to_step write_bitstream -jobs 6
wait_on_run -timeout 360 impl_1

# Reporting
open_run impl_1
report_utilization -file util.rpt -hierarchical -hierarchical_percentages

# Export HDF file for SDK flow
file mkdir ./hdf
file copy -force ${project_name}_vivado_accelerator/${project_name}.runs/impl_1/${design_name}_wrapper.sysdef ./hdf/${design_name}_wrapper.hdf
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -82,11 +82,18 @@ def write_axi_wrapper(self, model): | |
|
|
||
| io_type = model.config.get_config_value("IOType") | ||
|
|
||
| model_brams = [var for var in model.get_weight_variables() if var.storage.lower() == 'bram'] | ||
|
|
||
| for line in f.readlines(): | ||
| if 'void myproject(' in line: | ||
| newline = 'void {}_axi(\n'.format(model.config.get_project_name()) | ||
| elif '//hls-fpga-machine-learning insert include' in line: | ||
| newline = '#include "{}_axi.h"\n'.format(model.config.get_project_name()) | ||
| for b in model_brams: | ||
| newline += '#include "weights/{}.h"\n'.format(b.name) | ||
| newline += '\n' | ||
| if model_brams: | ||
| newline += '#include "nnet_utils/nnet_helpers.h"\n' | ||
| elif '//hls-fpga-machine-learning insert local vars' in line: | ||
| newline = '' | ||
| if self.vivado_accelerator_config.get_interface() == 'axi_stream': | ||
|
|
@@ -102,8 +109,8 @@ def write_axi_wrapper(self, model): | |
| newline += indent + '#pragma HLS STREAM variable=out_local depth={}\n'\ | ||
| .format(model.get_output_variables()[0].pragma[1]) | ||
| elif '//hls-fpga-machine-learning insert call' in line: | ||
| newline = indent + '{}(in_local, out_local);\n'.format( | ||
| model.config.get_project_name()) | ||
| brams_str = (''.join([', ' + b.name for b in model_brams])) if len(model_brams) > 0 else ""; | ||
| newline = indent + '{}(in_local, out_local{});\n'.format(model.config.get_project_name(), brams_str) | ||
| elif '//hls-fpga-machine-learning insert interface' in line: | ||
| if self.vivado_accelerator_config.get_interface() == 'axi_lite': | ||
| newline = '' | ||
|
|
@@ -124,6 +131,23 @@ def write_axi_wrapper(self, model): | |
| newline += indent + '#pragma HLS INTERFACE ap_ctrl_none port=return\n' | ||
| if model.config.get_config_value("IOType") == 'io_stream': | ||
| newline += indent + '#pragma HLS DATAFLOW\n' | ||
| elif '//hls-fpga-machine-learning insert load weights' in line: | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would strongly discourage this style of code in the writer. What's the point of templates if they are blank and all the logic is written by the writer? It just clutters the writer. This is much better addressed with a template that contains compile-time constants to select the functionality, so that the writer just fills out the few lines with specific weights. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Isn't this a copy mainly from […]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @jmitrevs, yes, I moved the code from the […] |
||
| newline = '' | ||
| if model_brams: | ||
| newline += '#ifndef __SYNTHESIS__\n' | ||
| newline += indent + 'static bool loaded_weights = false;\n' | ||
| newline += indent + 'if (!loaded_weights) {\n' | ||
| newline += indent + ' loaded_weights = true;\n' | ||
| for layer in model.get_layers(): | ||
| for w in layer.get_weights(): | ||
| if w.weight_class == 'CompressedWeightVariable': | ||
| newline += indent + ' nnet::load_compressed_weights_from_txt<{}, {}>({}, "{}.txt");\n'.format(w.type.name, w.nonzeros, w.name, w.name) | ||
| elif w.weight_class == 'ExponentWeightVariable': | ||
| newline += indent + ' nnet::load_exponent_weights_from_txt<{}, {}>({}, "{}.txt");\n'.format(w.type.name, w.data_length, w.name, w.name) | ||
| else: | ||
| newline += indent + ' nnet::load_weights_from_txt<{}, {}>({}, "{}.txt");\n'.format(w.type.name, w.data_length, w.name, w.name) | ||
| newline += indent + '}\n' | ||
| newline += '#endif\n' | ||
| elif '//hls-fpga-machine-learning insert enqueue' in line: | ||
| io_type = model.config.get_config_value("IOType") | ||
| if io_type == 'io_parallel': | ||
|
|
@@ -139,10 +163,12 @@ def write_axi_wrapper(self, model): | |
| newline += indent + '}\n' | ||
| elif io_type == 'io_stream': | ||
| newline = '' | ||
| newline += 'LOAD_INPUT_OUTER_LOOP:\n' | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What's the point of this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also, can we remove the explicit creation of loops and move that to the template, as suggested above? |
||
| newline += indent + 'for(unsigned i = 0; i < N_IN / {input_t}::size; ++i) {{\n' | ||
| # newline += indent + indent + '#pragma HLS PIPELINE\n' | ||
| newline += indent + indent + '{input_t} ctype;\n' | ||
| newline += indent + indent + '#pragma HLS DATA_PACK variable=ctype\n' | ||
| newline += 'LOAD_INPUT_INNER_LOOP:\n' | ||
| newline += indent + indent + 'for(unsigned j = 0; j < {input_t}::size; j++) {{\n' | ||
| # newline += indent + indent + indent + '#pragma HLS UNROLL\n' | ||
| if self.vivado_accelerator_config.get_interface() == 'axi_stream': | ||
|
|
@@ -169,9 +195,11 @@ def write_axi_wrapper(self, model): | |
| newline += indent + '}\n' | ||
| elif io_type == 'io_stream': | ||
| newline = '' | ||
| newline += 'STORE_OUTPUT_OUTER_LOOP:\n' | ||
| newline += indent + 'for(unsigned i = 0; i < N_OUT / {result_t}::size; ++i) {{\n' | ||
| # newline += indent + indent + '#pragma HLS PIPELINE\n' | ||
| newline += indent + indent + '{result_t} ctype = out_local.read();\n' | ||
| newline += 'STORE_OUTPUT_INNER_LOOP:\n' | ||
| newline += indent + indent + 'for(unsigned j = 0; j < {result_t}::size; j++) {{\n' | ||
| # newline += indent + indent + indent + '#pragma HLS UNROLL\n' | ||
| if self.vivado_accelerator_config.get_interface() == 'axi_stream': | ||
|
|
@@ -188,6 +216,35 @@ def write_axi_wrapper(self, model): | |
| f.close() | ||
| fout.close() | ||
|
|
||
def modify_project_cpp(self, model):
    """Strip interface pragmas and weight-loading calls from the generated project source.

    Rewrites ``<output_dir>/firmware/<project_name>.cpp`` in place, dropping every line that
    contains an ``axis``/``bram`` ``#pragma HLS INTERFACE`` directive or a call to one of the
    ``nnet::load_*weights_from_txt`` helpers. All other lines are kept unchanged.

    Args:
        model: Model whose ``config`` provides ``get_output_dir()`` and ``get_project_name()``.
    """
    # NOTE(review): a reviewer suggested adding a flag to the VivadoWriter so that it does not
    # write the interface in the first place, instead of post-processing the file here.
    project_cpp = '{}/firmware/{}.cpp'.format(model.config.get_output_dir(), model.config.get_project_name())
    # Use a scratch file next to the target rather than borrowing the name of a build script.
    scratch_cpp = project_cpp + '.tmp'

    dropped_markers = (
        '#pragma HLS INTERFACE axis port=',
        '#pragma HLS INTERFACE bram port=',
        'nnet::load_weights_from_txt',
        'nnet::load_exponent_weights_from_txt',
        'nnet::load_compressed_weights_from_txt',
    )

    # Context managers close both files even if reading or writing fails.
    with open(project_cpp, 'r') as src, open(scratch_cpp, 'w') as dst:
        for line in src:
            if not any(marker in line for marker in dropped_markers):
                dst.write(line)

    # os.replace overwrites the existing target on every platform (os.rename fails on Windows).
    os.replace(scratch_cpp, project_cpp)
|
|
||
| def modify_build_script(self, model): | ||
| ''' | ||
| Modify the build_prj.tcl and build_lib.sh scripts to add the extra wrapper files and set the top function | ||
|
|
@@ -369,6 +426,7 @@ def write_hls(self, model): | |
| self.write_driver(model) | ||
| self.write_wrapper_test(model) | ||
| self.write_axi_wrapper(model) | ||
| self.modify_project_cpp(model) | ||
| self.modify_build_script(model) | ||
| self.write_new_tar(model) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,10 +40,10 @@ def print_array_to_cpp(self, var, odir, write_txt_file=True): | |
|
|
||
| if write_txt_file: | ||
| h_file.write("#ifndef __SYNTHESIS__\n") | ||
| h_file.write(var.definition_cpp() + ";\n") | ||
| h_file.write("static " + var.definition_cpp() + ";\n") | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
| h_file.write("#else\n") | ||
|
|
||
| h_file.write(var.definition_cpp() + " = {") | ||
| h_file.write("static " + var.definition_cpp() + " = {") | ||
|
|
||
| # fill c++ array. | ||
| # not including internal brackets for multidimensional case | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we convert this docstring to Google style, like is used elsewhere in the code?