Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
14 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions hls4ml/backends/vivado_accelerator/supported_boards.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"pynq-z2": {
"part": "xc7z020clg400-1",
"tcl_scripts": {"axi_lite": "axi_lite_design.tcl", "axi_stream": "axi_stream_design.tcl"},
"python_drivers": {"axi_stream": "axi_stream_driver.py"},
"tcl_scripts": {"axi_lite": "axi_lite_design.tcl", "axi_stream": "axi_stream_design.tcl", "axi_master": "axi_master_design.tcl"},
"python_drivers": {"axi_stream": "axi_stream_driver.py", "axi_master": "axi_master_driver.py"},
"c_drivers": {}
},
"zcu102": {
Expand Down
2 changes: 2 additions & 0 deletions hls4ml/templates/vivado_accelerator/myproject_axi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ void myproject(

//hls-fpga-machine-learning insert local vars

//hls-fpga-machine-learning insert load weights

//hls-fpga-machine-learning insert enqueue

//hls-fpga-machine-learning insert call
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
from pynq import DefaultHierarchy, DefaultIP, allocate
from pynq import Overlay
from datetime import datetime
import pynq.lib.dma
import numpy as np


class NeuralNetworkOverlay(Overlay):
    """PYNQ overlay driving a hls4ml accelerator over an AXI master interface.

    Allocates physically contiguous input/output buffers and controls the
    accelerator through its AXI-Lite register map (in/out pointers + CTRL).
    """

    def __init__(self, bitfile_name, x_shape, y_shape, dtype=np.float32, dtbo=None, download=True, ignore_version=False,
                 device=None):
        # Bug fix: forward the caller's arguments to Overlay.__init__ —
        # previously dtbo/download/ignore_version/device were silently
        # replaced with hard-coded defaults.
        super().__init__(bitfile_name, dtbo=dtbo, download=download, ignore_version=ignore_version, device=device)
        # Addresses of the accelerator's input/output pointer registers.
        self.regin = self.myproject_axi_0.register_map.in_r.address
        self.regout = self.myproject_axi_0.register_map.out_r.address
        self.ctrl = self.myproject_axi_0.register_map.CTRL
        # Contiguous (DMA-capable) buffers shared with the programmable logic.
        self.input_buffer = allocate(shape=x_shape, dtype=dtype)
        self.output_buffer = allocate(shape=y_shape, dtype=dtype)

    def _print_dt(self, timea, timeb, N):
        """Print and return (seconds, inferences/s) for N samples between two datetimes."""
        dt = (timeb - timea)
        dts = dt.seconds + dt.microseconds * 10 ** -6
        rate = N / dts
        print("Classified {} samples in {} seconds ({} inferences / s)".format(N, dts, rate))
        return dts, rate

    def predict(self, X, debug=False, profile=False, encode=None, decode=None):
        """Obtain the predictions of the NN implemented in the FPGA.

        Args:
            X: input vector; numpy ndarray copied into the contiguous input buffer.
            debug: if True, print progress messages while polling the accelerator.
            profile: if True, also measure and return the throughput of the call.
            encode: optional callable applied to X before the transfer. Needed when
                the accelerator interface uses 'ap_fixed<A,B>' instead of 'float';
                encoding/decoding must then be computed by the PS, e.g. for
                'ap_fixed<16,6>'::

                    def encode(xi):
                        return np.int16(round(xi * 2**10)) # note 2**10 = 2**(A-B)
                    def decode(yi):
                        return yi * 2**-10
                    encode_v = np.vectorize(encode) # to apply them element-wise
                    decode_v = np.vectorize(decode)

            decode: optional callable applied to the output after the transfer.

        Returns:
            An output array with shape `y_shape` and the constructor's `dtype`;
            when `profile` is True, a tuple ``(output, seconds, inferences/s)``.
        """
        if profile:
            timea = datetime.now()
        if encode is not None:
            X = encode(X)
        self.input_buffer[:] = X
        # Program the accelerator with the physical addresses of the buffers,
        # then kick it off via AP_START.
        self.myproject_axi_0.write(self.regin, self.input_buffer.physical_address)
        self.myproject_axi_0.write(self.regout, self.output_buffer.physical_address)
        self.myproject_axi_0.write(self.ctrl.AP_START, 0x1)
        if debug:
            print("Config OK")
        # Busy-poll the AP_DONE control bit until the accelerator finishes.
        while not self.ctrl.AP_DONE:
            if debug:
                print("Polling...")
        if debug:
            print("Done OK")
        # Bug fix: do not overwrite self.output_buffer with the decoded array —
        # that would drop the contiguous buffer (and its physical_address),
        # breaking every subsequent predict() call.
        result = self.output_buffer
        if decode is not None:
            result = decode(result)
        if profile:
            timeb = datetime.now()
            dts, rate = self._print_dt(timea, timeb, len(X))
            return result, dts, rate
        return result
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
set tcldir [file dirname [info script]]
source [file join $tcldir project.tcl]

# Project names
set design_name "design_1"
set hls_solution_name "solution1"
set ps_name "processing_system7_0"
set acc_name "${project_name}_axi_0"

# Board and chip part names
create_project ${project_name} ${project_name}_vivado_accelerator -part xc7z020clg400-1 -force
set_property board_part tul.com.tw:pynq-z2:part0:1.0 [current_project]

# Create block design
create_bd_design ${design_name}

# Setup IP repo: point the catalog at the exported HLS IP
set_property ip_repo_paths ${project_name}_prj/${hls_solution_name}/impl/ip [current_project]
update_ip_catalog

# Create and setup PS (enable a GP slave port for the accelerator's AXI
# masters, plus a fabric interrupt line)
create_bd_cell -type ip -vlnv xilinx.com:ip:processing_system7:5.5 ${ps_name}
apply_bd_automation -rule xilinx.com:bd_rule:processing_system7 -config " \
	make_external {FIXED_IO, DDR} \
	apply_board_preset {1} \
	Master {Disable} \
	Slave {Disable} " [get_bd_cells ${ps_name}]
set_property -dict [list \
	CONFIG.PCW_USE_S_AXI_GP0 {1} \
	CONFIG.PCW_USE_FABRIC_INTERRUPT {1} \
	CONFIG.PCW_IRQ_F2P_INTR {1}\
	] [get_bd_cells ${ps_name}]

# Create accelerator
create_bd_cell -type ip -vlnv xilinx.com:hls:myproject_axi:1.0 ${acc_name}

# Wiring: PS GP master -> accelerator control bus
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config " \
	Clk_master {Auto} \
	Clk_slave {Auto} \
	Clk_xbar {Auto} \
	Master /${ps_name}/M_AXI_GP0 \
	Slave /${acc_name}/s_axi_CTRL_BUS \
	intc_ip {New AXI Interconnect} \
	master_apm {0}" [get_bd_intf_pins ${acc_name}/s_axi_CTRL_BUS]

# Wiring: accelerator input-read master -> PS GP slave
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config " \
	Clk_master {Auto} \
	Clk_slave {Auto} \
	Clk_xbar {Auto} \
	Master /${acc_name}/m_axi_IN_BUS \
	Slave /${ps_name}/S_AXI_GP0 \
	intc_ip {Auto} \
	master_apm {0}" [get_bd_intf_pins ${ps_name}/S_AXI_GP0]

# Wiring: accelerator output-write master -> PS GP slave (reuse the SmartConnect)
apply_bd_automation -rule xilinx.com:bd_rule:axi4 -config " \
	Clk_master /${ps_name}/FCLK_CLK0 (100 MHz) \
	Clk_slave /${ps_name}/FCLK_CLK0 (100 MHz) \
	Clk_xbar /${ps_name}/FCLK_CLK0 (100 MHz) \
	Master /${acc_name}/m_axi_OUT_BUS \
	Slave /${ps_name}/S_AXI_GP0 \
	intc_ip {/axi_smc} \
	master_apm {0}" [get_bd_intf_pins ${acc_name}/m_axi_OUT_BUS]

# Wiring interrupt signal
connect_bd_net [get_bd_pins ${acc_name}/interrupt] [get_bd_pins ${ps_name}/IRQ_F2P]

# Top level wrapper
make_wrapper -files [get_files ./${project_name}_vivado_accelerator/${project_name}.srcs/sources_1/bd/${design_name}/${design_name}.bd] -top
add_files -norecurse ./${project_name}_vivado_accelerator/${project_name}.srcs/sources_1/bd/${design_name}/hdl/${design_name}_wrapper.v

# Memory mapping: keep only the DDR segments reachable by the accelerator's
# AXI masters; drop QSPI/IOP/GP0 segments.
# Bug fix: the first segment path used ${project_name} while all of its
# siblings use the accelerator cell name ${acc_name} (= ${project_name}_axi_0);
# the address segment belongs to the accelerator cell.
delete_bd_objs [get_bd_addr_segs ${acc_name}/Data_m_axi_IN_BUS/SEG_${ps_name}_GP0_QSPI_LINEAR]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_IN_BUS/SEG_${ps_name}_GP0_IOP]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_IN_BUS/SEG_${ps_name}_GP0_M_AXI_GP0]
delete_bd_objs [get_bd_addr_segs ${acc_name}/Data_m_axi_OUT_BUS/SEG_${ps_name}_GP0_QSPI_LINEAR]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_OUT_BUS/SEG_${ps_name}_GP0_IOP]
delete_bd_objs [get_bd_addr_segs -excluded ${acc_name}/Data_m_axi_OUT_BUS/SEG_${ps_name}_GP0_M_AXI_GP0]

# Run synthesis and implementation
reset_run impl_1
reset_run synth_1
launch_runs impl_1 -to_step write_bitstream -jobs 6
wait_on_run -timeout 360 impl_1

# Reporting
open_run impl_1
report_utilization -file util.rpt -hierarchical -hierarchical_percentages

# Export HDF file for SDK flow
file mkdir ./hdf
file copy -force ${project_name}_vivado_accelerator/${project_name}.runs/impl_1/${design_name}_wrapper.sysdef ./hdf/${design_name}_wrapper.hdf
62 changes: 60 additions & 2 deletions hls4ml/writer/vivado_accelerator_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,18 @@ def write_axi_wrapper(self, model):

io_type = model.config.get_config_value("IOType")

model_brams = [var for var in model.get_weight_variables() if var.storage.lower() == 'bram']

for line in f.readlines():
if 'void myproject(' in line:
newline = 'void {}_axi(\n'.format(model.config.get_project_name())
elif '//hls-fpga-machine-learning insert include' in line:
newline = '#include "{}_axi.h"\n'.format(model.config.get_project_name())
for b in model_brams:
newline += '#include "weights/{}.h"\n'.format(b.name)
newline += '\n'
if model_brams:
newline += '#include "nnet_utils/nnet_helpers.h"\n'
elif '//hls-fpga-machine-learning insert local vars' in line:
newline = ''
if self.vivado_accelerator_config.get_interface() == 'axi_stream':
Expand All @@ -102,8 +109,8 @@ def write_axi_wrapper(self, model):
newline += indent + '#pragma HLS STREAM variable=out_local depth={}\n'\
.format(model.get_output_variables()[0].pragma[1])
elif '//hls-fpga-machine-learning insert call' in line:
newline = indent + '{}(in_local, out_local);\n'.format(
model.config.get_project_name())
brams_str = (''.join([', ' + b.name for b in model_brams])) if len(model_brams) > 0 else "";
newline = indent + '{}(in_local, out_local{});\n'.format(model.config.get_project_name(), brams_str)
elif '//hls-fpga-machine-learning insert interface' in line:
if self.vivado_accelerator_config.get_interface() == 'axi_lite':
newline = ''
Expand All @@ -124,6 +131,23 @@ def write_axi_wrapper(self, model):
newline += indent + '#pragma HLS INTERFACE ap_ctrl_none port=return\n'
if model.config.get_config_value("IOType") == 'io_stream':
newline += indent + '#pragma HLS DATAFLOW\n'
elif '//hls-fpga-machine-learning insert load weights' in line:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would strongly discourage this style of code in the writer. What's the point of templates if they are blank and all the logic is written by the writer? Just clutters the writer. This is much better addressed with a template that contains compile-time constants to select the functionality and just fill out the few lines with specific weights

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this a copy mainly from vivado_writer.py lines 130-139? Do they both need to change?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jmitrevs, yes I moved the code from the vivado_write.py to the vivado_accelerator_write.py.

newline = ''
if model_brams:
newline += '#ifndef __SYNTHESIS__\n'
newline += indent + 'static bool loaded_weights = false;\n'
newline += indent + 'if (!loaded_weights) {\n'
newline += indent + ' loaded_weights = true;\n'
for layer in model.get_layers():
for w in layer.get_weights():
if w.weight_class == 'CompressedWeightVariable':
newline += indent + ' nnet::load_compressed_weights_from_txt<{}, {}>({}, "{}.txt");\n'.format(w.type.name, w.nonzeros, w.name, w.name)
elif w.weight_class == 'ExponentWeightVariable':
newline += indent + ' nnet::load_exponent_weights_from_txt<{}, {}>({}, "{}.txt");\n'.format(w.type.name, w.data_length, w.name, w.name)
else:
newline += indent + ' nnet::load_weights_from_txt<{}, {}>({}, "{}.txt");\n'.format(w.type.name, w.data_length, w.name, w.name)
newline += indent + '}\n'
newline += '#endif\n'
elif '//hls-fpga-machine-learning insert enqueue' in line:
io_type = model.config.get_config_value("IOType")
if io_type == 'io_parallel':
Expand All @@ -139,10 +163,12 @@ def write_axi_wrapper(self, model):
newline += indent + '}\n'
elif io_type == 'io_stream':
newline = ''
newline += 'LOAD_INPUT_OUTER_LOOP:\n'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the point of this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, can we remove the explicit creation of loops and move that to the template, as suggested above?

newline += indent + 'for(unsigned i = 0; i < N_IN / {input_t}::size; ++i) {{\n'
# newline += indent + indent + '#pragma HLS PIPELINE\n'
newline += indent + indent + '{input_t} ctype;\n'
newline += indent + indent + '#pragma HLS DATA_PACK variable=ctype\n'
newline += 'LOAD_INPUT_INNER_LOOP:\n'
newline += indent + indent + 'for(unsigned j = 0; j < {input_t}::size; j++) {{\n'
# newline += indent + indent + indent + '#pragma HLS UNROLL\n'
if self.vivado_accelerator_config.get_interface() == 'axi_stream':
Expand All @@ -169,9 +195,11 @@ def write_axi_wrapper(self, model):
newline += indent + '}\n'
elif io_type == 'io_stream':
newline = ''
newline += 'STORE_OUTPUT_OUTER_LOOP:\n'
newline += indent + 'for(unsigned i = 0; i < N_OUT / {result_t}::size; ++i) {{\n'
# newline += indent + indent + '#pragma HLS PIPELINE\n'
newline += indent + indent + '{result_t} ctype = out_local.read();\n'
newline += 'STORE_OUTPUT_INNER_LOOP:\n'
newline += indent + indent + 'for(unsigned j = 0; j < {result_t}::size; j++) {{\n'
# newline += indent + indent + indent + '#pragma HLS UNROLL\n'
if self.vivado_accelerator_config.get_interface() == 'axi_stream':
Expand All @@ -188,6 +216,35 @@ def write_axi_wrapper(self, model):
f.close()
fout.close()

def modify_project_cpp(self, model):
    '''
    Strip the HLS interface pragmas and the weight-loading calls from the
    generated firmware/<project>.cpp: the AXI wrapper now declares the
    interfaces and loads the weights itself, so keeping them in the inner
    project source would duplicate them.
    '''
    oldfile = '{}/firmware/{}.cpp'.format(model.config.get_output_dir(), model.config.get_project_name())
    # Rewrite through a temp file next to the original, then swap it in.
    # (Previously the temp was misleadingly named 'build_prj_axi.tcl'.)
    newfile = oldfile + '.tmp'

    # Any line containing one of these markers is dropped.
    drop_markers = (
        '#pragma HLS INTERFACE axis port=',
        '#pragma HLS INTERFACE bram port=',
        'nnet::load_weights_from_txt',
        'nnet::load_exponent_weights_from_txt',
        'nnet::load_compressed_weights_from_txt',
    )

    with open(oldfile, 'r') as f, open(newfile, 'w') as fout:
        for line in f.readlines():
            if not any(marker in line for marker in drop_markers):
                fout.write(line)

    os.rename(newfile, oldfile)

def modify_build_script(self, model):
'''
Modify the build_prj.tcl and build_lib.sh scripts to add the extra wrapper files and set the top function
Expand Down Expand Up @@ -369,6 +426,7 @@ def write_hls(self, model):
self.write_driver(model)
self.write_wrapper_test(model)
self.write_axi_wrapper(model)
self.modify_project_cpp(model)
self.modify_build_script(model)
self.write_new_tar(model)

4 changes: 2 additions & 2 deletions hls4ml/writer/vivado_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@ def print_array_to_cpp(self, var, odir, write_txt_file=True):

if write_txt_file:
h_file.write("#ifndef __SYNTHESIS__\n")
h_file.write(var.definition_cpp() + ";\n")
h_file.write("static " + var.definition_cpp() + ";\n")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. Why at all?
  2. Why like this and not in definition_cpp()?

h_file.write("#else\n")

h_file.write(var.definition_cpp() + " = {")
h_file.write("static " + var.definition_cpp() + " = {")

#fill c++ array.
#not including internal brackets for multidimensional case
Expand Down