diff --git a/README.md b/README.md index e1bbdc1219..ff815f7cbc 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ [![DOI](https://zenodo.org/badge/108329371.svg)](https://zenodo.org/badge/latestdoi/108329371) [![PyPI version](https://badge.fury.io/py/hls4ml.svg)](https://badge.fury.io/py/hls4ml) [![Supported Python versions](https://img.shields.io/pypi/pyversions/hls4ml.svg)](https://pypi.org/project/hls4ml/) +[![Documentation Status](https://github.com/fastmachinelearning/hls4ml/actions/workflows/build-sphinx.yml/badge.svg)](https://fastmachinelearning.org/hls4ml) A package for machine learning inference in FPGAs. We create firmware implementations of machine learning algorithms using high level synthesis language (HLS). We translate traditional open-source machine learning package models into HLS that can be configured for your use-case! @@ -17,13 +18,13 @@ For more information visit the webpage: [https://fastmachinelearning.org/hls4ml/ Detailed tutorials on how to use `hls4ml`'s various functionalities can be found [here](https://github.com/hls-fpga-machine-learning/hls4ml-tutorial). 
# Installation -``` +```bash pip install hls4ml ``` -To install the extra dependencies for profiling: +To install the extra dependencies for profiling: -``` +```bash pip install hls4ml[profiling] ``` @@ -32,13 +33,14 @@ pip install hls4ml[profiling] ```Python import hls4ml -#Fetch a keras model from our example repository -#This will download our example model to your working directory and return an example configuration file +# Fetch a keras model from our example repository +# This will download our example model to your working directory and return an example configuration file config = hls4ml.utils.fetch_example_model('KERAS_3layer.json') -print(config) #You can print the configuration to see some default parameters +# You can print the configuration to see some default parameters +print(config) -#Convert it to a hls project +# Convert it to a hls project hls_model = hls4ml.converters.keras_to_hls(config) # Print full list of example models if you want to explore more @@ -49,11 +51,11 @@ hls4ml.utils.fetch_example_list() Note: Vitis HLS is not yet supported. Vivado HLS versions between 2018.2 and 2020.1 are recommended. ```Python -#Use Vivado HLS to synthesize the model -#This might take several minutes +# Use Vivado HLS to synthesize the model +# This might take several minutes hls_model.build() -#Print out the report if you want +# Print out the report if you want hls4ml.report.read_vivado_report('my-hls-test') ``` diff --git a/contrib/kl_layer/README.md b/contrib/kl_layer/README.md new file mode 100644 index 0000000000..5d306ae69a --- /dev/null +++ b/contrib/kl_layer/README.md @@ -0,0 +1,18 @@ +This folder contains the implementation of custom KL divergence layer. +This is a custom implementation and not a built-in layer in any deep learning framework. +It was developed specifically for [AD@L1 CMS paper](https://www.nature.com/articles/s42256-022-00441-3). 
+ +# Files + +* `kl_layer.py`: contains the standalone implementation of the custom KL divergence layer +* `kl_layer.h`: contains the HLS implementation of KL layer + + +# Usage + +`kl_layer.py` contains the example of how to use the KL layer. +To run do + +``` +python kl_layer.py +``` diff --git a/contrib/kl_layer/kl_layer.h b/contrib/kl_layer/kl_layer.h new file mode 100644 index 0000000000..0435b9a22e --- /dev/null +++ b/contrib/kl_layer/kl_layer.h @@ -0,0 +1,87 @@ +#ifndef KL_LAYER_H_ +#define KL_LAYER_H_ + +#include "nnet_activation.h" +#include "nnet_common.h" +#include +#include + +namespace nnet { + +struct distance_config { + // IO size + static const unsigned n_in = 10; + static const unsigned n_out = 1; + + // Internal data type definitions + typedef float accum_t; + typedef float sum_t; + typedef ap_fixed<18, 8> exp_table_t; + + // Internal info + static const unsigned table_size = 1024; + static constexpr unsigned exp_range = 8; +}; + +template void init_klloss_exp_table(typename CONFIG_T::exp_table_t table_out[N_TABLE]) { + for (int ii = 0; ii < N_TABLE; ii++) { + // First, convert from table index to X-value (range -exp_range to +exp_range) + float in_val = 2 * CONFIG_T::exp_range * (ii - float(N_TABLE) / 2.0) / float(N_TABLE); + // Next, compute lookup table function + typename CONFIG_T::exp_table_t real_val = exp_fcn_float(in_val); + // std::cout << "Lookup table In Value: " << in_val << " Result: " << real_val << " Index: " << ii << std::endl; + table_out[ii] = real_val; + } +} +template +void klloss(data1_T mean[CONFIG_T::n_in], data2_T log_var[CONFIG_T::n_in], res_T res[CONFIG_T::n_out]) { + #pragma HLS PIPELINE + // Initialize the lookup tables +#ifdef __HLS_SYN__ + bool initialized = false; + typename CONFIG_T::exp_table_t exp_table[CONFIG_T::table_size]; +#else + static bool initialized = false; + static typename CONFIG_T::exp_table_t exp_table[CONFIG_T::table_size]; +#endif + if (!initialized) { + init_klloss_exp_table(exp_table); + initialized = true; + } 
+ typename CONFIG_T::accum_t kl[CONFIG_T::n_in]; + #pragma HLS ARRAY_PARTITION variable=kl complete + typename CONFIG_T::accum_t mean_sq[CONFIG_T::n_in]; + #pragma HLS ARRAY_PARTITION variable=mean_sq complete + typename CONFIG_T::accum_t kl_sum(0); + for (unsigned i = 0; i < CONFIG_T::n_in; i++) { + #pragma HLS UNROLL + mean_sq[i] = mean[i] * mean[i]; + kl[i] = data2_T(1.) + log_var[i]; + // std::cout << "Log var: " << log_var[i] << " Result: " << kl[i] << std::endl; + } + constexpr unsigned table_scale = (unsigned)(CONFIG_T::table_size / (2 * CONFIG_T::exp_range)); + constexpr unsigned index_scale = (unsigned)(CONFIG_T::exp_range * table_scale); + for (unsigned i = 0; i < CONFIG_T::n_in; i++) { + #pragma HLS UNROLL + auto data_round = log_var[i] * table_scale; + auto index = data_round + index_scale; + if (index < 0) + index = 0; + if (index > CONFIG_T::table_size - 1) + index = CONFIG_T::table_size - 1; + kl[i] -= exp_table[index]; + // std::cout << "Exp var: " << exp_table[index] << " Result: " << kl[i] << " Index: " << index << std::endl; + } + for (unsigned i = 0; i < CONFIG_T::n_in; i++) { + #pragma HLS UNROLL + kl[i] -= mean_sq[i]; + } + Op_add op_add; + kl_sum = reduce>(kl, op_add); + // std::cout << "KL sum: " << kl_sum << std::endl; + kl_sum *= typename CONFIG_T::accum_t(1. 
/ CONFIG_T::n_in); + res[0] = res_T(-0.5) * kl_sum; +} +} // namespace nnet + +#endif diff --git a/contrib/kl_layer/kl_layer.py b/contrib/kl_layer/kl_layer.py new file mode 100644 index 0000000000..ec2af1b797 --- /dev/null +++ b/contrib/kl_layer/kl_layer.py @@ -0,0 +1,185 @@ +""" + Usage example for a custom KL loss layer + Takes as an input two arrays: z_mean and z_log_var + and computes KL "distance" between the standard normal distribution + and a Gaussian with mu=z_mean and log(sigma^2)=z_log_var + + The HLS part is in contrib/kl_layer/kl_layer.h +""" +from pathlib import Path + +import numpy as np +import tensorflow as tf + +try: + from keras.layers.merge import _Merge as Merge +except Exception: + from keras.layers.merging.base_merge import _Merge as Merge + +from tensorflow.python.keras.utils import tf_utils +from tensorflow.python.ops import math_ops + +import hls4ml +from hls4ml.converters.keras_to_hls import parse_default_keras_layer +from hls4ml.model.attributes import ConfigurableAttribute, TypeAttribute +from hls4ml.model.types import FixedPrecisionType, RoundingMode, SaturationMode + + +# Keras implementation of a KL layer +class KLLoss(Merge): + '''Keras implementation of a KL loss custom layer''' + + @tf_utils.shape_type_conversion + def build(self, input_shape): + super().build(input_shape) + + def _merge_function(self, inputs): + + mean = inputs[0] + log_var = inputs[1] + + kl = 1.0 + log_var - math_ops.square(mean) - math_ops.exp(log_var) + kl = -0.5 * math_ops.reduce_mean(kl, axis=-1, keepdims=True) + + return kl + + +# hls4ml implementations +class HKLLoss(hls4ml.model.layers.Layer): + '''hls4ml implementation of a KL loss custom layer''' + + _expected_attributes = [ + ConfigurableAttribute('table_size', default=1024), + ConfigurableAttribute('exp_range', default=8), + TypeAttribute('accum'), + TypeAttribute( + 'sum', + default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT), + ), + TypeAttribute( + 'exp_table', + 
default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT), + ), + ] + + def initialize(self): + self.add_output_variable(shape=[1], dim_names=[f'KL_LOSS_{self.index}']) + + +# Templates +distance_config_template = """struct config{index} : nnet::distance_config {{ + static const unsigned n_in = {n_in}; + static const unsigned n_out = 1; + typedef {accum_t.name} accum_t; + typedef {sum_t.name} sum_t; + typedef {exp_table_t.name} exp_table_t; + static const unsigned table_size = {table_size}; + static constexpr float exp_range = {exp_range}; +}};\n""" +distance_function_template = 'nnet::klloss<{input1_t}, {input2_t}, {output_t}, {config}>({input1}, {input2}, {output});' +distance_include_list = ['nnet_utils/kl_layer.h'] + + +class HKLLossConfigTemplate(hls4ml.backends.template.LayerConfigTemplate): + def __init__(self): + super().__init__(HKLLoss) + self.template = distance_config_template + + def format(self, node): + params = self._default_config_params(node) + params['n_in'] = node.get_input_variable(node.inputs[0]).shape[0] + params['n_out'] = 1 + return self.template.format(**params) + + +class HKLLossFunctionTemplate(hls4ml.backends.template.FunctionCallTemplate): + def __init__(self): + super().__init__(HKLLoss, include_header=distance_include_list) + self.template = distance_function_template + + def format(self, node): + params = {} + params['config'] = f'config{node.index}' + params['input1_t'] = node.get_input_variable(node.inputs[0]).type.name + params['input2_t'] = node.get_input_variable(node.inputs[1]).type.name + params['output_t'] = node.get_output_variable().type.name + params['input1'] = node.get_input_variable(node.inputs[0]).name + params['input2'] = node.get_input_variable(node.inputs[1]).name + params['output'] = node.get_output_variable().name + + return self.template.format(**params) + + +# Parser for converter +def parse_klloss_layer(keras_layer, input_names, input_shapes, data_reader): + assert 
'KLLoss' in keras_layer['class_name'] + + layer = parse_default_keras_layer(keras_layer, input_names) + + output_shape = [input_shapes[0][0], 1] + + return layer, output_shape + + +def main(): + # Register the converter for custom Keras layer + hls4ml.converters.register_keras_layer_handler('KLLoss', parse_klloss_layer) + + # Register the hls4ml's IR layer + hls4ml.model.layers.register_layer('KLLoss', HKLLoss) + + # Register the optimization passes (if any) + backend = hls4ml.backends.get_backend('Vivado') + + # Register template passes for the given backend + backend.register_template(HKLLossConfigTemplate) + backend.register_template(HKLLossFunctionTemplate) + + # Register HLS implementation + p = Path(__file__).parent / 'kl_layer.h' + backend.register_source(p) + + # Test if it works + # Create a dummy Keras model with KL loss layer + inp = tf.keras.layers.Input(shape=(19, 3, 1)) + z_mean = tf.keras.layers.Dense(10)(inp) + z_log_var = tf.keras.layers.Dense(10)(inp) + custom_output = KLLoss()([z_mean, z_log_var]) + # create new model + kmodel = tf.keras.models.Model(inputs=inp, outputs=custom_output) + kmodel.summary() + + # test on random inputs + x = np.random.randint(-5, 5, (1, 19, 3, 1), dtype='int32') + kres = kmodel(x) + + # Create dummy config + config = {} + config['Model'] = { + 'Precision': 'ap_fixed<16,6>', + 'ReuseFactor': 1, + 'ParallelizationFactor': 1, + 'Strategy': 'Resource', + } + hmodel = hls4ml.converters.convert_from_keras_model( + kmodel, + output_dir='hls4mlprj_kl_layer', + backend='Vivado', + io_type='io_parallel', + part='xcvu9p-flga2577-2-e', + hls_config=config, + ) + + hmodel.compile() + hres = hmodel.predict(x.astype('float32')) + + print('Compare prediction by hls4ml model to Keras one') + print(kres - hres) + + print('Building model') + report = hmodel.build(reset=True, csim=False, cosim=True, synth=True, vsynth=True) + print(report) + + +if __name__ == '__main__': + main() diff --git a/docs/conf.py b/docs/conf.py index 
04df6dba96..ab7d6c33bf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,15 +12,17 @@ # import os import sys + sys.path.insert(0, os.path.abspath('../')) import datetime + from setuptools_scm import get_version # -- Project information ----------------------------------------------------- project = 'hls4ml' -copyright = str(datetime.datetime.now().year)+', Fast Machine Learning Lab' +copyright = str(datetime.datetime.now().year) + ', Fast Machine Learning Lab' author = 'Fast Machine Learning Lab' # The full version, including alpha/beta/rc tags @@ -36,7 +38,8 @@ 'sphinx.ext.autodoc', 'sphinx.ext.githubpages', 'sphinx_rtd_theme', - 'sphinx.ext.napoleon' + 'sphinx.ext.napoleon', + 'sphinx_contributors', ] # Add any paths that contain templates here, relative to this directory. @@ -72,12 +75,11 @@ 'display_version': True, 'prev_next_buttons_location': 'bottom', 'style_external_links': False, - 'style_nav_header_background': '#2980B9', # Toc options 'collapse_navigation': True, 'sticky_navigation': True, 'navigation_depth': 2, 'includehidden': True, - 'titles_only': False + 'titles_only': False, } diff --git a/docs/extension.rst b/docs/extension.rst new file mode 100644 index 0000000000..2836430ea8 --- /dev/null +++ b/docs/extension.rst @@ -0,0 +1,185 @@ +======================== +Extension API +======================== + +hls4ml natively supports a large number of neural network layers. +But what if a desired layer is not supported? +If it is standard enough and its implementation would benefit the community as a whole, we would welcome a contribution to add it to the standard set of supported layers. +However, if it is a somewhat niche custom layer, there is another approach we can take to extend hls4ml through the *extension API*. + +This documentation will walk through a `complete end-to-end example `_, which is part of our testing suite. 
+To implement a custom layer in hls4ml with the extension API, the required components are: + +* Your custom layer class +* Equivalent hls4ml custom layer class +* Parser for the converter +* HLS implementation +* Layer config template +* Function config template +* Registration of layer, source code, and templates + +Complete example +================ + +For concreteness, let's say our custom layer ``KReverse`` is implemented in Keras and reverses the order of the last dimension of the input. + +.. code-block:: Python + + # Keras implementation of a custom layer + class KReverse(tf.keras.layers.Layer): + '''Keras implementation of a hypothetical custom layer''' + + def __init__(self): + super().__init__() + + def call(self, inputs): + return tf.reverse(inputs, axis=[-1]) + +We can define the equivalent layer in hls4ml ``HReverse``, which inherits from ``hls4ml.model.layers.Layer``. + +.. code-block:: Python + + # hls4ml layer implementation + class HReverse(hls4ml.model.layers.Layer): + '''hls4ml implementation of a hypothetical custom layer''' + + def initialize(self): + inp = self.get_input_variable() + shape = inp.shape + dims = inp.dim_names + self.add_output_variable(shape, dims) + +A parser for the Keras to HLS converter is also required. +This parser reads the attributes of the Keras layer instance and populates a dictionary of attributes for the hls4ml layer. +It also returns a list of output shapes (one shape for each output). +In this case, there is a single output with the same shape as the input. + +.. 
code-block:: Python + + # Parser for converter + def parse_reverse_layer(keras_layer, input_names, input_shapes, data_reader): + layer = {} + layer['class_name'] = 'HReverse' + layer['name'] = keras_layer['config']['name'] + layer['n_in'] = input_shapes[0][1] + + if input_names is not None: + layer['inputs'] = input_names + + return layer, [shape for shape in input_shapes[0]] + +Next, we need the actual HLS implementation of the function, which can be written in a header file ``nnet_reverse.h``. + +.. code-block:: C++ + + #ifndef NNET_REVERSE_H_ + #define NNET_REVERSE_H_ + + #include "nnet_common.h" + + namespace nnet { + + struct reverse_config { + static const unsigned n_in = 10; + }; + + template<class data_T, typename CONFIG_T> + void reverse( + data_T input[CONFIG_T::n_in], + data_T reversed[CONFIG_T::n_in] + ) { + for (int i = 0; i < CONFIG_T::n_in; i++) { + reversed[CONFIG_T::n_in - 1 - i] = input[i]; + } + } + + } + + #endif + +Now, we can define the layer config and function call templates. +These two templates determine how to populate the config template based on the layer attributes and the function call signature for the layer in HLS, respectively. + +.. 
code-block:: Python + + rev_config_template = """struct config{index} : nnet::reverse_config {{ + static const unsigned n_in = {n_in}; + }};\n""" + + rev_function_template = 'nnet::reverse<{input_t}, {config}>({input}, {output});' + rev_include_list = ['nnet_utils/nnet_reverse.h'] + + + class HReverseConfigTemplate(hls4ml.backends.template.LayerConfigTemplate): + def __init__(self): + super().__init__(HReverse) + self.template = rev_config_template + + def format(self, node): + params = self._default_config_params(node) + return self.template.format(**params) + + + class HReverseFunctionTemplate(hls4ml.backends.template.FunctionCallTemplate): + def __init__(self): + super().__init__(HReverse, include_header=rev_include_list) + self.template = rev_function_template + + def format(self, node): + params = self._default_function_params(node) + return self.template.format(**params) + +Now, we need to tell hls4ml about the existence of this new layer by registering it. +We also need to register the parser (a.k.a. the layer handler), the template passes, and HLS implementation source code with the particular backend. +In this case, the HLS code is valid for both the Vivado and Quartus backends. + +.. 
code-block:: Python + + # Register the converter for custom Keras layer + hls4ml.converters.register_keras_layer_handler('KReverse', parse_reverse_layer) + + # Register the hls4ml's IR layer + hls4ml.model.layers.register_layer('HReverse', HReverse) + + for backend_id in ['Vivado', 'Quartus']: + # Register the optimization passes (if any) + backend = hls4ml.backends.get_backend(backend_id) + backend.register_pass('remove_duplicate_reverse', RemoveDuplicateReverse, flow=f'{backend_id.lower()}:optimize') + + # Register template passes for the given backend + backend.register_template(HReverseConfigTemplate) + backend.register_template(HReverseFunctionTemplate) + + # Register HLS implementation + backend.register_source('nnet_reverse.h') + +Finally, we can actually test the hls4ml custom layer compared to the Keras one. + +.. code-block:: Python + + # Test if it works + kmodel = tf.keras.models.Sequential( + [ + tf.keras.layers.Input(shape=(8,)), + KReverse(), + tf.keras.layers.ReLU(), + ] + ) + + x = np.random.randint(-5, 5, (8,), dtype='int32') + kres = kmodel(x) + + for backend_id in ['Vivado', 'Quartus']: + + hmodel = hls4ml.converters.convert_from_keras_model( + kmodel, + output_dir=str(f'hls4mlprj_extensions_{backend_id}'), + backend=backend_id, + io_type='io_parallel', + hls_config={'Model': {'Precision': 'ap_int<6>', 'ReuseFactor': 1}}, + ) + + hmodel.compile() + hres = hmodel.predict(x.astype('float32')) + + np.testing.assert_array_equal(kres, hres) diff --git a/docs/flows.rst b/docs/flows.rst new file mode 100644 index 0000000000..28d423aa8e --- /dev/null +++ b/docs/flows.rst @@ -0,0 +1,6 @@ +==================== +Flows and Optimizers +==================== + +- Explain concept of flows and optimizers +- Describe FIFO buffer optimizer as an example? diff --git a/docs/index.rst b/docs/index.rst index da7e445348..c766f18365 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,12 +1,14 @@ .. 
toctree:: :hidden: - + Home release_notes status - setup + setup command concepts + flows + extension reference .. toctree:: @@ -23,7 +25,7 @@ autodoc/hls4ml autodoc/hls4ml.* - + ================================== Welcome to hls4ml's documentation! @@ -40,92 +42,10 @@ The project is currently in development, so please let us know if you are intere Project Status ================================= -For the latest status including current and planned features, see the :doc:`Status and Features ` page. +For the latest status including current and planned features, see the :doc:`Status and Features ` page. Tutorials ================================= Detailed tutorials on how to use ``hls4ml``'s various functionalities can be found at: https://github.com/fastmachinelearning/hls4ml-tutorial - -Citation -================================= -If you use this software in a publication, please cite the software - -.. code-block:: bibtex - - @software{vloncar_2021_5680908, - author = {{FastML Team}}, - title = {fastmachinelearning/hls4ml}, - year = 2021, - publisher = {Zenodo}, - doi = {10.5281/zenodo.1201549}, - url = {https://github.com/fastmachinelearning/hls4ml} - } - -and first publication: - -.. code-block:: bibtex - - @article{Duarte:2018ite, - author = "Duarte, Javier and others", - title = "{Fast inference of deep neural networks in FPGAs for particle physics}", - eprint = "1804.06913", - archivePrefix = "arXiv", - primaryClass = "physics.ins-det", - reportNumber = "FERMILAB-PUB-18-089-E", - doi = "10.1088/1748-0221/13/07/P07027", - journal = "JINST", - volume = "13", - number = "07", - pages = "P07027", - year = "2018" - } - -Additionally, if you use specific features developed in later papers, please cite those as well. For example, CNNs: - -.. 
code-block:: bibtex - - @article{Aarrestad:2021zos, - author = "Aarrestad, Thea and others", - title = "{Fast convolutional neural networks on FPGAs with hls4ml}", - eprint = "2101.05108", - archivePrefix = "arXiv", - primaryClass = "cs.LG", - reportNumber = "FERMILAB-PUB-21-130-SCD", - doi = "10.1088/2632-2153/ac0ea1", - journal = "Mach. Learn. Sci. Tech.", - volume = "2", - number = "4", - pages = "045015", - year = "2021" - } - @article{Ghielmetti:2022ndm, - author = "Ghielmetti, Nicol\`{o} and others", - title = "{Real-time semantic segmentation on FPGAs for autonomous vehicles with hls4ml}", - eprint = "2205.07690", - archivePrefix = "arXiv", - primaryClass = "cs.CV", - reportNumber = "FERMILAB-PUB-22-435-PPD", - doi = "10.1088/2632-2153/ac9cb5", - journal ="Mach. Learn. Sci. Tech.", - year = "2022" - } - -binary/ternary networks: - -.. code-block:: bibtex - - @article{Loncar:2020hqp, - author = "Ngadiuba, Jennifer and others", - title = "{Compressing deep neural networks on FPGAs to binary and ternary precision with HLS4ML}", - eprint = "2003.06308", - archivePrefix = "arXiv", - primaryClass = "cs.LG", - reportNumber = "FERMILAB-PUB-20-167-PPD-SCD", - doi = "10.1088/2632-2153/aba042", - journal = "Mach. Learn. Sci. Tech.", - volume = "2", - pages = "015001", - year = "2021" - } diff --git a/docs/reference.rst b/docs/reference.rst index 8216e0157e..aa4443653a 100644 --- a/docs/reference.rst +++ b/docs/reference.rst @@ -1,54 +1,96 @@ ============================ -Reference and Contributors +Citation and Contributors ============================ Citation -======== - -If you are using the package please cite: - - -* - .. image:: https://zenodo.org/badge/108329371.svg - :target: https://zenodo.org/badge/latestdoi/108329371 - :alt: DOI - -* J. Duarte *et al.*\ , "Fast inference of deep neural networks in FPGAs for particle physics", `JINST 13 P07027 (2018) `_\ , `arXiv:1804.06913 `_. 
- -If you are using the boosted decision tree implementation, please cite also: - - -* S. Summers *et al.*\ , "Fast inference of boosted decision trees in FPGAs for particle physics", `arXiv:2002.02534 `_. - If you are using the binary or ternary neural network implementation, please also cite: -* G. Di Guglielmo *et al.*\ , "Compressing deep neural networks on FPGAs to binary and ternary precision with hls4ml", `arXiv:2003.06308 `_ - -Additional Talks and Presentations -================================== - - -* eScience 2019: `talk `__ -* ACAT 2019: `talk `__ -* Zurich Hands-on Course: `course `__ -* TWEPP 2018: `talk `__ -* CHEP 2018: `talk `__ -* Connecting the Dots 2018: `talk `__ -* Fermilab Research Techniques Seminar: `talk `__ -* CERN EP/IT Data Science Seminar: `talk `__ +================================= +If you use this software in a publication, please cite the software + +.. code-block:: bibtex + + @software{vloncar_2021_5680908, + author = {{FastML Team}}, + title = {fastmachinelearning/hls4ml}, + year = 2021, + publisher = {Zenodo}, + doi = {10.5281/zenodo.1201549}, + url = {https://github.com/fastmachinelearning/hls4ml} + } + +and first publication: + +.. code-block:: bibtex + + @article{Duarte:2018ite, + author = "Duarte, Javier and others", + title = "{Fast inference of deep neural networks in FPGAs for particle physics}", + eprint = "1804.06913", + archivePrefix = "arXiv", + primaryClass = "physics.ins-det", + reportNumber = "FERMILAB-PUB-18-089-E", + doi = "10.1088/1748-0221/13/07/P07027", + journal = "JINST", + volume = "13", + number = "07", + pages = "P07027", + year = "2018" + } + +Additionally, if you use specific features developed in later papers, please cite those as well. For example, CNNs: + +.. 
code-block:: bibtex + + @article{Aarrestad:2021zos, + author = "Aarrestad, Thea and others", + title = "{Fast convolutional neural networks on FPGAs with hls4ml}", + eprint = "2101.05108", + archivePrefix = "arXiv", + primaryClass = "cs.LG", + reportNumber = "FERMILAB-PUB-21-130-SCD", + doi = "10.1088/2632-2153/ac0ea1", + journal = "Mach. Learn. Sci. Tech.", + volume = "2", + number = "4", + pages = "045015", + year = "2021" + } + @article{Ghielmetti:2022ndm, + author = "Ghielmetti, Nicol\`{o} and others", + title = "{Real-time semantic segmentation on FPGAs for autonomous vehicles with hls4ml}", + eprint = "2205.07690", + archivePrefix = "arXiv", + primaryClass = "cs.CV", + reportNumber = "FERMILAB-PUB-22-435-PPD", + doi = "10.1088/2632-2153/ac9cb5", + journal ="Mach. Learn. Sci. Tech.", + year = "2022" + } + +binary/ternary networks: + +.. code-block:: bibtex + + @article{Loncar:2020hqp, + author = "Ngadiuba, Jennifer and others", + title = "{Compressing deep neural networks on FPGAs to binary and ternary precision with HLS4ML}", + eprint = "2003.06308", + archivePrefix = "arXiv", + primaryClass = "cs.LG", + reportNumber = "FERMILAB-PUB-20-167-PPD-SCD", + doi = "10.1088/2632-2153/aba042", + journal = "Mach. Learn. Sci. Tech.", + volume = "2", + pages = "015001", + year = "2021" + } Contributors ============ +Thanks to our contributors! -* Vladimir Loncar, Jennifer Ngadiuba, Maurizio Pierini, Sioni Summers [CERN] -* Javier Duarte [University of California San Diego] -* Sergo Jindariani, Benjamin Kreis, Ryan Rivera, Nhan Tran [Fermilab] -* Edward Kreinar [Hawkeye360] -* Song Han, Philip Harris, Dylan Rankin [MIT] -* Zhenbin Wu [University of Illinois at Chicago] -* Mark Neubauer [University of Illinois Urbana-Champaign] -* Shih-Chieh Hsu [University of Washington] -* Giuseppe Di Guglielmo [Columbia University] -* Duc Hoang [Rhodes College] -* Noah Paladino [Rutgers University] - +.. 
contributors:: fastmachinelearning/hls4ml + :avatars: + :limit: 100 + :order: DESC diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 5db2480be9..b0cbe6c3d6 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -6,19 +6,134 @@ See `here `__ for offici ---- +**v0.7.0 / TBD** + +What's changed: + +* GarNet and GarNetStack in config.py by @yiiyama in https://github.com/fastmachinelearning/hls4ml/pull/344 +* support ZeroPadding layers by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/480 +* New backend development framework by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/395 +* Register ``ApplyAlpha`` layer templates by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/499 +* Parsing extended by @nicologhielmetti in https://github.com/fastmachinelearning/hls4ml/pull/501 +* Remove intermediate casting in product by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/490 +* Add QKeras as a package dependency by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/511 +* Copy flows from config by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/510 +* VivadoAccelerator backend updates by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/508 +* Optimized look-up table by @nemerchiedde in https://github.com/fastmachinelearning/hls4ml/pull/527 +* Upsampling2D test case by @ChiRuiChen in https://github.com/fastmachinelearning/hls4ml/pull/520 +* Support UpSampling1D by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/475 +* RNN support (part 1) by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/521 +* Quartus Custom Matrix Multiplication & Quantization by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/523 +* Vivado-equivalent implementation of Softmax on Quartus by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/540 +* Ensure 2 bits for scale in po2 quantizers by @vloncar in 
https://github.com/fastmachinelearning/hls4ml/pull/531 +* Link update by @bkmgit in https://github.com/fastmachinelearning/hls4ml/pull/519 +* Fix removal of nodes ingested by multiple downstream nodes by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/544 +* Enable SeparableConv2d by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/547 +* Extension API by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/528 +* change string ReuseFactor to int by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/416 +* Make the size of bn scale and bias what they really are by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/532 +* Raise runtime error when a layer is named `input` by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/482 +* fix insertion before a node with multiple inputs + support additional broadcasting by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/551 +* Pointwise conv1d/2d resource by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/471 +* Quartus Embedding Layer by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/548 +* Fix for QActivations passed as an argument by @AdrianAlan in https://github.com/fastmachinelearning/hls4ml/pull/553 +* Don't override precision directly in the QKeras optimizer by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/567 +* Remove the in/out size from top function by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/559 +* Transpose2d, Concatenate2d, and up to 3 Clones for io_stream by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/402 +* Remove io_serial as io_stream and add some more info in docs. 
by @Duchstf in https://github.com/fastmachinelearning/hls4ml/pull/334 +* Update docs for v0.6.0 by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/453 +* Use correct number of args for multiple outputs by @apfusco in https://github.com/fastmachinelearning/hls4ml/pull/487 +* Fixed a few typos in the documentation by @pitmonticone in https://github.com/fastmachinelearning/hls4ml/pull/467 +* returning integer from _compute_n_samples by @JochiSt in https://github.com/fastmachinelearning/hls4ml/pull/537 +* Providing support for Alveo boards by @selwyn96 in https://github.com/fastmachinelearning/hls4ml/pull/552 +* Make layer names case sensitive in config. by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/577 +* Add issue and PR templates by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/582 +* Vivado Backend GRU/LSTM support by @drankincms in https://github.com/fastmachinelearning/hls4ml/pull/560 +* Update CI template syntax by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/593 +* Update flow dependencies by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/588 +* Fix parsing of ZeroPadding layers by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/595 +* remove cppname by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/562 +* Remove email helpline from the docs by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/601 +* Fixes for GRU/LSTM in Vivado backend by @drankincms in https://github.com/fastmachinelearning/hls4ml/pull/598 +* Remove io_serial by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/609 +* Fix test_graph by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/611 +* Override parent backend optimizer passes with derived backend passes by @thesps in https://github.com/fastmachinelearning/hls4ml/pull/597 +* Enforce function pipelining when using io_parallel with Resource strategy by @vloncar in 
https://github.com/fastmachinelearning/hls4ml/pull/605 +* FIFO depth optimization by @nicologhielmetti in https://github.com/fastmachinelearning/hls4ml/pull/509 +* Add tracing support for the quartus backend by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/583 +* Quartus streaming support for Activations, Dense & Batch Normalization by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/557 +* QConv alpha != 1 bug fix by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/612 +* Quartus Stream Embedding by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/625 +* change master to main by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/602 +* Edit order of the optimizers in the flow so that BramFactor is followed by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/621 +* Softmax LUT Optimization by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/570 +* Quartus Synthesis Flow Improvement by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/618 +* Quartus Extensions by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/628 +* Quartus GRU by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/596 +* Quartus Merge layers by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/634 +* fix nondefault project name handling by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/626 +* Fix parsing of logic synthesis reports by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/639 +* Fix conv1d stream implementation hls directives by @Jonathan-Shoemaker in https://github.com/fastmachinelearning/hls4ml/pull/635 +* Implementation and optimizations linked to Simple-RNN and LSTM for qu… by @nemerchiedde in https://github.com/fastmachinelearning/hls4ml/pull/575 +* Softsign optimization by @nemerchiedde in https://github.com/fastmachinelearning/hls4ml/pull/585 +* Parallel CNNs, Pooling & Image Layers for Quartus Backend by @bo3z in 
https://github.com/fastmachinelearning/hls4ml/pull/561 +* Quartus Streaming Softsign (PR #585 contd.) by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/655 +* Remove final reshapes even for Quartus by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/661 +* Unrolled CNN implementation by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/600 +* the strategy was not propagated in the pytest by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/663 +* Fix keras model loading issue with loading model with KerasH5 by @calad0i in https://github.com/fastmachinelearning/hls4ml/pull/664 +* append applied_flows container before filling instead of after by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/641 +* set version using ``setuptools_scm`` by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/479 +* Argmax Softmax by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/627 +* Fix version extraction in Sphinx config by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/669 +* Add requested citations to README by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/615 +* skip BatchNorm fusion when input/output is used multiple times by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/481 +* Use wider accum_t for (average) pooling by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/681 +* Quartus Streaming Conv, Pooling & Image layers by @bo3z in https://github.com/fastmachinelearning/hls4ml/pull/656 +* Create branch on PR by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/636 +* Delete ``example-prjs`` directory by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/682 +* Adiabatically turn on `pre-commit` by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/678 +* Add causal padding by @cgutsche in https://github.com/fastmachinelearning/hls4ml/pull/688 +* Update ``pre-commit`` GitHub 
Action by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/689 +* New config_from_keras_model by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/690 +* remove obsolete np.int and np.float by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/703 +* Update p-clang-format to work on mac by @jmduarte in https://github.com/fastmachinelearning/hls4ml/pull/704 +* Fix function call in Alveo tcl script by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/694 +* add readme for contrib by @jmitrevs in https://github.com/fastmachinelearning/hls4ml/pull/706 +* WIP Add custom KL loss layer HLS implementation by @katyagovorkova in https://github.com/fastmachinelearning/hls4ml/pull/606 +* Fix incorrectly linted build() command by @vloncar in https://github.com/fastmachinelearning/hls4ml/pull/709 + +New contributors: + +* @nemerchiedde made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/527 +* @ChiRuiChen made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/520 +* @bo3z made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/523 +* @bkmgit made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/519 +* @apfusco made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/487 +* @pitmonticone made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/467 +* @JochiSt made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/537 +* @selwyn96 made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/552 +* @Jonathan-Shoemaker made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/635 +* @calad0i made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/664 +* @cgutsche made their first contribution in https://github.com/fastmachinelearning/hls4ml/pull/688 + 
+**Full Changelog**: https://github.com/fastmachinelearning/hls4ml/compare/v0.6.0...v0.7.0 + +---- + **v0.6.0 / coris** What's changed: * ``VivadoAccelerator`` backend: target ``pynq-z2`` and ``zcu102`` boards directly from hls4ml by @nicologhielmetti -* Updated ``PyTorch`` and ``ONNX`` converters by @Duchstf -* ``line_buffer`` Conv2D implementation for ``io_stream``: reduced resource usage and latency by @Keb-L, @violatingcp, @vloncar -* Support ``QConv2DBatchnorm`` layer from ``QKeras`` by @nicologhielmetti -* Improved profiling plots - easier to compare original vs ``hls4ml`` converted models by @maksgraczyk -* Better derivation of data types for ``QKeras`` models by @jmduarte, @thesps +* Updated ``PyTorch`` and ``ONNX`` converters by @Duchstf +* ``line_buffer`` Conv2D implementation for ``io_stream``: reduced resource usage and latency by @Keb-L, @violatingcp, @vloncar +* Support ``QConv2DBatchnorm`` layer from ``QKeras`` by @nicologhielmetti +* Improved profiling plots - easier to compare original vs ``hls4ml`` converted models by @maksgraczyk +* Better derivation of data types for ``QKeras`` models by @jmduarte, @thesps * Improved CI by @thesps -* More support for models with branches, skip connections, ``Merge`` and ``Concatenate`` layers by @jmduarte, @vloncar -* Support for ``Dense`` layers over multi-dimensional tensors by @vloncar +* More support for models with branches, skip connections, ``Merge`` and ``Concatenate`` layers by @jmduarte, @vloncar +* Support for ``Dense`` layers over multi-dimensional tensors by @vloncar * Overall improvements by @vloncar, @jmduarte, @thesps, @jmitrevs & others New contributors: @@ -133,8 +248,6 @@ Bugfixes: **v0.0.2**\ : first alpha release -* full translation of DNNs from Keras +* full translation of DNNs from Keras * an example Conv1D exists * parallel mode is supported (serial mode, not yet) - - diff --git a/docs/requirements.txt b/docs/requirements.txt index e4295d37eb..8ff43b5e0b 100644 --- a/docs/requirements.txt 
+++ b/docs/requirements.txt @@ -1,12 +1,13 @@ -sphinx>=3.2.1 -sphinx_rtd_theme -toposort>=1.5.0 -numpy -six -pyyaml h5py +matplotlib +numpy onnx>=1.4.0 pandas +pyyaml seaborn -matplotlib setuptools_scm[toml]>=5 +six +sphinx>=3.2.1 +sphinx_contributors +sphinx_rtd_theme +toposort>=1.5.0 diff --git a/docs/status.rst b/docs/status.rst index e91b605add..b76d08584d 100644 --- a/docs/status.rst +++ b/docs/status.rst @@ -5,7 +5,7 @@ Status and Features Status ======== -The latest stable release is :doc:`v0.6.0 <release_notes>`. This release brings the new VivadoAccelerator backend to easily target boards like pynq-z2 and zcu102, with support for more boards like Alveo planned. +The latest stable release is :doc:`v0.7.0 <release_notes>`. Features @@ -13,15 +13,24 @@ Features A list of supported ML codes and architectures, including a summary table is below. Dependencies are given in the :doc:`Setup <setup>` page. -ML code support: +ML code support: -* Keras/Tensorflow/QKeras, PyTorch, Onnx +* Keras/Tensorflow/QKeras +* PyTorch (limited) +* (Q)ONNX (in development) Neural network architectures: -* Fully Connected NNs (multi-layer perceptron) -* Convolutional NNs (1D/2D) -* Recurrent NN/LSTM, in prototyping +* Fully connected NNs (multilayer perceptron, MLP) +* Convolutional NNs (1D and 2D) +* Recurrent NN (LSTM) +* Graph NN (GarNet) + +HLS backends: + +* Vivado HLS +* Vitis HLS (experimental) +* Intel HLS A summary of the on-going status of the ``hls4ml`` tool is in the table below. @@ -31,15 +40,15 @@ A summary of the on-going status of the ``hls4ml`` tool is in the table below. * - Architectures/Toolkits - Keras/TensorFlow/QKeras - PyTorch - - ONNX + - (Q)ONNX * - MLP - ``supported`` - ``supported`` - ``supported`` - * - Conv1D/Conv2D + * - CNN - ``supported`` - ``in development`` - - ``in development`` + - ``in development`` * - RNN/LSTM - ``in development`` - ``in development`` @@ -48,7 +57,7 @@ A summary of the on-going status of the ``hls4ml`` tool is in the table below.
Other feature notes: -* ``hls4ml`` is tested on Linux, and supports Vivado HLS versions 2018.2 to 2020.1. Vitis HLS is not yet supported. Windows and macOS are not supported. +* ``hls4ml`` is tested on Linux, and supports Vivado HLS versions 2018.2 to 2020.1 and Intel HLS versions XXX. Vitis HLS is experimentally supported in v0.7.0. Windows and macOS are not supported. * BDT support has moved to the `Conifer `__ package @@ -56,4 +65,3 @@ Example Models ============== We also provide and documented several example models that have been implemented in ``hls4ml`` in `this Github repository `_. - diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index b1b586f6c4..793a1d24be 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -189,13 +189,13 @@ def build( curr_dir = os.getcwd() os.chdir(model.config.get_output_dir()) vivado_cmd = ( - f'vivado_hls -f build_prj.tcl "reset={reset}' - f'csim={csim}' - f'synth={synth}' - f'cosim={cosim}' - f'validation={validation}' - f'export={export}' - f'vsynth={vsynth}' + f'vivado_hls -f build_prj.tcl "reset={reset} ' + f'csim={csim} ' + f'synth={synth} ' + f'cosim={cosim} ' + f'validation={validation} ' + f'export={export} ' + f'vsynth={vsynth} ' f'fifo_opt={fifo_opt}"' ) os.system(vivado_cmd) diff --git a/setup.cfg b/setup.cfg index bc6dbf643d..9ff049d343 100644 --- a/setup.cfg +++ b/setup.cfg @@ -8,7 +8,7 @@ author = hls4ml Team license = Apache-2.0 license_file = LICENSE classifiers = - Development Status :: 3 - Alpha + Development Status :: 4 - Beta Intended Audience :: Developers Intended Audience :: Science/Research License :: OSI Approved :: Apache Software License diff --git a/test/pytest/test_extensions.py b/test/pytest/test_extensions.py index 1c8e07198a..e97a58d1f7 100644 --- a/test/pytest/test_extensions.py +++ b/test/pytest/test_extensions.py @@ -118,7 +118,7 @@ def format(self, node): @pytest.fixture(scope='session', 
autouse=True) -def regsister_custom_layer(): +def register_custom_layer(): # Register the converter for custom Keras layer hls4ml.converters.register_keras_layer_handler('KReverse', parse_reverse_layer)