Commit 7cf4134

Merge branch 'update-readme' of https://github.com/fastmachinelearning/hls4ml into update-readme
2 parents: c65e915 + eac61dd

File tree: 19 files changed, +406 −52 lines

README.md

Lines changed: 4 additions & 0 deletions
@@ -63,6 +63,10 @@ hls_model.build()
hls4ml.report.read_vivado_report('my-hls-test')
```

# FAQ

A list of frequently asked questions and common HLS synthesis issues can be found [here](https://fastmachinelearning.org/hls4ml/faq.html).

# Citation

If you use this software in a publication, please cite the software
```bibtex

docs/attr_doc_gen.py

Lines changed: 149 additions & 0 deletions
@@ -0,0 +1,149 @@
import numbers

import hls4ml.backends as backends
import hls4ml.model.attributes as attributes
import hls4ml.model.layers as layers


class AttrList:
    def __init__(self, cls_name, cls_attrs) -> None:
        self.cls_name = cls_name
        self.config_attrs = [attr for attr in cls_attrs if attr.configurable is True]
        self.type_attrs = [attr for attr in cls_attrs if attr.__class__.__name__ == 'TypeAttribute']
        self.weight_attrs = [attr for attr in cls_attrs if attr.__class__.__name__ == 'WeightAttribute']
        self.base_attrs = [attr for attr in cls_attrs if attr not in self.config_attrs + self.type_attrs + self.weight_attrs]
        self.backend_attrs = {}
        self.reverse_backend_attrs = []  # Will hold (attr, backend_name) pairs, used temporarily
        self.unique_backend_attrs = []

    def add_backend_attrs(self, backend_name, backend_attrs):
        self.backend_attrs[backend_name] = backend_attrs

        for attr in backend_attrs:
            self.reverse_backend_attrs.append((attr, backend_name))

    def sift_backend_attrs(self):
        grouped_dict = {}
        for attr, backend_name in self.reverse_backend_attrs:
            if attr not in grouped_dict:
                grouped_dict[attr] = []
            grouped_dict[attr].append(backend_name)

        for attr, backend_names in grouped_dict.items():
            attr.available_in = backend_names
            self.unique_backend_attrs.append(attr)

    @property
    def only_configurable(self):
        all_attrs = self.config_attrs + self.type_attrs + self.unique_backend_attrs
        return [attr for attr in all_attrs if attr.configurable is True]


def convert_to_attr_list():
    all_backends = backends.get_available_backends()
    # Removing duplicates but preserving order
    all_layers = list(dict.fromkeys(layers.layer_map.values()))
    all_layers_attrs = []

    for layer_cls in all_layers:
        base_attrs = layer_cls.expected_attributes

        attr_list = AttrList(layer_cls.__name__, base_attrs)

        for backend_name in all_backends:
            backend = backends.get_backend(backend_name)

            backend_cls = backend.create_layer_class(layer_cls)
            backend_attrs = backend_cls.expected_attributes

            diff_atts = [
                attr for attr in backend_attrs if attr not in base_attrs
            ]  # Sets are faster, but don't preserve order
            if len(diff_atts) > 0:
                attr_list.add_backend_attrs(backend.name, diff_atts)

        all_layers_attrs.append(attr_list)

    for attr_list in all_layers_attrs:
        attr_list.sift_backend_attrs()

    return all_layers_attrs


def print_attrs(attrs, file):
    for attr in attrs:
        if attr.value_type == numbers.Integral:
            vtype = 'int'
        elif attr.__class__ == attributes.ChoiceAttribute:
            choices = ','.join([str(c) for c in attr.choices])
            vtype = f'list [{choices}]'
        else:
            vtype = attr.value_type.__name__ if hasattr(attr.value_type, '__name__') else str(attr.value_type)

        if attr.default is None:
            file.write('* ' + attr.name + ': ' + vtype + '\n\n')
        else:
            file.write('* ' + attr.name + ': ' + vtype + ' (Default: ' + str(attr.default) + ')\n\n')

        if attr.description is not None:
            file.write(' * ' + attr.description + '\n\n')

        if hasattr(attr, 'available_in'):
            file.write(' * Available in: ' + ', '.join(attr.available_in) + '\n\n')


def write_all_attributes(all_layers_attrs):
    with open('attributes.rst', mode='w') as file:
        file.write('================\n')
        file.write('Layer attributes\n')
        file.write('================\n\n\n')

        for attr_list in all_layers_attrs:
            file.write(attr_list.cls_name + '\n')
            file.write('=' * len(attr_list.cls_name) + '\n')

            if len(attr_list.base_attrs) > 0:
                file.write('Base attributes\n')
                file.write('---------------\n')
                print_attrs(attr_list.base_attrs, file)

            if len(attr_list.type_attrs) > 0:
                file.write('Type attributes\n')
                file.write('---------------\n')
                print_attrs(attr_list.type_attrs, file)

            if len(attr_list.weight_attrs) > 0:
                file.write('Weight attributes\n')
                file.write('-----------------\n')
                print_attrs(attr_list.weight_attrs, file)

            if len(attr_list.config_attrs) > 0:
                file.write('Configurable attributes\n')
                file.write('-----------------------\n')
                print_attrs(attr_list.config_attrs, file)

            if len(attr_list.backend_attrs) > 0:
                file.write('Backend-specific attributes\n')
                file.write('---------------------------\n')
                print_attrs(attr_list.unique_backend_attrs, file)


def write_only_configurable(all_layers_attrs):
    with open('attributes.rst', mode='w') as file:
        file.write('================\n')
        file.write('Layer attributes\n')
        file.write('================\n\n\n')

        for attr_list in all_layers_attrs:
            file.write(attr_list.cls_name + '\n')
            file.write('=' * len(attr_list.cls_name) + '\n')

            config_attrs = attr_list.only_configurable
            if len(config_attrs) > 0:
                print_attrs(config_attrs, file)


if __name__ == '__main__':
    all_layers_attrs = convert_to_attr_list()
    write_all_attributes(all_layers_attrs)
    # write_only_configurable(all_layers_attrs)

docs/faq.rst

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
Frequently asked questions
==========================

**What is hls4ml?**

``hls4ml`` is a tool for converting neural network models into FPGA firmware. It is aimed at low-latency applications, such as triggering at the Large Hadron Collider (LHC) at CERN, but is applicable to other domains requiring microsecond latency. See the full documentation for more details.

**How does hls4ml work?**

``hls4ml`` takes models from Keras, PyTorch and ONNX (optionally quantized with the respective quantization libraries) and produces high-level synthesis code (based on C++) that can be converted into FPGA firmware using the HLS compilers of different vendors (AMD/Xilinx, Intel/Altera, Catapult...).
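
As an illustration, here is a minimal sketch of the typical Keras-to-firmware flow; the ``model`` object and the output directory are placeholders, not prescribed names:

.. code-block:: python

   import hls4ml

   # Generate a baseline hls4ml configuration from a trained Keras model
   config = hls4ml.utils.config_from_keras_model(model, granularity='model')

   # Convert to HLS C++ and compile a bit-accurate software emulation
   hls_model = hls4ml.converters.convert_from_keras_model(
       model, hls_config=config, output_dir='my-hls-test'
   )
   hls_model.compile()

   # Run the vendor HLS flow to synthesize the firmware
   hls_model.build()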

**How is hls4ml so fast?**

``hls4ml`` stores all weights on-chip for fast access and has tuneable parallelism. As a consequence, the size of the model that can be successfully converted into firmware largely depends on the resources available on the target FPGA. It is therefore highly recommended to compress the model with quantization (via QKeras or HGQ for Keras, or Brevitas for PyTorch) and pruning. Additionally, ``hls4ml`` exploits the parallelism available in an FPGA or ASIC by implementing a spatial dataflow architecture.
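
The parallelism is controlled mainly through the ``ReuseFactor`` and ``Strategy`` settings. A minimal sketch, assuming a configuration generated as above and a placeholder ``model``:

.. code-block:: python

   import hls4ml

   # Model-level configuration for a trained Keras model
   config = hls4ml.utils.config_from_keras_model(model, granularity='model')

   # ReuseFactor = 1 fully parallelizes each layer's multiplications (lowest latency,
   # highest resource usage); larger values reuse each multiplier that many times.
   config['Model']['ReuseFactor'] = 4

   # The Resource strategy favors sharing DSPs/BRAM over fully unrolled logic.
   config['Model']['Strategy'] = 'Resource'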

**Will my model work with hls4ml?**

``hls4ml`` supports many common layers found in MLP, CNN and RNN architectures; however, some seldom-used features of these layers may not be supported. Novel architectures such as graph networks or transformers are in various stages of development and are currently not stable for end users. See the status and features page for more information. Models with custom layers can be supported through the extension API. If you encounter a feature that is not yet supported, open a new issue.

**Will my model with X parameters fit on FPGA model Y?**

It depends. ``hls4ml`` has been successfully used with quantized models with O(10k) parameters, while for some architectures going beyond O(1000) parameters is not doable even on the largest FPGAs. The number of parameters is generally not a good estimate of a model's performance on an FPGA, as the computational complexity of different types of NN layers has a large effect on resource consumption. For example, a CNN or GNN may reuse the same parameter in many operations. Furthermore, model compression in the form of quantization and pruning can significantly change the footprint of the model on the FPGA. For these reasons, we discourage the use of this metric for estimating performance.
If you're looking for a quick estimate of the resource usage and latency of a given model without running synthesis, look into the `rule4ml <https://github.com/IMPETUS-UdeS/rule4ml>`_ and `wa-hls4ml <https://github.com/Dendendelen/wa-hls4ml>`_ projects.
LLMs and large vision transformers are neither supported nor planned.

**How do I get started with hls4ml?**

We strongly recommend that users unfamiliar with FPGAs or model compression techniques review the `hls4ml tutorials <https://github.com/fastmachinelearning/hls4ml-tutorial>`_ to get an overview of the features and the conversion workflow.

**How do I contribute to hls4ml development?**

We always welcome new contributions. If you have an interesting feature in mind, feel free to start a new discussion thread with your proposal. We also hold regular online meetings to discuss the status of developments, where you can be invited to present your work. To receive announcements, `request to be added to our CERN e-group <https://e-groups.cern.ch/e-groups/Egroup.do?egroupName=hls-fml>`_. Furthermore, check the `CONTRIBUTING <https://github.com/fastmachinelearning/hls4ml/blob/main/CONTRIBUTING.md>`_ document for the technical requirements for making contributions to the hls4ml project.


Common HLS synthesis issues
***************************

**Stop unrolling loop ... because it may cause large runtime and excessive memory usage due to increase in code size.**
This error is common with models that are too large to fit on the FPGA given the ``IOType`` used. If you are using ``io_parallel``, consider switching to ``io_stream``, which prevents unrolling all arrays. It may also help to use the ``Resource`` strategy. Pruning or quantizing the model may not help, as the problem is related to the size of the loops. If possible, try to reduce the number of neurons/filters in your model to shrink the activation tensors and thus the number of loop iterations.
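
As a hedged sketch of the two settings mentioned above (``model`` is a placeholder for the model being converted):

.. code-block:: python

   import hls4ml

   config = hls4ml.utils.config_from_keras_model(model, granularity='model')

   # Prefer the Resource strategy so large layers are not fully unrolled
   config['Model']['Strategy'] = 'Resource'

   # io_stream streams tensors through FIFOs instead of fully partitioning them
   hls_model = hls4ml.converters.convert_from_keras_model(
       model, hls_config=config, output_dir='my-hls-test', io_type='io_stream'
   )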

**cannot open shared object file ...: No such file or directory.**

This is usually an indication that the compilation failed because incorrect HLS code was produced, which is most likely a bug in hls4ml. Please open a bug report. Note that different underlying issues can produce the same error message, so unless you're sure an existing bug report covers your case, it is better to open a separate one.

**My hls4ml predictions don't match the original Keras/PyTorch/ONNX ones.**

``hls4ml`` uses fixed-point precision types to represent internal data structures, unlike the floating-point types used for computation in upstream ML toolkits. If the chosen bit widths are not sufficiently wide, you may encounter accuracy errors that propagate through the layers. This is especially true for models that are not fully quantized, or models with an insufficient ``accum_t`` bit width. Look into automatic precision inference and the profiling tools to resolve the issue.
Note that bit-exact behavior is not always possible, as many math functions (used by activation functions) are approximated with lookup tables.
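
As an illustrative sketch (``model``, a compiled ``hls_model`` and a test array ``X_test`` are placeholders), one quick way to inspect such mismatches with the profiling utilities:

.. code-block:: python

   import numpy as np
   from hls4ml.model.profiling import numerical

   # Compare the fixed-point emulation against the floating-point reference
   y_ref = model.predict(X_test)
   y_hls = hls_model.predict(X_test)
   print('max abs difference:', np.max(np.abs(y_ref - y_hls)))

   # Plot weight/activation ranges against the configured fixed-point precisions
   numerical(model=model, hls_model=hls_model, X=X_test)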

docs/index.rst

Lines changed: 2 additions & 3 deletions
@@ -5,6 +5,7 @@
concepts
status
setup
+faq
release_notes
reference

@@ -74,6 +75,4 @@ For the latest status including current and planned features, see the :ref:`Stat

Tutorials
=================================
-Detailed tutorials on how to use ``hls4ml``'s various functionalities can be found at:
-
-https://github.com/fastmachinelearning/hls4ml-tutorial
+Detailed tutorials on how to use ``hls4ml``'s various functionalities can be found `here <https://github.com/fastmachinelearning/hls4ml-tutorial>`_.

docs/ir/conv.rst

Lines changed: 6 additions & 0 deletions
@@ -30,3 +30,9 @@ Depthwise convolutions

Pointwise convolutions
======================

Pointwise convolutions are a special case of convolution where the filter size is 1 for 1D or 1x1 for 2D.
For the Xilinx backend, there is a dedicated ``io_parallel``, ``Latency``-strategy implementation of 1D pointwise convolutional layers, integrated in `#881 <https://github.com/fastmachinelearning/hls4ml/pull/881>`_ and developed for `arXiv:2402.01876 <https://arxiv.org/abs/2402.01876>`_.
The reuse factor (RF) is used to split the layer execution and reuse the existing module RF times. The RF also limits the number of multipliers in each module.
The initiation interval scales with the RF. One limitation is that it assumes ``in_width`` is divisible by the RF.
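
As an illustrative sketch (assuming a per-layer configuration generated with ``granularity='name'`` and a layer named ``pointwise_conv_1d``; both the model and the layer name are placeholders), the RF of such a layer is set through the usual per-layer configuration:

.. code-block:: python

   import hls4ml

   config = hls4ml.utils.config_from_keras_model(model, granularity='name')

   # With RF=4 each module handles a quarter of the output positions per pass,
   # so in_width must be divisible by the chosen RF.
   config['LayerName']['pointwise_conv_1d']['ReuseFactor'] = 4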

docs/requirements.txt

Lines changed: 0 additions & 1 deletion
@@ -4,5 +4,4 @@ sphinx>=3.2.1
sphinx_contributors
sphinx_github_changelog
sphinx_rtd_theme
-tensorflow<=2.15
toposort>=1.5.0

hls4ml/backends/catapult/catapult_backend.py

Lines changed: 16 additions & 7 deletions
@@ -32,6 +32,7 @@
from hls4ml.model.optimizer import get_backend_passes, layer_optimizer
from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, PackedType
from hls4ml.report import parse_catapult_report
+from hls4ml.utils import attribute_descriptions as descriptions
from hls4ml.utils.fixed_point_utils import ceil_log2


@@ -51,10 +52,12 @@ def _register_layer_attributes(self):

        for layer in rnn_layers:
            attrs = self.attribute_map.get(layer, [])
-            attrs.append(ConfigurableAttribute('recurrent_reuse_factor', default=1))
-            attrs.append(ConfigurableAttribute('static', value_type=bool, default=True))
-            attrs.append(ConfigurableAttribute('table_size', default=1024))
-            attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8)))
+            attrs.append(ConfigurableAttribute('recurrent_reuse_factor', default=1, description=descriptions.reuse_factor))
+            attrs.append(
+                ConfigurableAttribute('static', value_type=bool, default=True, description=descriptions.recurrent_static)
+            )
+            attrs.append(ConfigurableAttribute('table_size', default=1024, description=descriptions.table_size))
+            attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8), description=descriptions.table_type))
            self.attribute_map[layer] = attrs

        # Add ParallelizationFactor to Conv1D/2D

@@ -65,16 +68,22 @@ def _register_layer_attributes(self):

        for layer in pf_layers:
            attrs = self.attribute_map.get(layer, [])
-            attrs.append(ConfigurableAttribute('parallelization_factor', default=1))
+            attrs.append(ConfigurableAttribute('parallelization_factor', default=1, description=descriptions.conv_pf))
            self.attribute_map[layer] = attrs

        # Add ConvImplementation to Convolution+Pooling layers
        cnn_layers = [Conv1D, Conv2D, SeparableConv1D, SeparableConv2D, DepthwiseConv2D, Pooling1D, Pooling2D]

        for layer in cnn_layers:
            attrs = self.attribute_map.get(layer, [])
-            # attrs.append(ConfigurableAttribute('conv_implementation', value_type=str, default='LineBuffer'))
-            attrs.append(ChoiceAttribute('conv_implementation', choices=['LineBuffer', 'Encoded'], default='LineBuffer'))
+            attrs.append(
+                ChoiceAttribute(
+                    'conv_implementation',
+                    choices=['LineBuffer', 'Encoded'],
+                    default='LineBuffer',
+                    description=descriptions.conv_implementation,
+                )
+            )
            self.attribute_map[layer] = attrs

        sep_conv_layers = [SeparableConv1D, SeparableConv2D]

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 18 additions & 6 deletions
@@ -45,6 +45,7 @@
    UnspecifiedPrecisionType,
    XnorPrecisionType,
)
+from hls4ml.utils import attribute_descriptions as descriptions
from hls4ml.writer import get_writer


@@ -74,7 +75,7 @@ def __init__(self, name):

        for layer in accum_layers:
            attrs = self.attribute_map.get(layer, [])
-            attrs.append(TypeAttribute('accum'))
+            attrs.append(TypeAttribute('accum', description=descriptions.accum_type))
            self.attribute_map[layer] = attrs

        rf_layers = accum_layers + [

@@ -90,7 +91,7 @@ def __init__(self, name):

        for layer in rf_layers:
            attrs = self.attribute_map.get(layer, [])
-            attrs.append(ConfigurableAttribute('reuse_factor', default=1))
+            attrs.append(ConfigurableAttribute('reuse_factor', default=1, description=descriptions.reuse_factor))
            self.attribute_map[layer] = attrs

        # seperable is kind of special because it is effectively two layers that will be split

@@ -104,23 +105,34 @@ def __init__(self, name):
            self.attribute_map[layer] = attrs

        act_attrs = self.attribute_map.get(Activation, [])
-        act_attrs.append(ConfigurableAttribute('table_size', default=1024))
-        act_attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8)))
+        act_attrs.append(ConfigurableAttribute('table_size', default=1024, description=descriptions.table_size))
+        act_attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8), description=descriptions.table_type))
        self.attribute_map[Activation] = act_attrs

        softmax_attrs = self.attribute_map.get(Softmax, [])
-        softmax_attrs.append(ChoiceAttribute('implementation', ['latency', 'stable', 'argmax', 'legacy'], default='stable'))
-        softmax_attrs.append(ConfigurableAttribute('skip', value_type=bool, default=False))
+        softmax_attrs.append(
+            ChoiceAttribute(
+                'implementation',
+                ['latency', 'stable', 'argmax', 'legacy'],
+                default='stable',
+                description=descriptions.softmax_implementation,
+            )
+        )
+        softmax_attrs.append(
+            ConfigurableAttribute('skip', value_type=bool, default=False, description=descriptions.softmax_skip)
+        )
        softmax_attrs.append(
            TypeAttribute(
                'exp_table',
                default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
+                description=descriptions.table_type,
            )
        )
        softmax_attrs.append(
            TypeAttribute(
                'inv_table',
                default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
+                description=descriptions.table_type,
            )
        )
        self.attribute_map[Softmax] = softmax_attrs
