Commit 7cf4134

Merge branch 'update-readme' of https://github.com/fastmachinelearning/hls4ml into update-readme
2 parents: c65e915 + eac61dd

File tree: 19 files changed, +406 −52 lines

README.md

Lines changed: 4 additions & 0 deletions
@@ -63,6 +63,10 @@ hls_model.build()
hls4ml.report.read_vivado_report('my-hls-test')
```

# FAQ

A list of frequently asked questions and common HLS synthesis issues can be found [here](https://fastmachinelearning.org/hls4ml/faq.html).

# Citation

If you use this software in a publication, please cite the software
```bibtex

docs/attr_doc_gen.py

Lines changed: 149 additions & 0 deletions
@@ -0,0 +1,149 @@
import numbers

import hls4ml.backends as backends
import hls4ml.model.attributes as attributes
import hls4ml.model.layers as layers


class AttrList:
    def __init__(self, cls_name, cls_attrs) -> None:
        self.cls_name = cls_name
        self.config_attrs = [attr for attr in cls_attrs if attr.configurable is True]
        self.type_attrs = [attr for attr in cls_attrs if attr.__class__.__name__ == 'TypeAttribute']
        self.weight_attrs = [attr for attr in cls_attrs if attr.__class__.__name__ == 'WeightAttribute']
        self.base_attrs = [attr for attr in cls_attrs if attr not in self.config_attrs + self.type_attrs + self.weight_attrs]
        self.backend_attrs = {}
        self.reverse_backend_attrs = []  # Will hold (attr, backend_name) pairs, used temporarily
        self.unique_backend_attrs = []

    def add_backend_attrs(self, backend_name, backend_attrs):
        self.backend_attrs[backend_name] = backend_attrs

        for attr in backend_attrs:
            self.reverse_backend_attrs.append((attr, backend_name))

    def sift_backend_attrs(self):
        grouped_dict = {}
        for attr, backend_name in self.reverse_backend_attrs:
            if attr not in grouped_dict:
                grouped_dict[attr] = []
            grouped_dict[attr].append(backend_name)

        for attr, backend_names in grouped_dict.items():
            attr.available_in = backend_names
            self.unique_backend_attrs.append(attr)

    @property
    def only_configurable(self):
        all_attrs = self.config_attrs + self.type_attrs + self.unique_backend_attrs
        return [attr for attr in all_attrs if attr.configurable is True]


def convert_to_attr_list():
    all_backends = backends.get_available_backends()
    # Removing duplicates but preserving order
    all_layers = list(dict.fromkeys(layers.layer_map.values()))
    all_layers_attrs = []

    for layer_cls in all_layers:
        base_attrs = layer_cls.expected_attributes

        attr_list = AttrList(layer_cls.__name__, base_attrs)

        for backend_name in all_backends:
            backend = backends.get_backend(backend_name)

            backend_cls = backend.create_layer_class(layer_cls)
            backend_attrs = backend_cls.expected_attributes

            diff_atts = [
                attr for attr in backend_attrs if attr not in base_attrs
            ]  # Sets are faster, but don't preserve order
            if len(diff_atts) > 0:
                attr_list.add_backend_attrs(backend.name, diff_atts)

        all_layers_attrs.append(attr_list)

    for attr_list in all_layers_attrs:
        attr_list.sift_backend_attrs()

    return all_layers_attrs


def print_attrs(attrs, file):
    for attr in attrs:
        if attr.value_type == numbers.Integral:
            vtype = 'int'
        elif attr.__class__ == attributes.ChoiceAttribute:
            choices = ','.join([str(c) for c in attr.choices])
            vtype = f'list [{choices}]'
        else:
            vtype = attr.value_type.__name__ if hasattr(attr.value_type, '__name__') else str(attr.value_type)

        if attr.default is None:
            file.write('* ' + attr.name + ': ' + vtype + '\n\n')
        else:
            file.write('* ' + attr.name + ': ' + vtype + ' (Default: ' + str(attr.default) + ')\n\n')

        if attr.description is not None:
            file.write(' * ' + attr.description + '\n\n')

        if hasattr(attr, 'available_in'):
            file.write(' * Available in: ' + ', '.join(attr.available_in) + '\n\n')


def write_all_attributes(all_layers_attrs):
    with open('attributes.rst', mode='w') as file:
        file.write('================\n')
        file.write('Layer attributes\n')
        file.write('================\n\n\n')

        for attr_list in all_layers_attrs:
            file.write(attr_list.cls_name + '\n')
            file.write('=' * len(attr_list.cls_name) + '\n')

            if len(attr_list.base_attrs) > 0:
                file.write('Base attributes\n')
                file.write('---------------\n')
                print_attrs(attr_list.base_attrs, file)

            if len(attr_list.type_attrs) > 0:
                file.write('Type attributes\n')
                file.write('---------------\n')
                print_attrs(attr_list.type_attrs, file)

            if len(attr_list.weight_attrs) > 0:
                file.write('Weight attributes\n')
                file.write('-----------------\n')
                print_attrs(attr_list.weight_attrs, file)

            if len(attr_list.config_attrs) > 0:
                file.write('Configurable attributes\n')
                file.write('-----------------------\n')
                print_attrs(attr_list.config_attrs, file)

            if len(attr_list.backend_attrs) > 0:
                file.write('Backend-specific attributes\n')
                file.write('---------------------------\n')
                print_attrs(attr_list.unique_backend_attrs, file)


def write_only_configurable(all_layers_attrs):
    with open('attributes.rst', mode='w') as file:
        file.write('================\n')
        file.write('Layer attributes\n')
        file.write('================\n\n\n')

        for attr_list in all_layers_attrs:
            file.write(attr_list.cls_name + '\n')
            file.write('=' * len(attr_list.cls_name) + '\n')

            config_attrs = attr_list.only_configurable
            if len(config_attrs) > 0:
                print_attrs(config_attrs, file)


if __name__ == '__main__':
    all_layers_attrs = convert_to_attr_list()
    write_all_attributes(all_layers_attrs)
    # write_only_configurable(all_layers_attrs)

docs/faq.rst

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
Frequently asked questions
==========================

**What is hls4ml?**

``hls4ml`` is a tool for converting neural network models into FPGA firmware. It is aimed at low-latency applications, such as triggering at the Large Hadron Collider (LHC) at CERN, but is applicable to other domains requiring microsecond latency. See the full documentation for more details.

**How does hls4ml work?**

``hls4ml`` takes models from Keras, PyTorch and ONNX (optionally quantized with the respective quantization libraries) and produces high-level synthesis code (based on C++) that can be converted into FPGA firmware using the HLS compilers of different vendors (AMD/Xilinx, Intel/Altera, Catapult...).
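
As an illustration, here is a minimal sketch of the typical Keras-to-firmware flow; the ``model`` object and the output directory are placeholders, not prescribed names:

.. code-block:: python

   import hls4ml

   # Generate a baseline hls4ml configuration from a trained Keras model
   config = hls4ml.utils.config_from_keras_model(model, granularity='model')

   # Convert to HLS C++ and compile a bit-accurate software emulation
   hls_model = hls4ml.converters.convert_from_keras_model(
       model, hls_config=config, output_dir='my-hls-test'
   )
   hls_model.compile()

   # Run the vendor HLS flow to synthesize the firmware
   hls_model.build()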

**How is hls4ml so fast?**

``hls4ml`` stores all weights on-chip for fast access and has tuneable parallelism. As a consequence, the size of the model that can be successfully converted into firmware largely depends on the resources available on the target FPGA. It is therefore highly recommended to compress the model with quantization (via QKeras or HGQ for Keras, or Brevitas for PyTorch) and pruning. Additionally, ``hls4ml`` exploits the parallelism available in an FPGA or ASIC by implementing a spatial dataflow architecture.
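
The parallelism is controlled mainly through the ``ReuseFactor`` and ``Strategy`` settings. A minimal sketch, assuming a configuration generated as above and a placeholder ``model``:

.. code-block:: python

   import hls4ml

   # Model-level configuration for a trained Keras model
   config = hls4ml.utils.config_from_keras_model(model, granularity='model')

   # ReuseFactor = 1 fully parallelizes each layer's multiplications (lowest latency,
   # highest resource usage); larger values reuse each multiplier that many times.
   config['Model']['ReuseFactor'] = 4

   # The Resource strategy favors sharing DSPs/BRAM over fully unrolled logic.
   config['Model']['Strategy'] = 'Resource'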

**Will my model work with hls4ml?**

``hls4ml`` supports many common layers found in MLP, CNN and RNN architectures; however, some seldom-used features of these layers may not be supported. Novel architectures such as graph networks or transformers are in various stages of development and are currently not stable for end users. See the status and features page for more information. Models with custom layers can be supported through the extension API. If you encounter a feature that is not yet supported, open a new issue.

**Will my model with X parameters fit on FPGA model Y?**

It depends. ``hls4ml`` has been successfully used with quantized models with O(10k) parameters, while for some architectures going beyond O(1000) parameters is not doable even on the largest FPGAs. The number of parameters is generally not a good estimate of a model's performance on an FPGA, as the computational complexity of different types of NN layers has a large effect on resource consumption. For example, a CNN or GNN may reuse the same parameter in many operations. Furthermore, model compression in the form of quantization and pruning can significantly change the footprint of the model on the FPGA. For these reasons, we discourage the use of this metric for estimating performance.
If you're looking for a quick estimate of the resource usage and latency of a given model without running synthesis, look into the `rule4ml <https://github.com/IMPETUS-UdeS/rule4ml>`_ and `wa-hls4ml <https://github.com/Dendendelen/wa-hls4ml>`_ projects.
LLMs and large vision transformers are neither supported nor planned.

**How do I get started with hls4ml?**

We strongly recommend that users unfamiliar with FPGAs or model compression techniques review the `hls4ml tutorials <https://github.com/fastmachinelearning/hls4ml-tutorial>`_ to get an overview of the features and the conversion workflow.

**How do I contribute to hls4ml development?**

We always welcome new contributions. If you have an interesting feature in mind, feel free to start a new discussion thread with your proposal. We also hold regular online meetings to discuss the status of developments, where you can be invited to present your work. To receive announcements, `request to be added to our CERN e-group <https://e-groups.cern.ch/e-groups/Egroup.do?egroupName=hls-fml>`_. Furthermore, check the `CONTRIBUTING <https://github.com/fastmachinelearning/hls4ml/blob/main/CONTRIBUTING.md>`_ document for the technical requirements for making contributions to the hls4ml project.


Common HLS synthesis issues
***************************

**Stop unrolling loop ... because it may cause large runtime and excessive memory usage due to increase in code size.**
This error is common with models that are too large to fit on the FPGA given the ``IOType`` used. If you are using ``io_parallel``, consider switching to ``io_stream``, which prevents unrolling all arrays. It may also help to use the ``Resource`` strategy. Pruning or quantizing the model may not help, as the problem is related to the size of the loops. If possible, try to reduce the number of neurons/filters in your model to shrink the activation tensors and thus the number of loop iterations.
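
As a hedged sketch of the two settings mentioned above (``model`` is a placeholder for the model being converted):

.. code-block:: python

   import hls4ml

   config = hls4ml.utils.config_from_keras_model(model, granularity='model')

   # Prefer the Resource strategy so large layers are not fully unrolled
   config['Model']['Strategy'] = 'Resource'

   # io_stream streams tensors through FIFOs instead of fully partitioning them
   hls_model = hls4ml.converters.convert_from_keras_model(
       model, hls_config=config, output_dir='my-hls-test', io_type='io_stream'
   )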

**cannot open shared object file ...: No such file or directory.**

This is usually an indication that the compilation failed because incorrect HLS code was produced, which is most likely a bug in hls4ml. Please open a bug report. Note that different underlying issues can produce the same error message, so unless you're sure an existing bug report covers your case, it is better to open a separate one.

**My hls4ml predictions don't match the original Keras/PyTorch/ONNX ones.**

``hls4ml`` uses fixed-point precision types to represent internal data structures, unlike the floating-point types used for computation in upstream ML toolkits. If the chosen bit widths are not sufficiently wide, you may encounter accuracy errors that propagate through the layers. This is especially true for models that are not fully quantized, or models with an insufficient ``accum_t`` bit width. Look into automatic precision inference and the profiling tools to resolve the issue.
Note that bit-exact behavior is not always possible, as many math functions (used by activation functions) are approximated with lookup tables.
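
As an illustrative sketch (``model``, a compiled ``hls_model`` and a test array ``X_test`` are placeholders), one quick way to inspect such mismatches with the profiling utilities:

.. code-block:: python

   import numpy as np
   from hls4ml.model.profiling import numerical

   # Compare the fixed-point emulation against the floating-point reference
   y_ref = model.predict(X_test)
   y_hls = hls_model.predict(X_test)
   print('max abs difference:', np.max(np.abs(y_ref - y_hls)))

   # Plot weight/activation ranges against the configured fixed-point precisions
   numerical(model=model, hls_model=hls_model, X=X_test)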

docs/index.rst

Lines changed: 2 additions & 3 deletions
@@ -5,6 +5,7 @@
concepts
status
setup
+faq
release_notes
reference

@@ -74,6 +75,4 @@ For the latest status including current and planned features, see the :ref:`Stat

Tutorials
=================================
-Detailed tutorials on how to use ``hls4ml``'s various functionalities can be found at:
-
-https://github.com/fastmachinelearning/hls4ml-tutorial
+Detailed tutorials on how to use ``hls4ml``'s various functionalities can be found `here <https://github.com/fastmachinelearning/hls4ml-tutorial>`_.

docs/ir/conv.rst

Lines changed: 6 additions & 0 deletions
@@ -30,3 +30,9 @@ Depthwise convolutions

Pointwise convolutions
======================

Pointwise convolutions are a special case of convolution where the filter size is 1 for 1D or 1x1 for 2D.
For the Xilinx backend, there is a dedicated ``io_parallel``, ``Latency``-strategy implementation of 1D pointwise convolutional layers, integrated in `#881 <https://github.com/fastmachinelearning/hls4ml/pull/881>`_ and developed for `arXiv:2402.01876 <https://arxiv.org/abs/2402.01876>`_.
The reuse factor (RF) is used to split the layer execution and reuse the existing module RF times. The RF also limits the number of multipliers in each module.
The initiation interval scales with the RF. One limitation is that it assumes ``in_width`` is divisible by the RF.
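
As an illustrative sketch (assuming a per-layer configuration generated with ``granularity='name'`` and a layer named ``pointwise_conv_1d``; both the model and the layer name are placeholders), the RF of such a layer is set through the usual per-layer configuration:

.. code-block:: python

   import hls4ml

   config = hls4ml.utils.config_from_keras_model(model, granularity='name')

   # With RF=4 each module handles a quarter of the output positions per pass,
   # so in_width must be divisible by the chosen RF.
   config['LayerName']['pointwise_conv_1d']['ReuseFactor'] = 4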

docs/requirements.txt

Lines changed: 0 additions & 1 deletion
@@ -4,5 +4,4 @@ sphinx>=3.2.1
sphinx_contributors
sphinx_github_changelog
sphinx_rtd_theme
-tensorflow<=2.15
toposort>=1.5.0

hls4ml/backends/catapult/catapult_backend.py

Lines changed: 16 additions & 7 deletions
@@ -32,6 +32,7 @@
from hls4ml.model.optimizer import get_backend_passes, layer_optimizer
from hls4ml.model.types import FixedPrecisionType, IntegerPrecisionType, NamedType, PackedType
from hls4ml.report import parse_catapult_report
+from hls4ml.utils import attribute_descriptions as descriptions
from hls4ml.utils.fixed_point_utils import ceil_log2


@@ -51,10 +52,12 @@ def _register_layer_attributes(self):

        for layer in rnn_layers:
            attrs = self.attribute_map.get(layer, [])
-            attrs.append(ConfigurableAttribute('recurrent_reuse_factor', default=1))
-            attrs.append(ConfigurableAttribute('static', value_type=bool, default=True))
-            attrs.append(ConfigurableAttribute('table_size', default=1024))
-            attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8)))
+            attrs.append(ConfigurableAttribute('recurrent_reuse_factor', default=1, description=descriptions.reuse_factor))
+            attrs.append(
+                ConfigurableAttribute('static', value_type=bool, default=True, description=descriptions.recurrent_static)
+            )
+            attrs.append(ConfigurableAttribute('table_size', default=1024, description=descriptions.table_size))
+            attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8), description=descriptions.table_type))
            self.attribute_map[layer] = attrs

        # Add ParallelizationFactor to Conv1D/2D

@@ -65,16 +68,22 @@ def _register_layer_attributes(self):

        for layer in pf_layers:
            attrs = self.attribute_map.get(layer, [])
-            attrs.append(ConfigurableAttribute('parallelization_factor', default=1))
+            attrs.append(ConfigurableAttribute('parallelization_factor', default=1, description=descriptions.conv_pf))
            self.attribute_map[layer] = attrs

        # Add ConvImplementation to Convolution+Pooling layers
        cnn_layers = [Conv1D, Conv2D, SeparableConv1D, SeparableConv2D, DepthwiseConv2D, Pooling1D, Pooling2D]

        for layer in cnn_layers:
            attrs = self.attribute_map.get(layer, [])
-            # attrs.append(ConfigurableAttribute('conv_implementation', value_type=str, default='LineBuffer'))
-            attrs.append(ChoiceAttribute('conv_implementation', choices=['LineBuffer', 'Encoded'], default='LineBuffer'))
+            attrs.append(
+                ChoiceAttribute(
+                    'conv_implementation',
+                    choices=['LineBuffer', 'Encoded'],
+                    default='LineBuffer',
+                    description=descriptions.conv_implementation,
+                )
+            )
            self.attribute_map[layer] = attrs

        sep_conv_layers = [SeparableConv1D, SeparableConv2D]

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 18 additions & 6 deletions
@@ -45,6 +45,7 @@
    UnspecifiedPrecisionType,
    XnorPrecisionType,
)
+from hls4ml.utils import attribute_descriptions as descriptions
from hls4ml.writer import get_writer


@@ -74,7 +75,7 @@ def __init__(self, name):

        for layer in accum_layers:
            attrs = self.attribute_map.get(layer, [])
-            attrs.append(TypeAttribute('accum'))
+            attrs.append(TypeAttribute('accum', description=descriptions.accum_type))
            self.attribute_map[layer] = attrs

        rf_layers = accum_layers + [

@@ -90,7 +91,7 @@ def __init__(self, name):

        for layer in rf_layers:
            attrs = self.attribute_map.get(layer, [])
-            attrs.append(ConfigurableAttribute('reuse_factor', default=1))
+            attrs.append(ConfigurableAttribute('reuse_factor', default=1, description=descriptions.reuse_factor))
            self.attribute_map[layer] = attrs

        # seperable is kind of special because it is effectively two layers that will be split

@@ -104,23 +105,34 @@ def __init__(self, name):
            self.attribute_map[layer] = attrs

        act_attrs = self.attribute_map.get(Activation, [])
-        act_attrs.append(ConfigurableAttribute('table_size', default=1024))
-        act_attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8)))
+        act_attrs.append(ConfigurableAttribute('table_size', default=1024, description=descriptions.table_size))
+        act_attrs.append(TypeAttribute('table', default=FixedPrecisionType(18, 8), description=descriptions.table_type))
        self.attribute_map[Activation] = act_attrs

        softmax_attrs = self.attribute_map.get(Softmax, [])
-        softmax_attrs.append(ChoiceAttribute('implementation', ['latency', 'stable', 'argmax', 'legacy'], default='stable'))
-        softmax_attrs.append(ConfigurableAttribute('skip', value_type=bool, default=False))
+        softmax_attrs.append(
+            ChoiceAttribute(
+                'implementation',
+                ['latency', 'stable', 'argmax', 'legacy'],
+                default='stable',
+                description=descriptions.softmax_implementation,
+            )
+        )
+        softmax_attrs.append(
+            ConfigurableAttribute('skip', value_type=bool, default=False, description=descriptions.softmax_skip)
+        )
        softmax_attrs.append(
            TypeAttribute(
                'exp_table',
                default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
+                description=descriptions.table_type,
            )
        )
        softmax_attrs.append(
            TypeAttribute(
                'inv_table',
                default=FixedPrecisionType(18, 8, rounding_mode=RoundingMode.RND, saturation_mode=SaturationMode.SAT),
+                description=descriptions.table_type,
            )
        )
        self.attribute_map[Softmax] = softmax_attrs
