Commit 4aff443
Merge branch 'master' into hls4ml-optimization-api-part-1
2 parents: 7c2d128 + 033d438

34 files changed: +653, −189 lines

.gitlab-ci.yml

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ generator:
   stage: generate
   image: python:3.8-alpine
   tags:
-    - docker
+    - k8s-default
   before_script:
     - pip install pyyaml
   script:

.pre-commit-config.yaml

Lines changed: 4 additions & 4 deletions
@@ -2,15 +2,15 @@ exclude: (^hls4ml\/templates\/(vivado|quartus)\/(ap_types|ac_types)\/|^test/pyte

 repos:
   - repo: https://github.com/psf/black
-    rev: 23.7.0
+    rev: 23.11.0
     hooks:
       - id: black
         language_version: python3
         args: ['--line-length=125',
                '--skip-string-normalization']

   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: check-added-large-files
       - id: check-case-conflict

@@ -30,13 +30,13 @@ repos:
     args: ["--profile", "black", --line-length=125]

   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.10.1
+    rev: v3.15.0
     hooks:
       - id: pyupgrade
         args: ["--py36-plus"]

   - repo: https://github.com/asottile/setup-cfg-fmt
-    rev: v2.4.0
+    rev: v2.5.0
     hooks:
       - id: setup-cfg-fmt

CITATION.cff

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ type: software
 authors:
   - given-names: "FastML Team"
 title: "hls4ml"
-version: "v0.7.1"
+version: "v0.8.0"
 doi: 10.5281/zenodo.1201549
 repository-code: "https://github.com/fastmachinelearning/hls4ml"
 url: "https://fastmachinelearning.org/hls4ml"

README.md

Lines changed: 17 additions & 2 deletions
@@ -1,4 +1,4 @@
-<p float="left">
+<p align="center">
    <img src="https://github.com/fastmachinelearning/fastmachinelearning.github.io/raw/master/images/hls4ml_logo.svg" alt="hls4ml" width="400"/>
 </p>

@@ -69,7 +69,7 @@ If you use this software in a publication, please cite the software
    title = {fastmachinelearning/hls4ml},
    year = 2023,
    publisher = {Zenodo},
-   version = {v0.7.1},
+   version = {v0.8.0},
    doi = {10.5281/zenodo.1201549},
    url = {https://github.com/fastmachinelearning/hls4ml}
 }

@@ -135,3 +135,18 @@ binary/ternary networks:
    year = "2021"
 }
 ```
+
+# Acknowledgments
+If you benefited from participating in our community, we ask that you please acknowledge the Fast Machine Learning collaboration, and particular individuals who helped you, in any publications.
+Please use the following text for this acknowledgment:
+> We acknowledge the Fast Machine Learning collective as an open community of multi-domain experts and collaborators. This community and \<names of individuals\>, in particular, were important for the development of this project.
+
+# Funding
+We gratefully acknowledge previous and current support from the U.S. National Science Foundation (NSF) Harnessing the Data Revolution (HDR) Institute for <a href="https://a3d3.ai">Accelerating AI Algorithms for Data Driven Discovery (A3D3)</a> under Cooperative Agreement No. <a href="https://www.nsf.gov/awardsearch/showAward?AWD_ID=2117997">OAC-2117997</a>, U.S. Department of Energy (DOE) Office of Science, Office of Advanced Scientific Computing Research under the Real‐time Data Reduction Codesign at the Extreme Edge for Science (XDR) Project (<a href="https://science.osti.gov/-/media/grants/pdf/foas/2021/SC_FOA_0002501.pdf">DE-FOA-0002501</a>), DOE Office of Science, Office of High Energy Physics Early Career Research Program (<a href="https://pamspublic.science.energy.gov/WebPAMSExternal/Interface/Common/ViewPublicAbstract.aspx?rv=df0ae4ab-a46e-481a-9acc-3856b6b041e5&rtc=24&PRoleId=10">DE-SC0021187</a>, DE-0000247070), and the European Research Council (ERC) under the European Union's Horizon 2020 research and innovation program (Grant No. <a href="https://doi.org/10.3030/772369">772369</a>).
+
+<p align="center">
+  <img src="https://github.com/fastmachinelearning/hls4ml/assets/29201053/bd1217d4-9930-47b7-8917-ad3fc430c75d" alt="A3D3" width="130"/>
+  <img src="https://github.com/fastmachinelearning/hls4ml/assets/4932543/16e77374-9829-40a8-800e-8d12018a7cb3" alt="NSF" width="130"/>
+  <img src="https://github.com/fastmachinelearning/hls4ml/assets/4932543/de6ca6ea-4d1c-4c56-9d93-f759914bbbf9" alt="DOE" width="130"/>
+  <img src="https://github.com/fastmachinelearning/hls4ml/assets/4932543/7a369971-a381-4bb8-932a-7162b173cbac" alt="ERC" width="130"/>
+</p>

docs/api/configuration.rst

Lines changed: 2 additions & 2 deletions
@@ -70,7 +70,7 @@ It looks like this:
    OutputPredictions: keras/KERAS_3layer_predictions.dat

    # Backend section (Vivado backend)
-   Part: xcku115-flvb2104-2-i
+   Part: xcvu13p-flga2577-2-e
    ClockPeriod: 5
    IOType: io_parallel # options: io_parallel/io_stream

@@ -97,7 +97,7 @@ There are a number of configuration options that you have. Let's go through the
 The backend-specific section of the configuration depends on the backend. You can get a starting point for the necessary settings using, for example `hls4ml.templates.get_backend('Vivado').create_initial_config()`.
 For Vivado backend the options are:

-* **Part**\ : the particular FPGA part number that you are considering, here it's a Xilinx Virtex-7 FPGA
+* **Part**\ : the particular FPGA part number that you are considering, here it's a Xilinx Virtex UltraScale+ VU13P FPGA
 * **ClockPeriod**\ : the clock period, in ns, at which your algorithm runs
 Then you have some optimization parameters for how your algorithm runs:
 * **IOType**\ : your options are ``io_parallel`` or ``io_stream`` which defines the type of data structure used for inputs, intermediate activations between layers, and outputs. For ``io_parallel``, arrays are used that, in principle, can be fully unrolled and are typically implemented in RAMs. For ``io_stream``, HLS streams are used, which are a more efficient/scalable mechanism to represent data that are produced and consumed in a sequential manner. Typically, HLS streams are implemented with FIFOs instead of RAMs. For more information see `here <https://docs.xilinx.com/r/en-US/ug1399-vitis-hls/pragma-HLS-stream>`__.
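To make the backend section above concrete, here is a minimal sketch that mirrors the documented YAML keys as a plain Python dict (the key names and example values come from the docs above; the `validate_io_type` helper is illustrative, not part of the hls4ml API):

```python
# Sketch of the Vivado backend section from the docs above, expressed as a
# plain Python dict; the YAML keys map one-to-one.
vivado_backend_config = {
    'Part': 'xcvu13p-flga2577-2-e',  # Xilinx Virtex UltraScale+ VU13P part number
    'ClockPeriod': 5,                # target clock period, in ns
    'IOType': 'io_parallel',         # or 'io_stream' for FIFO-backed HLS streams
}


def validate_io_type(config):
    """Reject IOType values other than the two documented options."""
    if config['IOType'] not in ('io_parallel', 'io_stream'):
        raise ValueError(f"Unknown IOType: {config['IOType']}")
    return config['IOType']
```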

docs/reference.rst

Lines changed: 32 additions & 4 deletions
@@ -1,6 +1,6 @@
-============================
-Citation and Contributors
-============================
+===========================================
+Citation, Acknowledgments, and Contributors
+===========================================


 Citation

@@ -14,7 +14,7 @@ If you use this software in a publication, please cite the software
    title = {fastmachinelearning/hls4ml},
    year = 2023,
    publisher = {Zenodo},
-   version = {v0.7.1},
+   version = {v0.8.0},
    doi = {10.5281/zenodo.1201549},
    url = {https://github.com/fastmachinelearning/hls4ml}
 }

@@ -86,6 +86,34 @@ binary/ternary networks:
    year = "2021"
 }

+Acknowledgments
+===============
+If you benefited from participating in our community, we ask that you please acknowledge the Fast Machine Learning collaboration, and particular individuals who helped you, in any publications.
+Please use the following text for this acknowledgment:
+
+   We acknowledge the Fast Machine Learning collective as an open community of multi-domain experts and collaborators. This community and \<names of individuals\>, in particular, were important for the development of this project.
+
+
+Funding
+=======
+We gratefully acknowledge previous and current support from the U.S. National Science Foundation (NSF) Harnessing the Data Revolution (HDR) Institute for `Accelerating AI Algorithms for Data Driven Discovery (A3D3) <https://a3d3.ai>`_ under Cooperative Agreement No. `OAC-2117997 <https://www.nsf.gov/awardsearch/showAward?AWD_ID=2117997>`_, U.S. Department of Energy (DOE) Office of Science, Office of Advanced Scientific Computing Research under the Real‐time Data Reduction Codesign at the Extreme Edge for Science (XDR) Project (`DE-FOA-0002501 <https://science.osti.gov/-/media/grants/pdf/foas/2021/SC_FOA_0002501.pdf>`_), DOE Office of Science, Office of High Energy Physics Early Career Research Program (`DE-SC0021187 <https://pamspublic.science.energy.gov/WebPAMSExternal/Interface/Common/ViewPublicAbstract.aspx?rv=df0ae4ab-a46e-481a-9acc-3856b6b041e5&rtc=24&PRoleId=10>`_, DE-0000247070), and the European Research Council (ERC) under the European Union's Horizon 2020 research and innovation program (Grant No. `772369 <https://doi.org/10.3030/772369>`_).
+
+.. image:: https://github.com/fastmachinelearning/hls4ml/assets/4932543/d4b6e2a3-3537-4413-9809-8153a7d624d6
+   :height: 200
+   :align: center
+
+.. image:: https://github.com/fastmachinelearning/hls4ml/assets/4932543/16e77374-9829-40a8-800e-8d12018a7cb3
+   :height: 200
+   :align: center
+
+.. image:: https://github.com/fastmachinelearning/hls4ml/assets/4932543/de6ca6ea-4d1c-4c56-9d93-f759914bbbf9
+   :height: 200
+   :align: center
+
+.. image:: https://github.com/fastmachinelearning/hls4ml/assets/4932543/7a369971-a381-4bb8-932a-7162b173cbac
+   :height: 200
+   :align: center
+
 Contributors
 ============
hls4ml/backends/fpga/passes/clone.py

Lines changed: 6 additions & 8 deletions
@@ -20,21 +20,19 @@ def initialize(self):
 class CloneFunctionTemplate(FunctionCallTemplate):
     def __init__(self):
         super().__init__(Clone, include_header=clone_include_list)
-        self.template = None  # to be filled once number of clones known

     def format(self, node):
         params = self._default_function_params(node)
         for i, _output in enumerate(node.outputs):
             params['output' + str(i + 1)] = node.variables[node.outputs[i]].name

-        if self.template is None:
-            self.template = (
-                'nnet::clone_stream<{input_t}, {output_t}, {size}>({input}, '
-                + ', '.join(['{output' + str(i + 1) + '}' for i in range(len(node.outputs))])
-                + ');'
-            )
+        template = (
+            'nnet::clone_stream<{input_t}, {output_t}, {size}>({input}, '
+            + ', '.join(['{output' + str(i + 1) + '}' for i in range(len(node.outputs))])
+            + ');'
+        )

-        return self.template.format(**params)
+        return template.format(**params)


 def register_clone(backend):
Lines changed: 65 additions & 0 deletions
@@ -0,0 +1,65 @@
+import warnings
+
+from hls4ml.model.layers import Layer, Softmax
+from hls4ml.model.optimizer import OptimizerPass
+
+
+class FixSoftmaxTableSize(OptimizerPass):
+    def match(self, node):
+        return isinstance(node, Softmax)
+
+    def transform(self, model, node: Layer):
+        inp_layer = node.get_input_node()  # type: ignore
+        if not isinstance(inp_layer, Layer):
+            raise RuntimeError(f'Softmax layer {node.name} does not have an input layer')
+
+        input_bw: int = inp_layer.get_attr('result_t').precision.width  # type: ignore
+        table_bw: int = node.get_attr('inv_table_t').precision.width  # type: ignore
+        table_size = int(node.get_attr('table_size'))  # type: ignore
+
+        backend = model.config.config['Backend']
+
+        # The Quartus backend needs one extra bit of headroom for the table;
+        # without it, simulation crashes with a segmentation fault.
+        backend_limitation = -1 if backend == 'Quartus' else 0
+
+        if 2 ** (min(input_bw, table_bw) + backend_limitation) < table_size:
+            # If the table size is too large w.r.t. the input and table bitwidths,
+            # reduce it to avoid undefined behavior when truncating indices from
+            # the fixed-point number.
+            node.set_attr('table_size', str(2 ** (min(input_bw, table_bw) + backend_limitation)))
+        if 2**input_bw < table_size:
+            warnings.warn(
+                (
+                    f"Softmax layer {node.name} table size is too large for input "
+                    f"bitwidth {input_bw}. Setting table size to {2**input_bw}. "
+                    "To avoid this warning, please increase the input bitwidth or "
+                    "decrease the table size."
+                ),
+                stacklevel=1,
+            )
+        if 2**table_bw < table_size:
+            warnings.warn(
+                (
+                    f"Softmax layer {node.name} table size is too large for table "
+                    f"bitwidth {table_bw}. Setting table size to {2**table_bw}. "
+                    "To avoid this warning, please increase the table bitwidth or "
+                    "decrease the table size."
+                ),
+                stacklevel=1,
+            )
+        if backend == 'Quartus':
+            warnings.warn(
+                (
+                    "The Quartus backend's table size is 2^min(input_bw-1, table_bw-1), "
+                    "half of the 2^min(input_bw, table_bw) used by other backends."
+                ),
+                stacklevel=1,
+            )
+        return False
+
+
+def register_softmax__table_size_fix(backend):
+    backend.register_pass('fix_softmax_table_size', FixSoftmaxTableSize)
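The clamping rule this pass applies can be isolated into a few lines. A minimal sketch, with the helper name and example bitwidths chosen for illustration (the real pass reads these from layer attributes):

```python
def clamped_table_size(input_bw, table_bw, table_size, backend):
    # Mirror of the clamping rule in FixSoftmaxTableSize above: the table is
    # indexed using the smaller of the input/table bitwidths, and the Quartus
    # backend needs one extra bit of headroom, halving its effective limit.
    backend_limitation = -1 if backend == 'Quartus' else 0
    limit = 2 ** (min(input_bw, table_bw) + backend_limitation)
    return min(table_size, limit)


# Example: a 4096-entry table with a 10-bit input and 18-bit table type is
# cut to 1024 entries on Vivado and 512 on Quartus.
```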

hls4ml/backends/fpga/passes/repack_stream.py

Lines changed: 2 additions & 0 deletions
@@ -59,6 +59,8 @@ def transform(self, model, node):

         # Insert new Repack node instead of Reshape
         repack_layer = model.make_node(Repack, 'repack_' + node.name, attrs, node.inputs.copy())
+        # As the result_t attribute is not honored by type conversion, set it manually here
+        repack_layer.attributes[repack_layer.name].type = node.attributes[node.name].type
         model.replace_node(node, repack_layer)

         return True
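The two added lines carry the Reshape node's output type over to the replacement Repack node by hand. A toy sketch of that pattern, using a hypothetical `Node` class (the real hls4ml nodes store a richer attribute object under the node's own name):

```python
class Node:
    # Toy stand-in for an hls4ml graph node: each node keeps a per-name
    # attribute dict, with the output type stored under the node's own name.
    def __init__(self, name, output_type):
        self.name = name
        self.attributes = {name: {'type': output_type}}


reshape = Node('reshape_1', 'layer4_t')
repack = Node('repack_reshape_1', None)

# Without this explicit copy, the replacement node's output type would stay
# unset, because type conversion does not honor the result_t attribute.
repack.attributes[repack.name]['type'] = reshape.attributes[reshape.name]['type']
```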

hls4ml/backends/quartus/quartus_backend.py

Lines changed: 1 addition & 0 deletions
@@ -72,6 +72,7 @@ def _register_flows(self):
             'quartus:inplace_parallel_reshape',
             'quartus:inplace_stream_flatten',
             'quartus:skip_softmax',
+            'quartus:fix_softmax_table_size',
         ]
         optimization_flow = register_flow('optimize', optimization_passes, requires=[init_flow], backend=self.name)
