
Commit cf91c3b

Merge branch 'main' into attrs_desc
2 parents 88c1fe7 + 2fc8941 commit cf91c3b

21 files changed: 559 additions & 64 deletions

example-models

hls4ml/backends/vivado/passes/convolution_templates.py

Lines changed: 23 additions & 2 deletions
@@ -60,6 +60,8 @@
     typedef {config_t} mult_config;
     template<unsigned K, unsigned S, unsigned W>
     using scale_index = nnet::{scale_index_type}<K, S, W>;
+    template<class data_T, class res_T, class CONFIG_T>
+    using conv_kernel = nnet::{conv_fn}<data_T, res_T, CONFIG_T>;
 }};
 const ap_uint<config{index}::filt_width> config{index}::pixels[] = {{{instructions}}};\n"""

@@ -93,11 +95,30 @@ def format(self, node):
         else:
             params['fill_fn'] = 'FillConv1DBuffer'

+        is_pointwise_parallel_latency = (
+            node.get_attr('filt_width') == 1
+            and node.get_attr('strategy').lower() == 'latency'
+            and node.model.config.get_config_value('IOType') == 'io_parallel'
+        )
+        if is_pointwise_parallel_latency:
+            params['conv_fn'] = f'pointwise_conv_{node.index}'
+        else:
+            if node.get_attr('strategy').lower() == 'latency':
+                params['conv_fn'] = 'Conv1DLatency'
+            else:
+                params['conv_fn'] = 'Conv1DResource'
+
         conv_config = self.template.format(**params)

         mult_params = self._default_config_params(node)
-        mult_params['n_in'] = node.get_attr('n_chan') * node.get_attr('filt_width')
-        mult_params['n_out'] = node.get_attr('n_filt')
+        if is_pointwise_parallel_latency:
+            mult_params['n_in'] = int(
+                node.get_attr('in_width') * node.get_attr('n_chan') * node.get_attr('filt_width') / mult_params['reuse']
+            )
+            mult_params['n_out'] = int(node.get_attr('in_width') * node.get_attr('n_filt') / mult_params['reuse'])
+        else:
+            mult_params['n_in'] = node.get_attr('n_chan') * node.get_attr('filt_width')
+            mult_params['n_out'] = node.get_attr('n_filt')
         mult_params['nzeros'] = node.get_weights('weight').nzeros
         mult_params['product_type'] = get_backend('vivado').product_type(
             node.get_input_variable().type.precision, node.get_weights('weight').type.precision
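For orientation, the kernel-selection rule added above can be read as a small standalone sketch. The helper name pick_conv_fn and the attribute values below are illustrative, not part of hls4ml or this commit:

    # Minimal sketch of the conv_kernel selection added to the config template (hypothetical values).
    def pick_conv_fn(filt_width, strategy, io_type, layer_index):
        is_pointwise_parallel_latency = (
            filt_width == 1 and strategy.lower() == 'latency' and io_type == 'io_parallel'
        )
        if is_pointwise_parallel_latency:
            return f'pointwise_conv_{layer_index}'  # layer-specific kernel produced by the new codegen pass
        return 'Conv1DLatency' if strategy.lower() == 'latency' else 'Conv1DResource'

    print(pick_conv_fn(1, 'Latency', 'io_parallel', 2))  # pointwise_conv_2
    print(pick_conv_fn(3, 'Resource', 'io_stream', 2))   # Conv1DResource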
Lines changed: 84 additions & 0 deletions
@@ -0,0 +1,84 @@
+from hls4ml.model.layers import Conv1D
+from hls4ml.model.optimizer import OptimizerPass
+from hls4ml.model.types import Source
+
+
+def generate_pointwise_conv1d_fn(layer_idx, reuse_factor=1):
+    """Generate a C++ function for a pointwise convolution layer.
+
+    Args:
+        layer_idx (int): Index of layer ('index' attribute).
+        reuse_factor (int): Number of partitions to divide the input into.
+
+    Returns:
+        str: Generated C++ function
+    """
+
+    generated_code = (
+        'template<class data_T, class res_T, typename CONFIG_T>\n'
+        'class pointwise_conv_{index} : public Conv1DKernel<data_T, res_T, CONFIG_T> {{\n'
+        '  public:\n'
+        '    static void conv(\n'
+        '        data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],\n'
+        '        res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],\n'
+        '        typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],\n'
+        '        typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]) {{\n'
+        '        data_T data_tmp[CONFIG_T::reuse_factor][CONFIG_T::in_width * CONFIG_T::n_chan / CONFIG_T::reuse_factor];\n'  # noqa: E501
+        '        #pragma HLS ARRAY_PARTITION variable=data_tmp complete dim=0\n'
+        '        res_T res_tmp[CONFIG_T::reuse_factor][CONFIG_T::out_width * CONFIG_T::n_filt / CONFIG_T::reuse_factor];\n'  # noqa: E501
+        '        #pragma HLS ARRAY_PARTITION variable=res_tmp complete dim=0\n\n'
+        '        RFInputLoop:\n'
+        '        for (int jj = 0; jj < CONFIG_T::reuse_factor; jj++) {{\n'
+        '            #pragma HLS UNROLL\n'
+        '            InnerInputLoop:\n'
+        '            for (int ii = 0; ii < CONFIG_T::in_width * CONFIG_T::n_chan / CONFIG_T::reuse_factor; ii++) {{\n'
+        '                #pragma HLS UNROLL\n'
+        '                data_tmp[jj][ii] = data[jj * CONFIG_T::in_width * CONFIG_T::n_chan / CONFIG_T::reuse_factor + ii];\n'  # noqa: E501
+        '            }}\n'
+        '        }}\n\n'
+    ).format(index=layer_idx)
+    indent = '    '
+    for i in range(reuse_factor):
+        generated_code += indent
+        generated_code += (
+            f'pointwise_conv_1d_latency_cl<data_T, res_T, CONFIG_T>(data_tmp[{i}], res_tmp[{i}], weights, biases);\n'
+        )
+
+    generated_code += (
+        '\n'
+        '        RFOutputLoop:\n'
+        '        for (int jj = 0; jj < CONFIG_T::reuse_factor; jj++) {\n'
+        '            #pragma HLS UNROLL\n'
+        '            InnerOutputLoop:\n'
+        '            for (int ii = 0; ii < CONFIG_T::out_width * CONFIG_T::n_filt / CONFIG_T::reuse_factor; ii++) {\n'
+        '                #pragma HLS UNROLL\n'
+        '                res[jj * CONFIG_T::out_width * CONFIG_T::n_filt / CONFIG_T::reuse_factor + ii] = res_tmp[jj][ii];\n'  # noqa: E501
+        '            }\n'
+        '        }\n'
+        '    }\n'
+        '};\n'
+    )
+
+    return generated_code
+
+
+class GeneratePointwiseConv1D(OptimizerPass):
+    '''Generates code for pointwise 1D convolution'''
+
+    def match(self, node):
+        return (
+            isinstance(node, Conv1D)
+            and node.model.config.get_config_value('IOType') == 'io_parallel'
+            and node.get_attr('filt_width') == 1
+        )
+
+    def transform(self, model, node):
+        self._generate_pointwise_conv1d(node)
+
+    def _generate_pointwise_conv1d(self, node):
+        code_str = generate_pointwise_conv1d_fn(
+            node.get_attr('index'),
+            node.get_attr('reuse_factor'),
+        )
+
+        node.set_attr('pointwise_conv1d_codegen', Source(code_str))
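A quick usage sketch, assuming generate_pointwise_conv1d_fn from the new pass above is in scope; the layer index and reuse factor are illustrative values, not taken from the commit:

    # Generates the C++ kernel string that the backend later emits via the Source attribute.
    code = generate_pointwise_conv1d_fn(layer_idx=3, reuse_factor=2)
    print(code.splitlines()[1])  # class pointwise_conv_3 : public Conv1DKernel<data_T, res_T, CONFIG_T> {
    # One pointwise_conv_1d_latency_cl call is emitted per reuse partition.
    assert 'data_tmp[0]' in code and 'data_tmp[1]' in code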

hls4ml/backends/vivado/vivado_backend.py

Lines changed: 1 addition & 0 deletions
@@ -123,6 +123,7 @@ def _register_flows(self):
             'vivado:generate_conv_streaming_instructions',
             'vivado:apply_resource_strategy',
             'vivado:generate_conv_im2col',
+            'vivado:generate_pointwise_conv1_d',
             'vivado:generate_unrolled_dense_resource',
             'vivado:set_pipeline_style',
         ]

hls4ml/converters/onnx/reshape.py

Lines changed: 23 additions & 1 deletion
@@ -1,4 +1,4 @@
-from hls4ml.converters.onnx_to_hls import onnx_handler
+from hls4ml.converters.onnx_to_hls import get_onnx_attribute, onnx_handler


 @onnx_handler('Transpose')

@@ -36,3 +36,25 @@ def parse_flatten_layer(node, input_names, input_shapes, graph):
     layer['target_shape'] = [-1]  # does not contain batch dimension

     return layer
+
+
+@onnx_handler('Resize')
+def parse_resize_layer(node, input_names, input_shapes, graph):
+    layer = {}
+    layer['name'] = node.name
+    layer['class_name'] = 'Resize'
+    layer['inputs'] = input_names
+    layer['outputs'] = list(node.output)
+    layer['in_height'] = input_shapes[0][2]
+    layer['in_width'] = input_shapes[0][1]
+    layer['out_width'] = input_shapes[0][1]
+    layer['out_height'] = input_shapes[0][2]
+    layer['n_chan'] = input_shapes[0][3]
+    layer['algorithm'] = get_onnx_attribute(node, 'mode')
+    # The following is used in initialize() method.
+    # Probably a better solution would be to have a channels last parameter at QONNX level
+    layer['data_format'] = (
+        'channels_last' if any(node.domain == 'qonnx.custom_op.channels_last' for node in graph.node) else 'channels_first'
+    )
+
+    return layer
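The data_format decision above only checks whether any node in the graph sits in the QONNX channels-last domain. A stand-alone sketch of that check, using stand-in node objects rather than real ONNX protobufs:

    from types import SimpleNamespace

    # Stand-in graph: one node converted by qonnx to the channels-last domain.
    graph_nodes = [SimpleNamespace(domain='qonnx.custom_op.channels_last'), SimpleNamespace(domain='')]
    data_format = (
        'channels_last' if any(n.domain == 'qonnx.custom_op.channels_last' for n in graph_nodes) else 'channels_first'
    )
    print(data_format)  # channels_last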

hls4ml/model/layers.py

Lines changed: 60 additions & 13 deletions
@@ -1148,20 +1148,67 @@ class Resize(Layer):
     def initialize(self):
         inp = self.get_input_variable()

-        if self.get_attr('data_format') == 'channels_last':
-            if len(inp.shape) == 2:  # 1D -> width + chan
-                shape = [self.get_attr('out_width'), self.get_attr('n_chan')]
-                dims = [f'OUT_WIDTH_{self.index}', f'N_CHAN_{self.index}']
-            elif len(inp.shape) == 3:  # 2D -> height + width + chan
-                shape = [self.get_attr('out_height'), self.get_attr('out_width'), self.get_attr('n_chan')]
-                dims = [f'OUT_HEIGHT_{self.index}', f'OUT_WIDTH_{self.index}', f'N_CHAN_{self.index}']
+        if len(self.inputs) > 1:
+            # In order to be correctly ingested by hls4ml the QONNX resize node should have 3 inputs set with RoI left empty
+            if len(self.inputs) == 2:
+                raise Exception(
+                    'The number of inputs to Resize node is equal to 2. '
+                    'In this case, either one is trying to use a version 10 node '
+                    'or one is using the RoI parameter only to perform the resize operation, '
+                    'both not supported in hls4ml'
+                )
+            if len(self.inputs) == 4:
+                raise Exception('Sizes parameter is not supported by hls4ml. Use scales instead')
+            # get the scales of Resize node from QONNX frontend
+            # see doc here: https://onnx.ai/onnx/operators/onnx__Resize.html
+            scales_idx = 2 if len(self.inputs) == 3 or len(self.inputs) == 4 else 1
+            scales = self.get_input_node(self.inputs[scales_idx]).get_attr('value')
+            if len(scales) == 4:  # Resize 2D
+                self.set_attr('out_width', int(self.get_attr('in_width') * scales[1]))
+                self.set_attr('out_height', int(self.get_attr('in_height') * scales[2]))
+                self.set_attr('n_chan', int(self.get_attr('n_chan') * scales[3]))
+            elif len(scales) == 3:  # Resize 1D
+                self.set_attr('out_width', int(self.get_attr('in_width') * scales[1]))
+                self.set_attr('n_chan', int(self.get_attr('n_chan') * scales[2]))
+            else:
+                raise Exception('Resize 1D and Resize 2D are the ones supported in hls4ml')
+            if self.get_attr('data_format') == 'channels_last':
+                if len(inp.shape) == 2:  # 1D -> width + chan
+                    shape = [int(self.get_attr('out_width')), int(self.get_attr('n_chan'))]
+                    dims = [f'OUT_WIDTH_{self.index}', f'N_CHAN_{self.index}']
+                elif len(inp.shape) == 3:  # 2D -> height + width + chan
+                    shape = [
+                        int(self.get_attr('out_height')),
+                        int(self.get_attr('out_width')),
+                        int(self.get_attr('n_chan')),
+                    ]
+                    dims = [f'OUT_HEIGHT_{self.index}', f'OUT_WIDTH_{self.index}', f'N_CHAN_{self.index}']
+            else:
+                if len(inp.shape) == 2:  # 1D -> width + chan
+                    shape = [int(self.get_attr('n_chan')), int(self.get_attr('out_width'))]
+                    dims = [f'N_CHAN_{self.index}', f'OUT_WIDTH_{self.index}']
+                elif len(inp.shape) == 3:  # 2D -> height + width + chan
+                    shape = [
+                        int(self.get_attr('n_chan')),
+                        int(self.get_attr('out_height')),
+                        int(self.get_attr('out_width')),
+                    ]
+                    dims = [f'N_CHAN_{self.index}', f'OUT_HEIGHT_{self.index}', f'OUT_WIDTH_{self.index}']
         else:
-            if len(inp.shape) == 2:  # 1D -> width + chan
-                shape = [self.get_attr('n_chan'), self.get_attr('out_width')]
-                dims = [f'N_CHAN_{self.index}', f'OUT_WIDTH_{self.index}']
-            elif len(inp.shape) == 3:  # 2D -> height + width + chan
-                shape = [self.get_attr('n_chan'), self.get_attr('out_height'), self.get_attr('out_width')]
-                dims = [f'N_CHAN_{self.index}', f'OUT_HEIGHT_{self.index}', f'OUT_WIDTH_{self.index}']
+            if self.get_attr('data_format') == 'channels_last':
+                if len(inp.shape) == 2:  # 1D -> width + chan
+                    shape = [self.get_attr('out_width'), self.get_attr('n_chan')]
+                    dims = [f'OUT_WIDTH_{self.index}', f'N_CHAN_{self.index}']
+                elif len(inp.shape) == 3:  # 2D -> height + width + chan
+                    shape = [self.get_attr('out_height'), self.get_attr('out_width'), self.get_attr('n_chan')]
+                    dims = [f'OUT_HEIGHT_{self.index}', f'OUT_WIDTH_{self.index}', f'N_CHAN_{self.index}']
+            else:
+                if len(inp.shape) == 2:  # 1D -> width + chan
+                    shape = [self.get_attr('n_chan'), self.get_attr('out_width')]
+                    dims = [f'N_CHAN_{self.index}', f'OUT_WIDTH_{self.index}']
+                elif len(inp.shape) == 3:  # 2D -> height + width + chan
+                    shape = [self.get_attr('n_chan'), self.get_attr('out_height'), self.get_attr('out_width')]
+                    dims = [f'N_CHAN_{self.index}', f'OUT_HEIGHT_{self.index}', f'OUT_WIDTH_{self.index}']

         self.add_output_variable(shape, dims, precision=inp.type.precision)
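The scales handling added above reduces to multiplying each spatial attribute by its scale factor and truncating to int; a worked example with made-up numbers, following the indexing in the code above (scales[1], scales[2] and scales[3] scale width, height and channels):

    # Hypothetical Resize 2D: a 16x16x3 input upscaled by 2 in width and height.
    in_width, in_height, n_chan = 16, 16, 3
    scales = [1.0, 2.0, 2.0, 1.0]
    out_width = int(in_width * scales[1])    # 32
    out_height = int(in_height * scales[2])  # 32
    out_chan = int(n_chan * scales[3])       # 3
    print(out_width, out_height, out_chan)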

hls4ml/model/optimizer/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -34,6 +34,7 @@
     'parse_qonnx',
     [
         'reshape_constant',
+        'resize_remove_constants',
         'quant_constant_parameters',
         'quant_to_activation',
         'fuse_quant_with_constant',
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,38 @@
+from warnings import warn
+
+from hls4ml.model.layers import Constant, Resize
+from hls4ml.model.optimizer import OptimizerPass
+
+
+class ResizeRemoveConstants(OptimizerPass):
+    """
+    This optimizer is intended to clean the Resize node from RoI and Scales parameters that if left cause issues in hls4ml.
+    """
+
+    def match(self, node):
+        is_match = isinstance(node, Resize) and len(node.inputs) > 1
+        return is_match
+
+    def transform(self, model, node):
+        """
+        Remove RoI and Scale Constant from new shape input.
+        """
+        # see doc here: https://onnx.ai/onnx/operators/onnx__Resize.html
+        roi_index = 1
+        scales_idx = 2
+        scales_node = node.get_input_node(node.inputs[scales_idx])
+        node.inputs[scales_idx] = ''
+        if not isinstance(scales_node, Constant):
+            raise RuntimeError("Non-constant shape inputs are not supported")
+        model.remove_node(scales_node, rewire=False)
+        # RoI position is always 1 when present
+        roi_node = node.get_input_node(node.inputs[roi_index])
+        if roi_node.get_attr('value'):
+            warn('RoI value vector is not empty. Consider that RoI is not supported in hls4ml', stacklevel=2)
+        node.inputs[roi_index] = ''
+        if not isinstance(roi_node, Constant):
+            raise RuntimeError("Non-constant RoI inputs are not supported")
+        model.remove_node(roi_node, rewire=False)
+        # Clean all the '' inputs
+        node.inputs = list(filter(None, node.inputs))
+        return True
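After the pass clears the RoI and scales slots, the final filter(None, ...) drops the emptied entries; a tiny illustration with made-up input names:

    # Inputs of a matched Resize node after clearing slot 1 (RoI) and slot 2 (scales).
    inputs = ['conv_out', '', '']
    print(list(filter(None, inputs)))  # ['conv_out']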

hls4ml/templates/vitis/nnet_utils/nnet_conv1d.h

Lines changed: 21 additions & 9 deletions
@@ -4,6 +4,7 @@
 #include "nnet_common.h"
 #include "nnet_conv1d_latency.h"
 #include "nnet_conv1d_resource.h"
+#include "nnet_function_stubs.h"
 #include <cstdlib>

 namespace nnet {

@@ -38,11 +39,7 @@ void conv_1d_cl(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan], res_T res[CO
     // Inlining helps reduce latency, but may also cause timing issues in some cases, use carefully.
     //#pragma HLS INLINE recursive

-    if (CONFIG_T::strategy == nnet::latency) {
-        conv_1d_latency_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
-    } else {
-        conv_1d_resource_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
-    }
+    CONFIG_T::template conv_kernel<data_T, res_T, CONFIG_T>::conv(data, res, weights, biases);
 }

 template <class data_T, class res_T, typename CONFIG_T>

@@ -55,13 +52,28 @@ void pointwise_conv_1d_cl(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
     // Inlining helps reduce latency, but may also cause timing issues in some cases, use carefully.
     //#pragma HLS INLINE recursive

-    // Nothing special to be done for io_parallel implementation
-    if (CONFIG_T::strategy == nnet::latency) {
+    CONFIG_T::template conv_kernel<data_T, res_T, CONFIG_T>::conv(data, res, weights, biases);
+}
+
+template <class data_T, class res_T, typename CONFIG_T> class Conv1DLatency : public Conv1DKernel<data_T, res_T, CONFIG_T> {
+  public:
+    static void conv(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan], res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],
+                     typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
+                     typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]) {
+        //#pragma HLS INLINE region
         conv_1d_latency_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
-    } else {
+    }
+};
+
+template <class data_T, class res_T, typename CONFIG_T> class Conv1DResource : public Conv1DKernel<data_T, res_T, CONFIG_T> {
+  public:
+    static void conv(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan], res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],
+                     typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan * CONFIG_T::n_filt],
+                     typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]) {
+        //#pragma HLS INLINE region
         conv_1d_resource_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
     }
-}
+};

 } // namespace nnet
