
Commit 2a78f93

Merge branch 'main' into qonnx-1p0
2 parents: b565067 + 0236eff (commit 2a78f93)

File tree: 13 files changed (+583, -57 lines)


.pre-commit-config.yaml

Lines changed: 2 additions & 2 deletions

@@ -30,7 +30,7 @@ repos:
       args: ["--profile", "black", --line-length=125]
 
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.15.2
+    rev: v3.16.0
    hooks:
      - id: pyupgrade
        args: ["--py36-plus"]
@@ -41,7 +41,7 @@ repos:
      - id: setup-cfg-fmt
 
   - repo: https://github.com/pycqa/flake8
-    rev: 7.0.0
+    rev: 7.1.0
    hooks:
      - id: flake8
        exclude: docs/conf.py

hls4ml/backends/fpga/fpga_backend.py

Lines changed: 2 additions & 2 deletions

@@ -701,7 +701,7 @@ def generate_conv1d_line_buffer_fn(self, layer_idx, n_partitions, in_W, in_C, ke
 
     The HLS compiler produces suboptimal designs for a im2col algorithm implementation, so a trick we use is
     to generate a resulting a result of im2col transformation explicitly, instead of relying on loops. Since
-    the result depends on the paraleters of the convolution layer (the input size, the kernel size, stride etc),
+    the result depends on the parameters of the convolution layer (the input size, the kernel size, stride etc),
     we need to do this for every convolution layer.
 
     Args:
@@ -798,7 +798,7 @@ def generate_conv2d_line_buffer_fn(
 
     The HLS compiler produces suboptimal designs for a im2col algorithm implementation, so a trick we use is
     to generate a resulting a result of im2col transformation explicitly, instead of relying on loops. Since
-    the result depends on the paraleters of the convolution layer (the input size, the kernel size, stride etc),
+    the result depends on the parameters of the convolution layer (the input size, the kernel size, stride etc),
     we need to do this for every convolution layer.
 
     Args:
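
For readers unfamiliar with the trick this docstring describes: im2col turns a convolution into a single matrix product by gathering every input window the kernel visits into one row of a matrix. A minimal numpy sketch of the 1D case (im2col_1d is a hypothetical helper written for illustration, not part of hls4ml; channels-last layout, matching the generated code):

import numpy as np

def im2col_1d(x, kernel, stride=1, pad=(0, 0)):
    # x: (in_W, in_C). Each row of the result is one flattened window, so a Conv1D
    # reduces to: im2col_1d(x, k, s, p) @ weights.reshape(k * in_C, n_filt)
    x = np.pad(x, (pad, (0, 0)))
    out_w = (x.shape[0] - kernel) // stride + 1
    return np.stack([x[i * stride : i * stride + kernel].ravel() for i in range(out_w)])

The generated fill_buffer functions unroll exactly this gathering with the layer's fixed sizes baked in, which is why one function must be emitted per convolution layer.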

hls4ml/backends/fpga/passes/codegen.py

Lines changed: 72 additions & 8 deletions

@@ -1,4 +1,4 @@
-from hls4ml.model.layers import Conv1D, Conv2D
+from hls4ml.model.layers import Conv1D, Conv2D, SeparableConv1D, SeparableConv2D
 from hls4ml.model.optimizer import OptimizerPass
 from hls4ml.model.types import Source
 
@@ -7,16 +7,27 @@ class GenerateConvIm2col(OptimizerPass):
     '''Generates code for im2col step of 1D/2d convolution'''
 
     def match(self, node):
-        return isinstance(node, (Conv1D, Conv2D)) and node.model.config.get_config_value('IOType') == 'io_parallel'
+        return (
+            isinstance(node, (Conv1D, Conv2D, SeparableConv1D, SeparableConv2D))
+            and node.model.config.get_config_value('IOType') == 'io_parallel'
+        )
 
     def transform(self, model, node):
-        node_class = node.__class__.__name__
-        if '1D' in node_class:
-            self._generate_im2col_1d(node)
-        elif '2D' in node_class:
-            self._generate_im2col_2d(node)
+        node_class = node.class_name
+        if 'Separable' in node_class:
+            if '1D' in node_class:
+                self._generate_separable_im2col_1d(node)
+            elif '2D' in node_class:
+                self._generate_separable_im2col_2d(node)
+            else:
+                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
         else:
-            raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
+            if '1D' in node_class:
+                self._generate_im2col_1d(node)
+            elif '2D' in node_class:
+                self._generate_im2col_2d(node)
+            else:
+                raise Exception(f'Cannot generate instructions for node {node.name} ({node_class})')
 
     def _generate_im2col_1d(self, node):
         code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
@@ -49,3 +60,56 @@ def _generate_im2col_2d(self, node):
         )
 
         node.set_attr('line_buffer_codegen', Source(code_str))
+
+    def _generate_separable_im2col_1d(self, node):
+        dw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
+            str(node.get_attr('index')) + '_dw',
+            node.get_attr('n_partitions'),
+            node.get_input_variable().shape[0],
+            node.get_input_variable().shape[1],
+            kernel=node.get_attr('filt_width'),
+            stride=node.get_attr('stride_width'),
+            pad=(node.get_attr('pad_left'), node.get_attr('pad_right')),
+        )
+
+        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))
+
+        pw_code_str = node.model.config.backend.generate_conv1d_line_buffer_fn(
+            str(node.get_attr('index')) + '_pw',
+            node.get_attr('n_partitions'),
+            node.get_output_variable().shape[0],
+            node.get_input_variable().shape[1],
+            kernel=1,
+        )
+
+        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))
+
+    def _generate_separable_im2col_2d(self, node):
+        dw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
+            str(node.get_attr('index')) + '_dw',
+            node.get_attr('n_partitions'),
+            node.get_input_variable().shape[0],
+            node.get_input_variable().shape[1],
+            node.get_input_variable().shape[2],
+            kernel=(node.get_attr('filt_height'), node.get_attr('filt_width')),
+            stride=(node.get_attr('stride_height'), node.get_attr('stride_width')),
+            pad=(
+                node.get_attr('pad_top'),
+                node.get_attr('pad_bottom'),
+                node.get_attr('pad_left'),
+                node.get_attr('pad_right'),
+            ),
+        )
+
+        node.set_attr('dw_line_buffer_codegen', Source(dw_code_str))
+
+        pw_code_str = node.model.config.backend.generate_conv2d_line_buffer_fn(
+            str(node.get_attr('index')) + '_pw',
+            node.get_attr('n_partitions'),
+            node.get_output_variable().shape[0],
+            node.get_output_variable().shape[1],
+            node.get_input_variable().shape[2],
+            kernel=(1, 1),
+        )
+
+        node.set_attr('pw_line_buffer_codegen', Source(pw_code_str))
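
A note on why the pointwise calls above pass kernel=1 (and kernel=(1, 1) in 2D) together with the depthwise output shape: a 1x1 window contains exactly one pixel, so the pointwise im2col degenerates to the identity on the pixel stream. Reusing the hypothetical im2col_1d sketch from earlier:

x = np.arange(12.0).reshape(6, 2)   # depthwise output: (out_W=6, n_chan=2)
cols = im2col_1d(x, kernel=1)       # pointwise im2col keeps the shape (6, 2)
assert np.array_equal(cols, x)      # a 1x1 window is just the pixel itself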

hls4ml/backends/vivado/passes/convolution_templates.py

Lines changed: 21 additions & 6 deletions

@@ -254,8 +254,8 @@ def __init__(self):
     '{input}, {output}, {d}, {p}, {z}, {b});'
 )
 
-sepconv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_sepconv1d_stream.h']
-sepconv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_sepconv2d_stream.h']
+sepconv1d_include_list = ['nnet_utils/nnet_conv1d.h', 'nnet_utils/nnet_sepconv1d.h', 'nnet_utils/nnet_sepconv1d_stream.h']
+sepconv2d_include_list = ['nnet_utils/nnet_conv2d.h', 'nnet_utils/nnet_sepconv2d.h', 'nnet_utils/nnet_sepconv2d_stream.h']
 
 
 class SeparableConv1DConfigTemplate(LayerConfigTemplate):
@@ -286,7 +286,10 @@ def format(self, node):
         params['index'] = str(node.index) + '_depthwise'
         params['weight_t'] = node.get_weights('depthwise').type
         params['bias_t'] = node.get_weights('zero_bias').type
-        params['fill_fn'] = 'FillConv1DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_dw'
+        else:
+            params['fill_fn'] = 'FillConv1DBuffer'
 
         if node.get_attr('unscaled'):
             params['scale_index_type'] = 'scale_index_unscaled'
@@ -317,13 +320,17 @@ def format(self, node):
 
         params['filt_width'] = 1
         params['stride_width'] = 1
+        params['pad_left'] = params['pad_right'] = 0
         params['dilation'] = node.get_attr('dilation', 1)
         params['nzeros'] = node.get_weights('pointwise').nzeros
         params['index'] = str(node.index) + '_pointwise'
         params['weight_t'] = node.get_weights('pointwise').type
         params['min_width'] = params['in_width']
         params['instructions'] = '0'
-        params['fill_fn'] = 'FillConv1DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_pw'
+        else:
+            params['fill_fn'] = 'FillConv1DBuffer'
 
         if node.get_attr('unscaled'):
             params['scale_index_type'] = 'scale_index_unscaled'
@@ -402,7 +409,10 @@ def format(self, node):
         params['nzeros'] = node.get_weights('depthwise').nzeros
         params['index'] = str(node.index) + '_depthwise'
         params['weight_t'] = node.get_weights('depthwise').type
-        params['fill_fn'] = 'FillConv2DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_dw'
+        else:
+            params['fill_fn'] = 'FillConv2DBuffer'
 
         if node.get_attr('unscaled_h'):
             params['scale_index_height_type'] = 'scale_index_unscaled'
@@ -440,14 +450,19 @@ def format(self, node):
 
         params['filt_height'] = params['filt_width'] = 1
         params['stride_height'] = params['stride_width'] = 1
+        params['pad_left'] = params['pad_right'] = 0
+        params['pad_top'] = params['pad_bottom'] = 0
         params['dilation'] = node.get_attr('dilation', 1)
         params['nzeros'] = node.get_weights('pointwise').nzeros
         params['index'] = str(node.index) + '_pointwise'
         params['weight_t'] = node.get_weights('pointwise').type
         params['min_height'] = params['in_height']
         params['min_width'] = params['in_width']
         params['instructions'] = '0'
-        params['fill_fn'] = 'FillConv2DBuffer'
+        if node.model.config.get_config_value('IOType') == 'io_parallel':
+            params['fill_fn'] = f'fill_buffer_{node.index}_pw'
+        else:
+            params['fill_fn'] = 'FillConv2DBuffer'
 
         if node.get_attr('unscaled_h'):
             params['scale_index_height_type'] = 'scale_index_unscaled'
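
The io_parallel branches tie these templates to the GenerateConvIm2col pass above: the pass calls generate_conv{1,2}d_line_buffer_fn with the suffixes '<index>_dw' and '<index>_pw', and the fill_fn strings here must name the functions that generator emits. A toy restatement of the assumed naming contract (illustrative only):

def fill_fn_name(layer_index, phase):
    # phase: 'dw' for the depthwise step, 'pw' for the pointwise step
    return f'fill_buffer_{layer_index}_{phase}'

assert fill_fn_name(4, 'dw') == 'fill_buffer_4_dw'   # must match the generated function name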

hls4ml/backends/vivado/vivado_backend.py

Lines changed: 44 additions & 9 deletions

@@ -295,9 +295,20 @@ def init_sepconv1d(self, layer):
         else:
             layer.set_attr('strategy', 'latency')
 
-        layer.set_attr(
-            'n_partitions', 1
-        )  # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
+        out_width = layer.get_output_variable().shape[0]
+        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+        valid_pf = self.get_valid_conv_partition_splits(1, out_width)
+        if chosen_pf not in valid_pf:
+            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
+            valid_pf_str = ','.join(map(str, valid_pf))
+            print(
+                f'WARNING: Invalid ParallelizationFactor={chosen_pf} in layer "{layer.name}". '
+                f'Using ParallelizationFactor={closest_pf} instead. Valid ParallelizationFactor(s): {valid_pf_str}.'
+            )
+        else:
+            closest_pf = chosen_pf
+        layer.set_attr('n_partitions', out_width // closest_pf)
+
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())
 
         # Set the output type of the depthwise phase
@@ -350,9 +361,21 @@ def init_sepconv2d(self, layer):
         else:
             layer.set_attr('strategy', 'latency')
 
-        layer.set_attr(
-            'n_partitions', 1
-        )  # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
+        out_height = layer.get_output_variable().shape[0]
+        out_width = layer.get_output_variable().shape[1]
+        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+        valid_pf = self.get_valid_conv_partition_splits(out_height, out_width)
+        if chosen_pf not in valid_pf:
+            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
+            valid_pf_str = ','.join(map(str, valid_pf))
+            print(
+                f'WARNING: Invalid ParallelizationFactor={chosen_pf} in layer "{layer.name}". '
+                f'Using ParallelizationFactor={closest_pf} instead. Valid ParallelizationFactor(s): {valid_pf_str}.'
+            )
+        else:
+            closest_pf = chosen_pf
+        layer.set_attr('n_partitions', out_height * out_width // closest_pf)
+
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())
 
         # Set the output type of the depthwise phase
@@ -373,9 +396,21 @@ def init_depconv2d(self, layer):
         else:
             layer.set_attr('strategy', 'latency')
 
-        layer.set_attr(
-            'n_partitions', 1
-        )  # TODO Once we have SeparableConv implementation for io_parallel this should be set properly
+        out_height = layer.get_output_variable().shape[0]
+        out_width = layer.get_output_variable().shape[1]
+        chosen_pf = layer.model.config.get_layer_config_value(layer, 'ParallelizationFactor', 1)
+        valid_pf = self.get_valid_conv_partition_splits(out_height, out_width)
+        if chosen_pf not in valid_pf:
+            closest_pf = self.get_closest_reuse_factor(valid_pf, chosen_pf)
+            valid_pf_str = ','.join(map(str, valid_pf))
+            print(
+                f'WARNING: Invalid ParallelizationFactor={chosen_pf} in layer "{layer.name}". '
+                f'Using ParallelizationFactor={closest_pf} instead. Valid ParallelizationFactor(s): {valid_pf_str}.'
+            )
+        else:
+            closest_pf = chosen_pf
+        layer.set_attr('n_partitions', out_height * out_width // closest_pf)
+
         layer.set_attr('implementation', layer.model.config.get_conv_implementation(layer).lower())
 
     def _set_pooling_accum_t(self, layer, pool_size):
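
A worked example of the snapping behavior added here, using stand-ins for the backend helpers (divisor-based validity and nearest-value snapping are assumptions about get_valid_conv_partition_splits and get_closest_reuse_factor, made only for illustration):

def valid_partition_splits(out_height, out_width):
    # Assumption: a ParallelizationFactor must evenly divide the number of output pixels.
    total = out_height * out_width
    return [f for f in range(1, total + 1) if total % f == 0]

def closest_factor(valid, chosen):
    # Assumption: an invalid request snaps to the nearest valid factor.
    return min(valid, key=lambda f: abs(f - chosen))

valid = valid_partition_splits(1, 10)   # SeparableConv1D with out_width=10 -> [1, 2, 5, 10]
pf = closest_factor(valid, 4)           # requested PF=4 is invalid -> snaps to 5
assert 10 // pf == 2                    # the layer then runs with n_partitions = 2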
hls4ml/templates/vivado/nnet_utils/nnet_sepconv1d.h (new file)

Lines changed: 46 additions & 0 deletions

@@ -0,0 +1,46 @@
+#ifndef NNET_SEPARABLE_CONV1D_H_
+#define NNET_SEPARABLE_CONV1D_H_
+
+#include "nnet_common.h"
+#include "nnet_conv1d.h"
+#include "nnet_sepconv1d_latency.h"
+//#include "nnet_sepconv1d_resource.h"
+#include <cstdlib>
+
+namespace nnet {
+
+template <class data_T, class res_T, typename CONFIG_T>
+void depthwise_conv_1d_cl(data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],
+                          res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],
+                          typename CONFIG_T::weight_t weights[CONFIG_T::filt_width * CONFIG_T::n_chan],
+                          typename CONFIG_T::bias_t biases[CONFIG_T::n_chan]) {
+    #pragma HLS INLINE recursive
+    if (CONFIG_T::strategy == nnet::latency) {
+        depthwise_conv_1d_latency_cl<data_T, res_T, CONFIG_T>(data, res, weights, biases);
+    } else {
+        assert("Resource strategy for DepthwiseConv1D is not supported." && false);
+    }
+}
+
+template <class data_T, class dw_res_T, class res_T, typename CONFIG_T>
+void separable_conv_1d_cl(data_T data[CONFIG_T::depthwise_config::in_width * CONFIG_T::depthwise_config::n_chan],
+                          res_T res[CONFIG_T::pointwise_config::out_width * CONFIG_T::pointwise_config::n_filt],
+                          typename CONFIG_T::depthwise_config::weight_t
+                              depthwise_weights[CONFIG_T::depthwise_config::filt_width * CONFIG_T::depthwise_config::n_chan],
+                          typename CONFIG_T::pointwise_config::weight_t
+                              pointwise_weights[CONFIG_T::pointwise_config::n_chan * CONFIG_T::pointwise_config::n_filt],
+                          typename CONFIG_T::depthwise_config::bias_t depthwise_biases[CONFIG_T::depthwise_config::n_chan],
+                          typename CONFIG_T::pointwise_config::bias_t pointwise_biases[CONFIG_T::pointwise_config::n_filt]) {
+    #pragma HLS INLINE recursive
+
+    dw_res_T depthwise_res[CONFIG_T::depthwise_config::out_width * CONFIG_T::depthwise_config::n_filt];
+
+    depthwise_conv_1d_cl<data_T, dw_res_T, typename CONFIG_T::depthwise_config>(data, depthwise_res, depthwise_weights,
+                                                                                depthwise_biases);
+    pointwise_conv_1d_cl<dw_res_T, res_T, typename CONFIG_T::pointwise_config>(depthwise_res, res, pointwise_weights,
+                                                                               pointwise_biases);
+}
+
+} // namespace nnet
+
+#endif
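
For reference, separable_conv_1d_cl composes a depthwise convolution (one filter per input channel) with a 1x1 pointwise convolution that mixes channels. A numpy sketch of the same decomposition (channels-last, stride 1, no padding, biases omitted; a reference model, not the HLS implementation):

import numpy as np

def depthwise_conv1d(x, w):
    # x: (in_W, n_chan), w: (filt_W, n_chan) -- each channel filtered independently
    filt_w = w.shape[0]
    return np.stack([(x[i : i + filt_w] * w).sum(axis=0) for i in range(x.shape[0] - filt_w + 1)])

def separable_conv1d(x, dw_w, pw_w):
    # pw_w: (n_chan, n_filt) -- the pointwise step is a per-pixel matrix product
    return depthwise_conv1d(x, dw_w) @ pw_w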
