@@ -15,48 +15,48 @@ def generate_pointwise_conv1d_fn(layer_idx, reuse_factor=1):
1515 """
1616
1717 generated_code = (
18- " template<class data_T, class res_T, typename CONFIG_T>\n "
19- " class pointwise_conv_{index} : public PointwiseConv1D <data_T, res_T, CONFIG_T> {{\n "
20- " public:\n "
21- " static void pointwise_conv (\n "
22- " data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],\n "
23- " res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],\n "
24- " typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],\n "
25- " typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]) {{\n "
26- " data_T data_tmp[CONFIG_T::reuse_factor][CONFIG_T::in_width * CONFIG_T::n_chan / CONFIG_T::reuse_factor];\n " # noqa: E501
27- " #pragma HLS ARRAY_PARTITION variable=data_tmp complete dim=0\n "
28- " res_T res_tmp[CONFIG_T::reuse_factor][CONFIG_T::out_width * CONFIG_T::n_filt / CONFIG_T::reuse_factor];\n " # noqa: E501
29- " #pragma HLS ARRAY_PARTITION variable=res_tmp complete dim=0\n \n "
30- " RFInputLoop:\n "
31- " for (int jj = 0; jj < CONFIG_T::reuse_factor; jj++) {{\n "
32- " #pragma HLS UNROLL\n "
33- " InnerInputLoop:\n "
34- " for (int ii = 0; ii < CONFIG_T::in_width * CONFIG_T::n_chan / CONFIG_T::reuse_factor; ii++) {{\n "
35- " #pragma HLS UNROLL\n "
36- " data_tmp[jj][ii] = data[jj * CONFIG_T::in_width * CONFIG_T::n_chan / CONFIG_T::reuse_factor + ii];\n " # noqa: E501
37- " }}\n "
38- " }}\n \n "
18+ ' template<class data_T, class res_T, typename CONFIG_T>\n '
19+ ' class pointwise_conv_{index} : public Conv1DKernel <data_T, res_T, CONFIG_T> {{\n '
20+ ' public:\n '
21+ ' static void conv (\n '
22+ ' data_T data[CONFIG_T::in_width * CONFIG_T::n_chan],\n '
23+ ' res_T res[CONFIG_T::out_width * CONFIG_T::n_filt],\n '
24+ ' typename CONFIG_T::weight_t weights[CONFIG_T::n_chan * CONFIG_T::n_filt],\n '
25+ ' typename CONFIG_T::bias_t biases[CONFIG_T::n_filt]) {{\n '
26+ ' data_T data_tmp[CONFIG_T::reuse_factor][CONFIG_T::in_width * CONFIG_T::n_chan / CONFIG_T::reuse_factor];\n ' # noqa: E501
27+ ' #pragma HLS ARRAY_PARTITION variable=data_tmp complete dim=0\n '
28+ ' res_T res_tmp[CONFIG_T::reuse_factor][CONFIG_T::out_width * CONFIG_T::n_filt / CONFIG_T::reuse_factor];\n ' # noqa: E501
29+ ' #pragma HLS ARRAY_PARTITION variable=res_tmp complete dim=0\n \n '
30+ ' RFInputLoop:\n '
31+ ' for (int jj = 0; jj < CONFIG_T::reuse_factor; jj++) {{\n '
32+ ' #pragma HLS UNROLL\n '
33+ ' InnerInputLoop:\n '
34+ ' for (int ii = 0; ii < CONFIG_T::in_width * CONFIG_T::n_chan / CONFIG_T::reuse_factor; ii++) {{\n '
35+ ' #pragma HLS UNROLL\n '
36+ ' data_tmp[jj][ii] = data[jj * CONFIG_T::in_width * CONFIG_T::n_chan / CONFIG_T::reuse_factor + ii];\n ' # noqa: E501
37+ ' }}\n '
38+ ' }}\n \n '
3939 ).format (index = layer_idx )
40- indent = " "
40+ indent = ' '
4141 for i in range (reuse_factor ):
4242 generated_code += indent
4343 generated_code += (
44- f" pointwise_conv_1d_latency_cl<data_T, res_T, CONFIG_T>(data_tmp[{ i } ], res_tmp[{ i } ], weights, biases);\n "
44+ f' pointwise_conv_1d_latency_cl<data_T, res_T, CONFIG_T>(data_tmp[{ i } ], res_tmp[{ i } ], weights, biases);\n '
4545 )
4646
4747 generated_code += (
48- " \n "
49- " RFOutputLoop:\n "
50- " for (int jj = 0; jj < CONFIG_T::reuse_factor; jj++) {\n "
51- " #pragma HLS UNROLL\n "
52- " InnerOutputLoop:\n "
53- " for (int ii = 0; ii < CONFIG_T::out_width * CONFIG_T::n_filt / CONFIG_T::reuse_factor; ii++) {\n "
54- " #pragma HLS UNROLL\n "
55- " res[jj * CONFIG_T::out_width * CONFIG_T::n_filt / CONFIG_T::reuse_factor + ii] = res_tmp[jj][ii];\n " # noqa: E501
56- " }\n "
57- " }\n "
58- " }\n "
59- " };\n "
48+ ' \n '
49+ ' RFOutputLoop:\n '
50+ ' for (int jj = 0; jj < CONFIG_T::reuse_factor; jj++) {\n '
51+ ' #pragma HLS UNROLL\n '
52+ ' InnerOutputLoop:\n '
53+ ' for (int ii = 0; ii < CONFIG_T::out_width * CONFIG_T::n_filt / CONFIG_T::reuse_factor; ii++) {\n '
54+ ' #pragma HLS UNROLL\n '
55+ ' res[jj * CONFIG_T::out_width * CONFIG_T::n_filt / CONFIG_T::reuse_factor + ii] = res_tmp[jj][ii];\n ' # noqa: E501
56+ ' }\n '
57+ ' }\n '
58+ ' }\n '
59+ ' };\n '
6060 )
6161
6262 return generated_code
@@ -66,14 +66,10 @@ class GeneratePointwiseConv1D(OptimizerPass):
6666 '''Generates code for pointwise 1D convolution'''
6767
def match(self, node):
    '''Decide whether this pass applies to ``node``.

    Args:
        node: The model graph node under consideration.

    Returns:
        True only when all three conditions hold: ``node`` is a ``Conv1D``
        instance, the model configuration value ``'IOType'`` equals
        ``'io_parallel'``, and the node attribute ``'filt_width'`` equals 1.
    '''
    # Check the type first so the config and attribute lookups below only
    # run on Conv1D nodes.
    if not isinstance(node, Conv1D):
        return False
    io_type = node.model.config.get_config_value('IOType')
    return io_type == 'io_parallel' and node.get_attr('filt_width') == 1
7070
def transform(self, model, node):
    '''Generate the pointwise 1D convolution code for a matched node.

    No node-class check is made here: the call is unconditional and
    relies on ``match`` having already filtered the nodes this pass
    receives.

    Args:
        model: The model that owns ``node``; not used by this method.
        node: The node to generate code for.

    Returns:
        None. The work is delegated to ``_generate_pointwise_conv1d``.
    '''
    self._generate_pointwise_conv1d(node)
7773
7874 def _generate_pointwise_conv1d (self , node ):
7975 code_str = generate_pointwise_conv1d_fn (
0 commit comments