@@ -26,7 +26,7 @@ namespace dsp {
2626// ========================================================
2727// Depthwise convolution 2D template
2828// ========================================================
29- template <typename io_T, typename w_T, typename b_T, typename acc_T>
29+ template <typename io_T, typename w_T, typename b_T, typename acc_T, int fix_kernel_width, int fix_kernel_height >
3030MLI_FORCE_INLINE void depthwise_convolution2D_hwcn_nopad (
3131 const tensor_private_t <MLI_PTR(io_T)> &in,
3232 const conv2d_weights_tensor_private_t<MLI_PTR(w_T)> &w,
@@ -171,7 +171,7 @@ MLI_FORCE_INLINE void depthwise_convolution2D_hwcn_nopad(
171171 } // for ch_mult_idx
172172}
173173
174- template <typename io_T, typename w_T, typename b_T, typename acc_T>
174+ template <typename io_T, typename w_T, typename b_T, typename acc_T, int fix_kernel_width, int fix_kernel_height >
175175MLI_FORCE_INLINE void depthwise_convolution2D_hwcn (
176176 const tensor_private_t <MLI_PTR(io_T)> &in,
177177 const conv2d_weights_tensor_private_t<MLI_PTR(w_T)> &w,
@@ -330,7 +330,7 @@ MLI_FORCE_INLINE void depthwise_convolution2D_hwcn(
330330 }
331331}
332332
333- template <typename io_T, typename w_T, typename b_T, typename acc_T>
333+ template <typename io_T, typename w_T, typename b_T, typename acc_T, int fix_kernel_width, int fix_kernel_height >
334334MLI_FORCE_INLINE void depthwise_convolution2D_hwcn_nopad (
335335 const tensor_private_t <MLI_PTR(io_T)> &in,
336336 const conv2d_weights_tensor_private_t<MLI_PTR(w_T)> &w,
@@ -344,7 +344,7 @@ MLI_FORCE_INLINE void depthwise_convolution2D_hwcn_nopad(
344344 const int dilation_height, const int dilation_width,
345345 const int padding_top, const int padding_left,
346346 const int padding_bot, const int padding_right) {
347- mli::krn::ref::depthwise_convolution2D<io_T, w_T, b_T, acc_T, fx_quant_specific_params>(
347+ mli::krn::ref::depthwise_convolution2D<io_T, w_T, b_T, acc_T, fx_quant_specific_params, fix_kernel_width, fix_kernel_height >(
348348 in, w, biases, out, perception_area, quant_params,
349349 val_min_limit, val_max_limit,
350350 stride_height, stride_width,
@@ -353,7 +353,7 @@ MLI_FORCE_INLINE void depthwise_convolution2D_hwcn_nopad(
353353 padding_bot, padding_right);
354354}
355355
356- template <typename io_T, typename w_T, typename b_T, typename acc_T>
356+ template <typename io_T, typename w_T, typename b_T, typename acc_T, int fix_kernel_width, int fix_kernel_height >
357357MLI_FORCE_INLINE void depthwise_convolution2D_hwcn (
358358 const tensor_private_t <MLI_PTR(io_T)> &in,
359359 const conv2d_weights_tensor_private_t<MLI_PTR(w_T)> &w,
@@ -367,7 +367,7 @@ MLI_FORCE_INLINE void depthwise_convolution2D_hwcn(
367367 const int dilation_height, const int dilation_width,
368368 const int padding_top, const int padding_left,
369369 const int padding_bot, const int padding_right) {
370- mli::krn::ref::depthwise_convolution2D<io_T, w_T, b_T, acc_T, fx_quant_specific_params>(
370+ mli::krn::ref::depthwise_convolution2D<io_T, w_T, b_T, acc_T, fx_quant_specific_params, fix_kernel_width, fix_kernel_height >(
371371 in, w, biases, out, perception_area, quant_params,
372372 val_min_limit, val_max_limit,
373373 stride_height, stride_width,
@@ -377,7 +377,7 @@ MLI_FORCE_INLINE void depthwise_convolution2D_hwcn(
377377
378378}
379379
380- template <typename io_T, typename w_T, typename b_T, typename acc_T, typename quant_T>
380+ template <typename io_T, typename w_T, typename b_T, typename acc_T, typename quant_T, int fix_kernel_width, int fix_kernel_height >
381381MLI_FORCE_INLINE void depthwise_convolution2D (
382382 const tensor_private_t <MLI_PTR(io_T)> &in,
383383 const conv2d_weights_tensor_private_t<MLI_PTR(w_T)> &w,
@@ -402,7 +402,7 @@ MLI_FORCE_INLINE void depthwise_convolution2D(
402402
403403 if ((perception_area_nopad.row_end > perception_area_nopad.row_beg )
404404 && (perception_area_nopad.clmn_end > perception_area_nopad.clmn_beg )){
405- depthwise_convolution2D_hwcn_nopad<io_T, w_T, b_T, acc_T>(
405+ depthwise_convolution2D_hwcn_nopad<io_T, w_T, b_T, acc_T, fix_kernel_width, fix_kernel_height >(
406406 in, w, biases, out, perception_area_nopad, quant_params,
407407 val_min_limit, val_max_limit,
408408 stride_height, stride_width,
@@ -441,7 +441,7 @@ MLI_FORCE_INLINE void depthwise_convolution2D(
441441 perc_areas[areas_num++].clmn_end = out.width ;
442442 }
443443 for (int i = 0 ; i < areas_num; i ++) {
444- depthwise_convolution2D_hwcn<io_T, w_T, b_T, acc_T>(
444+ depthwise_convolution2D_hwcn<io_T, w_T, b_T, acc_T, fix_kernel_width, fix_kernel_height >(
445445 in, w, biases, out, perc_areas[i], quant_params,
446446 val_min_limit, val_max_limit,
447447 stride_height, stride_width,
0 commit comments