Skip to content

Commit 0e3c5dc

Browse files
committed
Add a specialization of the average-pooling kernel for the 2x2, no-padding case (avepool k2x2), for both fx16 and fx8 data types.
1 parent 38eb2ed commit 0e3c5dc

File tree

4 files changed

+161
-1
lines changed

4 files changed

+161
-1
lines changed

include/api/mli_krn_avepool_spec_api.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ mli_status mli_krn_avepool_chw_fx16_k3x3_nopad(const mli_tensor * in, const mli_
3838
mli_status mli_krn_avepool_chw_fx16_k5x5_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
3939
mli_status mli_krn_avepool_chw_fx16_k7x7_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
4040
mli_status mli_krn_avepool_chw_fx16_k9x9_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
41+
mli_status mli_krn_avepool_chw_fx16_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
4142
mli_status mli_krn_avepool_chw_fx16_k4x4_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
4243
mli_status mli_krn_avepool_chw_fx16_k6x6_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
4344
mli_status mli_krn_avepool_chw_fx16_k8x8_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
@@ -69,6 +70,7 @@ mli_status mli_krn_avepool_chw_fx8_k3x3_nopad(const mli_tensor * in, const mli_p
6970
mli_status mli_krn_avepool_chw_fx8_k5x5_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
7071
mli_status mli_krn_avepool_chw_fx8_k7x7_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
7172
mli_status mli_krn_avepool_chw_fx8_k9x9_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
73+
mli_status mli_krn_avepool_chw_fx8_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
7274
mli_status mli_krn_avepool_chw_fx8_k4x4_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
7375
mli_status mli_krn_avepool_chw_fx8_k6x6_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);
7476
mli_status mli_krn_avepool_chw_fx8_k8x8_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out);

lib/gen/mli_krn_avepool_gen.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@
# Even-kernel, no-padding avepool specializations: k = 2, 4, 6, 8 all share
# the same even-N core loop. This commit extends the range down to 2 so a
# dedicated k2x2 nopad function is generated as well.
corefunc = "avepool_chw_nopad_k4_Nx2_N_even"
stride = 0  # 0 appears to mean "leave stride generic" (emits an '#if 0' guard) -- verify against the template
ch = 0      # 0 likewise leaves the channel count generic
kernel_range = range(2, 9, 2)
f_list.extend(
    Func(fbase, k, k, ch, stride, stride, corefunc, "nopad")
    for k in kernel_range
)

lib/src/kernels/pooling/mli_krn_avepool_chw_fx16.cc

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,6 +1107,83 @@ mli_status mli_krn_avepool_chw_fx16_k9x9_nopad(const mli_tensor * in, const mli_
11071107
return MLI_STATUS_OK;
11081108
}
11091109

1110+
// Specialized average pooling, CHW layout, fx16 (int16_t), 2x2 kernel,
// no padding. Generator-produced wrapper: it validates the tensors, pins the
// parameters this variant hard-codes, then dispatches to the shared even-N
// core loop.
mli_status mli_krn_avepool_chw_fx16_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out) {
    // Shared parameter/tensor checker; bail out early on any failure.
    mli_status ret = MLI_CHECK_STATUS(mli_chk_avepool_chw_fx16(in, cfg, out), __func__);
    if (ret != MLI_STATUS_OK)
        return ret;

    // Extract general avepool parameters from the config and input tensor.
    int stride_width = cfg->stride_width;
    int stride_height = cfg->stride_height;
    int padding_top = cfg->padding_top;
    int padding_bot = cfg->padding_bottom;
    int padding_left = cfg->padding_left;
    int padding_right = cfg->padding_right;
    int channels_num = in->shape[FMAP_C_DIM_CHW];
    int kernel_height = cfg->kernel_height;
    int kernel_width = cfg->kernel_width;
    // Pin the values this variation hard-codes. The generator emits
    // "#if <constant>" guards: 0 disables the check (parameter stays
    // generic/runtime), any nonzero constant (1 or the fixed value itself,
    // e.g. "#if 2") enables it. MLI_CHECK_AND_FIX presumably asserts the
    // caller's value and forces the variable to the constant -- TODO confirm
    // against the macro definition.
#if 0
    MLI_CHECK_AND_FIX(stride_width, 0);
#endif
#if 0
    MLI_CHECK_AND_FIX(stride_height, 0);
#endif
#if 1
    // "nopad" variant: all four paddings are fixed to zero.
    MLI_CHECK_AND_FIX(padding_top, 0);
    MLI_CHECK_AND_FIX(padding_bot, 0);
    MLI_CHECK_AND_FIX(padding_left, 0);
    MLI_CHECK_AND_FIX(padding_right, 0);
#endif
#if 2
    MLI_CHECK_AND_FIX(kernel_width, 2);
#endif
#if 2
    MLI_CHECK_AND_FIX(kernel_height, 2);
#endif
#if 0
    MLI_CHECK_AND_FIX(channels_num, 0);
#endif

    // Data pointers
    MLI_PTR(int16_t) in_ftrs = (MLI_PTR(int16_t ))in->data;
    MLI_OUT_PTR(int16_t) out_ftrs = (MLI_OUT_PTR(int16_t ))out->data;

    // Define data dimensions (CHW layout: shape = {C, H, W}).
    const int in_height = in->shape[FMAP_H_DIM_CHW];
    const int in_width = in->shape[FMAP_W_DIM_CHW];

    // "Valid"-style output extents: ceil((in + pad - kernel + 1) / stride).
    // Paddings are zero here, so this reduces to ceil((in - 1) / stride).
    const int out_width = CEIL_DIV(in_width + padding_left + padding_right - kernel_width + 1, stride_width);
    const int out_height = CEIL_DIV(in_height + padding_top + padding_bot - kernel_height + 1, stride_height);

    // No padding -> the whole output area is the "no padding needed" region.
    const int row_beg = 0;
    const int row_end = out_height;
    const int clmn_beg = 0;
    const int clmn_end = out_width;

    mli_prv_fx_init_dsp_ctrl();

    // Core loop shared by all even kernel sizes; despite the "k4" in its
    // name the generator instantiates it for k in {2, 4, 6, 8} (see
    // lib/gen/mli_krn_avepool_gen.py in this commit).
    avepool_chw_nopad_k4_Nx2_N_even(
        row_beg, row_end,
        clmn_beg, clmn_end,
        in_ftrs, out_ftrs,
        channels_num, in_width, in_height,
        out_width, out_height,
        kernel_height, kernel_width,
        stride_height, stride_width,
        padding_top, padding_left, padding_right, padding_bot);

    // Fill output tensor parameters (element type and fractional bits are
    // inherited unchanged from the input).
    out->el_type = in->el_type;
    out->rank = in->rank;
    out->shape[FMAP_C_DIM_CHW] = channels_num;
    out->shape[FMAP_H_DIM_CHW] = out_height;
    out->shape[FMAP_W_DIM_CHW] = out_width;
    out->el_params.fx.frac_bits = in->el_params.fx.frac_bits;

    return MLI_STATUS_OK;
}
1186+
11101187
mli_status mli_krn_avepool_chw_fx16_k4x4_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out) {
11111188
mli_status ret = MLI_CHECK_STATUS(mli_chk_avepool_chw_fx16(in, cfg, out), __func__);
11121189
if (ret != MLI_STATUS_OK)
@@ -2390,6 +2467,8 @@ mli_status mli_krn_avepool_chw_fx16(const mli_tensor * in, const mli_pool_cfg *
23902467
return mli_krn_avepool_chw_fx16_k3x1_nopad(in, cfg, out);
23912468
} else if ((kernel_w == 3) && (kernel_h == 1) && (padding_top <= 0) && (padding_bot <= 0) && (padding_left <= 1) && (padding_right <= 1)) {
23922469
return mli_krn_avepool_chw_fx16_k3x1_krnpad(in, cfg, out);
2470+
} else if ((kernel_w == 2) && (kernel_h == 2) && (padding_top == 0) && (padding_bot == 0) && (padding_left == 0) && (padding_right == 0)) {
2471+
return mli_krn_avepool_chw_fx16_k2x2_nopad(in, cfg, out);
23932472
} else if ((kernel_w == 2) && (kernel_h == 2) && (padding_top <= 0) && (padding_bot <= 1) && (padding_left <= 0) && (padding_right <= 1)) {
23942473
return mli_krn_avepool_chw_fx16_k2x2_krnpad(in, cfg, out);
23952474
} else if ((kernel_w == 2) && (kernel_h == 1) && (padding_top == 0) && (padding_bot == 0) && (padding_left == 0) && (padding_right == 0)) {

lib/src/kernels/pooling/mli_krn_avepool_chw_fx8.cc

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,6 +1107,83 @@ mli_status mli_krn_avepool_chw_fx8_k9x9_nopad(const mli_tensor * in, const mli_p
11071107
return MLI_STATUS_OK;
11081108
}
11091109

1110+
// Specialized average pooling, CHW layout, fx8 (int8_t), 2x2 kernel,
// no padding. Identical structure to the fx16 twin: generator-produced
// wrapper that validates the tensors, pins the hard-coded parameters, then
// dispatches to the shared even-N core loop.
mli_status mli_krn_avepool_chw_fx8_k2x2_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out) {
    // Shared parameter/tensor checker; bail out early on any failure.
    mli_status ret = MLI_CHECK_STATUS(mli_chk_avepool_chw_fx8(in, cfg, out), __func__);
    if (ret != MLI_STATUS_OK)
        return ret;

    // Extract general avepool parameters from the config and input tensor.
    int stride_width = cfg->stride_width;
    int stride_height = cfg->stride_height;
    int padding_top = cfg->padding_top;
    int padding_bot = cfg->padding_bottom;
    int padding_left = cfg->padding_left;
    int padding_right = cfg->padding_right;
    int channels_num = in->shape[FMAP_C_DIM_CHW];
    int kernel_height = cfg->kernel_height;
    int kernel_width = cfg->kernel_width;
    // Pin the values this variation hard-codes. The generator emits
    // "#if <constant>" guards: 0 disables the check (parameter stays
    // generic/runtime), any nonzero constant (1 or the fixed value itself,
    // e.g. "#if 2") enables it. MLI_CHECK_AND_FIX presumably asserts the
    // caller's value and forces the variable to the constant -- TODO confirm
    // against the macro definition.
#if 0
    MLI_CHECK_AND_FIX(stride_width, 0);
#endif
#if 0
    MLI_CHECK_AND_FIX(stride_height, 0);
#endif
#if 1
    // "nopad" variant: all four paddings are fixed to zero.
    MLI_CHECK_AND_FIX(padding_top, 0);
    MLI_CHECK_AND_FIX(padding_bot, 0);
    MLI_CHECK_AND_FIX(padding_left, 0);
    MLI_CHECK_AND_FIX(padding_right, 0);
#endif
#if 2
    MLI_CHECK_AND_FIX(kernel_width, 2);
#endif
#if 2
    MLI_CHECK_AND_FIX(kernel_height, 2);
#endif
#if 0
    MLI_CHECK_AND_FIX(channels_num, 0);
#endif

    // Data pointers
    MLI_PTR(int8_t) in_ftrs = (MLI_PTR(int8_t ))in->data;
    MLI_OUT_PTR(int8_t) out_ftrs = (MLI_OUT_PTR(int8_t ))out->data;

    // Define data dimensions (CHW layout: shape = {C, H, W}).
    const int in_height = in->shape[FMAP_H_DIM_CHW];
    const int in_width = in->shape[FMAP_W_DIM_CHW];

    // "Valid"-style output extents: ceil((in + pad - kernel + 1) / stride).
    // Paddings are zero here, so this reduces to ceil((in - 1) / stride).
    const int out_width = CEIL_DIV(in_width + padding_left + padding_right - kernel_width + 1, stride_width);
    const int out_height = CEIL_DIV(in_height + padding_top + padding_bot - kernel_height + 1, stride_height);

    // No padding -> the whole output area is the "no padding needed" region.
    const int row_beg = 0;
    const int row_end = out_height;
    const int clmn_beg = 0;
    const int clmn_end = out_width;

    mli_prv_fx_init_dsp_ctrl();

    // Core loop shared by all even kernel sizes; despite the "k4" in its
    // name the generator instantiates it for k in {2, 4, 6, 8} (see
    // lib/gen/mli_krn_avepool_gen.py in this commit).
    avepool_chw_nopad_k4_Nx2_N_even(
        row_beg, row_end,
        clmn_beg, clmn_end,
        in_ftrs, out_ftrs,
        channels_num, in_width, in_height,
        out_width, out_height,
        kernel_height, kernel_width,
        stride_height, stride_width,
        padding_top, padding_left, padding_right, padding_bot);

    // Fill output tensor parameters (element type and fractional bits are
    // inherited unchanged from the input).
    out->el_type = in->el_type;
    out->rank = in->rank;
    out->shape[FMAP_C_DIM_CHW] = channels_num;
    out->shape[FMAP_H_DIM_CHW] = out_height;
    out->shape[FMAP_W_DIM_CHW] = out_width;
    out->el_params.fx.frac_bits = in->el_params.fx.frac_bits;

    return MLI_STATUS_OK;
}
1186+
11101187
mli_status mli_krn_avepool_chw_fx8_k4x4_nopad(const mli_tensor * in, const mli_pool_cfg * cfg, mli_tensor * out) {
11111188
mli_status ret = MLI_CHECK_STATUS(mli_chk_avepool_chw_fx8(in, cfg, out), __func__);
11121189
if (ret != MLI_STATUS_OK)
@@ -2390,6 +2467,8 @@ mli_status mli_krn_avepool_chw_fx8(const mli_tensor * in, const mli_pool_cfg * c
23902467
return mli_krn_avepool_chw_fx8_k3x1_nopad(in, cfg, out);
23912468
} else if ((kernel_w == 3) && (kernel_h == 1) && (padding_top <= 0) && (padding_bot <= 0) && (padding_left <= 1) && (padding_right <= 1)) {
23922469
return mli_krn_avepool_chw_fx8_k3x1_krnpad(in, cfg, out);
2470+
} else if ((kernel_w == 2) && (kernel_h == 2) && (padding_top == 0) && (padding_bot == 0) && (padding_left == 0) && (padding_right == 0)) {
2471+
return mli_krn_avepool_chw_fx8_k2x2_nopad(in, cfg, out);
23932472
} else if ((kernel_w == 2) && (kernel_h == 2) && (padding_top <= 0) && (padding_bot <= 1) && (padding_left <= 0) && (padding_right <= 1)) {
23942473
return mli_krn_avepool_chw_fx8_k2x2_krnpad(in, cfg, out);
23952474
} else if ((kernel_w == 2) && (kernel_h == 1) && (padding_top == 0) && (padding_bot == 0) && (padding_left == 0) && (padding_right == 0)) {

0 commit comments

Comments
 (0)