@@ -785,7 +785,8 @@ static __attribute__ ((always_inline)) void pointwise_convolution2D_hwc_nopad(
785785 int even_in_ch = in_ch & (~0x3 );
786786
787787 if ((in_ch & 0x3 ) == 0 ) {
788- for (int H_idx = row_begin; H_idx < row_end; H_idx++) {
788+ for (int H_idx = 0 ; H_idx < amount_rows; H_idx++) {
789+ #if !defined(_ARCVER_ARCv2HS)
789790 int32_t init_accum_val = weights_add;
790791 acc_T accu = mli_prv_init_accu (init_accum_val);
791792 for (int j = 0 ; j < (in_ch / 4 ); j++) {
@@ -801,9 +802,15 @@ static __attribute__ ((always_inline)) void pointwise_convolution2D_hwc_nopad(
801802 in_ptr += in_ch * (stride_width - 1 );
802803 w_ptr -= in_ch;
803804
804- for (int W_idx = clmn_begin + 1 ; W_idx < clmn_end ; W_idx++) {
805+ for (int W_idx = 1 ; W_idx < amount_columns ; W_idx++) {
805806 init_accum_val = weights_add;
806807 accu = mli_prv_init_accu (init_accum_val);
808+ #else
809+ for (int W_idx = 0 ; W_idx < amount_columns; W_idx++) {
810+ int32_t init_accum_val = weights_add;
811+ acc_T accu = mli_prv_init_accu (init_accum_val);
812+ #endif
813+
807814LOOP_PIPELINE_ENABLE
808815 for (int j = 0 ; j < (in_ch / 4 ); j++) {
809816 mli_prv_load_mac_vec4 (&accu, in_ptr, w_ptr);
@@ -822,10 +829,10 @@ LOOP_PIPELINE_ENABLE
822829 in_ptr += stride_height * in_width * in_ch - in_compensation_clmn_loop;
823830 } // for H_idx
824831 } else {
825- for (int H_idx = row_begin ; H_idx < row_end ; H_idx++) {
832+ for (int H_idx = 0 ; H_idx < amount_rows ; H_idx++) {
826833 int32_t init_accum_val = weights_add;
827834 acc_T accu = mli_prv_init_accu (init_accum_val);
828- for (int W_idx = clmn_begin ; W_idx < clmn_end ; W_idx++) {
835+ for (int W_idx = 0 ; W_idx < amount_columns ; W_idx++) {
829836
830837 for (int k = 0 ; k < odd_rest_of_in_ch; k++) {
831838 mli_prv_load_mac (&accu, in_ptr++, w_ptr++);
0 commit comments