resize bilinear user test and kernel

yaroslav · essayed · commit feb0130df708 · 2022-10-27T09:14:55.000Z
diff --git a/include/api/mli_ref_runtime_api.hpp b/include/api/mli_ref_runtime_api.hpp
@@ -1190,6 +1190,9 @@ class MoveBroadcast : public ExecutionInterface {
     void MoveBroadcastRun(Tensor<buf_T, N> &src, Tensor<buf_T, N> &dst);
 };
 
+// TODO: temporary standalone entry point; remove once the standard ResizeBilinear kernel with Issue() is added.
+void run_mli_resize_bilinear_standalone(const mli_tensor* in, const ResizeOpConfig& cfg, mli_tensor* out);
+
 } // namespace snps_arc::metaware::mli::ref
 
 #endif // _MLI_REF_RUNTIME_API_HPP_
diff --git a/include/mli_kernels_factory_ref.hpp b/include/mli_kernels_factory_ref.hpp
@@ -318,7 +318,7 @@ class KernelsFactory : public lib_mli::KernelsFactory {
          * The MLI classes need to be 32 bit aligned
          */
         assert(kernel_buffer != nullptr);
-        assert(((unsigned long) kernel_buffer % kMliAlignment) == 0);
+        assert(((size_t) kernel_buffer % kMliAlignment) == 0);
         return new(kernel_buffer) lib_ref::SumPool2D_CS(m_pd, in, cfg, output_tile_shape);
     }
 
@@ -331,7 +331,7 @@ class KernelsFactory : public lib_mli::KernelsFactory {
          * The MLI classes need to be 32 bit aligned
          */
         assert(kernel_buffer != nullptr);
-        assert(((unsigned long) kernel_buffer % kMliAlignment) == 0);
+        assert(((size_t) kernel_buffer % kMliAlignment) == 0);
         return new(kernel_buffer) lib_ref::SumPool2D_CS(m_pd, in, cfg, out);
     }
 
diff --git a/include/mli_types.hpp b/include/mli_types.hpp
@@ -141,7 +141,6 @@ constexpr short int kReduceMaxIterRank = 4;
 constexpr unsigned kMoveBroadcastRank = 4;      // ToDo: when mli_tensor takes [rank=5] -> change rank from 4 to 5.
 constexpr unsigned kMoveBroadcastIterRank = 4;  // ToDo: when mli_tensor takes [rank=5] -> change rank from 4 to 5.
 
-constexpr short int kResizeDim = 2;
 constexpr short int kResizeBilinearRank = 4;
 constexpr short int kResizeBilinearIterRank = 4;
 
@@ -1006,17 +1005,19 @@ struct PreluOpConfig {
 };
 
 struct ResizeOpConfig {
+  static constexpr unsigned kResizeParamRank = 2;  /**< Number of entries in stride/offset: [H, W] */
+
   ResizeOpConfig() = default;
-  ResizeOpConfig(int16_t *stride, int16_t *offset, int8_t shift) {
-    for(int8_t i = 0; i < kResizeDim; i++) {
+  ResizeOpConfig(const int16_t stride[kResizeParamRank], const int16_t offset[kResizeParamRank], int8_t shift) {
+    for(unsigned i = 0; i < kResizeParamRank; i++) {  // copy the [H, W] entries; the source arrays are only read
       this->stride[i] = stride[i];
       this->offset[i] = offset[i];
     }
     this->shift = shift;
   }
 
-  int16_t stride[kResizeDim];    /**< [stride_H, stride_W] */
-  int16_t offset[kResizeDim];    /**< [offset_H, offset_W] */
+  int16_t stride[kResizeParamRank];    /**< [stride_H, stride_W] */
+  int16_t offset[kResizeParamRank];    /**< [offset_H, offset_W] */
   int8_t shift;         /**< Shift value (for fractional stride and offset) */
 
 };
diff --git a/lib/mli_lib.cmake b/lib/mli_lib.cmake
@@ -80,6 +80,8 @@ set(MLI_LIB_SOURCE_FILES
     ${MLI_LIB_CMAKE_DIR}/src/kernels/conversion/mli_reduce_max_runtime.cc
     ${MLI_LIB_CMAKE_DIR}/src/kernels/conversion/mli_reduce_sum_compiler.cc
     ${MLI_LIB_CMAKE_DIR}/src/kernels/conversion/mli_reduce_sum_runtime.cc
+    ${MLI_LIB_CMAKE_DIR}/src/kernels/conversion/mli_resize_bilinear_compiler.cc
+    ${MLI_LIB_CMAKE_DIR}/src/kernels/conversion/mli_resize_bilinear_runtime.cc
     ${MLI_LIB_CMAKE_DIR}/src/kernels/clip/mli_krn_clip_compiler.cc
     ${MLI_LIB_CMAKE_DIR}/src/kernels/clip/mli_krn_clip_runtime.cc
     ${MLI_LIB_CMAKE_DIR}/src/kernels/transform/mli_krn_prelu_compiler.cc
diff --git a/lib/src/kernels/conversion/impl/mli_resize_bilinear_ref.hpp b/lib/src/kernels/conversion/impl/mli_resize_bilinear_ref.hpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2022, Synopsys, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-3-Clause license found in
+ * the LICENSE file in the root directory of this source tree.
+ *
+ */
+
+#ifndef _MLI_RESIZE_BILINEAR_REF_HPP_
+#define _MLI_RESIZE_BILINEAR_REF_HPP_
+
+#include "mli_types.h"
+#include "mli_prv_dsp.h"
+#include "mli_prv_tensor.h"
+#include "mli_mem_info.h"
+
+namespace snps_arc::metaware::mli {
+namespace krn {
+namespace ref {
+
+// TODO: change mli_tensor to Tensor
+// TODO: change BHWC to BHWGC
+// Reference bilinear resize. Reads int8 input and writes int32 output, both indexed (b, h, w, c).
+// Output pixel (h, w) samples the input at h * stride[0] + offset[0], w * stride[1] + offset[1],
+// fixed-point values with cfg.shift fractional bits. Taps outside the input are clamped to its
+// border. The weighted sum is written as is, i.e. still scaled by 2^(2 * cfg.shift).
+mli_status mli_resize_bilinear(const mli_tensor* in, const ResizeOpConfig& cfg, mli_tensor* out) {
+    mli_prv_fx_init_dsp_ctrl();
+
+    auto in_prv = mli_prv_get_generic_tensor<MLI_PTR(int8_t)>(in);
+    auto out_prv = mli_prv_get_generic_tensor<MLI_OUT_PTR(int32_t)>(out);
+
+    const int one_fx = (1 << cfg.shift);
+    const int last_row = in_prv.shape[kTensorHeightDim] - 1;
+    const int last_col = in_prv.shape[kTensorWidthDim] - 1;
+    for (int b = 0; b < out_prv.shape[kTensorBatchDim]; b++) {
+        for (int h = 0; h < out_prv.shape[kTensorHeightDim]; h++) {
+            const int row_fx = h * cfg.stride[0] + cfg.offset[0];
+            const int row_int = row_fx >> cfg.shift;
+            // fractional part; multiply instead of '<<' because row_int may be negative
+            const int delta_row_fx = row_fx - row_int * one_fx;
+            // clamp both taps: with a negative offset even row_int + 1 can be below zero
+            const int input_row0_int = MIN(MAX(row_int, 0), last_row);
+            const int input_row1_int = MIN(MAX(row_int + 1, 0), last_row);
+            for (int w = 0; w < out_prv.shape[kTensorWidthDim]; w++) {
+                const int col_fx = w * cfg.stride[1] + cfg.offset[1];
+                const int col_int = col_fx >> cfg.shift;
+                const int delta_col_fx = col_fx - col_int * one_fx;
+                const int input_col0_int = MIN(MAX(col_int, 0), last_col);
+                const int input_col1_int = MIN(MAX(col_int + 1, 0), last_col);
+                for (int c = 0; c < out_prv.shape[kTensorChannelDim]; c++) {
+                    // read the nearest 4 input values around the output point
+                    const int8_t v00 = mli_prv_tensor_read(in_prv, b, input_row0_int, input_col0_int, c);
+                    const int8_t v01 = mli_prv_tensor_read(in_prv, b, input_row0_int, input_col1_int, c);
+                    const int8_t v10 = mli_prv_tensor_read(in_prv, b, input_row1_int, input_col0_int, c);
+                    const int8_t v11 = mli_prv_tensor_read(in_prv, b, input_row1_int, input_col1_int, c);
+                    // compute and write output point
+                    const int32_t out_val = v00 * (one_fx - delta_row_fx) * (one_fx - delta_col_fx) +
+                                            v01 * (one_fx - delta_row_fx) * delta_col_fx            +
+                                            v10 * delta_row_fx            * (one_fx - delta_col_fx) +
+                                            v11 * delta_row_fx            * delta_col_fx;
+                    mli_prv_tensor_write(out_val, out_prv, b, h, w, c);
+                }
+            }
+        }
+    }
+
+    return MLI_STATUS_OK;
+}
+
+
+} // namespace ref
+} // namespace krn
+} // namespace snps_arc::metaware::mli
+
+#endif // _MLI_RESIZE_BILINEAR_REF_HPP_
diff --git a/lib/src/kernels/conversion/mli_resize_bilinear.hpp b/lib/src/kernels/conversion/mli_resize_bilinear.hpp
@@ -0,0 +1,39 @@
+/*
+* Copyright 2022, Synopsys, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the BSD-3-Clause license found in
+* the LICENSE file in the root directory of this source tree.
+*
+*/
+
+#ifndef _MLI_RESIZE_BILINEAR_HPP_
+#define _MLI_RESIZE_BILINEAR_HPP_
+
+#include "mli_resize_bilinear_decl.hpp"
+
+////////////////////////////////////////////////////////////////////////////////
+// Setting up namespace
+////////////////////////////////////////////////////////////////////////////////
+// Selecting between different variants (depending on hardware features) is
+// done with 'using'. A completely different implementation can be used/'using'.
+// However, also only a part of the reference together with optimized functions
+// (for example *_dsp) can be used/'using'.
+
+namespace snps_arc::metaware::mli {
+namespace krn {
+
+using snps_arc::metaware::mli::krn::ref::mli_resize_bilinear;
+
+} // namespace krn
+} // namespace snps_arc::metaware::mli
+
+////////////////////////////////////////////////////////////////////////////////
+// Include implementation
+////////////////////////////////////////////////////////////////////////////////
+// The reference (*_ref.h) implementation can run on all platforms and is always
+// included. Other variants are included based on capabilities. Implementations
+// below can depend on each other through declarations in *_decl.h.
+#include "impl/mli_resize_bilinear_ref.hpp"
+
+#endif // _MLI_RESIZE_BILINEAR_HPP_
diff --git a/lib/src/kernels/conversion/mli_resize_bilinear_compiler.cc b/lib/src/kernels/conversion/mli_resize_bilinear_compiler.cc
@@ -0,0 +1,75 @@
+/*
+ * Copyright 2022, Synopsys, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-3-Clause license found in
+ * the LICENSE file in the root directory of this source tree.
+ *
+ */
+#include <cstring>
+
+#include "mli_ref_runtime_api.hpp"
+#include "mli_ref_compiler_api.hpp"
+#include "mli_ref_private_types.hpp"
+
+namespace snps_arc::metaware::mli::ref {
+
+ResizeBilinear_CS::ResizeBilinear_CS(const lib_mli::PlatformDescription pd,
+                                     const TensorIterator<NoBuffer, kResizeBilinearRank, kResizeBilinearIterRank> &in,
+                                     const ResizeOpConfig &cfg,
+                                     const TensorIterator<NoBuffer, kResizeBilinearRank, kResizeBilinearIterRank> &out)
+                                     : m_cfg(cfg), m_in(in), m_out(out), m_pd(pd) {
+  // Check the stored config; stride and offset are fixed point with m_cfg.shift fractional bits.
+  MLI_ASSERT((m_cfg.stride[0] > 0) && (m_cfg.stride[1] > 0));
+  MLI_ASSERT((m_cfg.shift >= 1) && (m_cfg.shift <= 11));
+
+  // resizing factor is limited to 1/16 downscaling
+  if (m_cfg.shift >= 4) {
+    MLI_ASSERT(m_cfg.stride[0] > (1 << (m_cfg.shift - 4)));
+    MLI_ASSERT(m_cfg.stride[1] > (1 << (m_cfg.shift - 4)));
+  }
+
+  // resizing factor is limited to x16 upscaling
+  const int val_16_fx = 16 << m_cfg.shift;  // int: 16 << 11 == 32768 does not fit in int16_t
+  MLI_ASSERT(m_cfg.stride[0] < val_16_fx);
+  MLI_ASSERT(m_cfg.stride[1] < val_16_fx);
+
+  // offset range is limited to maximum 16 pixels
+  MLI_ASSERT((m_cfg.offset[0] > -val_16_fx) && (m_cfg.offset[0] < val_16_fx));
+  MLI_ASSERT((m_cfg.offset[1] > -val_16_fx) && (m_cfg.offset[1] < val_16_fx));
+}
+
+mli_status ResizeBilinear_CS::AttachBufferOffsets(const OffsetBuffer &input,
+                                                  const OffsetBuffer &output,
+                                                  const OffsetBuffer &ctrl_buffer) {
+    // Hand the given buffers to the output and input iterators; ctrl_buffer is not used here.
+    m_out.set_buf(output);
+    m_in.set_buf(input);
+
+    return MLI_STATUS_OK;
+}
+
+mli_status ResizeBilinear_CS::GetKernelPrivateData(void *kernel_private_data_buffer) {
+    // Input elements must be int8-sized, output elements int32-sized, and both ranks must match.
+    MLI_ASSERT(m_in.get_elem_size() == sizeof(int8_t) && m_out.get_elem_size() == sizeof(int32_t));
+    MLI_ASSERT(m_in.get_tensor().get_rank() == m_out.get_tensor().get_rank());
+
+    ResizeBilinearPrivateData private_data;
+    private_data.config = m_cfg;
+    private_data.output = m_out;
+    private_data.input = m_in;
+
+    // Copy the filled structure byte-for-byte into the caller's buffer.
+    std::memcpy(kernel_private_data_buffer, &private_data, sizeof(private_data));
+    return MLI_STATUS_OK;
+}
+
+unsigned ResizeBilinear_CS::GetKernelPrivateDataSize() const {
+    return sizeof(ResizeBilinearPrivateData);  // byte size of the struct that GetKernelPrivateData() copies out
+}
+
+unsigned ResizeBilinear_CS::GetRuntimeObjectSize() const {
+    return sizeof(ResizeBilinear);  // byte size of the ResizeBilinear class (presumably the runtime kernel object)
+}
+
+}  // namespace snps_arc::metaware::mli::ref
diff --git a/lib/src/kernels/conversion/mli_resize_bilinear_decl.hpp b/lib/src/kernels/conversion/mli_resize_bilinear_decl.hpp
@@ -0,0 +1,36 @@
+/*
+* Copyright 2022, Synopsys, Inc.
+* All rights reserved.
+*
+* This source code is licensed under the BSD-3-Clause license found in
+* the LICENSE file in the root directory of this source tree.
+*
+*/
+
+#ifndef _MLI_RESIZE_BILINEAR_DECL_HPP_
+#define _MLI_RESIZE_BILINEAR_DECL_HPP_
+
+#include "mli_config.h"
+#include "mli_types.h"
+
+namespace snps_arc::metaware::mli {
+namespace krn {
+////////////////////////////////////////////////////////////////////////////////
+// Functions (in *_ref/*_dsp/*vdsp) that can be called from outside their own
+// file must be declared here. This includes all overloads. For example, if we
+// have: io_T f(io_T a) and int8_t f(int8_t a), then both must be declared.
+// Not doing so, can cause the compiler to use the wrong overload.
+////////////////////////////////////////////////////////////////////////////////
+
+////////////////////////////////////////////////////////////////////////////////
+// REF
+////////////////////////////////////////////////////////////////////////////////
+namespace ref {
+
+mli_status mli_resize_bilinear(const mli_tensor* in, const ResizeOpConfig& cfg, mli_tensor* out) ;
+
+} // namespace ref
+} // namespace krn
+} // namespace snps_arc::metaware::mli
+
+#endif // _MLI_RESIZE_BILINEAR_DECL_HPP_
diff --git a/lib/src/kernels/conversion/mli_resize_bilinear_runtime.cc b/lib/src/kernels/conversion/mli_resize_bilinear_runtime.cc
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2022, Synopsys, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-3-Clause license found in
+ * the LICENSE file in the root directory of this source tree.
+ *
+ */
+
+#include <cstring>
+
+#include "mli_debug.h"
+#include "mli_ref_runtime_api.hpp"
+#include "mli_resize_bilinear.hpp"
+#include "mli_ref_private_types.hpp"
+
+namespace snps_arc::metaware::mli::ref {
+
+namespace mli_krn = ::snps_arc::metaware::mli::krn;
+
+// TODO: remove this once the standard version of ResizeBilinear with Issue() is added
+void run_mli_resize_bilinear_standalone(const mli_tensor* in, const ResizeOpConfig& cfg, mli_tensor* out) {
+  // Checks cfg with MLI_ASSERT, then runs mli_krn::mli_resize_bilinear on the given tensors.
+  // Stride and offset are fixed-point values with cfg.shift fractional bits.
+  MLI_ASSERT((cfg.stride[0] > 0) && (cfg.stride[1] > 0));
+  MLI_ASSERT((cfg.shift >= 1) && (cfg.shift <= 11));
+
+  // resizing factor is limited to 1/16 downscaling
+  if (cfg.shift >= 4) {
+    MLI_ASSERT(cfg.stride[0] > (1 << (cfg.shift - 4)));
+    MLI_ASSERT(cfg.stride[1] > (1 << (cfg.shift - 4)));
+  }
+  // resizing factor is limited to x16 upscaling
+  const int val_16_fx = 16 << cfg.shift;  // int: 16 << 11 == 32768 does not fit in int16_t
+  MLI_ASSERT(cfg.stride[0] < val_16_fx);
+  MLI_ASSERT(cfg.stride[1] < val_16_fx);
+
+  // offset range is limited to maximum 16 pixels
+  MLI_ASSERT(cfg.offset[0] > -val_16_fx && cfg.offset[0] < val_16_fx);
+  MLI_ASSERT(cfg.offset[1] > -val_16_fx && cfg.offset[1] < val_16_fx);
+
+  mli_krn::mli_resize_bilinear(in, cfg, out);
+}
+
+}  // namespace snps_arc::metaware::mli::ref
+
diff --git a/user_tests/CMakeLists.txt b/user_tests/CMakeLists.txt
@@ -259,3 +259,8 @@ add_user_test(krn eltwise_30)
 #======================================================
 add_user_test(krn reduce_max_30)
 add_user_test(krn reduce_sum_30)
+
+#======================================================
+# Resize Group
+#======================================================
+add_user_test(krn resize_bilinear_30)
diff --git a/user_tests/make/Makefile b/user_tests/make/Makefile
@@ -65,9 +65,8 @@ KERNELS = \
 	reduce_sum_30 \
 	permute_30 \
 	move_broadcast_30 \
-	matmul_30
-
-
+	matmul_30 \
+	resize_bilinear_30
 
 BIN_FILES=$(patsubst %,$(BIN_PATH)$(PS)test_mli_hlp_%$(BIN_EXT),$(HELPERS))
 BIN_FILES+=$(patsubst %,$(BIN_PATH)$(PS)test_mli_krn_%$(BIN_EXT),$(KERNELS))
diff --git a/user_tests/tests/mli_krn_resize_bilinear_30/tests_mli_krn_resize_bilinear_30.cc b/user_tests/tests/mli_krn_resize_bilinear_30/tests_mli_krn_resize_bilinear_30.cc
diff --git a/user_tests/tests/mli_krn_resize_bilinear_30/vectors_mli_krn_resize_bilinear_30.inc b/user_tests/tests/mli_krn_resize_bilinear_30/vectors_mli_krn_resize_bilinear_30.inc