foss-for-synopsys-dwc-arc-processors
diff --git a/‎include/api/mli_ref_compiler_api.hpp‎
Lines changed: 43 additions & 31 deletions b/‎include/api/mli_ref_compiler_api.hpp‎
Lines changed: 43 additions & 31 deletions
diff --git a/‎include/api/mli_ref_runtime_api.hpp‎
Lines changed: 45 additions & 7 deletions b/‎include/api/mli_ref_runtime_api.hpp‎
Lines changed: 45 additions & 7 deletions
diff --git a/‎include/internal/mli_ref_private_types.hpp‎
Lines changed: 3 additions & 58 deletions b/‎include/internal/mli_ref_private_types.hpp‎
Lines changed: 3 additions & 58 deletions
diff --git a/‎include/mli_compiler_api.hpp‎
Lines changed: 50 additions & 8 deletions b/‎include/mli_compiler_api.hpp‎
Lines changed: 50 additions & 8 deletions
@@ -455,9 +455,9 @@ class MaxPool2D_CS : public lib_mli::MaxPool2D_CS {
      * @param output_tile_shape [O] Output tensor (tile shape, BHWC layout)
      */
     MaxPool2D_CS(const lib_mli::PlatformDescription pd,
-                 const Tensor<NoBuffer, kMaxpoolRank> in,
+                 const Tensor<NoBuffer, kPoolRank> in,
                  const PoolOpConfig &cfg,
-                 const Tensor<NoBuffer, kMaxpoolRank> output_tile_shape);
+                 const Tensor<NoBuffer, kPoolRank> output_tile_shape);
 
      /**
      * @brief Constructor to create a MaxPool2D compiler support object.
@@ -472,21 +472,16 @@ class MaxPool2D_CS : public lib_mli::MaxPool2D_CS {
      * @param out [O] Output tensor iterator (BHWC layout)
      */
     MaxPool2D_CS(const lib_mli::PlatformDescription pd,
-                 const TensorIterator<NoBuffer, kMaxpoolRank, kMaxpoolIterRank> in,
+                 const TensorIterator<NoBuffer, kPoolRank, kPoolIterRank> in,
                  const PoolOpConfig& cfg,
-                 const TensorIterator<NoBuffer, kMaxpoolRank, kMaxpoolIterRank> out);
+                 const TensorIterator<NoBuffer, kPoolRank, kPoolIterRank> out);
 
     unsigned GetKernelPrivateDataSize() const override;
     unsigned GetRuntimeObjectSize() const override;
-    
-    /**
-     * Tensor buffer sizes could depend on the platform and/or parameters.
-     * These functions can be used to query how much memory needs to be allocated for
-     * the input, weights and output tensors.
-     * Note, that these sizes are for full tensors, not tiles. 
-     */
+
     unsigned GetInputBufferSize() const override;
     unsigned GetOutputBufferSize() const override;
+
     /**
      * @return Always returns zero for reference kernel.
      */
@@ -496,55 +491,72 @@ class MaxPool2D_CS : public lib_mli::MaxPool2D_CS {
     /**
      * @deprecated
      */
-    mli_status AttachBufferOffsets(const Tensor<OffsetBuffer, kMaxpoolRank> &input,
-                                   const Tensor<OffsetBuffer, kMaxpoolRank> &output,
+    mli_status AttachBufferOffsets(const Tensor<OffsetBuffer, kPoolRank> &input,
+                                   const Tensor<OffsetBuffer, kPoolRank> &output,
                                    const OffsetBuffer &ctrl_buffer) override;
 
     mli_status AttachBufferOffsets(const OffsetBuffer& input,
                                    const OffsetBuffer& output,
                                    const OffsetBuffer& ctrl_buffer) override;
 
 private:
-    TensorIterator<OffsetBuffer, kMaxpoolRank, kMaxpoolIterRank> m_input;
-    TensorIterator<OffsetBuffer, kMaxpoolRank, kMaxpoolIterRank> m_output;
+    TensorIterator<OffsetBuffer, kPoolRank, kPoolIterRank> m_input;
+    TensorIterator<OffsetBuffer, kPoolRank, kPoolIterRank> m_output;
 
     PoolOpConfig m_config;
 
-    uint32_t m_input_buffer_size;
-    uint32_t m_output_buffer_size;
-
     lib_mli::PlatformDescription m_pd;
 };
 
 class SumPool2D_CS : public lib_mli::SumPool2D_CS {
 public:
-
     SumPool2D_CS(const lib_mli::PlatformDescription pd,
-                 const Tensor<NoBuffer, 4> in,
+                 const Tensor<NoBuffer, kPoolRank> in,
                  const PoolOpConfig &cfg,
-                 const Tensor<NoBuffer, 4> output_tile_shape);
+                 const Tensor<NoBuffer, kPoolRank> output_tile_shape);
+    /**
+     * @brief Constructor to create a SumPool2D compiler support object.
+     *
+     * This constructor can be used to create a Sum Pooling 2D compiler support
+     * object. This kernel computes each value of the output tensor as the sum 
+     * of all values in the related perception area of a single channel of the input tensor.
+     *
+     * @param pd [I] Platform description
+     * @param in [I] Input tensor iterator (BHWC layout)
+     * @param cfg [I] PoolOpConfig structure
+     * @param out [O] Output tensor iterator (BHWC layout)
+     */
+    SumPool2D_CS(const lib_mli::PlatformDescription pd,
+                 const TensorIterator<NoBuffer, kPoolRank, kPoolIterRank> &in,
+                 const PoolOpConfig &cfg,
+                 const TensorIterator<NoBuffer, kPoolRank, kPoolIterRank> &out);
 
     // From CompilerGenericInterface
     unsigned GetKernelPrivateDataSize() const override;
     unsigned GetRuntimeObjectSize() const override;
-    mli_status GetKernelPrivateData(void* kernel_private_data_buffer) override;
-    mli_status AttachBufferOffsets(const Tensor<OffsetBuffer, 4> &input,
-                                   const Tensor<OffsetBuffer, 4> &output,
-                                   const OffsetBuffer &ctrl_buffer) override;
 
-    // From SumPool2D_CS
     unsigned GetInputBufferSize() const override;
     unsigned GetOutputBufferSize() const override;
 
+    mli_status GetKernelPrivateData(void* kernel_private_data_buffer) override;
+
+    /**
+     * @deprecated
+     */
+    mli_status AttachBufferOffsets(const Tensor<OffsetBuffer, kPoolRank> &input,
+                                   const Tensor<OffsetBuffer, kPoolRank> &output,
+                                   const OffsetBuffer &ctrl_buffer) override;
+
+    mli_status AttachBufferOffsets(const OffsetBuffer& input,
+                                   const OffsetBuffer& output,
+                                   const OffsetBuffer& ctrl_buffer) override;
+
 private:
-    Tensor<OffsetBuffer, 4> m_in;
-    Tensor<OffsetBuffer, 4> m_output;
+    TensorIterator<OffsetBuffer, kPoolRank, kPoolIterRank> m_input;
+    TensorIterator<OffsetBuffer, kPoolRank, kPoolIterRank> m_output;
 
     PoolOpConfig m_config;
 
-    uint32_t m_input_buffer_size;
-    uint32_t m_output_buffer_size;
-
     lib_mli::PlatformDescription m_pd;
 };
 
 
@@ -284,14 +284,14 @@ class MaxPool2D : public ExecutionInterface {
     mli_status Update() override;
 
     // TODO: remove this method and replace with usage of Move kernel (not possible now)
-    void GetIOSizesAndOffsets(uint32_t input_size[kMaxpoolRank], uint32_t output_size[kMaxpoolRank],
-                              int32_t input_offsets[kMaxpoolRank], int32_t output_offsets[kMaxpoolRank]);
+    void GetIOSizesAndOffsets(uint32_t input_size[kPoolRank], uint32_t output_size[kPoolRank],
+                              int32_t input_offsets[kPoolRank], int32_t output_offsets[kPoolRank]);
 
 private:
     void UpdateTilePaddings();
 
-    TensorIterator<OffsetBuffer, kMaxpoolRank, kMaxpoolIterRank> m_input;
-    TensorIterator<OffsetBuffer, kMaxpoolRank, kMaxpoolIterRank> m_output;
+    TensorIterator<OffsetBuffer, kPoolRank, kPoolIterRank> m_input;
+    TensorIterator<OffsetBuffer, kPoolRank, kPoolIterRank> m_output;
 
     mli_pool_cfg m_cfg;
     int32_t m_input_batch_offset;
@@ -356,6 +356,30 @@ class FullyConnected : public ExecutionInterface {
 class SumPool2D : public ExecutionInterface {
 
 public:
+    /**
+     * @brief Construct a new Sum Pooling 2D object
+     *
+     * This method will create and initialize the Sum Pooling 2D object using the information
+     * stored in the kernel_private_data_buffer that has been computed at compile time
+     * by the GetKernelPrivateData() method of SumPool2D_CS class
+     * 
+     * This kernel computes each value of the output tensor as the sum 
+     * of all values in the related perception area of a single channel of the input tensor.
+     *
+     * @param kernel_private_data_buffer [I] Pointer to the compilation time computed initialization data.
+     * @param size        [I] Size of the data; it is used to check for coding errors.
+     * @param membases[]  [I] The kernel private data may contain offsets inside a (vector) memory.
+     *                        At run-time specific locations in memory are allocated for
+     *                        the graph, the membase array contains the start of
+     *                        each memory region.
+     *                        This base will be added to all memory offsets in the constructor
+     *                        according to the memory ID associated with that offset.
+     *                        Each platform can have different (number of) memories. For mli
+     *                        this is completely transparent. Compiler needs to use the same
+     *                        memory id's when attaching the buffers as are used by the
+     *                        xop-interpreter to set the membases.
+     * @param num_mems    [I] Number of memory regions passed with membases array.
+     */
     SumPool2D(void* kernel_private_data_buffer, size_t size, uint64_t membases[], int num_mems);
 
     mli_status Issue() override;
@@ -364,15 +388,29 @@ class SumPool2D : public ExecutionInterface {
 
     mli_status Update() override;
 
+    // TODO: remove this method and replace with usage of Move kernel (not possible now)
+    void GetIOSizesAndOffsets(uint32_t input_size[kPoolRank], uint32_t output_size[kPoolRank],
+                              int32_t input_offsets[kPoolRank], int32_t output_offsets[kPoolRank]);
+
 private:
+    void UpdateTilePaddings();
+    
     mli_pool_cfg m_cfg;
-    mli_tensor m_input;
-    mli_tensor m_output;
+
+    TensorIterator<OffsetBuffer, kPoolRank, kPoolIterRank> m_input;
+    TensorIterator<OffsetBuffer, kPoolRank, kPoolIterRank> m_output;
+
     int32_t m_input_batch_offset;
     int32_t m_output_batch_offset;
-    uint32_t m_batch_number;
+    
     uint32_t m_i_elem_size;
     uint32_t m_o_elem_size;
+
+    // Tile state
+    uint32_t m_tile_batch_size;
+    mli_tensor m_tile_input;
+    mli_tensor m_tile_output;
+    mli_pool_cfg m_tile_cfg;
 };
 
 /**
 
@@ -185,70 +185,15 @@ class MovePrivateData : public PrivateData {
     TensorIterator<OffsetBuffer, kMoveRank, kMoveIterRank> dst_it;
 };
 
-/**
- *  TODO: to remove this after Pool2DPrivateData will be updated
- *  to do this SumPool2D_CS needs to be updated same was as MaxPool2D_CS
- */ 
-class MaxPool2DPrivateData : public PrivateData {
-
-public:
-  MaxPool2DPrivateData(kernel_id_t id)
-    : PrivateData(id, sizeof(MaxPool2DPrivateData)) {}
-
-  TensorIterator<OffsetBuffer, kMaxpoolRank, kMaxpoolIterRank> input;
-  TensorIterator<OffsetBuffer, kMaxpoolRank, kMaxpoolIterRank> output;
-  PoolOpConfig config;
-};
-
 class Pool2DPrivateData : public PrivateData {
 
 public:
     Pool2DPrivateData(kernel_id_t id)
         : PrivateData(id, sizeof(Pool2DPrivateData)) {}
 
-    OffsetBuffer input_buffer;
-    OffsetBuffer output_buffer;
-
-    uint32_t input_b;
-    uint32_t input_h;
-    uint32_t input_w;
-    uint32_t input_c;
-
-    int32_t input_b_stride;
-    int32_t input_h_stride;
-    int32_t input_w_stride;
-    int32_t input_c_stride;
-
-    uint32_t output_b;
-    uint32_t output_h;
-    uint32_t output_w;
-    uint32_t output_c;
-
-    int32_t output_b_stride;
-    int32_t output_h_stride;
-    int32_t output_w_stride;
-    int32_t output_c_stride;
-
-    uint8_t kernel_height;
-    uint8_t kernel_width;
-    uint8_t stride_height;
-    uint8_t stride_width;
-    uint8_t padding_top;
-    uint8_t padding_bottom;
-    uint8_t padding_left;
-    uint8_t padding_right;
-
-    // Tile Parameters BHWC
-    bool m_use_tiling;
-    uint32_t m_tile_total_input_size[4];
-    uint32_t m_tile_total_output_size[4];
-    uint32_t m_tile_iteration_order[4];
-    uint32_t m_tile_first_size[4];
-    uint32_t m_tile_size[4];
-    uint32_t m_tile_input_first_inc[4];
-    uint32_t m_tile_input_inc[4];
-    uint32_t m_tile_output_first_inc[4];
-    uint32_t m_tile_output_inc[4];
+    TensorIterator<OffsetBuffer, kPoolRank, kPoolIterRank> input;
+    TensorIterator<OffsetBuffer, kPoolRank, kPoolIterRank> output;
+    PoolOpConfig config;
 };
 
 class EltwisePrivateData : public PrivateData {
 
@@ -723,14 +723,18 @@ class MaxPool2D_CS : public CompilerGenericInterface {
 
     /**
      * @brief Method to get the input buffer size
-     *
+     * @deprecated
+     * TODO: will be removed
+     *
      * @return Size of the input buffer in bytes
      */
     virtual unsigned GetInputBufferSize() const = 0;
 
     /**
      * @brief Method to get the output buffer size
-     *
+     * @deprecated
+     * TODO: will be removed
+     *
      * @return Size of the output buffer in bytes
      */
     virtual unsigned GetOutputBufferSize() const = 0;
@@ -751,8 +755,8 @@ class MaxPool2D_CS : public CompilerGenericInterface {
      * 
      * @return MLI status code
      */
-    virtual mli_status AttachBufferOffsets(const Tensor<OffsetBuffer, kMaxpoolRank> &input,
-                                           const Tensor<OffsetBuffer, kMaxpoolRank> &output,
+    virtual mli_status AttachBufferOffsets(const Tensor<OffsetBuffer, kPoolRank> &input,
+                                           const Tensor<OffsetBuffer, kPoolRank> &output,
                                            const OffsetBuffer &ctrl_buffer) = 0;
 
     /**
@@ -788,18 +792,56 @@ class SumPool2D_CS : public CompilerGenericInterface {
 
     /**
      * @brief Methods to get buffer sizes
+     * 
+     * @deprecated
+     * TODO: to be removed later
+     *
      */
 
     virtual unsigned GetInputBufferSize() const = 0;
     virtual unsigned GetOutputBufferSize() const = 0;
+    
     /**
-
-     * @brief Methods to set buffer offsets
+     * @brief Method to set buffer memory offsets and memory IDs for the kernel
+     * 
+     * @deprecated 
+     * 
+     * Compiler computes a memory map and buffer offsets are set using this method.
+     * Compiler also needs to indicate in which memory the buffers reside.
+     * These ID's need to match the array of memory bases that the xop-interpreter passes to
+     * the Create function.
      *
+     * In this method you specify offsets for tensors passed to the constructor.
+     * 
+     * @param input [I] Tensor descriptor containing input OffsetBuffer and tensor shape and memory strides
+     * @param output [I] Tensor descriptor containing output OffsetBuffer and tensor shape and memory strides
+     * @param ctrl_buffer [I] data OffsetBuffer
+     * 
+     * @return MLI status code
      */
-    virtual mli_status AttachBufferOffsets(const Tensor<OffsetBuffer, 4> &input,
-                                           const Tensor<OffsetBuffer, 4> &output,
+    virtual mli_status AttachBufferOffsets(const Tensor<OffsetBuffer, kPoolRank> &input,
+                                           const Tensor<OffsetBuffer, kPoolRank> &output,
                                            const OffsetBuffer &ctrl_buffer) = 0;
+
+    /**
+     * @brief Method to set buffer memory offsets and memory IDs for the kernel
+     *
+     * Compiler computes a memory map and buffer offsets are set using this method.
+     * Compiler also needs to indicate in which memory the buffers reside.
+     * These ID's need to match the array of memory bases that the xop-interpreter passes to
+     * the Create function.
+     *
+     * In this method you specify offsets for tensors passed to the constructor.
+     *
+     * @param input       [I] OffsetBuffer containing Memory Identifier and Offset in that memory
+     * @param output      [I] OffsetBuffer containing Memory Identifier and Offset in that memory
+     * @param ctrl_buffer [I] OffsetBuffer containing Memory Identifier and Offset in that memory
+     *
+     * @return MLI status code
+     */
+    virtual mli_status AttachBufferOffsets(const OffsetBuffer &input,
+                                           const OffsetBuffer &output,
+                                           const OffsetBuffer &ctrl_buffer)  = 0;
 };