Skip to content

Commit aeb46a9

Browse files
msayedJaccovG
authored and committed
MatMul implementation
1 parent 665585b commit aeb46a9

File tree

14 files changed

+1477
-10
lines changed

14 files changed

+1477
-10
lines changed

include/api/mli_ref_compiler_api.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1430,7 +1430,8 @@ class MatMul_CS : public lib_mli::MatMul_CS {
14301430
// From MatMul_CS
14311431
mli_status EncodeParams(const Buffer &in_bias1,
14321432
const Buffer &in_bias2,
1433-
const Buffer &encoded_params) override;
1433+
Buffer &encoded_params) override;
1434+
unsigned GetEncodedParamsSize() const override;
14341435

14351436
private:
14361437

@@ -1439,6 +1440,7 @@ class MatMul_CS : public lib_mli::MatMul_CS {
14391440
TensorIterator<OffsetBuffer, kMatMulRank, kMatMulIterRank> m_output;
14401441

14411442
OffsetBuffer m_encoded_params;
1443+
uint32_t m_encoded_params_buffer_size;
14421444

14431445
lib_mli::PlatformDescription m_pd;
14441446
};

include/api/mli_ref_runtime_api.hpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -855,12 +855,19 @@ class MatMul : public ExecutionInterface {
855855

856856
mli_status Update() override;
857857

858+
void GetIOSizesAndOffsets(uint32_t input_left_size[kMatMulRank], uint32_t input_right_size[kMatMulRank],
859+
uint32_t output_size[kMatMulRank], int32_t input_left_offsets[kMatMulRank],
860+
int32_t input_right_offsets[kMatMulRank], int32_t output_offsets[kMatMulRank]) const;
861+
858862
private:
859863
TensorIterator<OffsetBuffer, kMatMulRank, kMatMulIterRank> m_input_left;
860864
TensorIterator<OffsetBuffer, kMatMulRank, kMatMulIterRank> m_input_right;
861865
TensorIterator<OffsetBuffer, kMatMulRank, kMatMulIterRank> m_output;
862-
863-
OffsetBuffer m_encoded_params;
866+
Tensor<InternalBuffer, kMatMulRank> m_tile_input_left;
867+
Tensor<InternalBuffer, kMatMulRank> m_tile_input_right;
868+
Tensor<InternalBuffer, kMatMulRank> m_tile_output;
869+
870+
InternalBuffer m_encoded_params;
864871

865872
uint32_t m_i_elem_size;
866873
uint32_t m_o_elem_size;

include/mli_compiler_api.hpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1439,7 +1439,13 @@ class MatMul_CS : public CompilerGenericInterface {
14391439
*/
14401440
virtual mli_status EncodeParams(const Buffer &in_bias1,
14411441
const Buffer &in_bias2,
1442-
const Buffer &encoded_params) = 0;
1442+
Buffer &encoded_params) = 0;
1443+
/**
1444+
* @brief Method to query the size of the encoded parameters buffer
1445+
*
1446+
* This function returns the size of the buffer that is needed by the EncodeParams method
1447+
*/
1448+
virtual unsigned GetEncodedParamsSize() const = 0;
14431449

14441450
/**
14451451
* @brief Methods to set buffer offsets

include/mli_kernels_factory_ref.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -357,14 +357,14 @@ class KernelsFactory : public lib_mli::KernelsFactory {
357357
return new(kernel_buffer) lib_ref::Permute_CS(m_pd, in, cfg, out);
358358
}
359359

360-
uint32_t MatMul_CS_GetSize() const override { return 0 /*sizeof(lib_ref::MatMul_CS)*/; }
360+
uint32_t MatMul_CS_GetSize() const override { return sizeof(lib_ref::MatMul_CS); }
361361

362362
lib_mli::MatMul_CS* MatMul_CS(void *kernel_buffer,
363363
const TensorIterator<NoBuffer, kMatMulRank, kMatMulIterRank> &in_left,
364364
const TensorIterator<NoBuffer, kMatMulRank, kMatMulIterRank> &in_right,
365365
const TensorIterator<NoBuffer, kMatMulRank, kMatMulIterRank> &output) override {
366-
/* return new(kernel_buffer) lib_ref::MatMul_CS(m_pd, in_left, in_right, output);*/
367-
return nullptr;
366+
return new(kernel_buffer) lib_ref::MatMul_CS(m_pd, in_left, in_right, output);
367+
368368
}
369369

370370
uint32_t MoveBroadcast_CS_GetSize() const override { return sizeof(lib_ref::MoveBroadcast_CS); }

include/mli_types.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ constexpr unsigned kReduceSumRank = 4;
118118
constexpr unsigned kReduceSumIterRank = 4;
119119
constexpr short int kMatMulRank = 2;
120120
constexpr short int kMatMulIterRank = 2;
121+
constexpr short int kMatMulHeightDim = 0;
122+
constexpr short int kMatMulWidthDim = 1;
121123

122124
constexpr short int kArgMaxInRank = 4;
123125
constexpr short int kArgMaxInIterRank = 4;

lib/mli_lib.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,8 @@ set(MLI_LIB_SOURCE_FILES
6464
${MLI_LIB_CMAKE_DIR}/src/kernels/convolution/mli_krn_group_conv2d_hwcn.cc
6565
${MLI_LIB_CMAKE_DIR}/src/kernels/convolution/mli_krn_depthwise_conv2d_compiler.cc
6666
${MLI_LIB_CMAKE_DIR}/src/kernels/convolution/mli_krn_depthwise_conv2d_runtime.cc
67+
${MLI_LIB_CMAKE_DIR}/src/kernels/convolution/mli_krn_matmul_compiler.cc
68+
${MLI_LIB_CMAKE_DIR}/src/kernels/convolution/mli_krn_matmul_runtime.cc
6769
${MLI_LIB_CMAKE_DIR}/src/kernels/common/mli_krn_fully_connected.cc
6870
${MLI_LIB_CMAKE_DIR}/src/kernels/common/impl/mli_krn_fully_connected_compiler.cc
6971
${MLI_LIB_CMAKE_DIR}/src/kernels/common/impl/mli_krn_fully_connected_runtime.cc
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Copyright 2022, Synopsys, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-3-Clause license found in
6+
* the LICENSE file in the root directory of this source tree.
7+
*
8+
*/
9+
10+
#include <cstring>
11+
#include "mli_ref_compiler_api.hpp"
12+
#include "mli_ref_runtime_api.hpp"
13+
#include "mli_service_functions.hpp"
14+
namespace snps_arc::metaware::mli::ref {
15+
16+
/**
 * Compiler-support object for the reference MatMul kernel.
 *
 * Stores the platform description and the three buffer-less tensor iterators
 * (left input, right input, output). The encoded-parameters size is fixed at
 * construction to one int8_t per MatMul dimension (kMatMulRank values).
 */
MatMul_CS::MatMul_CS(const lib_mli::PlatformDescription &pd,
                     const TensorIterator<NoBuffer, kMatMulRank, kMatMulIterRank> &in_left,
                     const TensorIterator<NoBuffer, kMatMulRank, kMatMulIterRank> &in_right,
                     const TensorIterator<NoBuffer, kMatMulRank, kMatMulIterRank> &output)
    : m_in_left(in_left)
    , m_in_right(in_right)
    , m_output(output)
    , m_encoded_params_buffer_size(sizeof(int8_t) * kMatMulRank)
    , m_pd(pd) {
}
26+
27+
unsigned MatMul_CS::GetKernelPrivateDataSize() const {
28+
return sizeof(MatMulPrivateData);
29+
}
30+
31+
unsigned MatMul_CS::GetRuntimeObjectSize() const {
32+
return sizeof(MatMul);
33+
}
34+
35+
/**
 * Serializes the kernel configuration into a caller-provided buffer.
 *
 * A MatMulPrivateData is filled from the stored tensor iterators and the
 * encoded-parameters buffer descriptor, then copied byte-wise into the buffer.
 *
 * @param kernel_private_data_buffer Destination; must be non-null and hold at
 *                                   least sizeof(MatMulPrivateData) bytes.
 * @return MLI_STATUS_OK.
 */
mli_status MatMul_CS::GetKernelPrivateData(void* kernel_private_data_buffer) {
    MLI_ASSERT(kernel_private_data_buffer != nullptr);

    MatMulPrivateData private_data;
    private_data.m_in_left = m_in_left;
    private_data.m_in_right = m_in_right;
    private_data.m_output = m_output;
    private_data.encoded_params = m_encoded_params;

    // Byte-wise copy: the buffer is plain memory supplied by the caller.
    std::memcpy(kernel_private_data_buffer, &private_data, sizeof(private_data));

    return MLI_STATUS_OK;
}
50+
51+
/**
 * Binds buffer descriptors to the kernel's tensors.
 *
 * @param input_left     Buffer attached to the left input iterator.
 * @param input_right    Buffer attached to the right input iterator.
 * @param output         Buffer attached to the output iterator.
 * @param encoded_params Buffer descriptor stored for the encoded parameters.
 * @param ctrl_buffer    Not used by this implementation.
 * @return MLI_STATUS_OK.
 */
mli_status MatMul_CS::AttachBufferOffsets(const OffsetBuffer &input_left,
                                          const OffsetBuffer &input_right,
                                          const OffsetBuffer &output,
                                          const OffsetBuffer &encoded_params,
                                          const OffsetBuffer &ctrl_buffer) {
    // Parameters first, then the three tensor iterators; the assignments are independent.
    m_encoded_params = encoded_params;

    m_output.set_buf(output);
    m_in_right.set_buf(input_right);
    m_in_left.set_buf(input_left);

    return MLI_STATUS_OK;
}
64+
65+
/**
 * Packs the two input zero points into the encoded-parameters buffer.
 *
 * Layout of the encoded buffer: element 0 holds the value read from in_bias1,
 * element 1 holds the value read from in_bias2, both as int8_t.
 *
 * @param in_bias1       Buffer holding exactly one int8_t value.
 * @param in_bias2       Buffer holding exactly one int8_t value.
 * @param encoded_params Destination; its size must equal the sum of both input sizes.
 * @return MLI_STATUS_OK.
 */
mli_status MatMul_CS::EncodeParams(const Buffer &in_bias1,
                                   const Buffer &in_bias2,
                                   Buffer &encoded_params) {
    // The total size of the sources must equal the size of the encoded buffer.
    assert(in_bias1.get_size() + in_bias2.get_size() == encoded_params.get_size());

    // Each source holds exactly one value. The checks are kept separate on purpose:
    // `a == b == 1` parses as `(a == b) == 1` and would only test that the sizes match.
    assert(in_bias1.get_size() == 1);
    assert(in_bias2.get_size() == 1);

    // Both zero points are read as int8_t below.
    assert(in_bias1.get_elem_size() == sizeof(int8_t));
    assert(in_bias2.get_elem_size() == sizeof(int8_t));

    encoded_params.write<int8_t>(0, in_bias1.read<int8_t>(0));
    encoded_params.write<int8_t>(1, in_bias2.read<int8_t>(0));

    return MLI_STATUS_OK;
}
79+
80+
unsigned MatMul_CS::GetEncodedParamsSize() const {
81+
return m_encoded_params_buffer_size;
82+
}
83+
84+
}
Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
/*
2+
* Copyright 2022, Synopsys, Inc.
3+
* All rights reserved.
4+
*
5+
* This code is licensed under the BSD-3-Clause license found in
6+
* the LICENSE file in the root directory of this source tree.
7+
*
8+
*/
9+
#ifndef _MLI_KRN_MATMUL_REF_H_
10+
#define _MLI_KRN_MATMUL_REF_H_
11+
12+
#include <cstring>
13+
#include "mli_debug.h"
14+
#include "mli_ref_runtime_api.hpp"
15+
#include "mli_types.hpp"
16+
17+
using snps_arc::metaware::mli::InternalBuffer;
18+
using snps_arc::metaware::mli::Tensor;
19+
using snps_arc::metaware::mli::OffsetBuffer;
20+
using snps_arc::metaware::mli::kMatMulHeightDim;
21+
using snps_arc::metaware::mli::kMatMulWidthDim;
22+
using snps_arc::metaware::mli::kMatMulRank;
23+
24+
namespace mli {
25+
namespace krn {
26+
namespace ref {
27+
28+
#pragma MLI_CODE_SECTION_START(".mli_lib")
29+
30+
31+
template <typename in1_t, typename in2_t,typename out_t, uint32_t rank>
32+
void MatMul_prepare_and_run(Tensor<InternalBuffer, rank> &in_left,
33+
Tensor<InternalBuffer, rank> &in_right,
34+
Tensor<InternalBuffer, rank> &output,
35+
InternalBuffer &encoded_params) {
36+
/**
37+
* layout = HW
38+
* H of left = W of right
39+
* output shape must be of shape Hr * Wl
40+
* rank = 2
41+
*/
42+
MLI_ASSERT(rank == kMatMulRank);
43+
MLI_ASSERT(in_left.get_dim(kMatMulWidthDim) == in_right.get_dim(kMatMulHeightDim));
44+
MLI_ASSERT(output.get_dim(kMatMulHeightDim) == in_left.get_dim(kMatMulHeightDim));
45+
MLI_ASSERT(output.get_dim(kMatMulWidthDim) == in_right.get_dim(kMatMulWidthDim));
46+
MLI_ASSERT(encoded_params.get_elem_size() == sizeof(int8_t));
47+
MLI_ASSERT(encoded_params.get_size() == kMatMulRank);
48+
49+
in1_t val1;
50+
in2_t val2;
51+
out_t acc;
52+
int8_t in_left_zp = encoded_params.read<int8_t>(kMatMulHeightDim);
53+
int8_t in_right_zp = encoded_params.read<int8_t>(kMatMulWidthDim);
54+
uint32_t left_h = in_left.get_dim(kMatMulHeightDim);
55+
uint32_t right_w = in_right.get_dim(kMatMulWidthDim);
56+
uint32_t left_w = in_left.get_dim(kMatMulWidthDim);
57+
for(uint32_t i = 0; i < left_h; ++i) {
58+
for (uint32_t j = 0; j < right_w; ++j) {
59+
acc = 0;
60+
for (uint32_t k = 0; k < left_w; ++k) {
61+
val1 = in_left.template read<in1_t>(i * left_w + k) - in_left_zp;
62+
val2 = in_right.template read<in2_t>(k * right_w + j) - in_right_zp;
63+
acc += val1 * val2;
64+
}
65+
output.template write<out_t>( i * right_w + j, static_cast<out_t>(acc) );
66+
}
67+
}
68+
}
69+
70+
#pragma MLI_CODE_SECTION_END()
71+
} // namespace ref
72+
}
73+
}
74+
#endif // _MLI_KRN_MATMUL_REF_H_
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Copyright 2022, Synopsys, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-3-Clause license found in
6+
* the LICENSE file in the root directory of this source tree.
7+
*
8+
*/
9+
10+
#include "mli_krn_matmul_ref.hpp"
11+
12+
using mli::krn::ref::MatMul_prepare_and_run;
13+
namespace snps_arc::metaware::mli::ref {
14+
15+
/**
 * Builds the runtime MatMul object from serialized private data.
 *
 * The private data is copied out of the supplied buffer and validated (kernel
 * id, recorded size, element sizes). It is then used to set up the tensor
 * iterators, the encoded-parameters buffer and the tile tensors for the
 * current sub-tensor of each iterator.
 *
 * @param kernel_private_data_buffer Buffer holding a MatMulPrivateData image.
 * @param size                       Size of that buffer; must equal sizeof(MatMulPrivateData).
 * @param membases                   Memory base addresses forwarded to the internal buffers/tensors.
 * @param num_mems                   Number of entries in membases.
 */
MatMul::MatMul(void* kernel_private_data_buffer,
               size_t size,
               uint64_t membases[], int num_mems) {
    MLI_ASSERT(size == sizeof(MatMulPrivateData));

    // Work on a local copy rather than reinterpreting the raw buffer in place.
    MatMulPrivateData prv;
    memcpy(&prv, kernel_private_data_buffer, sizeof(MatMulPrivateData));
    MLI_ASSERT(prv.kernel_id == kMatMulId);
    MLI_ASSERT(prv.size == sizeof(MatMulPrivateData));

    m_i_elem_size = prv.m_in_left.get_elem_size();
    m_o_elem_size = prv.m_output.get_elem_size();

    // Only int8 inputs with an int32 output are accepted here.
    MLI_ASSERT(sizeof(int8_t) == m_i_elem_size);
    MLI_ASSERT(sizeof(int32_t) == m_o_elem_size);

    // Both inputs must share the same element size.
    MLI_ASSERT(prv.m_in_right.get_elem_size() == m_i_elem_size);

    m_input_left = prv.m_in_left;
    m_input_right = prv.m_in_right;
    m_output = prv.m_output;

    m_encoded_params = InternalBuffer(prv.encoded_params, membases, num_mems);

    // Tile tensors are built from the sub-tensor each iterator currently points at.
    m_tile_input_left =
        Tensor<InternalBuffer, kMatMulRank>(m_input_left.GetSubTensor(), membases, num_mems);
    m_tile_input_right =
        Tensor<InternalBuffer, kMatMulRank>(m_input_right.GetSubTensor(), membases, num_mems);
    m_tile_output =
        Tensor<InternalBuffer, kMatMulRank>(m_output.GetSubTensor(), membases, num_mems);
}
45+
46+
/**
 * Runs the MatMul computation on the current tiles.
 *
 * @return MLI_STATUS_OK after the kernel ran, or MLI_STATUS_NOT_SUPPORTED when
 *         the element sizes are not int8 inputs with an int32 output.
 */
mli_status MatMul::Issue() {
    const bool is_int8_in = (m_i_elem_size == sizeof(int8_t));
    const bool is_int32_out = (m_o_elem_size == sizeof(int32_t));

    if (!(is_int8_in && is_int32_out)) {
        // not supported yet
        return MLI_STATUS_NOT_SUPPORTED;
    }

    MatMul_prepare_and_run<int8_t, int8_t, int32_t, kMatMulRank>(
        m_tile_input_left, m_tile_input_right, m_tile_output, m_encoded_params);

    return MLI_STATUS_OK;
}
60+
61+
/**
 * Prefetch step; this implementation performs no work.
 *
 * @return MLI_STATUS_OK.
 */
mli_status MatMul::Prefetch() {
    return MLI_STATUS_OK;
}
62+
63+
/**
 * Moves to the next tile.
 *
 * All three tensor iterators are advanced with Next(). Each tile tensor is
 * then rebuilt from its previous value combined with the shape of the
 * iterator's new sub-tensor.
 *
 * @return MLI_STATUS_OK.
 */
mli_status MatMul::Update() {
    // Advance every iterator before querying any sub-tensor.
    m_input_left.Next();
    m_input_right.Next();
    m_output.Next();

    uint32_t left_shape[kMatMulRank];
    m_input_left.GetSubTensor().get_dims(left_shape);
    m_tile_input_left = Tensor<InternalBuffer, kMatMulRank>(m_tile_input_left, left_shape);

    uint32_t right_shape[kMatMulRank];
    m_input_right.GetSubTensor().get_dims(right_shape);
    m_tile_input_right = Tensor<InternalBuffer, kMatMulRank>(m_tile_input_right, right_shape);

    uint32_t out_shape[kMatMulRank];
    m_output.GetSubTensor().get_dims(out_shape);
    m_tile_output = Tensor<InternalBuffer, kMatMulRank>(m_tile_output, out_shape);

    return MLI_STATUS_OK;
}
88+
89+
/**
 * Reports the current tile geometry of both inputs and the output.
 *
 * Offsets are taken from get_pos() on the corresponding tensor iterator;
 * sizes are taken from get_dims() on the corresponding tile tensor.
 *
 * @param[out] input_left_size     Dimensions of the current left-input tile.
 * @param[out] input_right_size    Dimensions of the current right-input tile.
 * @param[out] output_size         Dimensions of the current output tile.
 * @param[out] input_left_offsets  Position reported by the left-input iterator.
 * @param[out] input_right_offsets Position reported by the right-input iterator.
 * @param[out] output_offsets      Position reported by the output iterator.
 */
void MatMul::GetIOSizesAndOffsets(uint32_t input_left_size[kMatMulRank], uint32_t input_right_size[kMatMulRank],
                                  uint32_t output_size[kMatMulRank],
                                  int32_t input_left_offsets[kMatMulRank], int32_t input_right_offsets[kMatMulRank],
                                  int32_t output_offsets[kMatMulRank]) const {
    m_input_left.get_pos(input_left_offsets);
    // The right-hand offsets must come from the right-hand iterator, not the left one.
    m_input_right.get_pos(input_right_offsets);
    m_output.get_pos(output_offsets);

    m_tile_input_left.get_dims(input_left_size);
    m_tile_input_right.get_dims(input_right_size);
    m_tile_output.get_dims(output_size);
}
102+
} // namespace snps_arc::metaware::mli::ref

lib/src/private/src/mli_runtime.cc

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,7 @@ ExecutionInterface* ExecutionInterface::Create(
193193
break;
194194
case kMatMulId:
195195
if(alloc_buf_size >= sizeof(MatMul)) {
196-
// obj = new (allocation_memory_buffer) MatMul(kernel_private_data_buffer, private_data_size, membases, num_mems);
197-
obj = nullptr; // until the implementation
196+
obj = new (allocation_memory_buffer) MatMul(kernel_private_data_buffer, private_data_size, membases, num_mems);
198197
} else {
199198
MLI_PRINTF("\nMLI_ERROR: Insufficient space for [MatMul] runtime object\n");
200199
}

0 commit comments

Comments
 (0)