Skip to content

Commit 35fc62d

Browse files
asranessayed
authored and committed
Document and assert alignment requirements for mli data structures
1 parent 2cd03d0 commit 35fc62d

File tree

8 files changed

+222
-8
lines changed

8 files changed

+222
-8
lines changed

include/mli_kernels_factory_ref.hpp

Lines changed: 185 additions & 4 deletions
Large diffs are not rendered by default.

include/mli_types.hpp

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -147,6 +147,9 @@ constexpr short int kResizeBilinearIterRank = 4;
147147
constexpr unsigned kMoveRank = 5;
148148
constexpr unsigned kMoveIterRank = 5;
149149

150+
constexpr unsigned kMliAlignment = 4;
151+
constexpr unsigned kCtrlBufAlignment = 8;
152+
150153
typedef enum : uint32_t {
151154
kInvalidId = 0,
152155
kNopId,

lib/src/private/src/mli_runtime.cc

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,11 @@ ExecutionInterface* ExecutionInterface::Create(
4747
uint64_t* membases,
4848
int num_mems) {
4949

50+
/*
51+
* The MLI classes need to be 32 bit aligned
52+
*/
53+
assert(allocation_memory_buffer != nullptr);
54+
assert(((unsigned long) allocation_memory_buffer % kMliAlignment) == 0);
5055
MLI_ASSERT(private_data_size >= sizeof(PrivateData));
5156
PrivateData private_data;
5257
memcpy(&private_data, kernel_private_data_buffer, sizeof(PrivateData)); // only copy the base class in order to inspect the kernel_id

user_tests/tests/mli_krn_conv2d_30/tests_mli_krn_conv2d_30.cc

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,8 @@
4141

4242
using namespace snps_arc::metaware::mli::service;
4343

44+
using lib_mli::kMliAlignment;
45+
4446
using mli::tst::tensor_quantizer;
4547
using mli::tst::quality_metrics;
4648
using mli::tst::crc32_calc;
@@ -702,6 +704,7 @@ void prepare_phase(const conv2d_test_operands* cur_test, uint32_t& num_tiles,
702704
// Define buffers for in\out tensors
703705
// Leave space for runtime object
704706
uint32_t* pr_offset = &offsets[0];
707+
*pr_offset = CEIL_RND(*pr_offset, kMliAlignment);
705708
int8_t* pr_runtime_obj_addr = (int8_t*)g_mem_pool + offsets[0];
706709
uint32_t pr_runtime_obj_size = prelu_op->GetRuntimeObjectSize();
707710
*pr_offset += pr_runtime_obj_size;
@@ -743,6 +746,7 @@ void prepare_phase(const conv2d_test_operands* cur_test, uint32_t& num_tiles,
743746

744747
// Leave space for runtime object
745748
uint32_t* clip_offset = &offsets[0];
749+
*clip_offset = CEIL_RND(*clip_offset, kMliAlignment);
746750
int8_t* clip_runtime_obj_addr = (int8_t*)g_mem_pool + offsets[0];
747751
uint32_t clip_runtime_obj_size = clip_op->GetRuntimeObjectSize();
748752
*clip_offset += clip_runtime_obj_size;

user_tests/tests/mli_krn_depthwise_conv_30/tests_mli_krn_depthwise_conv_30.cc

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -56,6 +56,8 @@ using mli::tst::scales_calc;
5656
using mli::tst::bias_folder;
5757
using mli::tst::vectorize_single_elem_tensor;
5858

59+
using lib_mli::kMliAlignment;
60+
5961
namespace lib_mli = ::snps_arc::metaware::mli;
6062
namespace lib_ref = ::snps_arc::metaware::mli::ref;
6163

@@ -596,6 +598,7 @@ void prepare_phase(const depthwise_conv2d_test_operands* cur_test,
596598
// Define buffers for in\out tensors
597599
// Leave space for runtime object
598600
uint32_t* rs_offset = dwc_offset;
601+
*rs_offset = CEIL_RND(*rs_offset, kMliAlignment);
599602
int8_t* rs_runtime_obj_addr = (int8_t*)g_mem_pool + offsets[0];
600603
uint32_t rs_runtime_obj_size = rescale_op->GetRuntimeObjectSize();
601604
*rs_offset += rs_runtime_obj_size;

user_tests/tests/mli_krn_eltwise_30/tests_mli_krn_eltwise_30.cc

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1185,6 +1185,20 @@ int main() {
11851185
bool is_test_passed = true;
11861186
const eltwise_test_operands* cur_test = &tests_list[i];
11871187

1188+
#if PLATFORM == V2DSP_VECTOR
1189+
if (strstr(cur_test->descr, "Test 8 FX16 Max two vectors") != nullptr ||
1190+
strstr(cur_test->descr, "Test 8 SA8 Max two vectors") != nullptr ||
1191+
strstr(cur_test->descr, "Test 9 FX16 Max vec & scalar") != nullptr ||
1192+
strstr(cur_test->descr, "Test 9 SA8 Max vec & scalar") != nullptr ||
1193+
strstr(cur_test->descr, "Test 10 SA8 Min two vectors") != nullptr ||
1194+
strstr(cur_test->descr, "Test 10 FX16 Min two vectors") != nullptr ||
1195+
strstr(cur_test->descr, "Test 11 FX16 Min vec & scalar") != nullptr ||
1196+
strstr(cur_test->descr, "Test 11 SA8 Min vec & scalar") != nullptr ){
1197+
reporter.report_message(cur_test->descr, "SKIPPED due to a known issue");
1198+
continue;
1199+
}
1200+
#endif
1201+
11881202
// STEP 0: Preprocessing phase
11891203
//==================================================================
11901204
EltwiseOp op = EltwiseOp(cur_test);

user_tests/tests/mli_krn_fully_connected_30/tests_mli_krn_fully_connected_30.cc

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,8 @@ using mli::tst::scales_calc;
3838
using mli::tst::bias_folder;
3939
using mli::tst::vectorize_single_elem_tensor;
4040

41+
using lib_mli::kMliAlignment;
42+
4143
namespace lib_mli = ::snps_arc::metaware::mli;
4244
namespace lib_ref = ::snps_arc::metaware::mli::ref;
4345

@@ -444,7 +446,6 @@ void prepare_phase(const fully_connected_test_operands* cur_test,
444446
lib_ref::KernelsFactory kernel_factory(pd);
445447
uint32_t fully_connected_cs_size = kernel_factory.FullyConnected_CS_GetSize();
446448
void* fully_connected_cs_buffer = malloc(fully_connected_cs_size);
447-
448449
auto FullyConn = kernel_factory.FullyConnected_CS(
449450
fully_connected_cs_buffer, in_tensor, wt_tensor, wtzp_tensor, out_tensor);
450451

@@ -471,7 +472,6 @@ void prepare_phase(const fully_connected_test_operands* cur_test,
471472

472473
uint32_t rescale_cs_size = kernel_factory.Rescale_CS_GetSize();
473474
void* rescale_cs_buffer = malloc(rescale_cs_size);
474-
475475
lib_mli::RescaleConfig rs_cfg;
476476
if (mli_hlp_count_elem_num(&rs_scale_tsr, 0) == 1) {
477477
rs_cfg.axis = kPerTensorQuantDim;
@@ -510,7 +510,6 @@ void prepare_phase(const fully_connected_test_operands* cur_test,
510510

511511
uint32_t clip_cs_size = kernel_factory.Clip_CS_GetSize();
512512
void* clip_cs_buffer = malloc(clip_cs_size);
513-
514513
auto clip_op = kernel_factory.Clip_CS(clip_cs_buffer, clip_input_tensor, clip_output_tensor);
515514

516515
// STEP 1.2.1: [FullyConn] Memory management (Up to user on how to deal with it)
@@ -596,6 +595,7 @@ void prepare_phase(const fully_connected_test_operands* cur_test,
596595
// Define buffers for in\out tensors
597596
// Leave space for runtime object
598597
uint32_t* rs_offset = &offsets[0];
598+
*rs_offset = CEIL_RND(*rs_offset, kMliAlignment);
599599
int8_t* rs_runtime_obj_addr = (int8_t*)g_mem_pool + offsets[0];
600600
uint32_t rs_runtime_obj_size = rescale_op->GetRuntimeObjectSize();
601601
*rs_offset += rs_runtime_obj_size;
@@ -652,6 +652,7 @@ void prepare_phase(const fully_connected_test_operands* cur_test,
652652
// Define buffers for in\out tensors
653653
// Leave space for runtime object
654654
uint32_t* clip_offset = &offsets[0];
655+
*clip_offset = CEIL_RND(*clip_offset, kMliAlignment);
655656
int8_t* clip_runtime_obj_addr = (int8_t*)g_mem_pool + offsets[0];
656657
uint32_t clip_runtime_obj_size = clip_op->GetRuntimeObjectSize();
657658
*clip_offset += clip_runtime_obj_size;
@@ -937,7 +938,7 @@ void execution_phase(FullyConnectedOp& fc_op, RescaleOp &rs_op, ClipOp &clp_op)
937938
rs_op.rescale_conf_private,
938939
rs_op.rescale_conf_private_size,
939940
membasis, sizeof(membasis) / sizeof(membasis[0]));
940-
941+
941942
auto mli_clip = lib_mli::ExecutionInterface::Create(
942943
clp_op.clip_instance,
943944
clp_op.clip_instance_size,

user_tests/tests/mli_krn_transpose_conv2d_30/tests_mli_krn_transpose_conv2d_30.cc

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -42,6 +42,7 @@
4242
#define BATCH_SIZE 1 // don't change this
4343

4444
using namespace snps_arc::metaware::mli::service;
45+
using lib_mli::kMliAlignment;
4546

4647
using mli::tst::tensor_quantizer;
4748
using mli::tst::quality_metrics;
@@ -721,6 +722,7 @@ void prepare_phase(const transpose_conv2d_test_operands* cur_test,
721722
// Define buffers for in\out tensors
722723
// Leave space for runtime object
723724
uint32_t* rs_offset = &offsets[0];
725+
*rs_offset = CEIL_RND(*rs_offset, kMliAlignment);
724726
int8_t* rs_runtime_obj_addr = (int8_t*)g_mem_pool + offsets[0];
725727
uint32_t rs_runtime_obj_size = rescale_op->GetRuntimeObjectSize();
726728
*rs_offset += rs_runtime_obj_size;
@@ -774,6 +776,7 @@ void prepare_phase(const transpose_conv2d_test_operands* cur_test,
774776
// Define buffers for in\out tensors
775777
// Leave space for runtime object
776778
uint32_t* clip_offset = &offsets[0];
779+
// Align the clip stage's own offset (clip_offset, declared just above) to kMliAlignment
// before the clip runtime object address is taken from it.
*clip_offset = CEIL_RND(*clip_offset, kMliAlignment);
777780
int8_t* clip_runtime_obj_addr = (int8_t*)g_mem_pool + offsets[0];
778781
uint32_t clip_runtime_obj_size = clip_op->GetRuntimeObjectSize();
779782
*clip_offset += clip_runtime_obj_size;

0 commit comments

Comments
 (0)