Skip to content

Commit b795e49

Browse files
committed
Merge branch 'mli_dev' into kws_example
2 parents a447d32 + 4b6c6ee commit b795e49

File tree

97 files changed

+14585
-1908
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

97 files changed

+14585
-1908
lines changed

Makefile

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,21 @@ TCF ?= ../../hw/em9d.tcf
1414
all: app
1515

1616
lib:
17-
gmake -C lib/make TCF_FILE=$(TCF)
17+
$(MAKE) -C lib/make TCF_FILE=$(TCF)
1818

1919
app: lib
20-
gmake -C lib/make TCF_FILE=$(TCF)
21-
gmake -C examples/example_cifar10_caffe TCF_FILE=$(TCF)
22-
gmake -C examples/example_har_smartphone TCF_FILE=$(TCF)
20+
$(MAKE) -C lib/make TCF_FILE=$(TCF)
21+
$(MAKE) -C examples/example_cifar10_caffe TCF_FILE=$(TCF)
22+
$(MAKE) -C examples/example_har_smartphone TCF_FILE=$(TCF)
2323

2424
cleanapp:
25-
gmake -C examples/example_cifar10_caffe clean
26-
gmake -C examples/example_har_smartphone clean
25+
$(MAKE) -C examples/example_cifar10_caffe clean
26+
$(MAKE) -C examples/example_har_smartphone clean
2727

2828
cleanall:
29-
gmake -C lib/make clean
30-
gmake -C examples/example_cifar10_caffe cleanall
31-
gmake -C examples/example_har_smartphone cleanall
29+
$(MAKE) -C lib/make clean
30+
$(MAKE) -C examples/example_cifar10_caffe cleanall
31+
$(MAKE) -C examples/example_har_smartphone cleanall
3232

3333
libclean:
34-
gmake -C lib/make clean
34+
$(MAKE) -C lib/make clean

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ documentation and examples. Read the documentation at [embarc.org](https://embar
2626
./doc - contains the API documentation of the embARC MLI library
2727
./include - include files with API prototypes and types
2828
./lib/src - source code of embARC MLI Library
29+
./lib/gen - auxiliary generation scripts for look-up tables (LUTs) and library source code
2930
./examples - source code of examples
3031
./examples/example_cifar10_caffe - example illustrating implementation of CIFAR10 Caffe
3132
./examples/example_har_smartphone - example illustrating implementation of Human Activity Recognition

doc/documents/MLI_FP_data_format/MLI_FP_data_format.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,11 @@ container implied equal to sign bit:
7979

8080
Given 0x0020 (32) in Q.10 format,
8181

82-
• For a 16-bit container (Q5.10), this represents 0.3125 real value.
82+
• For a 16-bit container (Q5.10), this represents the real value 0.03125.
8383

8484
• The value also can be stored in an 8-bit container without
8585
misrepresentation. Therefore, 0x20 in Q-3.10 format is equivalent to
86-
0.3125 real value.
86+
the real value 0.03125.
8787

8888
Given 0x0220 (544) in Q.10 format,
8989

doc/documents/MLI_helpers/convert_tensor.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ Parameters
5757
| ``in`` | [IN] Pointer to input |
5858
| | tensor |
5959
+-----------------------+-----------------------+
60-
| ``start_dim`` | [OUT] Pointer to |
60+
| ``out`` | [OUT] Pointer to |
6161
| | output tensor |
6262
+-----------------------+-----------------------+
6363

doc/documents/MLI_helpers/get_basic_elem_size.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ Prototype
1616

1717
.. code:: c
1818
19-
uint32_t mli_hlp_count_elem_num (mli_tensor *in)
19+
uint32_t mli_hlp_tensor_element_size(mli_tensor *in)
2020
..
2121
2222
Parameters

examples/auxiliary/tensor_transform.c

Lines changed: 121 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@
1818
//=================================================================================
1919
mli_status mli_hlp_float_to_fx_tensor (const float *src, uint32_t src_size, mli_tensor * dst) {
2020
mli_status ret = MLI_STATUS_OK;
21-
const uint32_t scale_val = 1u << (dst->el_params.fx.frac_bits);
21+
float scale_val = (float) ((int64_t)1u << mli_hlp_tensor_scale_shift(dst)) / (float) mli_hlp_tensor_scale(dst, 0);
22+
int16_t zero_offset = mli_hlp_tensor_zero_offset(dst, 0);
2223

2324
if (dst->el_type == MLI_EL_FX_16) {
2425
if (dst->capacity < src_size * sizeof (int16_t))
@@ -30,7 +31,7 @@ mli_status mli_hlp_float_to_fx_tensor (const float *src, uint32_t src_size, mli_
3031
int32_t dst_val = (int32_t) (scale_val * src[idx] + round_val);
3132
dst_arr[idx] = (int16_t) (MIN (MAX (dst_val, INT16_MIN), INT16_MAX));
3233
}
33-
} else {
34+
} else if (dst->el_type == MLI_EL_FX_8){
3435
if (dst->capacity < src_size * sizeof (int8_t))
3536
return MLI_STATUS_LENGTH_ERROR;
3637

@@ -40,6 +41,74 @@ mli_status mli_hlp_float_to_fx_tensor (const float *src, uint32_t src_size, mli_
4041
const int32_t dst_val = (int32_t) (scale_val * src[idx] + round_val);
4142
dst_arr[idx] = (int8_t) (MIN (MAX (dst_val, INT8_MIN), INT8_MAX));
4243
}
44+
} else if (dst->el_type == MLI_EL_ASYM_I8){
45+
if (dst->capacity < src_size * sizeof (int8_t))
46+
return MLI_STATUS_LENGTH_ERROR;
47+
48+
int8_t *dst_arr = dst->data;
49+
if (dst->el_params.asym.dim < 0) {
50+
for (int idx = 0; idx < src_size; idx++) {
51+
const float round_val = (src[idx] > 0) ? 0.5f : -0.5f;
52+
const int32_t dst_val = (int32_t) (scale_val * src[idx] + round_val);
53+
dst_arr[idx] = (int8_t) (MIN (MAX (dst_val + zero_offset, INT8_MIN), INT8_MAX));
54+
}
55+
} else {
56+
const int concat_dim = dst->el_params.asym.dim;
57+
const int total_elements = mli_hlp_count_elem_num(dst, 0);
58+
const int elements_to_convert = (concat_dim + 1 == dst->rank)? 1: mli_hlp_count_elem_num(dst, concat_dim + 1);
59+
const int step_after_conv = mli_hlp_count_elem_num(dst, concat_dim);
60+
const int conversions_num = dst->shape[concat_dim];
61+
62+
if (total_elements != src_size)
63+
return MLI_STATUS_LENGTH_ERROR;
64+
65+
for (int c_idx = 0; c_idx < conversions_num; c_idx++) {
66+
scale_val = ((int64_t)1u << mli_hlp_tensor_scale_shift(dst)) / (float)mli_hlp_tensor_scale(dst, (uint32_t)c_idx);
67+
zero_offset = mli_hlp_tensor_zero_offset(dst, c_idx);
68+
for (int data_idx = c_idx * elements_to_convert; data_idx < total_elements; data_idx += step_after_conv) {
69+
for (int el_idx = 0; el_idx < elements_to_convert; ++el_idx) {
70+
const float round_val = (src[data_idx + el_idx] > 0) ? 0.5f : -0.5f;
71+
int32_t dst_val = (int32_t) (scale_val * src[data_idx + el_idx] + round_val);
72+
dst_arr[data_idx + el_idx] = (int8_t)(MIN(MAX(dst_val + zero_offset, INT8_MIN), INT8_MAX));
73+
}
74+
}
75+
}
76+
}
77+
} else if (dst->el_type == MLI_EL_ASYM_I32) {
78+
if (dst->capacity < src_size * sizeof (int32_t))
79+
return MLI_STATUS_LENGTH_ERROR;
80+
81+
int32_t *dst_arr = dst->data;
82+
if (dst->el_params.asym.dim < 0) {
83+
for (int idx = 0; idx < src_size; idx++) {
84+
const float round_val = (src[idx] > 0) ? 0.5f : -0.5f;
85+
int32_t dst_val = (int32_t) (scale_val * src[idx] + round_val);
86+
dst_arr[idx] = dst_val + zero_offset;
87+
}
88+
} else {
89+
const int concat_dim = dst->el_params.asym.dim;
90+
const int total_elements = mli_hlp_count_elem_num(dst, 0);
91+
const int elements_to_convert = (concat_dim + 1 == dst->rank)? 1: mli_hlp_count_elem_num(dst, concat_dim + 1);
92+
const int step_after_conv = mli_hlp_count_elem_num(dst, concat_dim);
93+
const int conversions_num = dst->shape[concat_dim];
94+
95+
if (total_elements != src_size)
96+
return MLI_STATUS_LENGTH_ERROR;
97+
98+
for (int c_idx = 0; c_idx < conversions_num; c_idx++) {
99+
scale_val = ((int64_t)1u << mli_hlp_tensor_scale_shift(dst)) / (float)mli_hlp_tensor_scale(dst, (uint32_t)c_idx);
100+
zero_offset = mli_hlp_tensor_zero_offset(dst, c_idx);
101+
for (int data_idx = c_idx * elements_to_convert; data_idx < total_elements; data_idx += step_after_conv) {
102+
for (int el_idx = 0; el_idx < elements_to_convert; ++el_idx) {
103+
const float round_val = (src[data_idx + el_idx] > 0) ? 0.5f : -0.5f;
104+
int32_t dst_val = (int32_t) (scale_val * src[data_idx + el_idx] + round_val);
105+
dst_arr[data_idx + el_idx] = dst_val + zero_offset;
106+
}
107+
}
108+
}
109+
}
110+
} else {
111+
ret = MLI_STATUS_TYPE_MISMATCH;
43112
}
44113
return ret;
45114
}
@@ -54,15 +123,62 @@ mli_status mli_hlp_fx_tensor_to_float (const mli_tensor * src, float *dst, uint3
54123
if (elem_num == 0)
55124
return MLI_STATUS_BAD_TENSOR;
56125

57-
const float scale_val = 1.0f / (float) (1u << (src->el_params.fx.frac_bits));
126+
float scale_val = (float)mli_hlp_tensor_scale(src, 0) / (float) (1u << mli_hlp_tensor_scale_shift(src));
127+
int16_t zero_offset = mli_hlp_tensor_zero_offset(src, 0);
58128
if (src->el_type == MLI_EL_FX_16) {
59129
int16_t *src_arr = src->data;
60130
for (int idx = 0; idx < elem_num; idx++)
61131
dst[idx] = (float) (scale_val * src_arr[idx]);
62-
} else {
132+
} else if (src->el_type == MLI_EL_FX_8){
63133
int8_t *src_arr = src->data;
64134
for (int idx = 0; idx < elem_num; idx++)
65-
dst[idx] = (float) (scale_val * src_arr[idx]);
135+
dst[idx] = (float) (scale_val * (src_arr[idx] - zero_offset));
136+
} else if (src->el_type == MLI_EL_ASYM_I8){
137+
int8_t *src_arr = src->data;
138+
if (src->el_params.asym.dim < 0) {
139+
for (int idx = 0; idx < elem_num; idx++)
140+
dst[idx] = (float) (scale_val * (src_arr[idx] - zero_offset));
141+
} else {
142+
const int concat_dim = src->el_params.asym.dim;
143+
const int total_elements = mli_hlp_count_elem_num(src, 0);
144+
const int elements_to_convert = (concat_dim + 1 == src->rank)? 1: mli_hlp_count_elem_num(src, concat_dim + 1);
145+
const int step_after_conv = mli_hlp_count_elem_num(src, concat_dim);
146+
const int conversions_num = src->shape[concat_dim];
147+
148+
for (int c_idx = 0; c_idx < conversions_num; c_idx++) {
149+
scale_val = (float)mli_hlp_tensor_scale(src, c_idx) / (float) (1u << mli_hlp_tensor_scale_shift(src));
150+
zero_offset = mli_hlp_tensor_zero_offset(src, c_idx);
151+
for (int data_idx = c_idx * elements_to_convert; data_idx < total_elements; data_idx += step_after_conv) {
152+
for (int el_idx = 0; el_idx < elements_to_convert; ++el_idx) {
153+
dst[data_idx + el_idx] = (float) (scale_val * (src_arr[data_idx + el_idx] - zero_offset));
154+
}
155+
}
156+
}
157+
}
158+
} else if (src->el_type == MLI_EL_ASYM_I32) {
159+
int32_t *src_arr = src->data;
160+
if (src->el_params.asym.dim < 0) {
161+
for (int idx = 0; idx < elem_num; idx++)
162+
dst[idx] = (float) (scale_val * (src_arr[idx] - zero_offset));
163+
} else {
164+
const int concat_dim = src->el_params.asym.dim;
165+
const int total_elements = mli_hlp_count_elem_num(src, 0);
166+
const int elements_to_convert = (concat_dim + 1 == src->rank)? 1: mli_hlp_count_elem_num(src, concat_dim + 1);
167+
const int step_after_conv = mli_hlp_count_elem_num(src, concat_dim);
168+
const int conversions_num = src->shape[concat_dim];
169+
170+
for (int c_idx = 0; c_idx < conversions_num; c_idx++) {
171+
scale_val = (float)mli_hlp_tensor_scale(src, c_idx) / (float) (1u << mli_hlp_tensor_scale_shift(src));
172+
zero_offset = mli_hlp_tensor_zero_offset(src, c_idx);
173+
for (int data_idx = c_idx * elements_to_convert; data_idx < total_elements; data_idx += step_after_conv) {
174+
for (int el_idx = 0; el_idx < elements_to_convert; ++el_idx) {
175+
dst[data_idx + el_idx] = (float) (scale_val * (src_arr[data_idx + el_idx] - zero_offset));
176+
}
177+
}
178+
}
179+
}
180+
} else {
181+
return MLI_STATUS_TYPE_MISMATCH;
66182
}
67183
return MLI_STATUS_OK;
68184
}

0 commit comments

Comments
 (0)