foss-for-synopsys-dwc-arc-processors
diff --git a/Makefile b/Makefile
Lines changed: 10 additions & 10 deletions
diff --git a/README.md b/README.md
Lines changed: 1 addition & 0 deletions
diff --git a/doc/documents/MLI_FP_data_format/MLI_FP_data_format.rst b/doc/documents/MLI_FP_data_format/MLI_FP_data_format.rst
Lines changed: 2 additions & 2 deletions
diff --git a/doc/documents/MLI_helpers/convert_tensor.rst b/doc/documents/MLI_helpers/convert_tensor.rst
Lines changed: 1 addition & 1 deletion
diff --git a/doc/documents/MLI_helpers/get_basic_elem_size.rst b/doc/documents/MLI_helpers/get_basic_elem_size.rst
Lines changed: 1 addition & 1 deletion
diff --git a/examples/auxiliary/tensor_transform.c b/examples/auxiliary/tensor_transform.c
Lines changed: 121 additions & 5 deletions
@@ -14,21 +14,21 @@ TCF ?= ../../hw/em9d.tcf
 all: app
 
 lib:
-	gmake -C lib/make TCF_FILE=$(TCF)
+	$(MAKE) -C lib/make TCF_FILE=$(TCF)
 
 app: lib
-	gmake -C lib/make TCF_FILE=$(TCF)
-	gmake -C examples/example_cifar10_caffe TCF_FILE=$(TCF)
-	gmake -C examples/example_har_smartphone TCF_FILE=$(TCF) 
+	$(MAKE) -C lib/make TCF_FILE=$(TCF)
+	$(MAKE) -C examples/example_cifar10_caffe TCF_FILE=$(TCF)
+	$(MAKE) -C examples/example_har_smartphone TCF_FILE=$(TCF) 
 
 cleanapp:
-	gmake -C examples/example_cifar10_caffe clean 
-	gmake -C examples/example_har_smartphone clean 
+	$(MAKE) -C examples/example_cifar10_caffe clean 
+	$(MAKE) -C examples/example_har_smartphone clean 
 
 cleanall:
-	gmake -C lib/make clean
-	gmake -C examples/example_cifar10_caffe cleanall 
-	gmake -C examples/example_har_smartphone cleanall
+	$(MAKE) -C lib/make clean
+	$(MAKE) -C examples/example_cifar10_caffe cleanall 
+	$(MAKE) -C examples/example_har_smartphone cleanall
 
 libclean:
-	gmake -C lib/make clean
+	$(MAKE) -C lib/make clean
@@ -26,6 +26,7 @@ documentation and examples. Read the documentation at [embarc.org](https://embar
 ./doc                             		- contains the API documentation of the embARC MLI library  
 ./include                         		- include files with API prototypes and types  
 ./lib/src                         		- source code of embARC MLI Library  
+./lib/gen                         		- auxiliary generation scripts for LUT tables and library source code  
 ./examples                        		- source code of examples  
 ./examples/example_cifar10_caffe  		- example illustrating implementation of CIFAR10 Caffe  
 ./examples/example_har_smartphone 		- example illustrating implementation of Human Activity Recognition  
 
@@ -79,11 +79,11 @@ container implied equal to sign bit:
 
 	Given 0x0020 (32) in Q.10 format,
 
-	• For a 16-bit container (Q5.10), this represents 0.3125 real value.
+	• For a 16-bit container (Q5.10), this represents 0.03125 real value.
 
 	• The value also can be stored in an 8-bit container without
 	  misrepresentation. Therefore, 0x20 in Q-3.10 format is equivalent to
-	  0.3125 real value.
+	  0.03125 real value.
 
 	Given 0x0220 (544) in Q.10 format,
 
 
@@ -57,7 +57,7 @@ Parameters
    | ``in``                | [IN] Pointer to input |
    |                       | tensor                |
    +-----------------------+-----------------------+
-   | ``start_dim``         | [OUT] Pointer to      |
+   | ``out``               | [OUT] Pointer to      |
    |                       | output tensor         |
    +-----------------------+-----------------------+	
 
 
@@ -16,7 +16,7 @@ Prototype
 
 .. code:: c                      
                                  
-   uint32_t mli_hlp_count_elem_num (mli_tensor *in)                
+   uint32_t mli_hlp_tensor_element_size(mli_tensor *in)
 ..
 
 Parameters
 
@@ -18,7 +18,8 @@
 //=================================================================================
 mli_status mli_hlp_float_to_fx_tensor (const float *src, uint32_t src_size, mli_tensor * dst) {
     mli_status ret = MLI_STATUS_OK;
-    const uint32_t scale_val = 1u << (dst->el_params.fx.frac_bits);
+    float scale_val = (float) ((int64_t)1u << mli_hlp_tensor_scale_shift(dst)) / (float) mli_hlp_tensor_scale(dst, 0);
+    int16_t zero_offset = mli_hlp_tensor_zero_offset(dst, 0);
 
     if (dst->el_type == MLI_EL_FX_16) {
         if (dst->capacity < src_size * sizeof (int16_t))
@@ -30,7 +31,7 @@ mli_status mli_hlp_float_to_fx_tensor (const float *src, uint32_t src_size, mli_
             int32_t dst_val = (int32_t) (scale_val * src[idx] + round_val);
             dst_arr[idx] = (int16_t) (MIN (MAX (dst_val, INT16_MIN), INT16_MAX));
         }
-    } else {
+    } else if (dst->el_type == MLI_EL_FX_8){
         if (dst->capacity < src_size * sizeof (int8_t))
             return MLI_STATUS_LENGTH_ERROR;
 
@@ -40,6 +41,74 @@ mli_status mli_hlp_float_to_fx_tensor (const float *src, uint32_t src_size, mli_
             const int32_t dst_val = (int32_t) (scale_val * src[idx] + round_val);
             dst_arr[idx] = (int8_t) (MIN (MAX (dst_val, INT8_MIN), INT8_MAX));
         }
+    } else if (dst->el_type == MLI_EL_ASYM_I8){
+        if (dst->capacity < src_size * sizeof (int8_t))
+            return MLI_STATUS_LENGTH_ERROR;
+
+        int8_t *dst_arr = dst->data;
+        if (dst->el_params.asym.dim < 0) {
+            for (int idx = 0; idx < src_size; idx++) {
+                const float round_val = (src[idx] > 0) ? 0.5f : -0.5f;
+                const int32_t dst_val = (int32_t) (scale_val * src[idx] + round_val);
+                dst_arr[idx] = (int8_t) (MIN (MAX (dst_val + zero_offset, INT8_MIN), INT8_MAX));
+            }
+        } else {
+            const int concat_dim = dst->el_params.asym.dim;
+            const int total_elements = mli_hlp_count_elem_num(dst, 0);
+            const int elements_to_convert = (concat_dim + 1 == dst->rank)? 1: mli_hlp_count_elem_num(dst, concat_dim + 1);
+            const int step_after_conv = mli_hlp_count_elem_num(dst, concat_dim);
+            const int conversions_num = dst->shape[concat_dim];
+        
+            if (total_elements != src_size)
+                return MLI_STATUS_LENGTH_ERROR;
+
+            for (int c_idx = 0; c_idx < conversions_num; c_idx++) {
+                scale_val = ((int64_t)1u << mli_hlp_tensor_scale_shift(dst)) / (float)mli_hlp_tensor_scale(dst, (uint32_t)c_idx);
+                zero_offset = mli_hlp_tensor_zero_offset(dst, c_idx);
+                for (int data_idx = c_idx * elements_to_convert; data_idx < total_elements; data_idx += step_after_conv) {
+                    for (int el_idx = 0; el_idx < elements_to_convert; ++el_idx) {
+                        const float round_val = (src[data_idx + el_idx] > 0) ? 0.5f : -0.5f;
+                        int32_t dst_val = (int32_t) (scale_val * src[data_idx + el_idx] + round_val);
+                        dst_arr[data_idx + el_idx] = (int8_t)(MIN(MAX(dst_val + zero_offset, INT8_MIN), INT8_MAX));
+                    }
+                }
+            }
+        }
+    } else if (dst->el_type == MLI_EL_ASYM_I32) {
+        if (dst->capacity < src_size * sizeof (int32_t))
+            return MLI_STATUS_LENGTH_ERROR;
+
+        int32_t *dst_arr = dst->data;
+        if (dst->el_params.asym.dim < 0) {
+            for (int idx = 0; idx < src_size; idx++) {
+                const float round_val = (src[idx] > 0) ? 0.5f : -0.5f;
+                int32_t dst_val = (int32_t) (scale_val * src[idx] + round_val);
+                dst_arr[idx] = dst_val + zero_offset;
+            }
+        } else {
+            const int concat_dim = dst->el_params.asym.dim;
+            const int total_elements = mli_hlp_count_elem_num(dst, 0);
+            const int elements_to_convert = (concat_dim + 1 == dst->rank)? 1: mli_hlp_count_elem_num(dst, concat_dim + 1);
+            const int step_after_conv = mli_hlp_count_elem_num(dst, concat_dim);
+            const int conversions_num = dst->shape[concat_dim];
+        
+            if (total_elements != src_size)
+                return MLI_STATUS_LENGTH_ERROR;
+
+            for (int c_idx = 0; c_idx < conversions_num; c_idx++) {
+                scale_val = ((int64_t)1u << mli_hlp_tensor_scale_shift(dst)) / (float)mli_hlp_tensor_scale(dst, (uint32_t)c_idx);
+                zero_offset = mli_hlp_tensor_zero_offset(dst, c_idx);
+                for (int data_idx = c_idx * elements_to_convert; data_idx < total_elements; data_idx += step_after_conv) {
+                    for (int el_idx = 0; el_idx < elements_to_convert; ++el_idx) {
+                        const float round_val = (src[data_idx + el_idx] > 0) ? 0.5f : -0.5f;
+                        int32_t dst_val = (int32_t) (scale_val * src[data_idx + el_idx] + round_val);
+                        dst_arr[data_idx + el_idx] = dst_val + zero_offset;
+                    }
+                }
+            }
+        }
+    } else {
+        ret = MLI_STATUS_TYPE_MISMATCH;
     }
     return ret;
 }
@@ -54,15 +123,62 @@ mli_status mli_hlp_fx_tensor_to_float (const mli_tensor * src, float *dst, uint3
     if (elem_num == 0)
         return MLI_STATUS_BAD_TENSOR;
 
-    const float scale_val = 1.0f / (float) (1u << (src->el_params.fx.frac_bits));
+    float scale_val = (float)mli_hlp_tensor_scale(src, 0) / (float) (1u << mli_hlp_tensor_scale_shift(src));
+    int16_t zero_offset = mli_hlp_tensor_zero_offset(src, 0);
     if (src->el_type == MLI_EL_FX_16) {
         int16_t *src_arr = src->data;
         for (int idx = 0; idx < elem_num; idx++)
             dst[idx] = (float) (scale_val * src_arr[idx]);
-    } else {
+    } else if (src->el_type == MLI_EL_FX_8){
         int8_t *src_arr = src->data;
         for (int idx = 0; idx < elem_num; idx++)
-            dst[idx] = (float) (scale_val * src_arr[idx]);
+            dst[idx] = (float) (scale_val * (src_arr[idx] - zero_offset));
+    } else if (src->el_type == MLI_EL_ASYM_I8){
+        int8_t *src_arr = src->data;
+        if (src->el_params.asym.dim < 0) {
+            for (int idx = 0; idx < elem_num; idx++)
+                dst[idx] = (float) (scale_val * (src_arr[idx] - zero_offset));
+        } else {
+            const int concat_dim = src->el_params.asym.dim;
+            const int total_elements = mli_hlp_count_elem_num(src, 0);
+            const int elements_to_convert = (concat_dim + 1 == src->rank)? 1: mli_hlp_count_elem_num(src, concat_dim + 1);
+            const int step_after_conv = mli_hlp_count_elem_num(src, concat_dim);
+            const int conversions_num = src->shape[concat_dim];
+
+            for (int c_idx = 0; c_idx < conversions_num; c_idx++) {
+                scale_val = (float)mli_hlp_tensor_scale(src, c_idx) / (float) (1u << mli_hlp_tensor_scale_shift(src));
+                zero_offset = mli_hlp_tensor_zero_offset(src, c_idx);
+                for (int data_idx = c_idx * elements_to_convert; data_idx < total_elements; data_idx += step_after_conv) {
+                    for (int el_idx = 0; el_idx < elements_to_convert; ++el_idx) {
+                        dst[data_idx + el_idx] = (float) (scale_val * (src_arr[data_idx + el_idx] - zero_offset));
+                    }
+                }
+            }
+        }
+    } else if (src->el_type == MLI_EL_ASYM_I32) {
+        int32_t *src_arr = src->data;
+        if (src->el_params.asym.dim < 0) {
+            for (int idx = 0; idx < elem_num; idx++)
+                dst[idx] = (float) (scale_val * (src_arr[idx] - zero_offset));
+        } else {
+            const int concat_dim = src->el_params.asym.dim;
+            const int total_elements = mli_hlp_count_elem_num(src, 0);
+            const int elements_to_convert = (concat_dim + 1 == src->rank)? 1: mli_hlp_count_elem_num(src, concat_dim + 1);
+            const int step_after_conv = mli_hlp_count_elem_num(src, concat_dim);
+            const int conversions_num = src->shape[concat_dim];
+
+            for (int c_idx = 0; c_idx < conversions_num; c_idx++) {
+                scale_val = (float)mli_hlp_tensor_scale(src, c_idx) / (float) (1u << mli_hlp_tensor_scale_shift(src));
+                zero_offset = mli_hlp_tensor_zero_offset(src, c_idx);
+                for (int data_idx = c_idx * elements_to_convert; data_idx < total_elements; data_idx += step_after_conv) {
+                    for (int el_idx = 0; el_idx < elements_to_convert; ++el_idx) {
+                        dst[data_idx + el_idx] = (float) (scale_val * (src_arr[data_idx + el_idx] - zero_offset));
+                    }
+                }
+            }
+        }
+    } else {
+        return MLI_STATUS_TYPE_MISMATCH;
     }
     return MLI_STATUS_OK;
 }