mit-han-lab
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion b/‎.pre-commit-config.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎code_generator/CodeGenerator.py‎
Lines changed: 6 additions & 11 deletions b/‎code_generator/CodeGenerator.py‎
Lines changed: 6 additions & 11 deletions
diff --git a/‎code_generator/TfliteConvertor.py‎
Lines changed: 1 addition & 1 deletion b/‎code_generator/TfliteConvertor.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎code_generator/codetemplate/depthwiseTemplate.py‎
Lines changed: 26 additions & 25 deletions b/‎code_generator/codetemplate/depthwiseTemplate.py‎
Lines changed: 26 additions & 25 deletions
diff --git a/‎experimental/mnist_demo/README.md‎
Lines changed: 63 additions & 0 deletions b/‎experimental/mnist_demo/README.md‎
Lines changed: 63 additions & 0 deletions
diff --git a/‎experimental/mnist_demo/application/Makefile‎
Lines changed: 43 additions & 0 deletions b/‎experimental/mnist_demo/application/Makefile‎
Lines changed: 43 additions & 0 deletions
@@ -22,7 +22,7 @@ repos:
     hooks:
       - id: black
   - repo: https://github.com/pycqa/isort
-    rev: 5.10.1
+    rev: 5.12.0
     hooks:
       - id: isort
         args: ["--sp", "pyproject.toml"]
 
@@ -485,6 +485,8 @@ def _genMemBuffer(self):
 
         # activation buffers
         string = "\n/* sram:" + str(schedule.peakmem) + ", flash:" + str(schedule.flash) + " */\n"
+        fp.write(string)
+        string = "#define PEAK_MEM " + str(schedule.peakmem) + "\n" + "#define MODEL_SIZE " + str(schedule.flash) + "\n"
         fp.write(string + "\n")
 
         string = "static signed char buffer[" + str(schedule.peakmem) + "];\n"
@@ -493,14 +495,11 @@ def _genMemBuffer(self):
         string = "static signed char *buffer0 = &buffer[" + str(accumulate_ptr) + "];\n"
         accumulate_ptr += int(schedule.buffers["input_output"])
         fp.write(string)
-        string = "static signed char *buffer1 = &buffer[" + str(accumulate_ptr) + "];\n"
-        accumulate_ptr += int(schedule.buffers["residual"])
-        fp.write(string)
 
         string = "static int16_t *sbuf = (int16_t *)&buffer[" + str(accumulate_ptr) + "];\n"
         accumulate_ptr += int(schedule.buffers["im2col"])
         fp.write(string)
-        string = "static int32_t *kbuf = (int32_t *)&buffer[" + str(accumulate_ptr) + "];\n"
+        string = "static int16_t *kbuf = (int16_t *)&buffer[" + str(accumulate_ptr) + "];\n"
         accumulate_ptr += int(schedule.buffers["kernel"])
         fp.write(string)
         string = "const int SBuffer_size = " + str(int(schedule.buffers["im2col"])) + ";\n"
@@ -511,14 +510,12 @@ def _genMemBuffer(self):
     def _includeHeaders(self):
         include_string = """/* Automatically generated source file */
 #include <float.h>
-#include "arm_nnfunctions.h"
+#include <tinyengine_function.h>
+#include <tinyengine_function_fp.h>
 
 #include "genNN.h"
 #include "genModel.h"
-
-#include "tinyengine_function.h"
-#include "tinyengine_function_fp.h"
-
+#include "genInclude.h"
 """
         if self.profile_mode:
             include_string += '#include "profile.h"\n'
@@ -527,8 +524,6 @@ def _includeHeaders(self):
             """
 /* Variables used by all ops */
 ADD_params add_params;
-//Conv_Params conv_params;
-//Depthwise_Params dpconv_params;
 int i;
 int8_t *int8ptr,*int8ptr2;
 int32_t *int32ptr;
 
@@ -137,7 +137,7 @@ def _handleOperator(self, op):
                 self.layer.append(ret_op)
         elif op_code_str == "TRANSPOSE":
             self._convert_TRANSPOSE(op)
-        elif op_code_str in "FULLY_CONNECTED":
+        elif op_code_str == "FULLY_CONNECTED":
             self.layer.append(TF_Parser.parse_fc(op, self.model))
         elif op_code_str in SKIP_OPs:
             pass
 
@@ -16,6 +16,7 @@
 # Target ISA:  ARMv7E-M
 # ----------------------------------------------------------------------
 
+
 class depthwiseInplace:
     def __init__(
         self,
@@ -53,6 +54,7 @@ def _genCode(self):
         elif self.dataflow == "CWH":
             retString += self._genInplaceKernelCWH()
 
+        retString += "    return STATE_SUCCESS;"
         retString += self._genEndStr()
         if self.dataflow == "CHW":
             retString += "\n" + self._genKernel()
@@ -178,7 +180,6 @@ def _genHeader(self):
             + """
  * Author: [email protected]
  * -------------------------------------------------------------------- */
-#include "arm_nnsupportfunctions.h" //TODO: remove this in the future for self-contained
 #include "tinyengine_function.h"\n"""
         )
 
@@ -562,14 +563,14 @@ def _genKernel(self):
             /* requantize */
             sum0 = (float) sum0 * *scales;
             sum0 += output_offset;
-            sum0 = MAX(sum0, activation_min);
-            sum0 = MIN(sum0, activation_max);
+            sum0 = TN_MAX(sum0, activation_min);
+            sum0 = TN_MIN(sum0, activation_max);
             output[(i * output_x + j * 2) * channel_offset] = sum0;
 
             sum1 = (float) sum1 * *scales;
             sum1 += output_offset;
-            sum1 = MAX(sum1, activation_min);
-            sum1 = MIN(sum1, activation_max);
+            sum1 = TN_MAX(sum1, activation_min);
+            sum1 = TN_MIN(sum1, activation_max);
             output[(i * output_x + (j * 2 + 1)) * channel_offset] = sum1;
 
             cols_8b_iterptr += STRIDE * 2;
@@ -580,14 +581,14 @@ def _genKernel(self):
             /* requantize */
             sum0 = arm_nn_requantize(sum0 + biasR[0], *multiplier, *shift);
             sum0 += output_offset;
-            sum0 = MAX(sum0, activation_min);
-            sum0 = MIN(sum0, activation_max);
+            sum0 = TN_MAX(sum0, activation_min);
+            sum0 = TN_MIN(sum0, activation_max);
             output[(i * output_x + j * 2) * channel_offset] = sum0;
 
             sum1 = arm_nn_requantize(sum1 + biasR[0], *multiplier, *shift);
             sum1 += output_offset;
-            sum1 = MAX(sum1, activation_min);
-            sum1 = MIN(sum1, activation_max);
+            sum1 = TN_MAX(sum1, activation_min);
+            sum1 = TN_MIN(sum1, activation_max);
             output[(i * output_x + (j * 2 + 1)) * channel_offset] = sum1;
 
             cols_8b_iterptr += STRIDE * 2;
@@ -605,8 +606,8 @@ def _genKernel(self):
             retString += """
             sum = (float) sum * *scales;
             sum += output_offset;
-            sum = MAX(sum, activation_min);
-            sum = MIN(sum, activation_max);
+            sum = TN_MAX(sum, activation_min);
+            sum = TN_MIN(sum, activation_max);
             output[(i * output_x + output_x - 1) * channel_offset] = sum;
 
             cols_8b_iterptr += STRIDE;
@@ -616,8 +617,8 @@ def _genKernel(self):
             retString += """
             sum = arm_nn_requantize(sum + biasR[0], *multiplier, *shift);
             sum += output_offset;
-            sum = MAX(sum, activation_min);
-            sum = MIN(sum, activation_max);
+            sum = TN_MAX(sum, activation_min);
+            sum = TN_MIN(sum, activation_max);
             output[(i * output_x + output_x - 1) * channel_offset] = sum;
 
             cols_8b_iterptr += STRIDE;
@@ -661,14 +662,14 @@ def _genKernelCWH(self):
             /* requantize */
             sum0 = (float) sum0 * *scales;
             sum0 += output_offset;
-            sum0 = MAX(sum0, activation_min);
-            sum0 = MIN(sum0, activation_max);
+            sum0 = TN_MAX(sum0, activation_min);
+            sum0 = TN_MIN(sum0, activation_max);
             output[(i * output_x + j * 2) * channel_offset] = sum0;
 
             sum1 = (float) sum1 * *scales;
             sum1 += output_offset;
-            sum1 = MAX(sum1, activation_min);
-            sum1 = MIN(sum1, activation_max);
+            sum1 = TN_MAX(sum1, activation_min);
+            sum1 = TN_MIN(sum1, activation_max);
             output[(i * output_x + (j * 2 + 1)) * channel_offset] = sum1;
 
             cols_8b_iterptr += STRIDE * 2;
@@ -679,14 +680,14 @@ def _genKernelCWH(self):
             /* requantize */
             sum0 = arm_nn_requantize(sum0 + biasR[0], *multiplier, *shift);
             sum0 += output_offset;
-            sum0 = MAX(sum0, activation_min);
-            sum0 = MIN(sum0, activation_max);
+            sum0 = TN_MAX(sum0, activation_min);
+            sum0 = TN_MIN(sum0, activation_max);
             output[(i * output_x + j * 2) * channel_offset] = sum0;
 
             sum1 = arm_nn_requantize(sum1 + biasR[0], *multiplier, *shift);
             sum1 += output_offset;
-            sum1 = MAX(sum1, activation_min);
-            sum1 = MIN(sum1, activation_max);
+            sum1 = TN_MAX(sum1, activation_min);
+            sum1 = TN_MIN(sum1, activation_max);
             output[(i * output_x + (j * 2 + 1)) * channel_offset] = sum1;
 
             cols_8b_iterptr += STRIDE * 2;
@@ -704,8 +705,8 @@ def _genKernelCWH(self):
             retString += """
             sum = (float) sum * *scales;
             sum += output_offset;
-            sum = MAX(sum, activation_min);
-            sum = MIN(sum, activation_max);
+            sum = TN_MAX(sum, activation_min);
+            sum = TN_MIN(sum, activation_max);
             output[(i * output_x + output_x - 1) * channel_offset] = sum;
 
             cols_8b_iterptr += STRIDE;
@@ -715,8 +716,8 @@ def _genKernelCWH(self):
             retString += """
             sum = arm_nn_requantize(sum + biasR[0], *multiplier, *shift);
             sum += output_offset;
-            sum = MAX(sum, activation_min);
-            sum = MIN(sum, activation_max);
+            sum = TN_MAX(sum, activation_min);
+            sum = TN_MIN(sum, activation_max);
             output[(i * output_x + output_x - 1) * channel_offset] = sum;
 
             cols_8b_iterptr += STRIDE;
 
@@ -0,0 +1,63 @@
+# Image Classification with MNIST on laptop/desktop
+
+This demo is currently on the dev/platform-independent-vww branch. Please run the following to clone the repo and switch to the branch.
+
+```bash
+git clone --recursive https://github.com/mit-han-lab/tinyengine.git
+cd tinyengine
+git checkout dev/platform-independent-vww
+```
+
+## Run the demo with MacOS
+
+- Requirement
+  - OpenCV: The application needs OpenCV to display images and prediction results, so install it first.
+  ```bash
+  brew install opencv
+  ```
+- Set the `OPENCV_PATH` variable in `experimental/mnist_demo/application/Makefile` to the OpenCV installation path on your machine (use `brew info opencv` to find it).
+- Enter the application directory, compile the code, and run the demo.
+  ```bash
+  cd experimental/mnist_demo/application
+  make
+  ./main
+  ```
+
+Note: The current implementation only supports Intel devices due to its SIMD dependency. Support for the M1 chip is still in progress.
+
+## Run the demo with Windows
+
+- Requirement
+
+  - Visual Studio (ver. 2022 is recommended)
+  - OpenCV 4.7.0
+    - Option 1: Use the prebuilt OpenCV dependency
+    ```bash
+    cd experimental/mnist_demo/windows_visual_studio_mnist
+    unzip opencv_deps.zip
+    ```
+    - Option 2: Build OpenCV from source (refer to the official guide) and put the generated libraries and headers in the following structure.
+    ```
+    ├── ...
+    ├── mnist_demo
+    │   ├── windows_visual_studio_mnist
+    │   │   ├── opencv_deps                    # Visual studio project directory
+    │   │   │   ├── include                    # Header files
+    │   │   │   │   └── opencv2
+    │   │   │   │       ├── opencv.hpp
+    │   │   │   │       └── ...
+    │   │   │   ├── lib                        # Static libraries
+    │   │   │   │   ├── opencv_core470.lib
+    │   │   │   │   └── ...
+    │   │   │   └── bin                        # Dynamic libraries
+    │   │   │       ├── opencv_core470.dll
+    │   │   │       └── ...
+    │   │   ├── main.cc                        # Entry point of the windows program
+    │   │   └── ...
+    │   └── ...
+    └── ...
+    ```
+
+- Open the project (experimental/mnist_demo/windows_visual_studio.sln) with Visual Studio
+
+- Choose `Release` and run the demo
@@ -0,0 +1,43 @@
+CXX = g++
+CXXFLAGS = -std=c++14
+CFLAGS = -Wall -ansi
+INCLUDE = ../include
+SOURCE = ../src
+# Please set up your own path for OpenCV
+OPENCV_PATH = /usr/local/Cellar/opencv/4.7.0_1
+OPENCV_INCLUDE = $(OPENCV_PATH)/include/opencv4/
+OPENCV_LIB = -L$(OPENCV_PATH)/lib
+OPENCV_LINK = -lopencv_gapi -lopencv_stitching -lopencv_alphamat -lopencv_aruco -lopencv_barcode -lopencv_bgsegm -lopencv_bioinspired \
+			-lopencv_ccalib -lopencv_dnn_objdetect -lopencv_dnn_superres -lopencv_dpm -lopencv_face -lopencv_freetype -lopencv_fuzzy \
+			-lopencv_hfs -lopencv_img_hash -lopencv_intensity_transform -lopencv_line_descriptor -lopencv_mcc -lopencv_quality \
+			-lopencv_rapid -lopencv_reg -lopencv_rgbd -lopencv_saliency -lopencv_sfm -lopencv_stereo -lopencv_structured_light \
+			-lopencv_phase_unwrapping -lopencv_superres -lopencv_optflow -lopencv_surface_matching -lopencv_tracking -lopencv_highgui \
+			-lopencv_datasets -lopencv_text -lopencv_plot -lopencv_videostab -lopencv_videoio -lopencv_viz -lopencv_wechat_qrcode \
+			-lopencv_xfeatures2d -lopencv_shape -lopencv_ml -lopencv_ximgproc -lopencv_video -lopencv_xobjdetect -lopencv_objdetect \
+			-lopencv_calib3d -lopencv_imgcodecs -lopencv_features2d -lopencv_dnn -lopencv_flann -lopencv_xphoto -lopencv_photo \
+			-lopencv_imgproc -lopencv_core  # Here we list most of the linkers for convenience. Feel free to trim unnecessary ones, if needed.
+LIB = -L../src
+BATCH_OBJECTS = main_batch.o batch_model.o
+OBJECTS = main.o model.o
+
+# Default goal: build both demo binaries incrementally; run `make clean` first to force a full rebuild.
+.PHONY: all clean
+all: main main_batch
+	@echo "Compilation and build completed - [MIT Hanlab] Platform-independent TinyEngine MNIST Demo"
+
+# Each binary is a real file target linked from its own object list ($^); both share the link recipe below.
+main: $(OBJECTS)
+main_batch: $(BATCH_OBJECTS)
+main main_batch:
+	@$(CXX) $(CXXFLAGS) $(CFLAGS) $^ -o $@ -I$(INCLUDE) $(LIB) -I$(OPENCV_INCLUDE) $(OPENCV_LIB) $(OPENCV_LINK)
+
+# Compile rules. -MMD -MP writes a .d file per object listing the headers it includes, so header edits trigger rebuilds.
+main.o main_batch.o: %.o: %.cc
+	@$(CXX) $(CXXFLAGS) -Wall -Ofast -MMD -MP -c $< -o $@ -I$(INCLUDE) -I$(OPENCV_INCLUDE)
+model.o batch_model.o: %.o: $(SOURCE)/%.cc
+	@$(CXX) $(CXXFLAGS) $(CFLAGS) -MMD -MP -c $< -o $@ -I$(INCLUDE)
+
+clean:
+	$(RM) *.o *.d main main_batch
+# Load the generated header dependencies; the leading '-' ignores .d files that do not exist yet.
+-include $(OBJECTS:.o=.d) $(BATCH_OBJECTS:.o=.d)