Skip to content

Commit 9a79c3a

Browse files
meenchenRaymondWang0
andauthored
Platform independent vww and mnist (#63)
* complation skeleton * pointwise imp * clean up headers * clean up bin * fix k3inpch3 kernel * add test * minor * bug fix * fix bug * pointwise func tests * fix 3x3 * fix randomize * missing file * minor * depthwise conv template * Support OpenCV with camera * convert kernel to c * update ground true * fix * Update to c * fix path * upload codegen * include sequence * minor * readme * readme * minor * update makefile * update opencv path info * update ins * revise demo UI * revise demo UI * fix bug * minor fix * larger model * force formating * Support MNIST * minor * draft version * basline finish * update main * c to cc * source * opt template * support demo w/ multi img * revise UI * minor * simd * README * include ifdef * fix bias * update ui and imp choice * minor * readme * minor * fix simd * minor * batch implementation * remove weight * refine weights * windows support * readme for windows * clean up vww op * tiling * reorganize * fix * multithreading * simd * fix multithread * opt imp * minor * rename * mv dirs * update readme --------- Co-authored-by: RaymondWang0 <[email protected]>
1 parent b116d7d commit 9a79c3a

File tree

61 files changed

+57097
-38
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

61 files changed

+57097
-38
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ repos:
2222
hooks:
2323
- id: black
2424
- repo: https://github.com/pycqa/isort
25-
rev: 5.10.1
25+
rev: 5.12.0
2626
hooks:
2727
- id: isort
2828
args: ["--sp", "pyproject.toml"]

code_generator/CodeGenerator.py

Lines changed: 6 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,8 @@ def _genMemBuffer(self):
485485

486486
# activation buffers
487487
string = "\n/* sram:" + str(schedule.peakmem) + ", flash:" + str(schedule.flash) + " */\n"
488+
fp.write(string)
489+
string = "#define PEAK_MEM " + str(schedule.peakmem) + "\n" + "#define MODEL_SIZE " + str(schedule.flash) + "\n"
488490
fp.write(string + "\n")
489491

490492
string = "static signed char buffer[" + str(schedule.peakmem) + "];\n"
@@ -493,14 +495,11 @@ def _genMemBuffer(self):
493495
string = "static signed char *buffer0 = &buffer[" + str(accumulate_ptr) + "];\n"
494496
accumulate_ptr += int(schedule.buffers["input_output"])
495497
fp.write(string)
496-
string = "static signed char *buffer1 = &buffer[" + str(accumulate_ptr) + "];\n"
497-
accumulate_ptr += int(schedule.buffers["residual"])
498-
fp.write(string)
499498

500499
string = "static int16_t *sbuf = (int16_t *)&buffer[" + str(accumulate_ptr) + "];\n"
501500
accumulate_ptr += int(schedule.buffers["im2col"])
502501
fp.write(string)
503-
string = "static int32_t *kbuf = (int32_t *)&buffer[" + str(accumulate_ptr) + "];\n"
502+
string = "static int16_t *kbuf = (int16_t *)&buffer[" + str(accumulate_ptr) + "];\n"
504503
accumulate_ptr += int(schedule.buffers["kernel"])
505504
fp.write(string)
506505
string = "const int SBuffer_size = " + str(int(schedule.buffers["im2col"])) + ";\n"
@@ -511,14 +510,12 @@ def _genMemBuffer(self):
511510
def _includeHeaders(self):
512511
include_string = """/* Automatically generated source file */
513512
#include <float.h>
514-
#include "arm_nnfunctions.h"
513+
#include <tinyengine_function.h>
514+
#include <tinyengine_function_fp.h>
515515
516516
#include "genNN.h"
517517
#include "genModel.h"
518-
519-
#include "tinyengine_function.h"
520-
#include "tinyengine_function_fp.h"
521-
518+
#include "genInclude.h"
522519
"""
523520
if self.profile_mode:
524521
include_string += '#include "profile.h"\n'
@@ -527,8 +524,6 @@ def _includeHeaders(self):
527524
"""
528525
/* Variables used by all ops */
529526
ADD_params add_params;
530-
//Conv_Params conv_params;
531-
//Depthwise_Params dpconv_params;
532527
int i;
533528
int8_t *int8ptr,*int8ptr2;
534529
int32_t *int32ptr;

code_generator/TfliteConvertor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ def _handleOperator(self, op):
137137
self.layer.append(ret_op)
138138
elif op_code_str == "TRANSPOSE":
139139
self._convert_TRANSPOSE(op)
140-
elif op_code_str in "FULLY_CONNECTED":
140+
elif op_code_str == "FULLY_CONNECTED":
141141
self.layer.append(TF_Parser.parse_fc(op, self.model))
142142
elif op_code_str in SKIP_OPs:
143143
pass

code_generator/codetemplate/depthwiseTemplate.py

Lines changed: 26 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
# Target ISA: ARMv7E-M
1717
# ----------------------------------------------------------------------
1818

19+
1920
class depthwiseInplace:
2021
def __init__(
2122
self,
@@ -53,6 +54,7 @@ def _genCode(self):
5354
elif self.dataflow == "CWH":
5455
retString += self._genInplaceKernelCWH()
5556

57+
retString += " return STATE_SUCCESS;"
5658
retString += self._genEndStr()
5759
if self.dataflow == "CHW":
5860
retString += "\n" + self._genKernel()
@@ -178,7 +180,6 @@ def _genHeader(self):
178180
+ """
179181
180182
* -------------------------------------------------------------------- */
181-
#include "arm_nnsupportfunctions.h" //TODO: remove this in the future for self-contained
182183
#include "tinyengine_function.h"\n"""
183184
)
184185

@@ -562,14 +563,14 @@ def _genKernel(self):
562563
/* requantize */
563564
sum0 = (float) sum0 * *scales;
564565
sum0 += output_offset;
565-
sum0 = MAX(sum0, activation_min);
566-
sum0 = MIN(sum0, activation_max);
566+
sum0 = TN_MAX(sum0, activation_min);
567+
sum0 = TN_MIN(sum0, activation_max);
567568
output[(i * output_x + j * 2) * channel_offset] = sum0;
568569
569570
sum1 = (float) sum1 * *scales;
570571
sum1 += output_offset;
571-
sum1 = MAX(sum1, activation_min);
572-
sum1 = MIN(sum1, activation_max);
572+
sum1 = TN_MAX(sum1, activation_min);
573+
sum1 = TN_MIN(sum1, activation_max);
573574
output[(i * output_x + (j * 2 + 1)) * channel_offset] = sum1;
574575
575576
cols_8b_iterptr += STRIDE * 2;
@@ -580,14 +581,14 @@ def _genKernel(self):
580581
/* requantize */
581582
sum0 = arm_nn_requantize(sum0 + biasR[0], *multiplier, *shift);
582583
sum0 += output_offset;
583-
sum0 = MAX(sum0, activation_min);
584-
sum0 = MIN(sum0, activation_max);
584+
sum0 = TN_MAX(sum0, activation_min);
585+
sum0 = TN_MIN(sum0, activation_max);
585586
output[(i * output_x + j * 2) * channel_offset] = sum0;
586587
587588
sum1 = arm_nn_requantize(sum1 + biasR[0], *multiplier, *shift);
588589
sum1 += output_offset;
589-
sum1 = MAX(sum1, activation_min);
590-
sum1 = MIN(sum1, activation_max);
590+
sum1 = TN_MAX(sum1, activation_min);
591+
sum1 = TN_MIN(sum1, activation_max);
591592
output[(i * output_x + (j * 2 + 1)) * channel_offset] = sum1;
592593
593594
cols_8b_iterptr += STRIDE * 2;
@@ -605,8 +606,8 @@ def _genKernel(self):
605606
retString += """
606607
sum = (float) sum * *scales;
607608
sum += output_offset;
608-
sum = MAX(sum, activation_min);
609-
sum = MIN(sum, activation_max);
609+
sum = TN_MAX(sum, activation_min);
610+
sum = TN_MIN(sum, activation_max);
610611
output[(i * output_x + output_x - 1) * channel_offset] = sum;
611612
612613
cols_8b_iterptr += STRIDE;
@@ -616,8 +617,8 @@ def _genKernel(self):
616617
retString += """
617618
sum = arm_nn_requantize(sum + biasR[0], *multiplier, *shift);
618619
sum += output_offset;
619-
sum = MAX(sum, activation_min);
620-
sum = MIN(sum, activation_max);
620+
sum = TN_MAX(sum, activation_min);
621+
sum = TN_MIN(sum, activation_max);
621622
output[(i * output_x + output_x - 1) * channel_offset] = sum;
622623
623624
cols_8b_iterptr += STRIDE;
@@ -661,14 +662,14 @@ def _genKernelCWH(self):
661662
/* requantize */
662663
sum0 = (float) sum0 * *scales;
663664
sum0 += output_offset;
664-
sum0 = MAX(sum0, activation_min);
665-
sum0 = MIN(sum0, activation_max);
665+
sum0 = TN_MAX(sum0, activation_min);
666+
sum0 = TN_MIN(sum0, activation_max);
666667
output[(i * output_x + j * 2) * channel_offset] = sum0;
667668
668669
sum1 = (float) sum1 * *scales;
669670
sum1 += output_offset;
670-
sum1 = MAX(sum1, activation_min);
671-
sum1 = MIN(sum1, activation_max);
671+
sum1 = TN_MAX(sum1, activation_min);
672+
sum1 = TN_MIN(sum1, activation_max);
672673
output[(i * output_x + (j * 2 + 1)) * channel_offset] = sum1;
673674
674675
cols_8b_iterptr += STRIDE * 2;
@@ -679,14 +680,14 @@ def _genKernelCWH(self):
679680
/* requantize */
680681
sum0 = arm_nn_requantize(sum0 + biasR[0], *multiplier, *shift);
681682
sum0 += output_offset;
682-
sum0 = MAX(sum0, activation_min);
683-
sum0 = MIN(sum0, activation_max);
683+
sum0 = TN_MAX(sum0, activation_min);
684+
sum0 = TN_MIN(sum0, activation_max);
684685
output[(i * output_x + j * 2) * channel_offset] = sum0;
685686
686687
sum1 = arm_nn_requantize(sum1 + biasR[0], *multiplier, *shift);
687688
sum1 += output_offset;
688-
sum1 = MAX(sum1, activation_min);
689-
sum1 = MIN(sum1, activation_max);
689+
sum1 = TN_MAX(sum1, activation_min);
690+
sum1 = TN_MIN(sum1, activation_max);
690691
output[(i * output_x + (j * 2 + 1)) * channel_offset] = sum1;
691692
692693
cols_8b_iterptr += STRIDE * 2;
@@ -704,8 +705,8 @@ def _genKernelCWH(self):
704705
retString += """
705706
sum = (float) sum * *scales;
706707
sum += output_offset;
707-
sum = MAX(sum, activation_min);
708-
sum = MIN(sum, activation_max);
708+
sum = TN_MAX(sum, activation_min);
709+
sum = TN_MIN(sum, activation_max);
709710
output[(i * output_x + output_x - 1) * channel_offset] = sum;
710711
711712
cols_8b_iterptr += STRIDE;
@@ -715,8 +716,8 @@ def _genKernelCWH(self):
715716
retString += """
716717
sum = arm_nn_requantize(sum + biasR[0], *multiplier, *shift);
717718
sum += output_offset;
718-
sum = MAX(sum, activation_min);
719-
sum = MIN(sum, activation_max);
719+
sum = TN_MAX(sum, activation_min);
720+
sum = TN_MIN(sum, activation_max);
720721
output[(i * output_x + output_x - 1) * channel_offset] = sum;
721722
722723
cols_8b_iterptr += STRIDE;

experimental/mnist_demo/README.md

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# Image Classification with MNIST on laptop/desktop
2+
3+
This demo is currently on the dev/platform-independent-vww branch. Please run the following to clone the repo and switch to the branch.
4+
5+
```bash
6+
git clone --recursive https://github.com/mit-han-lab/tinyengine.git
7+
cd tinyengine
8+
git checkout dev/platform-independent-vww
9+
```
10+
11+
## Run the demo with MacOS
12+
13+
- Requirement
14+
- OpenCV: The application needs OpenCV to display images and prediction results, so install OpenCV first.
15+
```bash
16+
brew install opencv
17+
```
18+
- Update `OPENCV_PATH` (Line 7) in `experimental/mnist_demo/application/Makefile` to match the OpenCV installation path on your machine (use `brew info opencv` to find it).
19+
- Enter the application directory, compile the code, and run the demo.
20+
```bash
21+
cd experimental/mnist_demo/application
22+
make
23+
./main
24+
```
25+
26+
Note: The current implementation supports only Intel devices because of its SIMD dependency. Support for the M1 chip is still in progress.
27+
28+
## Run the demo with Windows
29+
30+
- Requirement
31+
32+
- Visual studio (ver. 2022 is recommended)
33+
- Opencv 4.7.0
34+
- Option 1: Use the prebuilt OpenCV dependency
35+
```bash
36+
cd experimental/mnist_demo/windows_visual_studio_mnist
37+
unzip opencv_deps.zip
38+
```
39+
- Option 2: Build OpenCV from source (refer to the official guide) and put the generated libraries and headers in the following structure.
40+
```
41+
├── ...
42+
├── mnist_demo
43+
│ ├── windows_visual_studio_mnist
44+
│ │ ├── opencv_deps # Visual studio project directory
45+
│ │ │ ├── include # Header files
46+
│ │ │ │ └── opencv2
47+
│ │ │ | ├── opencv.hpp
48+
│ │ │ | └── ...
49+
│ │ │ ├── lib # Static libraries
50+
│ │ │ | ├── opencv_core470.lib
51+
│ │ │ | └── ...
52+
│ │ │ └── bin # Dynamic libraries
53+
│ │ │ ├── opencv_core470.dll
54+
│ │ │ └── ...
55+
│ │ ├── main.cc # Entry point of the windows program
56+
│ │ └── ...
57+
│ └── ...
58+
└── ...
59+
```
60+
61+
- Open the project (experimental/mnist_demo/windows_visual_studio.sln) with visual studio
62+
63+
- Choose `Release` and run the demo
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
CXX = g++
2+
CXXFLAGS = -std=c++14
3+
CFLAGS = -Wall -ansi
4+
INCLUDE = ../include
5+
SOURCE = ../src
6+
# Please set up your own path for OpenCV
7+
OPENCV_PATH = /usr/local/Cellar/opencv/4.7.0_1
8+
OPENCV_INCLUDE = $(OPENCV_PATH)/include/opencv4/
9+
OPENCV_LIB = -L$(OPENCV_PATH)/lib
10+
OPENCV_LINK = -lopencv_gapi -lopencv_stitching -lopencv_alphamat -lopencv_aruco -lopencv_barcode -lopencv_bgsegm -lopencv_bioinspired \
11+
-lopencv_ccalib -lopencv_dnn_objdetect -lopencv_dnn_superres -lopencv_dpm -lopencv_face -lopencv_freetype -lopencv_fuzzy \
12+
-lopencv_hfs -lopencv_img_hash -lopencv_intensity_transform -lopencv_line_descriptor -lopencv_mcc -lopencv_quality \
13+
-lopencv_rapid -lopencv_reg -lopencv_rgbd -lopencv_saliency -lopencv_sfm -lopencv_stereo -lopencv_structured_light \
14+
-lopencv_phase_unwrapping -lopencv_superres -lopencv_optflow -lopencv_surface_matching -lopencv_tracking -lopencv_highgui \
15+
-lopencv_datasets -lopencv_text -lopencv_plot -lopencv_videostab -lopencv_videoio -lopencv_viz -lopencv_wechat_qrcode \
16+
-lopencv_xfeatures2d -lopencv_shape -lopencv_ml -lopencv_ximgproc -lopencv_video -lopencv_xobjdetect -lopencv_objdetect \
17+
-lopencv_calib3d -lopencv_imgcodecs -lopencv_features2d -lopencv_dnn -lopencv_flann -lopencv_xphoto -lopencv_photo \
18+
-lopencv_imgproc -lopencv_core # Here we list most of the linkers for convenience. Feel free to trim unnecessary ones, if needed.
19+
LIB = -L../src
20+
BATCH_OBJECTS = main_batch.o batch_model.o
21+
OBJECTS = main.o model.o
22+
23+
# Register the source-file suffixes with make. The special target must be
# spelled .SUFFIXES; any other spelling is treated as an ordinary target.
.SUFFIXES: .c .cc .cpp
24+
main: clean $(OBJECTS) $(BATCH_OBJECTS)
25+
@$(CXX) $(CXXFLAGS) $(CFLAGS) $(OBJECTS) -o main -I$(INCLUDE) $(LIB) -I$(OPENCV_INCLUDE) $(OPENCV_LIB) $(OPENCV_LINK)
26+
@$(CXX) $(CXXFLAGS) $(CFLAGS) $(BATCH_OBJECTS) -o main_batch -I$(INCLUDE) $(LIB) -I$(OPENCV_INCLUDE) $(OPENCV_LIB) $(OPENCV_LINK)
27+
@echo "Compilation and build completed - [MIT Hanlab] Platform-independent TinyEngine MNIST Demo"
28+
29+
main.o: main.cc
30+
@$(CXX) $(CXXFLAGS) -Wall -Ofast -c main.cc -I$(INCLUDE) -I$(OPENCV_INCLUDE)
31+
32+
model.o: $(SOURCE)/model.cc
33+
@$(CXX) $(CXXFLAGS) $(CFLAGS) -c $(SOURCE)/model.cc -I$(INCLUDE)
34+
35+
main_batch.o: main_batch.cc
36+
@$(CXX) $(CXXFLAGS) -Wall -Ofast -c main_batch.cc -I$(INCLUDE) -I$(OPENCV_INCLUDE)
37+
38+
batch_model.o: $(SOURCE)/batch_model.cc
39+
@$(CXX) $(CXXFLAGS) $(CFLAGS) -c $(SOURCE)/batch_model.cc -I$(INCLUDE)
40+
41+
.PHONY: main clean
42+
clean:
43+
-rm *.o main main_batch

0 commit comments

Comments
 (0)