Skip to content

Commit 34d1ba9

Browse files
committed
Refactor multiple old CV models
1 parent b87f66a commit 34d1ba9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

87 files changed

+7616
-4131
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
models
12
build
23
*.wts
34
*.engine

README.md

Lines changed: 163 additions & 100 deletions
Large diffs are not rendered by default.

alexnet/CMakeLists.txt

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,45 @@
1-
cmake_minimum_required(VERSION 2.6)
1+
cmake_minimum_required(VERSION 3.14)
22

3-
project(alexnet)
3+
project(
4+
alexnet
5+
VERSION 0.1
6+
LANGUAGES C CXX CUDA)
47

5-
add_definitions(-std=c++11)
8+
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
9+
set(CMAKE_CUDA_ARCHITECTURES
10+
60
11+
70
12+
72
13+
75
14+
80
15+
86
16+
89)
17+
endif()
618

7-
option(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
8-
set(CMAKE_CXX_STANDARD 11)
9-
set(CMAKE_BUILD_TYPE Debug)
19+
set(CMAKE_CXX_STANDARD 17)
20+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
21+
set(CMAKE_CUDA_STANDARD 17)
22+
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
23+
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
24+
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)
1025

11-
include_directories(${PROJECT_SOURCE_DIR}/include)
12-
# include and link dirs of cuda and tensorrt, you need adapt them if yours are different
13-
# cuda
14-
include_directories(/usr/local/cuda/include)
15-
link_directories(/usr/local/cuda/lib64)
16-
# tensorrt
17-
include_directories(/usr/include/x86_64-linux-gnu/)
18-
link_directories(/usr/lib/x86_64-linux-gnu/)
26+
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use static cudaruntime library" OFF)
1927

20-
add_executable(alexnet ${PROJECT_SOURCE_DIR}/alex.cpp)
21-
target_link_libraries(alexnet nvinfer)
22-
target_link_libraries(alexnet cudart)
28+
find_package(Threads REQUIRED)
29+
find_package(CUDAToolkit REQUIRED)
30+
find_package(OpenCV REQUIRED)
2331

24-
add_definitions(-O2 -pthread)
32+
if(NOT TARGET TensorRT::TensorRT)
33+
include(FindTensorRT.cmake)
34+
else()
35+
message("TensorRT has been found, skipping for ${PROJECT_NAME}")
36+
endif()
2537

38+
add_executable(${PROJECT_NAME} alexnet.cc)
39+
40+
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_LIST_DIR}
41+
${OpenCV_INCLUDE_DIRS})
42+
43+
target_link_libraries(
44+
${PROJECT_NAME} PRIVATE Threads::Threads TensorRT::TensorRT CUDA::cudart
45+
${OpenCV_LIBS})

alexnet/FindTensorRT.cmake

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
cmake_minimum_required(VERSION 3.17.0)
2+
3+
set(TRT_VERSION
4+
$ENV{TRT_VERSION}
5+
CACHE
6+
STRING
7+
"TensorRT version, e.g. \"8.6.1.6\" or \"8.6.1.6+cuda12.0.1.011\", etc")
8+
9+
function(_guess_path var_name required_files)
10+
set(_result "")
11+
12+
foreach(path_entry IN LISTS ARGN)
13+
if(NOT EXISTS "${path_entry}")
14+
message(DEBUG "skip non-existing path '${path_entry}'")
15+
continue()
16+
endif()
17+
18+
set(_ok TRUE)
19+
foreach(required_file IN LISTS required_files)
20+
if(NOT EXISTS "${path_entry}/${required_file}")
21+
set(_ok FALSE)
22+
message(DEBUG "'${path_entry}' missing '${required_file}'")
23+
break()
24+
endif()
25+
endforeach()
26+
27+
if(_ok)
28+
list(APPEND _result "${path_entry}")
29+
message(DEBUG "accept '${path_entry}'")
30+
else()
31+
message(DEBUG "reject '${path_entry}'")
32+
endif()
33+
endforeach()
34+
35+
if(_result STREQUAL "")
36+
message(
37+
FATAL_ERROR
38+
"_guess_path(${var_name}) failed: no valid path found. required_files='${required_files}' candidates='${ARGN}'"
39+
)
40+
endif()
41+
42+
set(${var_name}
43+
"${_result}"
44+
PARENT_SCOPE)
45+
endfunction()
46+
47+
# find TensorRT include folder
48+
if(NOT DEFINED TensorRT_INCLUDE_DIR)
49+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
50+
_guess_path(
51+
TensorRT_INCLUDE_DIR "NvInfer.h" "/usr/include/aarch64-linux-gnu"
52+
"/usr/include" "/usr/local/cuda/targets/aarch64-linux/include")
53+
else()
54+
_guess_path(
55+
TensorRT_INCLUDE_DIR "NvInfer.h"
56+
"/usr/local/tensorrt/targets/x86_64-linux-gnu/include"
57+
"/usr/include/x86_64-linux-gnu" "/usr/include")
58+
endif()
59+
message(STATUS "TensorRT includes: ${TensorRT_INCLUDE_DIR}")
60+
endif()
61+
62+
# find TensorRT library folder
63+
if(NOT TensorRT_LIBRARY_DIR)
64+
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
65+
_guess_path(
66+
TensorRT_LIBRARY_DIR "libnvinfer.so;libnvinfer_plugin.so"
67+
"/usr/lib/aarch64-linux-gnu;/usr/lib/aarch64-linux-gnu/tegra" "/usr/lib")
68+
else()
69+
_guess_path(
70+
TensorRT_LIBRARY_DIR
71+
"libnvinfer.so;libnvinfer_plugin.so"
72+
"/usr/lib/x86_64-linux-gnu;/usr/local/tensorrt/targets/x86_64-linux-gnu/lib;/usr/lib"
73+
)
74+
endif()
75+
message(STATUS "TensorRT libraries: ${TensorRT_LIBRARY_DIR}")
76+
endif()
77+
78+
set(TensorRT_LIBRARIES)
79+
80+
message(STATUS "Found TensorRT lib: ${TensorRT_LIBRARIES}")
81+
82+
# process for different TensorRT version
83+
if(DEFINED TRT_VERSION AND NOT TRT_VERSION STREQUAL "")
84+
string(REGEX MATCH "([0-9]+)" _match ${TRT_VERSION})
85+
set(TRT_MAJOR_VERSION "${_match}")
86+
set(_modules nvinfer nvinfer_plugin)
87+
unset(_match)
88+
89+
if(TRT_MAJOR_VERSION GREATER_EQUAL 8)
90+
list(APPEND _modules nvinfer_vc_plugin nvinfer_dispatch nvinfer_lean)
91+
endif()
92+
else()
93+
message(FATAL_ERROR "Please set a environment variable \"TRT_VERSION\"")
94+
endif()
95+
96+
# find and add all modules of TensorRT into list
97+
foreach(lib IN LISTS _modules)
98+
find_library(
99+
TensorRT_${lib}_LIBRARY
100+
NAMES ${lib}
101+
HINTS ${TensorRT_LIBRARY_DIR})
102+
list(APPEND TensorRT_LIBRARIES ${TensorRT_${lib}_LIBRARY})
103+
endforeach()
104+
105+
# make the "TensorRT target"
106+
add_library(TensorRT IMPORTED INTERFACE)
107+
add_library(TensorRT::TensorRT ALIAS TensorRT)
108+
target_link_libraries(TensorRT INTERFACE ${TensorRT_LIBRARIES})
109+
110+
set_target_properties(
111+
TensorRT
112+
PROPERTIES C_STANDARD 17
113+
CXX_STANDARD 17
114+
POSITION_INDEPENDENT_CODE ON
115+
SKIP_BUILD_RPATH TRUE
116+
BUILD_WITH_INSTALL_RPATH TRUE
117+
INSTALL_RPATH "$ORIGIN"
118+
INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIR}")
119+
120+
unset(TRT_MAJOR_VERSION)
121+
unset(_modules)

alexnet/README.md

Lines changed: 91 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,110 @@
11
# alexnet
22

3-
AlexNet model architecture from the "One weird trick..." <https://arxiv.org/abs/1404.5997>`_ paper.
3+
## Introduction
44

5-
For the details, you can refer to [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet)
5+
The AlexNet model architecture comes from the paper [One weird trick for parallelizing convolutional neural networks](https://arxiv.org/abs/1404.5997). To generate the `.wts` file, refer to [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet). For the PyTorch implementation of AlexNet, see [torchvision/models/alexnet.py](https://github.com/pytorch/vision/blob/main/torchvision/models/alexnet.py#L17).
66

7-
This alexnet is just several `conv-relu-pool` blocks followed by several `fc-relu`, nothing special. All layers can be implemented by tensorrt api, including `addConvolution`, `addActivation`, `addPooling`, `addFullyConnected`.
7+
AlexNet consists of 3 major parts: features, adaptive average pooling, and classifier:
88

9+
- features: several stacked `CRP` (conv-relu-pool) and `CR` (conv-relu) layers
10+
- adaptive average pooling: PyTorch determines the internal pooling parameters automatically, but with the TensorRT API we need to calculate them ourselves
11+
- classifier: several `fc-relu` layers. All layers can be implemented with the TensorRT API, including `addConvolution`, `addActivation`, `addPooling`, `addMatrixMultiply`, `addElementWise`, etc.
12+
13+
## Use AlexNet from PyTorch
14+
15+
We can use torchvision to load the pretrained alexnet model:
16+
17+
```python
18+
alexnet = torchvision.models.alexnet(pretrained=True)
919
```
10-
// 1. generate alexnet.wts from [pytorchx/alexnet](https://github.com/wang-xinyu/pytorchx/tree/master/alexnet)
1120

12-
// 2. put alexnet.wts into tensorrtx/alexnet
21+
The model structure is:
22+
23+
```bash
24+
AlexNet(
25+
(features): Sequential(
26+
(0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
27+
(1): ReLU(inplace=True)
28+
(2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
29+
(3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
30+
(4): ReLU(inplace=True)
31+
(5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
32+
(6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
33+
(7): ReLU(inplace=True)
34+
(8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
35+
(9): ReLU(inplace=True)
36+
(10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
37+
(11): ReLU(inplace=True)
38+
(12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
39+
)
40+
(avgpool): AdaptiveAvgPool2d(output_size=(6, 6))
41+
(classifier): Sequential(
42+
(0): Dropout(p=0.5, inplace=False)
43+
(1): Linear(in_features=9216, out_features=4096, bias=True)
44+
(2): ReLU(inplace=True)
45+
(3): Dropout(p=0.5, inplace=False)
46+
(4): Linear(in_features=4096, out_features=4096, bias=True)
47+
(5): ReLU(inplace=True)
48+
(6): Linear(in_features=4096, out_features=1000, bias=True)
49+
)
50+
)
51+
```
1352

14-
// 3. build and run
53+
## Usage
1554

16-
cd tensorrtx/alexnet
55+
1. Use `gen_wts.py` to generate the `.wts` file.
1756

18-
mkdir build
57+
```bash
58+
python3 gen_wts.py
59+
```
1960

20-
cd build
61+
2. build C++ code
2162

22-
cmake ..
63+
```bash
64+
pushd tensorrtx/alexnet
65+
cmake -S . -B build -G Ninja --fresh
66+
cmake --build build
67+
```
2368

24-
make
69+
3. Serialize the `.wts` model to an engine file.
2570

26-
sudo ./alexnet -s // serialize model to plan file i.e. 'alexnet.engine'
71+
```bash
72+
./build/alexnet -s
73+
```
2774

28-
sudo ./alexnet -d // deserialize plan file and run inference
75+
4. run inference
2976

30-
// 4. see if the output is same as pytorchx/alexnet
77+
```bash
78+
./build/alexnet -d
3179
```
3280

81+
output looks like:
82+
83+
```txt
84+
...
85+
====
86+
Execution time: 1ms
87+
0.1234, -0.5678, ...
88+
====
89+
prediction result:
90+
Top: 0 idx: 285, logits: 9.9, label: Egyptian cat
91+
Top: 1 idx: 281, logits: 8.304, label: tabby, tabby cat
92+
Top: 2 idx: 282, logits: 6.859, label: tiger cat
93+
```
94+
95+
## FAQ
96+
97+
### How to align the output with Pytorch?
98+
99+
If your output differs from PyTorch's, you have to find out which TensorRT API call or which part of your code causes the difference. A simple approach is to check the `.engine` output part by part; e.g., you can mark an earlier layer of AlexNet as the output:
100+
101+
```c++
102+
fc3_1->getOutput(0)->setName(OUTPUT_NAME);
103+
network->markOutput(*pool3->getOutput(0)); // original is: "*fc3_1->getOutput(0)"
104+
```
105+
106+
In this example, I use the output of the "features" part of AlexNet and ignore the rest of the model. Don't forget to change the `OUTPUT_SIZE` macro at the top of the file accordingly, and then rebuild the `.engine` file to apply the changes.
107+
108+
You can sum up all output values from the C++ code and compare the result with the PyTorch output; in PyTorch, you can do this with `torch.sum(x)` while debugging. Ideally, the deviation between the two sums should be in the range $[10^{-2}, 10^{-1}]$. For this example, since the "features" output has $256 \times 6 \times 6$ elements (batch = 1), the final error would be roughly $10^{-4}$.
33109
110+
Note: This is only a quick check. For a more accurate comparison, you have to save the output tensor to a file and compare the values one by one, but this is rarely necessary.

0 commit comments

Comments
 (0)