Skip to content

Commit 7eb1776

Browse files
author
happy
committed
Merge github.com:shouxieai/tensorRT_cpp
2 parents e12a098 + 7dc4eaf commit 7eb1776

File tree

15 files changed

+148
-72
lines changed

15 files changed

+148
-72
lines changed

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,4 +67,5 @@ __pycache__
6767

6868
__pycache__
6969

70-
!/workspace/wget.exe
70+
!/workspace/wget.exe
71+
/workspace/*.mp4

.vscode/tasks.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
"label": "build",
88
"type": "shell",
99
"command": "make pro -j25"
10+
11+
// for cmake
12+
//"command": "cd build && make pro -j25"
1013
}
1114
]
1215
}

CMakeLists.txt

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@ set(CMAKE_BUILD_TYPE Debug)
88
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/workspace)
99
set(HAS_PYTHON ON)
1010

11-
# 取决于你的设备,这是编译cu文件的配置,一般可以不用设置,除非你调用了特定函数例如half新特性
11+
# 如果要支持python则设置python路径
12+
set(PythonRoot "/data/datav/newbb/lean/anaconda3/envs/torch1.8")
13+
set(PythonName "python3.9")
14+
1215
# 如果你是不同显卡,请设置为显卡对应的号码参考这里:https://developer.nvidia.com/zh-cn/cuda-gpus#compute
1316
set(CUDA_GEN_CODE "-gencode=arch=compute_75,code=sm_75")
1417

@@ -27,10 +30,6 @@ set(TENSORRT_DIR "/data/sxai/lean/TensorRT-8.0.1.6-cuda10.2-cudnn8.2")
2730
# 因为protobuf,需要用特定版本,所以这里指定路径
2831
set(PROTOBUF_DIR "/data/sxai/lean/protobuf3.11.4")
2932

30-
# 如果要支持python则设置python路径
31-
set(PythonDIR "/data/datav/newbb/lean/anaconda3/envs/torch1.8")
32-
set(PythonName "python3.9")
33-
3433

3534
find_package(CUDA REQUIRED)
3635
find_package(OpenCV)
@@ -57,9 +56,9 @@ link_directories(
5756
)
5857

5958
if("${HAS_PYTHON}" STREQUAL "ON")
60-
message("Usage Python ${PythonDIR}")
61-
include_directories(${PythonDIR}/include/${PythonName})
62-
link_directories(${PythonDIR}/lib)
59+
message("Usage Python ${PythonRoot}")
60+
include_directories(${PythonRoot}/include/${PythonName})
61+
link_directories(${PythonRoot}/lib)
6362
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DHAS_PYTHON")
6463
endif()
6564

Makefile

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,9 @@ lean_tensor_rt := /data/sxai/lean/TensorRT-8.0.1.6-cuda10.2-cudnn8.2
2626
lean_cudnn := /data/sxai/lean/cudnn8.2.2.26
2727
lean_opencv := /data/sxai/lean/opencv4.2.0
2828
lean_cuda := /data/sxai/lean/cuda10.2
29-
lean_python := /data/datav/newbb/lean/anaconda3/envs/torch1.8
3029
use_python := true
30+
python_root := /data/datav/newbb/lean/anaconda3/envs/torch1.8
31+
python_name := python3.9
3132

3233
include_paths := src \
3334
src/application \
@@ -42,7 +43,7 @@ include_paths := src \
4243
library_paths := $(lean_protobuf)/lib \
4344
$(lean_opencv)/lib \
4445
$(lean_tensor_rt)/lib \
45-
$(lean_cuda)/lib \
46+
$(lean_cuda)/lib64 \
4647
$(lean_cudnn)/lib
4748

4849
link_librarys := opencv_core opencv_imgproc opencv_videoio opencv_imgcodecs \
@@ -55,9 +56,9 @@ link_librarys := opencv_core opencv_imgproc opencv_videoio opencv_imgcodecs \
5556
support_define :=
5657

5758
ifeq ($(use_python), true)
58-
include_paths += $(lean_python)/include/python3.9
59-
library_paths += $(lean_python)/lib
60-
link_librarys += python3.9
59+
include_paths += $(python_root)/include/$(python_name)
60+
library_paths += $(python_root)/lib
61+
link_librarys += $(python_name)
6162
support_define += -DHAS_PYTHON
6263
endif
6364

@@ -67,6 +68,7 @@ library_paths := $(foreach item,$(library_paths),-L$(item))
6768
link_librarys := $(foreach item,$(link_librarys),-l$(item))
6869

6970
# 如果是其他显卡,请修改-gencode=arch=compute_75,code=sm_75为对应显卡的能力
71+
# 显卡对应的号码参考这里:https://developer.nvidia.com/zh-cn/cuda-gpus#compute
7072
cpp_compile_flags := -std=c++11 -fPIC -m64 -g -fopenmp -w -O0 $(support_define)
7173
cu_compile_flags := -std=c++11 -m64 -Xcompiler -fPIC -g -w -gencode=arch=compute_75,code=sm_75 -O0 $(support_define)
7274
link_flags := -pthread -fopenmp -Wl,-rpath='$$ORIGIN'

README.md

Lines changed: 13 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
##
12
## B站同步视频讲解
23
- https://www.bilibili.com/video/BV1Xw411f7FW
34
- 相关PPTX下载:http://zifuture.com:1556/fs/sxai/tensorRT.pptx
@@ -14,7 +15,7 @@
1415
3. TensorRT.vcxproj文件中,修改`<Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 10.0.targets" />`为你配置的CUDA路径
1516
4. TensorRT.vcxproj文件中,修改`<CodeGeneration>compute_61,sm_61</CodeGeneration>`为你显卡配备的计算能力
1617
- 根据型号参考这里:https://developer.nvidia.com/zh-cn/cuda-gpus#compute
17-
5. 配置依赖,或者下载依赖到lean中。配置VC++目录->包含目录和引用目录
18+
5. 配置依赖或者下载依赖到lean中。配置VC++目录->包含目录和引用目录
1819
6. 配置环境,调试->环境,设置PATH路径
1920
7. 编译并运行案例
2021

@@ -27,7 +28,7 @@ image = cv2.imread("inference/car.jpg")
2728
bboxes = yolo.commit(image).get()
2829
```
2930

30-
- Pytorch无缝对接
31+
- Pytorch的无缝对接
3132
```python
3233
model = models.resnet18(True).eval().to(device)
3334
trt_model = tp.convert_torch_to_trt(model, input)
@@ -76,31 +77,7 @@ auto box = engine->commit(image).get();
7677
## 效果图
7778
![](workspace/yq.jpg)
7879
79-
## YoloV5-ONNX推理支持-第一种,使用提供的onnx
80-
- 这个yolov5m.onnx模型使用官方最新版本直接导出得到
81-
- CMake
82-
- 在CMakeLists.txt中配置依赖路径tensorRT、cuda、cudnn、protobuf
83-
```bash
84-
git clone git@github.com:shouxieai/tensorRT_cpp.git
85-
cd tensorRT_cpp
86-
87-
mkdir build
88-
cd build
89-
cmake ..
90-
make yolo -j32
91-
92-
# 或者make alphapose -j32
93-
```
94-
95-
- Makefile
96-
- 在Makefile中配置好依赖的tensorRT、cuda、cudnn、protobuf
97-
```bash
98-
git clone git@github.com:shouxieai/tensorRT_cpp.git
99-
cd tensorRT_cpp
100-
make yolo -j32
101-
```
102-
103-
## YoloV5-ONNX推理支持-第二种,自行从官方导出onnx
80+
## YoloV5支持
10481
- yolov5的onnx,你的pytorch版本>=1.7时,导出的onnx模型可以直接被当前框架所使用
10582
- 你的pytorch版本低于1.7时,或者对于yolov5其他版本(2.0、3.0、4.0),可以对opset进行简单改动后直接被框架所支持
10683
- 如果你想实现低版本pytorch的tensorRT推理、动态batchsize等更多更高级的问题,请打开我们[博客地址](http://zifuture.com:8090)后找到二维码进群交流
@@ -137,7 +114,7 @@ torch.onnx.export(dynamic_axes={'images': {0: 'batch'}, # shape(1,3,640,640)
137114
3. 导出onnx模型
138115
```bash
139116
cd yolov5
140-
python export.py --weights=yolov5s.pt --dynamic --opset=11
117+
python export.py --weights=yolov5s.pt --dynamic --include=onnx --opset=11
141118
```
142119
4. 复制模型并执行
143120
```bash
@@ -146,6 +123,7 @@ cd tensorRT_cpp
146123
make yolo -j32
147124
```
148125

126+
149127
## YoloX的支持
150128
- https://github.com/Megvii-BaseDetection/YOLOX
151129
- 你可以选择直接make run,会从镜像地址下载onnx并推理运行看到效果。不需要自行导出
@@ -190,6 +168,7 @@ model.head.decode_in_inference = True
190168
191169
3. 导出onnx模型
192170
```bash
171+
193172
# 下载模型,或许你需要翻墙
194173
# wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_m.pth
195174
@@ -335,6 +314,7 @@ auto int8process = [](int current, int count, vector<string>& images, shared_ptr
335314
}
336315
};
337316
317+
338318
// 编译模型指定为INT8
339319
auto model_file = "yolov5m.int8.trtmodel";
340320
TRT::compile(
@@ -363,11 +343,11 @@ engine->print();
363343
// 加载图像
364344
auto image = imread("demo.jpg");
365345
366-
// 获取模型的输入和输出tensor节点,可以根据名字或者索引获取第几个
346+
// 获取模型的输入和输出tensor节点,可以根据名字或者索引获取具体第几个
367347
auto input = engine->input(0);
368348
auto output = engine->output(0);
369349
370-
// 把图像塞到input tensor中,这里是减去均值,除以标准差
350+
// 把图像塞到input tensor中,这里是减去均值,并除以标准差
371351
float mean[] = {0, 0, 0};
372352
float std[] = {1, 1, 1};
373353
input->set_norm_mat(i, image, mean, std);
@@ -409,6 +389,7 @@ int HSwish::enqueue(const std::vector<GTensor>& inputs, std::vector<GTensor>& ou
409389
return 0;
410390
}
411391
392+
412393
RegisterPlugin(HSwish);
413394
```
414395
@@ -461,9 +442,9 @@ Engine 0x23dd7780 detail
461442
[2021-07-22 14:37:42][info][_main.cpp:124]:outputs[0].size = 2
462443
[2021-07-22 14:37:42][info][_main.cpp:124]:outputs[1].size = 5
463444
[2021-07-22 14:37:42][info][_main.cpp:124]:outputs[2].size = 1
464-
```
465445
446+
```
466447
467448
## 关于
468449
- 我们的博客地址:http://www.zifuture.com:8090/
469-
- 我们的B站地址:https://space.bilibili.com/1413433465
450+
- 我们的B站地址: https://space.bilibili.com/1413433465

TensorRT.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
<ClCompile Include="src\application\app_scrfd.cpp" />
3232
<ClCompile Include="src\application\app_scrfd\scrfd.cpp" />
3333
<ClCompile Include="src\application\app_yolo.cpp" />
34+
<ClCompile Include="src\application\app_plugin.cpp" />
3435
<ClCompile Include="src\application\app_lesson.cpp" />
3536
<ClCompile Include="src\application\app_yolo\yolo.cpp" />
3637
<ClCompile Include="src\application\tools\auto_download.cpp" />

TensorRT.vcxproj.filters

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,9 @@
154154
<ClCompile Include="src\application\app_arcface.cpp">
155155
<Filter>src\application</Filter>
156156
</ClCompile>
157+
<ClCompile Include="src\application\app_plugin.cpp">
158+
<Filter>src\application</Filter>
159+
</ClCompile>
157160
<ClCompile Include="src\application\app_retinaface\retinaface.cpp">
158161
<Filter>src\application\app_retinaface</Filter>
159162
</ClCompile>

python/test_yolov5.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,4 +23,5 @@
2323
saveto = "single_inference/yolov5.car.jpg"
2424
print(f"Save to {saveto}")
2525

26-
cv2.imwrite(saveto, image)
26+
cv2.imwrite(saveto, image)
27+

src/application/app_arcface.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include "app_arcface/arcface.hpp"
77
#include "tools/deepsort.hpp"
88
#include "tools/zmq_remote_show.hpp"
9+
#include <unordered_map>
910

1011
using namespace std;
1112
using namespace cv;

src/application/app_lesson.cpp

Lines changed: 57 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,64 @@
33
#include <infer/trt_infer.hpp>
44
#include <builder/trt_builder.hpp>
55
#include "app_yolo/yolo.hpp"
6+
#include <cuda_runtime.h>
67

78
using namespace std;
89

10+
// static void test_tensor1(){
11+
12+
// size_t cpu_bytes = 1024;
13+
// size_t gpu_bytes = 2048;
14+
15+
// ///////////////////////////////////////////////////////////////////
16+
// // 封装效果,自动分配和释放
17+
// TRT::MixMemory memory;
18+
// void* host_ptr = memory.cpu(cpu_bytes);
19+
// void* device_ptr = memory.gpu(gpu_bytes);
20+
21+
// ///////////////////////////////////////////////////////////////////
22+
// // 不封装效果
23+
// void* host_ptr = nullptr;
24+
// void* device_ptr = nullptr;
25+
// cudaMallocHost(&host_ptr, cpu_bytes);
26+
// cudaMalloc(&device_ptr, gpu_bytes);
27+
28+
// cudaFreeHost(host_ptr);
29+
// cudaFree(device_ptr);
30+
// ///////////////////////////////////////////////////////////////////
31+
// }
32+
33+
static void test_tensor2(){ // Demo: logs tensor.head() after construction, after a cpu<float>() access, and after a gpu<float>() access
34+
35+
///////////////////////////////////////////////////////////////////
36+
// Automatic memory copying: the head attribute marks where the most recent data lives.
37+
// If the side being accessed does not hold the latest data, a copy is performed automatically.
38+
TRT::Tensor tensor({1, 3, 5, 5}, TRT::DataType::Float);
39+
INFO("tensor.head = %s", TRT::data_head_string(tensor.head())); // expected to print Init; no memory has been allocated yet
40+
41+
tensor.cpu<float>()[0] = 512; // accessing the cpu side allocates cpu memory
42+
INFO("tensor.head = %s", TRT::data_head_string(tensor.head())); // expected to print Host
43+
44+
float* device_ptr = tensor.gpu<float>(); // accessing the gpu side while the latest data is on Host triggers a copy and marks the latest data as Device
45+
INFO("tensor.head = %s", TRT::data_head_string(tensor.head())); // expected to print Device
46+
INFO("device_ptr[0] = %f", device_ptr[0]); // expected to print 512.00000. NOTE(review): this reads through the pointer returned by gpu<float>() from host code; presumably that allocation is host-accessible, confirm
47+
}
48+
49+
static void test_tensor3(){ // Demo: logs the element offset of index (0, 2, 3, 2, 1, 3) in a {1, 3, 5, 5, 2, 5} tensor, obtained three ways
50+
51+
///////////////////////////////////////////////////////////////////
52+
// Compute the offset of a multi-dimensional index.
53+
TRT::Tensor tensor({1, 3, 5, 5, 2, 5}, TRT::DataType::Float);
54+
auto ptr_origin = tensor.cpu<float>();
55+
auto ptr_channel2 = tensor.cpu<float>(0, 2, 3, 2, 1, 3);
56+
57+
INFO("Offset = %d", static_cast<int>(ptr_channel2 - ptr_origin)); // expected to print 678; pointer subtraction yields std::ptrdiff_t, so cast to int to match the %d conversion
58+
INFO("Offset = %d", tensor.offset(0, 2, 3, 2, 1, 3)); // expected to print 678. NOTE(review): assumes offset() returns an int-sized value to match %d, confirm
59+
60+
int offset_compute = ((((0 * 3 + 2) * 5 + 3) * 5 + 2) * 2 + 1) * 5 + 3; // row-major flattening of the index against dims {1, 3, 5, 5, 2, 5}; evaluates to 678
61+
INFO("Compute = %d", offset_compute); // prints 678
62+
}
63+
964
static void lesson1(){
1065

1166
/** 模型编译,onnx到trtmodel **/
@@ -180,19 +235,11 @@ void lesson_cache1frame(){
180235
int app_lesson(){
181236

182237
iLogger::set_log_level(iLogger::LogLevel::Verbose);
183-
lesson1();
238+
test_tensor3();
239+
// lesson1();
184240
// lesson2();
185241
// lesson3();
186242
// lesson_cache1frame();
187243
return 0;
188244
}
189245

190-
191-
192-
193-
194-
195-
196-
197-
198-

0 commit comments

Comments
 (0)