
Commit 358d96e

Wish committed: update readme

1 parent 3a91dd3 · commit 358d96e

File tree (7 files changed: +112 −50 lines changed)

- .vscode/tasks.json
- README.md
- src/application/app_lesson.cpp
- src/application/app_yolo.cpp
- src/tensorRT/common/trt_tensor.hpp
- src/tensorRT/onnxplugin/plugins/DCNv2.cu
- workspace/my-yolov5s-car.jpg

.vscode/tasks.json

Lines changed: 3 additions & 0 deletions
```diff
@@ -7,6 +7,9 @@
             "label": "build",
             "type": "shell",
             "command": "make pro -j25"
+
+            // for cmake
+            //"command": "cd build && make pro -j25"
         }
     ]
 }
```
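The commented-out variant assumes the CMake workflow (cmake run from a build/ directory, as the README's former CMake section described), where the generated makefile lives in build/ rather than the repository root; this reading is inferred from the comment, not stated in the commit.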

README.md

Lines changed: 2 additions & 26 deletions
````diff
@@ -76,31 +76,7 @@ auto box = engine->commit(image).get();
 ## Demo images
 ![](workspace/yq.jpg)
 
-## YoloV5 ONNX inference support, option 1: use the provided onnx
-- This yolov5m.onnx model was exported directly from the latest official release
-- CMake
-  - Configure the dependency paths for tensorRT, cuda, cudnn, and protobuf in CMakeLists.txt
-```bash
-git clone git@github.com:shouxieai/tensorRT_cpp.git
-cd tensorRT_cpp
-
-mkdir build
-cd build
-cmake ..
-make yolo -j32
-
-# or make alphapose -j32
-```
-
-- Makefile
-  - Configure the tensorRT, cuda, cudnn, and protobuf dependencies in the Makefile
-```bash
-git clone git@github.com:shouxieai/tensorRT_cpp.git
-cd tensorRT_cpp
-make yolo -j32
-```
-
-## YoloV5 ONNX inference support, option 2: export the onnx from the official repo yourself
+## YoloV5 support
 - With pytorch >= 1.7, the exported yolov5 onnx model can be used by this framework as-is
 - With pytorch below 1.7, or with other yolov5 releases (2.0, 3.0, 4.0), a simple opset change is enough for the framework to support the model
 - For tensorRT inference on older pytorch, dynamic batch size, and other advanced topics, open our [blog](http://zifuture.com:8090) and find the QR code to join the group chat
````
````diff
@@ -137,7 +113,7 @@ torch.onnx.export(dynamic_axes={'images': {0: 'batch'}, # shape(1,3,640,640)
 3. Export the onnx model
 ```bash
 cd yolov5
-python export.py --weights=yolov5s.pt --dynamic --opset=11
+python export.py --weights=yolov5s.pt --dynamic --include=onnx --opset=11
 ```
 4. Copy the model and run
 ```bash
````
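Downstream of this export step, a usage sketch in C++: it compiles the exported onnx and runs detection, mirroring the my_test() function added to src/application/app_yolo.cpp in this same commit. The file paths, thresholds, and the OpenCV include are illustrative assumptions, and the single-image commit(image).get() form follows the README snippet shown in the hunk header above.

```cpp
#include <builder/trt_builder.hpp>
#include "app_yolo/yolo.hpp"
#include <opencv2/opencv.hpp>

// Compile the exported onnx into a trtmodel once, then run inference.
void run_exported_yolov5(){
    TRT::compile(
        TRT::Mode::FP32,    // precision mode
        5,                  // max batch size
        "yolov5s.onnx",     // model exported in step 3 (illustrative path)
        "yolov5s.trtmodel"  // engine file to generate
    );

    // gpu id 0, confidence threshold 0.25, nms threshold 0.5 (values taken from my_test)
    auto yolo  = Yolo::create_infer("yolov5s.trtmodel", Yolo::Type::V5, 0, 0.25f, 0.5f);
    auto image = cv::imread("car.jpg");

    // Submit one image and wait for its boxes.
    auto boxes = yolo->commit(image).get();
    for(auto& box : boxes)
        cv::rectangle(image, cv::Point(box.left, box.top),
                      cv::Point(box.right, box.bottom), cv::Scalar(0, 255, 0), 3);
    cv::imwrite("result.jpg", image);
}
```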

src/application/app_lesson.cpp

Lines changed: 57 additions & 10 deletions
```diff
@@ -3,9 +3,64 @@
 #include <infer/trt_infer.hpp>
 #include <builder/trt_builder.hpp>
 #include "app_yolo/yolo.hpp"
+#include <cuda_runtime.h>
 
 using namespace std;
 
+// static void test_tensor1(){
+
+//     size_t cpu_bytes = 1024;
+//     size_t gpu_bytes = 2048;
+
+//     ///////////////////////////////////////////////////////////////////
+//     // With the wrapper: allocation and release are automatic
+//     TRT::MixMemory memory;
+//     void* host_ptr = memory.cpu(cpu_bytes);
+//     void* device_ptr = memory.gpu(gpu_bytes);
+
+//     ///////////////////////////////////////////////////////////////////
+//     // Without the wrapper
+//     void* host_ptr = nullptr;
+//     void* device_ptr = nullptr;
+//     cudaMallocHost(&host_ptr, cpu_bytes);
+//     cudaMalloc(&device_ptr, gpu_bytes);
+
+//     cudaFreeHost(&host_ptr);
+//     cudaFree(&device_ptr);
+//     ///////////////////////////////////////////////////////////////////
+// }
+
+static void test_tensor2(){
+
+    ///////////////////////////////////////////////////////////////////
+    // Automatic memory copies: the head attribute marks where the latest data lives.
+    // If the side being accessed does not hold the latest data, a copy happens automatically.
+    TRT::Tensor tensor({1, 3, 5, 5}, TRT::DataType::Float);
+    INFO("tensor.head = %s", TRT::data_head_string(tensor.head())); // prints Init, no memory allocated yet
+
+    tensor.cpu<float>()[0] = 512; // accessing the cpu side allocates cpu memory
+    INFO("tensor.head = %s", TRT::data_head_string(tensor.head())); // prints Host
+
+    float* device_ptr = tensor.gpu<float>(); // accessing the gpu side: the latest data is on Host, so a copy occurs and head becomes Device
+    INFO("tensor.head = %s", TRT::data_head_string(tensor.head())); // prints Device
+    INFO("device_ptr[0] = %f", device_ptr[0]); // prints 512.00000
+}
+
+static void test_tensor3(){
+
+    ///////////////////////////////////////////////////////////////////
+    // Compute the offset for a set of dimension indices
+    TRT::Tensor tensor({1, 3, 5, 5, 2, 5}, TRT::DataType::Float);
+    auto ptr_origin = tensor.cpu<float>();
+    auto ptr_channel2 = tensor.cpu<float>(0, 2, 3, 2, 1, 3);
+
+    INFO("Offset = %d", ptr_channel2 - ptr_origin);       // prints 678
+    INFO("Offset = %d", tensor.offset(0, 2, 3, 2, 1, 3)); // prints 678
+
+    int offset_compute = ((((0 * 3 + 2) * 5 + 3) * 5 + 2) * 2 + 1) * 5 + 3;
+    INFO("Compute = %d", offset_compute); // prints 678
+}
+
 static void lesson1(){
 
     /** Model compilation: onnx to trtmodel **/
```
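test_tensor3 above demonstrates the standard row-major offset rule: fold each index in with offset = offset * dim + index (Horner's scheme), which for shape {1,3,5,5,2,5} and indices (0,2,3,2,1,3) gives ((((0·3+2)·5+3)·5+2)·2+1)·5+3 = 678. A minimal standalone sketch of that rule, as a hypothetical helper independent of the framework:

```cpp
#include <cstdio>
#include <vector>

// Row-major offset: fold each index in with offset = offset * dim + index.
static int row_major_offset(const std::vector<int>& shape, const std::vector<int>& index){
    int offset = 0;
    for(size_t i = 0; i < shape.size(); ++i)
        offset = offset * shape[i] + index[i];
    return offset;
}

int main(){
    // shape {1,3,5,5,2,5}, indices (0,2,3,2,1,3) -> 678, matching tensor.offset above
    printf("offset = %d\n", row_major_offset({1, 3, 5, 5, 2, 5}, {0, 2, 3, 2, 1, 3}));
    return 0;
}
```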
```diff
@@ -180,19 +235,11 @@ void lesson_cache1frame(){
 int app_lesson(){
 
     iLogger::set_log_level(iLogger::LogLevel::Verbose);
-    lesson1();
+    test_tensor3();
+    // lesson1();
     // lesson2();
     // lesson3();
     // lesson_cache1frame();
     return 0;
 }
 
-
-
-
-
-
-
-
-
-
```
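The comments in test_tensor2 describe head-based bookkeeping: the tensor records whether Host or Device holds the newest data and copies lazily when the other side is accessed. A toy illustration of that idea, with two plain host buffers standing in for pinned and device memory (a simplification for exposition, not the framework's implementation):

```cpp
#include <cstdio>
#include <vector>

enum class DataHead { Init, Host, Device };

// Toy stand-in for TRT::Tensor's lazy-copy behavior.
struct ToyTensor {
    std::vector<float> host, device;
    DataHead head = DataHead::Init;

    explicit ToyTensor(size_t n) : host(n), device(n) {}

    float* cpu(){
        if(head == DataHead::Device) host = device; // device side is newer: copy back
        head = DataHead::Host;
        return host.data();
    }
    float* gpu(){
        if(head == DataHead::Host) device = host;   // host side is newer: copy over
        head = DataHead::Device;
        return device.data();
    }
};

int main(){
    ToyTensor t(25);
    t.cpu()[0] = 512;                     // head becomes Host
    printf("gpu[0] = %f\n", t.gpu()[0]);  // Host -> Device copy, prints 512.000000
    return 0;
}
```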

src/application/app_yolo.cpp

Lines changed: 37 additions & 1 deletion
```diff
@@ -169,11 +169,47 @@ static void test(Yolo::Type type, TRT::Mode mode, const string& model){
     forward_engine(model_file, type);
 }
 
+void my_test(){
+
+    TRT::compile(
+        TRT::Mode::FP32,
+        5,
+        "/data/sxai/temp/yolov5-5.0/yolov5s.onnx",
+        "my-yolov5-5.0s.trtmodel"
+    );
+    INFO("Done");
+
+    auto yolo = Yolo::create_infer(
+        "my-yolov5-5.0s.trtmodel",
+        Yolo::Type::V5,
+        0, 0.25f, 0.5f
+    );
+
+    auto image = cv::imread("/data/sxai/tensorRT/workspace/inference/car.jpg");
+    auto bboxes = yolo->commits({image, image})[1].get();
+
+    for(auto& box : bboxes){
+
+        uint8_t r, g, b;
+        tie(r, g, b) = iLogger::random_color(box.class_label);
+
+        cv::rectangle(
+            image,
+            cv::Point(box.left, box.top),
+            cv::Point(box.right, box.bottom),
+            cv::Scalar(b, g, r),
+            3
+        );
+    }
+    cv::imwrite("my-yolov5s-car.jpg", image);
+}
+
 int app_yolo(){
 
+    // my_test();
     //iLogger::set_log_level(iLogger::LogLevel::Info);
     test(Yolo::Type::X, TRT::Mode::FP32, "yolox_m");
-    //test(Yolo::Type::V5, TRT::Mode::FP32, "yolov5s");
+    // test(Yolo::Type::V5, TRT::Mode::FP32, "yolov5s");
     // test(Yolo::Type::X, TRT::Mode::FP16, "yolox_s");
     // test(Yolo::Type::V5, TRT::Mode::FP16, "yolov5s");
     // test_int8(Yolo::Type::X, "yolox_s");
```
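One detail of my_test worth noting (inferred from the code; the diff adds no documentation): yolo->commits({image, image}) submits both images as one batch and returns a sequence of futures, one per image, so [1].get() waits for and retrieves only the second image's boxes. Single-image use goes through the commit(image).get() form shown in the README.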

src/tensorRT/common/trt_tensor.hpp

Lines changed: 1 addition & 1 deletion
```diff
@@ -79,7 +79,7 @@ namespace TRT {
     virtual ~Tensor();
 
     int numel() const;
-    int ndims() const{return shape_.size();}
+    inline int ndims() const{return shape_.size();}
     inline int size(int index) const{return shape_[index];}
     inline int shape(int index) const{return shape_[index];}
 
```
src/tensorRT/onnxplugin/plugins/DCNv2.cu

Lines changed: 12 additions & 12 deletions
```diff
@@ -229,8 +229,8 @@ __global__ void DCNIm2colKernel(
     }
 }
 
-template<typename _T>
-static __global__ void biasKernel(_T* data_input, const _T* bias, const int f_area, int edge) {
+template<typename DataType>
+static __global__ void biasKernel(DataType* data_input, const DataType* bias, const int f_area, int edge) {
 
     KernelPositionBlock;
     int bias_index = position / f_area;
@@ -276,7 +276,7 @@ inline void segemm_native(cublasHandle_t handle,
     cublasCheck(cublasGemmEx(handle, transa, transb, m, n, k, &halpha, A, CUDA_R_16F, lda, B, CUDA_R_16F, ldb, &hbeta, C, CUDA_R_16F, ldc, CUDA_R_16F, CUBLAS_GEMM_DFALT));
 }
 
-template<typename _T>
+template<typename DataType>
 static void enqueue_native(cublasHandle_t handle, const std::vector<GTensor>& inputs, std::vector<GTensor>& outputs, const std::vector<GTensor>& weights, void* workspace, cudaStream_t stream) {
     auto& data = inputs[0];
     auto& om = inputs[1];
@@ -295,16 +295,16 @@ static void enqueue_native(cublasHandle_t handle, const std::vector<GTensor>& in
 
     cublasCheck(cublasSetStream(handle, stream));
     for (int ibatch = 0; ibatch < data.batch(); ++ibatch) {
-        _T* maskWorkspacePtr = (_T*)workspace + (maskSize + im2colSize) * ibatch;
-        _T* im2colWorkspacePtr = (_T*)workspace + (maskSize + im2colSize) * ibatch + maskSize;
+        DataType* maskWorkspacePtr = (DataType*)workspace + (maskSize + im2colSize) * ibatch;
+        DataType* im2colWorkspacePtr = (DataType*)workspace + (maskSize + im2colSize) * ibatch + maskSize;
 
-        _T* inputMask = om.ptr<_T>(ibatch, om.channel() / 3 * 2);
+        DataType* inputMask = om.ptr<DataType>(ibatch, om.channel() / 3 * 2);
         checkCudaKernel(
             sigmoidKernel<<<CUDATools::grid_dims(maskSize), CUDATools::block_dims(maskSize), 0, stream>>>(inputMask, maskWorkspacePtr, maskSize);
         );
 
-        _T* datainput = data.ptr<_T>(ibatch);
-        _T* offset = om.ptr<_T>(ibatch);
+        DataType* datainput = data.ptr<DataType>(ibatch);
+        DataType* offset = om.ptr<DataType>(ibatch);
 
         auto jobs = (size_t)data.channel() * out.height() * out.width();
         checkCudaKernel(
@@ -314,17 +314,17 @@ static void enqueue_native(cublasHandle_t handle, const std::vector<GTensor>& in
             );
         );
 
-        _T* weightKernel = weights[0].ptr<_T>();
-        segemm_native(handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, alpha, im2colWorkspacePtr, n, weightKernel, k, beta, out.ptr<_T>(ibatch), n);
+        DataType* weightKernel = weights[0].ptr<DataType>();
+        segemm_native(handle, CUBLAS_OP_N, CUBLAS_OP_N, n, m, k, alpha, im2colWorkspacePtr, n, weightKernel, k, beta, out.ptr<DataType>(ibatch), n);
 
         if (weights.size() > 1) {
-            _T* weightBias = weights[1].ptr<_T>();
+            DataType* weightBias = weights[1].ptr<DataType>();
             size_t edge = out.count(1);
             size_t area = out.count(2);
 
             checkCudaKernel(
                 biasKernel<<<CUDATools::grid_dims(edge), CUDATools::block_dims(edge), 0, stream>>>(
-                    out.ptr<_T>(ibatch), weightBias, area, edge
+                    out.ptr<DataType>(ibatch), weightBias, area, edge
                 );
             );
         }
```

workspace/my-yolov5s-car.jpg

Binary file, 476 KB
