Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 13 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,23 +1,25 @@
cmake_minimum_required(VERSION 3.10)
PROJECT(Linfer VERSION 1.0.0 LANGUAGES C CXX CUDA)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_BUILD_TYPE Debug)
#set(CMAKE_BUILD_TYPE Release)
set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/workspace)
set(LIBRARY_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/workspace)
set(CMAKE_SKIP_BUILD_RPATH False)
set(CMAKE_SKIP_RPATH False)
set(CMAKE_BUILD_RPATH "/home/lsf/Third_party/TensorRT-8.6.1.6/lib")
set(CMAKE_BUILD_RPATH "/usr/local/TensorRT-8.5.1.7/lib") # TODO: Modify this
find_package(PkgConfig)
pkg_check_modules(YAMLCPP REQUIRED yaml-cpp>=0.5)
include_directories(${YAMLCPP_INCLUDE_DIRS})

file(GLOB_RECURSE CPPS
${PROJECT_SOURCE_DIR}/apps/*.cpp
${PROJECT_SOURCE_DIR}/apps/*.cu
${PROJECT_SOURCE_DIR}/trt_common/*.cpp
${PROJECT_SOURCE_DIR}/trt_common/*.cu
)

set(CUDA_DIR "/usr/local/cuda")
set(TENSORRT_DIR "/home/lsf/Third_party/TensorRT-8.6.1.6")
set(CUDA_DIR "/usr/local/cuda-11.8") # TODO: Modify this
set(TENSORRT_DIR "/usr/local/TensorRT-8.5.1.7") # TODO: Modify this
find_package(OpenCV REQUIRED)
if(POLICY CMP0146)
cmake_policy(SET CMP0146 OLD)
Expand All @@ -28,7 +30,7 @@ include_directories(
${OpenCV_INCLUDE_DIRS}
${CUDA_DIR}/include
${TENSORRT_DIR}/include
"/usr/include/eigen3"
"/usr/include/eigen3" # TODO: Modify this (maybe)
)
link_directories(
${CUDA_DIR}/lib64
Expand All @@ -41,15 +43,16 @@ list(APPEND ALL_LIBS
)

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wfatal-errors -pthread -w")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++11 -g -O0 -Xcompiler -fPIC")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -std=c++17 -g -O0 -Xcompiler -fPIC")

add_library(${PROJECT_NAME} SHARED ${CPPS})
target_link_libraries(${PROJECT_NAME} ${ALL_LIBS})

# reference:https://developer.nvidia.com/cuda-gpus#compute
set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 89)
# set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 89)
set_property(TARGET ${PROJECT_NAME} PROPERTY CUDA_ARCHITECTURES 86) # TODO: Modify this if necessary
target_compile_options(${PROJECT_NAME} PUBLIC
$<$<COMPILE_LANGUAGE:CUDA>:--default-stream per-thread -lineinfo --use_fast_math --disable-warnings>)

add_executable(pro main.cpp)
target_link_libraries(pro ${PROJECT_NAME} ${ALL_LIBS})
target_link_libraries(pro ${PROJECT_NAME} ${ALL_LIBS} ${YAMLCPP_LIBRARIES})
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

![Language](https://img.shields.io/badge/language-c++-brightgreen) ![Language](https://img.shields.io/badge/CUDA-12.1-brightgreen) ![Language](https://img.shields.io/badge/TensorRT-8.6.1.6-brightgreen) ![Language](https://img.shields.io/badge/OpenCV-4.5.5-brightgreen) ![Language](https://img.shields.io/badge/ubuntu-20.04-brightorigin)

[English](README_EN.md) | 简体中文

## Introduction

基于 TensorRT 的 C++ 高性能推理库。
Expand Down
154 changes: 154 additions & 0 deletions README_EN.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
# Linfer

![Language](https://img.shields.io/badge/language-c++-brightgreen) ![Language](https://img.shields.io/badge/CUDA-12.1-brightgreen) ![Language](https://img.shields.io/badge/TensorRT-8.6.1.6-brightgreen) ![Language](https://img.shields.io/badge/OpenCV-4.5.5-brightgreen) ![Language](https://img.shields.io/badge/ubuntu-20.04-brightorigin)

English | [简体中文](README.md)

## Introduction
A high-performance inference library for C++ based on TensorRT.



## Update News
🚀(2024.12.24) Supports YAML Configuration

🚀(2024.06.06) Supports target detection algorithm Yolov10!

🚀(2024.05.23) Supports semantic segmentation algorithm: PP-LiteSeg and MobileSeg in PaddleSeg, which are lightweight and efficient and suitable for deployment!

🚀(2023.12.03) Supports Panoramic driving perception algorithm YOLOPv2, Better, Faster, Stronger!

🚀 (2023.11.06) Support panoramic driving perception algorithm YOLOP!

🚀 (2023.10.19) Support single target tracking OSTrack, LightTrack! The separate single target tracking repository is [github]( https://github.com/l-sf/Track-trt)

🚀(2023.10.09) Support target detection algorithm RT-DETR!

🚀(2023.08.26) Support PTQ quantization, Yolov5/7 QAT quantization!

🚀(2023.07.19) Support target detection Yolo series 5/X/7/8, multi-target tracking Bytetrack.

## Highlights

- Support panoramic driving perception YOLOPv2, Target detection RT-DETR, Yolov5/X/7/8/10, multi-target tracking Bytetrack, single target tracking OSTrack, LightTrack;
- Pre-processing and post-processing are implemented as CUDA kernels, so high-performance inference can also be achieved on Jetson edge devices;
- Encapsulate Tensor and Infer to achieve memory reuse, automatic CPU/GPU memory copying, engine context management, input and output binding, etc.;
- The inference process implements the producer-consumer model, parallelizing preprocessing and inference to further improve performance;
- Uses the RAII concept + interface pattern to encapsulate applications, which is safe and convenient to use.

## Easy Using

The code structure of this project is as follows: the implementation code of each algorithm is stored in the `apps` folder, where `app_xxx.cpp` is the demo function that calls the corresponding `xxx` algorithm. The algorithms have no dependencies on each other — if you only need yolopv2, you can delete all the other algorithms in this folder without any impact. The `trt_common` folder includes the commonly used cuda_tools and encapsulates TensorRT's Tensor and Infer as well as the producer-consumer model. The `quant-tools` folder contains quantization scripts, mainly for yolov5/7.

The demo function of the algorithm you want to use is called in `main.cpp`.

```bash
.
├── apps
│   ├── yolo
│   └── yolop
│   ├── app_yolo.cpp
│   ├── app_yolop.cpp
│   ├── ...
├── trt_common
│   ├── cuda_tools.hpp
│   ├── trt_infer.hpp
│   ├── trt_tensor.hpp
│   └── ...
├── quant-tools
│   └── ...
├── workspace
│ └── ...
├── CMakeLists.txt
└── main.cpp
```

If you want to deploy your own algorithm, just create a new folder for your algorithm in the `apps` folder, and imitate the `trt_infer/trt_tensor` in other algorithms. You can use it as you like. I will update more detailed instructions later when I have more free time.


## Project Build and Run

1. install cuda/tensorrt/opencv

[reference](https://github.com/l-sf/Notes/blob/main/notes/Ubuntu20.04_install_tutorials.md#%E4%BA%94cuda--cudnn--tensorrt-install)

2. compile engine

3. Download the onnx model from [google drive](https://drive.google.com/drive/folders/16ZqDaxlWm1aDXQsjsxLS7yFL0YqzHbxT?usp=sharing) or export it according to the tutorial, the tutorial is in README of each folder. Put your onnx file under `workspace/onnx_models` folder (create it)

```bash
cd Linfer/workspace
# Modify the onnx path, then run:
bash compile_engine.sh

# Or uncomment a particular model in compile_engine.sh, or copy any of its commands directly, like this:

# YOLOV8S
trtexec --onnx=./onnx_models/yolov8n.onnx \
--saveEngine=./yolov8n.trt \
--buildOnly \
--minShapes=images:1x3x640x640 \
--optShapes=images:1x3x640x640 \
--maxShapes=images:8x3x640x640 \
--fp16
```

4. build

```bash
# Modify CMakeLists.txt so the cuda/tensorrt/opencv paths match your own installation, then:
cd Linfer
mkdir build && cd build
cmake .. && make -j4
```

5. Configure: create your configuration file `config.yaml` (see the demo `config.yaml` for reference). For instance, if you want to run the ByteTrack tracking algorithm with YOLOv8, you'll need the `.trt` engine built in step 3, and then provide its path in `config.yaml` like below:

```yaml
tasks:
- task: "track"
subtasks:
- type: "inference_bytetrack"
engine_file: "/home/e300/mahmood/code/Linfer/workspace/yolov8s.trt"
gpuid: 0
yolo_type: "V8"
video_file: "/home/e300/mahmood/code/Linfer/workspace/videos/snow.mp4"
output_save_path: ""
```

6. run (to avoid any errors please provide full paths always)

```bash
cd Linfer/workspace
./pro config.yaml
```

## Speed Test

Tested on Jetson Orin Nano 8G, the test includes the entire process (image preprocessing + model inference + post-processing decoding)

| Model | Precision | Resolution | FPS(bs=1) |
| :--------: | :-------: | :--------: | :-------: |
| yolov5_s | fp16 | 640x640 | 96.06 |
| yolox_s | fp16 | 640x640 | 79.64 |
| yolov7 | **int8** | 640x640 | 49.55 |
| yolov8_n | fp16 | 640x640 | 121.94 |
| yolov8_s | fp16 | 640x640 | 81.40 |
| yolov8_m | fp16 | 640x640 | 41.14 |
| yolov8_l | fp16 | 640x640 | 27.52 |
| yolov10_n | fp16 | 640x640 | 115.13 |
| yolov10_s | fp16 | 640x640 | 73.65 |
| yolov10_m | fp16 | 640x640 | 39.51 |
| yolov10_l | fp16 | 640x640 | 26.41 |
| rtdetr_r50 | fp16 | 640x640 | 11.25 |
| lighttrack | fp16 | 256x256 | 90.91 |
| ostrack | fp16 | 256x256 | 37.04 |
| yolop | fp16 | 640x640 | 31.4 |
| yolopv2 | fp16 | 480x640 | 21.9 |
| PP-LiteSeg | fp16 | 256x512 | 129.81 |
| MobileSeg | fp16 | 256x512 | 140.36 |



## Reference

- [tensorRT_Pro](https://github.com/shouxieai/tensorRT_Pro.git)
- [Video:详解TensorRT的C++/Python高性能部署,实战应用到项目](https://www.bilibili.com/video/BV1Xw411f7FW/?share_source=copy_web&vd_source=4bb05d1ac6ff39b7680900de14419dca)

19 changes: 13 additions & 6 deletions apps/app_mot.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@


#include "trt_common/ilogger.hpp"
#include "yolo/yolo.hpp"
#include <opencv2/opencv.hpp>
#include "bytetrack/BYTETracker.h"
#include <cstdio>
#include <filesystem>

using namespace std;
namespace fs = std::filesystem;


template<typename Cond>
static vector<Object> det2tracks(const Yolo::BoxArray& array, const Cond& cond){
Expand All @@ -30,7 +31,7 @@ static vector<Object> det2tracks(const Yolo::BoxArray& array, const Cond& cond){
}


void inference_bytetrack(const string& engine_file, int gpuid, Yolo::Type type, const string& video_file){
void inference_bytetrack(const string& engine_file, int gpuid, Yolo::Type type, const string& video_file, const string& output_save_path){

auto engine = Yolo::create_infer(
engine_file, // engine file
Expand Down Expand Up @@ -59,8 +60,15 @@ void inference_bytetrack(const string& engine_file, int gpuid, Yolo::Type type,
).set_per_frame_motion({0.1, 0.1, 0.1, 0.1,
0.2, 0.2, 1, 0.2}
).set_max_time_lost(150);

string output_path = output_save_path;
if (output_path.empty())
{
fs::path input_path(video_file);
output_path = input_path.stem().string() + "_output" + input_path.extension().string();
}

cv::VideoWriter writer("videos/res_mot.mp4", cv::VideoWriter::fourcc('M', 'P', 'E', 'G'), fps, cv::Size(width, height));
cv::VideoWriter writer(output_path, cv::VideoWriter::fourcc('M', 'P', 'E', 'G'), fps, cv::Size(width, height));
auto cond = [](const Yolo::Box& b){return b.label == 0;};

shared_future<vector<Yolo::Box>> prev_fut;
Expand Down Expand Up @@ -97,5 +105,4 @@ void inference_bytetrack(const string& engine_file, int gpuid, Yolo::Type type,

writer.release();
printf("Done.\n");
}

}
Loading