Skip to content

Commit bbd9c3d

Browse files
committed
TensorRT inference yolov13
1 parent 93ee5b5 commit bbd9c3d

25 files changed

+4070
-0
lines changed

yolov13/CMakeLists.txt

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
2+
3+
4+
cmake_minimum_required(VERSION 3.10)
project(yolov13 LANGUAGES CXX)

# ---- Environment-based paths for CUDA and TensorRT -------------------------
# CUDA_HOME / TENSORRT_DIR are read at configure time only; fall back to the
# conventional install locations when they are not set.
if(DEFINED ENV{CUDA_HOME})
    set(CUDA_TOOLKIT_ROOT_DIR $ENV{CUDA_HOME})
else()
    set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")
endif()

if(DEFINED ENV{TENSORRT_DIR})
    set(TENSORRT_ROOT $ENV{TENSORRT_DIR})
else()
    set(TENSORRT_ROOT "/opt/TensorRT-8.6.1.6")
endif()

message(STATUS "Using CUDA from: ${CUDA_TOOLKIT_ROOT_DIR}")
message(STATUS "Using TensorRT from: ${TENSORRT_ROOT}")

# Language standard: use CMAKE_CXX_STANDARD instead of injecting a raw
# -std flag via add_definitions (which is meant for -D defines only).
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Default to Debug only when the user did not pick a build type; never
# overwrite a user-provided -DCMAKE_BUILD_TYPE.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Debug)
endif()

set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc)
enable_language(CUDA)

# ---- CUDA and TensorRT include/link paths ----------------------------------
# TensorRT ships no CMake imported targets, so plain directories are used;
# include paths are attached per-target below rather than globally.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    message(STATUS "embed_platform on")
    set(cuda_include_dir ${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux/include)
    link_directories(${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux/lib)
else()
    message(STATUS "embed_platform off")
    set(cuda_include_dir ${CUDA_TOOLKIT_ROOT_DIR}/include)
    link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
endif()
link_directories(${TENSORRT_ROOT}/lib)

# ---- Plugin library (custom YOLO layer) ------------------------------------
add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/plugin/yololayer.cu)
target_compile_definitions(myplugins PRIVATE API_EXPORTS)
target_include_directories(myplugins PRIVATE
    ${PROJECT_SOURCE_DIR}/include
    ${PROJECT_SOURCE_DIR}/plugin
    ${cuda_include_dir}
    ${TENSORRT_ROOT}/include)
target_link_libraries(myplugins PRIVATE nvinfer cudart)

find_package(OpenCV REQUIRED)

# NOTE: file(GLOB) will not notice newly added sources until re-configure;
# prefer an explicit source list if this becomes a problem.
file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu)

# ---- Detection executable --------------------------------------------------
add_executable(yolov13-det ${PROJECT_SOURCE_DIR}/yolov13_det.cpp ${SRCS})
target_compile_definitions(yolov13-det PRIVATE API_EXPORTS)
target_include_directories(yolov13-det PRIVATE
    ${PROJECT_SOURCE_DIR}/include
    ${PROJECT_SOURCE_DIR}/plugin
    ${cuda_include_dir}
    ${TENSORRT_ROOT}/include
    ${OpenCV_INCLUDE_DIRS})
target_link_libraries(yolov13-det PRIVATE
    nvinfer
    cudart
    myplugins
    ${OpenCV_LIBS})

yolov13/gen_wts.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import sys # noqa: F401
2+
import argparse
3+
import os
4+
import struct
5+
import torch
6+
7+
8+
def parse_args():
    """Parse CLI arguments and resolve the output path.

    Returns a ``(weights_path, output_path)`` tuple. Exits via SystemExit
    when the weights file does not exist. When -o is omitted the output
    name is derived from the weights file; when -o names a directory the
    weights basename (with .wts extension) is placed inside it.
    """
    arg_parser = argparse.ArgumentParser(description='Convert .pt file to .wts')
    arg_parser.add_argument('-w', '--weights', required=True,
                            help='Input weights (.pt) file path (required)')
    arg_parser.add_argument('-o', '--output',
                            help='Output (.wts) file path (optional)')
    parsed = arg_parser.parse_args()

    if not os.path.isfile(parsed.weights):
        raise SystemExit('Invalid input file')

    if not parsed.output:
        # Default: same location and stem as the weights, .wts extension.
        parsed.output = os.path.splitext(parsed.weights)[0] + '.wts'
    elif os.path.isdir(parsed.output):
        # A directory was given: keep the weights basename inside it.
        base = os.path.splitext(os.path.basename(parsed.weights))[0] + '.wts'
        parsed.output = os.path.join(parsed.output, base)
    return parsed.weights, parsed.output
25+
26+
27+
pt_file, wts_file = parse_args()

print('Generating .wts for detection model')

# Load the checkpoint on CPU; weights_only=False is required because the
# checkpoint stores a full pickled model object under 'model', not just
# tensors.  NOTE(review): this unpickles arbitrary code — only run on
# checkpoints from a trusted source.
print(f'Loading {pt_file}')
device = 'cpu'
model = torch.load(pt_file, map_location=device, weights_only=False)['model'].float()  # load to FP32

# Remove the anchors buffer from the detection head so it is not written
# into the state dict below.  (The previous anchor_grid computation was
# dead code — its result was never used — and has been removed.)
delattr(model.model[-1], 'anchors')

model.to(device).eval()

# .wts layout: first line is the tensor count, then one line per tensor:
#   "<name> <element_count>  <hex big-endian fp32>..."
with open(wts_file, 'w') as f:
    f.write('{}\n'.format(len(model.state_dict().keys())))
    for k, v in model.state_dict().items():
        vr = v.reshape(-1).cpu().numpy()
        f.write('{} {} '.format(k, len(vr)))
        for vv in vr:
            f.write(' ')
            f.write(struct.pack('>f', float(vv)).hex())
        f.write('\n')

# python3 gen_wts.py -w your_model.pt -o output_name.wts

yolov13/include/block.h

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#pragma once

#include <map>
#include <string>
#include <vector>
#include "NvInfer.h"

// Builder helpers for assembling the YOLOv13 TensorRT network definition.
// Each helper appends one module to `network`, reading its weights from
// `weightMap` under the layer-name prefix `lname`, and returns the module's
// final layer.  (The previous `using namespace std;` was removed: a public
// header must not pollute every includer's global namespace.)

// Parse a .wts weight file into a name -> Weights map.
std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file);

nvinfer1::IScaleLayer* addBatchNorm2d(nvinfer1::INetworkDefinition* network,
                                      std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                      std::string lname, float eps);

// Conv2d + BatchNorm + SiLU activation block.
nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network,
                                        std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                        int ch, std::vector<int> k, int s, std::string lname, int p = 0, int g = 1,
                                        int d = 1);

nvinfer1::ILayer* Conv(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                       nvinfer1::ITensor& input, int c_out, std::string lname, int k = 1, int s = 1, int padding = 0,
                       int g = 1, bool act = true);

// Distribution Focal Loss decode head.
nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                             nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname);

// Custom YOLO decode plugin over the per-scale detection outputs.
nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network,
                                       std::vector<nvinfer1::IConcatenationLayer*> dets, const int* px_arry,
                                       int px_arry_num);

nvinfer1::IElementWiseLayer* C3k(nvinfer1::INetworkDefinition* network,
                                 std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c2,
                                 std::string lname, int n = 1, bool shortcut = true, int g = 1, float e = 0.5,
                                 int k = 3);

nvinfer1::IElementWiseLayer* C3K2(nvinfer1::INetworkDefinition* network,
                                  std::map<std::string, nvinfer1::Weights>& weightMap, nvinfer1::ITensor& input, int c2,
                                  int n, std::string lname, bool c3k = false, float e = 0.5, int g = 1,
                                  bool shortcut = true);

nvinfer1::ILayer* AAttn(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                        nvinfer1::ITensor& input, int dim, int num_heads, std::string lname, int area = 1);

nvinfer1::ILayer* DWConv(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                         nvinfer1::ITensor& input, int ch, std::vector<int> k, int s, std::string lname);

nvinfer1::IElementWiseLayer* ABlock(nvinfer1::INetworkDefinition* network,
                                    std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                    int dim, int num_heads, std::string lname, float mlp_ratio = 1.2, int area = 1);

nvinfer1::ILayer* A2C2f(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                        nvinfer1::ITensor& input, int c2, int n, std::string lname, bool a2 = true, int area = 1,
                        bool residual = false, float mlp_ratio = 2.0, float e = 0.5, int g = 1, bool shortcut = true);

nvinfer1::IElementWiseLayer* DSConv(nvinfer1::INetworkDefinition* network,
                                    std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                    int c_in, int c_out, std::string lname, int k = 3, int s = 1, int p = 0, int d = 1,
                                    bool bias = false);

nvinfer1::ILayer* DSBottleneck(nvinfer1::INetworkDefinition* network,
                               std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
                               int c2, std::string lname, bool shortcut = true, float e = 0.5, int k1 = 3, int k2 = 5,
                               int d2 = 1);

nvinfer1::ILayer* DSC3k(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                        nvinfer1::ITensor& input, int c2, int n, std::string lname, bool shortcut = true, int g = 1,
                        float e = 0.5, int k1 = 3, int k2 = 5, int d2 = 1);

nvinfer1::ILayer* DSC3K2(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                         nvinfer1::ITensor& input, int c2, std::string lname, int n = 1, bool dsc3k = false,
                         float e = 0.5, int g = 1, bool shortcut = true, int k1 = 3, int k2 = 7, int d2 = 1);

nvinfer1::ILayer* FuseModule(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                             std::vector<nvinfer1::ITensor*>& input, int c_in, bool channel_adjust, std::string lname);

nvinfer1::ISoftMaxLayer* AdaHyperedgeGen(nvinfer1::INetworkDefinition* network,
                                         std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                         int node_dim, int num_hyperedges, std::string lname, int num_heads = 4,
                                         std::string context = "both");

nvinfer1::IElementWiseLayer* GELU(nvinfer1::INetworkDefinition* network, nvinfer1::ITensor& input);

nvinfer1::IElementWiseLayer* AdaHGConv(nvinfer1::INetworkDefinition* network,
                                       std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                       int embed_dim, std::string lname, int num_hyperedges = 16, int num_heads = 4,
                                       std::string context = "both");

nvinfer1::IShuffleLayer* AdaHGComputation(nvinfer1::INetworkDefinition* network,
                                          std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                          int embed_dim, std::string lname, int num_hyperedges = 16, int num_heads = 8,
                                          std::string context = "both");

nvinfer1::ILayer* C3AH(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                       nvinfer1::ITensor& input, int c2, std::string lname, float e = 1.0, int num_hyperedges = 8,
                       std::string context = "both");

nvinfer1::ILayer* HyperACE(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                           std::vector<nvinfer1::ITensor*> input, int c1, int c2, std::string lname, int n = 1,
                           int num_hyperedges = 8, bool dsc3k = false, bool shortcut = false, float e1 = 0.5,
                           float e2 = 1, std::string context = "both", bool channel_adjust = true);

// Single declaration (the header previously declared this twice, once with
// and once without the default argument).
nvinfer1::ILayer* DownsampleConv(nvinfer1::INetworkDefinition* network,
                                 std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                 int in_channels, std::string lname, bool channel_adjust = true);

nvinfer1::IElementWiseLayer* FullPad_Tunnel(nvinfer1::INetworkDefinition* network,
                                            std::map<std::string, nvinfer1::Weights> weightMap,
                                            std::vector<nvinfer1::ITensor*> input, std::string lname);

// Debug helper: print a tensor's dimensions.
void cout_dim(nvinfer1::ITensor& input);

yolov13/include/calibrator.h

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H

#include <NvInfer.h>
#include <string>
#include <vector>
#include "macros.h"

//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
//! Feeds batches of calibration images from \p img_dir to TensorRT during
//! INT8 calibration and caches the resulting calibration table under
//! \p calib_table_name so later builds can skip recalibration.
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
   public:
    Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name,
                           const char* input_blob_name, bool read_cache = true);
    virtual ~Int8EntropyCalibrator2();
    int getBatchSize() const TRT_NOEXCEPT override;
    bool getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT override;
    const void* readCalibrationCache(size_t& length) TRT_NOEXCEPT override;
    void writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT override;

   private:
    int batchsize_;                       // images per calibration batch
    int input_w_;                         // network input width
    int input_h_;                         // network input height
    int img_idx_;                         // index of the next image to consume
    std::string img_dir_;                 // calibration image directory
    std::vector<std::string> img_files_;  // discovered image file names
    size_t input_count_;                  // elements per input batch
    std::string calib_table_name_;        // calibration-cache file path
    const char* input_blob_name_;         // input binding name
    bool read_cache_;                     // reuse an existing cache if present
    void* device_input_;                  // device buffer for the current batch
    std::vector<char> calib_cache_;       // in-memory copy of the cache
};

#endif  // ENTROPY_CALIBRATOR_H

yolov13/include/config.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Build-time precision selection: enable exactly one of the three modes.
// #define USE_FP16
// #define USE_FP32
#define USE_INT8

// Engine input/output binding names.
static const char* kInputTensorName = "images";
static const char* kOutputTensorName = "output";

// Model / runtime parameters.
static const int kNumClass = 80;
static const int kBatchSize = 1;
static const int kGpuId = 0;
static const int kInputH = 640;
static const int kInputW = 640;

// Post-processing thresholds.
static const float kNmsThresh = 0.45f;
static const float kConfThresh = 0.5f;

// Buffer sizing limits.
static const int kMaxInputImageSize = 3000 * 3000;
static const int kMaxNumOutputBbox = 1000;

// Quantization input image folder path (INT8 calibration data).
static const char* kInputQuantizationFolder = "./tensorrtx-int8calib-data/coco_calib";

yolov13/include/cuda_utils.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#ifndef TRTX_CUDA_UTILS_H_
#define TRTX_CUDA_UTILS_H_

#include <cuda_runtime_api.h>

#include <cassert>   // assert() used by CUDA_CHECK (was relying on transitive includes)
#include <iostream>  // std::cerr used by CUDA_CHECK (was relying on transitive includes)

// CUDA_CHECK(call): evaluate a CUDA runtime call and abort (via assert) on
// failure, printing the numeric error code and the source location.
// Wrapped in do { ... } while (0) so the macro expands to a single statement
// and is safe in contexts like `if (x) CUDA_CHECK(...); else ...;`.
#ifndef CUDA_CHECK
#define CUDA_CHECK(callstr)                                                                   \
    do {                                                                                      \
        cudaError_t error_code = callstr;                                                     \
        if (error_code != cudaSuccess) {                                                      \
            std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__ \
                      << std::endl;                                                           \
            assert(0);                                                                        \
        }                                                                                     \
    } while (0)
#endif  // CUDA_CHECK

#endif  // TRTX_CUDA_UTILS_H_

0 commit comments

Comments
 (0)