Skip to content

Commit bbd9c3d

Browse files
committed
TensorRT inference yolov13
1 parent 93ee5b5 commit bbd9c3d

25 files changed

+4070
-0
lines changed

yolov13/CMakeLists.txt

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
2+
3+
4+
cmake_minimum_required(VERSION 3.10)
project(yolov13 LANGUAGES CXX)

# ---- Environment-based paths for CUDA and TensorRT -------------------------
# CUDA_HOME / TENSORRT_DIR are read at configure time only; fall back to the
# conventional install locations when they are not set.
if(DEFINED ENV{CUDA_HOME})
    set(CUDA_TOOLKIT_ROOT_DIR $ENV{CUDA_HOME})
else()
    set(CUDA_TOOLKIT_ROOT_DIR "/usr/local/cuda")
endif()

if(DEFINED ENV{TENSORRT_DIR})
    set(TENSORRT_ROOT $ENV{TENSORRT_DIR})
else()
    set(TENSORRT_ROOT "/opt/TensorRT-8.6.1.6")
endif()

message(STATUS "Using CUDA from: ${CUDA_TOOLKIT_ROOT_DIR}")
message(STATUS "Using TensorRT from: ${TENSORRT_ROOT}")

# Language standard: use CMAKE_CXX_STANDARD instead of injecting a raw
# -std flag via add_definitions (which is meant for -D defines only).
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Default to Debug only when the user did not pick a build type; never
# overwrite a user-provided -DCMAKE_BUILD_TYPE.
if(NOT CMAKE_BUILD_TYPE)
    set(CMAKE_BUILD_TYPE Debug)
endif()

set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc)
enable_language(CUDA)

# ---- CUDA and TensorRT include/link paths ----------------------------------
# TensorRT ships no CMake imported targets, so plain directories are used;
# include paths are attached per-target below rather than globally.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
    message(STATUS "embed_platform on")
    set(cuda_include_dir ${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux/include)
    link_directories(${CUDA_TOOLKIT_ROOT_DIR}/targets/aarch64-linux/lib)
else()
    message(STATUS "embed_platform off")
    set(cuda_include_dir ${CUDA_TOOLKIT_ROOT_DIR}/include)
    link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64)
endif()
link_directories(${TENSORRT_ROOT}/lib)

# ---- Plugin library (custom YOLO layer) ------------------------------------
add_library(myplugins SHARED ${PROJECT_SOURCE_DIR}/plugin/yololayer.cu)
target_compile_definitions(myplugins PRIVATE API_EXPORTS)
target_include_directories(myplugins PRIVATE
    ${PROJECT_SOURCE_DIR}/include
    ${PROJECT_SOURCE_DIR}/plugin
    ${cuda_include_dir}
    ${TENSORRT_ROOT}/include)
target_link_libraries(myplugins PRIVATE nvinfer cudart)

find_package(OpenCV REQUIRED)

# NOTE: file(GLOB) will not notice newly added sources until re-configure;
# prefer an explicit source list if this becomes a problem.
file(GLOB_RECURSE SRCS ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/*.cu)

# ---- Detection executable --------------------------------------------------
add_executable(yolov13-det ${PROJECT_SOURCE_DIR}/yolov13_det.cpp ${SRCS})
target_compile_definitions(yolov13-det PRIVATE API_EXPORTS)
target_include_directories(yolov13-det PRIVATE
    ${PROJECT_SOURCE_DIR}/include
    ${PROJECT_SOURCE_DIR}/plugin
    ${cuda_include_dir}
    ${TENSORRT_ROOT}/include
    ${OpenCV_INCLUDE_DIRS})
target_link_libraries(yolov13-det PRIVATE
    nvinfer
    cudart
    myplugins
    ${OpenCV_LIBS})

yolov13/gen_wts.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import sys # noqa: F401
2+
import argparse
3+
import os
4+
import struct
5+
import torch
6+
7+
8+
def parse_args():
    """Parse CLI arguments and resolve the output path.

    Returns a ``(weights_path, output_path)`` tuple. Exits via SystemExit
    when the weights file does not exist. When -o is omitted the output
    name is derived from the weights file; when -o names a directory the
    weights basename (with .wts extension) is placed inside it.
    """
    arg_parser = argparse.ArgumentParser(description='Convert .pt file to .wts')
    arg_parser.add_argument('-w', '--weights', required=True,
                            help='Input weights (.pt) file path (required)')
    arg_parser.add_argument('-o', '--output',
                            help='Output (.wts) file path (optional)')
    parsed = arg_parser.parse_args()

    if not os.path.isfile(parsed.weights):
        raise SystemExit('Invalid input file')

    if not parsed.output:
        # Default: same location and stem as the weights, .wts extension.
        parsed.output = os.path.splitext(parsed.weights)[0] + '.wts'
    elif os.path.isdir(parsed.output):
        # A directory was given: keep the weights basename inside it.
        base = os.path.splitext(os.path.basename(parsed.weights))[0] + '.wts'
        parsed.output = os.path.join(parsed.output, base)
    return parsed.weights, parsed.output
25+
26+
27+
pt_file, wts_file = parse_args()

print('Generating .wts for detection model')

# Load the checkpoint on CPU; weights_only=False is required because the
# checkpoint stores a full pickled model object under 'model', not just
# tensors.  NOTE(review): this unpickles arbitrary code — only run on
# checkpoints from a trusted source.
print(f'Loading {pt_file}')
device = 'cpu'
model = torch.load(pt_file, map_location=device, weights_only=False)['model'].float()  # load to FP32

# Remove the anchors buffer from the detection head so it is not written
# into the state dict below.  (The previous anchor_grid computation was
# dead code — its result was never used — and has been removed.)
delattr(model.model[-1], 'anchors')

model.to(device).eval()

# .wts layout: first line is the tensor count, then one line per tensor:
#   "<name> <element_count>  <hex big-endian fp32>..."
with open(wts_file, 'w') as f:
    f.write('{}\n'.format(len(model.state_dict().keys())))
    for k, v in model.state_dict().items():
        vr = v.reshape(-1).cpu().numpy()
        f.write('{} {} '.format(k, len(vr)))
        for vv in vr:
            f.write(' ')
            f.write(struct.pack('>f', float(vv)).hex())
        f.write('\n')

# python3 gen_wts.py -w your_model.pt -o output_name.wts

yolov13/include/block.h

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#pragma once

#include <map>
#include <string>
#include <vector>
#include "NvInfer.h"

// Builder helpers for assembling the YOLOv13 TensorRT network definition.
// Each helper appends one module to `network`, reading its weights from
// `weightMap` under the layer-name prefix `lname`, and returns the module's
// final layer.  (The previous `using namespace std;` was removed: a public
// header must not pollute every includer's global namespace.)

// Parse a .wts weight file into a name -> Weights map.
std::map<std::string, nvinfer1::Weights> loadWeights(const std::string file);

nvinfer1::IScaleLayer* addBatchNorm2d(nvinfer1::INetworkDefinition* network,
                                      std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                      std::string lname, float eps);

// Conv2d + BatchNorm + SiLU activation block.
nvinfer1::IElementWiseLayer* convBnSiLU(nvinfer1::INetworkDefinition* network,
                                        std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                        int ch, std::vector<int> k, int s, std::string lname, int p = 0, int g = 1,
                                        int d = 1);

nvinfer1::ILayer* Conv(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                       nvinfer1::ITensor& input, int c_out, std::string lname, int k = 1, int s = 1, int padding = 0,
                       int g = 1, bool act = true);

// Distribution Focal Loss decode head.
nvinfer1::IShuffleLayer* DFL(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                             nvinfer1::ITensor& input, int ch, int grid, int k, int s, int p, std::string lname);

// Custom YOLO decode plugin over the per-scale detection outputs.
nvinfer1::IPluginV2Layer* addYoLoLayer(nvinfer1::INetworkDefinition* network,
                                       std::vector<nvinfer1::IConcatenationLayer*> dets, const int* px_arry,
                                       int px_arry_num);

nvinfer1::IElementWiseLayer* C3k(nvinfer1::INetworkDefinition* network,
                                 std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c2,
                                 std::string lname, int n = 1, bool shortcut = true, int g = 1, float e = 0.5,
                                 int k = 3);

nvinfer1::IElementWiseLayer* C3K2(nvinfer1::INetworkDefinition* network,
                                  std::map<std::string, nvinfer1::Weights>& weightMap, nvinfer1::ITensor& input, int c2,
                                  int n, std::string lname, bool c3k = false, float e = 0.5, int g = 1,
                                  bool shortcut = true);

nvinfer1::ILayer* AAttn(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                        nvinfer1::ITensor& input, int dim, int num_heads, std::string lname, int area = 1);

nvinfer1::ILayer* DWConv(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                         nvinfer1::ITensor& input, int ch, std::vector<int> k, int s, std::string lname);

nvinfer1::IElementWiseLayer* ABlock(nvinfer1::INetworkDefinition* network,
                                    std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                    int dim, int num_heads, std::string lname, float mlp_ratio = 1.2, int area = 1);

nvinfer1::ILayer* A2C2f(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                        nvinfer1::ITensor& input, int c2, int n, std::string lname, bool a2 = true, int area = 1,
                        bool residual = false, float mlp_ratio = 2.0, float e = 0.5, int g = 1, bool shortcut = true);

nvinfer1::IElementWiseLayer* DSConv(nvinfer1::INetworkDefinition* network,
                                    std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                    int c_in, int c_out, std::string lname, int k = 3, int s = 1, int p = 0, int d = 1,
                                    bool bias = false);

nvinfer1::ILayer* DSBottleneck(nvinfer1::INetworkDefinition* network,
                               std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input, int c1,
                               int c2, std::string lname, bool shortcut = true, float e = 0.5, int k1 = 3, int k2 = 5,
                               int d2 = 1);

nvinfer1::ILayer* DSC3k(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                        nvinfer1::ITensor& input, int c2, int n, std::string lname, bool shortcut = true, int g = 1,
                        float e = 0.5, int k1 = 3, int k2 = 5, int d2 = 1);

nvinfer1::ILayer* DSC3K2(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                         nvinfer1::ITensor& input, int c2, std::string lname, int n = 1, bool dsc3k = false,
                         float e = 0.5, int g = 1, bool shortcut = true, int k1 = 3, int k2 = 7, int d2 = 1);

nvinfer1::ILayer* FuseModule(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                             std::vector<nvinfer1::ITensor*>& input, int c_in, bool channel_adjust, std::string lname);

nvinfer1::ISoftMaxLayer* AdaHyperedgeGen(nvinfer1::INetworkDefinition* network,
                                         std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                         int node_dim, int num_hyperedges, std::string lname, int num_heads = 4,
                                         std::string context = "both");

nvinfer1::IElementWiseLayer* GELU(nvinfer1::INetworkDefinition* network, nvinfer1::ITensor& input);

nvinfer1::IElementWiseLayer* AdaHGConv(nvinfer1::INetworkDefinition* network,
                                       std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                       int embed_dim, std::string lname, int num_hyperedges = 16, int num_heads = 4,
                                       std::string context = "both");

nvinfer1::IShuffleLayer* AdaHGComputation(nvinfer1::INetworkDefinition* network,
                                          std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                          int embed_dim, std::string lname, int num_hyperedges = 16, int num_heads = 8,
                                          std::string context = "both");

nvinfer1::ILayer* C3AH(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                       nvinfer1::ITensor& input, int c2, std::string lname, float e = 1.0, int num_hyperedges = 8,
                       std::string context = "both");

nvinfer1::ILayer* HyperACE(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights> weightMap,
                           std::vector<nvinfer1::ITensor*> input, int c1, int c2, std::string lname, int n = 1,
                           int num_hyperedges = 8, bool dsc3k = false, bool shortcut = false, float e1 = 0.5,
                           float e2 = 1, std::string context = "both", bool channel_adjust = true);

// Single declaration (the header previously declared this twice, once with
// and once without the default argument).
nvinfer1::ILayer* DownsampleConv(nvinfer1::INetworkDefinition* network,
                                 std::map<std::string, nvinfer1::Weights> weightMap, nvinfer1::ITensor& input,
                                 int in_channels, std::string lname, bool channel_adjust = true);

nvinfer1::IElementWiseLayer* FullPad_Tunnel(nvinfer1::INetworkDefinition* network,
                                            std::map<std::string, nvinfer1::Weights> weightMap,
                                            std::vector<nvinfer1::ITensor*> input, std::string lname);

// Debug helper: print a tensor's dimensions.
void cout_dim(nvinfer1::ITensor& input);

yolov13/include/calibrator.h

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#ifndef ENTROPY_CALIBRATOR_H
#define ENTROPY_CALIBRATOR_H

#include <NvInfer.h>
#include <string>
#include <vector>
#include "macros.h"

//! \class Int8EntropyCalibrator2
//!
//! \brief Implements Entropy calibrator 2.
//! CalibrationAlgoType is kENTROPY_CALIBRATION_2.
//!
//! Feeds batches of calibration images from \p img_dir to TensorRT during
//! INT8 calibration and caches the resulting calibration table under
//! \p calib_table_name so later builds can skip recalibration.
class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
   public:
    Int8EntropyCalibrator2(int batchsize, int input_w, int input_h, const char* img_dir, const char* calib_table_name,
                           const char* input_blob_name, bool read_cache = true);
    virtual ~Int8EntropyCalibrator2();
    int getBatchSize() const TRT_NOEXCEPT override;
    bool getBatch(void* bindings[], const char* names[], int nbBindings) TRT_NOEXCEPT override;
    const void* readCalibrationCache(size_t& length) TRT_NOEXCEPT override;
    void writeCalibrationCache(const void* cache, size_t length) TRT_NOEXCEPT override;

   private:
    int batchsize_;                       // images per calibration batch
    int input_w_;                         // network input width
    int input_h_;                         // network input height
    int img_idx_;                         // index of the next image to consume
    std::string img_dir_;                 // calibration image directory
    std::vector<std::string> img_files_;  // discovered image file names
    size_t input_count_;                  // elements per input batch
    std::string calib_table_name_;        // calibration-cache file path
    const char* input_blob_name_;         // input binding name
    bool read_cache_;                     // reuse an existing cache if present
    void* device_input_;                  // device buffer for the current batch
    std::vector<char> calib_cache_;       // in-memory copy of the cache
};

#endif  // ENTROPY_CALIBRATOR_H

yolov13/include/config.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// Build-time precision selection: enable exactly one of the three modes.
// #define USE_FP16
// #define USE_FP32
#define USE_INT8

// Engine input/output binding names.
static const char* kInputTensorName = "images";
static const char* kOutputTensorName = "output";

// Model / runtime parameters.
static const int kNumClass = 80;
static const int kBatchSize = 1;
static const int kGpuId = 0;
static const int kInputH = 640;
static const int kInputW = 640;

// Post-processing thresholds.
static const float kNmsThresh = 0.45f;
static const float kConfThresh = 0.5f;

// Buffer sizing limits.
static const int kMaxInputImageSize = 3000 * 3000;
static const int kMaxNumOutputBbox = 1000;

// Quantization input image folder path (INT8 calibration data).
static const char* kInputQuantizationFolder = "./tensorrtx-int8calib-data/coco_calib";

yolov13/include/cuda_utils.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
#ifndef TRTX_CUDA_UTILS_H_
#define TRTX_CUDA_UTILS_H_

#include <cuda_runtime_api.h>

#include <cassert>   // assert() used by CUDA_CHECK (was relying on transitive includes)
#include <iostream>  // std::cerr used by CUDA_CHECK (was relying on transitive includes)

// CUDA_CHECK(call): evaluate a CUDA runtime call and abort (via assert) on
// failure, printing the numeric error code and the source location.
// Wrapped in do { ... } while (0) so the macro expands to a single statement
// and is safe in contexts like `if (x) CUDA_CHECK(...); else ...;`.
#ifndef CUDA_CHECK
#define CUDA_CHECK(callstr)                                                                   \
    do {                                                                                      \
        cudaError_t error_code = callstr;                                                     \
        if (error_code != cudaSuccess) {                                                      \
            std::cerr << "CUDA error " << error_code << " at " << __FILE__ << ":" << __LINE__ \
                      << std::endl;                                                           \
            assert(0);                                                                        \
        }                                                                                     \
    } while (0)
#endif  // CUDA_CHECK

#endif  // TRTX_CUDA_UTILS_H_

0 commit comments

Comments
 (0)