Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ repos:
types: [python]
- id: check-added-large-files
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v14.0.6
rev: v18.1.3
hooks:
- id: clang-format
types_or: [c++, c, cuda]
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ The basic workflow of TensorRTx is:

## News

- `3 Mar 2026`. [zgjja](https://github.com/zgjja) Add Vision Transformer
- `2 Feb 2026`. [fazligorkembal](https://github.com/fazligorkembal) Yolo26-Det, Yolo26-Obb, Yolo26-Cls
- `15 Jan 2026`. [zgjja](https://github.com/zgjja) Refactor multiple old CV models to support TensorRT SDK versions 7 through 10.
- `8 Jan 2026`. [ydk61](https://github.com/ydk61): YOLOv13
Expand Down Expand Up @@ -136,6 +137,7 @@ Following models are implemented.
| [shufflenet](./shufflenetv2) | ShuffleNet v2 with 0.5x output channels |
| [squeezenet](./squeezenet) | SqueezeNet 1.1 model |
| [vgg](./vgg) | VGG 11-layer model |
| [ViT](./vit) | Vision Transformer (ViT), using weights and model from Hugging Face |
| [yolov3-tiny](./yolov3-tiny) | weights and pytorch implementation from [ultralytics/yolov3](https://github.com/ultralytics/yolov3) |
| [yolov3](./yolov3) | darknet-53, weights and pytorch implementation from [ultralytics/yolov3](https://github.com/ultralytics/yolov3) |
| [yolov3-spp](./yolov3-spp) | darknet-53, weights and pytorch implementation from [ultralytics/yolov3](https://github.com/ultralytics/yolov3) |
Expand Down
42 changes: 42 additions & 0 deletions vit/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
cmake_minimum_required(VERSION 3.17.0)

# Build script for the `vit` executable (TensorRT Vision Transformer demo).
project(
  vit
  VERSION 0.1
  LANGUAGES C CXX CUDA)

# Default list of GPU architectures; only applied when the user has not
# provided CMAKE_CUDA_ARCHITECTURES (e.g. via -DCMAKE_CUDA_ARCHITECTURES=...).
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES 80 86 89 90 100 120)
endif()

# Project-wide language standards and tooling defaults.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_INCLUDE_CURRENT_DIR TRUE)

# NOTE(review): this option is declared but not consulted anywhere in this
# file; CUDA::cudart is linked below regardless of its value. Confirm whether
# it is meant to select CUDA::cudart_static.
option(CUDA_USE_STATIC_CUDA_RUNTIME "Use static cudaruntime library" OFF)

find_package(Threads REQUIRED)
find_package(CUDAToolkit REQUIRED)
find_package(OpenCV REQUIRED)

# Define the TensorRT::TensorRT imported target unless it already exists.
# The helper script is addressed through CMAKE_CURRENT_LIST_DIR so that the
# lookup does not depend on how a relative path is resolved.
if(NOT TARGET TensorRT::TensorRT)
  include("${CMAKE_CURRENT_LIST_DIR}/FindTensorRT.cmake")
else()
  message(STATUS "TensorRT has been found, skipping for ${PROJECT_NAME}")
endif()

add_executable(${PROJECT_NAME} "${PROJECT_NAME}.cc" "cuda_allocator.cc"
                               "profiler.cc")
target_include_directories(${PROJECT_NAME} PRIVATE ${OpenCV_INCLUDE_DIRS})

# Nothing links against an executable, so its dependencies are PRIVATE.
target_link_libraries(
  ${PROJECT_NAME}
  PRIVATE Threads::Threads CUDA::cudart CUDA::cuda_driver TensorRT::TensorRT
          ${OpenCV_LIBS})

# On Windows, use the static MSVC runtime (/MT, or /MTd for Debug).
if(WIN32)
  set_target_properties(
    ${PROJECT_NAME} PROPERTIES MSVC_RUNTIME_LIBRARY
                               "MultiThreaded$<$<CONFIG:Debug>:Debug>")
endif()
142 changes: 142 additions & 0 deletions vit/FindTensorRT.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Minimum CMake version this script declares it needs.
cmake_minimum_required(VERSION 3.17.0)

# _guess_path(<var_name> <required_files> [<candidate>...])
#
# Filters the candidate directories given after the two named arguments and
# keeps those that exist and contain every entry of `required_files`
# (a ;-separated list of file names relative to the candidate).
#
#   var_name        name of the variable set in the caller's scope; it
#                   receives the list of all accepted directories, in the
#                   order they were passed.
#   required_files  list of files that must exist inside a candidate.
#   ARGN            candidate directories to test.
#
# Stops configuration with FATAL_ERROR when no candidate qualifies.
function(_guess_path var_name required_files)
  set(_accepted "")

  foreach(_dir IN LISTS ARGN)
    if(EXISTS "${_dir}")
      # Stop at the first required file that is absent from this directory.
      set(_complete TRUE)
      foreach(_needed IN LISTS required_files)
        if(NOT EXISTS "${_dir}/${_needed}")
          message(DEBUG "'${_dir}' missing '${_needed}'")
          set(_complete FALSE)
          break()
        endif()
      endforeach()

      if(_complete)
        list(APPEND _accepted "${_dir}")
        message(DEBUG "accept '${_dir}'")
      else()
        message(DEBUG "reject '${_dir}'")
      endif()
    else()
      message(DEBUG "skip non-existing path '${_dir}'")
    endif()
  endforeach()

  # An empty result means that none of the candidates was usable.
  list(LENGTH _accepted _accepted_count)
  if(_accepted_count EQUAL 0)
    message(
      FATAL_ERROR
        "_guess_path(${var_name}) failed: no valid path found. required_files='${required_files}' candidates='${ARGN}'"
    )
  endif()

  set(${var_name}
      "${_accepted}"
      PARENT_SCOPE)
endfunction()

# Imported interface target that carries the TensorRT include directory and
# libraries; consumers link against the namespaced alias TensorRT::TensorRT.
# NOTE(review): aliasing a non-GLOBAL imported target needs CMake >= 3.18
# according to the add_library() documentation, while this file declares 3.17
# as its minimum - confirm which versions must be supported.
add_library(TensorRT INTERFACE IMPORTED)
add_library(TensorRT::TensorRT ALIAS TensorRT)

# The TensorRT version can be provided as a cache entry
# (-DTRT_VERSION=...) or through the TRT_VERSION environment variable.
set(TRT_VERSION
    ""
    CACHE
      STRING
      "TensorRT version, e.g. \"8.6.1.6\" or \"8.6.1.6+cuda12.0.1.011\", \"8.6.1.6.Windows10.x86_64.cuda-12.0\" etc"
)

# The environment variable takes precedence over the CMake variable. Both
# expansions are quoted so that an unset or empty value still yields a
# well-formed condition.
if(NOT "$ENV{TRT_VERSION}" STREQUAL "")
  if(NOT "${TRT_VERSION}" STREQUAL "")
    message(
      WARNING
        "TRT_VERSION defined by cmake and environment variable both, using the environment variable"
    )
  endif()
  set(TRT_VERSION "$ENV{TRT_VERSION}")
endif()

# The major version is the first run of digits in TRT_VERSION.
string(REGEX MATCH "[0-9]+" TRT_MAJOR_VERSION "${TRT_VERSION}")
if("${TRT_MAJOR_VERSION}" STREQUAL "")
  message(
    FATAL_ERROR
      "TRT_VERSION is empty or contains no version number (got '${TRT_VERSION}'); "
      "pass -DTRT_VERSION=<version> or set the TRT_VERSION environment variable")
endif()

# Per platform: decide where to look and which libraries to look for.
# `_trt_required_modules` must be found; `_trt_optional_modules` are linked
# only when present, because not every TensorRT release ships them.
if(WIN32)
  # Default install location; a TensorRT_DIR supplied by the user is kept.
  if(NOT TensorRT_DIR)
    set(TensorRT_DIR "C:/Program Files/TensorRT-${TRT_VERSION}")
  endif()
  if(NOT EXISTS "${TensorRT_DIR}")
    message(FATAL_ERROR "TensorRT_DIR=${TensorRT_DIR} does not exist!")
  endif()

  # From TensorRT 10 on, the Windows import libraries carry a "_10" suffix.
  if(TRT_MAJOR_VERSION GREATER_EQUAL 10)
    set(_trt_required_modules nvinfer_10 nvinfer_plugin_10)
    set(_trt_optional_modules nvinfer_vc_plugin_10 nvinfer_dispatch_10
                              nvinfer_lean_10)
  else()
    set(_trt_required_modules nvinfer nvinfer_plugin)
    set(_trt_optional_modules nvinfer_vc_plugin nvinfer_dispatch nvinfer_lean)
  endif()
  message(DEBUG "Using ${_trt_required_modules} ${_trt_optional_modules}")

  set(TensorRT_LIBRARY_DIR "${TensorRT_DIR}/lib")
  set(TensorRT_INCLUDE_DIR "${TensorRT_DIR}/include")
elseif(UNIX)
  # Candidate directories depend on the processor architecture.
  string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _trt_arch)
  set(_trt_include_candidates)
  set(_trt_library_candidates)
  if(_trt_arch MATCHES "^(aarch64|arm64|arch64)$")
    set(_trt_include_candidates
        "/usr/include/aarch64-linux-gnu" "/usr/include"
        "/usr/local/cuda/targets/aarch64-linux/include")
    set(_trt_library_candidates
        "/usr/local/tensorrt/targets/aarch64-linux-gnu/lib"
        "/usr/lib/aarch64-linux-gnu" "/usr/lib/aarch64-linux-gnu/tegra"
        "/usr/lib")
  elseif(_trt_arch MATCHES "^(x86_64|amd64)$")
    set(_trt_include_candidates
        "/usr/local/tensorrt/targets/x86_64-linux-gnu/include"
        "/usr/include/x86_64-linux-gnu" "/usr/include")
    set(_trt_library_candidates
        "/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
        "/usr/lib/x86_64-linux-gnu" "/usr/lib")
  else()
    message(FATAL_ERROR "Unknown architecture")
  endif()

  set(_trt_required_modules nvinfer nvinfer_plugin)
  set(_trt_optional_modules)
  if(TRT_MAJOR_VERSION GREATER_EQUAL 8)
    list(APPEND _trt_optional_modules nvinfer_vc_plugin nvinfer_dispatch
         nvinfer_lean)
  endif()

  _guess_path(TensorRT_LIBRARY_DIR "libnvinfer.so;libnvinfer_plugin.so"
              ${_trt_library_candidates})
  message(STATUS "TensorRT libraries: ${TensorRT_LIBRARY_DIR}")
  _guess_path(TensorRT_INCLUDE_DIR "NvInfer.h" ${_trt_include_candidates})
  message(STATUS "TensorRT includes: ${TensorRT_INCLUDE_DIR}")
else()
  message(FATAL_ERROR "Unsupported platform: neither WIN32 nor UNIX")
endif()

# Locate every library. A missing required library is a hard error, a missing
# optional one is skipped, so that no *-NOTFOUND value reaches the link line.
set(TensorRT_LIBRARIES "")
foreach(_trt_lib IN LISTS _trt_required_modules _trt_optional_modules)
  find_library(
    TensorRT_${_trt_lib}_LIBRARY
    NAMES ${_trt_lib}
    HINTS ${TensorRT_LIBRARY_DIR})
  if(TensorRT_${_trt_lib}_LIBRARY)
    list(APPEND TensorRT_LIBRARIES "${TensorRT_${_trt_lib}_LIBRARY}")
  elseif(_trt_lib IN_LIST _trt_required_modules)
    message(
      FATAL_ERROR
        "Required TensorRT library '${_trt_lib}' not found in '${TensorRT_LIBRARY_DIR}'"
    )
  else()
    message(STATUS "Optional TensorRT library '${_trt_lib}' not found, skipping")
  endif()
endforeach()

target_link_libraries(TensorRT INTERFACE ${TensorRT_LIBRARIES})

message(STATUS "Found TensorRT libs: ${TensorRT_LIBRARIES}")

# Only usage requirements are set on the imported interface target. Build
# properties such as C_STANDARD, POSITION_INDEPENDENT_CODE or INSTALL_RPATH
# are not propagated to consumers from here, and CMake releases before 3.19
# restrict which properties an INTERFACE library may carry.
set_target_properties(
  TensorRT PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${TensorRT_INCLUDE_DIR}")

# Remove the helper variables from the including scope.
unset(TRT_MAJOR_VERSION)
unset(_trt_required_modules)
unset(_trt_optional_modules)
unset(_trt_include_candidates)
unset(_trt_library_candidates)
unset(_trt_arch)
unset(_trt_lib)
Loading
Loading