
Commit 0436df3

support for tensorrt
1 parent 89acb2e commit 0436df3

8 files changed: +105 −1 lines changed

CMakeLists.txt

Lines changed: 5 additions & 0 deletions

```diff
@@ -17,8 +17,13 @@ find_package(CUDA QUIET)
 
 if(CUDA_FOUND AND USE_GPU)
     add_definitions(-DENABLE_GPU=1)
+    include(tensorrt)
+    if (TENSORRT_FOUND)
+        add_definitions(-DENABLE_TENSORRT=1)
+    endif()
 else()
     add_definitions(-DENABLE_GPU=0)
+    add_definitions(-DENABLE_TENSORRT=0)
 endif()
 
```
README.md

Lines changed: 6 additions & 1 deletion

````diff
@@ -23,7 +23,8 @@ Hope that they both are helpful for your work.
 ## TODO
 
 - [x] Support inference of multi-inputs, multi-outputs
-- [x] Examples for famous models, like yolov3, mask-rcnn, [ultra-light-weight face detector](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB), [yolox](https://github.com/Megvii-BaseDetection/YOLOX), [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3), [SuperPoint](https://github.com/magicleap/SuperPointPretrainedNetwork), [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork/tree/ddcf11f42e7e0732a0c4607648f9448ea8d73590). Might consider supporting more if requested
+- [x] Examples for famous models, like yolov3, mask-rcnn, [ultra-light-weight face detector](https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB), [yolox](https://github.com/Megvii-BaseDetection/YOLOX), [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg/tree/release/2.3), [SuperPoint](https://github.com/magicleap/SuperPointPretrainedNetwork), [SuperGlue](https://github.com/magicleap/SuperGluePretrainedNetwork/tree/ddcf11f42e7e0732a0c4607648f9448ea8d73590). Might consider supporting more if requested.
+- [x] (Minimal^^) Support for TensorRT backend
 - [ ] Batch-inference
 
 ## Installation
@@ -96,6 +97,10 @@ docker build -f ./dockerfiles/ubuntu2004_gpu.dockerfile -t onnx_runtime_gpu .
 docker run -it --rm --gpus all -v `pwd`:/workspace onnx_runtime_gpu
 ```
 
+- Onnxruntime will be built with TensorRT support if the environment has TensorRT. Check [this memo](./docs/onnxruntime_tensorrt.md) for useful URLs related to building with TensorRT.
+- Be careful to choose a TensorRT version compatible with onnxruntime. A good guess can be inferred from [HERE](https://github.com/microsoft/onnxruntime/blob/main/dockerfiles/Dockerfile.tensorrt).
+- Also, it is not possible to use models whose input shapes are dynamic with the TensorRT backend, according to [this](https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html#shape-inference-for-tensorrt-subgraphs).
+
 </details>
 
 ## How to test apps
````
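The README note above says models with dynamic input shapes cannot be used with the TensorRT backend. Below is a minimal sketch (not part of this commit) of how one might check for that up front using the standard onnxruntime C++ API, where dynamic dimensions are reported as negative values; the helper name `hasDynamicInputShapes` is illustrative, not part of this repository.

```cpp
#include <onnxruntime/core/session/onnxruntime_cxx_api.h>

#include <string>

// Sketch: returns true if any model input has a dynamic (negative) dimension,
// in which case the TensorRT execution provider cannot consume the model as-is.
bool hasDynamicInputShapes(const std::string& modelPath)
{
    Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "shape-check");
    Ort::SessionOptions options;  // default CPU provider is enough for inspection
    Ort::Session session(env, modelPath.c_str(), options);

    for (size_t i = 0; i < session.GetInputCount(); ++i) {
        Ort::TypeInfo typeInfo = session.GetInputTypeInfo(i);
        auto tensorInfo = typeInfo.GetTensorTypeAndShapeInfo();
        for (int64_t dim : tensorInfo.GetShape()) {
            if (dim < 0) {
                return true;  // symbolic/dynamic dimension found
            }
        }
    }
    return false;
}
```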

cmake/tensorrt.cmake

Lines changed: 29 additions & 0 deletions

```diff
@@ -0,0 +1,29 @@
+# Ref: https://github.com/PRBonn/rangenet_lib/blob/master/cmake/tensorrt-config.cmake
+
+find_package(CUDA)
+find_library(NVINFER NAMES nvinfer)
+find_library(NVINFERPLUGIN NAMES nvinfer_plugin)
+find_library(NVPARSERS NAMES nvparsers)
+find_library(NVONNXPARSER NAMES nvonnxparser)
+
+# newer tensorrt does not have nvonnxparser_runtime
+# find_library(NVONNXPARSERRUNTIME NAMES nvonnxparser_runtime)
+
+# If they are ALL there, export the libraries as a single package
+if(CUDA_FOUND AND NVINFER AND NVINFERPLUGIN AND NVPARSERS AND NVONNXPARSER)
+  message("TensorRT available!")
+  message("CUDA Libs: ${CUDA_LIBRARIES}")
+  message("CUDA Headers: ${CUDA_INCLUDE_DIRS}")
+  message("NVINFER: ${NVINFER}")
+  message("NVINFERPLUGIN: ${NVINFERPLUGIN}")
+  message("NVPARSERS: ${NVPARSERS}")
+  message("NVONNXPARSER: ${NVONNXPARSER}")
+  list(APPEND TENSORRT_LIBRARIES ${CUDA_LIBRARIES} nvinfer nvinfer_plugin nvparsers nvonnxparser)
+  message("All together now (libs): ${TENSORRT_LIBRARIES}")
+  list(APPEND TENSORRT_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
+  message("All together now (inc): ${TENSORRT_INCLUDE_DIRS}")
+  set(TENSORRT_FOUND ON)
+else()
+  message("TensorRT NOT Available")
+  set(TENSORRT_FOUND OFF)
+endif()
```
dockerfiles/ubuntu2004_tensorrt.dockerfile

Lines changed: 26 additions & 0 deletions

```diff
@@ -0,0 +1,26 @@
+FROM nvcr.io/nvidia/tensorrt:21.07-py3
+
+ENV DEBIAN_FRONTEND=noninteractive
+
+WORKDIR /build
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        sudo \
+        gnupg2 \
+        lsb-release \
+        build-essential \
+        software-properties-common \
+        cmake \
+        git \
+        tmux && \
+    bash install_latest_cmake.bash && \
+    bash install_onnx_runtime.bash && \
+    bash install_apps_dependencies.bash && \
+    rm -rf /build && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /workspace
+
+ENTRYPOINT ["/bin/bash"]
```

docs/onnxruntime_tensorrt.md

Lines changed: 19 additions & 0 deletions

```diff
@@ -0,0 +1,19 @@
+# 📝 Memo on how to use TensorRT onnxruntime
+
+---
+
+## :running: How to Run
+
+---
+
+A sample docker image with a TensorRT environment is provided [HERE](../dockerfiles/ubuntu2004_tensorrt.dockerfile).
+
+## :gem: References
+
+---
+
+- [tensorrt tags](https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorrt/tags)
+- [overview of tensorrt docker](https://docs.nvidia.com/deeplearning/tensorrt/container-release-notes/overview.html)
+- [tensorrt introduction](https://developer.nvidia.com/tensorrt)
+- [build instruction](https://onnxruntime.ai/docs/build/eps.html)
+- [sample dockerfile provided by onnxruntime repo](https://github.com/microsoft/onnxruntime/blob/v1.10.0/dockerfiles/Dockerfile.tensorrt): _choose a TensorRT version that matches the onnxruntime version_
```
scripts/get_tensorrt_environment_variables.bash

Lines changed: 3 additions & 0 deletions

```diff
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+export TENSORRT_HOME="$(dirname $(whereis libnvinfer | awk '{print $2}'))"
```

scripts/install_onnx_runtime.bash

Lines changed: 7 additions & 0 deletions

```diff
@@ -24,6 +24,13 @@ source $CURRENT_DIR/get_cuda_environment_variables.bash
 if [ ! -z "$CUDA_HOME" -a ! -z "$CUDA_VERSION" -a ! -z "$CUDNN_HOME" ]; then
     BUILDARGS="${BUILDARGS} --use_cuda --cuda_version=${CUDA_VERSION} --cuda_home=${CUDA_HOME} --cudnn_home=${CUDNN_HOME}"
 fi
+
+source $CURRENT_DIR/get_tensorrt_environment_variables.bash
+if [ ! -z "$TENSORRT_HOME" ]; then
+    # onnxruntime v1.10.0 is compatible with tensorrt 8
+    BUILDARGS="${BUILDARGS} --use_tensorrt --tensorrt_home=${TENSORRT_HOME}"
+fi
+
 ./build.sh ${BUILDARGS}
 cd ./build/Linux/${BUILDTYPE}
 sudo make install
```

src/OrtSessionHandler.cpp

Lines changed: 10 additions & 0 deletions

```diff
@@ -6,6 +6,11 @@
 */
 
 #include <onnxruntime/core/session/onnxruntime_cxx_api.h>
+
+#if ENABLE_TENSORRT
+#include <onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h>
+#endif
+
 #include <ort_utility/ort_utility.hpp>
 
 #include <algorithm>
@@ -207,10 +212,15 @@ void OrtSessionHandler::OrtSessionHandlerIml::initSession()
     Ort::SessionOptions sessionOptions;
 
     sessionOptions.SetIntraOpNumThreads(1);
+    // TensorRT options can be customized via sessionOptions:
+    // https://onnxruntime.ai/docs/execution-providers/TensorRT-ExecutionProvider.html
 
 #if ENABLE_GPU
     if (m_gpuIdx.has_value()) {
         Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(sessionOptions, m_gpuIdx.value()));
+#if ENABLE_TENSORRT
+        Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sessionOptions, m_gpuIdx.value()));
+#endif
     }
 #endif
```
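The comment added in `initSession` points out that TensorRT options can be customized through `sessionOptions`. A minimal sketch of what that could look like is shown below, assuming onnxruntime v1.10, whose C++ API exposes `Ort::SessionOptions::AppendExecutionProvider_TensorRT` and the `OrtTensorRTProviderOptions` struct; the field values chosen here are illustrative, not part of this commit.

```cpp
#include <onnxruntime/core/session/onnxruntime_cxx_api.h>

// Sketch: explicitly configured TensorRT execution provider (onnxruntime v1.10 API).
Ort::SessionOptions makeTensorrtSessionOptions(int deviceId)
{
    Ort::SessionOptions sessionOptions;
    sessionOptions.SetIntraOpNumThreads(1);

    OrtTensorRTProviderOptions trtOptions{};          // zero-initialize the C struct
    trtOptions.device_id = deviceId;                  // GPU that builds and runs the engine
    trtOptions.trt_max_partition_iterations = 1000;   // partitioning budget for TensorRT subgraphs
    trtOptions.trt_min_subgraph_size = 1;             // offload even small subgraphs
    trtOptions.trt_max_workspace_size = 1ULL << 30;   // 1 GiB scratch space for engine building
    trtOptions.trt_fp16_enable = 1;                   // allow FP16 kernels when the GPU supports them
    trtOptions.trt_engine_cache_enable = 1;           // cache built engines between runs
    trtOptions.trt_engine_cache_path = "./trt_cache";

    // Execution providers are consulted in the order they are appended, so
    // appending TensorRT first gives it priority over any CUDA fallback
    // registered afterwards.
    sessionOptions.AppendExecutionProvider_TensorRT(trtOptions);
    return sessionOptions;
}
```

Note that the committed code appends the CUDA provider before the TensorRT one; since providers are prioritized in registration order, the onnxruntime documentation suggests registering TensorRT first when it should take precedence over CUDA.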
