import os
import argparse

import tensorrt as trt


def main(onnx_path, engine_path, max_batchsize, opt_batchsize, min_batchsize, use_fp16=True, verbose=False) -> None:
| 6 | + """ Convert ONNX model to TensorRT engine. |
| 7 | + Args: |
| 8 | + onnx_path (str): Path to the input ONNX model. |
| 9 | + engine_path (str): Path to save the output TensorRT engine. |
| 10 | + use_fp16 (bool): Whether to use FP16 precision. |
| 11 | + verbose (bool): Whether to enable verbose logging. |
| 12 | + """ |
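    # One logger is shared by the builder and parser; VERBOSE emits detailed
    # per-layer build information, while INFO keeps output concise.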
    logger = trt.Logger(trt.Logger.VERBOSE if verbose else trt.Logger.INFO)

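    # The ONNX parser requires an explicit-batch network, which also allows
    # the dynamic-shape optimization profile configured below.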
    builder = trt.Builder(logger)
    network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(network_flags)

    parser = trt.OnnxParser(network, logger)

    if not os.path.isfile(onnx_path):
        raise FileNotFoundError(f"ONNX file not found: {onnx_path}")

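    # parser.parse populates `network` in place; on failure, each recorded
    # parser error is printed before aborting.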
    print(f"[INFO] Loading ONNX file from {onnx_path}")
    with open(onnx_path, "rb") as f:
        if not parser.parse(f.read()):
            for i in range(parser.num_errors):
                print(parser.get_error(i))
            raise RuntimeError("Failed to parse ONNX file")

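    # FASTER_DYNAMIC_SHAPES_0805 is a preview feature (TensorRT 8.5+) that
    # improves performance of engines built with dynamic input shapes.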
    config = builder.create_builder_config()
    config.set_preview_feature(trt.PreviewFeature.FASTER_DYNAMIC_SHAPES_0805, True)
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1 GiB

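    # Enable FP16 kernels only where the hardware has native support;
    # builder.platform_has_fast_fp16 reports that capability.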
    if use_fp16:
        if builder.platform_has_fast_fp16:
            config.set_flag(trt.BuilderFlag.FP16)
            print("[INFO] FP16 optimization enabled.")
        else:
            print("[WARNING] FP16 not supported on this platform. Proceeding with FP32.")

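    # The optimization profile bounds the dynamic batch dimension; the tensor
    # names passed to set_shape must match the input names in the ONNX graph.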
    profile = builder.create_optimization_profile()
    profile.set_shape(
        "images",
        min=(min_batchsize, 3, 640, 640),
        opt=(opt_batchsize, 3, 640, 640),
        max=(max_batchsize, 3, 640, 640),
    )
    profile.set_shape("orig_target_sizes", min=(1, 2), opt=(1, 2), max=(1, 2))
    config.add_optimization_profile(profile)

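    # build_serialized_network supersedes the deprecated builder.build_engine
    # API; it returns an IHostMemory buffer (or None on failure) that can be
    # written to disk directly.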
    print("[INFO] Building TensorRT engine...")
    serialized_engine = builder.build_serialized_network(network, config)

    if serialized_engine is None:
        raise RuntimeError("Failed to build the engine.")

    print(f"[INFO] Saving engine to {engine_path}")
    with open(engine_path, "wb") as f:
        f.write(serialized_engine)
    print("[INFO] Engine export complete.")


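# Example invocation (script and model names are illustrative):
#   python export_trt.py --onnx model.onnx --saveEngine model.engine \
#       --minBatchSize 1 --optBatchSize 16 --maxBatchSize 32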
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Convert ONNX to TensorRT Engine")
    parser.add_argument("--onnx", "-i", type=str, required=True, help="Path to input ONNX model file")
    parser.add_argument("--saveEngine", "-o", type=str, default="model.engine", help="Path to output TensorRT engine file")
    parser.add_argument("--maxBatchSize", "-Mb", type=int, default=32, help="Maximum batch size for inference")
    parser.add_argument("--optBatchSize", "-ob", type=int, default=16, help="Optimal batch size for inference")
    parser.add_argument("--minBatchSize", "-mb", type=int, default=1, help="Minimum batch size for inference")
| 68 | + parser.add_argument("--fp16", default=True, action="store_true", help="Enable FP16 precision mode") |
| 69 | + parser.add_argument("--verbose", action="store_true", help="Enable verbose logging") |
| 70 | + |
| 71 | + args = parser.parse_args() |
| 72 | + |
| 73 | + main( |
| 74 | + onnx_path=args.onnx, |
| 75 | + engine_path=args.saveEngine, |
| 76 | + max_batchsize=args.maxBatchSize, |
| 77 | + opt_batchsize=args.optBatchSize, |
| 78 | + min_batchsize=args.minBatchSize, |
| 79 | + use_fp16=args.fp16, |
| 80 | + verbose=args.verbose |
| 81 | + ) |