Support third-party RT-DETR models

kajananchinniahNV · kajananchinniahNV · commit 2d81e6e5a3ca · 2025-12-10T16:49:52.000-08:00
diff --git a/isaac_ros_rtdetr/include/isaac_ros_rtdetr/rtdetr_preprocessor_node.hpp b/isaac_ros_rtdetr/include/isaac_ros_rtdetr/rtdetr_preprocessor_node.hpp
@@ -64,6 +64,7 @@ class RtDetrPreprocessorNode : public rclcpp::Node
   std::string output_size_tensor_name_{};
   int64_t image_height_{};
   int64_t image_width_{};
+  bool use_max_dim_for_orig_size_{};
   cudaStream_t stream_;
 };
 
diff --git a/isaac_ros_rtdetr/launch/isaac_ros_rtdetr_oss.launch.py b/isaac_ros_rtdetr/launch/isaac_ros_rtdetr_oss.launch.py
@@ -0,0 +1,219 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+
+import launch
+from launch.actions import DeclareLaunchArgument
+from launch.substitutions import LaunchConfiguration
+from launch_ros.actions import ComposableNodeContainer
+from launch_ros.descriptions import ComposableNode
+
+MODEL_INPUT_SIZE = 640  # Width and height (pixels) of the square image fed to the RT-DETR model
+MODEL_NUM_CHANNELS = 3  # Number of image channels (rgb8) in the tensors fed to the model
+
+
+def generate_launch_description():
+    """Generate the launch description for the RT-DETR pipeline in one composable container."""
+    launch_args = [  # User-overridable arguments; read below through LaunchConfiguration
+        DeclareLaunchArgument(
+            'model_file_path',
+            default_value='',
+            description='The absolute file path to the ONNX file'),
+        DeclareLaunchArgument(
+            'engine_file_path',
+            default_value='',
+            description='The absolute file path to the TensorRT engine file'),
+        DeclareLaunchArgument(
+            'input_image_width',
+            default_value='640',
+            description='The input image width'),
+        DeclareLaunchArgument(
+            'input_image_height',
+            default_value='480',
+            description='The input image height'),
+        DeclareLaunchArgument(
+            'input_tensor_names',
+            default_value='["images", "orig_target_sizes"]',
+            description='A list of tensor names to bound to the specified input binding names'),
+        DeclareLaunchArgument(
+            'input_binding_names',
+            default_value='["images", "orig_target_sizes"]',
+            description='A list of input tensor binding names (specified by model)'),
+        DeclareLaunchArgument(
+            'output_tensor_names',
+            default_value='["labels", "boxes", "scores"]',
+            description='A list of tensor names to bound to the specified output binding names'),
+        DeclareLaunchArgument(
+            'output_binding_names',
+            default_value='["labels", "boxes", "scores"]',
+            description='A list of output tensor binding names (specified by model)'),
+        DeclareLaunchArgument(
+            'verbose',
+            default_value='False',
+            description='Whether TensorRT should verbosely log or not'),
+        DeclareLaunchArgument(
+            'force_engine_update',
+            default_value='False',
+            description='Whether TensorRT should update the TensorRT engine file or not'),
+        DeclareLaunchArgument(
+            'confidence_threshold',
+            default_value='0.6',
+            description='The minimum score for a bounding box to be published.',
+        ),
+    ]
+
+    # Dimensions of the incoming image, used by the resize and preprocessor nodes
+    input_image_width = LaunchConfiguration('input_image_width')
+    input_image_height = LaunchConfiguration('input_image_height')
+
+    # TensorRT node parameters (model/engine paths, tensor bindings, logging)
+    model_file_path = LaunchConfiguration('model_file_path')
+    engine_file_path = LaunchConfiguration('engine_file_path')
+    input_tensor_names = LaunchConfiguration('input_tensor_names')
+    input_binding_names = LaunchConfiguration('input_binding_names')
+    output_tensor_names = LaunchConfiguration('output_tensor_names')
+    output_binding_names = LaunchConfiguration('output_binding_names')
+    verbose = LaunchConfiguration('verbose')
+    force_engine_update = LaunchConfiguration('force_engine_update')
+
+    confidence_threshold = LaunchConfiguration('confidence_threshold')  # Passed to the decoder
+
+    resize_node = ComposableNode(  # Resizes the input image to the square model input size
+        name='resize_node',
+        package='isaac_ros_image_proc',
+        plugin='nvidia::isaac_ros::image_proc::ResizeNode',
+        parameters=[{
+            'input_width': input_image_width,
+            'input_height': input_image_height,
+            'output_width': MODEL_INPUT_SIZE,
+            'output_height': MODEL_INPUT_SIZE,
+            'keep_aspect_ratio': False,  # Aspect ratio is not preserved
+            'encoding_desired': 'rgb8',
+            'disable_padding': False
+        }],
+        remappings=[
+            ('image', 'image_rect'),
+            ('camera_info', 'camera_info_rect')],
+    )
+
+    image_format_node = ComposableNode(  # Configured for rgb8 at the model input size
+        name='image_format_node',
+        package='isaac_ros_image_proc',
+        plugin='nvidia::isaac_ros::image_proc::ImageFormatConverterNode',
+        parameters=[{
+            'encoding_desired': 'rgb8',
+            'image_width': MODEL_INPUT_SIZE,
+            'image_height': MODEL_INPUT_SIZE
+        }],
+        remappings=[
+            ('image_raw', 'resize/image'),
+            ('image', 'image_rgb')]
+    )
+
+    image_to_tensor_node = ComposableNode(  # Turns the image_rgb image into a tensor
+        name='image_to_tensor_node',
+        package='isaac_ros_tensor_proc',
+        plugin='nvidia::isaac_ros::dnn_inference::ImageToTensorNode',
+        parameters=[{
+            'scale': True,  # NOTE(review): presumably scales pixels to [0, 1] - confirm
+            'tensor_name': 'image',
+        }],
+        remappings=[
+            ('image', 'image_rgb'),
+            ('tensor', 'normalized_tensor'),
+        ]
+    )
+
+    interleave_to_planar_node = ComposableNode(  # Interleaved-to-planar layout conversion
+        name='interleaved_to_planar_node',
+        package='isaac_ros_tensor_proc',
+        plugin='nvidia::isaac_ros::dnn_inference::InterleavedToPlanarNode',
+        parameters=[{  # Input shape is given as size x size x channels
+            'input_tensor_shape': [MODEL_INPUT_SIZE, MODEL_INPUT_SIZE, MODEL_NUM_CHANNELS]
+        }],
+        remappings=[
+            ('interleaved_tensor', 'normalized_tensor')
+        ]
+    )
+
+    reshape_node = ComposableNode(  # Reshapes the planar tensor for the model
+        name='reshape_node',
+        package='isaac_ros_tensor_proc',
+        plugin='nvidia::isaac_ros::dnn_inference::ReshapeNode',
+        parameters=[{  # Output shape adds a leading dimension of 1 to the input shape
+            'output_tensor_name': 'input_tensor',
+            'input_tensor_shape': [MODEL_NUM_CHANNELS, MODEL_INPUT_SIZE, MODEL_INPUT_SIZE],
+            'output_tensor_shape': [1, MODEL_NUM_CHANNELS, MODEL_INPUT_SIZE, MODEL_INPUT_SIZE]
+        }],
+        remappings=[
+            ('tensor', 'planar_tensor')
+        ],
+    )
+
+    rtdetr_preprocessor_node = ComposableNode(  # Receives the pre-resize image dimensions
+        name='rtdetr_preprocessor',
+        package='isaac_ros_rtdetr',
+        plugin='nvidia::isaac_ros::rtdetr::RtDetrPreprocessorNode',
+        parameters=[{
+            'image_width': input_image_width,
+            'image_height': input_image_height,
+            'use_max_dim_for_orig_size': False,  # Report actual w/h, not max(w, h)
+        }],
+        remappings=[
+            ('encoded_tensor', 'reshaped_tensor')
+        ]
+    )
+
+    tensor_rt_node = ComposableNode(  # TensorRT node; all parameters come from launch arguments
+        name='tensor_rt',
+        package='isaac_ros_tensor_rt',
+        plugin='nvidia::isaac_ros::dnn_inference::TensorRTNode',
+        parameters=[{
+            'model_file_path': model_file_path,
+            'engine_file_path': engine_file_path,
+            'output_binding_names': output_binding_names,
+            'output_tensor_names': output_tensor_names,
+            'input_tensor_names': input_tensor_names,
+            'input_binding_names': input_binding_names,
+            'verbose': verbose,
+            'force_engine_update': force_engine_update
+        }]
+    )
+
+    rtdetr_decoder_node = ComposableNode(  # Decoder node, given the confidence threshold
+        name='rtdetr_decoder',
+        package='isaac_ros_rtdetr',
+        plugin='nvidia::isaac_ros::rtdetr::RtDetrDecoderNode',
+        parameters=[{
+            'confidence_threshold': confidence_threshold,
+        }],
+    )
+
+    container = ComposableNodeContainer(  # Hosts every node above in a single container
+        name='rtdetr_container',
+        namespace='rtdetr_container',
+        package='rclcpp_components',
+        executable='component_container_mt',  # Multi-threaded component container
+        composable_node_descriptions=[
+            resize_node, image_format_node,
+            image_to_tensor_node, interleave_to_planar_node, reshape_node,
+            rtdetr_preprocessor_node, tensor_rt_node, rtdetr_decoder_node
+        ],
+        output='screen'
+    )
+
+    final_launch_description = launch_args + [container]  # Argument declarations, then container
+    return launch.LaunchDescription(final_launch_description)
diff --git a/isaac_ros_rtdetr/package.xml b/isaac_ros_rtdetr/package.xml
@@ -21,7 +21,7 @@ SPDX-License-Identifier: Apache-2.0
 <?xml-model href="http://download.ros.org/schema/package_format3.xsd" schematypens="http://www.w3.org/2001/XMLSchema"?>
 <package format="3">
   <name>isaac_ros_rtdetr</name>
-  <version>4.0.0</version>
+  <version>4.0.1</version>
   <description>RT-DETR model processing</description>
 
   <maintainer email="isaac-ros-maintainers@nvidia.com">Isaac ROS Maintainers</maintainer>
diff --git a/isaac_ros_rtdetr/src/rtdetr_preprocessor_node.cpp b/isaac_ros_rtdetr/src/rtdetr_preprocessor_node.cpp
@@ -17,6 +17,8 @@
 
 #include "isaac_ros_rtdetr/rtdetr_preprocessor_node.hpp"
 
+#include <algorithm>
+
 #include "isaac_ros_nitros_tensor_list_type/nitros_tensor_builder.hpp"
 #include "isaac_ros_nitros_tensor_list_type/nitros_tensor_list.hpp"
 #include "isaac_ros_nitros_tensor_list_type/nitros_tensor_list_builder.hpp"
@@ -59,7 +61,8 @@ RtDetrPreprocessorNode::RtDetrPreprocessorNode(const rclcpp::NodeOptions options
       "output_size_tensor_name",
       "orig_target_sizes")},
   image_height_{declare_parameter<int64_t>("image_height", 480)},
-  image_width_{declare_parameter<int64_t>("image_width", 640)}
+  image_width_{declare_parameter<int64_t>("image_width", 640)},
+  use_max_dim_for_orig_size_{declare_parameter<bool>("use_max_dim_for_orig_size", true)}
 {
   CHECK_CUDA_ERROR(
     ::nvidia::isaac_ros::common::initNamedCudaStream(
@@ -89,9 +92,12 @@ void RtDetrPreprocessorNode::InputCallback(
     output_image_buffer, input_image_tensor.GetBuffer(),
     input_image_tensor.GetTensorSize(), cudaMemcpyDefault, stream_);
 
-  int64_t image_size = std::max(image_height_, image_width_);
+  const int64_t orig_width = use_max_dim_for_orig_size_ ?
+    std::max(image_height_, image_width_) : image_width_;
+  const int64_t orig_height = use_max_dim_for_orig_size_ ?
+    std::max(image_height_, image_width_) : image_height_;
 
-  int64_t output_size[2]{image_size, image_size};
+  int64_t output_size[2]{orig_width, orig_height};
   void * output_size_buffer;
   cudaMallocAsync(&output_size_buffer, sizeof(output_size), stream_);
   cudaMemcpyAsync(output_size_buffer, output_size, sizeof(output_size), cudaMemcpyDefault, stream_);