diff --git a/plugin_execution_providers/tensorrt/CMakeLists.txt b/plugin_execution_providers/tensorrt/CMakeLists.txt index 85e6ca9f..cd44d594 100644 --- a/plugin_execution_providers/tensorrt/CMakeLists.txt +++ b/plugin_execution_providers/tensorrt/CMakeLists.txt @@ -28,7 +28,7 @@ endif() add_definitions(-DONNX_NAMESPACE=onnx) add_definitions(-DONNX_ML) add_definitions(-DNOMINMAX) -file(GLOB tensorrt_src "./*.cc" "./utils/*.cc" "./cuda/unary_elementwise_ops_impl.cu" "./*.h") +file(GLOB tensorrt_src "./src/*.cc" "./src/utils/*.cc" "./src/cuda/unary_elementwise_ops_impl.cu" "./src/*.h") add_library(TensorRTEp SHARED ${tensorrt_src}) if (NOT ORT_HOME) @@ -111,7 +111,7 @@ if (WIN32) # Windows "${DEPS_PATH}/onnx-build/${CMAKE_BUILD_TYPE}/onnx_proto.lib") set(TRT_EP_LIB_LINK_FLAG - "-DEF:${CMAKE_SOURCE_DIR}/tensorrt_execution_provider.def") + "-DEF:${CMAKE_SOURCE_DIR}/src/tensorrt_execution_provider.def") else() # Linux set(ORT_LIB "${ORT_HOME}/lib/libonnxruntime.so") @@ -142,7 +142,7 @@ set_property(TARGET TensorRTEp APPEND_STRING PROPERTY LINK_FLAGS ${TRT_EP_LIB_LINK_FLAG}) target_include_directories(TensorRTEp PUBLIC "${ORT_HOME}/include" - "./utils" + "./src/utils" "/usr/local/cuda/include" "${TENSORRT_HOME}/include" "${DEPS_PATH}/flatbuffers-src/include" diff --git a/plugin_execution_providers/tensorrt/README.md b/plugin_execution_providers/tensorrt/README.md new file mode 100644 index 00000000..482703c9 --- /dev/null +++ b/plugin_execution_providers/tensorrt/README.md @@ -0,0 +1,56 @@ +# Plugin TensorRT EP + +This repo contains: +- The plugin TRT EP implementation +- How to build plugin TRT EP +- How to build python wheel for plugin TRT EP +- How to run inference with plugin TRT EP using python API + +Plugin TRT EP is migrated from the original TRT EP and provides the implementations of `OrtEpFactory`, `OrtEp`, `OrtNodeComputeInfo`, `OrtDataTransferImpl` ... 
that are required for a plugin EP to be able to interact with ONNX Runtime via the EP ABI (introduced in ORT 1.23.0).
+
+## How to build (on Windows) ##
+````bash
+mkdir build;cd build
+````
+````bash
+cmake -S ../ -B ./ -DCMAKE_BUILD_TYPE=Debug -DTENSORRT_HOME=C:/folder/to/trt -DORT_HOME=C:/folder/to/ort
+````
+````bash
+cmake --build ./ --config Debug
+````
+
+If the build succeeds, you will see the TRT EP DLL being generated at:
+```
+C:\repos\onnxruntime-inference-examples\plugin_execution_providers\tensorrt\build> ls .\Debug
+
+TensorRTEp.dll
+```
+
+
+Note: The `ORT_HOME` should contain the `include` and `lib` folders, as shown below:
+```
+C:\folder\to\ort
+ |
+ | ----- lib
+ |        | ----- onnxruntime.dll
+ |        | ----- onnxruntime.lib
+ |        | ----- onnxruntime.pdb
+ |        ...
+ |
+ | ---- include
+ |        | ----- onnxruntime_c_api.h
+ |        | ----- onnxruntime_ep_c_api.h
+ |        | ----- onnxruntime_cxx_api.h
+ |        | ----- onnxruntime_cxx_inline_api.h
+ |        ...
+```
+## How to build python wheel (on Windows) ##
+```
+python setup.py bdist_wheel
+```
+Once it's done, you will see the wheel file at:
+```
+C:\repos\onnxruntime-inference-examples\plugin_execution_providers\tensorrt> ls .\dist
+
+plugin_trt_ep-0.1.0-cp312-cp312-win_amd64.whl
+```
\ No newline at end of file
diff --git a/plugin_execution_providers/tensorrt/example/plugin_ep_inference.py b/plugin_execution_providers/tensorrt/example/plugin_ep_inference.py
new file mode 100644
index 00000000..ca9599f7
--- /dev/null
+++ b/plugin_execution_providers/tensorrt/example/plugin_ep_inference.py
@@ -0,0 +1,52 @@
+import onnxruntime as onnxrt
+import plugin_trt_ep
+import numpy as np
+
+# Path to the plugin EP library
+ep_lib_path = plugin_trt_ep.get_path()
+# Registration name can be anything the application chooses
+ep_registration_name = "TensorRTEp"
+# EP name should match the name assigned by the EP factory when creating the EP (i.e., in the implementation of OrtEpFactory::CreateEp)
+ep_name = ep_registration_name
+
+# Register plugin EP library with ONNX Runtime
+onnxrt.register_execution_provider_library(ep_registration_name, ep_lib_path)
+
+#
+# Create ORT session with explicit OrtEpDevice(s)
+#
+
+# Find the OrtEpDevice for "TensorRTEp"
+ep_devices = onnxrt.get_ep_devices()
+trt_ep_device = None
+for ep_device in ep_devices:
+    if ep_device.ep_name == ep_name:
+        trt_ep_device = ep_device
+
+assert trt_ep_device is not None, f"No OrtEpDevice found for EP '{ep_name}'"
+
+sess_options = onnxrt.SessionOptions()
+
+# Equivalent to the C API's SessionOptionsAppendExecutionProvider_V2 that appends "TensorRTEp" to ORT session option
+sess_options.add_provider_for_devices([trt_ep_device], {'trt_engine_cache_enable': '1'})
+
+assert sess_options.has_providers()
+
+# Create ORT session with "TensorRTEp" plugin EP
+sess = onnxrt.InferenceSession("C:\\models\\mul_1.onnx", sess_options=sess_options)
+
+# Run sample model and check output
+x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
+input_name = sess.get_inputs()[0].name
+res = sess.run([], {input_name: x})
+output_expected = np.array([[1.0, 4.0], [9.0, 16.0], [25.0, 36.0]], dtype=np.float32)
+np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)
+
+# Unregister the library using the application-specified registration name.
+# Must only unregister a library after all sessions that use the library have been released.
+del sess
+onnxrt.unregister_execution_provider_library(ep_registration_name)
+
+# Note:
+# The mul_1.onnx can be found here:
+# https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/testdata/mul_1.onnx
\ No newline at end of file
diff --git a/plugin_execution_providers/tensorrt/plugin_trt_ep/__init__.py b/plugin_execution_providers/tensorrt/plugin_trt_ep/__init__.py
new file mode 100644
index 00000000..40eed2ca
--- /dev/null
+++ b/plugin_execution_providers/tensorrt/plugin_trt_ep/__init__.py
@@ -0,0 +1,34 @@
+import os
+import importlib.resources
+import ctypes
+import onnxruntime as ort
+
+ort_dir = os.path.dirname(os.path.abspath(ort.__file__))
+dll_path = os.path.join(ort_dir, "capi", "onnxruntime.dll")
+
+# When the application calls ort.register_execution_provider_library() with the path to the plugin EP DLL,
+# ORT internally uses LoadLibraryExW() to load that DLL. Since the plugin EP depends on onnxruntime.dll,
+# the operating system will attempt to locate and load onnxruntime.dll first.
+#
+# On Windows, LoadLibraryExW() searches the directory containing the plugin EP DLL before searching system directories.
+# Because onnxruntime.dll is not located in the plugin EP’s directory, Windows ends up loading the copy from a
+# system directory instead — which is not the correct version.
+#
+# To ensure the plugin EP uses the correct onnxruntime.dll bundled with the ONNX Runtime package,
+# we load that DLL explicitly before loading the plugin EP DLL.
+ctypes.WinDLL(dll_path)
+
+def get_path(filename: str = "TensorRTEp.dll") -> str:
+    """
+    Returns the absolute filesystem path to a DLL (or any file)
+    packaged inside plugin_trt_ep/libs.
+
+    The package must be installed unpacked (the normal case for a wheel):
+    a file extracted from an archive is temporary and is removed again
+    before this function returns.
+    """
+    # Anchor on this package and walk into libs/, so that libs/ (a plain data
+    # folder) does not have to be importable as a package of its own.
+    resource = importlib.resources.files(__name__) / "libs" / filename
+    with importlib.resources.as_file(resource) as path:
+        return str(path)
\ No newline at end of file
diff --git a/plugin_execution_providers/tensorrt/setup.py b/plugin_execution_providers/tensorrt/setup.py
new file mode 100644
index 00000000..84a71cbb
--- /dev/null
+++ b/plugin_execution_providers/tensorrt/setup.py
@@ -0,0 +1,58 @@
+from setuptools import setup, find_packages
+from setuptools.dist import Distribution
+import os
+import shutil
+
+ep_dll = "TensorRTEp.dll"
+src_folder = r".\build\\Debug"
+dst_folder = r".\\plugin_trt_ep\\libs"
+
+class BinaryDistribution(Distribution):
+    # This ensures wheel is marked as "non-pure" (has binary files)
+    def has_ext_modules(self):
+        return True
+
+def copy_ep_dll(src_folder: str, dst_folder: str, ep_dll: str = "TensorRTEp.dll"):
+    """
+    Copy EP DLL from src_folder to dst_folder.
+    Create dst_folder if it doesn't exist.
+    Raise FileNotFoundError if the DLL is missing from src_folder.
+    """
+    src_dll_path = os.path.join(src_folder, ep_dll)
+
+    # Validate source file
+    if not os.path.isfile(src_dll_path):
+        raise FileNotFoundError(f"Source DLL not found: {src_dll_path}")
+
+    # Create destination folder if needed
+    os.makedirs(dst_folder, exist_ok=True)
+
+    dst_dll_path = os.path.join(dst_folder, ep_dll)
+
+    # Copy file
+    shutil.copy2(src_dll_path, dst_dll_path)
+
+    print(f"Copied {ep_dll} to: {dst_dll_path}")
+
+try:
+    copy_ep_dll(src_folder, dst_folder, ep_dll)
+except OSError as e:
+    # A previously staged DLL can still be packaged; with no DLL at all the
+    # wheel would be unusable, so stop instead of building it.
+    if not os.path.isfile(os.path.join(dst_folder, ep_dll)):
+        raise SystemExit(f"Error: {e}")
+    print(f"Warning: {e}. Packaging the {ep_dll} already present in {dst_folder}")
+
+setup(
+    name="plugin_trt_ep",
+    version="0.1.0",
+    packages=["plugin_trt_ep"],
+    include_package_data=True,  # include MANIFEST.in contents
+    package_data={
+        "plugin_trt_ep": ["libs/*.dll"],  # include DLLs inside the wheel
+    },
+    distclass=BinaryDistribution,
+    description="Example package including DLLs",
+    author="ORT",
+    python_requires=">=3.9",  # plugin_trt_ep uses importlib.resources.files()/as_file() (3.9+)
+)
diff --git a/plugin_execution_providers/tensorrt/cuda/cu_inc/unary_elementwise_impl.cuh
b/plugin_execution_providers/tensorrt/src/cuda/cu_inc/unary_elementwise_impl.cuh similarity index 100% rename from plugin_execution_providers/tensorrt/cuda/cu_inc/unary_elementwise_impl.cuh rename to plugin_execution_providers/tensorrt/src/cuda/cu_inc/unary_elementwise_impl.cuh diff --git a/plugin_execution_providers/tensorrt/cuda/unary_elementwise_ops_impl.cu b/plugin_execution_providers/tensorrt/src/cuda/unary_elementwise_ops_impl.cu similarity index 100% rename from plugin_execution_providers/tensorrt/cuda/unary_elementwise_ops_impl.cu rename to plugin_execution_providers/tensorrt/src/cuda/unary_elementwise_ops_impl.cu diff --git a/plugin_execution_providers/tensorrt/cuda/unary_elementwise_ops_impl.h b/plugin_execution_providers/tensorrt/src/cuda/unary_elementwise_ops_impl.h similarity index 100% rename from plugin_execution_providers/tensorrt/cuda/unary_elementwise_ops_impl.h rename to plugin_execution_providers/tensorrt/src/cuda/unary_elementwise_ops_impl.h diff --git a/plugin_execution_providers/tensorrt/cuda_allocator.cc b/plugin_execution_providers/tensorrt/src/cuda_allocator.cc similarity index 100% rename from plugin_execution_providers/tensorrt/cuda_allocator.cc rename to plugin_execution_providers/tensorrt/src/cuda_allocator.cc diff --git a/plugin_execution_providers/tensorrt/cuda_allocator.h b/plugin_execution_providers/tensorrt/src/cuda_allocator.h similarity index 100% rename from plugin_execution_providers/tensorrt/cuda_allocator.h rename to plugin_execution_providers/tensorrt/src/cuda_allocator.h diff --git a/plugin_execution_providers/tensorrt/nv_includes.h b/plugin_execution_providers/tensorrt/src/nv_includes.h similarity index 100% rename from plugin_execution_providers/tensorrt/nv_includes.h rename to plugin_execution_providers/tensorrt/src/nv_includes.h diff --git a/plugin_execution_providers/tensorrt/onnx_ctx_model_helper.cc b/plugin_execution_providers/tensorrt/src/onnx_ctx_model_helper.cc similarity index 100% rename from 
plugin_execution_providers/tensorrt/onnx_ctx_model_helper.cc rename to plugin_execution_providers/tensorrt/src/onnx_ctx_model_helper.cc diff --git a/plugin_execution_providers/tensorrt/onnx_ctx_model_helper.h b/plugin_execution_providers/tensorrt/src/onnx_ctx_model_helper.h similarity index 100% rename from plugin_execution_providers/tensorrt/onnx_ctx_model_helper.h rename to plugin_execution_providers/tensorrt/src/onnx_ctx_model_helper.h diff --git a/plugin_execution_providers/tensorrt/ort_trt_int8_cal_table.fbs.h b/plugin_execution_providers/tensorrt/src/ort_trt_int8_cal_table.fbs.h similarity index 100% rename from plugin_execution_providers/tensorrt/ort_trt_int8_cal_table.fbs.h rename to plugin_execution_providers/tensorrt/src/ort_trt_int8_cal_table.fbs.h diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider.cc b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider.cc similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_execution_provider.cc rename to plugin_execution_providers/tensorrt/src/tensorrt_execution_provider.cc diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider.def b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider.def similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_execution_provider.def rename to plugin_execution_providers/tensorrt/src/tensorrt_execution_provider.def diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider.h b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider.h similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_execution_provider.h rename to plugin_execution_providers/tensorrt/src/tensorrt_execution_provider.h diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider.lds b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider.lds similarity index 100% rename from 
plugin_execution_providers/tensorrt/tensorrt_execution_provider.lds rename to plugin_execution_providers/tensorrt/src/tensorrt_execution_provider.lds diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider_data_transfer.cc b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_data_transfer.cc similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_execution_provider_data_transfer.cc rename to plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_data_transfer.cc diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider_data_transfer.h b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_data_transfer.h similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_execution_provider_data_transfer.h rename to plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_data_transfer.h diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider_info.cc b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_info.cc similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_execution_provider_info.cc rename to plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_info.cc diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider_info.h b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_info.h similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_execution_provider_info.h rename to plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_info.h diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider_stream_support.cc b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_stream_support.cc similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_execution_provider_stream_support.cc rename to 
plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_stream_support.cc diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider_stream_support.h b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_stream_support.h similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_execution_provider_stream_support.h rename to plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_stream_support.h diff --git a/plugin_execution_providers/tensorrt/tensorrt_execution_provider_utils.h b/plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_utils.h similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_execution_provider_utils.h rename to plugin_execution_providers/tensorrt/src/tensorrt_execution_provider_utils.h diff --git a/plugin_execution_providers/tensorrt/tensorrt_provider_factory.cc b/plugin_execution_providers/tensorrt/src/tensorrt_provider_factory.cc similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_provider_factory.cc rename to plugin_execution_providers/tensorrt/src/tensorrt_provider_factory.cc diff --git a/plugin_execution_providers/tensorrt/tensorrt_provider_factory.h b/plugin_execution_providers/tensorrt/src/tensorrt_provider_factory.h similarity index 100% rename from plugin_execution_providers/tensorrt/tensorrt_provider_factory.h rename to plugin_execution_providers/tensorrt/src/tensorrt_provider_factory.h diff --git a/plugin_execution_providers/tensorrt/utils/cuda/cuda_call.h b/plugin_execution_providers/tensorrt/src/utils/cuda/cuda_call.h similarity index 100% rename from plugin_execution_providers/tensorrt/utils/cuda/cuda_call.h rename to plugin_execution_providers/tensorrt/src/utils/cuda/cuda_call.h diff --git a/plugin_execution_providers/tensorrt/utils/cuda/cuda_common.h b/plugin_execution_providers/tensorrt/src/utils/cuda/cuda_common.h similarity index 100% rename from 
plugin_execution_providers/tensorrt/utils/cuda/cuda_common.h rename to plugin_execution_providers/tensorrt/src/utils/cuda/cuda_common.h diff --git a/plugin_execution_providers/tensorrt/utils/ep_utils.h b/plugin_execution_providers/tensorrt/src/utils/ep_utils.h similarity index 100% rename from plugin_execution_providers/tensorrt/utils/ep_utils.h rename to plugin_execution_providers/tensorrt/src/utils/ep_utils.h diff --git a/plugin_execution_providers/tensorrt/utils/helper.cc b/plugin_execution_providers/tensorrt/src/utils/helper.cc similarity index 100% rename from plugin_execution_providers/tensorrt/utils/helper.cc rename to plugin_execution_providers/tensorrt/src/utils/helper.cc diff --git a/plugin_execution_providers/tensorrt/utils/make_string.h b/plugin_execution_providers/tensorrt/src/utils/make_string.h similarity index 100% rename from plugin_execution_providers/tensorrt/utils/make_string.h rename to plugin_execution_providers/tensorrt/src/utils/make_string.h diff --git a/plugin_execution_providers/tensorrt/utils/ort_graph_to_proto.h b/plugin_execution_providers/tensorrt/src/utils/ort_graph_to_proto.h similarity index 100% rename from plugin_execution_providers/tensorrt/utils/ort_graph_to_proto.h rename to plugin_execution_providers/tensorrt/src/utils/ort_graph_to_proto.h diff --git a/plugin_execution_providers/tensorrt/utils/parse_string.h b/plugin_execution_providers/tensorrt/src/utils/parse_string.h similarity index 100% rename from plugin_execution_providers/tensorrt/utils/parse_string.h rename to plugin_execution_providers/tensorrt/src/utils/parse_string.h diff --git a/plugin_execution_providers/tensorrt/utils/path_string.h b/plugin_execution_providers/tensorrt/src/utils/path_string.h similarity index 100% rename from plugin_execution_providers/tensorrt/utils/path_string.h rename to plugin_execution_providers/tensorrt/src/utils/path_string.h diff --git a/plugin_execution_providers/tensorrt/utils/provider_options.h 
b/plugin_execution_providers/tensorrt/src/utils/provider_options.h similarity index 100% rename from plugin_execution_providers/tensorrt/utils/provider_options.h rename to plugin_execution_providers/tensorrt/src/utils/provider_options.h diff --git a/plugin_execution_providers/tensorrt/utils/provider_options_utils.h b/plugin_execution_providers/tensorrt/src/utils/provider_options_utils.h similarity index 100% rename from plugin_execution_providers/tensorrt/utils/provider_options_utils.h rename to plugin_execution_providers/tensorrt/src/utils/provider_options_utils.h