Skip to content

Commit 9973b2f

Browse files
authored
TensorRT 10.15 OSS Release (NVIDIA#4692)
Signed-off-by: Kevin Chen <kevinch@nvidia.com>
1 parent 0c80196 commit 9973b2f

File tree

212 files changed

+11806
-2602
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

212 files changed

+11806
-2602
lines changed

.github/workflows/blossom-ci.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,6 @@ jobs:
4242
github.actor == 'rajeevsrao' ||
4343
github.actor == 'kevinch-nv' ||
4444
github.actor == 'ttyio' ||
45-
github.actor == 'samurdhikaru' ||
4645
github.actor == 'zerollzeng' ||
4746
github.actor == 'nvpohanh' ||
4847
github.actor == 'poweiw'

CHANGELOG.md

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,32 @@
11
# TensorRT OSS Release Changelog
22

3+
## 10.15 GA - 2026-2-2
4+
5+
- Sample changes
6+
- Added two safety samples, sampleSafeMNIST and sampleSafePluginV3, to demonstrate how to use TensorRT with the safety workflow.
7+
- Added trtSafeExec to accompany the safety workflow release.
8+
- Added python/stream_writer to showcase how to serialize a TensorRT engine directly to a custom stream using the IStreamWriter interface, rather than writing to a file or a contiguous memory buffer.
9+
- Added python/strongly_type_autocast to demonstrate how to convert FP32 ONNX models to mixed precision (FP32-FP16) using ModelOpt's AutoCast tool and subsequently building the engine with TensorRT's Strong Typing mode.
10+
- Added sampleCudla to demonstrate how to use the cuDLA API to run TensorRT engines on the Deep Learning Accelerator (DLA) hardware, which is available on NVIDIA Jetson and DRIVE platforms.
11+
- Deprecated sampleCharRNN.
12+
13+
- Plugin changes
14+
- Deprecated bertQKVToContextPlugin; it will be removed in a future release. No alternatives are planned to be provided.
15+
16+
- Parser changes
17+
- Added support for `RotaryEmbedding`, `RMSNormalization`, and `TensorScatter` for improved LLM support.
18+
- Added more specialized quantization ops for models quantized through TensorRT ModelOptimizer.
19+
- Added `kREPORT_CAPABILITY_DLA` flag to enable per-node validation when building DLA engines through TensorRT.
20+
- Added `kENABLE_PLUGIN_OVERRIDE` flag to enable TensorRT plugin override for nodes that share names with user plugins.
21+
- Improved error reporting for models with multiple subgraphs, such as `Loop` or `Scan` nodes.
22+
23+
- Demo changes
24+
- demoDiffusion:
25+
- Stable Diffusion 1.5, 2.0 and 2.1 pipelines have been deprecated and removed.
26+
- Added support for the Wan2.2-T2V-A14B Text to Video pipeline.
27+
28+
29+
330
## 10.14 GA - 2025-11-7
431
- Sample changes
532
- Replace all pycuda usages with cuda-python APIs

CMakeLists.txt

Lines changed: 144 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ endif()
6767
set(CMAKE_SKIP_BUILD_RPATH True)
6868

6969
# CUDA targets
70-
set(DEFAULT_CUDA_VERSION 13.0.0)
70+
set(DEFAULT_CUDA_VERSION 13.1.0)
7171
set_ifndef(CUDA_VERSION ${DEFAULT_CUDA_VERSION})
7272
message(STATUS "CUDA version set to ${CUDA_VERSION}")
7373

@@ -92,7 +92,7 @@ endif()
9292
set(BERT_GENCODES)
9393
# Generate SASS for each architecture
9494
foreach(arch ${CMAKE_CUDA_ARCHITECTURES})
95-
if (${arch} GREATER_EQUAL 75)
95+
if (${arch} GREATER_EQUAL 75 AND NOT ${arch} EQUAL 110)
9696
set(BERT_GENCODES "${BERT_GENCODES} -gencode arch=compute_${arch},code=sm_${arch}")
9797
endif()
9898
set(GENCODES "${GENCODES} -gencode arch=compute_${arch},code=sm_${arch}")
@@ -101,7 +101,7 @@ endforeach()
101101
# Generate PTX for the last architecture in the list.
102102
list(GET CMAKE_CUDA_ARCHITECTURES -1 LATEST_SM)
103103
set(GENCODES "${GENCODES} -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}")
104-
if (${LATEST_SM} GREATER_EQUAL 75)
104+
if (${LATEST_SM} GREATER_EQUAL 75 AND NOT ${LATEST_SM} EQUAL 110) # Test LATEST_SM itself: 'arch' is the foreach() loop variable and is not valid after endforeach().
105105
set(BERT_GENCODES "${BERT_GENCODES} -gencode arch=compute_${LATEST_SM},code=compute_${LATEST_SM}")
106106
endif()
107107

@@ -122,6 +122,131 @@ endif(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
122122
option(BUILD_PLUGINS "Build TensorRT plugin" ON)
123123
option(BUILD_PARSERS "Build TensorRT parsers" ON)
124124
option(BUILD_SAMPLES "Build TensorRT samples" ON)
125+
option(BUILD_SAFE_SAMPLES "Build TensorRT safety samples" OFF)
126+
option(TRT_SAFETY_INFERENCE_ONLY "Build only the safety inference components (no safety builders)" OFF)
127+
128+
############################################################################################
129+
# Early dependency discovery
130+
# These must be found before they are used in target definitions
131+
132+
set(THREADS_PREFER_PTHREAD_FLAG ON)
133+
# QNX has built-in threading support and doesn't need FindThreads
134+
if(NOT CMAKE_SYSTEM_NAME STREQUAL "QNX")
135+
find_package(Threads REQUIRED)
136+
else()
137+
# For QNX, create a dummy Threads::Threads target if it doesn't exist
138+
if(NOT TARGET Threads::Threads)
139+
add_library(Threads::Threads INTERFACE IMPORTED GLOBAL)
140+
# QNX threading is built into libc, no explicit linking needed
141+
endif()
142+
endif()
143+
144+
## find_package(CUDA) is broken for cross-compilation. Enable CUDA language instead.
145+
if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
146+
find_package(CUDA ${CUDA_VERSION} REQUIRED)
147+
endif()
148+
149+
include_directories(
150+
${CUDA_INCLUDE_DIRS}
151+
)
152+
153+
############################################################################################
154+
# Safety runtime libraries (libnvinfer_safe) used by safety samples and
155+
# inference-only builds.
156+
if(BUILD_SAFE_SAMPLES OR TRT_SAFETY_INFERENCE_ONLY)
157+
set(TRT_NVINFER_SAFE_NAME "nvinfer_safe")
158+
159+
# Shared safety runtime.
160+
find_library(nvinfer_safe_path
161+
${TRT_NVINFER_SAFE_NAME}
162+
PATHS ${TRT_LIB_DIR}
163+
NO_CMAKE_FIND_ROOT_PATH
164+
)
165+
if(NOT nvinfer_safe_path)
166+
message(FATAL_ERROR "nvinfer_safe library not found. Please ensure safety runtime libraries are available in TRT_LIB_DIR ('${TRT_LIB_DIR}').")
167+
endif()
168+
add_library(TRTSAFE::nvinfer_safe_shared SHARED IMPORTED)
169+
set_target_properties(TRTSAFE::nvinfer_safe_shared PROPERTIES IMPORTED_LOCATION ${nvinfer_safe_path})
170+
171+
target_link_libraries(TRTSAFE::nvinfer_safe_shared INTERFACE cuda) # nvinfer_safe needs the cuda driver.
172+
173+
# Debug runtime library (provides debugging features like tensor dumping).
174+
set(nvinfer_safe_debug_lib_name "${TRT_NVINFER_SAFE_NAME}_debug")
175+
find_library(nvinfer_safe_debug_path
176+
${nvinfer_safe_debug_lib_name}
177+
PATHS ${TRT_LIB_DIR}
178+
NO_CMAKE_FIND_ROOT_PATH
179+
)
180+
if(NOT nvinfer_safe_debug_path)
181+
message(FATAL_ERROR "nvinfer_safe_debug library not found. Please ensure debug runtime library is available in TRT_LIB_DIR.")
182+
endif()
183+
add_library(TRTSAFE::nvinfer_safe_debug SHARED IMPORTED)
184+
set_target_properties(TRTSAFE::nvinfer_safe_debug PROPERTIES IMPORTED_LOCATION ${nvinfer_safe_debug_path})
185+
186+
# Headers for the safety runtime.
187+
# Try to find include directory relative to lib dir first, then fall back to standard locations
188+
if(EXISTS "${TRT_LIB_DIR}/../include/NvInfer.h")
189+
target_include_directories(TRTSAFE::nvinfer_safe_shared INTERFACE ${TRT_LIB_DIR}/../include)
190+
target_include_directories(TRTSAFE::nvinfer_safe_debug INTERFACE ${TRT_LIB_DIR}/../include)
191+
elseif(EXISTS "/usr/include/NvInfer.h")
192+
target_include_directories(TRTSAFE::nvinfer_safe_shared INTERFACE /usr/include)
193+
target_include_directories(TRTSAFE::nvinfer_safe_debug INTERFACE /usr/include)
194+
elseif(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/include/NvInfer.h")
195+
target_include_directories(TRTSAFE::nvinfer_safe_shared INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
196+
target_include_directories(TRTSAFE::nvinfer_safe_debug INTERFACE ${CMAKE_CURRENT_SOURCE_DIR}/include)
197+
else()
198+
message(WARNING "Could not find TensorRT headers. Please ensure they are installed.")
199+
endif()
200+
201+
# On QNX, TRT depends on DLA symbols stored in the DriveOS PDK.
202+
# Since trying to find these shared libs at link time will be difficult, we ignore unresolved symbols in shared libs.
203+
if(CMAKE_SYSTEM_NAME STREQUAL "QNX")
204+
target_link_options(TRTSAFE::nvinfer_safe_shared INTERFACE LINKER:--unresolved-symbols=ignore-in-shared-libs)
205+
target_link_options(TRTSAFE::nvinfer_safe_debug INTERFACE LINKER:--unresolved-symbols=ignore-in-shared-libs)
206+
endif()
207+
endif()
208+
209+
# OSS safety inference-only mode: require safety samples and disable enterprise
210+
# components.
211+
if(TRT_SAFETY_INFERENCE_ONLY)
212+
if(NOT BUILD_SAFE_SAMPLES)
213+
set(BUILD_SAFE_SAMPLES ON CACHE BOOL "Build TensorRT safety samples" FORCE)
214+
endif()
215+
216+
set(TRT_SAFETY_INFERENCE_ONLY ON CACHE BOOL "" FORCE)
217+
218+
# Disable enterprise OSS components for this configuration.
219+
set(BUILD_PLUGINS OFF CACHE BOOL "" FORCE)
220+
set(BUILD_PARSERS OFF CACHE BOOL "" FORCE)
221+
set(BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
222+
223+
# Add CUDA library directory early so all samples can find it
224+
if(CUDA_TOOLKIT_ROOT_DIR)
225+
link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib ${CUDA_TOOLKIT_ROOT_DIR}/targets/x86_64-linux/lib)
226+
endif()
227+
228+
# Interface target for safety samples in inference-only mode.
229+
add_library(trt_global_definitions INTERFACE)
230+
231+
target_link_libraries(trt_global_definitions INTERFACE
232+
TRTSAFE::nvinfer_safe_shared
233+
cudart
234+
Threads::Threads
235+
)
236+
237+
if(NOT WIN32 AND NOT CMAKE_SYSTEM_NAME STREQUAL "QNX")
238+
target_link_libraries(trt_global_definitions INTERFACE dl rt)
239+
endif()
240+
target_include_directories(trt_global_definitions INTERFACE
241+
${CMAKE_CURRENT_SOURCE_DIR}/include
242+
${CMAKE_CURRENT_SOURCE_DIR}/samples/common
243+
${CMAKE_CURRENT_SOURCE_DIR}/shared
244+
${CUDA_INCLUDE_DIRS}
245+
)
246+
target_compile_options(trt_global_definitions INTERFACE
247+
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>
248+
)
249+
endif()
125250

126251
# C++17
127252
set(CMAKE_CXX_STANDARD 17)
@@ -160,8 +285,6 @@ message(STATUS "cuDNN version set to ${CUDNN_VERSION}")
160285
set_ifndef(PROTOBUF_VERSION ${DEFAULT_PROTOBUF_VERSION})
161286
message(STATUS "Protobuf version set to ${PROTOBUF_VERSION}")
162287

163-
set(THREADS_PREFER_PTHREAD_FLAG ON)
164-
find_package(Threads REQUIRED)
165288
if (BUILD_PLUGINS OR BUILD_PARSERS)
166289
include(third_party/protobuf.cmake)
167290
endif()
@@ -171,14 +294,6 @@ if(NOT CUB_ROOT_DIR)
171294
endif()
172295
endif()
173296

174-
## find_package(CUDA) is broken for cross-compilation. Enable CUDA language instead.
175-
if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
176-
find_package(CUDA ${CUDA_VERSION} REQUIRED)
177-
endif()
178-
179-
include_directories(
180-
${CUDA_INCLUDE_DIRS}
181-
)
182297
if(BUILD_PARSERS)
183298
configure_protobuf(${PROTOBUF_VERSION})
184299
endif()
@@ -206,7 +321,16 @@ find_library_create_target(nvinfer ${nvinfer_lib_name} SHARED "${TRT_LIB_DIR}")
206321
if (DEFINED USE_CUGFX)
207322
find_library(CUDART_LIB cugfx_dll HINTS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib/x64 lib64)
208323
else()
209-
find_library(CUDART_LIB cudart_static HINTS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib/x64 lib64)
324+
# DriveOS platforms use cudart.so instead of cudart_static. This isn't the most sophisticated check, but it's correct.
325+
if(CUDA_VERSION VERSION_GREATER_EQUAL 12.0)
326+
set(CUDART_LIB_NAME cudart_static)
327+
set(CMAKE_CUDA_RUNTIME_LIBRARY "static")
328+
else()
329+
set(CUDART_LIB_NAME cudart)
330+
set(CMAKE_CUDA_RUNTIME_LIBRARY "shared")
331+
endif()
332+
333+
find_library(CUDART_LIB ${CUDART_LIB_NAME} HINTS ${CUDA_TOOLKIT_ROOT_DIR} PATH_SUFFIXES lib lib/x64 lib64)
210334
endif()
211335

212336
if (NOT MSVC)
@@ -241,6 +365,11 @@ else()
241365
find_library_create_target(${nvonnxparser_lib_name} ${nvonnxparser_lib_name} SHARED "${HINT_PATHS}")
242366
endif()
243367

244-
if(BUILD_SAMPLES)
368+
# Samples:
369+
# - BUILD_SAMPLES controls the regular (enterprise/OSS) samples via
370+
# samples/CMakeLists.txt.
371+
# - BUILD_SAFE_SAMPLES controls the safety samples (builder + infer parts).
372+
# Both can be enabled at the same time if desired.
373+
if(BUILD_SAMPLES OR BUILD_SAFE_SAMPLES)
245374
add_subdirectory(samples)
246375
endif()

0 commit comments

Comments
 (0)