Skip to content

Commit e3e64f5

Browse files
committed
Fix sampleCudla, update cmake toolchains, improve docs (NVIDIA#4696)
Signed-off-by: Po-Wei Wang (Vincent) <poweiw@nvidia.com>
1 parent 40c8707 commit e3e64f5

File tree

9 files changed

+141
-10
lines changed

9 files changed

+141
-10
lines changed

README.md

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,94 @@ For Linux platforms, we recommend that you generate a docker container for build
239239
- `GPU_ARCHS`: GPU (SM) architectures to target. By default we generate CUDA code for all major SMs. Specific SM versions can be specified here as a quoted space-separated list to reduce compilation time and binary size. A table of compute capabilities of NVIDIA GPUs can be found [here](https://developer.nvidia.com/cuda-gpus). Examples: - NVIDIA A100: `-DGPU_ARCHS="80"` - RTX 50 series: `-DGPU_ARCHS="120"` - Multiple SMs: `-DGPU_ARCHS="80 120"`
240240
- `TRT_PLATFORM_ID`: Bare-metal build (unlike containerized cross-compilation). Currently supported options: `x86_64` (default).
241241

242+
## Building TensorRT DriveOS Samples
243+
244+
- Generate Makefiles and build
245+
246+
**Example: Cross-Compile for DriveOS 7 (DOS7) Linux (aarch64)**
247+
248+
```bash
249+
cd $TRT_OSSPATH
250+
mkdir -p build && cd build
251+
cmake .. -DBUILD_SAMPLES=ON -DBUILD_PLUGINS=OFF -DBUILD_PARSERS=OFF -DTRT_OUT_DIR=`pwd`/bin_dynamic_cross -DTRT_LIB_DIR=$TRT_LIBPATH -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_aarch64_dos_cross.toolchain
252+
make -j$(nproc)
253+
```
254+
255+
**Example: Cross-Compile for DOS6.5 Linux (aarch64)**
256+
257+
```bash
258+
cd $TRT_OSSPATH
259+
mkdir -p build && cd build
260+
cmake .. -DBUILD_SAMPLES=ON -DBUILD_PLUGINS=OFF -DBUILD_PARSERS=OFF -DTRT_OUT_DIR=`pwd`/bin_dynamic_cross -DTRT_LIB_DIR=$TRT_LIBPATH -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_aarch64_dos_cross.toolchain -DCUDA_VERSION=11.4 -DGPU_ARCHS=87
261+
make -j$(nproc)
262+
```
263+
264+
**Example: Native build for DOS6.5 and DOS7 Linux (aarch64)**
265+
266+
```bash
267+
cd $TRT_OSSPATH
268+
mkdir -p build && cd build
269+
cmake .. -DTRT_LIB_DIR=$TRT_LIBPATH -DTRT_OUT_DIR=`pwd`/out -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_aarch64-native.toolchain -DBUILD_SAMPLES=ON -DBUILD_PLUGINS=OFF -DBUILD_PARSERS=OFF
270+
make -j$(nproc)
271+
```
272+
273+
**Example: Cross-Compile for DOS6.5 QNX (aarch64)**
274+
275+
```bash
276+
cd $TRT_OSSPATH
277+
mkdir -p build && cd build
278+
export CUDA_VERSION=11.4
279+
export CUDA=cuda-$CUDA_VERSION
280+
export CUDA_ROOT=/usr/local/cuda-safe-$CUDA_VERSION
281+
export QNX_BASE=/drive/toolchains/qnx_toolchain # Set to your QNX toolchain installation path
282+
export QNX_HOST=$QNX_BASE/host/linux/x86_64/
283+
export QNX_TARGET=$QNX_BASE/target/qnx7/
284+
export PATH=$PATH:$QNX_HOST/usr/bin
285+
cmake .. -DBUILD_SAMPLES=ON -DBUILD_PLUGINS=OFF -DBUILD_PARSERS=OFF -DBUILD_SAFE_SAMPLES=OFF -DCMAKE_CUDA_COMPILER=$CUDA_ROOT/bin/nvcc -DTRT_OUT_DIR=`pwd`/bin_dynamic_cross -DTRT_LIB_DIR=$TRT_LIBPATH -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_qnx.toolchain -DCUDA_VERSION=$CUDA_VERSION -DGPU_ARCHS=87
286+
make -j$(nproc)
287+
```
288+
289+
> NOTE: Set `QNX_BASE` to your QNX toolchain installation path.
290+
> If your CUDA version differs from the one in the example, change the exported `CUDA_VERSION` value (it is reused by `CUDA`, `CUDA_ROOT`, and `-DCUDA_VERSION`); for examples that do not export it, add `-DCUDA_VERSION=<version>` to the cmake command.
291+
292+
**Example: Cross-Compile for DOS6.5 QNX Safety (aarch64)**
293+
294+
```bash
295+
cd $TRT_OSSPATH
296+
mkdir -p build && cd build
297+
export CUDA_VERSION=11.4
298+
export QNX_BASE=/drive/toolchains/qnx_toolchain # Set to your QNX toolchain installation path
299+
export QNX_HOST=$QNX_BASE/host/linux/x86_64/
300+
export QNX_TARGET=$QNX_BASE/target/qnx7/
301+
export PATH=$PATH:$QNX_HOST/usr/bin
302+
export CUDA=cuda-$CUDA_VERSION
303+
export CUDA_ROOT=/usr/local/cuda-safe-$CUDA_VERSION
304+
cmake .. -DBUILD_SAMPLES=OFF -DBUILD_SAFE_SAMPLES=ON -DBUILD_PLUGINS=OFF -DBUILD_PARSERS=OFF -DTRT_SAFETY_INFERENCE_ONLY=ON -DTRT_OUT_DIR=`pwd`/bin_dynamic_cross -DTRT_LIB_DIR=$TRT_LIBPATH -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_qnx_safe.toolchain -DCUDA_VERSION=$CUDA_VERSION -DCMAKE_CUDA_COMPILER=$CUDA_ROOT/bin/nvcc -DGPU_ARCHS=87
305+
make -j$(nproc)
306+
```
307+
308+
> NOTE: Set `QNX_BASE` to your QNX toolchain installation path.
309+
> If your CUDA version differs from the one in the example, change the exported `CUDA_VERSION` value (it is reused by `CUDA`, `CUDA_ROOT`, and `-DCUDA_VERSION`); for examples that do not export it, add `-DCUDA_VERSION=<version>` to the cmake command.
310+
311+
**Example: Cross-Compile for DOS7 QNX (aarch64)**
312+
313+
```bash
314+
cd $TRT_OSSPATH
315+
mkdir -p build && cd build
316+
export CUDA_VERSION=13.1
317+
export CUDA=cuda-$CUDA_VERSION
318+
export CUDA_ROOT=/usr/local/cuda-safe-$CUDA_VERSION
319+
export QNX_BASE=/drive/toolchains/qnx_toolchain # Set to your QNX toolchain installation path
320+
export QNX_HOST=$QNX_BASE/host/linux/x86_64/
321+
export QNX_TARGET=$QNX_BASE/target/qnx/
322+
export PATH=$PATH:$QNX_HOST/usr/bin
323+
cmake .. -DBUILD_SAMPLES=ON -DBUILD_PLUGINS=OFF -DBUILD_PARSERS=OFF -DBUILD_SAFE_SAMPLES=OFF -DCMAKE_CUDA_COMPILER=$CUDA_ROOT/bin/nvcc -DTRT_OUT_DIR=`pwd`/bin_dynamic_cross -DTRT_LIB_DIR=$TRT_LIBPATH -DCMAKE_TOOLCHAIN_FILE=$TRT_OSSPATH/cmake/toolchains/cmake_qnx.toolchain -DCUDA_VERSION=$CUDA_VERSION -DGPU_ARCHS=110
324+
make -j$(nproc)
325+
```
326+
327+
> NOTE: Set `QNX_BASE` to your QNX toolchain installation path.
328+
> If your CUDA version differs from the one in the example, change the exported `CUDA_VERSION` value (it is reused by `CUDA`, `CUDA_ROOT`, and `-DCUDA_VERSION`); for examples that do not export it, add `-DCUDA_VERSION=<version>` to the cmake command.
329+
242330
# References
243331

244332
## TensorRT Resources

cmake/toolchains/cmake_aarch64_dos_cross.toolchain

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,10 @@ set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER} CACHE STRING "" FORCE)
5353
set(CMAKE_CUDA_FLAGS "-I${CUDA_INCLUDE_DIRS} -Xcompiler=\"-fPIC ${CMAKE_CXX_FLAGS}\"" CACHE STRING "" FORCE)
5454
set(CMAKE_CUDA_COMPILER_FORCED TRUE)
5555

56-
set(CUDA_LIBS -L${CUDA_ROOT}/lib)
57-
56+
set(CUDA_LIBS -L${CUDA_ROOT}/lib/stubs -L${CUDA_ROOT}/lib)
5857
set(ADDITIONAL_PLATFORM_LIB_FLAGS ${CUDA_LIBS} -lcublas -lcudart -lstdc++ -lm)
5958

59+
link_directories(${CUDA_ROOT}/lib/stubs)
6060
link_directories(${CUDA_ROOT}/lib)
61+
62+
set(CMAKE_EXE_LINKER_FLAGS "-Wl,-rpath-link,${CUDA_ROOT}/lib/stubs -Wl,--allow-shlib-undefined" CACHE STRING "" FORCE)

cmake/toolchains/cmake_qnx.toolchain

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,8 @@ include_directories(BEFORE SYSTEM
126126
${CUDA_ROOT}/targets/aarch64-qnx/include
127127
)
128128

129+
set(CUDA_TARGET_LIB_DIR "${CUDA_ROOT}/targets/aarch64-qnx/lib" CACHE PATH "CUDA target library directory")
130+
129131
# And, well, as another consequence of that weirdness, we need to ensure that the cuda libs are on the link path.
130132
add_link_options(
131133
"-L${CUDA_ROOT}/targets/aarch64-qnx/lib"

cmake/toolchains/cmake_qnx_safe.toolchain

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ include_directories(BEFORE SYSTEM
121121
${CUDA_ROOT}/targets/aarch64-qnx-safe/include
122122
)
123123

124+
set(CUDA_TARGET_LIB_DIR "${CUDA_ROOT}/targets/aarch64-qnx-safe/lib" CACHE PATH "CUDA target library directory")
125+
124126
link_directories(
125127
${CUDA_ROOT}/targets/aarch64-qnx-safe/lib
126128
${CUDA_ROOT}/targets/aarch64-qnx-safe/lib/stubs

samples/CMakeLists.txt

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,24 @@ if(${TRT_BUILD_SAMPLES})
8585
add_sample(sampleOnnxMnistCoordConvAC)
8686
endif()
8787

88+
set(CUDLA_SUPPORTED OFF)
8889
if(${TRT_BUILD_ENABLE_DLA} AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND NOT WIN32)
90+
if(CMAKE_SYSTEM_NAME STREQUAL "QNX" AND CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
91+
# On QNX, cuDLA is supported on DOS 6 (CUDA 11.4) but not on DOS 7+ (CUDA 12.0+)
92+
message(STATUS "Skipping sampleCudla: cuDLA not supported on QNX with CUDA >= 12.0 (DriveOS 7)")
93+
else()
94+
set(CUDLA_SUPPORTED ON)
95+
endif()
96+
endif()
97+
if(CUDLA_SUPPORTED)
8998
if(TARGET CUDA::cudla)
9099
set(CUDLA_TARGET CUDA::cudla CACHE STRING "cuDLA library target")
91100
else()
92101
find_library(CUDLA_TARGET cudla HINTS
93-
${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib
94-
${CUDA_ROOT}/lib64 ${CUDA_ROOT}/lib
95-
/usr/local/cuda/targets/aarch64-linux/lib)
102+
${CUDA_TARGET_LIB_DIR}/stubs ${CUDA_TARGET_LIB_DIR}
103+
${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib
104+
${CUDA_ROOT}/lib/stubs ${CUDA_ROOT}/lib64 ${CUDA_ROOT}/lib
105+
/usr/local/cuda/targets/aarch64-linux/lib/stubs /usr/local/cuda/targets/aarch64-linux/lib)
96106
endif()
97107
if(CUDLA_TARGET)
98108
add_sample(sampleCudla)
@@ -152,11 +162,21 @@ else()
152162
add_subdirectory(sampleOnnxMnistCoordConvAC)
153163
add_subdirectory(sampleProgressMonitor)
154164
add_subdirectory(trtexec)
155-
if(TRT_BUILD_ENABLE_DLA AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND NOT CMAKE_SYSTEM_NAME STREQUAL "QNX" AND NOT WIN32)
165+
set(CUDLA_SUPPORTED OFF)
166+
if(TRT_BUILD_ENABLE_DLA AND CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" AND NOT WIN32)
167+
if(CMAKE_SYSTEM_NAME STREQUAL "QNX" AND CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
168+
# On QNX, cuDLA is supported on DOS 6 (CUDA 11.4) but not on DOS 7+ (CUDA 12.0+)
169+
message(STATUS "Skipping sampleCudla: cuDLA not supported on QNX with CUDA >= 12.0 (DriveOS 7)")
170+
else()
171+
set(CUDLA_SUPPORTED ON)
172+
endif()
173+
endif()
174+
if(CUDLA_SUPPORTED)
156175
find_library(CUDLA_TARGET cudla HINTS
157-
${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib
158-
${CUDA_ROOT}/lib64 ${CUDA_ROOT}/lib
159-
/usr/local/cuda/targets/aarch64-linux/lib)
176+
${CUDA_TARGET_LIB_DIR}/stubs ${CUDA_TARGET_LIB_DIR}
177+
${CUDA_TOOLKIT_ROOT_DIR}/lib/stubs ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ${CUDA_TOOLKIT_ROOT_DIR}/lib
178+
${CUDA_ROOT}/lib/stubs ${CUDA_ROOT}/lib64 ${CUDA_ROOT}/lib
179+
/usr/local/cuda/targets/aarch64-linux/lib/stubs /usr/local/cuda/targets/aarch64-linux/lib)
160180
if(CUDLA_TARGET)
161181
add_subdirectory(sampleCudla)
162182
else()

samples/common/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ target_include_directories(trt_samples_common PUBLIC
6262

6363
# OSS build mode
6464
if(NOT TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW)
65+
target_sources(trt_samples_common PRIVATE
66+
${CMAKE_CURRENT_SOURCE_DIR}/../../shared/utils/cacheUtils.cpp
67+
${CMAKE_CURRENT_SOURCE_DIR}/../../shared/utils/fileLock.cpp
68+
)
6569
target_link_libraries(trt_samples_common PUBLIC
6670
Threads::Threads
6771
${CUDA_LIBRARIES}

samples/common/sampleInference.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* SPDX-FileCopyrightText: Copyright (c) 1993-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
* SPDX-FileCopyrightText: Copyright (c) 1993-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
33
* SPDX-License-Identifier: Apache-2.0
44
*
55
* Licensed under the Apache License, Version 2.0 (the "License");

samples/sampleCudla/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
#
17+
if (${TRT_BUILD_ENABLE_NEW_SAMPLES_FLOW})
1718

1819
add_executable(sample_cudla sampleCudla.cpp)
1920
target_link_libraries(sample_cudla PRIVATE trt_samples_common TRT_SAMPLES::tensorrt ${CUDLA_TARGET})
@@ -25,3 +26,14 @@ installLibraries(
2526
OPTIONAL
2627
COMPONENT internal
2728
)
29+
30+
else()
31+
32+
set(SAMPLE_SOURCES sampleCudla.cpp)
33+
34+
include(../CMakeSamplesTemplate.txt)
35+
36+
target_link_libraries(${TARGET_NAME} ${CUDLA_TARGET})
37+
target_compile_definitions(${TARGET_NAME} PRIVATE ENABLE_DLA=1)
38+
39+
endif()

samples/trtSafeExec/trtSafeExec.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ nvinfer2::safe::TypedArray createTypedArray(
303303
case DataType::kINT64: return nvinfer2::safe::TypedArray(static_cast<int64_t*>(ptr), bufferSize);
304304
case DataType::kINT32: return nvinfer2::safe::TypedArray(static_cast<int32_t*>(ptr), bufferSize);
305305
case DataType::kINT8: return nvinfer2::safe::TypedArray(static_cast<int8_t*>(ptr), bufferSize);
306+
case DataType::kUINT8: return nvinfer2::safe::TypedArray(static_cast<uint8_t*>(ptr), bufferSize);
306307
case DataType::kBOOL: return nvinfer2::safe::TypedArray(static_cast<bool*>(ptr), bufferSize);
307308
default:
308309
{

0 commit comments

Comments
 (0)