From 99c58829ea377a2fbee53cbf4cf01d000d5b4eab Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Thu, 3 Jul 2025 08:34:44 -0700 Subject: [PATCH 1/9] Update file names --- build.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/build.py b/build.py index 716b227c42..8fa31f25f1 100755 --- a/build.py +++ b/build.py @@ -1259,7 +1259,7 @@ def create_dockerfile_linux( # stage of the PyTorch backend if not FLAGS.enable_gpu and ("pytorch" in backends): df += """ -RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.12 backends/pytorch/libtorch_cuda.so +RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.13 backends/pytorch/libtorch_cuda.so """ if "tensorrtllm" in backends: df += """ @@ -1554,17 +1554,16 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine): df += """ RUN mkdir -p /usr/local/cuda/lib64/stubs COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusparse.so /usr/local/cuda/lib64/stubs/libcusparse.so.12 -COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.11 +COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.12 COPY --from=min_container /usr/local/cuda/lib64/stubs/libcurand.so /usr/local/cuda/lib64/stubs/libcurand.so.10 -COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.11 -COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so /usr/local/cuda/lib64/stubs/libcublas.so.12 -COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.12 -COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.11 +COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.12 +COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so 
/usr/local/cuda/lib64/stubs/libcublas.so.13 +COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.13 RUN mkdir -p /usr/local/cuda/targets/{cuda_arch}-linux/lib -COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. -COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. -COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. +COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. +COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. +COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. COPY --from=min_container /usr/local/cuda/lib64/libcufile.so.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. RUN mkdir -p /opt/hpcx/ucc/lib/ /opt/hpcx/ucx/lib/ From 99d7476159c678ae2e8a2fdeb0b85f6dc0f75d8b Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 16 Jul 2025 17:28:32 -0700 Subject: [PATCH 2/9] temp: DCGM - internal repository. 
--- Dockerfile.sdk | 6 ++++++ build.py | 36 +++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index f7bbf64432..66fd9ef95f 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -263,7 +263,13 @@ RUN pip3 install --upgrade "numpy<2" pillow attrdict && \ "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \ xargs pip3 install --upgrade +ARG DCGM_SOURCE_LIST # Install DCGM +RUN if [ -n "${DCGM_SOURCE_LIST}" ]; then \ + echo "deb [trusted=yes] $DCGM_SOURCE_LIST / " > /etc/apt/sources.list.d/dcgm-list.list && \ + cat /etc/apt/sources.list.d/dcgm-list.list; \ + fi + RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \ [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" && \ curl -o /tmp/cuda-keyring.deb \ diff --git a/build.py b/build.py index 8fa31f25f1..3900eefb7a 100755 --- a/build.py +++ b/build.py @@ -841,7 +841,15 @@ def tensorrtllm_cmake_args(images): return cargs -def install_dcgm_libraries(dcgm_version, target_machine): +def install_dcgm_libraries(dcgm_version): + if os.getenv("DCGM_SOURCE_LIST"): + dcgm_source_list = """ +RUN echo "deb [trusted=yes] {} / " > /etc/apt/sources.list.d/dcgm-list.list \\ + && cat /etc/apt/sources.list.d/dcgm-list.list""".format( + os.getenv("DCGM_SOURCE_LIST") + ) + else: + dcgm_source_list = "" if dcgm_version == "": fail( "unable to determine default repo-tag, DCGM version not known for {}".format( @@ -852,11 +860,13 @@ def install_dcgm_libraries(dcgm_version, target_machine): else: # RHEL has the same install instructions for both aarch64 and x86 if target_platform() == "rhel": - if target_machine == "aarch64": - return """ + return ( + dcgm_source_list + + """ ENV DCGM_VERSION {} # Install DCGM. 
Steps from https://developer.nvidia.com/dcgm#Downloads -RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo \\ +RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) && \\ + && dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${{ARCH}}/cuda-rhel8.repo \\ && dnf clean expire-cache \\ && dnf install --assumeyes \\ datacenter-gpu-manager-4-core=1:{} \\ @@ -876,13 +886,16 @@ def install_dcgm_libraries(dcgm_version, target_machine): """.format( dcgm_version, dcgm_version, dcgm_version ) + ) else: - if target_machine == "aarch64": - return """ + return ( + dcgm_source_list + + """ ENV DCGM_VERSION {} # Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads -RUN curl -o /tmp/cuda-keyring.deb \\ - https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb \\ +RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) \\ + && curl -o /tmp/cuda-keyring.deb \\ + https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${{ARCH}}/cuda-keyring_1.1-1_all.deb \\ && apt install /tmp/cuda-keyring.deb \\ && rm /tmp/cuda-keyring.deb \\ && apt update \\ @@ -907,6 +920,7 @@ def install_dcgm_libraries(dcgm_version, target_machine): """.format( dcgm_version, dcgm_version, dcgm_version ) + ) def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap): @@ -1007,7 +1021,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap): && mv /tmp/boost_1_80_0/boost /usr/include/boost """ if FLAGS.enable_gpu: - df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine()) + df += install_dcgm_libraries(argmap["DCGM_VERSION"]) df += """ ENV TRITON_SERVER_VERSION ${TRITON_VERSION} ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION} @@ -1120,7 +1134,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap): """ if FLAGS.enable_gpu: - df 
+= install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine()) + df += install_dcgm_libraries(argmap["DCGM_VERSION"]) df += """ ENV TRITON_SERVER_VERSION ${TRITON_VERSION} @@ -1412,7 +1426,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach df += fastertransformer_buildscript.create_postbuild(is_multistage_build=False) if enable_gpu: - df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine) + df += install_dcgm_libraries(argmap["DCGM_VERSION"]) # This segment will break the RHEL SBSA build. Need to determine whether # this is necessary to incorporate. if target_platform() != "rhel": From ea7cfa82272dd872430aad2cf9c8c5cf23ac0360 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Thu, 17 Jul 2025 18:22:20 -0700 Subject: [PATCH 3/9] Add missing file --- build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/build.py b/build.py index 3900eefb7a..448d294efe 100755 --- a/build.py +++ b/build.py @@ -1579,6 +1579,7 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine): COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. COPY --from=min_container /usr/local/cuda/lib64/libcufile.so.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. +COPY --from=min_container /usr/local/cuda/lib64/libnvrtc.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. 
RUN mkdir -p /opt/hpcx/ucc/lib/ /opt/hpcx/ucx/lib/ COPY --from=min_container /opt/hpcx/ucc/lib/libucc.so.1 /opt/hpcx/ucc/lib/libucc.so.1 From 7ba499dadb25b0ed2029c1fdeeb419fd109fbad4 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 30 Jul 2025 10:10:44 -0700 Subject: [PATCH 4/9] finish rebase --- Dockerfile.sdk | 6 ------ build.py | 40 +++++++++++++--------------------------- 2 files changed, 13 insertions(+), 33 deletions(-) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index 66fd9ef95f..f7bbf64432 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -263,13 +263,7 @@ RUN pip3 install --upgrade "numpy<2" pillow attrdict && \ "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \ xargs pip3 install --upgrade -ARG DCGM_SOURCE_LIST # Install DCGM -RUN if [ -n "${DCGM_SOURCE_LIST}" ]; then \ - echo "deb [trusted=yes] $DCGM_SOURCE_LIST / " > /etc/apt/sources.list.d/dcgm-list.list && \ - cat /etc/apt/sources.list.d/dcgm-list.list; \ - fi - RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \ [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" && \ curl -o /tmp/cuda-keyring.deb \ diff --git a/build.py b/build.py index 448d294efe..ef1418e67b 100755 --- a/build.py +++ b/build.py @@ -74,11 +74,11 @@ "release_version": "2.60.0dev", "triton_container_version": "25.08dev", "upstream_container_version": "25.07", - "ort_version": "1.22.0", + "ort_version": "1.23.0", "ort_openvino_version": "2025.2.0", "standalone_openvino_version": "2025.2.0", "dcgm_version": "4.2.3-2", - "vllm_version": "0.9.0.1", + "vllm_version": "0.9.2", "rhel_py_version": "3.12.3", } @@ -841,15 +841,7 @@ def tensorrtllm_cmake_args(images): return cargs -def install_dcgm_libraries(dcgm_version): - if os.getenv("DCGM_SOURCE_LIST"): - dcgm_source_list = """ -RUN echo "deb [trusted=yes] {} / " > /etc/apt/sources.list.d/dcgm-list.list \\ - && cat /etc/apt/sources.list.d/dcgm-list.list""".format( - os.getenv("DCGM_SOURCE_LIST") - ) - else: - dcgm_source_list = "" +def 
install_dcgm_libraries(dcgm_version, target_machine): if dcgm_version == "": fail( "unable to determine default repo-tag, DCGM version not known for {}".format( @@ -860,13 +852,11 @@ def install_dcgm_libraries(dcgm_version): else: # RHEL has the same install instructions for both aarch64 and x86 if target_platform() == "rhel": - return ( - dcgm_source_list - + """ + if target_machine == "aarch64": + return """ ENV DCGM_VERSION {} # Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads -RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) && \\ - && dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${{ARCH}}/cuda-rhel8.repo \\ +RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo \\ && dnf clean expire-cache \\ && dnf install --assumeyes \\ datacenter-gpu-manager-4-core=1:{} \\ @@ -886,16 +876,13 @@ def install_dcgm_libraries(dcgm_version): """.format( dcgm_version, dcgm_version, dcgm_version ) - ) else: - return ( - dcgm_source_list - + """ + if target_machine == "aarch64": + return """ ENV DCGM_VERSION {} # Install DCGM. 
Steps from https://developer.nvidia.com/dcgm#Downloads -RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) \\ - && curl -o /tmp/cuda-keyring.deb \\ - https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${{ARCH}}/cuda-keyring_1.1-1_all.deb \\ +RUN curl -o /tmp/cuda-keyring.deb \\ + https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb \\ && apt install /tmp/cuda-keyring.deb \\ && rm /tmp/cuda-keyring.deb \\ && apt update \\ @@ -920,7 +907,6 @@ def install_dcgm_libraries(dcgm_version): """.format( dcgm_version, dcgm_version, dcgm_version ) - ) def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap): @@ -1021,7 +1007,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap): && mv /tmp/boost_1_80_0/boost /usr/include/boost """ if FLAGS.enable_gpu: - df += install_dcgm_libraries(argmap["DCGM_VERSION"]) + df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine()) df += """ ENV TRITON_SERVER_VERSION ${TRITON_VERSION} ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION} @@ -1134,7 +1120,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap): """ if FLAGS.enable_gpu: - df += install_dcgm_libraries(argmap["DCGM_VERSION"]) + df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine()) df += """ ENV TRITON_SERVER_VERSION ${TRITON_VERSION} @@ -1426,7 +1412,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach df += fastertransformer_buildscript.create_postbuild(is_multistage_build=False) if enable_gpu: - df += install_dcgm_libraries(argmap["DCGM_VERSION"]) + df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine) # This segment will break the RHEL SBSA build. Need to determine whether # this is necessary to incorporate. 
if target_platform() != "rhel": From 03477553b46250d3c5f92a2766a0b41eef7a39d7 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 30 Jul 2025 12:55:23 -0700 Subject: [PATCH 5/9] Use --extra-index-url for vLLM installation --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index ef1418e67b..56e0be5842 100755 --- a/build.py +++ b/build.py @@ -1494,7 +1494,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach cp -r nvpl_slim_24.04/include/* /usr/local/include && \\ rm -rf nvpl_slim_24.04.tar nvpl_slim_24.04; \\ fi \\ - && pip3 install --no-cache-dir --progress-bar on --index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\ + && pip3 install --no-cache-dir --extra-index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\ # Need to install in-house build of pytorch-triton to support triton_key definition used by torch 2.5.1 && cd /tmp \\ && wget $PYTORCH_TRITON_URL \\ From d37ea64fd96742dca674677765b9eafa5a33b617 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Thu, 31 Jul 2025 13:02:50 -0700 Subject: [PATCH 6/9] Exclude model generation for AGX --- qa/common/gen_qa_model_repository | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index f84c0603d0..0abe77de4d 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -430,7 +430,7 @@ python3 $VOLUME_SRCDIR/gen_qa_ragged_models.py --tensorrt --models_dir=$VOLUME_R chmod -R 777 $VOLUME_RAGGEDDESTDIR python3 $VOLUME_SRCDIR/gen_qa_trt_format_models.py --models_dir=$VOLUME_FORMATDESTDIR chmod -R 777 $VOLUME_FORMATDESTDIR -python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR +nvidia-smi --query-gpu=compute_cap | grep -qz 11.0 && echo -e '\033[33m[WARNING]\033[0m Skipping model generation for data dependent shape' || python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py 
--models_dir=$VOLUME_DATADEPENDENTDIR chmod -R 777 $VOLUME_DATADEPENDENTDIR # Make shared library for custom Hardmax plugin. if [ -d "/usr/src/tensorrt" ]; then From 779700fdda95f77e309a427c882effbb8be30beb Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Thu, 31 Jul 2025 13:34:57 -0700 Subject: [PATCH 7/9] Change script to support TRT_VERBOSE --- qa/common/gen_jetson_trt_models | 1 + .../gen_qa_dyna_sequence_implicit_models.py | 12 ++++++++-- qa/common/gen_qa_dyna_sequence_models.py | 18 +++++++++++--- qa/common/gen_qa_identity_models.py | 18 +++++++++++--- qa/common/gen_qa_implicit_models.py | 12 ++++++++-- qa/common/gen_qa_model_repository | 1 + qa/common/gen_qa_models.py | 24 +++++++++++++++---- qa/common/gen_qa_ragged_models.py | 12 ++++++++-- qa/common/gen_qa_reshape_models.py | 6 ++++- qa/common/gen_qa_sequence_models.py | 18 +++++++++++--- qa/common/gen_qa_trt_data_dependent_shape.py | 8 ++++++- qa/common/gen_qa_trt_format_models.py | 6 ++++- 12 files changed, 114 insertions(+), 22 deletions(-) diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models index 5bbfb4c74f..956ba03955 100755 --- a/qa/common/gen_jetson_trt_models +++ b/qa/common/gen_jetson_trt_models @@ -142,6 +142,7 @@ docker pull $TENSORRT_IMAGE docker run $DOCKER_GPU_ARGS \ --rm -v $DOCKER_VOLUME:/mnt \ + -e TRT_VERBOSE \ $TENSORRT_IMAGE bash -xe $VOLUME_SRCDIR/$TRT_MODEL_SCRIPT # Copy generated models to /tmp/ if not running in CI diff --git a/qa/common/gen_qa_dyna_sequence_implicit_models.py b/qa/common/gen_qa_dyna_sequence_implicit_models.py index e07e4cf5ec..a977710c51 100755 --- a/qa/common/gen_qa_dyna_sequence_implicit_models.py +++ b/qa/common/gen_qa_dyna_sequence_implicit_models.py @@ -357,7 +357,11 @@ def create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape): def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape): trt_dtype = np_to_trt_dtype(dtype) - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + 
trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -492,7 +496,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) trt_dtype = np_to_trt_dtype(dtype) trt_memory_format = trt.TensorFormat.LINEAR - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_dyna_sequence_models.py b/qa/common/gen_qa_dyna_sequence_models.py index 9c21d92b96..e91ce42132 100755 --- a/qa/common/gen_qa_dyna_sequence_models.py +++ b/qa/common/gen_qa_dyna_sequence_models.py @@ -59,7 +59,11 @@ def create_plan_shape_tensor_modelfile( trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype) trt_memory_format = trt.TensorFormat.LINEAR - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -202,7 +206,11 @@ def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape): # Create the model. For now don't implement a proper accumulator # just return 0 if not-ready and 'INPUT'+'START'*('END'*'CORRID') # otherwise... the tests know to expect this. - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -310,7 +318,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) # Create the model. 
For now don't implement a proper accumulator # just return 0 if not-ready and 'INPUT'+'START'*('END'*'CORRID') # otherwise... the tests know to expect this. - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_identity_models.py b/qa/common/gen_qa_identity_models.py index 5fa7b7ab01..7b513d3fbf 100755 --- a/qa/common/gen_qa_identity_models.py +++ b/qa/common/gen_qa_identity_models.py @@ -545,7 +545,11 @@ def create_plan_dynamic_rf_modelfile( models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size ): # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -644,7 +648,11 @@ def create_plan_shape_tensor_modelfile( # Note that values of OUTPUT tensor must be identical # to INPUT values - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -748,7 +756,11 @@ def create_plan_dynamic_modelfile( models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size ): # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_implicit_models.py b/qa/common/gen_qa_implicit_models.py index c3429d6012..241c021bdd 100755 --- a/qa/common/gen_qa_implicit_models.py +++ b/qa/common/gen_qa_implicit_models.py @@ -899,7 
+899,11 @@ def create_onnx_modelconfig( def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape): trt_dtype = np_to_trt_dtype(dtype) - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -1005,7 +1009,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) trt_dtype = np_to_trt_dtype(dtype) trt_memory_format = trt.TensorFormat.LINEAR - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 0abe77de4d..6865c1c71e 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -463,6 +463,7 @@ if [ "$MODEL_TYPE" != "igpu" ] ; then --label PROJECT_NAME=$PROJECT_NAME \ $DOCKER_GPU_ARGS \ -v $DOCKER_VOLUME:/mnt \ + -e TRT_VERBOSE \ $TENSORRT_IMAGE \ bash -xe $VOLUME_SRCDIR/$TRTSCRIPT diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index cd7efea723..cfce75be39 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -66,7 +66,11 @@ def create_plan_dynamic_rf_modelfile( trt_memory_format = trt.TensorFormat.LINEAR # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() if max_batch == 0: @@ -206,7 +210,11 @@ def create_plan_dynamic_modelfile( trt_output1_dtype = np_to_trt_dtype(output1_dtype) # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + 
trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() if max_batch == 0: @@ -372,7 +380,11 @@ def create_plan_fixed_rf_modelfile( trt_memory_format = trt.TensorFormat.LINEAR # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() if max_batch == 0: @@ -483,7 +495,11 @@ def create_plan_fixed_modelfile( trt_output1_dtype = np_to_trt_dtype(output1_dtype) # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() if max_batch == 0: diff --git a/qa/common/gen_qa_ragged_models.py b/qa/common/gen_qa_ragged_models.py index de8c583d88..5db3dcf6ab 100755 --- a/qa/common/gen_qa_ragged_models.py +++ b/qa/common/gen_qa_ragged_models.py @@ -57,7 +57,11 @@ def create_plan_modelfile(models_dir, model_version, dtype): # - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE # - BATCH_ITEM_SHAPE_FLATTEN - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() trt_dtype = np_to_trt_dtype(dtype) @@ -412,7 +416,11 @@ def create_plan_itemshape_modelfile(models_dir, model_version, dtype): # generated to have matching batch dimension, the output can be produced # via identity op and expect Triton will scatter the output properly. 
- TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() trt_dtype = np_to_trt_dtype(dtype) diff --git a/qa/common/gen_qa_reshape_models.py b/qa/common/gen_qa_reshape_models.py index 8193b29677..d70333c925 100755 --- a/qa/common/gen_qa_reshape_models.py +++ b/qa/common/gen_qa_reshape_models.py @@ -58,7 +58,11 @@ def create_plan_modelfile( io_cnt = len(input_shapes) # Create the model that copies inputs to corresponding outputs. - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_sequence_models.py b/qa/common/gen_qa_sequence_models.py index ad31bbc0ba..99debede00 100755 --- a/qa/common/gen_qa_sequence_models.py +++ b/qa/common/gen_qa_sequence_models.py @@ -59,7 +59,11 @@ def create_plan_shape_tensor_modelfile( trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype) trt_memory_format = trt.TensorFormat.LINEAR - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -182,7 +186,11 @@ def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape): # Create the model. For now don't implement a proper accumulator # just return 0 if not-ready and 'INPUT'+'START' otherwise... the # tests know to expect this. 
- TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -271,7 +279,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) # Create the model. For now don't implement a proper accumulator # just return 0 if not-ready and 'INPUT'+'START' otherwise... the # tests know to expect this. - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_trt_data_dependent_shape.py b/qa/common/gen_qa_trt_data_dependent_shape.py index c6f4bf2b5e..14971f3471 100755 --- a/qa/common/gen_qa_trt_data_dependent_shape.py +++ b/qa/common/gen_qa_trt_data_dependent_shape.py @@ -45,7 +45,13 @@ def create_data_dependent_modelfile( trt_input_dtype = np_to_trt_dtype(input_dtype) # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_trt_format_models.py b/qa/common/gen_qa_trt_format_models.py index 6419a6e2ab..7ff16bf7d9 100755 --- a/qa/common/gen_qa_trt_format_models.py +++ b/qa/common/gen_qa_trt_format_models.py @@ -81,7 +81,11 @@ def create_plan_modelfile( trt_output_memory_format = output_memory_format # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = 
builder.create_network() if max_batch == 0: From fb697d499db7dce064fcfb36d21fff4706d6ec02 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Mon, 4 Aug 2025 18:23:37 -0700 Subject: [PATCH 8/9] Update copyrights --- qa/common/gen_qa_trt_data_dependent_shape.py | 2 +- qa/common/gen_qa_trt_format_models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qa/common/gen_qa_trt_data_dependent_shape.py b/qa/common/gen_qa_trt_data_dependent_shape.py index 14971f3471..7e1b16ae2c 100755 --- a/qa/common/gen_qa_trt_data_dependent_shape.py +++ b/qa/common/gen_qa_trt_data_dependent_shape.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/qa/common/gen_qa_trt_format_models.py b/qa/common/gen_qa_trt_format_models.py index 7ff16bf7d9..fee469e6a8 100755 --- a/qa/common/gen_qa_trt_format_models.py +++ b/qa/common/gen_qa_trt_format_models.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions From 9dec4d55e81cbd5c795a1850018cb88806da7396 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Mon, 4 Aug 2025 19:14:00 -0700 Subject: [PATCH 9/9] Remove duplicate --- qa/common/gen_qa_trt_data_dependent_shape.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/qa/common/gen_qa_trt_data_dependent_shape.py b/qa/common/gen_qa_trt_data_dependent_shape.py index 7e1b16ae2c..9ee9b60b68 100755 --- a/qa/common/gen_qa_trt_data_dependent_shape.py +++ b/qa/common/gen_qa_trt_data_dependent_shape.py @@ -49,8 +49,6 @@ def create_data_dependent_modelfile( trt.Logger(trt.Logger.INFO) if os.environ.get("TRT_VERBOSE") != "1" else trt.Logger(trt.Logger.VERBOSE) - if os.environ.get("TRT_VERBOSE") != "1" - else trt.Logger(trt.Logger.VERBOSE) ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network()