From 99c58829ea377a2fbee53cbf4cf01d000d5b4eab Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Thu, 3 Jul 2025 08:34:44 -0700 Subject: [PATCH 1/9] Update file names --- build.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/build.py b/build.py index 716b227c42..8fa31f25f1 100755 --- a/build.py +++ b/build.py @@ -1259,7 +1259,7 @@ def create_dockerfile_linux( # stage of the PyTorch backend if not FLAGS.enable_gpu and ("pytorch" in backends): df += """ -RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.12 backends/pytorch/libtorch_cuda.so +RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.13 backends/pytorch/libtorch_cuda.so """ if "tensorrtllm" in backends: df += """ @@ -1554,17 +1554,16 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine): df += """ RUN mkdir -p /usr/local/cuda/lib64/stubs COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusparse.so /usr/local/cuda/lib64/stubs/libcusparse.so.12 -COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.11 +COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.12 COPY --from=min_container /usr/local/cuda/lib64/stubs/libcurand.so /usr/local/cuda/lib64/stubs/libcurand.so.10 -COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.11 -COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so /usr/local/cuda/lib64/stubs/libcublas.so.12 -COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.12 -COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.11 +COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.12 +COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so 
/usr/local/cuda/lib64/stubs/libcublas.so.13 +COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.13 RUN mkdir -p /usr/local/cuda/targets/{cuda_arch}-linux/lib -COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. -COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. -COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. +COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. +COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. +COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. COPY --from=min_container /usr/local/cuda/lib64/libcufile.so.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. RUN mkdir -p /opt/hpcx/ucc/lib/ /opt/hpcx/ucx/lib/ From 99d7476159c678ae2e8a2fdeb0b85f6dc0f75d8b Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 16 Jul 2025 17:28:32 -0700 Subject: [PATCH 2/9] temp: DCGM - internal repository. 
--- Dockerfile.sdk | 6 ++++++ build.py | 36 +++++++++++++++++++++++++----------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index f7bbf64432..66fd9ef95f 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -263,7 +263,13 @@ RUN pip3 install --upgrade "numpy<2" pillow attrdict && \ "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \ xargs pip3 install --upgrade +ARG DCGM_SOURCE_LIST # Install DCGM +RUN if [ -n "${DCGM_SOURCE_LIST}" ]; then \ + echo "deb [trusted=yes] $DCGM_SOURCE_LIST / " > /etc/apt/sources.list.d/dcgm-list.list && \ + cat /etc/apt/sources.list.d/dcgm-list.list; \ + fi + RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \ [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" && \ curl -o /tmp/cuda-keyring.deb \ diff --git a/build.py b/build.py index 8fa31f25f1..3900eefb7a 100755 --- a/build.py +++ b/build.py @@ -841,7 +841,15 @@ def tensorrtllm_cmake_args(images): return cargs -def install_dcgm_libraries(dcgm_version, target_machine): +def install_dcgm_libraries(dcgm_version): + if os.getenv("DCGM_SOURCE_LIST"): + dcgm_source_list = """ +RUN echo "deb [trusted=yes] {} / " > /etc/apt/sources.list.d/dcgm-list.list \\ + && cat /etc/apt/sources.list.d/dcgm-list.list""".format( + os.getenv("DCGM_SOURCE_LIST") + ) + else: + dcgm_source_list = "" if dcgm_version == "": fail( "unable to determine default repo-tag, DCGM version not known for {}".format( @@ -852,11 +860,13 @@ def install_dcgm_libraries(dcgm_version, target_machine): else: # RHEL has the same install instructions for both aarch64 and x86 if target_platform() == "rhel": - if target_machine == "aarch64": - return """ + return ( + dcgm_source_list + + """ ENV DCGM_VERSION {} # Install DCGM. 
Steps from https://developer.nvidia.com/dcgm#Downloads -RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo \\ +RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) && \\ + && dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${{ARCH}}/cuda-rhel8.repo \\ && dnf clean expire-cache \\ && dnf install --assumeyes \\ datacenter-gpu-manager-4-core=1:{} \\ @@ -876,13 +886,16 @@ def install_dcgm_libraries(dcgm_version, target_machine): """.format( dcgm_version, dcgm_version, dcgm_version ) + ) else: - if target_machine == "aarch64": - return """ + return ( + dcgm_source_list + + """ ENV DCGM_VERSION {} # Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads -RUN curl -o /tmp/cuda-keyring.deb \\ - https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb \\ +RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) \\ + && curl -o /tmp/cuda-keyring.deb \\ + https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${{ARCH}}/cuda-keyring_1.1-1_all.deb \\ && apt install /tmp/cuda-keyring.deb \\ && rm /tmp/cuda-keyring.deb \\ && apt update \\ @@ -907,6 +920,7 @@ def install_dcgm_libraries(dcgm_version, target_machine): """.format( dcgm_version, dcgm_version, dcgm_version ) + ) def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap): @@ -1007,7 +1021,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap): && mv /tmp/boost_1_80_0/boost /usr/include/boost """ if FLAGS.enable_gpu: - df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine()) + df += install_dcgm_libraries(argmap["DCGM_VERSION"]) df += """ ENV TRITON_SERVER_VERSION ${TRITON_VERSION} ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION} @@ -1120,7 +1134,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap): """ if FLAGS.enable_gpu: - df 
+= install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine()) + df += install_dcgm_libraries(argmap["DCGM_VERSION"]) df += """ ENV TRITON_SERVER_VERSION ${TRITON_VERSION} @@ -1412,7 +1426,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach df += fastertransformer_buildscript.create_postbuild(is_multistage_build=False) if enable_gpu: - df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine) + df += install_dcgm_libraries(argmap["DCGM_VERSION"]) # This segment will break the RHEL SBSA build. Need to determine whether # this is necessary to incorporate. if target_platform() != "rhel": From ea7cfa82272dd872430aad2cf9c8c5cf23ac0360 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Thu, 17 Jul 2025 18:22:20 -0700 Subject: [PATCH 3/9] Add missing file --- build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/build.py b/build.py index 3900eefb7a..448d294efe 100755 --- a/build.py +++ b/build.py @@ -1579,6 +1579,7 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine): COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. COPY --from=min_container /usr/local/cuda/lib64/libcufile.so.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. +COPY --from=min_container /usr/local/cuda/lib64/libnvrtc.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/. 
RUN mkdir -p /opt/hpcx/ucc/lib/ /opt/hpcx/ucx/lib/ COPY --from=min_container /opt/hpcx/ucc/lib/libucc.so.1 /opt/hpcx/ucc/lib/libucc.so.1 From 7ba499dadb25b0ed2029c1fdeeb419fd109fbad4 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 30 Jul 2025 10:10:44 -0700 Subject: [PATCH 4/9] finish rebase --- Dockerfile.sdk | 6 ------ build.py | 40 +++++++++++++--------------------------- 2 files changed, 13 insertions(+), 33 deletions(-) diff --git a/Dockerfile.sdk b/Dockerfile.sdk index 66fd9ef95f..f7bbf64432 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -263,13 +263,7 @@ RUN pip3 install --upgrade "numpy<2" pillow attrdict && \ "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \ xargs pip3 install --upgrade -ARG DCGM_SOURCE_LIST # Install DCGM -RUN if [ -n "${DCGM_SOURCE_LIST}" ]; then \ - echo "deb [trusted=yes] $DCGM_SOURCE_LIST / " > /etc/apt/sources.list.d/dcgm-list.list && \ - cat /etc/apt/sources.list.d/dcgm-list.list; \ - fi - RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \ [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" && \ curl -o /tmp/cuda-keyring.deb \ diff --git a/build.py b/build.py index 448d294efe..ef1418e67b 100755 --- a/build.py +++ b/build.py @@ -74,11 +74,11 @@ "release_version": "2.60.0dev", "triton_container_version": "25.08dev", "upstream_container_version": "25.07", - "ort_version": "1.22.0", + "ort_version": "1.23.0", "ort_openvino_version": "2025.2.0", "standalone_openvino_version": "2025.2.0", "dcgm_version": "4.2.3-2", - "vllm_version": "0.9.0.1", + "vllm_version": "0.9.2", "rhel_py_version": "3.12.3", } @@ -841,15 +841,7 @@ def tensorrtllm_cmake_args(images): return cargs -def install_dcgm_libraries(dcgm_version): - if os.getenv("DCGM_SOURCE_LIST"): - dcgm_source_list = """ -RUN echo "deb [trusted=yes] {} / " > /etc/apt/sources.list.d/dcgm-list.list \\ - && cat /etc/apt/sources.list.d/dcgm-list.list""".format( - os.getenv("DCGM_SOURCE_LIST") - ) - else: - dcgm_source_list = "" +def 
install_dcgm_libraries(dcgm_version, target_machine): if dcgm_version == "": fail( "unable to determine default repo-tag, DCGM version not known for {}".format( @@ -860,13 +852,11 @@ def install_dcgm_libraries(dcgm_version): else: # RHEL has the same install instructions for both aarch64 and x86 if target_platform() == "rhel": - return ( - dcgm_source_list - + """ + if target_machine == "aarch64": + return """ ENV DCGM_VERSION {} # Install DCGM. Steps from https://developer.nvidia.com/dcgm#Downloads -RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) && \\ - && dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/${{ARCH}}/cuda-rhel8.repo \\ +RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo \\ && dnf clean expire-cache \\ && dnf install --assumeyes \\ datacenter-gpu-manager-4-core=1:{} \\ @@ -886,16 +876,13 @@ def install_dcgm_libraries(dcgm_version): """.format( dcgm_version, dcgm_version, dcgm_version ) - ) else: - return ( - dcgm_source_list - + """ + if target_machine == "aarch64": + return """ ENV DCGM_VERSION {} # Install DCGM. 
Steps from https://developer.nvidia.com/dcgm#Downloads -RUN ARCH=$( [ $(uname -m) = "x86_64" ] && echo "$(uname -m)" || echo "sbsa" ) \\ - && curl -o /tmp/cuda-keyring.deb \\ - https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/${{ARCH}}/cuda-keyring_1.1-1_all.deb \\ +RUN curl -o /tmp/cuda-keyring.deb \\ + https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb \\ && apt install /tmp/cuda-keyring.deb \\ && rm /tmp/cuda-keyring.deb \\ && apt update \\ @@ -920,7 +907,6 @@ def install_dcgm_libraries(dcgm_version): """.format( dcgm_version, dcgm_version, dcgm_version ) - ) def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap): @@ -1021,7 +1007,7 @@ def create_dockerfile_buildbase_rhel(ddir, dockerfile_name, argmap): && mv /tmp/boost_1_80_0/boost /usr/include/boost """ if FLAGS.enable_gpu: - df += install_dcgm_libraries(argmap["DCGM_VERSION"]) + df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine()) df += """ ENV TRITON_SERVER_VERSION ${TRITON_VERSION} ENV NVIDIA_TRITON_SERVER_VERSION ${TRITON_CONTAINER_VERSION} @@ -1134,7 +1120,7 @@ def create_dockerfile_buildbase(ddir, dockerfile_name, argmap): """ if FLAGS.enable_gpu: - df += install_dcgm_libraries(argmap["DCGM_VERSION"]) + df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine()) df += """ ENV TRITON_SERVER_VERSION ${TRITON_VERSION} @@ -1426,7 +1412,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach df += fastertransformer_buildscript.create_postbuild(is_multistage_build=False) if enable_gpu: - df += install_dcgm_libraries(argmap["DCGM_VERSION"]) + df += install_dcgm_libraries(argmap["DCGM_VERSION"], target_machine) # This segment will break the RHEL SBSA build. Need to determine whether # this is necessary to incorporate. 
if target_platform() != "rhel": From 03477553b46250d3c5f92a2766a0b41eef7a39d7 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Wed, 30 Jul 2025 12:55:23 -0700 Subject: [PATCH 5/9] Use --extra-index-url for vLLM installation --- build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build.py b/build.py index ef1418e67b..56e0be5842 100755 --- a/build.py +++ b/build.py @@ -1494,7 +1494,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach cp -r nvpl_slim_24.04/include/* /usr/local/include && \\ rm -rf nvpl_slim_24.04.tar nvpl_slim_24.04; \\ fi \\ - && pip3 install --no-cache-dir --progress-bar on --index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\ + && pip3 install --no-cache-dir --extra-index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\ # Need to install in-house build of pytorch-triton to support triton_key definition used by torch 2.5.1 && cd /tmp \\ && wget $PYTORCH_TRITON_URL \\ From d37ea64fd96742dca674677765b9eafa5a33b617 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Thu, 31 Jul 2025 13:02:50 -0700 Subject: [PATCH 6/9] Exclude model generation for AGX --- qa/common/gen_qa_model_repository | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index f84c0603d0..0abe77de4d 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -430,7 +430,7 @@ python3 $VOLUME_SRCDIR/gen_qa_ragged_models.py --tensorrt --models_dir=$VOLUME_R chmod -R 777 $VOLUME_RAGGEDDESTDIR python3 $VOLUME_SRCDIR/gen_qa_trt_format_models.py --models_dir=$VOLUME_FORMATDESTDIR chmod -R 777 $VOLUME_FORMATDESTDIR -python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR +nvidia-smi --query-gpu=compute_cap | grep -qz 11.0 && echo -e '\033[33m[WARNING]\033[0m Skipping model generation for data dependent shape' || python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py 
--models_dir=$VOLUME_DATADEPENDENTDIR chmod -R 777 $VOLUME_DATADEPENDENTDIR # Make shared library for custom Hardmax plugin. if [ -d "/usr/src/tensorrt" ]; then From 779700fdda95f77e309a427c882effbb8be30beb Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Thu, 31 Jul 2025 13:34:57 -0700 Subject: [PATCH 7/9] Change script to support TRT_VERBOSE --- qa/common/gen_jetson_trt_models | 1 + .../gen_qa_dyna_sequence_implicit_models.py | 12 ++++++++-- qa/common/gen_qa_dyna_sequence_models.py | 18 +++++++++++--- qa/common/gen_qa_identity_models.py | 18 +++++++++++--- qa/common/gen_qa_implicit_models.py | 12 ++++++++-- qa/common/gen_qa_model_repository | 1 + qa/common/gen_qa_models.py | 24 +++++++++++++++---- qa/common/gen_qa_ragged_models.py | 12 ++++++++-- qa/common/gen_qa_reshape_models.py | 6 ++++- qa/common/gen_qa_sequence_models.py | 18 +++++++++++--- qa/common/gen_qa_trt_data_dependent_shape.py | 8 ++++++- qa/common/gen_qa_trt_format_models.py | 6 ++++- 12 files changed, 114 insertions(+), 22 deletions(-) diff --git a/qa/common/gen_jetson_trt_models b/qa/common/gen_jetson_trt_models index 5bbfb4c74f..956ba03955 100755 --- a/qa/common/gen_jetson_trt_models +++ b/qa/common/gen_jetson_trt_models @@ -142,6 +142,7 @@ docker pull $TENSORRT_IMAGE docker run $DOCKER_GPU_ARGS \ --rm -v $DOCKER_VOLUME:/mnt \ + -e TRT_VERBOSE \ $TENSORRT_IMAGE bash -xe $VOLUME_SRCDIR/$TRT_MODEL_SCRIPT # Copy generated models to /tmp/ if not running in CI diff --git a/qa/common/gen_qa_dyna_sequence_implicit_models.py b/qa/common/gen_qa_dyna_sequence_implicit_models.py index e07e4cf5ec..a977710c51 100755 --- a/qa/common/gen_qa_dyna_sequence_implicit_models.py +++ b/qa/common/gen_qa_dyna_sequence_implicit_models.py @@ -357,7 +357,11 @@ def create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape): def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape): trt_dtype = np_to_trt_dtype(dtype) - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + 
trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -492,7 +496,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) trt_dtype = np_to_trt_dtype(dtype) trt_memory_format = trt.TensorFormat.LINEAR - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_dyna_sequence_models.py b/qa/common/gen_qa_dyna_sequence_models.py index 9c21d92b96..e91ce42132 100755 --- a/qa/common/gen_qa_dyna_sequence_models.py +++ b/qa/common/gen_qa_dyna_sequence_models.py @@ -59,7 +59,11 @@ def create_plan_shape_tensor_modelfile( trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype) trt_memory_format = trt.TensorFormat.LINEAR - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -202,7 +206,11 @@ def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape): # Create the model. For now don't implement a proper accumulator # just return 0 if not-ready and 'INPUT'+'START'*('END'*'CORRID') # otherwise... the tests know to expect this. - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -310,7 +318,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) # Create the model. 
For now don't implement a proper accumulator # just return 0 if not-ready and 'INPUT'+'START'*('END'*'CORRID') # otherwise... the tests know to expect this. - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_identity_models.py b/qa/common/gen_qa_identity_models.py index 5fa7b7ab01..7b513d3fbf 100755 --- a/qa/common/gen_qa_identity_models.py +++ b/qa/common/gen_qa_identity_models.py @@ -545,7 +545,11 @@ def create_plan_dynamic_rf_modelfile( models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size ): # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -644,7 +648,11 @@ def create_plan_shape_tensor_modelfile( # Note that values of OUTPUT tensor must be identical # to INPUT values - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -748,7 +756,11 @@ def create_plan_dynamic_modelfile( models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size ): # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_implicit_models.py b/qa/common/gen_qa_implicit_models.py index c3429d6012..241c021bdd 100755 --- a/qa/common/gen_qa_implicit_models.py +++ b/qa/common/gen_qa_implicit_models.py @@ -899,7 
+899,11 @@ def create_onnx_modelconfig( def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape): trt_dtype = np_to_trt_dtype(dtype) - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -1005,7 +1009,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) trt_dtype = np_to_trt_dtype(dtype) trt_memory_format = trt.TensorFormat.LINEAR - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_model_repository b/qa/common/gen_qa_model_repository index 0abe77de4d..6865c1c71e 100755 --- a/qa/common/gen_qa_model_repository +++ b/qa/common/gen_qa_model_repository @@ -463,6 +463,7 @@ if [ "$MODEL_TYPE" != "igpu" ] ; then --label PROJECT_NAME=$PROJECT_NAME \ $DOCKER_GPU_ARGS \ -v $DOCKER_VOLUME:/mnt \ + -e TRT_VERBOSE \ $TENSORRT_IMAGE \ bash -xe $VOLUME_SRCDIR/$TRTSCRIPT diff --git a/qa/common/gen_qa_models.py b/qa/common/gen_qa_models.py index cd7efea723..cfce75be39 100755 --- a/qa/common/gen_qa_models.py +++ b/qa/common/gen_qa_models.py @@ -66,7 +66,11 @@ def create_plan_dynamic_rf_modelfile( trt_memory_format = trt.TensorFormat.LINEAR # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() if max_batch == 0: @@ -206,7 +210,11 @@ def create_plan_dynamic_modelfile( trt_output1_dtype = np_to_trt_dtype(output1_dtype) # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + 
trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() if max_batch == 0: @@ -372,7 +380,11 @@ def create_plan_fixed_rf_modelfile( trt_memory_format = trt.TensorFormat.LINEAR # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() if max_batch == 0: @@ -483,7 +495,11 @@ def create_plan_fixed_modelfile( trt_output1_dtype = np_to_trt_dtype(output1_dtype) # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() if max_batch == 0: diff --git a/qa/common/gen_qa_ragged_models.py b/qa/common/gen_qa_ragged_models.py index de8c583d88..5db3dcf6ab 100755 --- a/qa/common/gen_qa_ragged_models.py +++ b/qa/common/gen_qa_ragged_models.py @@ -57,7 +57,11 @@ def create_plan_modelfile(models_dir, model_version, dtype): # - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE # - BATCH_ITEM_SHAPE_FLATTEN - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() trt_dtype = np_to_trt_dtype(dtype) @@ -412,7 +416,11 @@ def create_plan_itemshape_modelfile(models_dir, model_version, dtype): # generated to have matching batch dimension, the output can be produced # via identity op and expect Triton will scatter the output properly. 
- TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() trt_dtype = np_to_trt_dtype(dtype) diff --git a/qa/common/gen_qa_reshape_models.py b/qa/common/gen_qa_reshape_models.py index 8193b29677..d70333c925 100755 --- a/qa/common/gen_qa_reshape_models.py +++ b/qa/common/gen_qa_reshape_models.py @@ -58,7 +58,11 @@ def create_plan_modelfile( io_cnt = len(input_shapes) # Create the model that copies inputs to corresponding outputs. - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_sequence_models.py b/qa/common/gen_qa_sequence_models.py index ad31bbc0ba..99debede00 100755 --- a/qa/common/gen_qa_sequence_models.py +++ b/qa/common/gen_qa_sequence_models.py @@ -59,7 +59,11 @@ def create_plan_shape_tensor_modelfile( trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype) trt_memory_format = trt.TensorFormat.LINEAR - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -182,7 +186,11 @@ def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape): # Create the model. For now don't implement a proper accumulator # just return 0 if not-ready and 'INPUT'+'START' otherwise... the # tests know to expect this. 
- TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() @@ -271,7 +279,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape) # Create the model. For now don't implement a proper accumulator # just return 0 if not-ready and 'INPUT'+'START' otherwise... the # tests know to expect this. - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_trt_data_dependent_shape.py b/qa/common/gen_qa_trt_data_dependent_shape.py index c6f4bf2b5e..14971f3471 100755 --- a/qa/common/gen_qa_trt_data_dependent_shape.py +++ b/qa/common/gen_qa_trt_data_dependent_shape.py @@ -45,7 +45,13 @@ def create_data_dependent_modelfile( trt_input_dtype = np_to_trt_dtype(input_dtype) # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network() diff --git a/qa/common/gen_qa_trt_format_models.py b/qa/common/gen_qa_trt_format_models.py index 6419a6e2ab..7ff16bf7d9 100755 --- a/qa/common/gen_qa_trt_format_models.py +++ b/qa/common/gen_qa_trt_format_models.py @@ -81,7 +81,11 @@ def create_plan_modelfile( trt_output_memory_format = output_memory_format # Create the model - TRT_LOGGER = trt.Logger(trt.Logger.INFO) + TRT_LOGGER = ( + trt.Logger(trt.Logger.INFO) + if os.environ.get("TRT_VERBOSE") != "1" + else trt.Logger(trt.Logger.VERBOSE) + ) builder = trt.Builder(TRT_LOGGER) network = 
builder.create_network() if max_batch == 0: From fb697d499db7dce064fcfb36d21fff4706d6ec02 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Mon, 4 Aug 2025 18:23:37 -0700 Subject: [PATCH 8/9] Update copyrights --- qa/common/gen_qa_trt_data_dependent_shape.py | 2 +- qa/common/gen_qa_trt_format_models.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/qa/common/gen_qa_trt_data_dependent_shape.py b/qa/common/gen_qa_trt_data_dependent_shape.py index 14971f3471..7e1b16ae2c 100755 --- a/qa/common/gen_qa_trt_data_dependent_shape.py +++ b/qa/common/gen_qa_trt_data_dependent_shape.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions diff --git a/qa/common/gen_qa_trt_format_models.py b/qa/common/gen_qa_trt_format_models.py index 7ff16bf7d9..fee469e6a8 100755 --- a/qa/common/gen_qa_trt_format_models.py +++ b/qa/common/gen_qa_trt_format_models.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions From 9dec4d55e81cbd5c795a1850018cb88806da7396 Mon Sep 17 00:00:00 2001 From: Misha Chornyi Date: Mon, 4 Aug 2025 19:14:00 -0700 Subject: [PATCH 9/9] Remove duplicate --- qa/common/gen_qa_trt_data_dependent_shape.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/qa/common/gen_qa_trt_data_dependent_shape.py b/qa/common/gen_qa_trt_data_dependent_shape.py index 7e1b16ae2c..9ee9b60b68 100755 --- a/qa/common/gen_qa_trt_data_dependent_shape.py +++ b/qa/common/gen_qa_trt_data_dependent_shape.py @@ -49,8 +49,6 @@ def create_data_dependent_modelfile( trt.Logger(trt.Logger.INFO) if os.environ.get("TRT_VERBOSE") != "1" else trt.Logger(trt.Logger.VERBOSE) - if os.environ.get("TRT_VERBOSE") != "1" - else trt.Logger(trt.Logger.VERBOSE) ) builder = trt.Builder(TRT_LOGGER) network = builder.create_network()