24 changes: 12 additions & 12 deletions build.py
@@ -74,11 +74,11 @@
"release_version": "2.60.0dev",
"triton_container_version": "25.08dev",
"upstream_container_version": "25.07",
- "ort_version": "1.22.0",
+ "ort_version": "1.23.0",
"ort_openvino_version": "2025.2.0",
"standalone_openvino_version": "2025.2.0",
"dcgm_version": "4.2.3-2",
- "vllm_version": "0.9.0.1",
+ "vllm_version": "0.9.2",
"rhel_py_version": "3.12.3",
}

@@ -1259,7 +1259,7 @@ def create_dockerfile_linux(
# stage of the PyTorch backend
if not FLAGS.enable_gpu and ("pytorch" in backends):
df += """
- RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.12 backends/pytorch/libtorch_cuda.so
+ RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.13 backends/pytorch/libtorch_cuda.so
"""
if "tensorrtllm" in backends:
df += """
@@ -1494,7 +1494,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
cp -r nvpl_slim_24.04/include/* /usr/local/include && \\
rm -rf nvpl_slim_24.04.tar nvpl_slim_24.04; \\
fi \\
- && pip3 install --no-cache-dir --progress-bar on --index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\
+ && pip3 install --no-cache-dir --extra-index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\
# Need to install in-house build of pytorch-triton to support triton_key definition used by torch 2.5.1
&& cd /tmp \\
&& wget $PYTORCH_TRITON_URL \\
@@ -1554,18 +1554,18 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
df += """
RUN mkdir -p /usr/local/cuda/lib64/stubs
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusparse.so /usr/local/cuda/lib64/stubs/libcusparse.so.12
- COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.11
+ COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.12
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcurand.so /usr/local/cuda/lib64/stubs/libcurand.so.10
- COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.11
- COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so /usr/local/cuda/lib64/stubs/libcublas.so.12
- COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.12
- COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.11
+ COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.12
+ COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so /usr/local/cuda/lib64/stubs/libcublas.so.13
+ COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.13
RUN mkdir -p /usr/local/cuda/targets/{cuda_arch}-linux/lib
- COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
- COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
- COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+ COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+ COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+ COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
COPY --from=min_container /usr/local/cuda/lib64/libcufile.so.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
+ COPY --from=min_container /usr/local/cuda/lib64/libnvrtc.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
RUN mkdir -p /opt/hpcx/ucc/lib/ /opt/hpcx/ucx/lib/
COPY --from=min_container /opt/hpcx/ucc/lib/libucc.so.1 /opt/hpcx/ucc/lib/libucc.so.1
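Note on the stub changes above: the CPU-only image still needs the PyTorch backend's libtorch_cuda.so to resolve its CUDA dependencies, so the build injects the cuBLASLt stub with patchelf and copies the stub libraries under the sonames the CUDA 13 toolkit uses (the .12 suffixes move to .13, and libnvrtc.so.13 is newly copied). A minimal post-build check along these lines could confirm the injected dependency; the paths and the check itself are an illustrative sketch, not part of this PR:

# Hypothetical sanity check, assuming the default layout of the built server
# directory: the PyTorch backend library should now list the CUDA 13 stub
# soname among its DT_NEEDED entries.
import subprocess

needed = subprocess.run(
    ["patchelf", "--print-needed", "backends/pytorch/libtorch_cuda.so"],
    capture_output=True,
    text=True,
    check=True,
).stdout.splitlines()

assert "libcublasLt.so.13" in needed, needed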
1 change: 1 addition & 0 deletions qa/common/gen_jetson_trt_models
@@ -142,6 +142,7 @@ docker pull $TENSORRT_IMAGE

docker run $DOCKER_GPU_ARGS \
--rm -v $DOCKER_VOLUME:/mnt \
+ -e TRT_VERBOSE \
$TENSORRT_IMAGE bash -xe $VOLUME_SRCDIR/$TRT_MODEL_SCRIPT

# Copy generated models to /tmp/ if not running in CI
12 changes: 10 additions & 2 deletions qa/common/gen_qa_dyna_sequence_implicit_models.py
@@ -357,7 +357,11 @@ def create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape):

def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
trt_dtype = np_to_trt_dtype(dtype)
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

@@ -492,7 +496,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
trt_dtype = np_to_trt_dtype(dtype)
trt_memory_format = trt.TensorFormat.LINEAR

- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

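The TRT_LOGGER change above is repeated in each of the gen_qa_*.py generators below: the TensorRT builder logs at INFO by default and switches to VERBOSE when TRT_VERBOSE=1 is set, and the new -e TRT_VERBOSE flags in gen_jetson_trt_models and gen_qa_model_repository forward that variable into the TensorRT container. As a sketch only (not part of this PR), the repeated conditional could be factored into one shared helper:

import os

import tensorrt as trt


def get_trt_logger():
    # Hypothetical shared helper mirroring the conditional added in each
    # generator: VERBOSE TensorRT logging only when TRT_VERBOSE=1 is exported,
    # INFO otherwise.
    severity = (
        trt.Logger.VERBOSE
        if os.environ.get("TRT_VERBOSE") == "1"
        else trt.Logger.INFO
    )
    return trt.Logger(severity)


TRT_LOGGER = get_trt_logger()
builder = trt.Builder(TRT_LOGGER)

Exporting TRT_VERBOSE=1 before running gen_qa_model_repository would then turn on verbose engine-build logs without touching the individual scripts again.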
18 changes: 15 additions & 3 deletions qa/common/gen_qa_dyna_sequence_models.py
@@ -59,7 +59,11 @@ def create_plan_shape_tensor_modelfile(
trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype)
trt_memory_format = trt.TensorFormat.LINEAR

- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

@@ -202,7 +206,11 @@ def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
# Create the model. For now don't implement a proper accumulator
# just return 0 if not-ready and 'INPUT'+'START'*('END'*'CORRID')
# otherwise... the tests know to expect this.
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

@@ -310,7 +318,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
# Create the model. For now don't implement a proper accumulator
# just return 0 if not-ready and 'INPUT'+'START'*('END'*'CORRID')
# otherwise... the tests know to expect this.
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

18 changes: 15 additions & 3 deletions qa/common/gen_qa_identity_models.py
@@ -545,7 +545,11 @@ def create_plan_dynamic_rf_modelfile(
models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size
):
# Create the model
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

@@ -644,7 +648,11 @@ def create_plan_shape_tensor_modelfile(
# Note that values of OUTPUT tensor must be identical
# to INPUT values

- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

@@ -748,7 +756,11 @@ def create_plan_dynamic_modelfile(
models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size
):
# Create the model
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

12 changes: 10 additions & 2 deletions qa/common/gen_qa_implicit_models.py
@@ -899,7 +899,11 @@ def create_onnx_modelconfig(

def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
trt_dtype = np_to_trt_dtype(dtype)
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

@@ -1005,7 +1009,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
trt_dtype = np_to_trt_dtype(dtype)
trt_memory_format = trt.TensorFormat.LINEAR

- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

3 changes: 2 additions & 1 deletion qa/common/gen_qa_model_repository
@@ -430,7 +430,7 @@ python3 $VOLUME_SRCDIR/gen_qa_ragged_models.py --tensorrt --models_dir=$VOLUME_R
chmod -R 777 $VOLUME_RAGGEDDESTDIR
python3 $VOLUME_SRCDIR/gen_qa_trt_format_models.py --models_dir=$VOLUME_FORMATDESTDIR
chmod -R 777 $VOLUME_FORMATDESTDIR
- python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR
+ nvidia-smi --query-gpu=compute_cap | grep -qz 11.0 && echo -e '\033[33m[WARNING]\033[0m Skipping model generation for data dependent shape' || python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR
chmod -R 777 $VOLUME_DATADEPENDENTDIR
# Make shared library for custom Hardmax plugin.
if [ -d "/usr/src/tensorrt" ]; then
@@ -463,6 +463,7 @@ if [ "$MODEL_TYPE" != "igpu" ] ; then
--label PROJECT_NAME=$PROJECT_NAME \
$DOCKER_GPU_ARGS \
-v $DOCKER_VOLUME:/mnt \
+ -e TRT_VERBOSE \
$TENSORRT_IMAGE \
bash -xe $VOLUME_SRCDIR/$TRTSCRIPT

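The new guard above skips the data-dependent-shape model generation and prints a warning when nvidia-smi reports a compute capability of 11.0; on other GPUs the generator still runs. A rough Python rendering of that shell one-liner, purely for readability (hypothetical, not part of this PR; the models_dir value is a placeholder, and the 11.0 special case is assumed here to mark GPUs on which these models cannot yet be built):

import subprocess
import sys

# Ask nvidia-smi for the compute capability of each visible GPU.
caps = subprocess.run(
    ["nvidia-smi", "--query-gpu=compute_cap", "--format=csv,noheader"],
    capture_output=True,
    text=True,
    check=True,
).stdout.split()

if "11.0" in caps:
    # Same warning the shell guard prints, minus the ANSI coloring.
    print("[WARNING] Skipping model generation for data dependent shape")
else:
    subprocess.run(
        [sys.executable, "gen_qa_trt_data_dependent_shape.py",
         "--models_dir", "/tmp/datadependent_models"],
        check=True,
    )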
24 changes: 20 additions & 4 deletions qa/common/gen_qa_models.py
@@ -66,7 +66,11 @@ def create_plan_dynamic_rf_modelfile(
trt_memory_format = trt.TensorFormat.LINEAR

# Create the model
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()
if max_batch == 0:
@@ -206,7 +210,11 @@ def create_plan_dynamic_modelfile(
trt_output1_dtype = np_to_trt_dtype(output1_dtype)

# Create the model
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()
if max_batch == 0:
@@ -372,7 +380,11 @@ def create_plan_fixed_rf_modelfile(
trt_memory_format = trt.TensorFormat.LINEAR

# Create the model
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()
if max_batch == 0:
@@ -483,7 +495,11 @@ def create_plan_fixed_modelfile(
trt_output1_dtype = np_to_trt_dtype(output1_dtype)

# Create the model
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()
if max_batch == 0:
12 changes: 10 additions & 2 deletions qa/common/gen_qa_ragged_models.py
@@ -57,7 +57,11 @@ def create_plan_modelfile(models_dir, model_version, dtype):
# - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE
# - BATCH_ITEM_SHAPE_FLATTEN

- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()
trt_dtype = np_to_trt_dtype(dtype)
@@ -412,7 +416,11 @@ def create_plan_itemshape_modelfile(models_dir, model_version, dtype):
# generated to have matching batch dimension, the output can be produced
# via identity op and expect Triton will scatter the output properly.

- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()
trt_dtype = np_to_trt_dtype(dtype)
6 changes: 5 additions & 1 deletion qa/common/gen_qa_reshape_models.py
@@ -58,7 +58,11 @@ def create_plan_modelfile(
io_cnt = len(input_shapes)

# Create the model that copies inputs to corresponding outputs.
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

18 changes: 15 additions & 3 deletions qa/common/gen_qa_sequence_models.py
@@ -59,7 +59,11 @@ def create_plan_shape_tensor_modelfile(
trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype)
trt_memory_format = trt.TensorFormat.LINEAR

- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

@@ -182,7 +186,11 @@ def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
# Create the model. For now don't implement a proper accumulator
# just return 0 if not-ready and 'INPUT'+'START' otherwise... the
# tests know to expect this.
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

@@ -271,7 +279,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
# Create the model. For now don't implement a proper accumulator
# just return 0 if not-ready and 'INPUT'+'START' otherwise... the
# tests know to expect this.
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

8 changes: 6 additions & 2 deletions qa/common/gen_qa_trt_data_dependent_shape.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

- # Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # Copyright 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -45,7 +45,11 @@ def create_data_dependent_modelfile(
trt_input_dtype = np_to_trt_dtype(input_dtype)

# Create the model
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()

8 changes: 6 additions & 2 deletions qa/common/gen_qa_trt_format_models.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python3

- # Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -81,7 +81,11 @@ def create_plan_modelfile(
trt_output_memory_format = output_memory_format

# Create the model
- TRT_LOGGER = trt.Logger(trt.Logger.INFO)
+ TRT_LOGGER = (
+     trt.Logger(trt.Logger.INFO)
+     if os.environ.get("TRT_VERBOSE") != "1"
+     else trt.Logger(trt.Logger.VERBOSE)
+ )
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()
if max_batch == 0: