Skip to content

Commit 995dcc0

Browse files
authored
Update versions for 25.08 (#8327)
1 parent e672c28 commit 995dcc0

13 files changed

+127
-37
lines changed

build.py

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -74,11 +74,11 @@
7474
"release_version": "2.60.0dev",
7575
"triton_container_version": "25.08dev",
7676
"upstream_container_version": "25.07",
77-
"ort_version": "1.22.0",
77+
"ort_version": "1.23.0",
7878
"ort_openvino_version": "2025.2.0",
7979
"standalone_openvino_version": "2025.2.0",
8080
"dcgm_version": "4.2.3-2",
81-
"vllm_version": "0.9.0.1",
81+
"vllm_version": "0.9.2",
8282
"rhel_py_version": "3.12.3",
8383
}
8484

@@ -1259,7 +1259,7 @@ def create_dockerfile_linux(
12591259
# stage of the PyTorch backend
12601260
if not FLAGS.enable_gpu and ("pytorch" in backends):
12611261
df += """
1262-
RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.12 backends/pytorch/libtorch_cuda.so
1262+
RUN patchelf --add-needed /usr/local/cuda/lib64/stubs/libcublasLt.so.13 backends/pytorch/libtorch_cuda.so
12631263
"""
12641264
if "tensorrtllm" in backends:
12651265
df += """
@@ -1494,7 +1494,7 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
14941494
cp -r nvpl_slim_24.04/include/* /usr/local/include && \\
14951495
rm -rf nvpl_slim_24.04.tar nvpl_slim_24.04; \\
14961496
fi \\
1497-
&& pip3 install --no-cache-dir --progress-bar on --index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\
1497+
&& pip3 install --no-cache-dir --extra-index-url $VLLM_INDEX_URL -r /run/secrets/requirements \\
14981498
# Need to install in-house build of pytorch-triton to support triton_key definition used by torch 2.5.1
14991499
&& cd /tmp \\
15001500
&& wget $PYTORCH_TRITON_URL \\
@@ -1554,18 +1554,18 @@ def add_cpu_libs_to_linux_dockerfile(backends, target_machine):
15541554
df += """
15551555
RUN mkdir -p /usr/local/cuda/lib64/stubs
15561556
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusparse.so /usr/local/cuda/lib64/stubs/libcusparse.so.12
1557-
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.11
1557+
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcusolver.so /usr/local/cuda/lib64/stubs/libcusolver.so.12
15581558
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcurand.so /usr/local/cuda/lib64/stubs/libcurand.so.10
1559-
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.11
1560-
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so /usr/local/cuda/lib64/stubs/libcublas.so.12
1561-
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.12
1562-
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.11
1559+
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcufft.so /usr/local/cuda/lib64/stubs/libcufft.so.12
1560+
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublas.so /usr/local/cuda/lib64/stubs/libcublas.so.13
1561+
COPY --from=min_container /usr/local/cuda/lib64/stubs/libcublasLt.so /usr/local/cuda/lib64/stubs/libcublasLt.so.13
15631562
15641563
RUN mkdir -p /usr/local/cuda/targets/{cuda_arch}-linux/lib
1565-
COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
1566-
COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
1567-
COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.12 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
1564+
COPY --from=min_container /usr/local/cuda/lib64/libcudart.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
1565+
COPY --from=min_container /usr/local/cuda/lib64/libcupti.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
1566+
COPY --from=min_container /usr/local/cuda/lib64/libnvJitLink.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
15681567
COPY --from=min_container /usr/local/cuda/lib64/libcufile.so.0 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
1568+
COPY --from=min_container /usr/local/cuda/lib64/libnvrtc.so.13 /usr/local/cuda/targets/{cuda_arch}-linux/lib/.
15691569
15701570
RUN mkdir -p /opt/hpcx/ucc/lib/ /opt/hpcx/ucx/lib/
15711571
COPY --from=min_container /opt/hpcx/ucc/lib/libucc.so.1 /opt/hpcx/ucc/lib/libucc.so.1

qa/common/gen_jetson_trt_models

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -142,6 +142,7 @@ docker pull $TENSORRT_IMAGE
142142

143143
docker run $DOCKER_GPU_ARGS \
144144
--rm -v $DOCKER_VOLUME:/mnt \
145+
-e TRT_VERBOSE \
145146
$TENSORRT_IMAGE bash -xe $VOLUME_SRCDIR/$TRT_MODEL_SCRIPT
146147

147148
# Copy generated models to /tmp/ if not running in CI

qa/common/gen_qa_dyna_sequence_implicit_models.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -357,7 +357,11 @@ def create_onnx_modelconfig(models_dir, model_version, max_batch, dtype, shape):
357357

358358
def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
359359
trt_dtype = np_to_trt_dtype(dtype)
360-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
360+
TRT_LOGGER = (
361+
trt.Logger(trt.Logger.INFO)
362+
if os.environ.get("TRT_VERBOSE") != "1"
363+
else trt.Logger(trt.Logger.VERBOSE)
364+
)
361365
builder = trt.Builder(TRT_LOGGER)
362366
network = builder.create_network()
363367

@@ -492,7 +496,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
492496
trt_dtype = np_to_trt_dtype(dtype)
493497
trt_memory_format = trt.TensorFormat.LINEAR
494498

495-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
499+
TRT_LOGGER = (
500+
trt.Logger(trt.Logger.INFO)
501+
if os.environ.get("TRT_VERBOSE") != "1"
502+
else trt.Logger(trt.Logger.VERBOSE)
503+
)
496504
builder = trt.Builder(TRT_LOGGER)
497505
network = builder.create_network()
498506

qa/common/gen_qa_dyna_sequence_models.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,11 @@ def create_plan_shape_tensor_modelfile(
5959
trt_shape_dtype = np_to_trt_dtype(shape_tensor_input_dtype)
6060
trt_memory_format = trt.TensorFormat.LINEAR
6161

62-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
62+
TRT_LOGGER = (
63+
trt.Logger(trt.Logger.INFO)
64+
if os.environ.get("TRT_VERBOSE") != "1"
65+
else trt.Logger(trt.Logger.VERBOSE)
66+
)
6367
builder = trt.Builder(TRT_LOGGER)
6468
network = builder.create_network()
6569

@@ -202,7 +206,11 @@ def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
202206
# Create the model. For now don't implement a proper accumulator
203207
# just return 0 if not-ready and 'INPUT'+'START'*('END'*'CORRID')
204208
# otherwise... the tests know to expect this.
205-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
209+
TRT_LOGGER = (
210+
trt.Logger(trt.Logger.INFO)
211+
if os.environ.get("TRT_VERBOSE") != "1"
212+
else trt.Logger(trt.Logger.VERBOSE)
213+
)
206214
builder = trt.Builder(TRT_LOGGER)
207215
network = builder.create_network()
208216

@@ -310,7 +318,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
310318
# Create the model. For now don't implement a proper accumulator
311319
# just return 0 if not-ready and 'INPUT'+'START'*('END'*'CORRID')
312320
# otherwise... the tests know to expect this.
313-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
321+
TRT_LOGGER = (
322+
trt.Logger(trt.Logger.INFO)
323+
if os.environ.get("TRT_VERBOSE") != "1"
324+
else trt.Logger(trt.Logger.VERBOSE)
325+
)
314326
builder = trt.Builder(TRT_LOGGER)
315327
network = builder.create_network()
316328

qa/common/gen_qa_identity_models.py

Lines changed: 15 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -545,7 +545,11 @@ def create_plan_dynamic_rf_modelfile(
545545
models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size
546546
):
547547
# Create the model
548-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
548+
TRT_LOGGER = (
549+
trt.Logger(trt.Logger.INFO)
550+
if os.environ.get("TRT_VERBOSE") != "1"
551+
else trt.Logger(trt.Logger.VERBOSE)
552+
)
549553
builder = trt.Builder(TRT_LOGGER)
550554
network = builder.create_network()
551555

@@ -644,7 +648,11 @@ def create_plan_shape_tensor_modelfile(
644648
# Note that values of OUTPUT tensor must be identical
645649
# to INPUT values
646650

647-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
651+
TRT_LOGGER = (
652+
trt.Logger(trt.Logger.INFO)
653+
if os.environ.get("TRT_VERBOSE") != "1"
654+
else trt.Logger(trt.Logger.VERBOSE)
655+
)
648656
builder = trt.Builder(TRT_LOGGER)
649657
network = builder.create_network()
650658

@@ -748,7 +756,11 @@ def create_plan_dynamic_modelfile(
748756
models_dir, model_version, io_cnt, max_batch, dtype, shape, profile_max_size
749757
):
750758
# Create the model
751-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
759+
TRT_LOGGER = (
760+
trt.Logger(trt.Logger.INFO)
761+
if os.environ.get("TRT_VERBOSE") != "1"
762+
else trt.Logger(trt.Logger.VERBOSE)
763+
)
752764
builder = trt.Builder(TRT_LOGGER)
753765
network = builder.create_network()
754766

qa/common/gen_qa_implicit_models.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -899,7 +899,11 @@ def create_onnx_modelconfig(
899899

900900
def create_plan_modelfile(models_dir, model_version, max_batch, dtype, shape):
901901
trt_dtype = np_to_trt_dtype(dtype)
902-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
902+
TRT_LOGGER = (
903+
trt.Logger(trt.Logger.INFO)
904+
if os.environ.get("TRT_VERBOSE") != "1"
905+
else trt.Logger(trt.Logger.VERBOSE)
906+
)
903907
builder = trt.Builder(TRT_LOGGER)
904908
network = builder.create_network()
905909

@@ -1005,7 +1009,11 @@ def create_plan_rf_modelfile(models_dir, model_version, max_batch, dtype, shape)
10051009
trt_dtype = np_to_trt_dtype(dtype)
10061010
trt_memory_format = trt.TensorFormat.LINEAR
10071011

1008-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
1012+
TRT_LOGGER = (
1013+
trt.Logger(trt.Logger.INFO)
1014+
if os.environ.get("TRT_VERBOSE") != "1"
1015+
else trt.Logger(trt.Logger.VERBOSE)
1016+
)
10091017
builder = trt.Builder(TRT_LOGGER)
10101018
network = builder.create_network()
10111019

qa/common/gen_qa_model_repository

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -430,7 +430,7 @@ python3 $VOLUME_SRCDIR/gen_qa_ragged_models.py --tensorrt --models_dir=$VOLUME_R
430430
chmod -R 777 $VOLUME_RAGGEDDESTDIR
431431
python3 $VOLUME_SRCDIR/gen_qa_trt_format_models.py --models_dir=$VOLUME_FORMATDESTDIR
432432
chmod -R 777 $VOLUME_FORMATDESTDIR
433-
python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR
433+
nvidia-smi --query-gpu=compute_cap | grep -qz 11.0 && echo -e '\033[33m[WARNING]\033[0m Skipping model generation for data dependent shape' || python3 $VOLUME_SRCDIR/gen_qa_trt_data_dependent_shape.py --models_dir=$VOLUME_DATADEPENDENTDIR
434434
chmod -R 777 $VOLUME_DATADEPENDENTDIR
435435
# Make shared library for custom Hardmax plugin.
436436
if [ -d "/usr/src/tensorrt" ]; then
@@ -463,6 +463,7 @@ if [ "$MODEL_TYPE" != "igpu" ] ; then
463463
--label PROJECT_NAME=$PROJECT_NAME \
464464
$DOCKER_GPU_ARGS \
465465
-v $DOCKER_VOLUME:/mnt \
466+
-e TRT_VERBOSE \
466467
$TENSORRT_IMAGE \
467468
bash -xe $VOLUME_SRCDIR/$TRTSCRIPT
468469

qa/common/gen_qa_models.py

Lines changed: 20 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -66,7 +66,11 @@ def create_plan_dynamic_rf_modelfile(
6666
trt_memory_format = trt.TensorFormat.LINEAR
6767

6868
# Create the model
69-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
69+
TRT_LOGGER = (
70+
trt.Logger(trt.Logger.INFO)
71+
if os.environ.get("TRT_VERBOSE") != "1"
72+
else trt.Logger(trt.Logger.VERBOSE)
73+
)
7074
builder = trt.Builder(TRT_LOGGER)
7175
network = builder.create_network()
7276
if max_batch == 0:
@@ -206,7 +210,11 @@ def create_plan_dynamic_modelfile(
206210
trt_output1_dtype = np_to_trt_dtype(output1_dtype)
207211

208212
# Create the model
209-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
213+
TRT_LOGGER = (
214+
trt.Logger(trt.Logger.INFO)
215+
if os.environ.get("TRT_VERBOSE") != "1"
216+
else trt.Logger(trt.Logger.VERBOSE)
217+
)
210218
builder = trt.Builder(TRT_LOGGER)
211219
network = builder.create_network()
212220
if max_batch == 0:
@@ -372,7 +380,11 @@ def create_plan_fixed_rf_modelfile(
372380
trt_memory_format = trt.TensorFormat.LINEAR
373381

374382
# Create the model
375-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
383+
TRT_LOGGER = (
384+
trt.Logger(trt.Logger.INFO)
385+
if os.environ.get("TRT_VERBOSE") != "1"
386+
else trt.Logger(trt.Logger.VERBOSE)
387+
)
376388
builder = trt.Builder(TRT_LOGGER)
377389
network = builder.create_network()
378390
if max_batch == 0:
@@ -483,7 +495,11 @@ def create_plan_fixed_modelfile(
483495
trt_output1_dtype = np_to_trt_dtype(output1_dtype)
484496

485497
# Create the model
486-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
498+
TRT_LOGGER = (
499+
trt.Logger(trt.Logger.INFO)
500+
if os.environ.get("TRT_VERBOSE") != "1"
501+
else trt.Logger(trt.Logger.VERBOSE)
502+
)
487503
builder = trt.Builder(TRT_LOGGER)
488504
network = builder.create_network()
489505
if max_batch == 0:

qa/common/gen_qa_ragged_models.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,11 @@ def create_plan_modelfile(models_dir, model_version, dtype):
5757
# - BATCH_MAX_ELEMENT_COUNT_AS_SHAPE
5858
# - BATCH_ITEM_SHAPE_FLATTEN
5959

60-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
60+
TRT_LOGGER = (
61+
trt.Logger(trt.Logger.INFO)
62+
if os.environ.get("TRT_VERBOSE") != "1"
63+
else trt.Logger(trt.Logger.VERBOSE)
64+
)
6165
builder = trt.Builder(TRT_LOGGER)
6266
network = builder.create_network()
6367
trt_dtype = np_to_trt_dtype(dtype)
@@ -412,7 +416,11 @@ def create_plan_itemshape_modelfile(models_dir, model_version, dtype):
412416
# generated to have matching batch dimension, the output can be produced
413417
# via identity op and expect Triton will scatter the output properly.
414418

415-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
419+
TRT_LOGGER = (
420+
trt.Logger(trt.Logger.INFO)
421+
if os.environ.get("TRT_VERBOSE") != "1"
422+
else trt.Logger(trt.Logger.VERBOSE)
423+
)
416424
builder = trt.Builder(TRT_LOGGER)
417425
network = builder.create_network()
418426
trt_dtype = np_to_trt_dtype(dtype)

qa/common/gen_qa_reshape_models.py

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,11 @@ def create_plan_modelfile(
5858
io_cnt = len(input_shapes)
5959

6060
# Create the model that copies inputs to corresponding outputs.
61-
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
61+
TRT_LOGGER = (
62+
trt.Logger(trt.Logger.INFO)
63+
if os.environ.get("TRT_VERBOSE") != "1"
64+
else trt.Logger(trt.Logger.VERBOSE)
65+
)
6266
builder = trt.Builder(TRT_LOGGER)
6367
network = builder.create_network()
6468

0 commit comments

Comments
 (0)