
Commit d638f04

Merge branch 'main' into orca_fix

2 parents bf88f4a + 7866ef4


46 files changed: +1059 / -132 lines

Dockerfile.QA

Lines changed: 1 addition & 0 deletions
@@ -144,6 +144,7 @@ RUN mkdir -p qa/common && \
     mkdir qa/L0_data_compression/models && \
     cp -r docs/examples/model_repository/simple qa/L0_data_compression/models && \
     cp bin/data_compressor_test qa/L0_data_compression/. && \
+    cp bin/backend_tensor_size_test qa/L0_input_validation/. && \
     cp bin/metrics_api_test qa/L0_metrics/. && \
     cp bin/response_cache_test qa/L0_response_cache/. && \
     cp bin/request_cancellation_test qa/L0_request_cancellation/. && \

Dockerfile.sdk

Lines changed: 32 additions & 9 deletions
@@ -29,7 +29,7 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.03-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.04-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
@@ -41,7 +41,7 @@ ARG TRITON_THIRD_PARTY_REPO_TAG=main
 ARG TRITON_ENABLE_GPU=ON
 ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
 ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
-
+ARG TRITON_PERF_ANALYZER_BUILD=1
 # DCGM version to install for Model Analyzer
 ARG DCGM_VERSION=3.3.6
 
@@ -115,6 +115,9 @@ ARG TRITON_ENABLE_GPU
 ARG JAVA_BINDINGS_MAVEN_VERSION
 ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG
 ARG TARGETPLATFORM
+ARG TRITON_PERF_ANALYZER_BUILD
+
+ENV TRITON_PERF_ANALYZER_BUILD=${TRITON_PERF_ANALYZER_BUILD}
 
 WORKDIR /workspace
 COPY TRITON_VERSION .
@@ -144,7 +147,10 @@ RUN make -j16 cc-clients java-clients && \
 # the python client until now. Post-migration we should focus
 # effort on de-tangling these flows.
 WORKDIR /workspace/pa_build
-RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
+# NOTE: If TRITON_PERF_ANALYZER_BUILD=0, the Performance Analyzer (PA) binaries must already exist
+# in the path specified by the ARG TRITON_PA_REPO_SUBDIR.
+RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
+      cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
        -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
        -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
        -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
@@ -160,12 +166,29 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
        -DTRITON_ENABLE_PYTHON_GRPC=ON \
        -DTRITON_PACKAGE_PERF_ANALYZER=ON \
        -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
-       /workspace/perf_analyzer
-RUN make -j16 perf-analyzer python-clients
-
-RUN pip3 install build \
-    && cd /workspace/perf_analyzer/genai-perf \
-    && python3 -m build --wheel --outdir /workspace/install/python
+       /workspace/perf_analyzer && \
+      make -j16 perf-analyzer python-clients && \
+      pip3 install build && \
+      cd /workspace/perf_analyzer/genai-perf && \
+      python3 -m build --wheel --outdir /workspace/install/python; \
+    else \
+      ls /workspace/perf_analyzer/ && \
+      tar -xzf /workspace/perf_analyzer/perf_analyzer*.tar.gz -C /workspace/install/bin && \
+      echo "Perf Analyzer binaries was extracted and not build" && \
+      cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
+        -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
+        -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
+        -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
+        -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
+        -DTRITON_ENABLE_PYTHON_HTTP=ON \
+        -DTRITON_ENABLE_PYTHON_GRPC=ON \
+        -DTRITON_PACKAGE_PERF_ANALYZER=ON \
+        -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
+        /workspace/perf_analyzer && \
+      make -j16 python-clients && \
+      mkdir -p /workspace/install/python && \
+      cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \
+    fi
 
 # Install Java API Bindings
 RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
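A minimal usage sketch (not part of the commit), assuming a prebuilt Perf Analyzer tarball has already been staged under the directory named by TRITON_PA_REPO_SUBDIR; the image tag and build context below are placeholders:

    # Hypothetical invocation; -t tag and context path are assumptions, not from the diff.
    docker build -f Dockerfile.sdk \
        --build-arg TRITON_PERF_ANALYZER_BUILD=0 \
        --build-arg TRITON_PA_REPO_SUBDIR=perfanalyzerrepo \
        -t tritonserver-sdk .

With TRITON_PERF_ANALYZER_BUILD=1 (the default added by this change), the image builds Perf Analyzer from source exactly as before.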

README.md

Lines changed: 2 additions & 2 deletions
@@ -29,8 +29,8 @@
 
 >[!WARNING]
 >You are currently on the `main` branch which tracks under-development progress
->towards the next release. The current release is version [2.56.0](https://github.com/triton-inference-server/server/releases/latest)
->and corresponds to the 25.03 container release on NVIDIA GPU Cloud (NGC).
+>towards the next release. The current release is version [2.57.0](https://github.com/triton-inference-server/server/releases/latest)
+>and corresponds to the 25.04 container release on NVIDIA GPU Cloud (NGC).
 
 # Triton Inference Server

TRITON_VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2.57.0dev
+2.58.0dev

build.py

Lines changed: 9 additions & 9 deletions
@@ -71,14 +71,14 @@
 #
 
 DEFAULT_TRITON_VERSION_MAP = {
-    "release_version": "2.57.0dev",
-    "triton_container_version": "25.04dev",
-    "upstream_container_version": "25.03",
-    "ort_version": "1.21.0",
-    "ort_openvino_version": "2025.0.0",
-    "standalone_openvino_version": "2025.0.0",
+    "release_version": "2.58.0dev",
+    "triton_container_version": "25.05dev",
+    "upstream_container_version": "25.04",
+    "ort_version": "1.22.0",
+    "ort_openvino_version": "2025.1.0",
+    "standalone_openvino_version": "2025.1.0",
     "dcgm_version": "3.3.6",
-    "vllm_version": "0.7.3",
+    "vllm_version": "0.8.4",
     "rhel_py_version": "3.12.3",
 }
 
@@ -323,7 +323,7 @@ def gitclone(self, repo, tag, subdir, org):
         # reference onto a new branch we name "tritonbuildref".
         if tag.startswith("pull/"):
             self.cmd(
-                f" git clone --recursive --depth=1 {org}/{repo}.git {subdir};",
+                f" git clone --recursive --depth=1 {org}/{repo}.git {subdir}; git --git-dir {subdir}/.git log --oneline -1",
                 check_exitcode=True,
             )
             self.cmd("}" if target_platform() == "windows" else "fi")
@@ -332,7 +332,7 @@ def gitclone(self, repo, tag, subdir, org):
             self.cmd(f"git checkout tritonbuildref", check_exitcode=True)
         else:
             self.cmd(
-                f" git clone --recursive --single-branch --depth=1 -b {tag} {org}/{repo}.git {subdir};",
+                f" git clone --recursive --single-branch --depth=1 -b {tag} {org}/{repo}.git {subdir}; git --git-dir {subdir}/.git log --oneline -1",
                 check_exitcode=True,
             )
             self.cmd("}" if target_platform() == "windows" else "fi")

deploy/aws/values.yaml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.04-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1

deploy/fleetcommand/Chart.yaml

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.56.0"
+appVersion: "2.57.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart

deploy/fleetcommand/values.yaml

Lines changed: 3 additions & 3 deletions
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.04-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -47,13 +47,13 @@ image:
   #
   # To set model control mode, uncomment and configure below
   # TODO: Fix the following url, it is invalid
-  # See https://github.com/triton-inference-server/server/blob/r25.03/docs/model_management.md
+  # See https://github.com/triton-inference-server/server/blob/r25.04/docs/user_guide/model_management.md
   # for more details
   #- --model-control-mode=explicit|poll|none
   #
   # Additional server args
   #
-  # see https://github.com/triton-inference-server/server/blob/r25.03/README.md
+  # see https://github.com/triton-inference-server/server/blob/r25.04/README.md
   # for more details
 
 service:

deploy/gcp/values.yaml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.04-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1

deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:25.03-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:25.04-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
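The updated image tags referenced across the deploy manifests can be pulled directly to confirm they are available on NGC (a sketch; the tags are taken from the diffs above):

    docker pull nvcr.io/nvidia/tritonserver:25.04-py3
    docker pull nvcr.io/nvidia/tritonserver:25.04-py3-sdk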
