
Commit d638f04

Merge branch 'main' into orca_fix

2 parents bf88f4a + 7866ef4


46 files changed: +1059 / -132 lines

Dockerfile.QA

Lines changed: 1 addition & 0 deletions
@@ -144,6 +144,7 @@ RUN mkdir -p qa/common && \
     mkdir qa/L0_data_compression/models && \
     cp -r docs/examples/model_repository/simple qa/L0_data_compression/models && \
     cp bin/data_compressor_test qa/L0_data_compression/. && \
+    cp bin/backend_tensor_size_test qa/L0_input_validation/. && \
     cp bin/metrics_api_test qa/L0_metrics/. && \
     cp bin/response_cache_test qa/L0_response_cache/. && \
     cp bin/request_cancellation_test qa/L0_request_cancellation/. && \

Dockerfile.sdk

Lines changed: 32 additions & 9 deletions
@@ -29,7 +29,7 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.03-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.04-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
@@ -41,7 +41,7 @@ ARG TRITON_THIRD_PARTY_REPO_TAG=main
 ARG TRITON_ENABLE_GPU=ON
 ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
 ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
-
+ARG TRITON_PERF_ANALYZER_BUILD=1
 # DCGM version to install for Model Analyzer
 ARG DCGM_VERSION=3.3.6
 
@@ -115,6 +115,9 @@ ARG TRITON_ENABLE_GPU
 ARG JAVA_BINDINGS_MAVEN_VERSION
 ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG
 ARG TARGETPLATFORM
+ARG TRITON_PERF_ANALYZER_BUILD
+
+ENV TRITON_PERF_ANALYZER_BUILD=${TRITON_PERF_ANALYZER_BUILD}
 
 WORKDIR /workspace
 COPY TRITON_VERSION .
@@ -144,7 +147,10 @@ RUN make -j16 cc-clients java-clients && \
 # the python client until now. Post-migration we should focus
 # effort on de-tangling these flows.
 WORKDIR /workspace/pa_build
-RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
+# NOTE: If TRITON_PERF_ANALYZER_BUILD=0, the Performance Analyzer (PA) binaries must already exist
+# in the path specified by the ARG TRITON_PA_REPO_SUBDIR.
+RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
+      cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
        -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
        -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
        -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
@@ -160,12 +166,29 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
        -DTRITON_ENABLE_PYTHON_GRPC=ON \
        -DTRITON_PACKAGE_PERF_ANALYZER=ON \
        -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
-       /workspace/perf_analyzer
-RUN make -j16 perf-analyzer python-clients
-
-RUN pip3 install build \
-    && cd /workspace/perf_analyzer/genai-perf \
-    && python3 -m build --wheel --outdir /workspace/install/python
+       /workspace/perf_analyzer && \
+      make -j16 perf-analyzer python-clients && \
+      pip3 install build && \
+      cd /workspace/perf_analyzer/genai-perf && \
+      python3 -m build --wheel --outdir /workspace/install/python; \
+    else \
+      ls /workspace/perf_analyzer/ && \
+      tar -xzf /workspace/perf_analyzer/perf_analyzer*.tar.gz -C /workspace/install/bin && \
+      echo "Perf Analyzer binaries was extracted and not build" && \
+      cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
+        -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
+        -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
+        -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
+        -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
+        -DTRITON_ENABLE_PYTHON_HTTP=ON \
+        -DTRITON_ENABLE_PYTHON_GRPC=ON \
+        -DTRITON_PACKAGE_PERF_ANALYZER=ON \
+        -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
+        /workspace/perf_analyzer && \
+      make -j16 python-clients && \
+      mkdir -p /workspace/install/python && \
+      cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \
+    fi
 
 # Install Java API Bindings
 RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
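A minimal usage sketch (not part of the commit), assuming a prebuilt Perf Analyzer tarball has already been staged under the directory named by TRITON_PA_REPO_SUBDIR; the image tag and build context below are placeholders:

    # Hypothetical invocation; -t tag and context path are assumptions, not from the diff.
    docker build -f Dockerfile.sdk \
        --build-arg TRITON_PERF_ANALYZER_BUILD=0 \
        --build-arg TRITON_PA_REPO_SUBDIR=perfanalyzerrepo \
        -t tritonserver-sdk .

With TRITON_PERF_ANALYZER_BUILD=1 (the default added by this change), the image builds Perf Analyzer from source exactly as before.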

README.md

Lines changed: 2 additions & 2 deletions
@@ -29,8 +29,8 @@
 
 >[!WARNING]
 >You are currently on the `main` branch which tracks under-development progress
->towards the next release. The current release is version [2.56.0](https://github.com/triton-inference-server/server/releases/latest)
->and corresponds to the 25.03 container release on NVIDIA GPU Cloud (NGC).
+>towards the next release. The current release is version [2.57.0](https://github.com/triton-inference-server/server/releases/latest)
+>and corresponds to the 25.04 container release on NVIDIA GPU Cloud (NGC).
 
 # Triton Inference Server

TRITON_VERSION

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-2.57.0dev
+2.58.0dev

build.py

Lines changed: 9 additions & 9 deletions
@@ -71,14 +71,14 @@
 #
 
 DEFAULT_TRITON_VERSION_MAP = {
-    "release_version": "2.57.0dev",
-    "triton_container_version": "25.04dev",
-    "upstream_container_version": "25.03",
-    "ort_version": "1.21.0",
-    "ort_openvino_version": "2025.0.0",
-    "standalone_openvino_version": "2025.0.0",
+    "release_version": "2.58.0dev",
+    "triton_container_version": "25.05dev",
+    "upstream_container_version": "25.04",
+    "ort_version": "1.22.0",
+    "ort_openvino_version": "2025.1.0",
+    "standalone_openvino_version": "2025.1.0",
     "dcgm_version": "3.3.6",
-    "vllm_version": "0.7.3",
+    "vllm_version": "0.8.4",
     "rhel_py_version": "3.12.3",
 }
 
@@ -323,7 +323,7 @@ def gitclone(self, repo, tag, subdir, org):
         # reference onto a new branch we name "tritonbuildref".
         if tag.startswith("pull/"):
             self.cmd(
-                f" git clone --recursive --depth=1 {org}/{repo}.git {subdir};",
+                f" git clone --recursive --depth=1 {org}/{repo}.git {subdir}; git --git-dir {subdir}/.git log --oneline -1",
                 check_exitcode=True,
             )
             self.cmd("}" if target_platform() == "windows" else "fi")
@@ -332,7 +332,7 @@ def gitclone(self, repo, tag, subdir, org):
             self.cmd(f"git checkout tritonbuildref", check_exitcode=True)
         else:
             self.cmd(
-                f" git clone --recursive --single-branch --depth=1 -b {tag} {org}/{repo}.git {subdir};",
+                f" git clone --recursive --single-branch --depth=1 -b {tag} {org}/{repo}.git {subdir}; git --git-dir {subdir}/.git log --oneline -1",
                 check_exitcode=True,
             )
             self.cmd("}" if target_platform() == "windows" else "fi")

deploy/aws/values.yaml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.04-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1

deploy/fleetcommand/Chart.yaml

Lines changed: 1 addition & 1 deletion
@@ -26,7 +26,7 @@
 
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.56.0"
+appVersion: "2.57.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart

deploy/fleetcommand/values.yaml

Lines changed: 3 additions & 3 deletions
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.04-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -47,13 +47,13 @@ image:
   #
   # To set model control mode, uncomment and configure below
   # TODO: Fix the following url, it is invalid
-  # See https://github.com/triton-inference-server/server/blob/r25.03/docs/model_management.md
+  # See https://github.com/triton-inference-server/server/blob/r25.04/docs/user_guide/model_management.md
   # for more details
   #- --model-control-mode=explicit|poll|none
   #
   # Additional server args
   #
-  # see https://github.com/triton-inference-server/server/blob/r25.03/README.md
+  # see https://github.com/triton-inference-server/server/blob/r25.04/README.md
   # for more details
 
 service:

deploy/gcp/values.yaml

Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:25.03-py3
+  imageName: nvcr.io/nvidia/tritonserver:25.04-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1

deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml

Lines changed: 1 addition & 1 deletion
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:25.03-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:25.04-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
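The updated image tags referenced across the deploy manifests can be pulled directly to confirm they are available on NGC (a sketch; the tags are taken from the diffs above):

    docker pull nvcr.io/nvidia/tritonserver:25.04-py3
    docker pull nvcr.io/nvidia/tritonserver:25.04-py3-sdk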
