triton-inference-server
diff --git a/Dockerfile.sdk b/Dockerfile.sdk
Lines changed: 1 addition & 1 deletion
diff --git a/Dockerfile.win10.min b/Dockerfile.win10.min
Lines changed: 17 additions & 28 deletions
diff --git a/README.md b/README.md
Lines changed: 4 additions & 4 deletions
diff --git a/TRITON_VERSION b/TRITON_VERSION
Lines changed: 1 addition & 1 deletion
diff --git a/build.py b/build.py
Lines changed: 3 additions & 3 deletions
diff --git a/deploy/aws/values.yaml b/deploy/aws/values.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/deploy/fleetcommand/Chart.yaml b/deploy/fleetcommand/Chart.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/deploy/fleetcommand/values.yaml b/deploy/fleetcommand/values.yaml
Lines changed: 3 additions & 3 deletions
diff --git a/deploy/gcp/values.yaml b/deploy/gcp/values.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml b/deploy/gke-marketplace-app/benchmark/perf-analyzer-script/triton_client.yaml
Lines changed: 1 addition & 1 deletion
@@ -29,7 +29,7 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.05-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.06-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_COMMON_REPO_TAG=main
 
@@ -37,9 +37,9 @@ RUN choco install unzip -y
 #
 # Installing TensorRT
 #
-ARG TENSORRT_VERSION
-ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.0.zip"
-ARG TENSORRT_SOURCE=${TENSORRT_ZIP}
+ARG TENSORRT_VERSION=10.0.1.6
+ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.4.zip"
+ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.0.1/zip/TensorRT-10.0.1.6.Windows10.win10.cuda-12.4.zip
 # COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}
 ADD ${TENSORRT_SOURCE} /tmp/${TENSORRT_ZIP}
 RUN unzip /tmp/%TENSORRT_ZIP%
@@ -51,9 +51,9 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 #
 # Installing cuDNN
 #
-ARG CUDNN_VERSION
+ARG CUDNN_VERSION=9.1.0.70
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
-ARG CUDNN_SOURCE=${CUDNN_ZIP}
+ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.1.0.70_cuda12-archive.zip
 ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}
 RUN unzip /tmp/%CUDNN_ZIP%
 RUN move cudnn-* cudnn
@@ -75,7 +75,7 @@ RUN choco install git docker unzip -y
 #
 # Installing python
 #
-ARG PYTHON_VERSION=3.8.10
+ARG PYTHON_VERSION=3.10.11
 ARG PYTHON_SOURCE=https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-amd64.exe
 ADD ${PYTHON_SOURCE} python-${PYTHON_VERSION}-amd64.exe
 RUN python-%PYTHON_VERSION%-amd64.exe /quiet InstallAllUsers=1 PrependPath=1 Include_doc=0 TargetDir="C:\python%PYTHON_VERSION%"
@@ -88,14 +88,8 @@ LABEL PYTHON_VERSION=${PYTHON_VERSION}
 #
 # Installing CMake
 #
-ARG CMAKE_VERSION=3.27.1
-ARG CMAKE_FILE=cmake-${CMAKE_VERSION}-windows-x86_64
-ARG CMAKE_SOURCE=https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_FILE}.zip
-
-ADD ${CMAKE_SOURCE} ${CMAKE_FILE}.zip
-RUN unzip %CMAKE_FILE%.zip
-RUN move %CMAKE_FILE% "c:\CMake"
-RUN setx PATH "c:\CMake\bin;%PATH%"
+ARG CMAKE_VERSION=3.29.3
+RUN pip install cmake==%CMAKE_VERSION%
 
 ENV CMAKE_TOOLCHAIN_FILE /vcpkg/scripts/buildsystems/vcpkg.cmake
 ENV VCPKG_TARGET_TRIPLET x64-windows
@@ -104,27 +98,22 @@ LABEL CMAKE_VERSION=${CMAKE_VERSION}
 
 # Be aware that pip can interact badly with VS cmd shell so need to pip install before
 # vsdevcmd.bat (see https://bugs.python.org/issue38989)
-
-
 #
 # Installing Visual Studio BuildTools: VS17 2022
 #
-ARG BUILDTOOLS_VERSION
+ARG BUILDTOOLS_VERSION=17.9.34622.214
 # Download collect.exe in case of an install failure.
 ADD https://aka.ms/vscollect.exe "C:\tmp\collect.exe"
 
 # Use the latest release channel. For more control, specify the location of an internal layout.
-ARG CHANNEL_URL=https://aka.ms/vs/17/release/channel
-ADD ${CHANNEL_URL} "C:\tmp\VisualStudio.chman"
 # Download the Build Tools bootstrapper.
-ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe
+# ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe
+ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/5e7b923b-7d89-4e14-95b8-a84ab168e243/96b21d216c7954aaf606c6d7ba59a3de991884a8a86c578c767ba349c23188a9/vs_BuildTools.exe
 ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe
 # Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended.
 ARG VS_INSTALL_PATH_WP="C:\BuildTools"
 RUN vs_buildtools.exe --quiet --wait --norestart --nocache install \
       --installPath %VS_INSTALL_PATH_WP% \
-      --channelUri "C:\tmp\VisualStudio.chman" \
-      --installChannelUri "C:\tmp\VisualStudio.chman" \
       --add Microsoft.VisualStudio.Workload.VCTools \
       --includeRecommended \
       --locale "En-us"
@@ -136,15 +125,15 @@ WORKDIR /
 #
 # Installing Vcpkg
 #
-ARG VCPGK_VERSION=2023.11.20
+ARG VCPGK_VERSION=2024.03.19
 RUN git clone --single-branch --depth=1 -b %VCPGK_VERSION% https://github.com/microsoft/vcpkg.git
 WORKDIR /vcpkg
 RUN bootstrap-vcpkg.bat
 RUN vcpkg.exe update
 RUN vcpkg.exe install \
-      b64:x64-windows \
       boost-interprocess:x64-windows \
       boost-stacktrace:x64-windows \
+      b64:x64-windows \
       openssl-windows:x64-windows \
       openssl:x64-windows \
       pthread:x64-windows \
@@ -160,8 +149,8 @@ WORKDIR /
 # Installing CUDA
 #
 ARG CUDA_MAJOR=12
-ARG CUDA_MINOR=3
-ARG CUDA_PATCH=2
+ARG CUDA_MINOR=5
+ARG CUDA_PATCH=0
 ARG CUDA_VERSION=${CUDA_MAJOR}.${CUDA_MINOR}.${CUDA_PATCH}
 ARG CUDA_PACKAGES="nvcc_${CUDA_MAJOR}.${CUDA_MINOR} \
                    cudart_${CUDA_MAJOR}.${CUDA_MINOR} \
@@ -186,15 +175,15 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi
 
 RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"
 
-ARG CUDNN_VERSION
+ARG CUDNN_VERSION=9.1.0.70
 ENV CUDNN_VERSION ${CUDNN_VERSION}
 COPY --from=dependency_base /cudnn /cudnn
 RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
 RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
 RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
 LABEL CUDNN_VERSION="${CUDNN_VERSION}"
 
-ARG TENSORRT_VERSION
+ARG TENSORRT_VERSION=10.0.1.6
 ENV TRT_VERSION ${TENSORRT_VERSION}
 COPY --from=dependency_base /TensorRT /TensorRT
 RUN setx PATH "c:\TensorRT\lib;%PATH%"
 
@@ -33,7 +33,7 @@
 > [!WARNING]
 > ##### LATEST RELEASE
 > You are currently on the `main` branch which tracks under-development progress towards the next release.
-> The current release is version [2.46.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.05 container release on NVIDIA GPU Cloud (NGC).
+> The current release is version [2.47.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.06 container release on NVIDIA GPU Cloud (NGC).
 
 Triton Inference Server is an open source inference serving software that
 streamlines AI inferencing. Triton enables teams to deploy any AI model from
@@ -91,16 +91,16 @@ Inference Server with the
 
 ```bash
 # Step 1: Create the example model repository
-git clone -b r24.05 https://github.com/triton-inference-server/server.git
+git clone -b r24.06 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.05-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.06-py3 tritonserver --model-repository=/models
 
 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.05-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.06-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following
 
@@ -1 +1 @@
-2.47.0dev
+2.48.0dev
@@ -69,10 +69,10 @@
 # incorrectly load the other version of the openvino libraries.
 #
 TRITON_VERSION_MAP = {
-    "2.47.0dev": (
+    "2.48.0dev": (
         "24.06dev",  # triton container
-        "24.05",  # upstream container
-        "1.18.0",  # ORT
+        "24.06",  # upstream container
+        "1.18.1",  # ORT
         "2024.0.0",  # ORT OpenVINO
         "2024.0.0",  # Standalone OpenVINO
         "3.2.6",  # DCGM version
 
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.05-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1
 
@@ -26,7 +26,7 @@
 
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.46.0"
+appVersion: "2.47.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart
 
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.05-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver
@@ -47,13 +47,13 @@ image:
     #
     # To set model control mode, uncomment and configure below
     # TODO: Fix the following url, it is invalid
-    # See https://github.com/triton-inference-server/server/blob/r24.05/docs/model_management.md
+    # See https://github.com/triton-inference-server/server/blob/r24.06/docs/model_management.md
     #  for more details
     #- --model-control-mode=explicit|poll|none
     #
     # Additional server args
     #
-    # see https://github.com/triton-inference-server/server/blob/r24.05/README.md
+    # see https://github.com/triton-inference-server/server/blob/r24.06/README.md
     #  for more details
 
 service:
 
@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.05-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.06-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
 
@@ -33,7 +33,7 @@ metadata:
   namespace: default
 spec:
   containers:
-  - image: nvcr.io/nvidia/tritonserver:24.05-py3-sdk
+  - image: nvcr.io/nvidia/tritonserver:24.06-py3-sdk
     imagePullPolicy: Always
     name: nv-triton-client
     securityContext:
Original file line number	Diff line number	Diff line change
`@@ -29,7 +29,7 @@`
`29`	`29`	`#`
`30`	`30`
`31`	`31`	`# Base image on the minimum Triton container`
`32`		`-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.05-py3-min`
	`32`	`+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.06-py3-min`
`33`	`33`
`34`	`34`	`ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo`
`35`	`35`	`ARG TRITON_COMMON_REPO_TAG=main`