
Commit 4a80379

Merge branch 'main' of github.com:triton-inference-server/server into yinggeh-DLIS-7272-refactor-core-input-checks
2 parents: 8492549 + f6021f7

File tree: 76 files changed (+2722, -115 lines)


Dockerfile.QA

Lines changed: 4 additions & 0 deletions

@@ -390,6 +390,10 @@ RUN rm -fr qa/L0_copyrights qa/L0_build_variants && \
 RUN find qa/pkgs/ -maxdepth 1 -type f -name \
     "tritonserver-*.whl" | xargs -I {} pip3 install --upgrade {}[all]
 
+# Install Triton Frontend Python API
+RUN find qa/pkgs/ -type f -name \
+    "tritonfrontend-*.whl" | xargs -I {} pip3 install --upgrade {}[all]
+
 ENV LD_LIBRARY_PATH /opt/tritonserver/qa/clients:${LD_LIBRARY_PATH}
 
 # DLIS-3631: Needed to run Perf Analyzer CI tests correctly
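The new layer installs the tritonfrontend wheel next to the tritonserver wheel that was already being installed from qa/pkgs/. A minimal post-install smoke test, assuming only that the wheel exposes an importable tritonfrontend module (illustrative sketch, not part of the commit):

```python
# Sketch: confirm the tritonfrontend wheel installed by the layer above is importable.
# Run inside the QA image after the pip3 install step; assumes the package name
# matches the module name.
import importlib

module = importlib.import_module("tritonfrontend")
print("tritonfrontend loaded from", module.__file__)
```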

Dockerfile.sdk

Lines changed: 1 addition & 1 deletion

@@ -29,7 +29,7 @@
 #
 
 # Base image on the minimum Triton container
-ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.07-py3-min
+ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.08-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
 ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo

Dockerfile.win10.min

Lines changed: 6 additions & 6 deletions

@@ -37,7 +37,7 @@ RUN choco install unzip -y
 #
 # Installing TensorRT
 #
-ARG TENSORRT_VERSION=10.2.0.19
+ARG TENSORRT_VERSION=10.3.0.26
 ARG TENSORRT_ZIP="TensorRT-${TENSORRT_VERSION}.Windows10.x86_64.cuda-12.5.zip"
 ARG TENSORRT_SOURCE=https://developer.nvidia.com/downloads/compute/machine-learning/tensorrt/10.2.0/zip/TensorRT-10.2.0.19.Windows10.x86_64.cuda-12.5.zip
 # COPY ${TENSORRT_ZIP} /tmp/${TENSORRT_ZIP}

@@ -51,7 +51,7 @@ LABEL TENSORRT_VERSION="${TENSORRT_VERSION}"
 #
 # Installing cuDNN
 #
-ARG CUDNN_VERSION=9.2.1.18
+ARG CUDNN_VERSION=9.3.0.75
 ARG CUDNN_ZIP=cudnn-windows-x86_64-${CUDNN_VERSION}_cuda12-archive.zip
 ARG CUDNN_SOURCE=https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/windows-x86_64/cudnn-windows-x86_64-9.2.1.18_cuda12-archive.zip
 ADD ${CUDNN_SOURCE} /tmp/${CUDNN_ZIP}

@@ -101,14 +101,14 @@ LABEL CMAKE_VERSION=${CMAKE_VERSION}
 #
 # Installing Visual Studio BuildTools: VS17 2022
 #
-ARG BUILDTOOLS_VERSION=17.9.34622.214
+ARG BUILDTOOLS_VERSION=17.10.35201.131
 # Download collect.exe in case of an install failure.
 ADD https://aka.ms/vscollect.exe "C:\tmp\collect.exe"
 
 # Use the latest release channel. For more control, specify the location of an internal layout.
 # Download the Build Tools bootstrapper.
 # ARG BUILD_TOOLS_SOURCE=https://aka.ms/vs/17/release/vs_buildtools.exe
-ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/5e7b923b-7d89-4e14-95b8-a84ab168e243/96b21d216c7954aaf606c6d7ba59a3de991884a8a86c578c767ba349c23188a9/vs_BuildTools.exe
+ARG BUILD_TOOLS_SOURCE=https://download.visualstudio.microsoft.com/download/pr/28626b4b-f88f-4b55-a0cf-f3eaa2c643fb/e6c43d4dfb36338d954cdb3ad9010ab2a479e712088f4f6b016eadcc721bab28/vs_BuildTools.exe
 ADD ${BUILD_TOOLS_SOURCE} vs_buildtools.exe
 # Install Build Tools with the Microsoft.VisualStudio.Workload.VCTools workload, including recommended.
 ARG VS_INSTALL_PATH_WP="C:\BuildTools"

@@ -175,15 +175,15 @@ RUN copy "%CUDA_INSTALL_ROOT_WP%\extras\visual_studio_integration\MSBuildExtensi
 
 RUN setx PATH "%CUDA_INSTALL_ROOT_WP%\bin;%PATH%"
 
-ARG CUDNN_VERSION=9.2.1.18
+ARG CUDNN_VERSION=9.3.0.75
 ENV CUDNN_VERSION ${CUDNN_VERSION}
 COPY --from=dependency_base /cudnn /cudnn
 RUN copy cudnn\bin\cudnn*.dll "%CUDA_INSTALL_ROOT_WP%\bin\."
 RUN copy cudnn\lib\x64\cudnn*.lib "%CUDA_INSTALL_ROOT_WP%\lib\x64\."
 RUN copy cudnn\include\cudnn*.h "%CUDA_INSTALL_ROOT_WP%\include\."
 LABEL CUDNN_VERSION="${CUDNN_VERSION}"
 
-ARG TENSORRT_VERSION=10.2.0.19
+ARG TENSORRT_VERSION=10.3.0.26
 ENV TRT_VERSION ${TENSORRT_VERSION}
 COPY --from=dependency_base /TensorRT /TensorRT
 RUN setx PATH "c:\TensorRT\lib;%PATH%"

README.md

Lines changed: 4 additions & 4 deletions

@@ -45,7 +45,7 @@ ___
 
 ##### LATEST RELEASE
 You are currently on the `main` branch which tracks under-development progress towards the next release.
-The current release is version [2.48.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.07 container release on NVIDIA GPU Cloud (NGC).
+The current release is version [2.49.0](https://github.com/triton-inference-server/server/releases/latest) and corresponds to the 24.08 container release on NVIDIA GPU Cloud (NGC).
 
 Triton Inference Server is an open source inference serving software that
 streamlines AI inferencing. Triton enables teams to deploy any AI model from

@@ -103,16 +103,16 @@ Inference Server with the
 
 ```bash
 # Step 1: Create the example model repository
-git clone -b r24.07 https://github.com/triton-inference-server/server.git
+git clone -b r24.08 https://github.com/triton-inference-server/server.git
 cd server/docs/examples
 ./fetch_models.sh
 
 # Step 2: Launch triton from the NGC Triton container
-docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.07-py3 tritonserver --model-repository=/models
+docker run --gpus=1 --rm --net=host -v ${PWD}/model_repository:/models nvcr.io/nvidia/tritonserver:24.08-py3 tritonserver --model-repository=/models
 
 # Step 3: Sending an Inference Request
 # In a separate console, launch the image_client example from the NGC Triton SDK container
-docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.07-py3-sdk
+docker run -it --rm --net=host nvcr.io/nvidia/tritonserver:24.08-py3-sdk
 /workspace/install/bin/image_client -m densenet_onnx -c 3 -s INCEPTION /workspace/images/mug.jpg
 
 # Inference should return the following

TRITON_VERSION

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-2.49.0dev
+2.50.0dev

build.py

Lines changed: 20 additions & 12 deletions

@@ -70,9 +70,9 @@
 # incorrectly load the other version of the openvino libraries.
 #
 TRITON_VERSION_MAP = {
-    "2.49.0dev": (
-        "24.08dev",  # triton container
-        "24.07",  # upstream container
+    "2.50.0dev": (
+        "24.09dev",  # triton container
+        "24.08",  # upstream container
         "1.18.1",  # ORT
         "2024.0.0",  # ORT OpenVINO
         "2024.0.0",  # Standalone OpenVINO
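For context, each value in TRITON_VERSION_MAP is a positional tuple that build.py indexes by component, e.g. index 2 is the ONNX Runtime version consumed by onnxruntime_cmake_args() further down. A minimal sketch of that lookup, with the tuple abbreviated (the real entry carries additional pins):

```python
# Sketch: how build.py reads component versions out of TRITON_VERSION_MAP.
# The tuple is abbreviated here; the real entry contains more pinned versions.
TRITON_VERSION_MAP = {
    "2.50.0dev": ("24.09dev", "24.08", "1.18.1", "2024.0.0", "2024.0.0"),
}

def ort_version(triton_version: str) -> str:
    # Mirrors TRITON_VERSION_MAP[FLAGS.version][2] used in onnxruntime_cmake_args().
    return TRITON_VERSION_MAP[triton_version][2]

print(ort_version("2.50.0dev"))  # -> 1.18.1
```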
@@ -116,7 +116,8 @@ def fail_if(p, msg):
 
 
 def target_platform():
-    if FLAGS.target_platform is not None:
+    # When called by compose.py, FLAGS will be None
+    if FLAGS and FLAGS.target_platform is not None:
         return FLAGS.target_platform
     platform_string = platform.system().lower()
     if platform_string == "linux":

@@ -132,7 +133,8 @@ def target_platform():
 
 
 def target_machine():
-    if FLAGS.target_machine is not None:
+    # When called by compose.py, FLAGS will be None
+    if FLAGS and FLAGS.target_machine is not None:
         return FLAGS.target_machine
     return platform.machine().lower()
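The guard matters because compose.py imports build.py as a module: FLAGS is only populated when build.py parses its own command line, so imported callers see FLAGS as None and need the fallback. An illustrative sketch of that behavior (simplified; the real target_platform() also maps platform names):

```python
# Sketch: why the "if FLAGS and ..." guard is needed when build.py is imported
# (e.g. by compose.py) instead of being run as a script.
import platform

FLAGS = None  # argparse fills this only when build.py runs as __main__

def target_platform():
    if FLAGS and FLAGS.target_platform is not None:
        return FLAGS.target_platform
    return platform.system().lower()  # fallback used by importers such as compose.py

print(target_platform())  # e.g. "linux" on a Linux host, even though FLAGS is None
```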

@@ -214,6 +216,8 @@ def header(self, desc=None):
 
         self.comment("Exit script immediately if any command fails")
         if target_platform() == "windows":
+            self._file.write("$UseStructuredOutput = $false\n")
+            self.blankln()
             self._file.write("function ExitWithCode($exitcode) {\n")
             self._file.write(" $host.SetShouldExit($exitcode)\n")
             self._file.write(" exit $exitcode\n")

@@ -639,13 +643,16 @@ def pytorch_cmake_args(images):
         cmake_backend_arg("pytorch", "TRITON_PYTORCH_DOCKER_IMAGE", None, image),
     ]
 
-    if FLAGS.enable_gpu:
+    # TODO: TPRD-372 TorchTRT extension is not currently supported by our manylinux build
+    # TODO: TPRD-373 NVTX extension is not currently supported by our manylinux build
+    if target_platform() != "rhel":
+        if FLAGS.enable_gpu:
+            cargs.append(
+                cmake_backend_enable("pytorch", "TRITON_PYTORCH_ENABLE_TORCHTRT", True)
+            )
         cargs.append(
-            cmake_backend_enable("pytorch", "TRITON_PYTORCH_ENABLE_TORCHTRT", True)
+            cmake_backend_enable("pytorch", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx)
         )
-    cargs.append(
-        cmake_backend_enable("pytorch", "TRITON_ENABLE_NVTX", FLAGS.enable_nvtx)
-    )
     return cargs
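The net effect of the new guard is that the TorchTRT and NVTX options are only turned on outside the RHEL (manylinux) build. A small sketch of the resulting decision table, where pytorch_feature_flags() is purely illustrative:

```python
# Sketch: which PyTorch backend features the block above ends up enabling.
# pytorch_feature_flags() is illustrative; build.py actually appends cmake arguments.
def pytorch_feature_flags(platform_name: str, enable_gpu: bool, enable_nvtx: bool) -> dict:
    flags = {}
    if platform_name != "rhel":  # TPRD-372 / TPRD-373: skipped for manylinux builds
        if enable_gpu:
            flags["TRITON_PYTORCH_ENABLE_TORCHTRT"] = True
        flags["TRITON_ENABLE_NVTX"] = enable_nvtx
    return flags

print(pytorch_feature_flags("rhel", enable_gpu=True, enable_nvtx=True))    # {}
print(pytorch_feature_flags("linux", enable_gpu=True, enable_nvtx=False))
# {'TRITON_PYTORCH_ENABLE_TORCHTRT': True, 'TRITON_ENABLE_NVTX': False}
```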

@@ -655,7 +662,9 @@ def onnxruntime_cmake_args(images, library_paths):
             "onnxruntime",
             "TRITON_BUILD_ONNXRUNTIME_VERSION",
             None,
-            TRITON_VERSION_MAP[FLAGS.version][2],
+            os.getenv("TRITON_BUILD_ONNXRUNTIME_VERSION")
+            if os.getenv("TRITON_BUILD_ONNXRUNTIME_VERSION")
+            else TRITON_VERSION_MAP[FLAGS.version][2],
         )
     ]
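This lets an environment variable override the ONNX Runtime version pinned in TRITON_VERSION_MAP, so a different ORT build can be selected without editing build.py. In isolation the precedence reduces to the following sketch:

```python
# Sketch: precedence of the ONNX Runtime version after the change above.
import os

pinned = "1.18.1"  # stands in for TRITON_VERSION_MAP[FLAGS.version][2]
ort_version = os.getenv("TRITON_BUILD_ONNXRUNTIME_VERSION") or pinned
print(ort_version)  # the environment variable wins when set, otherwise the pin
```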

@@ -1301,7 +1310,6 @@ def dockerfile_prepare_container_linux(argmap, backends, enable_gpu, target_mach
         gpu_enabled=gpu_enabled
     )
 
-    # This
     if target_platform() == "rhel":
         df += """
 # Common dpeendencies.

deploy/aws/values.yaml

Lines changed: 1 addition & 1 deletion

@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.08-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: s3://triton-inference-server-repository/model_repository
   numGpus: 1

deploy/fleetcommand/Chart.yaml

Lines changed: 1 addition & 1 deletion

@@ -26,7 +26,7 @@
 
 apiVersion: v1
 # appVersion is the Triton version; update when changing release
-appVersion: "2.48.0"
+appVersion: "2.49.0"
 description: Triton Inference Server (Fleet Command)
 name: triton-inference-server
 # version is the Chart version; update when changing anything in the chart

deploy/fleetcommand/values.yaml

Lines changed: 3 additions & 3 deletions

@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.08-py3
   pullPolicy: IfNotPresent
   numGpus: 1
   serverCommand: tritonserver

@@ -47,13 +47,13 @@ image:
   #
   # To set model control mode, uncomment and configure below
   # TODO: Fix the following url, it is invalid
-  # See https://github.com/triton-inference-server/server/blob/r24.07/docs/model_management.md
+  # See https://github.com/triton-inference-server/server/blob/r24.08/docs/model_management.md
   # for more details
   #- --model-control-mode=explicit|poll|none
   #
   # Additional server args
   #
-  # see https://github.com/triton-inference-server/server/blob/r24.07/README.md
+  # see https://github.com/triton-inference-server/server/blob/r24.08/README.md
   # for more details
 
 service:

deploy/gcp/values.yaml

Lines changed: 1 addition & 1 deletion

@@ -27,7 +27,7 @@
 replicaCount: 1
 
 image:
-  imageName: nvcr.io/nvidia/tritonserver:24.07-py3
+  imageName: nvcr.io/nvidia/tritonserver:24.08-py3
   pullPolicy: IfNotPresent
   modelRepositoryPath: gs://triton-inference-server-repository/model_repository
   numGpus: 1
