 ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
-ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
 ARG TRITON_REPO_ORGANIZATION=http://github.com/triton-inference-server
 ARG TRITON_COMMON_REPO_TAG=main
 ARG TRITON_CORE_REPO_TAG=main
@@ -41,7 +40,6 @@ ARG TRITON_THIRD_PARTY_REPO_TAG=main
 ARG TRITON_ENABLE_GPU=ON
 ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
 ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
-ARG TRITON_PERF_ANALYZER_BUILD=1
 # DCGM version to install for Model Analyzer
 ARG DCGM_VERSION=4.4.0-1
 
@@ -97,7 +95,6 @@ RUN rm -f /usr/bin/python && \
 # Build the client library and examples
 ARG TRITON_REPO_ORGANIZATION
 ARG TRITON_CLIENT_REPO_SUBDIR
-ARG TRITON_PA_REPO_SUBDIR
 ARG TRITON_COMMON_REPO_TAG
 ARG TRITON_CORE_REPO_TAG
 ARG TRITON_CLIENT_REPO_TAG
@@ -106,14 +103,10 @@ ARG TRITON_ENABLE_GPU
 ARG JAVA_BINDINGS_MAVEN_VERSION
 ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG
 ARG TARGETPLATFORM
-ARG TRITON_PERF_ANALYZER_BUILD
-
-ENV TRITON_PERF_ANALYZER_BUILD=${TRITON_PERF_ANALYZER_BUILD}
 
 WORKDIR /workspace
 COPY TRITON_VERSION .
 COPY ${TRITON_CLIENT_REPO_SUBDIR} client
-COPY ${TRITON_PA_REPO_SUBDIR} perf_analyzer
 
 WORKDIR /workspace/client_build
 RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
@@ -124,63 +117,11 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
       -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
       -DTRITON_ENABLE_PERF_ANALYZER=OFF \
       -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
-      -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \
+      -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
       -DTRITON_ENABLE_JAVA_HTTP=ON \
       -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
       -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
-RUN cmake --build . -v --parallel --target cc-clients java-clients
-
-# TODO: PA will rebuild the CC clients since it depends on it.
-# This should be optimized so that we do not have to build
-# the CC clients twice. Similarly, because the SDK expectation is
-# that PA is packaged with the python client, we hold off on building
-# the python client until now. Post-migration we should focus
-# effort on de-tangling these flows.
-WORKDIR /workspace/pa_build
-# NOTE: If TRITON_PERF_ANALYZER_BUILD=0, the Performance Analyzer (PA) binaries must already exist
-# in the path specified by the ARG TRITON_PA_REPO_SUBDIR.
-RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
-      cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
-        -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
-        -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
-        -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
-        -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
-        -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
-        -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
-        -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \
-        -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \
-        -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \
-        -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \
-        -DTRITON_ENABLE_CC_HTTP=ON \
-        -DTRITON_ENABLE_CC_GRPC=ON \
-        -DTRITON_ENABLE_PYTHON_HTTP=ON \
-        -DTRITON_ENABLE_PYTHON_GRPC=ON \
-        -DTRITON_PACKAGE_PERF_ANALYZER=ON \
-        -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
-        /workspace/perf_analyzer && \
-      cmake --build . -v --parallel --target perf-analyzer python-clients && \
-      pip3 install build && \
-      cd /workspace/perf_analyzer/genai-perf && \
-      python3 -m build --wheel --outdir /workspace/install/python; \
-    else \
-      ls /workspace/perf_analyzer/ && \
-      tar -xzf /workspace/perf_analyzer/perf_analyzer*.tar.gz -C /workspace/install/bin && \
-      echo "Perf Analyzer binaries was extracted and not build" && \
-      cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
-        -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
-        -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
-        -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
-        -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
-        -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
-        -DTRITON_ENABLE_PYTHON_HTTP=ON \
-        -DTRITON_ENABLE_PYTHON_GRPC=ON \
-        -DTRITON_PACKAGE_PERF_ANALYZER=ON \
-        -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
-        /workspace/perf_analyzer && \
-      cmake --build . -v --parallel --target python-clients && \
-      mkdir -p /workspace/install/python && \
-      cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \
-    fi
+RUN cmake --build . -v --parallel --target cc-clients java-clients python-clients
 
 # Install Java API Bindings
 RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
@@ -235,7 +176,6 @@ WORKDIR /workspace
 COPY TRITON_VERSION .
 COPY NVIDIA_Deep_Learning_Container_License.pdf .
 COPY --from=sdk_build /workspace/client/ client/
-COPY --from=sdk_build /workspace/perf_analyzer/ perf_analyzer/
 COPY --from=sdk_build /workspace/install/ install/
 RUN cd install && \
     export VERSION=`cat /workspace/TRITON_VERSION` && \
@@ -253,8 +193,6 @@ COPY --from=sdk_build /workspace/client/src/python/library/tests/* qa/python_cli
 # Install an image needed by the quickstart and other documentation.
 COPY qa/images/mug.jpg images/mug.jpg
 
-RUN pip3 install install/python/genai_perf-*.whl
-
 # Install the dependencies needed to run the client examples. These
 # are not needed for building but including them allows this image to
 # be used to run the client examples.
@@ -263,6 +201,9 @@ RUN pip3 install --upgrade "numpy<2" pillow attrdict && \
263201 "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
264202 xargs pip3 install --upgrade
265203
204+ # Install GenAI-Perf
205+ RUN pip3 install genai-perf
206+
266207# Install DCGM
267208RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \
268209 [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" && \
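
With the Perf Analyzer source build removed, GenAI-Perf now comes from PyPI during the image build. A minimal sketch of exercising the result; the Dockerfile name, image tag, and build-arg values below are assumptions, not taken from this diff:

# Build the SDK stage with the remaining build args (names/values assumed):
docker build -f Dockerfile.sdk \
    --build-arg BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min \
    --build-arg TRITON_ENABLE_GPU=ON \
    -t tritonserver_sdk .
# GenAI-Perf is installed from the pip wheel rather than built locally:
docker run --rm tritonserver_sdk genai-perf --help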