
Commit eb95992

matthewkotila authored and mc-nv committed
fix: Install GenAI-Perf and Perf Analyzer in SDK container from pypi.org instead of building from source
1 parent 3065d9e commit eb95992
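
In practice the change collapses the Perf Analyzer / GenAI-Perf source build into a single pip install. A minimal sketch of the new install step as it appears in the diff below; the commit title implies the perf_analyzer binary now also comes from pypi.org, which is assumed here to happen through the genai-perf package's own dependencies rather than a separate build:

# Sketch: one pip install replaces the cmake-based perf_analyzer / genai-perf build
# (assumption: the genai-perf package from PyPI pulls in the perf_analyzer binary)
RUN pip3 install genai-perf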

File tree: 1 file changed, +5 -64 lines


Dockerfile.sdk

Lines changed: 5 additions & 64 deletions
@@ -32,7 +32,6 @@
 ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:25.08-py3-min
 
 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
-ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
 ARG TRITON_REPO_ORGANIZATION=http://github.com/triton-inference-server
 ARG TRITON_COMMON_REPO_TAG=main
 ARG TRITON_CORE_REPO_TAG=main
@@ -41,7 +40,6 @@ ARG TRITON_THIRD_PARTY_REPO_TAG=main
 ARG TRITON_ENABLE_GPU=ON
 ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
 ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
-ARG TRITON_PERF_ANALYZER_BUILD=1
 # DCGM version to install for Model Analyzer
 ARG DCGM_VERSION=4.4.0-1
 
@@ -97,7 +95,6 @@ RUN rm -f /usr/bin/python && \
 # Build the client library and examples
 ARG TRITON_REPO_ORGANIZATION
 ARG TRITON_CLIENT_REPO_SUBDIR
-ARG TRITON_PA_REPO_SUBDIR
 ARG TRITON_COMMON_REPO_TAG
 ARG TRITON_CORE_REPO_TAG
 ARG TRITON_CLIENT_REPO_TAG
@@ -106,14 +103,10 @@ ARG TRITON_ENABLE_GPU
 ARG JAVA_BINDINGS_MAVEN_VERSION
 ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG
 ARG TARGETPLATFORM
-ARG TRITON_PERF_ANALYZER_BUILD
-
-ENV TRITON_PERF_ANALYZER_BUILD=${TRITON_PERF_ANALYZER_BUILD}
 
 WORKDIR /workspace
 COPY TRITON_VERSION .
 COPY ${TRITON_CLIENT_REPO_SUBDIR} client
-COPY ${TRITON_PA_REPO_SUBDIR} perf_analyzer
 
 WORKDIR /workspace/client_build
 RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
@@ -124,63 +117,11 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
       -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
       -DTRITON_ENABLE_PERF_ANALYZER=OFF \
       -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
-      -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \
+      -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
       -DTRITON_ENABLE_JAVA_HTTP=ON \
       -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
       -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
-RUN cmake --build . -v --parallel --target cc-clients java-clients
-
-# TODO: PA will rebuild the CC clients since it depends on it.
-# This should be optimized so that we do not have to build
-# the CC clients twice. Similarly, because the SDK expectation is
-# that PA is packaged with the python client, we hold off on building
-# the python client until now. Post-migration we should focus
-# effort on de-tangling these flows.
-WORKDIR /workspace/pa_build
-# NOTE: If TRITON_PERF_ANALYZER_BUILD=0, the Performance Analyzer (PA) binaries must already exist
-# in the path specified by the ARG TRITON_PA_REPO_SUBDIR.
-RUN if [ "$TRITON_PERF_ANALYZER_BUILD" = "1" ]; then \
-      cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
-        -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
-        -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
-        -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
-        -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
-        -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
-        -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
-        -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \
-        -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \
-        -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \
-        -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \
-        -DTRITON_ENABLE_CC_HTTP=ON \
-        -DTRITON_ENABLE_CC_GRPC=ON \
-        -DTRITON_ENABLE_PYTHON_HTTP=ON \
-        -DTRITON_ENABLE_PYTHON_GRPC=ON \
-        -DTRITON_PACKAGE_PERF_ANALYZER=ON \
-        -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
-        /workspace/perf_analyzer && \
-      cmake --build . -v --parallel --target perf-analyzer python-clients && \
-      pip3 install build && \
-      cd /workspace/perf_analyzer/genai-perf && \
-      python3 -m build --wheel --outdir /workspace/install/python; \
-    else \
-      ls /workspace/perf_analyzer/ && \
-      tar -xzf /workspace/perf_analyzer/perf_analyzer*.tar.gz -C /workspace/install/bin && \
-      echo "Perf Analyzer binaries was extracted and not build" && \
-      cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
-        -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
-        -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
-        -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
-        -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
-        -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
-        -DTRITON_ENABLE_PYTHON_HTTP=ON \
-        -DTRITON_ENABLE_PYTHON_GRPC=ON \
-        -DTRITON_PACKAGE_PERF_ANALYZER=ON \
-        -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
-        /workspace/perf_analyzer && \
-      cmake --build . -v --parallel --target python-clients && \
-      mkdir -p /workspace/install/python && \
-      cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \
-    fi
+RUN cmake --build . -v --parallel --target cc-clients java-clients python-clients
 
 # Install Java API Bindings
 RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
@@ -235,7 +176,6 @@ WORKDIR /workspace
 COPY TRITON_VERSION .
 COPY NVIDIA_Deep_Learning_Container_License.pdf .
 COPY --from=sdk_build /workspace/client/ client/
-COPY --from=sdk_build /workspace/perf_analyzer/ perf_analyzer/
 COPY --from=sdk_build /workspace/install/ install/
 RUN cd install && \
     export VERSION=`cat /workspace/TRITON_VERSION` && \
@@ -253,8 +193,6 @@ COPY --from=sdk_build /workspace/client/src/python/library/tests/* qa/python_cli
 # Install an image needed by the quickstart and other documentation.
 COPY qa/images/mug.jpg images/mug.jpg
 
-RUN pip3 install install/python/genai_perf-*.whl
-
 # Install the dependencies needed to run the client examples. These
 # are not needed for building but including them allows this image to
 # be used to run the client examples.
@@ -263,6 +201,9 @@ RUN pip3 install --upgrade "numpy<2" pillow attrdict && \
     "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
     xargs pip3 install --upgrade
 
+# Install GenAI-Perf
+RUN pip3 install genai-perf
+
 # Install DCGM
 RUN if [ "$TRITON_ENABLE_GPU" = "ON" ]; then \
     [ "$(uname -m)" != "x86_64" ] && arch="sbsa" || arch="x86_64" && \

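
As a quick smoke test of an SDK image built from this Dockerfile, one could confirm that both tools resolve on PATH after the pip-based install. This check is not part of the commit; the image tag below is a placeholder for whatever tag the built image is given:

# Hypothetical verification of the pip-installed tools inside the SDK container
docker run --rm <sdk-image> bash -c "genai-perf --help && perf_analyzer --help"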