
Commit 7ed8e0f

Merge branch 'main' of github.com:triton-inference-server/server into yinggeh-DLIS-6657-client-input-byte-size-check

2 parents 3863c39 + e181662

File tree

21 files changed: +577 additions, -152 deletions

Dockerfile.sdk

Lines changed: 38 additions & 10 deletions
@@ -32,8 +32,10 @@
 ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:24.07-py3-min

 ARG TRITON_CLIENT_REPO_SUBDIR=clientrepo
+ARG TRITON_PA_REPO_SUBDIR=perfanalyzerrepo
 ARG TRITON_COMMON_REPO_TAG=main
 ARG TRITON_CORE_REPO_TAG=main
+ARG TRITON_CLIENT_REPO_TAG=main
 ARG TRITON_THIRD_PARTY_REPO_TAG=main
 ARG TRITON_MODEL_ANALYZER_REPO_TAG=main
 ARG TRITON_ENABLE_GPU=ON

@@ -103,8 +105,10 @@ RUN rm -f /usr/bin/python && \
 # Build the client library and examples
 ARG TRITON_REPO_ORGANIZATION
 ARG TRITON_CLIENT_REPO_SUBDIR
+ARG TRITON_PA_REPO_SUBDIR
 ARG TRITON_COMMON_REPO_TAG
 ARG TRITON_CORE_REPO_TAG
+ARG TRITON_CLIENT_REPO_TAG
 ARG TRITON_THIRD_PARTY_REPO_TAG
 ARG TRITON_ENABLE_GPU
 ARG JAVA_BINDINGS_MAVEN_VERSION

@@ -114,26 +118,53 @@ ARG TARGETPLATFORM
 WORKDIR /workspace
 COPY TRITON_VERSION .
 COPY ${TRITON_CLIENT_REPO_SUBDIR} client
+COPY ${TRITON_PA_REPO_SUBDIR} perf_analyzer

-WORKDIR /workspace/build
+WORKDIR /workspace/client_build
 RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
       -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
       -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
       -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
       -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
       -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
+      -DTRITON_ENABLE_PERF_ANALYZER=OFF \
       -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
-      -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
+      -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \
       -DTRITON_ENABLE_JAVA_HTTP=ON \
-      -DTRITON_ENABLE_PERF_ANALYZER=ON \
+      -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
+      -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
+RUN make -j16 cc-clients java-clients && \
+    rm -fr ~/.m2
+
+# TODO: PA will rebuild the CC clients since it depends on it.
+# This should be optimized so that we do not have to build
+# the CC clients twice. Similarly, because the SDK expectation is
+# that PA is packaged with the python client, we hold off on building
+# the python client until now. Post-migration we should focus
+# effort on de-tangling these flows.
+WORKDIR /workspace/pa_build
+RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
+      -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
+      -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
+      -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
+      -DTRITON_CORE_REPO_TAG=${TRITON_CORE_REPO_TAG} \
+      -DTRITON_CLIENT_REPO_TAG=${TRITON_CLIENT_REPO_TAG} \
       -DTRITON_ENABLE_PERF_ANALYZER_C_API=ON \
       -DTRITON_ENABLE_PERF_ANALYZER_TFS=ON \
       -DTRITON_ENABLE_PERF_ANALYZER_TS=ON \
       -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \
-      -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
-      -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
-RUN make -j16 cc-clients python-clients java-clients && \
-    rm -fr ~/.m2
+      -DTRITON_ENABLE_CC_HTTP=ON \
+      -DTRITON_ENABLE_CC_GRPC=ON \
+      -DTRITON_ENABLE_PYTHON_HTTP=ON \
+      -DTRITON_ENABLE_PYTHON_GRPC=ON \
+      -DTRITON_PACKAGE_PERF_ANALYZER=ON \
+      -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
+      /workspace/perf_analyzer
+RUN make -j16 perf-analyzer python-clients
+
+RUN pip3 install build \
+    && cd /workspace/perf_analyzer/genai-perf \
+    && python3 -m build --wheel --outdir /workspace/install/python

 # Install Java API Bindings
 RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \

@@ -144,9 +175,6 @@ RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
         --jar-install-path /workspace/install/java-api-bindings; \
     fi

-RUN pip3 install build \
-    && cd /workspace/client/src/c++/perf_analyzer/genai-perf \
-    && python3 -m build --wheel --outdir /workspace/install/python
 ############################################################################
 ## Create sdk container
 ############################################################################
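Note: the SDK build now runs in two passes: the client libraries are built first with perf_analyzer disabled, then perf_analyzer is built against them and packaged together with the Python client. A minimal sketch of how the image might be built with the new arguments, assuming the client and perf_analyzer checkouts sit in clientrepo/ and perfanalyzerrepo/ under the build context (the invocation and layout are illustrative; the directory names mirror the ARG defaults above):

    # hypothetical build invocation; repo layout is an assumption
    docker build -f Dockerfile.sdk \
        --build-arg TRITON_CLIENT_REPO_SUBDIR=clientrepo \
        --build-arg TRITON_PA_REPO_SUBDIR=perfanalyzerrepo \
        --build-arg TRITON_CLIENT_REPO_TAG=main \
        -t tritonserver_sdk .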

qa/L0_backend_python/argument_validation/test.sh

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-CLIENT_PY=../python_unittest.py
+CLIENT_PY=../test_infer_shm_leak.py
 CLIENT_LOG="./arg_validation_client.log"
 TEST_RESULT_FILE='test_results.txt'
 SERVER_ARGS="--model-repository=${MODELDIR}/argument_validation/models --backend-directory=${BACKEND_DIR} --log-verbose=1"

qa/L0_backend_python/bls/test.sh

Lines changed: 23 additions & 18 deletions
@@ -25,15 +25,15 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-CLIENT_PY=../python_unittest.py
+CLIENT_PY=../test_infer_shm_leak.py
 CLIENT_LOG="./bls_client.log"
 TEST_RESULT_FILE='test_results.txt'
 source ../../common/util.sh

 TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:=http://github.com/triton-inference-server}

 RET=0
-rm -fr *.log ./models *.txt
+rm -fr *.log ./models *.txt *.xml

 # FIXME: [DLIS-5970] Until Windows supports GPU tensors, only test CPU
 if [[ ${TEST_WINDOWS} == 0 ]]; then

@@ -119,30 +119,35 @@ if [[ ${TEST_WINDOWS} == 0 ]]; then

 for MODEL_NAME in bls bls_memory bls_memory_async bls_async; do
     export MODEL_NAME=${MODEL_NAME}
-
-    python3 -m pytest --junitxml="${MODEL_NAME}.${TRIAL}.${CUDA_MEMORY_POOL_SIZE_MB}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
+    # Run with pytest to capture the return code correctly
+    pytest --junitxml="${MODEL_NAME}.${TRIAL}.${CUDA_MEMORY_POOL_SIZE_MB}.report.xml" $CLIENT_PY >> $CLIENT_LOG 2>&1
+    EXIT_CODE=$?
+    if [ $EXIT_CODE -ne 0 ]; then
         echo -e "\n***\n*** ${MODEL_NAME} ${BLS_KIND} test FAILED. \n***"
+        RET=$EXIT_CODE
         cat $SERVER_LOG
         cat $CLIENT_LOG
-        RET=1
     fi
 done

-set -e
-
 kill_server

-# Check for bls 'test_timeout' to ensure timeout value is being correctly passed
-if [ `grep -c "Request timeout: 11000000000" $SERVER_LOG` == "0" ]; then
-    echo -e "\n***\n*** BLS timeout value not correctly passed to model: line ${LINENO}\n***"
-    cat $SERVER_LOG
-    RET=1
+set -e
+
+# Only check the timeout value if there is no error since the test
+# may fail before the test_timeout case gets run.
+if [ $RET -eq 0 ]; then
+    # Check for bls 'test_timeout' to ensure timeout value is being correctly passed
+    if [ `grep -c "Request timeout: 11000000000" $SERVER_LOG` == "0" ]; then
+        echo -e "\n***\n*** BLS timeout value not correctly passed to model: line ${LINENO}\n***"
+        cat $SERVER_LOG
+        RET=1
+    fi
 fi

-if [[ $CUDA_MEMORY_POOL_SIZE_MB -eq 128 ]]; then
+if [[ $CUDA_MEMORY_POOL_SIZE_MB -eq 256 ]]; then
     if [ `grep -c "Failed to allocate memory from CUDA memory pool" $SERVER_LOG` != "0" ]; then
-        echo -e "\n***\n*** Expected to use CUDA memory pool for all tests when CUDA_MEMOY_POOL_SIZE_MB is 128 MB for 'bls' $BLS_KIND test\n***"
+        echo -e "\n***\n*** Expected to use CUDA memory pool for all tests when CUDA_MEMORY_POOL_SIZE_MB is 256 MB for 'bls' $BLS_KIND test\n***"
         cat $SERVER_LOG
         RET=1
     fi

@@ -342,10 +347,10 @@ set -e

 kill_server

-if [ $RET -eq 1 ]; then
-    echo -e "\n***\n*** BLS test FAILED. \n***"
-else
+if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** BLS test PASSED. \n***"
+else
+    echo -e "\n***\n*** BLS test FAILED. \n***"
 fi

 exit $RET

qa/L0_backend_python/custom_metrics/test.sh

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-CLIENT_PY=../python_unittest.py
+CLIENT_PY=../test_infer_shm_leak.py
 CLIENT_LOG="./custom_metrics_client.log"
 TEST_RESULT_FILE='test_results.txt'
 source ../../common/util.sh

qa/L0_backend_python/python_test.py

Lines changed: 30 additions & 0 deletions
@@ -365,6 +365,36 @@ def test_bool(self):
         self.assertIsNotNone(output0)
         self.assertTrue(np.all(output0 == input_data))

+    def test_bf16(self):
+        model_name = "identity_bf16"
+        shape = [2, 2]
+        with self._shm_leak_detector.Probe() as shm_probe:
+            with httpclient.InferenceServerClient(
+                f"{_tritonserver_ipaddr}:8000"
+            ) as client:
+                # NOTE: Client will truncate FP32 to BF16 internally
+                # since numpy has no built-in BF16 representation.
+                np_input = np.ones(shape, dtype=np.float32)
+                inputs = [
+                    httpclient.InferInput(
+                        "INPUT0", np_input.shape, "BF16"
+                    ).set_data_from_numpy(np_input)
+                ]
+                result = client.infer(model_name, inputs)
+
+                # Assert that Triton correctly returned a BF16 tensor.
+                response = result.get_response()
+                triton_output = response["outputs"][0]
+                triton_dtype = triton_output["datatype"]
+                self.assertEqual(triton_dtype, "BF16")
+
+                np_output = result.as_numpy("OUTPUT0")
+                self.assertIsNotNone(np_output)
+                # BF16 tensors are held in FP32 when converted to numpy due to
+                # lack of native BF16 support in numpy, so verify that.
+                self.assertEqual(np_output.dtype, np.float32)
+                self.assertTrue(np.allclose(np_output, np_input))
+
     def test_infer_pytorch(self):
         # FIXME: This model requires torch. Because windows tests are not run in a docker
         # environment with torch installed, we need to think about how we want to install

qa/L0_backend_python/request_rescheduling/test.sh

Lines changed: 1 addition & 1 deletion
@@ -25,7 +25,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-CLIENT_PY="../python_unittest.py"
+CLIENT_PY="../test_infer_shm_leak.py"
 CLIENT_LOG="./request_rescheduling_client.log"
 TEST_RESULT_FILE='test_results.txt'
 source ../../common/util.sh

qa/L0_backend_python/setup_python_enviroment.sh

Lines changed: 1 addition & 1 deletion
@@ -151,7 +151,7 @@ apt-get update && apt-get -y install \
                     libboost-dev
 rm -f /usr/bin/python3 && \
     ln -s "/usr/bin/python3.${PYTHON_ENV_VERSION}" /usr/bin/python3
-pip3 install --upgrade install requests numpy virtualenv protobuf
+pip3 install --upgrade requests numpy virtualenv protobuf
 find /opt/tritonserver/qa/pkgs/ -maxdepth 1 -type f -name \
     "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
     xargs pip3 install --upgrade

qa/L0_backend_python/test.sh

Lines changed: 29 additions & 3 deletions
@@ -95,6 +95,9 @@
 mkdir -p models/identity_fp32/1/
 cp ../python_models/identity_fp32/model.py ./models/identity_fp32/1/model.py
 cp ../python_models/identity_fp32/config.pbtxt ./models/identity_fp32/config.pbtxt
+mkdir -p models/identity_bf16/1/
+cp ../python_models/identity_bf16/model.py ./models/identity_bf16/1/model.py
+cp ../python_models/identity_bf16/config.pbtxt ./models/identity_bf16/config.pbtxt
 RET=0

 cp -r ./models/identity_fp32 ./models/identity_uint8

@@ -422,11 +425,20 @@ if [ "$TEST_JETSON" == "0" ]; then
     # between dependencies.
     setup_virtualenv

+    set +e
     (cd ${TEST} && bash -ex test.sh)
-    if [ $? -ne 0 ]; then
+    EXIT_CODE=$?
+    if [ $EXIT_CODE -ne 0 ]; then
       echo "Subtest ${TEST} FAILED"
-      RET=1
+      RET=$EXIT_CODE
+
+      # In bls test, it is allowed to fail with a strict memory leak of 480 bytes with exit code '123'.
+      # Propagate the exit code to make sure it's not overwritten by other tests.
+      if [[ ${TEST} == "bls" ]] && [[ $EXIT_CODE -ne 1 ]] ; then
+        BLS_RET=$RET
+      fi
     fi
+    set -e

     deactivate_virtualenv
   done

@@ -435,11 +447,13 @@ if [ "$TEST_JETSON" == "0" ]; then
   if [[ ${PYTHON_ENV_VERSION} = "10" ]] && [[ ${TEST_WINDOWS} == 0 ]]; then
     # In 'env' test we use miniconda for dependency management. No need to run
     # the test in a virtual environment.
+    set +e
     (cd env && bash -ex test.sh)
     if [ $? -ne 0 ]; then
       echo "Subtest env FAILED"
       RET=1
     fi
+    set -e
   fi
 fi

@@ -456,12 +470,14 @@ for TEST in ${SUBTESTS}; do
   # between dependencies.
   setup_virtualenv

+  set +e
   (cd ${TEST} && bash -ex test.sh)

   if [ $? -ne 0 ]; then
     echo "Subtest ${TEST} FAILED"
     RET=1
   fi
+  set -e

   deactivate_virtualenv
 done

@@ -472,4 +488,14 @@ else
   echo -e "\n***\n*** Test FAILED\n***"
 fi

-exit $RET
+# Exit with RET if it is 1, meaning that the test failed.
+# Otherwise, exit with BLS_RET if it is set, meaning that the known memory leak is captured.
+if [ $RET -eq 1 ]; then
+  exit $RET
+else
+  if [ -z "$BLS_RET" ]; then
+    exit $RET
+  else
+    exit $BLS_RET
+  fi
+fi
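The set +e / set -e bracketing added above is what lets the script capture a subtest's exit code instead of aborting on the first failure under errexit. A condensed sketch of the pattern (variable names as in the script; the special value 123 comes from test_infer_shm_leak.py below):

    set +e                    # do not abort on a failing subtest
    (cd ${TEST} && bash -ex test.sh)
    EXIT_CODE=$?              # 0 = pass, 1 = failure, 123 = known 480-byte shm leak (bls only)
    set -e
    if [ $EXIT_CODE -ne 0 ]; then
        RET=$EXIT_CODE        # keep the specific code rather than flattening it to 1
    fi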

qa/L0_backend_python/python_unittest.py renamed to qa/L0_backend_python/test_infer_shm_leak.py

Lines changed: 18 additions & 13 deletions
@@ -33,6 +33,7 @@
 import os
 import unittest

+import pytest
 import shm_util
 import tritonclient.grpc as grpcclient
 from tritonclient.utils import *

@@ -41,11 +42,13 @@
 # we overwrite the IP address with the TRITONSERVER_IPADDR envvar
 _tritonserver_ipaddr = os.environ.get("TRITONSERVER_IPADDR", "localhost")

+# The exit code 123 is used to indicate that the shm leak probe detected a 480
+# bytes leak in the bls sub-test. Any leak other than 480 bytes will cause the
+# test to fail with the default exit code 1.
+ALLOWED_FAILURE_EXIT_CODE = 123

-class PythonUnittest(unittest.TestCase):
-    def setUp(self):
-        self._shm_leak_detector = shm_util.ShmLeakDetector()

+class TestInferShmLeak:
     def _run_unittest(self, model_name):
         with grpcclient.InferenceServerClient(f"{_tritonserver_ipaddr}:8001") as client:
             # No input is required

@@ -54,15 +57,17 @@ def _run_unittest(self, model_name):

             # The model returns 1 if the tests were successfully passed.
             # Otherwise, it will return 0.
-            self.assertEqual(
-                output0, [1], f"python_unittest failed for model {model_name}"
-            )
-
-    def test_python_unittest(self):
-        model_name = os.environ["MODEL_NAME"]
-        with self._shm_leak_detector.Probe() as shm_probe:
-            self._run_unittest(model_name)
+            assert output0 == [1], f"python_unittest failed for model {model_name}"

+    def test_shm_leak(self):
+        self._shm_leak_detector = shm_util.ShmLeakDetector()
+        model_name = os.environ.get("MODEL_NAME", "default_model")

-if __name__ == "__main__":
-    unittest.main()
+        try:
+            with self._shm_leak_detector.Probe() as shm_probe:
+                self._run_unittest(model_name)
+        except AssertionError as e:
+            if "Known shared memory leak of 480 bytes detected" in str(e):
+                pytest.exit(str(e), returncode=ALLOWED_FAILURE_EXIT_CODE)
+            else:
+                raise e
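With the move from unittest to pytest, callers learn the outcome from the process exit code rather than a parsed test report. A hedged usage sketch (model name and report file are illustrative):

    # 0 = pass, 1 = failure, 123 = known 480-byte leak treated as allowed
    MODEL_NAME=bls pytest --junitxml=bls.report.xml ../test_infer_shm_leak.py
    if [ $? -eq 123 ]; then
        echo "known shared memory leak detected; allowed failure"
    fi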

qa/L0_dlpack_multi_gpu/test.sh

Lines changed: 3 additions & 3 deletions
@@ -27,7 +27,7 @@

 SERVER=/opt/tritonserver/bin/tritonserver
 SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
-CLIENT_PY=./python_unittest.py
+CLIENT_PY=./test_infer_shm_leak.py
 CLIENT_LOG="./client.log"
 EXPECTED_NUM_TESTS="1"
 TEST_RESULT_FILE='test_results.txt'

@@ -52,8 +52,8 @@ rm -fr *.log ./models
 mkdir -p models/dlpack_test/1/
 cp ../python_models/dlpack_test/model.py models/dlpack_test/1/
 cp ../python_models/dlpack_test/config.pbtxt models/dlpack_test
-cp ../L0_backend_python/python_unittest.py .
-sed -i 's#sys.path.append("../../common")#sys.path.append("../common")#g' python_unittest.py
+cp ../L0_backend_python/test_infer_shm_leak.py .
+sed -i 's#sys.path.append("../../common")#sys.path.append("../common")#g' test_infer_shm_leak.py

 run_server
 if [ "$SERVER_PID" == "0" ]; then
