diff --git a/Dockerfile.sdk b/Dockerfile.sdk index f28d7f710e..31a62ea68b 100644 --- a/Dockerfile.sdk +++ b/Dockerfile.sdk @@ -42,7 +42,7 @@ ARG TRITON_MODEL_ANALYZER_REPO_TAG=main ARG TRITON_ENABLE_GPU=ON ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4 ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8 - +ARG PERF_ANALYZER_BUILD=ON # DCGM version to install for Model Analyzer ARG DCGM_VERSION=3.3.6 @@ -131,11 +131,11 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \ -DTRITON_ENABLE_PERF_ANALYZER=OFF \ -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \ - -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \ + -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \ -DTRITON_ENABLE_JAVA_HTTP=ON \ -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \ -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client -RUN make -j16 cc-clients java-clients && \ +RUN make -j16 cc-clients java-clients python-clients && \ rm -fr ~/.m2 # TODO: PA will rebuild the CC clients since it depends on it. @@ -145,7 +145,8 @@ RUN make -j16 cc-clients java-clients && \ # the python client until now. Post-migration we should focus # effort on de-tangling these flows. WORKDIR /workspace/pa_build -RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ +RUN if [ "$PERF_ANALYZER_BUILD" = "ON" ]; then \ + cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \ -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \ -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \ @@ -161,12 +162,18 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \ -DTRITON_ENABLE_PYTHON_GRPC=ON \ -DTRITON_PACKAGE_PERF_ANALYZER=ON \ -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \ - /workspace/perf_analyzer -RUN make -j16 perf-analyzer python-clients - -RUN pip3 install build \ - && cd /workspace/perf_analyzer/genai-perf \ - && python3 -m build --wheel --outdir /workspace/install/python + /workspace/perf_analyzer && \ + make -j16 perf-analyzer && \ + pip3 install build \ + && cd /workspace/perf_analyzer/genai-perf && \ + python3 -m build --wheel --outdir /workspace/install/python; \ + else \ + ls /workspace/perf_analyzer/ && ls /workspace/pa_build && \ + tar -xzf /workspace/perf_analyzer/perf_analyzer*.tar.gz -C /workspace/install/bin && \ + echo "Perf Analyzer binaries was extracted and not build"; \ + mkdir -p /workspace/install/python && \ + cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \ + fi # Install Java API Bindings RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ @@ -238,11 +245,12 @@ COPY --from=sdk_build /workspace/client/src/python/library/tests/* qa/python_cli # Install an image needed by the quickstart and other documentation. COPY qa/images/mug.jpg images/mug.jpg -RUN pip3 install install/python/genai_perf-*.whl - # Install the dependencies needed to run the client examples. These # are not needed for building but including them allows this image to # be used to run the client examples. + +RUN pip3 install install/python/genai_perf-*.whl; + RUN pip3 install --upgrade "numpy<2" pillow attrdict && \ find install/python/ -maxdepth 1 -type f -name \ "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \ diff --git a/qa/L0_perf_analyzer/nginx.conf b/qa/L0_perf_analyzer/nginx.conf deleted file mode 100644 index 4a7dfcc04a..0000000000 --- a/qa/L0_perf_analyzer/nginx.conf +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -server { - listen 443 ssl; - server_name localhost; - - ssl_certificate /etc/nginx/cert.crt; - ssl_certificate_key /etc/nginx/cert.key; - - location / { - proxy_pass http://localhost:8000; - proxy_http_version 1.1; - } -} diff --git a/qa/L0_perf_analyzer/perf_analyzer_profile_export_schema.json b/qa/L0_perf_analyzer/perf_analyzer_profile_export_schema.json deleted file mode 100644 index d0feacd9b4..0000000000 --- a/qa/L0_perf_analyzer/perf_analyzer_profile_export_schema.json +++ /dev/null @@ -1,95 +0,0 @@ -{ - "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://github.com/triton-inference-server/client/blob/main/src/c%2B%2B/perf_analyzer/examples/schema.json", - "title": "Perf Analyzer output data", - "description": "A json file describing the output from a Perf Analyzer run.", - "type": "object", - "required": [ - "experiments", - "version" - ], - "properties": { - "experiments": { - "description": "The array of all experiments run by Perf Analyzer.", - "type": "array", - "required": [ - "experiment", - "requests", - "window_boundaries" - ], - "minItems": 1, - "uniqueItems": true, - "items": { - "type": "object", - "properties": { - "experiment": { - "description": "A single experiment run by Perf Analyzer.", - "type": "object", - "required": [ - "mode", - "value" - ], - "minItems": 1, - "maxItems": 1, - "properties": { - "mode": { - "description": "Operating mode of Perf Analyzer: For example, 'concurrency' or 'request rate'.", - "type": "string" - }, - "value": { - "description": "Concurrency or request rate for the current experiment.", - "type": "integer" - } - } - }, - "requests": { - "description": "The array of requests sent by Perf Analyzer for this experiment.", - "type": "array", - "items": { - "$ref": "#/properties/experiments/items/properties/$defs/request" - } - }, - "$defs": { - "request": { - "description": "Info for a single request.", - "type": "object", - "required": [ - "timestamp", - "response_timestamps" - ], - "properties": { - "timestamp": { - "description": "Time stamp of the request.", - "type": "integer" - }, - "sequence_id": { - "description": "The sequence_id of the request.", - "type": "integer" - }, - "response_timestamps": { - "description": "All associated responses to this request.", - "type": "array", - "items": { - "type": "integer" - } - } - } - } - }, - "window_boundaries": { - "description": "An array of time stamps describing window boundaries.", - "type": "array", - "items": { - "type": "integer" - }, - "uniqueItems": true - } - } - } - }, - "version": { - "description": "The version of Perf Analyzer that generated the report.", - "type": "string" - } - } -} \ No newline at end of file diff --git a/qa/L0_perf_analyzer/test.sh b/qa/L0_perf_analyzer/test.sh deleted file mode 100755 index 49c7e72e48..0000000000 --- a/qa/L0_perf_analyzer/test.sh +++ /dev/null @@ -1,1164 +0,0 @@ -#!/bin/bash -# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION} -if [ "$#" -ge 1 ]; then - REPO_VERSION=$1 -fi -if [ -z "$REPO_VERSION" ]; then - echo -e "Repository version must be specified" - echo -e "\n***\n*** Test Failed\n***" - exit 1 -fi -if [ ! -z "$TEST_REPO_ARCH" ]; then - REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH} -fi - -export CUDA_VISIBLE_DEVICES=0 - -CLIENT_LOG="./perf_analyzer.log" -PERF_ANALYZER=../clients/perf_analyzer - -DATADIR=`pwd`/models -TESTDATADIR=`pwd`/test_data - -INT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/int_data.json -INT_DIFFSHAPE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/int_data_diff_shape.json -INT_OPTIONAL_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/int_data_optional.json -FLOAT_DIFFSHAPE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/float_data_with_shape.json -STRING_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/string_data.json -STRING_WITHSHAPE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/string_data_with_shape.json -SEQ_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/seq_data.json -SHAPETENSORADTAFILE=`pwd`/../common/perf_analyzer_input_data_json/shape_tensor_data.json -IMAGE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/image_data.json - -OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/output.json -NON_ALIGNED_OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/non_aligned_output.json -WRONG_OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/wrong_output.json -WRONG_OUTPUT_2_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/wrong_output_2.json - -SEQ_OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/seq_output.json -SEQ_WRONG_OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/seq_wrong_output.json - -REPEAT_INT32_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/repeat_int32_data.json - -TRACE_FILE="trace.json" - -SERVER=/opt/tritonserver/bin/tritonserver -SERVER_ARGS="--model-repository=${DATADIR} --trace-config triton,file=${TRACE_FILE}" -SERVER_LOG="./inference_server.log" - -ERROR_STRING="error | Request count: 0 | : 0 infer/sec" - -STABILITY_THRESHOLD="100" - -source ../common/util.sh - -rm -f $SERVER_LOG $CLIENT_LOG -rm -rf $DATADIR $TESTDATADIR $ENSEMBLE_DATADIR - -mkdir -p $DATADIR -# Copy fixed-shape models -cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_int32_int32_int32 $DATADIR/ -cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_nobatch_int32_int32_int32 $DATADIR/ -cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_object_object_object $DATADIR/ -cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_nobatch_object_object_object $DATADIR/ - -# Copy a variable-shape models -cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_object_int32_int32 $DATADIR/ -cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/ - -# Copy shape tensor models -cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/ - -# Copying ensemble including a sequential model -cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR -cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_sequence_model_repository/simple_savedmodel_sequence_object $DATADIR -cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_sequence_model_repository/nop_TYPE_FP32_-1 $DATADIR - -# Copying variable sequence model -cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_sequence_model_repository/graphdef_sequence_float32 $DATADIR - -mkdir $DATADIR/nop_TYPE_FP32_-1/1 - -# Copy inception model to the model repository -cp -r /data/inferenceserver/${REPO_VERSION}/tf_model_store/inception_v1_graphdef $DATADIR - -# Copy resnet50v1.5_fp16 -cp -r /data/inferenceserver/${REPO_VERSION}/perf_model_store/resnet50v1.5_fp16_savedmodel $DATADIR - -# Copy and customize custom_zero_1_float32 -cp -r ../custom_models/custom_zero_1_float32 $DATADIR && \ - mkdir $DATADIR/custom_zero_1_float32/1 && \ - (cd $DATADIR/custom_zero_1_float32 && \ - echo "parameters [" >> config.pbtxt && \ - echo "{ key: \"execute_delay_ms\"; value: { string_value: \"100\" }}" >> config.pbtxt && \ - echo "]" >> config.pbtxt) - -# Copy and customize optional inputs model -cp -r ../python_models/optional $DATADIR && \ - mkdir $DATADIR/optional/1 && \ - mv $DATADIR/optional/model.py $DATADIR/optional/1 && \ - sed -i 's/max_batch_size: 0/max_batch_size: 2/g' $DATADIR/optional/config.pbtxt - -# Copy decoupled model -git clone --depth=1 https://github.com/triton-inference-server/python_backend -mkdir -p $DATADIR/repeat_int32/1 -cp python_backend/examples/decoupled/repeat_config.pbtxt $DATADIR/repeat_int32/config.pbtxt -cp python_backend/examples/decoupled/repeat_model.py $DATADIR/repeat_int32/1/model.py - -# Generating test data -mkdir -p $TESTDATADIR -for INPUT in INPUT0 INPUT1; do - for i in {1..16}; do - echo '1' >> $TESTDATADIR/${INPUT} - done -done - -RET=0 - -run_server -if [ "$SERVER_PID" == "0" ]; then - echo -e "\n***\n*** Failed to start $SERVER\n***" - cat $SERVER_LOG - exit 1 -fi - - -# Test whether there was a conflict in sending sequences. This should -# be done before other testing as the server might emit this warning -# in certain test cases that are expected to raise this warning -SERVER_ERROR_STRING="The previous sequence did not end before this sequence start" - -set +e -$PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_object_object -p2000 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed: Expected an error when using dynamic shapes in string inputs\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "input INPUT0 contains dynamic shape, provide shapes to send along with the request" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed: \n***" - RET=1 -fi - -# Testing with ensemble and sequential model variants -$PERF_ANALYZER -v -i grpc -m simple_savedmodel_sequence_object -p 2000 -t5 --streaming \ ---input-data=$SEQ_JSONDATAFILE --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed: Sequence conflict when maintaining concurrency\n***" - RET=1 -fi - -$PERF_ANALYZER -v -i grpc -m simple_savedmodel_sequence_object -p 1000 --request-rate-range 100:200:50 --streaming \ ---input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -if [ $(cat $SERVER_LOG | grep "${SERVER_ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $SERVER_LOG | grep "${SERVER_ERROR_STRING}" - echo -e "\n***\n*** Test Failed: Sequence conflict\n***" - RET=1 -fi -set -e - -for PROTOCOL in grpc http; do - - # Testing simple configurations with different shared memory types - for SHARED_MEMORY_TYPE in none system cuda; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 \ - --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 -a \ - --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - # TODO Add back testing with preprocess_inception_ensemble model - - # Testing with inception model - for SHARED_MEMORY_TYPE in none system cuda; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m inception_v1_graphdef -t 1 -p2000 -b 1 \ - --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - $PERF_ANALYZER -v -i $PROTOCOL -m inception_v1_graphdef -t 1 -p2000 -b 1 -a \ - --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - # Testing with resnet50 models with large batch sizes - for SHARED_MEMORY_TYPE in none system cuda; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m inception_v1_graphdef -t 2 -p2000 -b 64 \ - --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - $PERF_ANALYZER -v -i $PROTOCOL -m inception_v1_graphdef -t 2 -p2000 -b 64 \ - --shared-memory=$SHARED_MEMORY_TYPE -a -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - # Test perf client behavior on different model with different batch size - for MODEL in graphdef_nobatch_int32_int32_int32 graphdef_int32_int32_int32; do - # Valid batch size - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m $MODEL -t 1 -p2000 -b 1 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - - # Invalid batch sizes - for STATIC_BATCH in 0 10; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m $MODEL -t 1 -p2000 -b $STATIC_BATCH -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - done - - # Testing with the new arguments - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 --concurrency-range 1:5:2 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "error | Request count: 0 | : 0 infer/sec\|: 0 usec|Request concurrency: 2" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 --concurrency-range 1:5:2 \ - --input-data=${INT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "error | Request count: 0 | : 0 infer/sec\|: 0 usec|Request concurrency: 2" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 --request-rate-range 1000:2000:500 \ - -p1000 -b 1 -a -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 --request-rate-range 1000:2000:500 \ - --input-data=${INT_JSONDATAFILE} -p1000 -b 1 -a -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - # Binary search for request rate mode - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 --request-rate-range 1000:2000:100 -p1000 -b 1 \ - -a --binary-search --request-distribution "poisson" -l 10 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - - # Binary search for concurrency range mode and make sure it doesn't hang - $PERF_ANALYZER -v -a --request-distribution "poisson" --shared-memory none \ - --percentile 99 --binary-search --concurrency-range 1:8:2 -l 5 \ - -m graphdef_int32_int32_int32 -b 1 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 & - PA_PID=$! - if [ "$PA_PID" == "0" ]; then - echo -e "\n***\n*** Failed to start $PERF_ANALYZER\n***" - cat $CLIENT_LOG - RET=1 - fi - # wait for PA to finish running - sleep 200 - if ps -p $PA_PID > /dev/null; then - cat $CLIENT_LOG - echo -e "\n***\n*** $PERF_ANALYZER is hanging after 200 s\n***" - kill $PA_PID - RET=1 - fi - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - - # Testing with combinations of string input and shared memory types - for SHARED_MEMORY_TYPE in none system cuda; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_object_object --string-data=1 -p2000 \ - --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - # Testing with combinations of file inputs and shared memory types - for SHARED_MEMORY_TYPE in none system cuda; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_object_object --input-data=$TESTDATADIR -p2000 \ - --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - for SHARED_MEMORY_TYPE in none system cuda; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_object_object --input-data=$STRING_JSONDATAFILE \ - --input-data=$STRING_JSONDATAFILE -p2000 --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - # Testing with combinations of variable inputs and shared memory types - for SHARED_MEMORY_TYPE in none system cuda; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_int32_int32 --input-data=$TESTDATADIR \ - --shape INPUT0:2,8 --shape INPUT1:2,8 -p2000 --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} \ - >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - for SHARED_MEMORY_TYPE in none system cuda; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_int32_int32 --input-data=$STRING_WITHSHAPE_JSONDATAFILE \ - --shape INPUT0:2,8 --shape INPUT1:2,8 -p2000 --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} \ - >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_float32 --shape INPUT0:2,8,2 \ - --shape INPUT1:2,8,2 -p2000 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - - # Trying to batch tensors with different shape - for SHARED_MEMORY_TYPE in none system cuda; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_float32 --shape INPUT0:2,8,2 --shape INPUT1:2,8,2 -p2000 -b 4 \ - --shared-memory=$SHARED_MEMORY_TYPE --input-data=$INT_DIFFSHAPE_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep -P "The supplied shape .+ is incompatible with the model's input shape" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - # Shape tensor I/O model (server needs the shape tensor on the CPU) - for SHARED_MEMORY_TYPE in none system; do - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \ - --shape DUMMY_INPUT0:4,4 -p2000 --shared-memory=$SHARED_MEMORY_TYPE -b 8 -s ${STABILITY_THRESHOLD} \ - >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep ": 0 infer/sec\|: 0 usec" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m simple_savedmodel_sequence_object -p 2000 -t5 --sync \ - --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - $PERF_ANALYZER -v -i $PROTOCOL -m simple_savedmodel_sequence_object -p 2000 -t5 --sync \ - --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - $PERF_ANALYZER -v -i $PROTOCOL -m simple_savedmodel_sequence_object -p 1000 --request-rate-range 100:200:50 --sync \ - --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - - - # Testing with variable ensemble model. This unit specifies different shape values - # for different inferences. - for SHARED_MEMORY_TYPE in none system cuda; do - set +e - # FIXME: Enable HTTP when the server is able to correctly return the complex error messages. - $PERF_ANALYZER -v -i grpc -m graphdef_sequence_float32 --shape INPUT:2 --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE \ - --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE -p2000 --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep -P "The supplied shape .+ is incompatible with the model's input shape" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - set -e - done - - # Testing that trace logging works - set +e - rm ${TRACE_FILE}* - $PERF_ANALYZER -v -i $PROTOCOL -m simple_savedmodel_sequence_object -p 2000 -t5 --sync \ - --trace-level TIMESTAMPS --trace-rate 1000 --trace-count 100 --log-frequency 10 \ - --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if ! compgen -G "$TRACE_FILE*" > /dev/null; then - echo -e "\n***\n*** Test Failed. $TRACE_FILE failed to generate.\n***" - RET=1 - elif [ $(cat ${TRACE_FILE}* | grep "REQUEST_START" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed. Did not find `REQUEST_START` in $TRACE_FILE \n***" - RET=1 - fi - curl localhost:8000/v2/trace/setting -d '{"trace_level":["OFF"]}' - set -e - - # Testing that setting trace file does not work - set +e - $PERF_ANALYZER -v -i $PROTOCOL -m simple_savedmodel_sequence_object \ - --trace-file $TRACE_FILE >$CLIENT_LOG 2>&1 - if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed. Expected to fail for unknown arg --trace-file" - RET=1 - fi - curl localhost:8000/v2/trace/setting -d '{"trace_level":["OFF"]}' - set -e -done - -# Test with output validation -set +e -$PERF_ANALYZER -v -m graphdef_int32_int32_int32 --input-data=${NON_ALIGNED_OUTPUT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "The 'validation_data' field doesn't align with 'data' field in the json file" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -$PERF_ANALYZER -v -m graphdef_int32_int32_int32 --input-data=${WRONG_OUTPUT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "mismatch in the data provided" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -$PERF_ANALYZER -v -m graphdef_int32_int32_int32 --input-data=${WRONG_OUTPUT_2_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "Output doesn't match expected output" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - - -$PERF_ANALYZER -v -m graphdef_int32_int32_int32 --input-data=${OUTPUT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -$PERF_ANALYZER -v -m simple_savedmodel_sequence_object -i grpc --streaming \ ---input-data=${SEQ_WRONG_OUTPUT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "Output doesn't match expected output" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -$PERF_ANALYZER -v -m simple_savedmodel_sequence_object -i grpc --streaming \ ---input-data=${SEQ_OUTPUT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - -## Testing with very large concurrencies and large dataset -INPUT_DATA_OPTION="--input-data $SEQ_JSONDATAFILE " -for i in {1..9}; do - INPUT_DATA_OPTION=" ${INPUT_DATA_OPTION} ${INPUT_DATA_OPTION}" -done -set +e -$PERF_ANALYZER -v -m simple_savedmodel_sequence_object -p 10000 --concurrency-range 1500:2000:250 -i grpc --streaming \ -${INPUT_DATA_OPTION} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - -## Test count_windows mode -set +e - -# Send incorrect shape and make sure that perf_analyzer doesn't hang -$PERF_ANALYZER -v -m graphdef_object_int32_int32 --measurement-mode "count_windows" \ - --shape INPUT0:1,8,100 --shape INPUT1:2,8 --string-data=1 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "unexpected shape for input 'INPUT0' for model" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -$PERF_ANALYZER -v -m graphdef_object_int32_int32 --measurement-mode "count_windows" \ - --shape INPUT0:2,8 --shape INPUT1:2,8 --string-data=1 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - -# Test with optional inputs missing but still valid -set +e -$PERF_ANALYZER -v -m optional --measurement-mode "count_windows" \ - --input-data=${INT_OPTIONAL_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - -# Test with optional inputs missing and invalid -set +e -OPTIONAL_INPUT_ERROR_STRING="For batch sizes larger than 1, the same set of -inputs must be specified for each batch. You cannot use different set of -optional inputs for each individual batch." -$PERF_ANALYZER -v -m optional -b 2 --measurement-mode "count_windows" \ - --input-data=${INT_OPTIONAL_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${OPTIONAL_INPUT_ERROR_STRING}" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - - -# Test Custom request rate option -CUSTOM_SCHEDULE_FILE=$TESTDATADIR/custom.schedule -echo '30000' >> $CUSTOM_SCHEDULE_FILE -echo '10000' >> $CUSTOM_SCHEDULE_FILE -echo '40000' >> $CUSTOM_SCHEDULE_FILE -echo '20000' >> $CUSTOM_SCHEDULE_FILE -echo '25000' >> $CUSTOM_SCHEDULE_FILE - -set +e -$PERF_ANALYZER -v -i grpc -m graphdef_int32_int32_int32 --request-intervals $CUSTOM_SCHEDULE_FILE >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "Request Rate: 40" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed: \n***" - RET=1 -fi -set -e - -# Test --serial-sequences mode -set +e -$PERF_ANALYZER -v -i $PROTOCOL -m simple_savedmodel_sequence_object -p 1000 --request-rate-range 100:200:50 --serial-sequences \ - --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -$PERF_ANALYZER -v -i $PROTOCOL -m simple_savedmodel_sequence_object -p 1000 --request-intervals $CUSTOM_SCHEDULE_FILE --serial-sequences \ - --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - -## Test decoupled model support -$PERF_ANALYZER -v -m repeat_int32 --input-data=$REPEAT_INT32_JSONDATAFILE \ - --profile-export-file profile_export.json -i grpc --async --streaming -s \ - ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -python3 -c "import json ; \ - requests = json.load(open('profile_export.json'))['experiments'][0]['requests'] ; \ - assert any(len(r['response_timestamps']) > 1 for r in requests)" -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -check-jsonschema --schemafile perf_analyzer_profile_export_schema.json profile_export.json -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -## Test perf_analyzer with MPI / multiple models - -is_synchronized() { - local TIMESTAMP_RANK_0_STABLE=$(grep -oP "^\K[^$]+(?=\[1,0\]:All models on all MPI ranks are stable)" 1/rank.0/stdout | date "+%s" -f -) - local TIMESTAMP_RANK_1_STABLE=$(grep -oP "^\K[^$]+(?=\[1,1\]:All models on all MPI ranks are stable)" 1/rank.1/stdout | date "+%s" -f -) - local TIMESTAMP_RANK_2_STABLE=$(grep -oP "^\K[^$]+(?=\[1,2\]:All models on all MPI ranks are stable)" 1/rank.2/stdout | date "+%s" -f -) - local TIMESTAMP_MIN=$(echo -e "${TIMESTAMP_RANK_0_STABLE}\n${TIMESTAMP_RANK_1_STABLE}\n${TIMESTAMP_RANK_2_STABLE}" | sort -n | head -1) - local TIMESTAMP_MAX=$(echo -e "${TIMESTAMP_RANK_0_STABLE}\n${TIMESTAMP_RANK_1_STABLE}\n${TIMESTAMP_RANK_2_STABLE}" | sort -n | tail -1) - local TIMESTAMP_MAX_MIN_DIFFERENCE=$((${TIMESTAMP_MAX}-${TIMESTAMP_MIN})) - local ALLOWABLE_SECONDS_BETWEEN_PROFILES_FINISHING="5" - echo $(($TIMESTAMP_MAX_MIN_DIFFERENCE <= $ALLOWABLE_SECONDS_BETWEEN_PROFILES_FINISHING)) -} - -is_stable() { - local RANK=$1 - local IS_THROUGHPUT=$2 - if [ $IS_THROUGHPUT ]; then - local GREP_PATTERN="\[1,$RANK\]: Pass \[[0-9]+\] throughput: \K[0-9]+\.?[0-9]*" - else - local GREP_PATTERN="\[1,$RANK\]: Pass \[[0-9]+\] throughput: [0-9]+\.?[0-9]* infer/sec. Avg latency: \K[0-9]+" - fi - local LAST_MINUS_0=$(grep -oP "$GREP_PATTERN" 1/rank.$RANK/stdout | tail -3 | sed -n 3p) - local LAST_MINUS_1=$(grep -oP "$GREP_PATTERN" 1/rank.$RANK/stdout | tail -3 | sed -n 2p) - local LAST_MINUS_2=$(grep -oP "$GREP_PATTERN" 1/rank.$RANK/stdout | tail -3 | sed -n 1p) - local MEAN=$(awk "BEGIN {print (($LAST_MINUS_0+$LAST_MINUS_1+$LAST_MINUS_2)/3)}") - local STABILITY_THRESHOLD=0.5 - # Based on this: https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/inference_profiler.cc#L629-L644 - local WITHIN_THRESHOLD_0=$(awk "BEGIN {print ($LAST_MINUS_0 >= ((1 - $STABILITY_THRESHOLD) * $MEAN) && $LAST_MINUS_0 <= ((1 + $STABILITY_THRESHOLD) * $MEAN))}") - local WITHIN_THRESHOLD_1=$(awk "BEGIN {print ($LAST_MINUS_1 >= ((1 - $STABILITY_THRESHOLD) * $MEAN) && $LAST_MINUS_1 <= ((1 + $STABILITY_THRESHOLD) * $MEAN))}") - local WITHIN_THRESHOLD_2=$(awk "BEGIN {print ($LAST_MINUS_2 >= ((1 - $STABILITY_THRESHOLD) * $MEAN) && $LAST_MINUS_2 <= ((1 + $STABILITY_THRESHOLD) * $MEAN))}") - echo $(($WITHIN_THRESHOLD_0 && $WITHIN_THRESHOLD_1 && $WITHIN_THRESHOLD_2)) -} - -set +e -mpiexec --allow-run-as-root \ - -n 1 --merge-stderr-to-stdout --output-filename . --tag-output --timestamp-output \ - $PERF_ANALYZER -v -m graphdef_int32_int32_int32 \ - --measurement-mode count_windows -s 50 --enable-mpi : \ - -n 1 --merge-stderr-to-stdout --output-filename . --tag-output --timestamp-output \ - $PERF_ANALYZER -v -m graphdef_nobatch_int32_int32_int32 \ - --measurement-mode count_windows -s 50 --enable-mpi : \ - -n 1 --merge-stderr-to-stdout --output-filename . --tag-output --timestamp-output \ - $PERF_ANALYZER -v -m custom_zero_1_float32 \ - --measurement-mode count_windows -s 50 --enable-mpi -if [ $? -ne 0 ]; then - cat 1/rank.0/stdout 1/rank.2/stdout 1/rank.2/stdout - echo -e "\n***\n*** Perf Analyzer returned non-zero exit code\n***" - echo -e "\n***\n*** Test Failed\n***" - RET=1 -else - if [ $(is_synchronized) -eq 0 ]; then - cat 1/rank.0/stdout 1/rank.2/stdout 1/rank.2/stdout - echo -e "\n***\n*** All models did not finish profiling at almost the same time\n***" - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - RANK_0_THROUGHPUT_IS_STABLE=$(is_stable 0 1) - RANK_0_LATENCY_IS_STABLE=$(is_stable 0 0) - RANK_1_THROUGHPUT_IS_STABLE=$(is_stable 1 1) - RANK_1_LATENCY_IS_STABLE=$(is_stable 1 0) - RANK_2_THROUGHPUT_IS_STABLE=$(is_stable 2 1) - RANK_2_LATENCY_IS_STABLE=$(is_stable 2 0) - - ALL_STABLE=$(( \ - $RANK_0_THROUGHPUT_IS_STABLE && \ - $RANK_0_LATENCY_IS_STABLE && \ - $RANK_1_THROUGHPUT_IS_STABLE && \ - $RANK_1_LATENCY_IS_STABLE && \ - $RANK_2_THROUGHPUT_IS_STABLE && \ - $RANK_2_LATENCY_IS_STABLE)) - - if [ $ALL_STABLE -eq 0 ]; then - cat 1/rank.0/stdout 1/rank.2/stdout 1/rank.2/stdout - echo -e "\n***\n*** All models did not stabilize\n***" - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - rm -rf 1 -fi -set -e - -## Test perf_analyzer without MPI library (`libmpi.so`) available - -rm -rf /opt/hpcx/ompi/lib/libmpi* - -set +e -$PERF_ANALYZER -v -m graphdef_int32_int32_int32 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - -kill $SERVER_PID -wait $SERVER_PID - -# Generate valid CA -openssl genrsa -passout pass:1234 -des3 -out ca.key 4096 -openssl req -passin pass:1234 -new -x509 -days 365 -key ca.key -out ca.crt -subj "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Test/CN=Root CA" - -# Generate valid Server Key/Cert -openssl genrsa -passout pass:1234 -des3 -out server.key 4096 -openssl req -passin pass:1234 -new -key server.key -out server.csr -subj "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Server/CN=localhost" -openssl x509 -req -passin pass:1234 -days 365 -in server.csr -CA ca.crt -CAkey ca.key -set_serial 01 -out server.crt - -# Remove passphrase from the Server Key -openssl rsa -passin pass:1234 -in server.key -out server.key - -# Generate valid Client Key/Cert -openssl genrsa -passout pass:1234 -des3 -out client.key 4096 -openssl req -passin pass:1234 -new -key client.key -out client.csr -subj "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Client/CN=localhost" -openssl x509 -passin pass:1234 -req -days 365 -in client.csr -CA ca.crt -CAkey ca.key -set_serial 01 -out client.crt - -# Remove passphrase from Client Key -openssl rsa -passin pass:1234 -in client.key -out client.key - -# Create mutated client key (Make first char of each like capital) -cp client.key client2.key && sed -i "s/\b\(.\)/\u\1/g" client2.key -cp client.crt client2.crt && sed -i "s/\b\(.\)/\u\1/g" client2.crt - -SERVER_ARGS="--model-repository=${DATADIR} --grpc-use-ssl=1 --grpc-server-cert=server.crt --grpc-server-key=server.key --grpc-root-cert=ca.crt" - -run_server -if [ "$SERVER_PID" == "0" ]; then - echo -e "\n***\n*** Failed to start $SERVER\n***" - cat $SERVER_LOG - exit 1 -fi - -# Test gRPC SSL -set +e - -# Test that gRPC protocol with SSL works correctly -$PERF_ANALYZER -v -i grpc -m graphdef_int32_int32_int32 \ - --ssl-grpc-use-ssl \ - --ssl-grpc-root-certifications-file=ca.crt \ - --ssl-grpc-private-key-file=client.key \ - --ssl-grpc-certificate-chain-file=client.crt \ - -s ${STABILITY_THRESHOLD} \ - > ${CLIENT_LOG}.grpc_success 2>&1 -if [ $? -ne 0 ]; then - cat ${CLIENT_LOG}.grpc_success - RET=1 -fi - -# Test that gRPC protocol with SSL fails with incorrect key -$PERF_ANALYZER -v -i grpc -m graphdef_int32_int32_int32 \ - --ssl-grpc-use-ssl \ - --ssl-grpc-root-certifications-file=ca.crt \ - --ssl-grpc-private-key-file=client.key \ - --ssl-grpc-certificate-chain-file=client2.crt \ - -s ${STABILITY_THRESHOLD} \ - > ${CLIENT_LOG}.grpc_failure 2>&1 -if [ $? -eq 0 ]; then - cat ${CLIENT_LOG}.grpc_failure - echo -e "\n***\n*** Expected test failure\n***" - RET=1 -fi - -set -e - -kill $SERVER_PID -wait $SERVER_PID - -cp server.crt /etc/nginx/cert.crt -cp server.key /etc/nginx/cert.key - -SERVER_ARGS="--model-repository=${DATADIR}" - -run_server -if [ "$SERVER_PID" == "0" ]; then - echo -e "\n***\n*** Failed to start $SERVER\n***" - cat $SERVER_LOG - exit 1 -fi - -# Setup the new configuration for the proxy. The HTTPS traffic will be -# redirected to the running instance of server at localhost:8000 -cp nginx.conf /etc/nginx/sites-available/default - -# Start the proxy server -service nginx restart - -# Test HTTP SSL -set +e - -# Test that HTTP protocol with SSL works correctly with certificates -$PERF_ANALYZER -v -u https://localhost:443 -i http -m graphdef_int32_int32_int32 \ - --ssl-https-verify-peer 1 \ - --ssl-https-verify-host 2 \ - --ssl-https-ca-certificates-file ca.crt \ - --ssl-https-client-certificate-file client.crt \ - --ssl-https-client-certificate-type PEM \ - --ssl-https-private-key-file client.key \ - --ssl-https-private-key-type PEM \ - -s ${STABILITY_THRESHOLD} \ - > ${CLIENT_LOG}.https_success 2>&1 -if [ $? -ne 0 ]; then - cat ${CLIENT_LOG}.https_success - RET=1 -fi - -# Test that HTTP protocol with SSL works correctly without certificates -$PERF_ANALYZER -v -u https://localhost:443 -i http -m graphdef_int32_int32_int32 \ - --ssl-https-verify-peer 0 \ - --ssl-https-verify-host 0 \ - -s ${STABILITY_THRESHOLD} \ - > ${CLIENT_LOG}.https_success 2>&1 -if [ $? -ne 0 ]; then - cat ${CLIENT_LOG}.https_success - RET=1 -fi - -# Test that HTTP protocol with SSL fails with incorrect key -$PERF_ANALYZER -v -u https://localhost:443 -i http -m graphdef_int32_int32_int32 \ - --ssl-https-verify-peer 1 \ - --ssl-https-verify-host 2 \ - --ssl-https-ca-certificates-file ca.crt \ - --ssl-https-client-certificate-file client.crt \ - --ssl-https-client-certificate-type PEM \ - --ssl-https-private-key-file client2.key \ - --ssl-https-private-key-type PEM \ - -s ${STABILITY_THRESHOLD} \ - > ${CLIENT_LOG}.https_failure 2>&1 -if [ $? -eq 0 ]; then - cat ${CLIENT_LOG}.https_failure - echo -e "\n***\n*** Expected test failure\n***" - RET=1 -fi - -set -e - -kill $SERVER_PID -wait $SERVER_PID - -if [ $RET -eq 0 ]; then - echo -e "\n***\n*** Test Passed\n***" -else - echo -e "\n***\n*** Test FAILED\n***" -fi - -exit $RET diff --git a/qa/L0_perf_analyzer_capi/test.sh b/qa/L0_perf_analyzer_capi/test.sh deleted file mode 100755 index 53196fa762..0000000000 --- a/qa/L0_perf_analyzer_capi/test.sh +++ /dev/null @@ -1,320 +0,0 @@ -#!/bin/bash -# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# TESTS COPIED FROM L0_perf_analyzer/test.sh -REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION} -if [ "$#" -ge 1 ]; then - REPO_VERSION=$1 -fi -if [ -z "$REPO_VERSION" ]; then - echo -e "Repository version must be specified" - echo -e "\n***\n*** Test Failed\n***" - exit 1 -fi -if [ ! -z "$TEST_REPO_ARCH" ]; then - REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH} -fi - -export CUDA_VISIBLE_DEVICES=0 - -CLIENT_LOG="./perf_analyzer.log" -PERF_ANALYZER=../clients/perf_analyzer - -DATADIR=`pwd`/models -TESTDATADIR=`pwd`/test_data - -SERVER_LIBRARY_PATH=/opt/tritonserver - -FLOAT_DIFFSHAPE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/float_data_with_shape.json -STRING_WITHSHAPE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/string_data_with_shape.json -SEQ_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/seq_data.json -SHAPETENSORADTAFILE=`pwd`/../common/perf_analyzer_input_data_json/shape_tensor_data.json - -ERROR_STRING="error | Request count: 0 | : 0 infer/sec" - -STABILITY_THRESHOLD="9999" - -source ../common/util.sh - -rm -f $CLIENT_LOG -rm -rf $DATADIR $TESTDATADIR $ENSEMBLE_DATADIR - -mkdir -p $DATADIR -# Copy fixed-shape models -cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_int32_int32_int32 $DATADIR/ -cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_object_object_object $DATADIR/ - -# Copy a variable-shape models -cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_object_int32_int32 $DATADIR/ -cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/ - -# Copy shape tensor models -cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/ - -# Copying ensemble including a sequential model -cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR -cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_sequence_model_repository/simple_savedmodel_sequence_object $DATADIR - -# Copying variable sequence model -cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_sequence_model_repository/graphdef_sequence_float32 $DATADIR - -# Copying bls model with undefined variable -mkdir -p $DATADIR/bls_undefined/1 && \ - cp ../python_models/bls_undefined/model.py $DATADIR/bls_undefined/1/. && \ - cp ../python_models/bls_undefined/config.pbtxt $DATADIR/bls_undefined/. - -# Generating test data -mkdir -p $TESTDATADIR -for INPUT in INPUT0 INPUT1; do - for i in {1..16}; do - echo '1' >> $TESTDATADIR/${INPUT} - done -done - -RET=0 - -########## Test C API ############# -# Make sure tritonserver is not running first -set +e -SERVER_PID=$(pidof tritonserver) -if [ $? -ne 1 ]; then -echo -e "\n There was a previous instance of tritonserver, killing \n" - kill $SERVER_PID - wait $SERVER_PID -fi -set -e - -# Testing simple configuration -$PERF_ANALYZER -v -m graphdef_int32_int32_int32 \ ---service-kind=triton_c_api \ ---model-repository=$DATADIR --triton-server-directory=$SERVER_LIBRARY_PATH \ --s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -$PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 \ ---service-kind=triton_c_api --model-repository=$DATADIR \ ---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \ ->$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -#Testing with string input -$PERF_ANALYZER -v -m graphdef_object_object_object --string-data=1 -p2000 \ ---service-kind=triton_c_api --model-repository=$DATADIR \ ---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \ ->$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -# Testing with variable inputs -$PERF_ANALYZER -v -m graphdef_object_int32_int32 --input-data=$TESTDATADIR \ ---shape INPUT0:2,8 --shape INPUT1:2,8 \ ---service-kind=triton_c_api --model-repository=$DATADIR \ ---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \ ->$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -$PERF_ANALYZER -v -m graphdef_object_int32_int32 \ ---input-data=$STRING_WITHSHAPE_JSONDATAFILE \ ---shape INPUT0:2,8 --shape INPUT1:2,8 -p2000 \ ---service-kind=triton_c_api --model-repository=$DATADIR \ ---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \ ->$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -$PERF_ANALYZER -v -m graphdef_int32_int32_float32 --shape INPUT0:2,8,2 \ ---shape INPUT1:2,8,2 -p2000 \ ---service-kind=triton_c_api --model-repository=$DATADIR \ ---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \ ->$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -# Shape tensor I/O model (server needs the shape tensor on the CPU) -$PERF_ANALYZER -v -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \ ---shape DUMMY_INPUT0:4,4 -p2000 -b 8 \ ---service-kind=triton_c_api --model-repository=$DATADIR \ ---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \ ->$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep ": 0 infer/sec\|: 0 usec" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -$PERF_ANALYZER -v -m simple_savedmodel_sequence_object -p 2000 -t5 --sync \ --s ${STABILITY_THRESHOLD} \ ---input-data=$SEQ_JSONDATAFILE \ ---service-kind=triton_c_api --model-repository=$DATADIR \ ---triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi - -set +e -$PERF_ANALYZER -v -m graphdef_sequence_float32 --shape INPUT:2 \ --s ${STABILITY_THRESHOLD} \ ---input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE \ ---input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE -p2000 \ ---service-kind=triton_c_api --model-repository=$DATADIR \ ---triton-server-directory=$SERVER_LIBRARY_PATH --sync >$CLIENT_LOG 2>&1 -if [ $? -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep -P "The supplied shape .+ is incompatible with the model's input shape" | wc -l) -eq 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - -for SHARED_MEMORY_TYPE in system cuda; do - $PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 \ - -s ${STABILITY_THRESHOLD} \ - --shared-memory=$SHARED_MEMORY_TYPE \ - --service-kind=triton_c_api --model-repository=$DATADIR \ - --triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1 - if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi -done - - -$PERF_ANALYZER -v -m graphdef_int32_int32_int32 --request-rate-range 1000:2000:500 -p1000 -b 1 \ ---service-kind=triton_c_api --model-repository=$DATADIR \ ---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \ ->$CLIENT_LOG 2>&1 -if [ $? -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - -set +e -# Testing erroneous configuration -# This model is expected to fail -$PERF_ANALYZER -v -m bls_undefined --shape INPUT0:1048576 -t 64\ ---service-kind=triton_c_api \ ---model-repository=$DATADIR --triton-server-directory=$SERVER_LIBRARY_PATH \ --s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 -if [ $? -ne 99 ]; then - cat $CLIENT_LOG - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - -# Make sure server is not still running -set +e -SERVER_PID=$(pidof tritonserver) -if [ $? -eq 0 ]; then - echo -e "\n Tritonserver did not exit properly, killing \n" - kill $SERVER_PID - wait $SERVER_PID - RET=1 -fi -set -e - -if [ $RET -eq 0 ]; then - echo -e "\n***\n*** Test Passed\n***" -else - echo -e "\n***\n*** Test FAILED\n***" -fi -exit $RET diff --git a/qa/L0_perf_analyzer_doc_links/mkdocs.yml b/qa/L0_perf_analyzer_doc_links/mkdocs.yml deleted file mode 100644 index 41a4bfe485..0000000000 --- a/qa/L0_perf_analyzer_doc_links/mkdocs.yml +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -site_name: CI Test -use_directory_urls: False -docs_dir: "./docs" -plugins: - - htmlproofer - - search - -markdown_extensions: - - toc: - permalink: True diff --git a/qa/L0_perf_analyzer_doc_links/test.sh b/qa/L0_perf_analyzer_doc_links/test.sh deleted file mode 100755 index d0757bca9e..0000000000 --- a/qa/L0_perf_analyzer_doc_links/test.sh +++ /dev/null @@ -1,74 +0,0 @@ -#!/bin/bash -# Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -LOG="`pwd`/doc_links.log" -CONFIG="`pwd`/mkdocs.yml" -RET=0 - -# Download necessary packages -python3 -m pip install mkdocs -python3 -m pip install mkdocs-htmlproofer-plugin==0.10.3 - -#Download perf_analyzer docs -TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"} -TRITON_PERF_ANALYZER_REPO_TAG="${TRITON_PERF_ANALYZER_REPO_TAG:=main}" -git clone -b ${TRITON_PERF_ANALYZER_REPO_TAG} ${TRITON_REPO_ORGANIZATION}/perf_analyzer.git -cp `pwd`/perf_analyzer/README.md . -cp -rf `pwd`/perf_analyzer/docs . - -# Need to remove all links that start with -- or -. Mkdocs converts all -- to - for anchor links. -# This breaks all links to cli commands throughout the docs. This will iterate over all -# files in the docs directory and remove -- and - at the start of options, which allows the -# tool to check links for correctness. -for file in `pwd`/docs/*.md -do - echo $file - sed -i 's/`-*/`/g' $file - sed -i 's/#-*/#/g' $file -done - -exec mkdocs serve -f $CONFIG > $LOG & -PID=$! -sleep 20 - -until [[ (-z `pgrep mkdocs`) ]]; do - kill -2 $PID - sleep 2 -done - -if [[ ! -z `grep "invalid url" $LOG` ]]; then - cat $LOG - RET=1 -fi - - -if [ $RET -eq 0 ]; then - echo -e "\n***\n*** Test PASSED\n***" -else - echo -e "\n***\n*** Test FAILED\n***" -fi -exit $RET diff --git a/qa/L0_perf_analyzer_ground_truth/test.sh b/qa/L0_perf_analyzer_ground_truth/test.sh deleted file mode 100755 index d5d78e63f4..0000000000 --- a/qa/L0_perf_analyzer_ground_truth/test.sh +++ /dev/null @@ -1,175 +0,0 @@ -#!/bin/bash -# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION} -if [ "$#" -ge 1 ]; then - REPO_VERSION=$1 -fi -if [ -z "${REPO_VERSION}" ]; then - echo -e "Repository version must be specified" - echo -e "\n***\n*** Test Failed\n***" - exit 1 -fi -if [ ! -z "$TEST_REPO_ARCH" ]; then - REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH} -fi - -source ../common/util.sh - -# Setup client/perf_analyzer -CLIENT_LOG="./perf_analyzer.log" -PERF_ANALYZER=../clients/perf_analyzer - -function check_perf_analyzer_error { - ERROR_STRING="error | Request count: 0 | : 0 infer/sec" - CLIENT_RET="$1" - if [ ${CLIENT_RET} -ne 0 ]; then - cat ${CLIENT_LOG} - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat ${CLIENT_LOG} | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat ${CLIENT_LOG} - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi -} - -# Checks that the model infer/sec performance is equal to an expected value -# +/- some tolerance. -# $1: csv result file from PA run -# $2: expected infer/sec value -# $3: tolerance for expected value equality -function check_performance { - # get the boundary values based on the tolerance percentage - MIN=$(python3 -c "print(${2} * (1 - ${3}))") - MAX=$(python3 -c "print(${2} * (1 + ${3}))") - - # delete all but the 2nd line in the resulting file - # then get the 2nd column value which is the infer/sec measurement - report_val=$(sed '2!d' $1 | awk -F ',' {'print $2'}) - - # check if within tolerance - ret=$(python3 -c "print(${report_val} >= ${MIN} and ${report_val} <= ${MAX})") - if [ "$ret" = "False" ]; then - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi -} - -# Iterate over the grpc results to ensure gRPC times are greater than 0 -# $1: client log file -# example line: Avg gRPC time: 42648 usec (marshal 6 usec + response wait 42640 usec + unmarshal 2 usec) -function check_grpc_time { - grep "gRPC" $1 | awk '{print $4}' | while read -r line; do - if [ $line -eq 0 ]; then - RET=1 - fi - done -} - -# Create input_data.json to communicate the requested model delay -# $1: desired model delay -function create_input_data { - echo "{\"data\":[{\"INPUT0\" : [${1}]}]}" > input_data.json -} - -# Setup server -export CUDA_VISIBLE_DEVICES=0 -SERVER=/opt/tritonserver/bin/tritonserver -SERVER_ARGS="--model-repository=`pwd`/models" -SERVER_LOG="./inference_server.log" - -rm -f $SERVER_LOG $CLIENT_LOG -MODEL_DIR="./models" -rm -fr ${MODEL_DIR} && mkdir ${MODEL_DIR} -MODELS="ground_truth" - -for model in ${MODELS}; do - # Add version directory to each model if non-existent - mkdir -p "${MODEL_DIR}/${model}/1" - cp ../python_models/${model}/model.py ./models/${model}/1/model.py - cp ../python_models/${model}/config.pbtxt ./models/${model}/config.pbtxt -done - -# Run server -run_server -if [ "${SERVER_PID}" == "0" ]; then - echo -e "\n***\n*** Failed to start ${SERVER}\n***" - cat ${SERVER_LOG} - exit 1 -fi - -# Run perf_analyzer -set +e -RET=0 -PROTOCOLS="http grpc" -OUTPUT_FILE="results" -MODEL_DELAYS=(0.05 0.5) -TOLERANCE="0.05" - -for model_delay in ${MODEL_DELAYS[@]}; do - create_input_data ${model_delay} - EXPECTED_RESULT=$(python3 -c "print(1 / ${model_delay})") - for protocol in ${PROTOCOLS}; do - for model in ${MODELS}; do - echo "================================================================" - echo "[PERMUTATION] Protocol=${protocol} Model=${model}" - echo "================================================================" - - ${PERF_ANALYZER} -v -i ${protocol} --concurrency-range 2 --input-data input_data.json -m ${model} -f ${OUTPUT_FILE} | tee ${CLIENT_LOG} 2>&1 - check_perf_analyzer_error $? - - check_performance ${OUTPUT_FILE} ${EXPECTED_RESULT} ${TOLERANCE} - - if [ "${protocol}" == "grpc" ]; then - check_grpc_time ${CLIENT_LOG} - fi - done; - done; -done; - - -set -e - -# Cleanup -kill $SERVER_PID -wait $SERVER_PID - -if [ $RET -eq 0 ]; then - echo -e "\n***\n*** Test Passed\n***" -else - echo "=== START SERVER LOG ===" - cat ${SERVER_LOG} - echo "=== END SERVER LOG ===" - echo "=== START CLIENT LOG ===" - cat ${CLIENT_LOG} - echo "=== END CLIENT LOG ===" - echo -e "\n***\n*** Test FAILED\n***" -fi - -exit ${RET} diff --git a/qa/L0_perf_analyzer_report/test.sh b/qa/L0_perf_analyzer_report/test.sh deleted file mode 100755 index 469d11ce3a..0000000000 --- a/qa/L0_perf_analyzer_report/test.sh +++ /dev/null @@ -1,175 +0,0 @@ -#!/bin/bash -# Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION} -if [ "$#" -ge 1 ]; then - REPO_VERSION=$1 -fi -if [ -z "${REPO_VERSION}" ]; then - echo -e "Repository version must be specified" - echo -e "\n***\n*** Test Failed\n***" - exit 1 -fi -if [ ! -z "$TEST_REPO_ARCH" ]; then - REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH} -fi - -source ../common/util.sh - -# Setup client/perf_analyzer -CLIENT_LOG="./perf_analyzer.log" -PERF_ANALYZER=../clients/perf_analyzer - -function check_perf_analyzer_error { - ERROR_STRING="error | Request count: 0 | : 0 infer/sec" - CLIENT_RET="$1" - if [ ${CLIENT_RET} -ne 0 ]; then - cat ${CLIENT_LOG} - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - if [ $(cat ${CLIENT_LOG} | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then - cat ${CLIENT_LOG} - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi -} - -function check_cache_output { - # Validate cache info in perf_analyzer output - CACHE_STRING="Cache hit count" - if [ $(cat ${CLIENT_LOG} | grep -i "${CACHE_STRING}" | wc -l) -eq 0 ]; then - cat ${CLIENT_LOG} - echo "ERROR: No cache hit count found in output" - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi - - # Validate non-zero number of cache hits - ERROR_STRING="Cache hit count: 0" - num_cache_hit_lines=$(cat ${CLIENT_LOG} | grep -i "${CACHE_STRING}" | wc -l) - num_cache_hit_zero_lines=$(cat ${CLIENT_LOG} | grep -i "${ERROR_STRING}" | wc -l) - if [ ${num_cache_hit_zero_lines} -eq ${num_cache_hit_lines} ]; then - cat ${CLIENT_LOG} - echo "ERROR: All cache hit counts were zero, expected a non-zero number of cache hits" - echo -e "\n***\n*** Test Failed\n***" - RET=1 - fi -} - -# Setup server -export CUDA_VISIBLE_DEVICES=0 -SERVER=/opt/tritonserver/bin/tritonserver -# --response-cache-byte-size must be non-zero to test models with cache enabled -SERVER_ARGS="--model-repository=`pwd`/models --response-cache-byte-size=8192" -SERVER_LOG="./inference_server.log" - -# Setup model repository from existing qa_model_repository -rm -f $SERVER_LOG $CLIENT_LOG -MODEL_DIR="./models" -rm -fr ${MODEL_DIR} && mkdir ${MODEL_DIR} -ENSEMBLE_MODEL="simple_onnx_float32_float32_float32" -COMPOSING_MODEL="onnx_float32_float32_float32" -ENSEMBLE_MODEL_CACHE_ENABLED="${ENSEMBLE_MODEL}_cache_enabled" -ENSEMBLE_MODEL_CACHE_DISABLED="${ENSEMBLE_MODEL}_cache_disabled" -COMPOSING_MODEL_CACHE_ENABLED="${COMPOSING_MODEL}_cache_enabled" -COMPOSING_MODEL_CACHE_DISABLED="${COMPOSING_MODEL}_cache_disabled" -MODELS="${ENSEMBLE_MODEL_CACHE_ENABLED} ${ENSEMBLE_MODEL_CACHE_DISABLED} ${COMPOSING_MODEL_CACHE_ENABLED} ${COMPOSING_MODEL_CACHE_DISABLED}" - -## Setup ensemble models, one with cache enabled and one with cache disabled -cp -r "/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository/${ENSEMBLE_MODEL}" "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_ENABLED}" -cp -r "/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository/${ENSEMBLE_MODEL}" "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_DISABLED}" - -## Setup composing models, one with cache enabled and one with cache disabled -cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/${COMPOSING_MODEL}" "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_ENABLED}" -cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/${COMPOSING_MODEL}" "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_DISABLED}" - -for model in ${MODELS}; do - # Remove "name" line from each config to use directory name for simplicity - sed -i "/^name:/d" "${MODEL_DIR}/${model}/config.pbtxt" - # Add version directory to each model if non-existent - mkdir -p "${MODEL_DIR}/${model}/1" -done - -## Update "model_name" lines in each ensemble model config ensemble steps -sed -i "s/${COMPOSING_MODEL}/${COMPOSING_MODEL_CACHE_ENABLED}/g" "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_ENABLED}/config.pbtxt" -sed -i "s/${COMPOSING_MODEL}/${COMPOSING_MODEL_CACHE_DISABLED}/g" "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_DISABLED}/config.pbtxt" - -## Append cache config to each model config -echo -e "response_cache { enable: True }" >> "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_ENABLED}/config.pbtxt" -echo -e "response_cache { enable: False }" >> "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_DISABLED}/config.pbtxt" -echo -e "response_cache { enable: True }" >> "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_ENABLED}/config.pbtxt" -echo -e "response_cache { enable: False }" >> "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_DISABLED}/config.pbtxt" -# Force CPU memory for composing models since cache doesn't currently support GPU memory -echo -e "instance_group [{ kind: KIND_CPU, count: 1 }]" >> "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_ENABLED}/config.pbtxt" -echo -e "instance_group [{ kind: KIND_CPU, count: 1 }]" >> "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_DISABLED}/config.pbtxt" - -# Run server -run_server -if [ "${SERVER_PID}" == "0" ]; then - echo -e "\n***\n*** Failed to start ${SERVER}\n***" - cat ${SERVER_LOG} - exit 1 -fi - -# Run perf_analyzer -set +e -RET=0 -PROTOCOLS="http grpc" -STABILITY_THRESHOLD="15" -for protocol in ${PROTOCOLS}; do - for model in ${MODELS}; do - echo "================================================================" - echo "[PERMUTATION] Protocol=${protocol} Model=${model}" - echo "================================================================" - - ${PERF_ANALYZER} -v -i ${protocol} -m ${model} -s ${STABILITY_THRESHOLD} | tee ${CLIENT_LOG} 2>&1 - check_perf_analyzer_error $? - - # Check response cache outputs - if [[ ${model} == *"cache_enabled"* ]]; then - check_cache_output - fi - done; -done; -set -e - -# Cleanup -kill $SERVER_PID -wait $SERVER_PID - - -if [ $RET -eq 0 ]; then - echo -e "\n***\n*** Test Passed\n***" -else - echo "=== START SERVER LOG ===" - cat ${SERVER_LOG} - echo "=== END SERVER LOG ===" - echo -e "\n***\n*** Test FAILED\n***" -fi - -exit ${RET} diff --git a/qa/L0_perf_analyzer_unit_tests/test.sh b/qa/L0_perf_analyzer_unit_tests/test.sh deleted file mode 100755 index f2a70d23ff..0000000000 --- a/qa/L0_perf_analyzer_unit_tests/test.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -TEST_LOG="./perf_analyzer_unit_tests.log" -PERF_ANALYZER_UNIT_TESTS=../clients/perf_analyzer_unit_tests - -RET=0 - -rm -f $TEST_LOG - -set +e -$PERF_ANALYZER_UNIT_TESTS >> $TEST_LOG 2>&1 -if [ $? -ne 0 ]; then - echo -e "\n***\n*** Test Failed\n***" - RET=1 -fi -set -e - -if [ $RET -eq 0 ]; then - echo -e "\n***\n*** Test Passed\n***" -else - cat $TEST_LOG - echo -e "\n***\n*** Test FAILED\n***" -fi - -exit $RET