diff --git a/Dockerfile.sdk b/Dockerfile.sdk
index f28d7f710e..31a62ea68b 100644
--- a/Dockerfile.sdk
+++ b/Dockerfile.sdk
@@ -42,7 +42,7 @@ ARG TRITON_MODEL_ANALYZER_REPO_TAG=main
 ARG TRITON_ENABLE_GPU=ON
 ARG JAVA_BINDINGS_MAVEN_VERSION=3.8.4
 ARG JAVA_BINDINGS_JAVACPP_PRESETS_TAG=1.5.8
-
+ARG PERF_ANALYZER_BUILD=ON
 # DCGM version to install for Model Analyzer
 ARG DCGM_VERSION=3.3.6
 
@@ -131,11 +131,11 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
           -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_THIRD_PARTY_REPO_TAG} \
           -DTRITON_ENABLE_PERF_ANALYZER=OFF \
           -DTRITON_ENABLE_CC_HTTP=ON -DTRITON_ENABLE_CC_GRPC=ON \
-          -DTRITON_ENABLE_PYTHON_HTTP=OFF -DTRITON_ENABLE_PYTHON_GRPC=OFF \
+          -DTRITON_ENABLE_PYTHON_HTTP=ON -DTRITON_ENABLE_PYTHON_GRPC=ON \
           -DTRITON_ENABLE_JAVA_HTTP=ON \
           -DTRITON_ENABLE_EXAMPLES=ON -DTRITON_ENABLE_TESTS=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} /workspace/client
-RUN make -j16 cc-clients java-clients && \
+RUN make -j16 cc-clients java-clients python-clients && \
     rm -fr ~/.m2
 
 # TODO: PA will rebuild the CC clients since it depends on it.
@@ -145,7 +145,8 @@ RUN make -j16 cc-clients java-clients && \
 # the python client until now. Post-migration we should focus
 # effort on de-tangling these flows.
 WORKDIR /workspace/pa_build
-RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
+RUN if [ "$PERF_ANALYZER_BUILD" = "ON" ]; then \
+        cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
           -DTRITON_VERSION=`cat /workspace/TRITON_VERSION` \
           -DTRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION} \
           -DTRITON_COMMON_REPO_TAG=${TRITON_COMMON_REPO_TAG} \
@@ -161,12 +162,18 @@ RUN cmake -DCMAKE_INSTALL_PREFIX=/workspace/install \
           -DTRITON_ENABLE_PYTHON_GRPC=ON \
           -DTRITON_PACKAGE_PERF_ANALYZER=ON \
           -DTRITON_ENABLE_GPU=${TRITON_ENABLE_GPU} \
-          /workspace/perf_analyzer
-RUN make -j16 perf-analyzer python-clients
-
-RUN pip3 install build \
-    && cd /workspace/perf_analyzer/genai-perf \
-    && python3 -m build --wheel --outdir /workspace/install/python
+          /workspace/perf_analyzer && \
+        make -j16 perf-analyzer && \
+        pip3 install build && cd /workspace/perf_analyzer/genai-perf && \
+        python3 -m build --wheel --outdir /workspace/install/python; \
+    else \
+        # Prebuilt artifacts: keep one &&-chain so a failed extract or copy fails the build.
+        ls /workspace/perf_analyzer/ && ls /workspace/pa_build && \
+        mkdir -p /workspace/install/bin /workspace/install/python && \
+        tar -xzf /workspace/perf_analyzer/perf_analyzer*.tar.gz -C /workspace/install/bin && \
+        echo "Perf Analyzer binaries was extracted and not build" && \
+        cp /workspace/perf_analyzer/genai_perf-*.whl /workspace/install/python/; \
+    fi
 
 # Install Java API Bindings
 RUN if [ "$TARGETPLATFORM" = "linux/amd64" ]; then \
@@ -238,11 +245,12 @@ COPY --from=sdk_build /workspace/client/src/python/library/tests/* qa/python_cli
 # Install an image needed by the quickstart and other documentation.
 COPY qa/images/mug.jpg images/mug.jpg
 
-RUN pip3 install install/python/genai_perf-*.whl
-
+# Install the GenAI-Perf wheel shipped under install/python.
+RUN pip3 install install/python/genai_perf-*.whl
+
 # Install the dependencies needed to run the client examples. These
 # are not needed for building but including them allows this image to
 # be used to run the client examples.
 RUN pip3 install --upgrade "numpy<2" pillow attrdict && \
     find install/python/ -maxdepth 1 -type f -name \
          "tritonclient-*linux*.whl" | xargs printf -- '%s[all]' | \
diff --git a/qa/L0_perf_analyzer/nginx.conf b/qa/L0_perf_analyzer/nginx.conf
deleted file mode 100644
index 4a7dfcc04a..0000000000
--- a/qa/L0_perf_analyzer/nginx.conf
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-server {
-   listen 443 ssl;
-   server_name localhost;
-
-   ssl_certificate /etc/nginx/cert.crt;
-   ssl_certificate_key /etc/nginx/cert.key;
-
-    location / {
-              proxy_pass http://localhost:8000;
-              proxy_http_version 1.1;
-              }
-}
diff --git a/qa/L0_perf_analyzer/perf_analyzer_profile_export_schema.json b/qa/L0_perf_analyzer/perf_analyzer_profile_export_schema.json
deleted file mode 100644
index d0feacd9b4..0000000000
--- a/qa/L0_perf_analyzer/perf_analyzer_profile_export_schema.json
+++ /dev/null
@@ -1,95 +0,0 @@
-{
-    "$schema": "https://json-schema.org/draft/2020-12/schema",
-    "$id": "https://github.com/triton-inference-server/client/blob/main/src/c%2B%2B/perf_analyzer/examples/schema.json",
-    "title": "Perf Analyzer output data",
-    "description": "A json file describing the output from a Perf Analyzer run.",
-    "type": "object",
-    "required": [
-        "experiments",
-        "version"
-    ],
-    "properties": {
-        "experiments": {
-            "description": "The array of all experiments run by Perf Analyzer.",
-            "type": "array",
-            "required": [
-                "experiment",
-                "requests",
-                "window_boundaries"
-            ],
-            "minItems": 1,
-            "uniqueItems": true,
-            "items": {
-                "type": "object",
-                "properties": {
-                    "experiment": {
-                        "description": "A single experiment run by Perf Analyzer.",
-                        "type": "object",
-                        "required": [
-                            "mode",
-                            "value"
-                        ],
-                        "minItems": 1,
-                        "maxItems": 1,
-                        "properties": {
-                            "mode": {
-                                "description": "Operating mode of Perf Analyzer: For example, 'concurrency' or 'request rate'.",
-                                "type": "string"
-                            },
-                            "value": {
-                                "description": "Concurrency or request rate for the current experiment.",
-                                "type": "integer"
-                            }
-                        }
-                    },
-                    "requests": {
-                        "description": "The array of requests sent by Perf Analyzer for this experiment.",
-                        "type": "array",
-                        "items": {
-                            "$ref": "#/properties/experiments/items/properties/$defs/request"
-                        }
-                    },
-                    "$defs": {
-                        "request": {
-                            "description": "Info for a single request.",
-                            "type": "object",
-                            "required": [
-                                "timestamp",
-                                "response_timestamps"
-                            ],
-                            "properties": {
-                                "timestamp": {
-                                    "description": "Time stamp of the request.",
-                                    "type": "integer"
-                                },
-                                "sequence_id": {
-                                    "description": "The sequence_id of the request.",
-                                    "type": "integer"
-                                },
-                                "response_timestamps": {
-                                    "description": "All associated responses to this request.",
-                                    "type": "array",
-                                    "items": {
-                                        "type": "integer"
-                                    }
-                                }
-                            }
-                        }
-                    },
-                    "window_boundaries": {
-                        "description": "An array of time stamps describing window boundaries.",
-                        "type": "array",
-                        "items": {
-                            "type": "integer"
-                        },
-                        "uniqueItems": true
-                    }
-                }
-            }
-        },
-        "version": {
-            "description": "The version of Perf Analyzer that generated the report.",
-            "type": "string"
-        }
-    }
-}
\ No newline at end of file
diff --git a/qa/L0_perf_analyzer/test.sh b/qa/L0_perf_analyzer/test.sh
deleted file mode 100755
index 49c7e72e48..0000000000
--- a/qa/L0_perf_analyzer/test.sh
+++ /dev/null
@@ -1,1164 +0,0 @@
-#!/bin/bash
-# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
-if [ "$#" -ge 1 ]; then
-    REPO_VERSION=$1
-fi
-if [ -z "$REPO_VERSION" ]; then
-    echo -e "Repository version must be specified"
-    echo -e "\n***\n*** Test Failed\n***"
-    exit 1
-fi
-if [ ! -z "$TEST_REPO_ARCH" ]; then
-    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
-fi
-
-export CUDA_VISIBLE_DEVICES=0
-
-CLIENT_LOG="./perf_analyzer.log"
-PERF_ANALYZER=../clients/perf_analyzer
-
-DATADIR=`pwd`/models
-TESTDATADIR=`pwd`/test_data
-
-INT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/int_data.json
-INT_DIFFSHAPE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/int_data_diff_shape.json
-INT_OPTIONAL_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/int_data_optional.json
-FLOAT_DIFFSHAPE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/float_data_with_shape.json
-STRING_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/string_data.json
-STRING_WITHSHAPE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/string_data_with_shape.json
-SEQ_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/seq_data.json
-SHAPETENSORADTAFILE=`pwd`/../common/perf_analyzer_input_data_json/shape_tensor_data.json
-IMAGE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/image_data.json
-
-OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/output.json
-NON_ALIGNED_OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/non_aligned_output.json
-WRONG_OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/wrong_output.json
-WRONG_OUTPUT_2_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/wrong_output_2.json
-
-SEQ_OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/seq_output.json
-SEQ_WRONG_OUTPUT_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/seq_wrong_output.json
-
-REPEAT_INT32_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/repeat_int32_data.json
-
-TRACE_FILE="trace.json"
-
-SERVER=/opt/tritonserver/bin/tritonserver
-SERVER_ARGS="--model-repository=${DATADIR} --trace-config triton,file=${TRACE_FILE}"
-SERVER_LOG="./inference_server.log"
-
-ERROR_STRING="error | Request count: 0 | : 0 infer/sec"
-
-STABILITY_THRESHOLD="100"
-
-source ../common/util.sh
-
-rm -f $SERVER_LOG $CLIENT_LOG
-rm -rf $DATADIR $TESTDATADIR $ENSEMBLE_DATADIR
-
-mkdir -p $DATADIR
-# Copy fixed-shape models
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_int32_int32_int32 $DATADIR/
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_nobatch_int32_int32_int32 $DATADIR/
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_object_object_object $DATADIR/
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_nobatch_object_object_object $DATADIR/
-
-# Copy a variable-shape models
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_object_int32_int32 $DATADIR/
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/
-
-# Copy shape tensor models
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/
-
-# Copying ensemble including a sequential model
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_sequence_model_repository/simple_savedmodel_sequence_object $DATADIR
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_sequence_model_repository/nop_TYPE_FP32_-1 $DATADIR
-
-# Copying variable sequence model
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_sequence_model_repository/graphdef_sequence_float32 $DATADIR
-
-mkdir $DATADIR/nop_TYPE_FP32_-1/1
-
-# Copy inception model to the model repository
-cp -r /data/inferenceserver/${REPO_VERSION}/tf_model_store/inception_v1_graphdef $DATADIR
-
-# Copy resnet50v1.5_fp16
-cp -r /data/inferenceserver/${REPO_VERSION}/perf_model_store/resnet50v1.5_fp16_savedmodel $DATADIR
-
-# Copy and customize custom_zero_1_float32
-cp -r ../custom_models/custom_zero_1_float32 $DATADIR && \
-  mkdir $DATADIR/custom_zero_1_float32/1 && \
-  (cd $DATADIR/custom_zero_1_float32 && \
-    echo "parameters [" >> config.pbtxt && \
-        echo "{ key: \"execute_delay_ms\"; value: { string_value: \"100\" }}" >> config.pbtxt && \
-        echo "]" >> config.pbtxt)
-
-# Copy and customize optional inputs model
-cp -r ../python_models/optional $DATADIR && \
-  mkdir $DATADIR/optional/1 && \
-  mv $DATADIR/optional/model.py $DATADIR/optional/1 && \
-  sed -i 's/max_batch_size: 0/max_batch_size: 2/g' $DATADIR/optional/config.pbtxt
-
-# Copy decoupled model
-git clone --depth=1 https://github.com/triton-inference-server/python_backend
-mkdir -p $DATADIR/repeat_int32/1
-cp python_backend/examples/decoupled/repeat_config.pbtxt $DATADIR/repeat_int32/config.pbtxt
-cp python_backend/examples/decoupled/repeat_model.py $DATADIR/repeat_int32/1/model.py
-
-# Generating test data
-mkdir -p $TESTDATADIR
-for INPUT in INPUT0 INPUT1; do
-    for i in {1..16}; do
-        echo '1' >> $TESTDATADIR/${INPUT}
-    done
-done
-
-RET=0
-
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    cat $SERVER_LOG
-    exit 1
-fi
-
-
-# Test whether there was a conflict in sending sequences. This should
-# be done before other testing as the server might emit this warning
-# in certain test cases that are expected to raise this warning
-SERVER_ERROR_STRING="The previous sequence did not end before this sequence start"
-
-set +e
-$PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_object_object -p2000 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -eq 0 ]; then
-  cat $CLIENT_LOG
-  echo -e "\n***\n*** Test Failed: Expected an error when using dynamic shapes in string inputs\n***"
-  RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "input INPUT0 contains dynamic shape, provide shapes to send along with the request" | wc -l) -ne 0 ]; then
-  cat $CLIENT_LOG
-  echo -e "\n***\n*** Test Failed: \n***"
-  RET=1
-fi
-
-# Testing with ensemble and sequential model variants
-$PERF_ANALYZER -v -i grpc -m  simple_savedmodel_sequence_object -p 2000 -t5 --streaming \
---input-data=$SEQ_JSONDATAFILE  --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed: Sequence conflict when maintaining concurrency\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -i grpc -m  simple_savedmodel_sequence_object -p 1000 --request-rate-range 100:200:50 --streaming \
---input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-if [ $(cat $SERVER_LOG |  grep "${SERVER_ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $SERVER_LOG |  grep "${SERVER_ERROR_STRING}"
-    echo -e "\n***\n*** Test Failed: Sequence conflict\n***"
-    RET=1
-fi
-set -e
-
-for PROTOCOL in grpc http; do
-
-    # Testing simple configurations with different shared memory types
-    for SHARED_MEMORY_TYPE in none system cuda; do
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 \
-    --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-
-        $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 -a \
-    --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    # TODO Add back testing with preprocess_inception_ensemble model
-
-    # Testing with inception model
-    for SHARED_MEMORY_TYPE in none system cuda; do
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m inception_v1_graphdef -t 1 -p2000 -b 1 \
-    --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-
-        $PERF_ANALYZER -v -i $PROTOCOL -m inception_v1_graphdef -t 1 -p2000 -b 1 -a \
-    --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    # Testing with resnet50 models with large batch sizes
-    for SHARED_MEMORY_TYPE in none system cuda; do
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m inception_v1_graphdef -t 2 -p2000 -b 64 \
-    --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-
-        $PERF_ANALYZER -v -i $PROTOCOL -m inception_v1_graphdef -t 2 -p2000 -b 64 \
-    --shared-memory=$SHARED_MEMORY_TYPE -a -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    # Test perf client behavior on different model with different batch size
-    for MODEL in graphdef_nobatch_int32_int32_int32 graphdef_int32_int32_int32; do
-        # Valid batch size
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m $MODEL -t 1 -p2000 -b 1 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-
-        # Invalid batch sizes
-        for STATIC_BATCH in 0 10; do
-            set +e
-            $PERF_ANALYZER -v -i $PROTOCOL -m $MODEL -t 1 -p2000 -b $STATIC_BATCH -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-            if [ $? -eq 0 ]; then
-                cat $CLIENT_LOG
-                echo -e "\n***\n*** Test Failed\n***"
-                RET=1
-            fi
-            set -e
-        done
-    done
-
-    # Testing with the new arguments
-    set +e
-    $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-
-    $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 --concurrency-range 1:5:2 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG | grep "error | Request count: 0 | : 0 infer/sec\|: 0 usec|Request concurrency: 2" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-
-    $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 --concurrency-range 1:5:2 \
-    --input-data=${INT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG | grep "error | Request count: 0 | : 0 infer/sec\|: 0 usec|Request concurrency: 2" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-
-    $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 --request-rate-range 1000:2000:500 \
-    -p1000 -b 1 -a -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-
-    $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 --request-rate-range 1000:2000:500 \
-    --input-data=${INT_JSONDATAFILE} -p1000 -b 1 -a -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-
-    # Binary search for request rate mode
-    $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_int32 --request-rate-range 1000:2000:100 -p1000 -b 1 \
-    -a --binary-search --request-distribution "poisson" -l 10 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    set -e
-
-    # Binary search for concurrency range mode and make sure it doesn't hang
-    $PERF_ANALYZER -v -a --request-distribution "poisson" --shared-memory none \
-    --percentile 99 --binary-search --concurrency-range 1:8:2 -l 5 \
-    -m graphdef_int32_int32_int32 -b 1 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1 &
-    PA_PID=$!
-    if [ "$PA_PID" == "0" ]; then
-        echo -e "\n***\n*** Failed to start $PERF_ANALYZER\n***"
-        cat $CLIENT_LOG
-        RET=1
-    fi
-    # wait for PA to finish running
-    sleep 200
-    if ps -p $PA_PID > /dev/null; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** $PERF_ANALYZER is hanging after 200 s\n***"
-        kill $PA_PID
-        RET=1
-    fi
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    set -e
-
-    # Testing with combinations of string input and shared memory types
-    for SHARED_MEMORY_TYPE in none system cuda; do
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_object_object --string-data=1 -p2000 \
-    --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    # Testing with combinations of file inputs and shared memory types
-    for SHARED_MEMORY_TYPE in none system cuda; do
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_object_object --input-data=$TESTDATADIR -p2000 \
-    --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    for SHARED_MEMORY_TYPE in none system cuda; do
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_object_object --input-data=$STRING_JSONDATAFILE \
-    --input-data=$STRING_JSONDATAFILE -p2000 --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    # Testing with combinations of variable inputs and shared memory types
-    for SHARED_MEMORY_TYPE in none system cuda; do
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_int32_int32 --input-data=$TESTDATADIR \
-    --shape INPUT0:2,8 --shape INPUT1:2,8 -p2000 --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} \
-    >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    for SHARED_MEMORY_TYPE in none system cuda; do
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_object_int32_int32 --input-data=$STRING_WITHSHAPE_JSONDATAFILE \
-    --shape INPUT0:2,8 --shape INPUT1:2,8 -p2000 --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} \
-    >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    set +e
-    $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_float32 --shape INPUT0:2,8,2 \
-    --shape INPUT1:2,8,2 -p2000 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    set -e
-
-    # Trying to batch tensors with different shape
-    for SHARED_MEMORY_TYPE in none system cuda; do
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m graphdef_int32_int32_float32 --shape INPUT0:2,8,2 --shape INPUT1:2,8,2 -p2000 -b 4 \
-    --shared-memory=$SHARED_MEMORY_TYPE --input-data=$INT_DIFFSHAPE_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -eq 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG | grep -P "The supplied shape .+ is incompatible with the model's input shape" | wc -l) -eq 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    # Shape tensor I/O model (server needs the shape tensor on the CPU)
-    for SHARED_MEMORY_TYPE in none system; do
-        set +e
-        $PERF_ANALYZER -v -i $PROTOCOL -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \
-    --shape DUMMY_INPUT0:4,4 -p2000 --shared-memory=$SHARED_MEMORY_TYPE -b 8 -s ${STABILITY_THRESHOLD} \
-    >$CLIENT_LOG 2>&1
-        if [ $? -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG | grep ": 0 infer/sec\|: 0 usec" | wc -l) -ne 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    set +e
-    $PERF_ANALYZER -v -i $PROTOCOL -m  simple_savedmodel_sequence_object -p 2000 -t5 --sync \
-    --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-
-    $PERF_ANALYZER -v -i $PROTOCOL -m  simple_savedmodel_sequence_object -p 2000 -t5 --sync \
-    --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-
-    $PERF_ANALYZER -v -i $PROTOCOL -m  simple_savedmodel_sequence_object -p 1000 --request-rate-range 100:200:50 --sync \
-    --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    set -e
-
-
-    # Testing with variable ensemble model. This unit specifies different shape values
-    # for different inferences.
-    for SHARED_MEMORY_TYPE in none system cuda; do
-        set +e
-        # FIXME: Enable HTTP when the server is able to correctly return the complex error messages.
-        $PERF_ANALYZER -v -i grpc -m graphdef_sequence_float32 --shape INPUT:2 --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE \
-    --input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE -p2000 --shared-memory=$SHARED_MEMORY_TYPE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-        if [ $? -eq 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        if [ $(cat $CLIENT_LOG |  grep -P "The supplied shape .+ is incompatible with the model's input shape" | wc -l) -eq 0 ]; then
-            cat $CLIENT_LOG
-            echo -e "\n***\n*** Test Failed\n***"
-            RET=1
-        fi
-        set -e
-    done
-
-    # Testing that trace logging works: the run must succeed and REQUEST_START must appear in ${TRACE_FILE}*
-    set +e  # failures are recorded in RET rather than aborting the script
-    rm ${TRACE_FILE}*  # start without any existing file carrying the trace prefix
-    $PERF_ANALYZER -v -i $PROTOCOL -m simple_savedmodel_sequence_object -p 2000 -t5 --sync \
-    --trace-level TIMESTAMPS --trace-rate 1000 --trace-count 100 --log-frequency 10 \
-    --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if ! compgen -G "$TRACE_FILE*" > /dev/null; then  # no path matches the trace prefix
-        echo -e "\n***\n*** Test Failed. $TRACE_FILE failed to generate.\n***"
-        RET=1
-    elif [ $(cat ${TRACE_FILE}* |  grep "REQUEST_START" | wc -l) -eq 0 ]; then  # backticks below are escaped so they print literally
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed. Did not find \`REQUEST_START\` in $TRACE_FILE \n***"
-        RET=1
-    fi
-    curl localhost:8000/v2/trace/setting -d '{"trace_level":["OFF"]}'  # ask the server to set trace_level to OFF
-    set -e
-
-    # Testing that setting trace file does not work
-    set +e
-    $PERF_ANALYZER -v -i $PROTOCOL -m simple_savedmodel_sequence_object \
-    --trace-file $TRACE_FILE >$CLIENT_LOG 2>&1
-    if [ $? -eq 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed. Expected to fail for unknown arg --trace-file"
-        RET=1
-    fi
-    curl localhost:8000/v2/trace/setting -d '{"trace_level":["OFF"]}'
-    set -e
-done
-
-# Test with output validation
-set +e
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 --input-data=${NON_ALIGNED_OUTPUT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "The 'validation_data' field doesn't align with 'data' field in the json file" | wc -l) -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 --input-data=${WRONG_OUTPUT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "mismatch in the data provided" | wc -l) -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 --input-data=${WRONG_OUTPUT_2_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "Output doesn't match expected output" | wc -l) -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 --input-data=${OUTPUT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -m simple_savedmodel_sequence_object -i grpc --streaming \
---input-data=${SEQ_WRONG_OUTPUT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "Output doesn't match expected output" | wc -l) -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -m simple_savedmodel_sequence_object -i grpc --streaming \
---input-data=${SEQ_OUTPUT_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-set -e
-
-## Testing with very large concurrencies and large dataset
-INPUT_DATA_OPTION="--input-data $SEQ_JSONDATAFILE "
-for i in {1..9}; do
-   INPUT_DATA_OPTION=" ${INPUT_DATA_OPTION} ${INPUT_DATA_OPTION}"
-done
-set +e
-$PERF_ANALYZER -v -m  simple_savedmodel_sequence_object -p 10000 --concurrency-range 1500:2000:250 -i grpc --streaming \
-${INPUT_DATA_OPTION} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-   cat $CLIENT_LOG
-   echo -e "\n***\n*** Test Failed\n***"
-   RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-   cat $CLIENT_LOG
-   echo -e "\n***\n*** Test Failed\n***"
-   RET=1
-fi
-set -e
-
-## Test count_windows mode
-set +e
-
-# Send incorrect shape and make sure that perf_analyzer doesn't hang
-$PERF_ANALYZER -v -m graphdef_object_int32_int32 --measurement-mode "count_windows" \
-    --shape INPUT0:1,8,100 --shape INPUT1:2,8 --string-data=1 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -eq 0 ]; then
-   cat $CLIENT_LOG
-   echo -e "\n***\n*** Test Failed\n***"
-   RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "unexpected shape for input 'INPUT0' for model" | wc -l) -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -m graphdef_object_int32_int32 --measurement-mode "count_windows" \
-    --shape INPUT0:2,8 --shape INPUT1:2,8 --string-data=1 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-   cat $CLIENT_LOG
-   echo -e "\n***\n*** Test Failed\n***"
-   RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-   cat $CLIENT_LOG
-   echo -e "\n***\n*** Test Failed\n***"
-   RET=1
-fi
-set -e
-
-# Test with optional inputs missing but still valid
-set +e
-$PERF_ANALYZER -v -m optional --measurement-mode "count_windows" \
-    --input-data=${INT_OPTIONAL_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-   cat $CLIENT_LOG
-   echo -e "\n***\n*** Test Failed\n***"
-   RET=1
-fi
-set -e
-
-# Test with optional inputs missing and invalid
-set +e
-OPTIONAL_INPUT_ERROR_STRING="For batch sizes larger than 1, the same set of
-inputs must be specified for each batch. You cannot use different set of
-optional inputs for each individual batch."
-$PERF_ANALYZER -v -m optional -b 2 --measurement-mode "count_windows" \
-    --input-data=${INT_OPTIONAL_JSONDATAFILE} -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -eq 0 ]; then
-   cat $CLIENT_LOG
-   echo -e "\n***\n*** Test Failed\n***"
-   RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${OPTIONAL_INPUT_ERROR_STRING}" | wc -l) -eq 0 ]; then
-   cat $CLIENT_LOG
-   echo -e "\n***\n*** Test Failed\n***"
-   RET=1
-fi
-set -e
-
-
-# Test Custom request rate option
-CUSTOM_SCHEDULE_FILE=$TESTDATADIR/custom.schedule
-echo '30000' >> $CUSTOM_SCHEDULE_FILE
-echo '10000' >> $CUSTOM_SCHEDULE_FILE
-echo '40000' >> $CUSTOM_SCHEDULE_FILE
-echo '20000' >> $CUSTOM_SCHEDULE_FILE
-echo '25000' >> $CUSTOM_SCHEDULE_FILE
-
-set +e
-$PERF_ANALYZER -v -i grpc -m graphdef_int32_int32_int32 --request-intervals $CUSTOM_SCHEDULE_FILE >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "Request Rate: 40" | wc -l) -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed: \n***"
-    RET=1
-fi
-set -e
-
-# Test --serial-sequences mode
-set +e
-$PERF_ANALYZER -v -i $PROTOCOL -m  simple_savedmodel_sequence_object -p 1000 --request-rate-range 100:200:50 --serial-sequences \
-    --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -i $PROTOCOL -m  simple_savedmodel_sequence_object -p 1000 --request-intervals $CUSTOM_SCHEDULE_FILE --serial-sequences \
-    --input-data=$SEQ_JSONDATAFILE -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-set +e
-## Test decoupled model support: run repeat_int32 over streaming gRPC, then validate profile_export.json
-$PERF_ANALYZER -v -m repeat_int32 --input-data=$REPEAT_INT32_JSONDATAFILE \
-    --profile-export-file profile_export.json -i grpc --async --streaming -s \
-    ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-python3 -c "import json ; \
-    requests = json.load(open('profile_export.json'))['experiments'][0]['requests'] ; \
-    assert any(len(r['response_timestamps']) > 1 for r in requests)"
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-check-jsonschema --schemafile perf_analyzer_profile_export_schema.json profile_export.json
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-set -e  # re-enabled only after the checks: with errexit on, a failing command exits before its $? test runs
-
-## Test perf_analyzer with MPI / multiple models
-
-is_synchronized() {
-  # Prints 1 when ranks 0-2 logged "All models on all MPI ranks are stable" at most 5 seconds apart, else 0.
-  local RANK EPOCHS="" EARLIEST LATEST MAX_SPREAD_SECONDS=5
-  for RANK in 0 1 2; do
-    EPOCHS+="$(grep -oP "^\K[^$]+(?=\[1,${RANK}\]<stdout>:All models on all MPI ranks are stable)" 1/rank.${RANK}/stdout | date "+%s" -f -)"$'\n'
-  done
-  EARLIEST=$(printf '%s' "$EPOCHS" | sort -n | head -1)
-  LATEST=$(printf '%s' "$EPOCHS" | sort -n | tail -1)
-  echo $(( (${LATEST}-${EARLIEST}) <= MAX_SPREAD_SECONDS ))
-}
-
-is_stable() {
-  # Usage: is_stable RANK IS_THROUGHPUT. Prints 1 if each of the last three "Pass [N]" samples in
-  # 1/rank.RANK/stdout is within +/-50% of their mean, else 0. IS_THROUGHPUT=1: throughput; 0 or empty: avg latency.
-  local RANK=$1 IS_THROUGHPUT=$2
-  if [ -n "$IS_THROUGHPUT" ] && [ "$IS_THROUGHPUT" != "0" ]; then  # a bare [ $IS_THROUGHPUT ] is also true for "0"
-    local GREP_PATTERN="\[1,$RANK\]<stdout>:  Pass \[[0-9]+\] throughput: \K[0-9]+\.?[0-9]*"
-  else
-    local GREP_PATTERN="\[1,$RANK\]<stdout>:  Pass \[[0-9]+\] throughput: [0-9]+\.?[0-9]* infer/sec. Avg latency: \K[0-9]+"
-  fi
-  local LAST_THREE=$(grep -oP "$GREP_PATTERN" 1/rank.$RANK/stdout | tail -3)  # last three matches, in log order
-  local LAST_MINUS_0=$(sed -n 3p <<< "$LAST_THREE") LAST_MINUS_1=$(sed -n 2p <<< "$LAST_THREE") LAST_MINUS_2=$(sed -n 1p <<< "$LAST_THREE")
-  local MEAN=$(awk "BEGIN {print (($LAST_MINUS_0+$LAST_MINUS_1+$LAST_MINUS_2)/3)}")
-  local STABILITY_THRESHOLD=0.5
-  # Based on this: https://github.com/triton-inference-server/client/blob/main/src/c++/perf_analyzer/inference_profiler.cc#L629-L644
-  local WITHIN_THRESHOLD_0=$(awk "BEGIN {print ($LAST_MINUS_0 >= ((1 - $STABILITY_THRESHOLD) * $MEAN) && $LAST_MINUS_0 <= ((1 + $STABILITY_THRESHOLD) * $MEAN))}")
-  local WITHIN_THRESHOLD_1=$(awk "BEGIN {print ($LAST_MINUS_1 >= ((1 - $STABILITY_THRESHOLD) * $MEAN) && $LAST_MINUS_1 <= ((1 + $STABILITY_THRESHOLD) * $MEAN))}")
-  local WITHIN_THRESHOLD_2=$(awk "BEGIN {print ($LAST_MINUS_2 >= ((1 - $STABILITY_THRESHOLD) * $MEAN) && $LAST_MINUS_2 <= ((1 + $STABILITY_THRESHOLD) * $MEAN))}")
-  echo $(($WITHIN_THRESHOLD_0 && $WITHIN_THRESHOLD_1 && $WITHIN_THRESHOLD_2))
-}
-
-set +e  # three MPI ranks, one model each; failures are recorded in RET rather than aborting the script
-mpiexec --allow-run-as-root \
-  -n 1 --merge-stderr-to-stdout --output-filename . --tag-output --timestamp-output \
-    $PERF_ANALYZER -v -m graphdef_int32_int32_int32 \
-      --measurement-mode count_windows -s 50 --enable-mpi : \
-  -n 1 --merge-stderr-to-stdout --output-filename . --tag-output --timestamp-output \
-    $PERF_ANALYZER -v -m graphdef_nobatch_int32_int32_int32 \
-      --measurement-mode count_windows -s 50 --enable-mpi : \
-  -n 1 --merge-stderr-to-stdout --output-filename . --tag-output --timestamp-output \
-    $PERF_ANALYZER -v -m custom_zero_1_float32 \
-      --measurement-mode count_windows -s 50 --enable-mpi
-if [ $? -ne 0 ]; then
-   cat 1/rank.0/stdout 1/rank.1/stdout 1/rank.2/stdout
-   echo -e "\n***\n*** Perf Analyzer returned non-zero exit code\n***"
-   echo -e "\n***\n*** Test Failed\n***"
-   RET=1
-else
-  if [ $(is_synchronized) -eq 0 ]; then
-    cat 1/rank.0/stdout 1/rank.1/stdout 1/rank.2/stdout
-    echo -e "\n***\n*** All models did not finish profiling at almost the same time\n***"
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-  fi
-  # is_stable arguments: rank index, then 1 to check throughput or 0 to check latency
-  RANK_0_THROUGHPUT_IS_STABLE=$(is_stable 0 1)
-  RANK_0_LATENCY_IS_STABLE=$(is_stable 0 0)
-  RANK_1_THROUGHPUT_IS_STABLE=$(is_stable 1 1)
-  RANK_1_LATENCY_IS_STABLE=$(is_stable 1 0)
-  RANK_2_THROUGHPUT_IS_STABLE=$(is_stable 2 1)
-  RANK_2_LATENCY_IS_STABLE=$(is_stable 2 0)
-
-  ALL_STABLE=$(( \
-    $RANK_0_THROUGHPUT_IS_STABLE && \
-    $RANK_0_LATENCY_IS_STABLE && \
-    $RANK_1_THROUGHPUT_IS_STABLE && \
-    $RANK_1_LATENCY_IS_STABLE && \
-    $RANK_2_THROUGHPUT_IS_STABLE && \
-    $RANK_2_LATENCY_IS_STABLE))
-
-  if [ $ALL_STABLE -eq 0 ]; then
-    cat 1/rank.0/stdout 1/rank.1/stdout 1/rank.2/stdout
-    echo -e "\n***\n*** All models did not stabilize\n***"
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-  fi
-
-  rm -rf 1  # remove the directory holding the per-rank stdout files read above
-fi
-set -e
-
-## Test perf_analyzer without MPI library (`libmpi.so`) available
-
-rm -rf /opt/hpcx/ompi/lib/libmpi*
-
-set +e
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 -s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-   cat $CLIENT_LOG
-   echo -e "\n***\n*** Test Failed\n***"
-   RET=1
-fi
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
-
-# Generate valid CA
-openssl genrsa -passout pass:1234 -des3 -out ca.key 4096
-openssl req -passin pass:1234 -new -x509 -days 365 -key ca.key -out ca.crt -subj  "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Test/CN=Root CA"
-
-# Generate valid Server Key/Cert
-openssl genrsa -passout pass:1234 -des3 -out server.key 4096
-openssl req -passin pass:1234 -new -key server.key -out server.csr -subj  "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Server/CN=localhost"
-openssl x509 -req -passin pass:1234 -days 365 -in server.csr -CA ca.crt -CAkey ca.key -set_serial 01 -out server.crt
-
-# Remove passphrase from the Server Key
-openssl rsa -passin pass:1234 -in server.key -out server.key
-
-# Generate valid Client Key/Cert
-openssl genrsa -passout pass:1234 -des3 -out client.key 4096
-openssl req -passin pass:1234 -new -key client.key -out client.csr -subj  "/C=SP/ST=Spain/L=Valdepenias/O=Test/OU=Client/CN=localhost"
-openssl x509 -passin pass:1234 -req -days 365 -in client.csr -CA ca.crt -CAkey ca.key -set_serial 01 -out client.crt
-
-# Remove passphrase from Client Key
-openssl rsa -passin pass:1234 -in client.key -out client.key
-
-# Create mutated copies of the client key and certificate (capitalize the first character of each word)
-cp client.key client2.key && sed -i "s/\b\(.\)/\u\1/g" client2.key
-cp client.crt client2.crt && sed -i "s/\b\(.\)/\u\1/g" client2.crt
-
-SERVER_ARGS="--model-repository=${DATADIR} --grpc-use-ssl=1 --grpc-server-cert=server.crt --grpc-server-key=server.key --grpc-root-cert=ca.crt"
-
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    cat $SERVER_LOG
-    exit 1
-fi
-
-# Test gRPC SSL
-set +e
-
-# Test that gRPC protocol with SSL works correctly
-$PERF_ANALYZER -v -i grpc -m graphdef_int32_int32_int32 \
-  --ssl-grpc-use-ssl \
-  --ssl-grpc-root-certifications-file=ca.crt \
-  --ssl-grpc-private-key-file=client.key \
-  --ssl-grpc-certificate-chain-file=client.crt \
-  -s ${STABILITY_THRESHOLD} \
-  > ${CLIENT_LOG}.grpc_success 2>&1
-if [ $? -ne 0 ]; then
-    cat ${CLIENT_LOG}.grpc_success
-    RET=1
-fi
-
-# Test that gRPC protocol with SSL fails with an incorrect client certificate (client2.crt)
-$PERF_ANALYZER -v -i grpc -m graphdef_int32_int32_int32 \
-    --ssl-grpc-use-ssl \
-    --ssl-grpc-root-certifications-file=ca.crt \
-    --ssl-grpc-private-key-file=client.key \
-    --ssl-grpc-certificate-chain-file=client2.crt \
-    -s ${STABILITY_THRESHOLD} \
-    > ${CLIENT_LOG}.grpc_failure 2>&1
-if [ $? -eq 0 ]; then
-    cat ${CLIENT_LOG}.grpc_failure
-    echo -e "\n***\n*** Expected test failure\n***"
-    RET=1
-fi
-
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
-
-cp server.crt /etc/nginx/cert.crt
-cp server.key /etc/nginx/cert.key
-
-SERVER_ARGS="--model-repository=${DATADIR}"
-
-run_server
-if [ "$SERVER_PID" == "0" ]; then
-    echo -e "\n***\n*** Failed to start $SERVER\n***"
-    cat $SERVER_LOG
-    exit 1
-fi
-
-# Setup the new configuration for the proxy. The HTTPS traffic will be
-# redirected to the running instance of server at localhost:8000
-cp nginx.conf /etc/nginx/sites-available/default
-
-# Start the proxy server
-service nginx restart
-
-# Test HTTP SSL
-set +e
-
-# Test that HTTP protocol with SSL works correctly with certificates
-$PERF_ANALYZER -v -u https://localhost:443 -i http -m graphdef_int32_int32_int32 \
-    --ssl-https-verify-peer 1 \
-    --ssl-https-verify-host 2 \
-    --ssl-https-ca-certificates-file ca.crt \
-    --ssl-https-client-certificate-file client.crt \
-    --ssl-https-client-certificate-type PEM \
-    --ssl-https-private-key-file client.key \
-    --ssl-https-private-key-type PEM \
-    -s ${STABILITY_THRESHOLD} \
-    > ${CLIENT_LOG}.https_success 2>&1
-if [ $? -ne 0 ]; then
-    cat ${CLIENT_LOG}.https_success
-    RET=1
-fi
-
-# Test that HTTP protocol with SSL works correctly without certificates
-$PERF_ANALYZER -v -u https://localhost:443 -i http -m graphdef_int32_int32_int32 \
-    --ssl-https-verify-peer 0 \
-    --ssl-https-verify-host 0 \
-    -s ${STABILITY_THRESHOLD} \
-    > ${CLIENT_LOG}.https_success 2>&1
-if [ $? -ne 0 ]; then
-    cat ${CLIENT_LOG}.https_success
-    RET=1
-fi
-
-# Test that HTTP protocol with SSL fails with incorrect key
-$PERF_ANALYZER -v -u https://localhost:443 -i http -m graphdef_int32_int32_int32 \
-    --ssl-https-verify-peer 1 \
-    --ssl-https-verify-host 2 \
-    --ssl-https-ca-certificates-file ca.crt \
-    --ssl-https-client-certificate-file client.crt \
-    --ssl-https-client-certificate-type PEM \
-    --ssl-https-private-key-file client2.key \
-    --ssl-https-private-key-type PEM \
-    -s ${STABILITY_THRESHOLD} \
-    > ${CLIENT_LOG}.https_failure 2>&1
-if [ $? -eq 0 ]; then
-    cat ${CLIENT_LOG}.https_failure
-    echo -e "\n***\n*** Expected test failure\n***"
-    RET=1
-fi
-
-set -e
-
-kill $SERVER_PID
-wait $SERVER_PID
-
-if [ $RET -eq 0 ]; then
-  echo -e "\n***\n*** Test Passed\n***"
-else
-  echo -e "\n***\n*** Test FAILED\n***"
-fi
-
-exit $RET
diff --git a/qa/L0_perf_analyzer_capi/test.sh b/qa/L0_perf_analyzer_capi/test.sh
deleted file mode 100755
index 53196fa762..0000000000
--- a/qa/L0_perf_analyzer_capi/test.sh
+++ /dev/null
@@ -1,320 +0,0 @@
-#!/bin/bash
-# Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# TESTS COPIED FROM L0_perf_analyzer/test.sh
-REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
-if [ "$#" -ge 1 ]; then
-    REPO_VERSION=$1
-fi
-if [ -z "$REPO_VERSION" ]; then
-    echo -e "Repository version must be specified"
-    echo -e "\n***\n*** Test Failed\n***"
-    exit 1
-fi
-if [ ! -z "$TEST_REPO_ARCH" ]; then
-    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
-fi
-
-export CUDA_VISIBLE_DEVICES=0
-
-CLIENT_LOG="./perf_analyzer.log"
-PERF_ANALYZER=../clients/perf_analyzer
-
-DATADIR=`pwd`/models
-TESTDATADIR=`pwd`/test_data
-
-SERVER_LIBRARY_PATH=/opt/tritonserver
-
-FLOAT_DIFFSHAPE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/float_data_with_shape.json
-STRING_WITHSHAPE_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/string_data_with_shape.json
-SEQ_JSONDATAFILE=`pwd`/../common/perf_analyzer_input_data_json/seq_data.json
-SHAPETENSORADTAFILE=`pwd`/../common/perf_analyzer_input_data_json/shape_tensor_data.json
-
-ERROR_STRING="error | Request count: 0 | : 0 infer/sec"
-
-STABILITY_THRESHOLD="9999"
-
-source ../common/util.sh
-
-rm -f $CLIENT_LOG
-rm -rf $DATADIR $TESTDATADIR $ENSEMBLE_DATADIR
-
-mkdir -p $DATADIR
-# Copy fixed-shape models
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_int32_int32_int32 $DATADIR/
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_model_repository/graphdef_object_object_object $DATADIR/
-
-# Copy variable-shape models
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_object_int32_int32 $DATADIR/
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_model_repository/graphdef_int32_int32_float32 $DATADIR/
-
-# Copy shape tensor models
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/plan_zero_1_float32_int32 $DATADIR/
-
-# Copying ensemble including a sequential model
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_sequence_model_repository/savedmodel_sequence_object $DATADIR
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_sequence_model_repository/simple_savedmodel_sequence_object $DATADIR
-
-# Copying variable sequence model
-cp -r /data/inferenceserver/${REPO_VERSION}/qa_variable_sequence_model_repository/graphdef_sequence_float32 $DATADIR
-
-# Copying bls model with undefined variable
-mkdir -p $DATADIR/bls_undefined/1 && \
-    cp ../python_models/bls_undefined/model.py $DATADIR/bls_undefined/1/. && \
-    cp ../python_models/bls_undefined/config.pbtxt $DATADIR/bls_undefined/.
-
-# Generating test data
-mkdir -p $TESTDATADIR
-for INPUT in INPUT0 INPUT1; do
-    for i in {1..16}; do
-        echo '1' >> $TESTDATADIR/${INPUT}
-    done
-done
-
-RET=0
-
-########## Test C API #############
-# Make sure tritonserver is not running first
-set +e
-SERVER_PID=$(pidof tritonserver)
-if [ $? -ne 1 ]; then
-echo -e "\n There was a previous instance of tritonserver, killing \n"
-  kill $SERVER_PID
-  wait $SERVER_PID
-fi
-set -e
-
-# Testing simple configuration
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 \
---service-kind=triton_c_api \
---model-repository=$DATADIR --triton-server-directory=$SERVER_LIBRARY_PATH \
--s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
->$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG | grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-#Testing with string input
-$PERF_ANALYZER -v -m graphdef_object_object_object --string-data=1 -p2000 \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
->$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-# Testing with variable inputs
-$PERF_ANALYZER -v -m graphdef_object_int32_int32 --input-data=$TESTDATADIR \
---shape INPUT0:2,8 --shape INPUT1:2,8 \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
->$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -m graphdef_object_int32_int32 \
---input-data=$STRING_WITHSHAPE_JSONDATAFILE \
---shape INPUT0:2,8 --shape INPUT1:2,8 -p2000 \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
->$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -m graphdef_int32_int32_float32 --shape INPUT0:2,8,2 \
---shape INPUT1:2,8,2 -p2000 \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
->$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-# Shape tensor I/O model (server needs the shape tensor on the CPU)
-$PERF_ANALYZER -v -m plan_zero_1_float32_int32 --input-data=$SHAPETENSORADTAFILE \
---shape DUMMY_INPUT0:4,4 -p2000 -b 8 \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
->$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG | grep ": 0 infer/sec\|: 0 usec" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-$PERF_ANALYZER -v -m  simple_savedmodel_sequence_object -p 2000 -t5 --sync \
--s ${STABILITY_THRESHOLD} \
---input-data=$SEQ_JSONDATAFILE \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-
-set +e
-$PERF_ANALYZER -v -m graphdef_sequence_float32 --shape INPUT:2 \
--s ${STABILITY_THRESHOLD} \
---input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE \
---input-data=$FLOAT_DIFFSHAPE_JSONDATAFILE -p2000 \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH --sync >$CLIENT_LOG 2>&1
-if [ $? -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep -P "The supplied shape .+ is incompatible with the model's input shape" | wc -l) -eq 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-set -e
-
-for SHARED_MEMORY_TYPE in system cuda; do
-    $PERF_ANALYZER -v -m graphdef_int32_int32_int32 -t 1 -p2000 -b 1 \
-    -s ${STABILITY_THRESHOLD} \
-    --shared-memory=$SHARED_MEMORY_TYPE \
-    --service-kind=triton_c_api --model-repository=$DATADIR \
-    --triton-server-directory=$SERVER_LIBRARY_PATH >$CLIENT_LOG 2>&1
-    if [ $? -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat $CLIENT_LOG
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-done
-
-
-$PERF_ANALYZER -v -m graphdef_int32_int32_int32 --request-rate-range 1000:2000:500 -p1000 -b 1 \
---service-kind=triton_c_api --model-repository=$DATADIR \
---triton-server-directory=$SERVER_LIBRARY_PATH -s ${STABILITY_THRESHOLD} \
->$CLIENT_LOG 2>&1
-if [ $? -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-if [ $(cat $CLIENT_LOG |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-set -e
-
-set +e
-# Testing erroneous configuration: profiling bls_undefined is expected to fail with exit code 99.
-# Keep a space before every trailing backslash; `64\` would fuse with the next line into one argument.
-$PERF_ANALYZER -v -m bls_undefined --shape INPUT0:1048576 -t 64 \
---service-kind=triton_c_api \
---model-repository=$DATADIR --triton-server-directory=$SERVER_LIBRARY_PATH \
--s ${STABILITY_THRESHOLD} >$CLIENT_LOG 2>&1
-if [ $? -ne 99 ]; then
-    cat $CLIENT_LOG
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-set -e
-
-# Make sure server is not still running
-set +e
-SERVER_PID=$(pidof tritonserver)
-if [ $? -eq 0 ]; then
-  echo -e "\n Tritonserver did not exit properly, killing \n"
-  kill $SERVER_PID
-  wait $SERVER_PID
-  RET=1
-fi
-set -e
-
-if [ $RET -eq 0 ]; then
-  echo -e "\n***\n*** Test Passed\n***"
-else
-  echo -e "\n***\n*** Test FAILED\n***"
-fi
-exit $RET
diff --git a/qa/L0_perf_analyzer_doc_links/mkdocs.yml b/qa/L0_perf_analyzer_doc_links/mkdocs.yml
deleted file mode 100644
index 41a4bfe485..0000000000
--- a/qa/L0_perf_analyzer_doc_links/mkdocs.yml
+++ /dev/null
@@ -1,36 +0,0 @@
-# Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-site_name: CI Test
-use_directory_urls: False
-docs_dir: "./docs"
-plugins:
-        - htmlproofer
-        - search
-
-markdown_extensions:
-    - toc:
-        permalink: True
diff --git a/qa/L0_perf_analyzer_doc_links/test.sh b/qa/L0_perf_analyzer_doc_links/test.sh
deleted file mode 100755
index d0757bca9e..0000000000
--- a/qa/L0_perf_analyzer_doc_links/test.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-LOG="`pwd`/doc_links.log"
-CONFIG="`pwd`/mkdocs.yml"
-RET=0
-
-# Download necessary packages
-python3 -m pip install mkdocs
-python3 -m pip install mkdocs-htmlproofer-plugin==0.10.3
-
-#Download perf_analyzer docs
-TRITON_REPO_ORGANIZATION=${TRITON_REPO_ORGANIZATION:="http://github.com/triton-inference-server"}
-TRITON_PERF_ANALYZER_REPO_TAG="${TRITON_PERF_ANALYZER_REPO_TAG:=main}"
-git clone -b ${TRITON_PERF_ANALYZER_REPO_TAG} ${TRITON_REPO_ORGANIZATION}/perf_analyzer.git
-cp `pwd`/perf_analyzer/README.md .
-cp -rf `pwd`/perf_analyzer/docs .
-
-# Need to remove all links that start with -- or -. Mkdocs converts all -- to - for anchor links.
-# This breaks all links to cli commands throughout the docs. This will iterate over all
-# files in the docs directory and remove -- and - at the start of options, which allows the
-# tool to check links for correctness.
-for file in `pwd`/docs/*.md
-do
-  echo $file
-  sed -i 's/`-*/`/g' $file
-  sed -i 's/#-*/#/g' $file
-done
-
-exec mkdocs serve -f $CONFIG > $LOG &
-PID=$!
-sleep 20
-
-until [[ (-z `pgrep mkdocs`) ]]; do
-    kill -2 $PID
-    sleep 2
-done
-
-if [[ ! -z `grep "invalid url" $LOG` ]]; then
-    cat $LOG
-    RET=1
-fi
-
-
-if [ $RET -eq 0 ]; then
-    echo -e "\n***\n*** Test PASSED\n***"
-else
-    echo -e "\n***\n*** Test FAILED\n***"
-fi
-exit $RET
diff --git a/qa/L0_perf_analyzer_ground_truth/test.sh b/qa/L0_perf_analyzer_ground_truth/test.sh
deleted file mode 100755
index d5d78e63f4..0000000000
--- a/qa/L0_perf_analyzer_ground_truth/test.sh
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
-if [ "$#" -ge 1 ]; then
-    REPO_VERSION=$1
-fi
-if [ -z "${REPO_VERSION}" ]; then
-    echo -e "Repository version must be specified"
-    echo -e "\n***\n*** Test Failed\n***"
-    exit 1
-fi
-if [ ! -z "$TEST_REPO_ARCH" ]; then
-    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
-fi
-
-source ../common/util.sh
-
-# Setup client/perf_analyzer
-CLIENT_LOG="./perf_analyzer.log"
-PERF_ANALYZER=../clients/perf_analyzer
-
-function check_perf_analyzer_error {
-    ERROR_STRING="error | Request count: 0 | : 0 infer/sec"
-    CLIENT_RET="$1"
-    if [ ${CLIENT_RET} -ne 0 ]; then
-        cat ${CLIENT_LOG}
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat ${CLIENT_LOG} |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat ${CLIENT_LOG}
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-}
-
-# Checks that the model infer/sec performance is equal to an expected value
-# +/- some tolerance.
-# $1: csv result file from PA run
-# $2: expected infer/sec value
-# $3: tolerance for expected value equality
-function check_performance {
-    # get the boundary values based on the tolerance percentage
-    MIN=$(python3 -c "print(${2} * (1 - ${3}))")
-    MAX=$(python3 -c "print(${2} * (1 + ${3}))")
-
-    # delete all but the 2nd line in the resulting file
-    # then get the 2nd column value which is the infer/sec measurement
-    report_val=$(sed '2!d' $1 | awk -F ',' {'print $2'})
-
-    # check if within tolerance
-    ret=$(python3 -c "print(${report_val} >= ${MIN} and ${report_val} <= ${MAX})")
-    if [ "$ret" = "False" ]; then
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-}
-
-# Iterate over the grpc results to ensure gRPC times are greater than 0
-# $1: client log file
-# example line: Avg gRPC time: 42648 usec (marshal 6 usec + response wait 42640 usec + unmarshal 2 usec)
-function check_grpc_time {
-    grep "gRPC" $1 | awk '{print $4}' | while read -r line; do
-        if [ $line -eq 0 ]; then
-            RET=1
-        fi
-    done
-}
-
-# Create input_data.json to communicate the requested model delay
-# $1: desired model delay
-function create_input_data {
-    echo "{\"data\":[{\"INPUT0\" : [${1}]}]}" > input_data.json
-}
-
-# Setup server
-export CUDA_VISIBLE_DEVICES=0
-SERVER=/opt/tritonserver/bin/tritonserver
-SERVER_ARGS="--model-repository=`pwd`/models"
-SERVER_LOG="./inference_server.log"
-
-rm -f $SERVER_LOG $CLIENT_LOG
-MODEL_DIR="./models"
-rm -fr ${MODEL_DIR} && mkdir ${MODEL_DIR}
-MODELS="ground_truth"
-
-for model in ${MODELS}; do
-    # Add version directory to each model if non-existent
-    mkdir -p "${MODEL_DIR}/${model}/1"
-    cp ../python_models/${model}/model.py     ./models/${model}/1/model.py
-    cp ../python_models/${model}/config.pbtxt ./models/${model}/config.pbtxt
-done
-
-# Run server
-run_server
-if [ "${SERVER_PID}" == "0" ]; then
-    echo -e "\n***\n*** Failed to start ${SERVER}\n***"
-    cat ${SERVER_LOG}
-    exit 1
-fi
-
-# Run perf_analyzer
-set +e
-RET=0
-PROTOCOLS="http grpc"
-OUTPUT_FILE="results"
-MODEL_DELAYS=(0.05 0.5)
-TOLERANCE="0.05"
-
-for model_delay in ${MODEL_DELAYS[@]}; do
-    create_input_data ${model_delay}
-    EXPECTED_RESULT=$(python3 -c "print(1 / ${model_delay})")
-    for protocol in ${PROTOCOLS}; do
-        for model in ${MODELS}; do
-        echo "================================================================"
-        echo "[PERMUTATION] Protocol=${protocol} Model=${model}"
-        echo "================================================================"
-
-            ${PERF_ANALYZER} -v -i ${protocol} --concurrency-range 2 --input-data input_data.json -m ${model} -f ${OUTPUT_FILE} | tee ${CLIENT_LOG} 2>&1
-            check_perf_analyzer_error $?
-
-            check_performance ${OUTPUT_FILE} ${EXPECTED_RESULT} ${TOLERANCE}
-
-            if [ "${protocol}" == "grpc" ]; then
-                check_grpc_time ${CLIENT_LOG}
-            fi
-        done;
-    done;
-done;
-
-
-set -e
-
-# Cleanup
-kill $SERVER_PID
-wait $SERVER_PID
-
-if [ $RET -eq 0 ]; then
-  echo -e "\n***\n*** Test Passed\n***"
-else
-  echo "=== START SERVER LOG ==="
-  cat ${SERVER_LOG}
-  echo "=== END SERVER LOG ==="
-  echo "=== START CLIENT LOG ==="
-  cat ${CLIENT_LOG}
-  echo "=== END CLIENT LOG ==="
-  echo -e "\n***\n*** Test FAILED\n***"
-fi
-
-exit ${RET}
diff --git a/qa/L0_perf_analyzer_report/test.sh b/qa/L0_perf_analyzer_report/test.sh
deleted file mode 100755
index 469d11ce3a..0000000000
--- a/qa/L0_perf_analyzer_report/test.sh
+++ /dev/null
@@ -1,175 +0,0 @@
-#!/bin/bash
-# Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
-if [ "$#" -ge 1 ]; then
-    REPO_VERSION=$1
-fi
-if [ -z "${REPO_VERSION}" ]; then
-    echo -e "Repository version must be specified"
-    echo -e "\n***\n*** Test Failed\n***"
-    exit 1
-fi
-if [ ! -z "$TEST_REPO_ARCH" ]; then
-    REPO_VERSION=${REPO_VERSION}_${TEST_REPO_ARCH}
-fi
-
-source ../common/util.sh
-
-# Setup client/perf_analyzer
-CLIENT_LOG="./perf_analyzer.log"
-PERF_ANALYZER=../clients/perf_analyzer
-
-function check_perf_analyzer_error {
-    ERROR_STRING="error | Request count: 0 | : 0 infer/sec"
-    CLIENT_RET="$1"
-    if [ ${CLIENT_RET} -ne 0 ]; then
-        cat ${CLIENT_LOG}
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-    if [ $(cat ${CLIENT_LOG} |  grep "${ERROR_STRING}" | wc -l) -ne 0 ]; then
-        cat ${CLIENT_LOG}
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-}
-
-function check_cache_output {
-    # Validate cache info in perf_analyzer output
-    CACHE_STRING="Cache hit count"
-    if [ $(cat ${CLIENT_LOG} |  grep -i "${CACHE_STRING}" | wc -l) -eq 0 ]; then
-        cat ${CLIENT_LOG}
-	echo "ERROR: No cache hit count found in output"
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-
-    # Validate non-zero number of cache hits
-    ERROR_STRING="Cache hit count: 0"
-    num_cache_hit_lines=$(cat ${CLIENT_LOG} |  grep -i "${CACHE_STRING}" | wc -l)
-    num_cache_hit_zero_lines=$(cat ${CLIENT_LOG} |  grep -i "${ERROR_STRING}" | wc -l)
-    if [ ${num_cache_hit_zero_lines} -eq ${num_cache_hit_lines} ]; then
-        cat ${CLIENT_LOG}
-	echo "ERROR: All cache hit counts were zero, expected a non-zero number of cache hits"
-        echo -e "\n***\n*** Test Failed\n***"
-        RET=1
-    fi
-}
-
-# Setup server
-export CUDA_VISIBLE_DEVICES=0
-SERVER=/opt/tritonserver/bin/tritonserver
-# --response-cache-byte-size must be non-zero to test models with cache enabled
-SERVER_ARGS="--model-repository=`pwd`/models --response-cache-byte-size=8192"
-SERVER_LOG="./inference_server.log"
-
-# Setup model repository from existing qa_model_repository
-rm -f $SERVER_LOG $CLIENT_LOG
-MODEL_DIR="./models"
-rm -fr ${MODEL_DIR} && mkdir ${MODEL_DIR}
-ENSEMBLE_MODEL="simple_onnx_float32_float32_float32"
-COMPOSING_MODEL="onnx_float32_float32_float32"
-ENSEMBLE_MODEL_CACHE_ENABLED="${ENSEMBLE_MODEL}_cache_enabled"
-ENSEMBLE_MODEL_CACHE_DISABLED="${ENSEMBLE_MODEL}_cache_disabled"
-COMPOSING_MODEL_CACHE_ENABLED="${COMPOSING_MODEL}_cache_enabled"
-COMPOSING_MODEL_CACHE_DISABLED="${COMPOSING_MODEL}_cache_disabled"
-MODELS="${ENSEMBLE_MODEL_CACHE_ENABLED} ${ENSEMBLE_MODEL_CACHE_DISABLED} ${COMPOSING_MODEL_CACHE_ENABLED} ${COMPOSING_MODEL_CACHE_DISABLED}"
-
-## Setup ensemble models, one with cache enabled and one with cache disabled
-cp -r "/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository/${ENSEMBLE_MODEL}" "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_ENABLED}"
-cp -r "/data/inferenceserver/${REPO_VERSION}/qa_ensemble_model_repository/qa_model_repository/${ENSEMBLE_MODEL}" "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_DISABLED}"
-
-## Setup composing models, one with cache enabled and one with cache disabled
-cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/${COMPOSING_MODEL}" "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_ENABLED}"
-cp -r "/data/inferenceserver/${REPO_VERSION}/qa_model_repository/${COMPOSING_MODEL}" "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_DISABLED}"
-
-for model in ${MODELS}; do
-    # Remove "name" line from each config to use directory name for simplicity
-    sed -i "/^name:/d" "${MODEL_DIR}/${model}/config.pbtxt"
-    # Add version directory to each model if non-existent
-    mkdir -p "${MODEL_DIR}/${model}/1"
-done
-
-## Update "model_name" lines in each ensemble model config ensemble steps
-sed -i "s/${COMPOSING_MODEL}/${COMPOSING_MODEL_CACHE_ENABLED}/g" "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_ENABLED}/config.pbtxt"
-sed -i "s/${COMPOSING_MODEL}/${COMPOSING_MODEL_CACHE_DISABLED}/g" "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_DISABLED}/config.pbtxt"
-
-## Append cache config to each model config
-echo -e "response_cache { enable: True }" >> "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_ENABLED}/config.pbtxt"
-echo -e "response_cache { enable: False }" >> "${MODEL_DIR}/${ENSEMBLE_MODEL_CACHE_DISABLED}/config.pbtxt"
-echo -e "response_cache { enable: True }" >> "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_ENABLED}/config.pbtxt"
-echo -e "response_cache { enable: False }" >> "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_DISABLED}/config.pbtxt"
-# Force CPU memory for composing models since cache doesn't currently support GPU memory
-echo -e "instance_group [{ kind: KIND_CPU, count: 1 }]" >> "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_ENABLED}/config.pbtxt"
-echo -e "instance_group [{ kind: KIND_CPU, count: 1 }]" >> "${MODEL_DIR}/${COMPOSING_MODEL_CACHE_DISABLED}/config.pbtxt"
-
-# Run server
-run_server
-if [ "${SERVER_PID}" == "0" ]; then
-    echo -e "\n***\n*** Failed to start ${SERVER}\n***"
-    cat ${SERVER_LOG}
-    exit 1
-fi
-
-# Run perf_analyzer
-set +e
-RET=0
-PROTOCOLS="http grpc"
-STABILITY_THRESHOLD="15"
-for protocol in ${PROTOCOLS}; do
-    for model in ${MODELS}; do
-	echo "================================================================"
-	echo "[PERMUTATION] Protocol=${protocol} Model=${model}"
-	echo "================================================================"
-
-        ${PERF_ANALYZER} -v -i ${protocol} -m ${model} -s ${STABILITY_THRESHOLD} | tee ${CLIENT_LOG} 2>&1
-        check_perf_analyzer_error $?
-
-	# Check response cache outputs
-	if [[ ${model} == *"cache_enabled"* ]]; then
-	  check_cache_output
-	fi
-    done;
-done;
-set -e
-
-# Cleanup
-kill $SERVER_PID
-wait $SERVER_PID
-
-
-if [ $RET -eq 0 ]; then
-  echo -e "\n***\n*** Test Passed\n***"
-else
-  echo "=== START SERVER LOG ==="
-  cat ${SERVER_LOG}
-  echo "=== END SERVER LOG ==="
-  echo -e "\n***\n*** Test FAILED\n***"
-fi
-
-exit ${RET}
diff --git a/qa/L0_perf_analyzer_unit_tests/test.sh b/qa/L0_perf_analyzer_unit_tests/test.sh
deleted file mode 100755
index f2a70d23ff..0000000000
--- a/qa/L0_perf_analyzer_unit_tests/test.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-# Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-TEST_LOG="./perf_analyzer_unit_tests.log"
-PERF_ANALYZER_UNIT_TESTS=../clients/perf_analyzer_unit_tests
-
-RET=0
-
-rm -f $TEST_LOG
-
-set +e
-$PERF_ANALYZER_UNIT_TESTS >> $TEST_LOG 2>&1
-if [ $? -ne 0 ]; then
-    echo -e "\n***\n*** Test Failed\n***"
-    RET=1
-fi
-set -e
-
-if [ $RET -eq 0 ]; then
-    echo -e "\n***\n*** Test Passed\n***"
-else
-    cat $TEST_LOG
-    echo -e "\n***\n*** Test FAILED\n***"
-fi
-
-exit $RET