From ee382f2e8f718f38fbe3c8eba68a26bb94f572b0 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Tue, 14 Jul 2020 19:42:41 +0530 Subject: [PATCH 01/21] modular test cases cut2 --- tests/performance/README.md | 145 +++++++++---- tests/performance/agents/config.ini | 2 +- tests/performance/agents/metrics/__init__.py | 25 ++- tests/performance/agents/utils/process.py | 5 +- tests/performance/requirements.txt | 3 +- tests/performance/run_performance_suite.py | 12 +- tests/performance/runs/compare.py | 49 +++-- tests/performance/runs/context.py | 23 +- tests/performance/runs/storage.py | 12 +- tests/performance/runs/taurus/__init__.py | 22 +- tests/performance/runs/taurus/reader.py | 37 +++- tests/performance/runs/taurus/x2junit.py | 170 +++++++++++++-- .../tests/api_description/api_description.jmx | 2 +- .../api_description/api_description.yaml | 75 ++----- .../api_description/environments/xlarge.yaml | 54 ++++- .../batch_and_single_inference.jmx | 6 +- .../batch_and_single_inference.yaml | 92 +------- .../environments/xlarge.yaml | 56 ++++- .../tests/batch_inference/batch_inference.jmx | 4 +- .../batch_inference/batch_inference.yaml | 78 +------ .../batch_inference/environments/xlarge.yaml | 51 ++++- .../environments/xlarge.yaml | 8 +- .../examples_local_criteria.jmx | 6 +- .../examples_local_criteria.yaml | 51 ++++- .../examples_local_monitoring.jmx | 6 +- .../examples_local_monitoring.yaml | 44 +++- .../environments/xlarge.yaml | 3 + .../examples_remote_criteria.jmx | 6 +- .../examples_remote_criteria.yaml | 55 +++-- .../examples_remote_monitoring.jmx | 6 +- .../examples_remote_monitoring.yaml | 44 +++- .../examples_starter/examples_starter.jmx | 6 +- .../examples_starter/examples_starter.yaml | 49 +++-- tests/performance/tests/global_config.yaml | 196 +++++++++++++++++- .../health_check/environments/xlarge.yaml | 49 ++++- .../tests/health_check/health_check.jmx | 2 +- .../tests/health_check/health_check.yaml | 62 +----- .../environments/xlarge.yaml | 51 ++++- .../inference_multiple_models.jmx | 6 +- .../inference_multiple_models.yaml | 63 +----- .../environments/xlarge.yaml | 51 ++++- .../inference_multiple_worker.jmx | 4 +- .../inference_multiple_worker.yaml | 57 +---- .../environments/xlarge.yaml | 51 ++++- .../inference_single_worker.jmx | 4 +- .../inference_single_worker.yaml | 56 +---- .../list_models/environments/xlarge.yaml | 48 ++++- .../tests/list_models/list_models.jmx | 4 +- .../tests/list_models/list_models.yaml | 57 +---- .../environments/xlarge.yaml | 51 ++++- .../model_description/model_description.jmx | 4 +- .../model_description/model_description.yaml | 55 +---- .../environments/xlarge.yaml | 56 ++++- .../multiple_inference_and_scaling.jmx | 70 ++++++- .../multiple_inference_and_scaling.yaml | 92 ++------ .../environments/xlarge.yaml | 54 ++++- .../register_unregister.jmx | 6 +- .../register_unregister.yaml | 67 +----- .../environments/xlarge.yaml | 57 ++++- .../register_unregister_multiple.jmx | 6 +- .../register_unregister_multiple.yaml | 72 ++----- .../environments/xlarge.yaml | 24 ++- .../scale_down_workers/scale_down_workers.jmx | 4 +- .../scale_down_workers.yaml | 116 +++++------ .../scale_up_workers/environments/xlarge.yaml | 24 ++- .../scale_up_workers/scale_up_workers.jmx | 4 +- .../scale_up_workers/scale_up_workers.yaml | 110 +++++----- tests/performance/utils/fs.py | 16 +- tests/performance/utils/pyshell.py | 13 +- tests/performance/utils/timer.py | 4 + 70 files changed, 1723 insertions(+), 1150 deletions(-) diff --git a/tests/performance/README.md 
b/tests/performance/README.md index 32f3c59f9..88fd6e0bc 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -9,7 +9,7 @@ The salient features of the performance regression suite are * Non-intrusive - Does not need any code-changes or instrumentation on the server being monitored. * It can be used to monitor a wide variety of server metrics - memory, cpu, io - in addition to traditional API level metrics such as latency, throughput etc. -* It is easy to add custom metrics. For example, in MMS server, `the number of workers spawned` would be an interesting +* It is easy to add custom metrics. For example, in Model server, `the number of workers spawned` would be an interesting metric to track. The platform allows for easy addition of these metrics. * Test cases are specified in human readable yaml files. Every test case has a pass or fail status. This is determined by evaluating expressions specified in the test case. Every expression checks metrics against threshold values. For @@ -19,7 +19,7 @@ possible to specify multiple compute environments against which the test cases w environment, will have its own threshold values. * This suite leverages the open source [Taurus framework](https://gettaurus.org/). * This suite extends the Taurus framework in the following ways - * Adds resource monitoring service. This allows MMS specific metrics to be added. + * Adds resource monitoring service. This allows Model Server specific metrics to be added. * Environments as described earlier. * Specification of pass/fail criterion between two commits. For example, memory consumed by workers should not increase by more than 10% between two commits for the given test case. @@ -38,21 +38,21 @@ The building blocks of the performance regression suite and flow is captured in ``` 2. Install performance regression suite dependencies. ```bash - export MMS_HOME= - pip install -r $MMS_HOME/tests/performance/requirements.txt + export MODEL_SERVER_HOME= + pip install -r $MODEL_SERVER_HOME/tests/performance/requirements.txt ``` -3. Make sure that `git` is installed and the test suites are run from the MMS working directory. +3. Make sure that `git` is installed and the test suites are run from the Model Server working directory. ### B. Running the test suite -1. Make sure parameters set in [tests/common/global_config.yaml](tests/performance/tests/global_config.yaml) are correct. +1. Make sure parameters set in [tests/global_config.yaml](tests/performance/tests/global_config.yaml) are correct. 2. To run the test suite execute [run_performance_suite.py](run_performance_suite.py) with the following parameters * `--artifacts-dir` or `-a` is a directory where the test case results will be stored. The default value is -`$MMS_HOME/tests/performance/run_artifacts`. +`$MODEL_SERVER_HOME/tests/performance/run_artifacts`. * `--test-dir` or `-t` is a directory containing the test cases. The default value is -`$MMS_HOME/tests/performance/tests`. +`$MODEL_SERVER_HOME/tests/performance/tests`. * `--pattern` or `-p` glob pattern picks up certain test cases for execution within the `test-dir`. The default value picks up all test cases. @@ -64,23 +64,34 @@ The default value excludes nothing. the file (minus the extension) found inside the environments folder in each test case. They encapsulate parameter values which are specific to the execution environment. This is a mandatory parameter. 
+  * `--compare-local` or `--no-compare-local` specifies whether to do the comparison with run artifacts data available on the local machine
+  or with the data available in the S3 bucket.
+
+  * `--compare-with` or `-c` specifies the commit id to compare against. The default value is 'HEAD~1'. A branch name, tag,
+  or HEAD~N can also be specified. The comparison happens only if the run artifacts folder for that commit_id and env is available.
+
+
+
   The script does the following:
   1. Starts the metrics monitoring server.
-  2. Collects all the tests from test-dir satisfying the pattern
-  3. Executes the tests
+  2. Collects all the tests from test-dir satisfying the pattern, excluding those matching the exclude pattern and tests whose names start with 'skip'
+  3. Executes the collected tests
   4. Generates artifacts in the artifacts-dir against each test case.
+  5. Generates a pass/fail report for the test cases
+  6. Generates a comparison report against the specified commit id

-3. Check the console logs, $artifacts-dir$//performance_results.html report, comparison.csv, comparison.html
+3. Check the console logs, the $artifacts-dir//performance_results.html report, comparison_result.csv, comparison_result.html
 and other artifacts.

**Steps are provided below**

```bash
-export MMS_HOME=
-cd $MMS_HOME/tests/performance
+export MODEL_SERVER_HOME=
+cd $MODEL_SERVER_HOME/tests/performance

-# Note that MMS server started and stopped by the individual test suite.
-# check variables such as MMS server PORT etc
+# Note that the Model Server is started and stopped by the individual test suite.
+# Check variables such as the Model Server PORT etc.
 # vi tests/common/global_config.yaml

 #all tests
@@ -92,17 +103,55 @@ python -m run_performance_suite -e xlarge -p inference_single_worker
 ```

 ### C. Understanding the test suite artifacts and reports
-1. The $artifacts-dir$//performance_results.html is a summary report of the test run.
+1. The $artifacts-dir//performance_results.html is a summary report of the test run.
 2. Each test yaml is treated as a test suite. Each criteria in the test suite is treated as a test case.
 If the test suite does not specify any criteria, then the test suite is reported as skipped with 0 test cases.
 3. For each test suite, a sub-directory is created containing relevant run artifacts.
 Important files in this directory are
    * metrics.csv -- contains the values of the various system-monitored metrics over time
+   * metrics_agg.csv -- contains percentile values for the columns in metrics.csv
    * finals_stats.csv -- contains the values of the various api metrics over time
-4. The $artifacts-dir$//comparison_results.html is a summary report which shows performance difference between
+4. The $artifacts-dir//comparison_results.html is a summary report which shows the performance difference between
 the last two commits.
 5. The run completes with a console summary of the performance and comparison suites which have failed
![](assets/console.png)

+### D. Understanding the test case components
+A test case consists of the test.yaml, test.jmx, environments/*.yaml files and a global_config.yaml.
+Below is the sample folder structure for the 'api_description' test case:
+```bash
+tests
+  -- api_description
+   --- environments
+    ---- xlarge.yaml
+    ---- mac_xlarge.yaml
+   --- api_description.jmx
+   --- api_description.yaml
+  -- global_config.yaml
+```
+
+1. global_config.yaml
+   - It is a master template for all the test cases and is shared across all the tests.
+   - It contains all the common yaml sections, criteria, monitoring metrics etc.
+   - It also contains variables in the format ${variable} for metric thresholds and other test-specific attributes.
+
+2. environments/*.yaml
+   - A test case can have multiple environment files. If you have environment-dependent metrics, you can create an
+   environment yaml file per environment, e.g. macos_xlarge, ubuntu_xlarge etc.
+   - The environment file contains values for all the variables mentioned in global_config.yaml and test.yaml.
+
+3. test.yaml
+   - The test.yaml is the main yaml for a test case. Note that the name of the yaml should be the same as the test folder.
+   - It inherits the master template global_config.yaml.
+   It usually contains the scenario, specific pre-processing commands (if any), and special criteria (if any) applicable to that test case only.
+   - If you want behavior other than that defined in the master template, it is possible to override sections of global_config.yaml in the individual test case.
+   The global_config.yaml's top-level sections can be overridden, merged, or appended based on the rules below (see the sketch after this list):
+     1. By default, dictionaries get merged.
+     2. If a dictionary key is prefixed with '~', its section gets overridden.
+     3. Lists get appended.
+4. test.jmx
+   - The JMeter test scenario file. The test.yaml runs the scenario defined in the .jmx file.
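+
+For illustration, the hypothetical test.yaml fragment below shows how these rules apply (the section contents are
+made up for the example; only the '~' behavior and the list handling are the point):
+
+```yaml
+# test.yaml
+~services:             # '~' prefix: replaces the master 'services' section entirely
+  - module: shellexec
+    prepare:
+      - "echo test-specific setup"
+
+reporting:             # no '~' prefix: merged with the master 'reporting' section;
+  - module: junit-xml  # list entries like this one get appended to the master list
+```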
+
+
 ## Add a new test

-Follow these three steps to add a new test case to the test suite.
+Follow these four steps to add a new test case to the test suite.

@@ -110,6 +159,7 @@ Follow these three steps to add a new test case to the test suite.
 1. Add scenario (a.k.a test suite)
 2. Add metrics to monitor
 3. Add pass/fail criteria (a.k.a test case)
+4. Add compare criteria (a.k.a compare test cases)

 #### 1. Add scenario (a.k.a test suite)

@@ -139,8 +189,8 @@ Please note that various global configuration settings used by examples_starter.
 To execute this test suite, run the following command
 ```bash
-    export MMS_HOME=
-    cd $MMS_HOME/tests/performance
+    export MODEL_SERVER_HOME=
+    cd $MODEL_SERVER_HOME/tests/performance
     python -m run_performance_suite -p examples_starter -e xlarge
 ```

@@ -154,15 +204,15 @@ Specify the metrics of interest in the services/monitoring section of the yaml.

 1. Standalone monitoring server

-   Use this technique if MMS and the tests execute on different machines. Before running the test cases,
+   Use this technique if Model Server and the tests execute on different machines. Before running the test cases,
    please start the [metrics_monitoring_server.py](metrics_monitoring_server.py) script.
    It will communicate server metric data with the test client over sockets. The monitoring server runs on port 9009 by default.

-   To start the monitoring server, run the following commands on the MMS host:
+   To start the monitoring server, run the following commands on the Model Server host:
    ```bash
-   export MMS_HOME=
-   pip install -r $MMS_HOME/tests/performance/requirements.txt
-   python $MMS_HOME/tests/performance/metrics_monitoring_server.py --start
+   export MODEL_SERVER_HOME=
+   pip install -r $MODEL_SERVER_HOME/tests/performance/requirements.txt
+   python $MODEL_SERVER_HOME/tests/performance/metrics_monitoring_server.py --start
    ```

   The monitoring section configuration is shown below.
   ```yaml
 services:
   - module: monitoring
     server-agent:
-      - address: :9009 # metric monitoring service address
-        label: mms-inference-server  # Specified label will be used in reports instead of ip:port
+      - address: :9009 # metric monitoring service address
+        label: Model-Server-inference-server # Specified label will be used in reports instead of ip:port
         interval: 1s    # polling interval
         logging: True # those logs will be saved to "SAlogs_192.168.0.1_9009.csv" in the artifacts dir
         metrics: # metrics should be supported by monitoring service
-          - sum_cpu_percent # cpu percent used by all the mms server processes and workers
+          - sum_cpu_percent # cpu percent used by all the Model Server processes and workers
           - sum_memory_percent
           - sum_num_handles
-          - server_workers # no of mms workers
+          - server_workers # number of Model Server workers
 ```
 The complete yaml can be found [here](tests/examples_remote_monitoring/examples_remote_monitoring.yaml)

 Use the command below to run the test suite.
 ```bash
-   export MMS_HOME=
-   cd $MMS_HOME/tests/performance
+   export MODEL_SERVER_HOME=
+   cd $MODEL_SERVER_HOME/tests/performance
    python -m run_performance_suite -p examples_remote_monitoring -e xlarge
 ```

 2. Local monitoring plugin

-   Use this technique if both MMS and the tests run on the same host.
+   Use this technique if both Model Server and the tests run on the same host.
    The monitoring section configuration is shown below.
    ```yaml
@@ -218,8 +268,8 @@ Specify the metrics of interest in the services/monitoring section of the yaml.
    Use the command below to run the test suite.
    ```bash
-   export MMS_HOME=
-   cd $MMS_HOME/tests/performance
+   export MODEL_SERVER_HOME=
+   cd $MODEL_SERVER_HOME/tests/performance
    python -m run_performance_suite -p examples_local_monitoring -e xlarge
    ```

@@ -235,7 +285,7 @@ pass-fail module from Taurus to achieve this functionality. More details can be
 - module: passfail
   criteria:
   - class: bzt.modules.monitoring.MonitoringCriteria
-    subject: mms-inference-server/sum_num_handles
+    subject: model-server/sum_num_handles
     condition: '>'
     threshold: 180
     timeframe: 1s
@@ -255,19 +305,18 @@ specified in the pass/fail criterion are used for comparison with the previous r
 - module: passfail
   criteria:
   - class: bzt.modules.monitoring.MonitoringCriteria
-    subject: mms-inference-server/sum_num_handles
+    subject: model-server/sum_num_handles
     condition: '>'
     threshold: 180
     timeframe: 1s
     fail: true
     stop: true
-    diff_percent : 30
+    diff_percent_previous : 30
 ```

 Note that
 1. At least one test suite run on the same environment should have happened in order to do the comparison.
 2. The $artifacts-dir$//comparison_results.html is a summary report which shows performance difference
-   between the last two commits.
+   between the current run and the run for the user-specified compare-with commit id.
-3. The test case fails if the diff_percent is greater than the specified value across runs.
+3. The test case fails if the percentage difference is greater than the specified diff_percent_previous value across runs.

 3. Metrics available for pass-fail criteria
@@ -307,18 +356,34 @@ specified in the pass/fail criterion are used for comparison with the previous r
    * total_workers - Total number of workers spawned
    * orphans - Total number of orphan processes

+4. Add compare criteria:
+There are two types of compare criteria you can add for metrics:
+  1. diff_percent_run
+     This criterion is used to check the percent difference between the first and last values of the metric within a run.
+     In other words, it is used to verify that metric values are the same before and after the scenario run.
+  2. diff_percent_previous
+     Compare the metric aggregate values with the previous run. Here we take the aggregate values of the metric
+     (the first and last recorded values) for the current run and the previous run and check that the percentage
+     difference is not greater than diff_percent_previous.
+
+Note that the formula for the percentage difference is abs(value1 - value2) / ((value1 + value2) / 2) * 100,
+as shown in the sketch below.
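+
+As a minimal Python sketch of this check (mirroring compare_values in runs/compare.py and the diff_percent_run
+check in runs/taurus/x2junit.py):
+
+```python
+def percent_diff(value1, value2):
+    """Percentage difference between two metric values."""
+    if value1 == value2 == 0:
+        return 0.0  # special case: two zero values count as no difference
+    return abs(value1 - value2) / ((value1 + value2) / 2) * 100
+
+# A diff_percent_run threshold of 30 passes when the first and last recorded
+# values of a metric differ by less than 30 percent:
+assert percent_diff(100, 120) < 30        # ~18.2% -> pass
+assert not (percent_diff(100, 200) < 30)  # ~66.7% -> fail
+```
+
+The compare criteria themselves are declared in a top-level compare_criteria section. A hypothetical section is
+sketched below (the keys are the ones parsed by runs/taurus/reader.py; the subject and variable names are
+illustrative):
+
+```yaml
+compare_criteria:
+  - subject: ServerLocalClient/total_workers           # monitoring_client/metric_name
+    diff_percent_run: ${TOTAL_WORKERS_RUN_DIFF}        # first vs last value within this run
+    diff_percent_previous: ${TOTAL_WORKERS_PREV_DIFF}  # aggregate values vs the previous run
+```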
+
+## Guidelines for writing good test cases:
+1. The 'timeframe' duration used to check values against a threshold criterion should be sufficiently large, at least 5 seconds.
+2. The duration specified via the 'hold-for' property should also be sufficiently large, at least 5 minutes.
+3. When you use diff_percent_run, make sure that the scenario (JMX script) leaves the server in a deterministic state across different runs.
+
 ## Test Strategy & Cases

 More details about our testing strategy and test cases can be found [here](TESTS.md)

 ## FAQ

-Q1. Is it possible to use the performance regression framework to test MMS on Python2.7?
+Q1. Is it possible to use the performance regression framework to test Model Server on Python 2.7?

-Yes. Even though, the performance regression framework needs Python 3.7+ (as Taurus requires Python 3.7+),
-there are two possible ways to achieve this
+Yes. Even though the performance regression framework needs Python 3.7+ (as Taurus requires Python 3.7+),
+there are two possible ways to achieve this:
-* Please create a Python 2.7 virtual env which runs MMS and a Python 3.7 virtual env which runs
+* Please create a Python 2.7 virtual env which runs Model Server and a Python 3.7 virtual env which runs
 the test framework and test cases.
-* Alternatively, deploy the standalone monitoring agent on the MMS instance and run the test cases against the remote
+* Alternatively, deploy the standalone monitoring agent on the Model Server instance and run the test cases against the remote
 server. Note that the standalone monitoring agent works on both Python 2/3.

diff --git a/tests/performance/agents/config.ini b/tests/performance/agents/config.ini
index aacbe97e1..f2caaebcd 100644
--- a/tests/performance/agents/config.ini
+++ b/tests/performance/agents/config.ini
@@ -6,4 +6,4 @@ HOST =
 PORT = 9009

 [suite]
-s3_bucket = mms-performance-regression-reports
\ No newline at end of file
+s3_bucket = torchserve-performance-regression-reports
\ No newline at end of file
diff --git a/tests/performance/agents/metrics/__init__.py b/tests/performance/agents/metrics/__init__.py
index 9d7bf7eb2..642976be5 100644
--- a/tests/performance/agents/metrics/__init__.py
+++ b/tests/performance/agents/metrics/__init__.py
@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-""" Customised system and mms process metrics for monitoring and pass-fail criteria in taurus"""
+""" Customised system and Model Server process metrics for monitoring and pass-fail criteria in taurus"""

 # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # Licensed under the Apache License, Version 2.0 (the "License").
@@ -19,7 +19,7 @@

 class ProcessType(Enum):
-    """ Type of MMS processes to compute metrics on """
+    """ Type of Server processes to compute metrics on """
     FRONTEND = 1
     WORKER = 2
     ALL = 3
@@ -64,7 +64,8 @@ class ProcessType(Enum):
 misc_metrics = {
     'total_processes': None,
     'total_workers': None,
-    'orphans': None
+    'orphans': None,
+    'zombies': None
 }

 AVAILABLE_METRICS = list(system_metrics) + list(misc_metrics)
@@ -85,6 +86,7 @@ class ProcessType(Enum):
         AVAILABLE_METRICS.append('{}_{}_{}'.format(op, PNAME, metric))

 children = set()
+zombie_children = set()


 def get_metrics(server_process, child_processes, logger):
@@ -118,7 +120,7 @@ def update_metric(metric_name, proc_type, stats):
         processes_stats.append({'type': ProcessType.FRONTEND, 'stats': server_process.as_dict()})
     except:
         pass
-    for child in children:
+    for child in children | zombie_children:
         try:
             child_cmdline = child.cmdline()
             if psutil.pid_exists(child.pid) and len(child_cmdline) >= 2 and WORKER_NAME in child_cmdline[1]:
@@ -126,12 +128,18 @@ def update_metric(metric_name, proc_type, stats):
             else:
                 reclaimed_pids.append(child)
                 logger.debug('child {0} no longer available'.format(child.pid))
-        except (NoSuchProcess, ZombieProcess):
+        except ZombieProcess:
+            zombie_children.add(child)
+        except NoSuchProcess:
             reclaimed_pids.append(child)
             logger.debug('child {0} no longer available'.format(child.pid))

     for p in reclaimed_pids:
-        children.remove(p)
+        if p in children:
+            children.remove(p)
+        if p in zombie_children:
+            zombie_children.remove(p)
+

     ### PROCESS METRICS ###
     worker_stats = list(map(lambda x: x['stats'], \
@@ -147,10 +155,11 @@ def update_metric(metric_name, proc_type, stats):

     # Total processes
     result['total_processes'] = len(worker_stats) + 1
-    result['total_workers'] = max(len(worker_stats) - 1, 0)
+    result['total_workers'] = len(worker_stats)
     result['orphans'] = len(list(filter(lambda p: p['ppid'] == 1, worker_stats)))
+    result['zombies'] = len(zombie_children)

     ### SYSTEM METRICS ###
     result['system_disk_used'] = psutil.disk_usage('/').used
     result['system_memory_percent'] = psutil.virtual_memory().percent
     system_disk_io_counters = psutil.disk_io_counters()
diff --git a/tests/performance/agents/utils/process.py b/tests/performance/agents/utils/process.py
index 8bdfb5078..c9a8e98de 100644
--- a/tests/performance/agents/utils/process.py
+++ b/tests/performance/agents/utils/process.py
@@ -56,9 +56,7 @@ def get_child_processes(process):


 def get_server_processes(server_process_pid):
-    """ It caches the main server and child processes at module level.
-    Ensure that you call this process so that MMS process
-    """
+    """Get the psutil Process object for the given process id."""
     try:
         server_process = psutil.Process(server_process_pid)
     except Exception as e:
@@ -68,4 +66,5 @@ def get_server_processes(server_process_pid):


 def get_server_pidfile(file):
+    """Get the path of the temp server pid file."""
     return os.path.join(tempfile.gettempdir(), ".{}".format(file))
diff --git a/tests/performance/requirements.txt b/tests/performance/requirements.txt
index 2fa19c26a..55a4486fe 100644
--- a/tests/performance/requirements.txt
+++ b/tests/performance/requirements.txt
@@ -8,4 +8,5 @@ awscli==1.18.80
 click==7.1.2
 tabulate==0.8.7
 pandas==1.0.3
-termcolor==1.1.0
\ No newline at end of file
+termcolor==1.1.0
+bzt==1.14.2
\ No newline at end of file
diff --git a/tests/performance/run_performance_suite.py b/tests/performance/run_performance_suite.py
index 45948dff4..66bc41713 100755
--- a/tests/performance/run_performance_suite.py
+++ b/tests/performance/run_performance_suite.py
@@ -71,8 +71,9 @@ def validate_env(ctx, param, value):
 @click.option('--monit/--no-monit', help='Start Monitoring server', default=True)
 @click.option('--compare-local/--no-compare-local', help='Compare with previous run with files stored'
                                                          ' in artifacts directory', default=True)
+@click.option('-c', '--compare-with', help='Compare with commit id, branch, tag, HEAD~N.', default="HEAD~1")
 def run_test_suite(artifacts_dir, test_dir, pattern, exclude_pattern,
-                   jmeter_path, env_name, monit, compare_local):
+                   jmeter_path, env_name, monit, compare_local, compare_with):
     """Collect test suites, run them and generate reports"""
     logger.info("Artifacts will be stored in directory %s", artifacts_dir)

@@ -84,7 +85,7 @@ def run_test_suite(artifacts_dir, test_dir, pattern, exclude_pattern,
     else:
         logger.info("Collected tests %s", test_dirs)

-    with ExecutionEnv(MONITORING_AGENT, artifacts_dir, env_name, compare_local, monit) as prt:
+    with ExecutionEnv(MONITORING_AGENT, artifacts_dir, env_name, compare_local, compare_with, monit) as prt:
         pre_command = 'export PYTHONPATH={}:$PYTHONPATH;'.format(os.path.join(str(ROOT_PATH), "agents"))
         for suite_name in tqdm(test_dirs, desc="Test Suites"):
             with Timer("Test suite {} execution time".format(suite_name)) as t:
@@ -95,10 +96,13 @@ def run_test_suite(artifacts_dir, test_dir, pattern, exclude_pattern,
                 test_file = os.path.join(test_dir, suite_name, "{}.yaml".format(suite_name))
                 with x2junit.X2Junit(suite_name, suite_artifacts_dir, prt.reporter, t, env_name) as s:
                     s.code, s.err = run_process("{} bzt {} {} {} {}".format(pre_command, options_str,
-                                                                            test_file, env_yaml_path,
-                                                                            GLOBAL_CONFIG_PATH))
+                                                                            GLOBAL_CONFIG_PATH, test_file,
+                                                                            env_yaml_path))
                 update_taurus_metric_files(suite_artifacts_dir, test_file)

+    sys.exit(prt.exit_code)
+
+
 if __name__ == "__main__":
     run_test_suite()
diff --git a/tests/performance/runs/compare.py b/tests/performance/runs/compare.py
index 8ebbb4b67..ea5d4bf6a 100644
--- a/tests/performance/runs/compare.py
+++ b/tests/performance/runs/compare.py
@@ -35,12 +35,13 @@

 class CompareReportGenerator():

-    def __init__(self, path, env_name, local_run):
+    def __init__(self, path, env_name, local_run, compare_with):
         self.artifacts_dir = path
         self.current_run_name = os.path.basename(path)
         self.env_name = env_name
+        self.compare_with = compare_with
         storage_class = LocalStorage if local_run else S3Storage
-        self.storage = storage_class(self.artifacts_dir, self.env_name)
+        self.storage = storage_class(self.artifacts_dir, self.env_name, compare_with)
         self.junit_reporter = None
        self.pandas_result = None
        self.pass_fail = True

@@ -87,7 +88,7 @@ def add_test_case(self, name, msg, type):

 def get_log_file(dir, sub_dir):
     """Get metric monitoring log files"""
-    metrics_file = os.path.join(dir, sub_dir, "metrics.csv")
+    metrics_file = os.path.join(dir, sub_dir, "metrics_agg.csv")
     return metrics_file if os.path.exists(metrics_file) else None

@@ -102,11 +103,21 @@ def get_aggregate_val(df, agg_func, col):
     return val


+def get_centile_val(df, agg_func, col):
+    """Get the aggregate value of a pandas dataframe column for the given aggregate function"""
+
+    val = None
+    if "metric_name" in df and agg_func in df:
+        val = df[df["metric_name"] == col][agg_func]
+        # use positional access; after filtering, the index label 0 may no longer exist
+        val = val.iloc[0] if len(val) else None
+    return val
+
+
 def compare_values(val1, val2, diff_percent, run_name1, run_name2):
     """ Compare percentage diff values of val1 and val2 """
     if pd.isna(val1) or pd.isna(val2):
-        msg = "Either of the value can not be determined. The run1 value is '{}' and " \
-              "run2 value is {}.".format(val1, val2)
+        msg = "One of the values could not be determined. run1_value='{}' and " \
+              "run2_value='{}'.".format(val1, val2)
         pass_fail, diff, msg = "error", "NA", msg
     else:
         try:
@@ -116,15 +127,15 @@ def compare_values(val1, val2, diff_percent, run_name1, run_name2):
                 if diff < float(diff_percent):
                     pass_fail, diff, msg = "pass", diff, "passed"
                 else:
-                    msg = "The diff_percent criteria has failed. The expected diff_percent is '{}' and actual " \
-                          "diff percent is '{}' and the '{}' run value is '{}' and '{}' run value is '{}'. ". \
+                    msg = "The diff_percent criterion failed. Expected='{}', actual='{}', " \
+                          "run1='{}', run1_value='{}', run2='{}', run2_value='{}'. ". \
                         format(diff_percent, diff, run_name1, val1, run_name2, val2)
                     pass_fail, diff, msg = "fail", diff, msg
             else:  # special case of 0
                 pass_fail, diff, msg = "pass", 0, ""
         except Exception as e:
-            msg = "error while calculating the diff for val1={} and val2={}." \
+            msg = "error while calculating the diff for val1='{}' and val2='{}'. " \
\ "Error is: {}".format(val1, val2, str(e)) logger.info(msg) pass_fail, diff, msg = "pass", "NA", msg @@ -139,7 +150,7 @@ def compare_artifacts(dir1, dir2, run_name1, run_name2): sub_dirs_1 = get_sub_dirs(dir1) over_all_pass = True - aggregates = ["mean", "max", "min"] + aggregates = ["first_value", "last_value"] header = ["run_name1", "run_name2", "test_suite", "metric", "run1", "run2", "percentage_diff", "expected_diff", "result", "message"] rows = [header] @@ -161,15 +172,18 @@ def compare_artifacts(dir1, dir2, run_name1, run_name2): metrics_from_file1 = pd.read_csv(metrics_file1) metrics_from_file2 = pd.read_csv(metrics_file2) - metrics, diff_percents = taurus_reader.get_compare_metric_list(dir1, sub_dir1) + metrics = taurus_reader.get_compare_metric_list(dir1, sub_dir1) - for col, diff_percent in zip(metrics, diff_percents): + for metric_values in metrics: + col = metric_values[0] + diff_percent = metric_values[1] + if diff_percent is None: + continue for agg_func in aggregates: name = "{}_{}".format(agg_func, str(col)) - val1 = get_aggregate_val(metrics_from_file1, agg_func, col) - val2 = get_aggregate_val(metrics_from_file2, agg_func, col) - + val2 = get_centile_val(metrics_from_file2, agg_func, col) + val1 = get_centile_val(metrics_from_file2, agg_func, col) diff, pass_fail, msg = compare_values(val1, val2, diff_percent, run_name1, run_name2) if over_all_pass: @@ -188,3 +202,10 @@ def compare_artifacts(dir1, dir2, run_name1, run_name2): dataframe = pd.DataFrame(rows[1:], columns=rows[0]) return reporter, dataframe + +if __name__ == "__main__": + compare_artifacts( + "/Users/demo/git/serve/test/performance/run_artifacts/xlarge__45b6399__1594725947", + "/Users/demo/git/serve/test/performance/run_artifacts/xlarge__45b6399__1594725717", + "xlarge__45b6399__1594725947", "xlarge__45b6399__1594725717" + ) \ No newline at end of file diff --git a/tests/performance/runs/context.py b/tests/performance/runs/context.py index a204c67bc..860b35454 100644 --- a/tests/performance/runs/context.py +++ b/tests/performance/runs/context.py @@ -19,6 +19,7 @@ import os import sys import time +import subprocess import webbrowser from termcolor import colored @@ -32,20 +33,27 @@ logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) +def get_git_commit_id(compare_with): + return subprocess.check_output('git rev-parse --short {}'.format(compare_with).split()).decode( + "utf-8")[:-1] + + class ExecutionEnv(object): """ Context Manager class to run the performance regression suites """ - def __init__(self, agent, artifacts_dir, env, local_run, use=True, check_mms_server_status=False): + def __init__(self, agent, artifacts_dir, env, local_run, compare_with, use=True, check_model_server_status=False): self.monitoring_agent = agent self.artifacts_dir = artifacts_dir self.use = use self.env = env self.local_run = local_run - self.check_mms_server_status = check_mms_server_status + self.compare_with = get_git_commit_id(compare_with) + self.check_model_server_status = check_model_server_status self.reporter = JUnitXml() - self.compare_reporter_generator = CompareReportGenerator(self.artifacts_dir, self.env, self.local_run) + self.compare_reporter_generator = CompareReportGenerator(self.artifacts_dir, self.env, self.local_run, compare_with) + self.exit_code = 1 def __enter__(self): if self.use: @@ -63,7 +71,7 @@ def open_report(file_path): @staticmethod def report_summary(reporter, suite_name): if reporter and os.path.exists(reporter.junit_html_path): - status = reporter.junit_xml.errors or 
+            status = reporter.junit_xml.errors or reporter.junit_xml.failures

             status, code, color = ("failed", 3, "red") if status else ("passed", 0, "green")
             msg = "{} run has {}.".format(suite_name, status)
@@ -95,4 +103,9 @@ def __exit__(self, type, value, traceback):
         compare_exit_code = ExecutionEnv.report_summary(junit_compare_reporter, "Comparison Test suite")
         exit_code = ExecutionEnv.report_summary(junit_reporter, "Performance Regression Test suite")

-        sys.exit(0 if 0 == exit_code == compare_exit_code else 3)
+        self.exit_code = 0 if 0 == exit_code == compare_exit_code else 3
+
+        # Returning False ensures that this __exit__ method does not swallow exceptions;
+        # otherwise exceptions raised inside the context are not reported.
+        return False
+
diff --git a/tests/performance/runs/storage.py b/tests/performance/runs/storage.py
index 6db69716c..1f7e0c421 100644
--- a/tests/performance/runs/storage.py
+++ b/tests/performance/runs/storage.py
@@ -35,10 +35,11 @@

 class Storage():
     """Class to store and retrieve artifacts"""

-    def __init__(self, path, env_name):
+    def __init__(self, path, env_name, compare_with):
         self.artifacts_dir = path
         self.current_run_name = os.path.basename(path)
         self.env_name = env_name
+        self.compare_with = compare_with

     def get_dir_to_compare(self):
         """get the artifacts dir to compare to"""

     def store_results(self):
         """Store the results"""

     @staticmethod
-    def get_latest(names, env_name, exclude_name):
+    def get_latest(names, env_name, exclude_name, compare_with):
         """
-        Get latest directory for same env_name name given a list of them.
-        :param names: list of folder names in the format env_name___commitid__timestamp
+        Get the latest run directory for the given env_name and compare_with commit id.
+        :param names: list of folder names in the format env_name__commitid__timestamp
@@ -59,7 +60,8 @@
         latest_run = ''
         for run_name in names:
             run_name_list = run_name.split('__')
-            if env_name == run_name_list[0] and run_name != exclude_name:
+            if env_name == run_name_list[0] and compare_with == run_name_list[1]\
+                    and run_name != exclude_name:
                 if int(run_name_list[2]) > max_ts:
                     max_ts = int(run_name_list[2])
                     latest_run = run_name
@@ -76,7 +78,7 @@
     def get_dir_to_compare(self):
         """Get latest run directory name to be compared with"""
         parent_dir = pathlib.Path(self.artifacts_dir).parent
         names = [di for di in os.listdir(parent_dir) if os.path.isdir(os.path.join(parent_dir, di))]
-        latest_run = self.get_latest(names, self.env_name, self.current_run_name)
+        latest_run = self.get_latest(names, self.env_name, self.current_run_name, self.compare_with)
         return os.path.join(parent_dir, latest_run), latest_run
@@ -96,7 +98,7 @@
             for o in result.get('CommonPrefixes'):
                 run_names.append(o.get('Prefix')[:-1])

-            latest_run = self.get_latest(run_names, self.env_name, self.current_run_name)
+            latest_run = self.get_latest(run_names, self.env_name, self.current_run_name, self.compare_with)
             if not latest_run:
                 logger.info("No run found for env_id %s", self.env_name)
                 return '', ''
diff --git a/tests/performance/runs/taurus/__init__.py b/tests/performance/runs/taurus/__init__.py
index 4a07717ec..973acf64d 100644
--- a/tests/performance/runs/taurus/__init__.py
+++ b/tests/performance/runs/taurus/__init__.py
@@ -18,8 +18,14 @@
 import glob
 import shutil
 import os
+import sys
+import logging

 from .reader import get_mon_metrics_list
+from utils.pyshell import run_process
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO)


 def get_taurus_options(artifacts_dir, jmeter_path=None):
@@ -29,7 +35,7 @@ def get_taurus_options(artifacts_dir, jmeter_path=None):
     options.append('-o modules.jmeter.path={}'.format(jmeter_path))
     options.append('-o settings.artifacts-dir={}'.format(artifacts_dir))
     options.append('-o modules.console.disable=true')
-    options.append('-o settings.env.BASEDIR={}'.format(artifacts_dir))
+    options.append('-o settings.env.ARTIFACTS_DIR={}'.format(artifacts_dir))
     options_str = ' '.join(options)

     return options_str
@@ -61,3 +67,17 @@ def update_taurus_metric_files(suite_artifacts_dir, test_file):
     metrics_log_file = os.path.join(suite_artifacts_dir, "local_monitoring_logs.csv")
     if os.path.exists(metrics_log_file):
         os.rename(metrics_log_file, metrics_new_file)
+
+    KEEP_LINES = 10000
+
+    def handle_big_files(name):
+        """Truncate large Taurus report files, keeping only the first KEEP_LINES records."""
+        report_file = os.path.join(suite_artifacts_dir, name)
+        report_tmp_file = os.path.join(suite_artifacts_dir, "{}_tmp".format(name))
+        if os.path.exists(report_file) and os.stat(report_file).st_size > 1e+7:  # 10MB
+            logger.info("Keeping first {} records from file {} as it is >10MB".format(KEEP_LINES, report_file))
+            run_process("head -{0} {1} > {2}; mv {2} {1};".format(KEEP_LINES, report_file, report_tmp_file))
+
+    handle_big_files("error.jtl")
+    handle_big_files("kpi.jtl")
diff --git a/tests/performance/runs/taurus/reader.py b/tests/performance/runs/taurus/reader.py
index 2222afdbe..7abfdf7cc 100644
--- a/tests/performance/runs/taurus/reader.py
+++ b/tests/performance/runs/taurus/reader.py
@@ -34,9 +34,18 @@ def get_mon_metrics_list(test_yaml_path):
     return metrics


-def get_compare_metric_list(dir, sub_dir):
+def parse_criterion_sec(criterion):
+    """Extract [metric, diff_percent_previous, diff_percent_run] from a criterion section."""
+    subject = criterion["subject"]
+    metric = subject.rsplit('/', 1)
+    metric = metric[1] if len(metric) == 2 else metric[0]
+    diff_percent_prev = criterion.get("diff_percent_previous", None)
+    diff_percent_run = criterion.get("diff_percent_run", None)
+
+    return [metric, diff_percent_prev, diff_percent_run]
+
+
+def get_compare_metric_list_taurus(dir, sub_dir):
     """Utility method to get list of compare monitoring metrics identified by diff_percent property"""
-    diff_percents = []
     metrics = []
     test_yaml = os.path.join(dir, sub_dir, "effective.yml")
     with open(test_yaml) as test_yaml:
         test_yaml = yaml.safe_load(test_yaml)
         for rep_section in test_yaml.get('reporting', []):
             if rep_section.get('module', None) == 'passfail':
                 for criterion in rep_section.get('criteria', []):
                     if isinstance(criterion, dict) and 'monitoring' in criterion.get('class', ''):
-                        subject = criterion["subject"]
-                        metric = subject.rsplit('/', 1)
-                        metric = metric[1] if len(metric) == 2 else metric[0]
-                        diff_percent = criterion.get("diff_percent", None)
+                        metrics.append(parse_criterion_sec(criterion))
+
+    return metrics
+

-                        if diff_percent:
-                            metrics.append(metric)
-                            diff_percents.append(diff_percent)
+def get_compare_metric_list(dir, sub_dir):
+    """Get the list of compare metrics from the compare_criteria section of effective.yml"""
+    metrics = []
+    test_yaml = os.path.join(dir, sub_dir, "effective.yml")
+    with open(test_yaml) as test_yaml:
+        test_yaml = yaml.safe_load(test_yaml)
+        sec = test_yaml.get('compare_criteria', [])
+        if sec:
+            for criterion in sec:
+                if criterion:
+                    metrics.append(parse_criterion_sec(criterion))

-    return metrics, diff_percents
+    return metrics
diff --git a/tests/performance/runs/taurus/x2junit.py b/tests/performance/runs/taurus/x2junit.py
index 0219ef76d..a209a7a5e 100644
--- a/tests/performance/runs/taurus/x2junit.py
+++ b/tests/performance/runs/taurus/x2junit.py
@@ -17,7 +17,12 @@

 import os

+import pandas as pd
+from runs.taurus.reader import get_compare_metric_list
+import html
+import tabulate
+
+from bzt.modules.passfail import DataCriterion
 from junitparser import TestCase, TestSuite, JUnitXml, Skipped, Error, Failure


@@ -33,44 +38,183 @@ def __init__(self, name, artifacts_dir, junit_xml, timer, env_name):
         self.timer = timer
         self.artifacts_dir = artifacts_dir
         self.env_name = env_name
+        self.metrics = None
+        self.metrics_agg_dict = {}
+
+        self.code = 0
+        self.err = ""
+
+        self.ts.tests, self.ts.failures, self.ts.skipped, self.ts.errors = 0, 0, 0, 0

     def __enter__(self):
         return self

+    def add_compare_tests(self):
+        compare_list = get_compare_metric_list(self.artifacts_dir, "")
+        for metric_values in compare_list:
+            col = metric_values[0]
+            diff_percent = metric_values[2]
+            tc = TestCase("{}_diff_run > {}".format(col, diff_percent))
+            if diff_percent is None:
+                tc.result = Skipped("diff_percent_run value is not specified")
+                self.ts.skipped += 1
+            elif self.metrics is None:
+                tc.result = Skipped("Metrics are not captured")
+                self.ts.skipped += 1
+            else:
+                col_metric_values = getattr(self.metrics, col, None)
+                if col_metric_values is None:
+                    tc.result = Error("Metric {} is not captured".format(col))
+                    self.ts.errors += 1
+                elif len(col_metric_values) < 2:
+                    tc.result = Skipped("Not enough values were captured")
+                    self.ts.skipped += 1
+                else:
+                    first_value = col_metric_values.iloc[0]
+                    last_value = col_metric_values.iloc[-1]
+
+                    try:
+                        if last_value == first_value == 0:
+                            diff_actual = 0
+                        else:
+                            diff_actual = (abs(last_value - first_value) / ((last_value + first_value) / 2)) * 100
+
+                        if float(diff_actual) <= float(diff_percent):
+                            self.ts.tests += 1
+                        else:
+                            tc.result = Failure("The first and last values of the run are {} and {}, "
+                                                "with a percent diff of {}".format(first_value, last_value,
+                                                                                   diff_actual))
+                            self.ts.failures += 1
+
+                    except Exception as e:
+                        tc.result = Error("Error while comparing values {}".format(str(e)))
+                        self.ts.errors += 1
+
+            self.ts.add_testcase(tc)
+
+    @staticmethod
+    def casename_to_criteria(test_name):
+        metric = None
+        if ' of ' not in test_name:
+            test_name = "label of {}".format(test_name)
+        try:
+            test_name = html.unescape(html.unescape(test_name))
+            criteria = DataCriterion.string_to_config(test_name)
+        except Exception:
+            return None
+
+        label = criteria["label"].split('/')
+        if len(label) == 2:
+            metric = label[1]
+        return metric
+
+    def percentile_values(self, metric_name):
+        values = {}
+        if self.metrics is not None and metric_name is not None:
+            metric_vals = getattr(self.metrics, metric_name, None)
+            if metric_vals is not None:
+                centile_values = [0, 0.5, 0.9, 0.95, 0.99, 0.999, 1]
+                for centile in centile_values:
+                    val = getattr(metric_vals, 'quantile')(centile)
+                    values.update({str(centile * 100) + "%": val})
+
+        return values
+
+    def update_metrics(self):
+        metrics_file = os.path.join(self.artifacts_dir, "metrics.csv")
+        rows = []
+        agg_dict = {}
+        if os.path.exists(metrics_file):
+            self.metrics = pd.read_csv(metrics_file)
+            centile_values = [0, 0.5, 0.9, 0.95, 0.99, 0.999, 1]
+            header_names = ['test_name', 'metric_name']
+            header_names.extend([str(colname * 100) + "%" for colname in centile_values])
+            header_names.extend(['first_value', 'last_value'])
+            if self.metrics.size:
+                for col in self.metrics.columns:
+                    row = [self.name, str(col)]
+                    metric_vals = getattr(self.metrics, str(col), None)
+                    for centile in centile_values:
+                        row.append(getattr(metric_vals, 'quantile')(centile))
+                    row.extend([metric_vals.iloc[0], metric_vals.iloc[-1]])
+                    # key the aggregates by metric name so that __exit__ can look them up per criterion
+                    agg_dict.update({row[1]: dict(zip(header_names[2:], row[2:]))})
+                    rows.append(row)
+
+                dataframe = pd.DataFrame(rows, columns=header_names)
+                print("Metric percentile values:\n")
+                print(tabulate.tabulate(rows, headers=header_names, tablefmt="grid"))
+                dataframe.to_csv(os.path.join(self.artifacts_dir, "metrics_agg.csv"), index=False)
+
+        self.metrics_agg_dict = agg_dict
+
     def __exit__(self, type, value, traceback):
+        print("bzt exit code: {}".format(self.code))
+
+        self.update_metrics()
         xunit_file = os.path.join(self.artifacts_dir, "xunit.xml")

-        tests, failures, skipped, errors = 0, 0, 0, 0
-        if os.path.exists(xunit_file):
+        if self.code == 1:
+            tc = TestCase(self.name)
+            tc.result = Error(self.err)
+            self.ts.add_testcase(tc)
+        elif os.path.exists(xunit_file):
             xml = JUnitXml.fromfile(xunit_file)
             for i, suite in enumerate(xml):
                 for case in suite:
                     name = "scenario_{}: {}".format(i, case.name)
                     result = case.result
+
+                    metric_name = X2Junit.casename_to_criteria(case.name)
+                    values = self.metrics_agg_dict.get(metric_name, None)
+                    msg = result.message if result else ""
+                    if values:
+                        val_msg = "Actual percentile values are {}".format(values)
+                        msg = "{}. {}".format(msg, val_msg)
+
+                    # Swap Error and Failure so that Taurus results map onto the JUnit semantics used in our reports
                     if isinstance(result, Error):
-                        failures += 1
-                        result = Failure(result.message, result.type)
+                        self.ts.failures += 1
+                        result = Failure(msg, result.type)
                     elif isinstance(result, Failure):
-                        errors += 1
-                        result = Error(result.message, result.type)
+                        self.ts.errors += 1
+                        result = Error(msg, result.type)
                     elif isinstance(result, Skipped):
-                        skipped += 1
+                        self.ts.skipped += 1
+                        result = Skipped(msg, result.type)
                     else:
-                        tests += 1
+                        self.ts.tests += 1
                     tc = TestCase(name)
                     tc.result = result
                     self.ts.add_testcase(tc)
         else:
             tc = TestCase(self.name)
-            tc.result = Skipped()
+            tc.result = Skipped("Skipped criteria test cases as the Taurus XUnit file was not generated.")
             self.ts.add_testcase(tc)

+        self.add_compare_tests()
+
         self.ts.hostname = self.env_name
         self.ts.timestamp = self.timer.start
         self.ts.time = self.timer.diff()
-        self.ts.tests = tests
-        self.ts.failures = failures
-        self.ts.skipped = skipped
-        self.ts.errors = errors
         self.ts.update_statistics()
         self.junit_xml.add_testsuite(self.ts)
+
+        # Returning False ensures that this __exit__ method does not swallow exceptions;
+        # otherwise exceptions raised inside the context are not reported.
+        return False
diff --git a/tests/performance/tests/api_description/api_description.jmx b/tests/performance/tests/api_description/api_description.jmx
index a026cb312..69a31b5cf 100644
--- a/tests/performance/tests/api_description/api_description.jmx
+++ b/tests/performance/tests/api_description/api_description.jmx
@@ -1,7 +1,7 @@
 
-  
+  
     false
     true
diff --git a/tests/performance/tests/api_description/api_description.yaml b/tests/performance/tests/api_description/api_description.yaml
index da3316228..bb2ec1e88 100644
--- a/tests/performance/tests/api_description/api_description.yaml
+++ b/tests/performance/tests/api_description/api_description.yaml
@@ -1,68 +1,19 @@
----
-execution:
-- concurrency: 10
-  ramp-up: 1s
-  hold-for: 30s
-  scenario: api_description
-
-scenarios:
-  api_description:
-    script: api_description.jmx
-
-modules:
-  server_local_monitoring:
-    class : metrics_monitoring_inproc.Monitor
-
-services:
-  - module: shellexec
-
prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss reporting: - module: passfail criteria: # Inbuilt Criteria - - success of ManagementAPIDescription<${MGMT_DESC_SUCC}, stop as failed - - success of InferenceAPIDescription<${INFR_DESC_SUCC}, stop as failed - - avg-rt of ManagementAPIDescription>${MGMT_DESC_RT}, stop as failed - - avg-rt of InferenceAPIDescription>${INFR_DESC_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '<' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 5s - stop : true - fail : true + - success of ManagementAPIDescription<${MGMT_DESC_SUCC} for 10s, ${STOP_ALIAS} as failed + - avg-rt of ManagementAPIDescription>${MGMT_DESC_AVG_RT}, ${STOP_ALIAS} as failed +# # Custom Criteria # - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file +# subject: ServerLocalClient/total_processes +# condition: '<' +# threshold: ${TOTAL_PROCS} +# timeframe: 1s +# stop : ${STOP} +# fail : true + +scenarios: + ~scenario_0: + script: api_description.jmx \ No newline at end of file diff --git a/tests/performance/tests/api_description/environments/xlarge.yaml b/tests/performance/tests/api_description/environments/xlarge.yaml index f7bc5561d..be4e38930 100644 --- a/tests/performance/tests/api_description/environments/xlarge.yaml +++ b/tests/performance/tests/api_description/environments/xlarge.yaml @@ -1,10 +1,52 @@ --- settings: env: - MGMT_DESC_SUCC: 100% - INFR_DESC_SUCC: 100% - MGMT_DESC_RT : 10ms - INFR_DESC_RT : 10ms + MGMT_DESC_SUCC: 80% + MGMT_DESC_AVG_RT: 30ms + + API_LABEL : ManagementAPIDescription + API_SUCCESS : 80% + API_AVG_RT : 30ms + + TOTAL_WORKERS: 0 + TOTAL_WORKERS_MEM: 0 + TOTAL_WORKERS_FDS: 0 + + TOTAL_MEM : 1500098304 TOTAL_PROCS : 1 - TOTAL_FDS : 73 - TOTAL_MEM: 100000000 #100MB + TOTAL_FDS : 10 + + FRNTEND_MEM: 1500098304 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 0 + TOTAL_WORKERS_FDS_RUN_DIFF: 0 + TOTAL_MEM_RUN_DIFF: 185 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 185 + FRNTEND_MEM_RUN_DIFF: 30 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 30s + SCRIPT : api_description.jmx + + STOP : '' + STOP_ALIAS: continue \ No newline at end of file diff --git a/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.jmx 
b/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.jmx index d119e9b7f..de4e2a00d 100644 --- a/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.jmx +++ b/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,12 +16,12 @@ model1 - ${__P(model_name1,resnet-152)} + ${__P(RESNET_152_BATCH_NAME,resnet-152-batch)} = model2 - ${__P(model_name2,squeezenet_v1.1)} + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml b/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml index 73e9ab957..54d6b8475 100644 --- a/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml +++ b/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml @@ -1,96 +1,22 @@ --- -execution: -- concurrency: 10 - ramp-up: 5s - hold-for: 20s - scenario: Inference - scenarios: - Inference: + scenario_0: script: batch_and_single_inference.jmx -modules: - server_local_monitoring: - # metrics_monitoring_inproc and dependencies should be in python path - class : metrics_monitoring_inproc.Monitor # monitoring class. services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/examples/resnet-152-batching/resnet-152.mar&batch_size=8&max_batch_delay=50" + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_152_BATCH_URL}&batch_size=8&max_batch_delay=50" # uncomment below and comment prev and use downloaded model with model-store - #- curl -s -X POST "http://localhost:8081/models?url=resnet-152.mar&batch_size=8&max_batch_delay=60&initial_workers=1" - - "curl -s -X PUT http://localhost:8081/models/resnet-152?min_worker=2&synchronous=true" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=2&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring # should be added in modules section - ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor - - interval: 1s - logging : True - metrics: - - sum_workers_memory_rss - - sum_workers_file_descriptors - - total_workers - - orphans + #- curl -s -X POST "http://localhost:8081/models?url=${RESNET_152_BATCH_NAME}.mar&batch_size=8&max_batch_delay=60&initial_workers=1" + - "curl -s -X PUT http://localhost:8081/models/${RESNET_152_BATCH_NAME}?min_worker=2&synchronous=true" + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=2&synchronous=true" reporting: - module: passfail criteria: - - subject: avg-rt # required - label: 'Inference1' # optional, default is '' - condition: '>' # required - threshold: ${INFR1_RT} # required - logic: for # optional, logic to aggregate values within timeframe. - # Default 'for' means take latest, - # 'within' and 'over' means take sum/avg of all values within interval - timeframe: 1s # optional, default is none - stop: true # optional, default is true. 
false for nonstop testing until the end
-    fail: true  # optional, default is true
-  - subject: avg-rt  # required
-    label: 'Inference2'  # optional, default is ''
-    condition: '>'  # required
-    threshold: ${INFR2_RT}  # required
-    logic: for  # optional, logic to aggregate values within timeframe.
-                # Default 'for' means take latest,
-                # 'within' and 'over' means take sum/avg of all values within interval
-    timeframe: 1s  # optional, default is none
-    stop: true  # optional, default is true. false for nonstop testing until the end
-    fail: true  # optional, default is true
-  - class: bzt.modules.monitoring.MonitoringCriteria
-    subject: ServerLocalClient/sum_workers_memory_rss
-    condition: '>'
-    threshold: ${TOTAL_WORKERS_MEM}
-    timeframe: 1s
-    stop : true
-    fail : true
-    diff_percent : 30
-  - class: bzt.modules.monitoring.MonitoringCriteria
-    subject: ServerLocalClient/orphans
-    condition: '>'
-    threshold: ${TOTAL_ORPHANS}
-    timeframe: 1s
-    stop : true
-    fail : true
-    diff_percent : 0
-  - class: bzt.modules.monitoring.MonitoringCriteria
-    subject: ServerLocalClient/total_workers
-    condition: '>'
-    threshold: ${TOTAL_WORKERS}
-    timeframe: 1s
-    stop: true
-    fail: true
-    diff_percent: 0
-  - class: bzt.modules.monitoring.MonitoringCriteria
-    subject: ServerLocalClient/sum_workers_file_descriptors
-    condition: '>'
-    threshold: ${TOTAL_WORKERS_FDS}
-    timeframe: 1s
-    stop: true
-    fail: true
-    diff_percent: 30
+    # Inbuilt Criteria
+    - success of Inference2<${INF2_SUCC} for 10s, ${STOP_ALIAS} as failed
+    - avg-rt of Inference2>${INF2_AVG_RT}, ${STOP_ALIAS} as failed
\ No newline at end of file
diff --git a/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml b/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml
index 97307b690..cd22dcdb3 100644
--- a/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml
+++ b/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml
@@ -1,9 +1,55 @@
 ---
 settings:
   env:
-    INFR1_RT : 6s
-    INFR2_RT : 0.08s
-    TOTAL_WORKERS_MEM : 4000000000 #4GB
-    TOTAL_WORKERS : 9
+    API_LABEL : Inference1
+    API_SUCCESS : 80%
+    API_AVG_RT : 30ms
+
+    INF2_SUCC: 80%
+    INF2_AVG_RT: 30ms
+
+    TOTAL_WORKERS: 5
+    TOTAL_WORKERS_MEM: 999686400
+    TOTAL_WORKERS_FDS: 60
+
+    TOTAL_MEM : 1292481024
+    TOTAL_PROCS : 7
+    TOTAL_FDS : 230
+
+    FRNTEND_MEM: 435241216
+
     TOTAL_ORPHANS : 0
-    TOTAL_WORKERS_FDS : 78
+    TOTAL_ZOMBIES : 0
+
+
+    ## Percent diff values to do a compare across runs
+    TOTAL_WORKERS_PREV_DIFF: 0
+    TOTAL_WORKERS_MEM_PREV_DIFF: 30
+    TOTAL_WORKERS_FDS_PREV_DIFF: 30
+    TOTAL_MEM_PREV_DIFF: 30
+    TOTAL_PROCS_PREV_DIFF: 30
+    TOTAL_FDS_PREV_DIFF: 30
+    FRNTEND_MEM_PREV_DIFF: 30
+    TOTAL_ORPHANS_PREV_DIFF: 0
+    TOTAL_ZOMBIES_PREV_DIFF: 0
+
+    TOTAL_WORKERS_RUN_DIFF: 0
+    TOTAL_WORKERS_MEM_RUN_DIFF: 45
+    TOTAL_WORKERS_FDS_RUN_DIFF: 30
+    TOTAL_MEM_RUN_DIFF: 45
+    TOTAL_PROCS_RUN_DIFF: 30
+    TOTAL_FDS_RUN_DIFF: 30
+    FRNTEND_MEM_RUN_DIFF: 45
+    TOTAL_ORPHANS_RUN_DIFF: 0
+    TOTAL_ZOMBIES_RUN_DIFF: 0
+
+
+    CONCURRENCY : 10
+    RAMP-UP : 1s
+    HOLD-FOR : 300s
+    SCRIPT : batch_and_single_inference.jmx
+
+    STOP : ''
+    STOP_ALIAS: continue
+
+
diff --git a/tests/performance/tests/batch_inference/batch_inference.jmx b/tests/performance/tests/batch_inference/batch_inference.jmx
index 885fac295..111799a57 100644
--- a/tests/performance/tests/batch_inference/batch_inference.jmx
+++ b/tests/performance/tests/batch_inference/batch_inference.jmx
@@ -1,7 +1,7 @@
 
-  
+  
     false
     true
@@ -16,7 +16,7 @@
         
           model
-
${__P(model_name,resnet-152)} + ${__P(RESNET_152_BATCH_NAME,resnet-152-batch)} = diff --git a/tests/performance/tests/batch_inference/batch_inference.yaml b/tests/performance/tests/batch_inference/batch_inference.yaml index 7c4485c06..ef8c4c700 100644 --- a/tests/performance/tests/batch_inference/batch_inference.yaml +++ b/tests/performance/tests/batch_inference/batch_inference.yaml @@ -1,84 +1,14 @@ --- -execution: -- concurrency: 10 - ramp-up: 5s - hold-for: 20s - scenario: Inference - scenarios: - Inference: + scenario_0: script: batch_inference.jmx -modules: - server_local_monitoring: - # metrics_monitoring_inproc and dependencies should be in python path - class : metrics_monitoring_inproc.Monitor # monitoring class. services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/examples/resnet-152-batching/resnet-152.mar&batch_size=8&max_batch_delay=50" + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_152_BATCH_URL}&batch_size=8&max_batch_delay=50" # uncomment below and comment prev and use downloaded model with model-store - #- "curl -s -X POST http://localhost:8081/models?url=resnet-152.mar&batch_size=8&max_batch_delay=60&initial_workers=1" - - "curl -s -X PUT http://localhost:8081/models/resnet-152?min_worker=2&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring # should be added in modules section - ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor - - interval: 1s - logging : True - metrics: - - sum_workers_memory_rss - - sum_workers_file_descriptors - - total_workers - - orphans + #- "curl -s -X POST http://localhost:8081/models?url=${RESNET_152_BATCH_NAME}.mar&batch_size=8&max_batch_delay=60&initial_workers=1" + - "curl -s -X PUT http://localhost:8081/models/${RESNET_152_BATCH_NAME}?min_worker=2&synchronous=true" -reporting: -- module: passfail - criteria: - - subject: avg-rt # required - label: 'Inference' # optional, default is '' - condition: '>' # required - threshold: ${INFR_RT} # required - logic: for # optional, logic to aggregate values within timeframe. - # Default 'for' means take latest, - # 'within' and 'over' means take sum/avg of all values within interval - timeframe: 1s # optional, default is none - stop: true # optional, default is true. 
false for nonstop testing until the end - fail: true # optional, default is true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_memory_rss - condition: '>' - threshold: ${TOTAL_WORKERS_MEM} - timeframe: 1s - stop : true - fail : true - diff_percent : 30 - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/orphans - condition: '>' - threshold: ${TOTAL_ORPHANS} - timeframe: 1s - stop : true - fail : true - diff_percent : 0 - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_workers - condition: '>' - threshold: ${TOTAL_WORKERS} - timeframe: 1s - stop: true - fail: true - diff_percent: 0 - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_file_descriptors - condition: '>' - threshold: ${TOTAL_WORKERS_FDS} - timeframe: 1s - stop: true - fail: true - diff_percent: 30 diff --git a/tests/performance/tests/batch_inference/environments/xlarge.yaml b/tests/performance/tests/batch_inference/environments/xlarge.yaml index 23a443aaf..5ee95f963 100644 --- a/tests/performance/tests/batch_inference/environments/xlarge.yaml +++ b/tests/performance/tests/batch_inference/environments/xlarge.yaml @@ -1,8 +1,51 @@ --- settings: env: - INFR_RT : 1.5s - TOTAL_WORKERS_MEM : 3000000000 #3GB - TOTAL_WORKERS : 4 + API_LABEL : Inference + API_SUCCESS : 80% + API_AVG_RT : 30ms + + TOTAL_WORKERS: 4 + TOTAL_WORKERS_MEM: 3000000000 + TOTAL_WORKERS_FDS: 400 + + TOTAL_MEM : 4000000000 + TOTAL_PROCS : 7 + TOTAL_FDS : 200 + + FRNTEND_MEM: 1000000000 + TOTAL_ORPHANS : 0 - TOTAL_WORKERS_FDS : 38 \ No newline at end of file + TOTAL_ZOMBIES : 0 + + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 50 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 45 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 80 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : batch_inference.jmx + + STOP : '' + STOP_ALIAS: continue + + diff --git a/tests/performance/tests/examples_local_criteria/environments/xlarge.yaml b/tests/performance/tests/examples_local_criteria/environments/xlarge.yaml index 6c3835292..6cbbedc7d 100644 --- a/tests/performance/tests/examples_local_criteria/environments/xlarge.yaml +++ b/tests/performance/tests/examples_local_criteria/environments/xlarge.yaml @@ -4,5 +4,9 @@ settings: FAIL : 100% P90 : 290ms AVG_RT : 1s - TOTAL_WORKERS_MEM : 132000000 - PERCENT_DIFF_TOTAL_WORKERS_MEM : 5 + TOTAL_WORKERS_MEM : 135000000 + TOTAL_WORKERS_MEM_DIFF : 5 + + STOP : false + +~compare_criteria: diff --git a/tests/performance/tests/examples_local_criteria/examples_local_criteria.jmx b/tests/performance/tests/examples_local_criteria/examples_local_criteria.jmx index 0d60d304c..618b88095 100644 --- a/tests/performance/tests/examples_local_criteria/examples_local_criteria.jmx +++ b/tests/performance/tests/examples_local_criteria/examples_local_criteria.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ cnn_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + 
${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} = The url from where to fetch noop model from @@ -34,7 +34,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml b/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml index 9d8b87907..3adbf0a48 100644 --- a/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml +++ b/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml @@ -1,11 +1,11 @@ --- -execution: +~execution: - concurrency: 1 ramp-up: 5s hold-for: 20s scenario: Inference -scenarios: +~scenarios: Inference: script: examples_local_criteria.jmx @@ -14,15 +14,34 @@ modules: # metrics_monitoring_inproc and dependencies should be in python path class : metrics_monitoring_inproc.Monitor # monitoring class. -services: +~services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" + - "curl -s -O $INPUT_IMG_URL" + - "mkdir /tmp/ts_model_store" + - "ps aux | grep '$SERVER_PROCESS_NAME' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "$SERVER_CMD --start --ncs --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" + - "$SERVER_CMD --stop > /dev/null 2>&1" + - "rm $INPUT_IMG_PATH" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + env: + SERVER_CMD : ${SERVER_CMD} + ARTIFACTS_DIR : ${ARTIFACTS_DIR} + SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} + INPUT_IMG_URL : ${INPUT_IMG_URL} + INPUT_IMG_PATH : ${INPUT_IMG_PATH} + + RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} + RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} + SQZNET_URL: ${SQZNET_URL} + SQZNET_NAME: ${SQZNET_NAME} + RESNET_URL: ${RESNET_URL} + RESNET_NAME: ${RESNET_NAME} + - module: server_local_monitoring # should be added in modules section ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor - interval: 1s @@ -33,7 +52,14 @@ services: - mem - sum_workers_memory_rss -reporting: +~reporting: +- module: passfail # this is to enable passfail module +- module: junit-xml + data-source: pass-fail +- module: junit-xml + data-source: sample-labels +- module: final-stats + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv - module: passfail criteria: - fail >${FAIL}, stop as failed @@ -44,6 +70,9 @@ reporting: condition: '>' threshold: ${TOTAL_WORKERS_MEM} timeframe: 1s - stop : true + stop : ${STOP} fail : true - diff_percent : ${PERCENT_DIFF_TOTAL_WORKERS_MEM} + diff_percent_previous : ${TOTAL_WORKERS_MEM_DIFF} + +~compare_criteria: + - \ No newline at end of file diff --git a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.jmx b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.jmx index 0d60d304c..618b88095 100644 --- a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.jmx +++ b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ cnn_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} = The url from where to fetch noop model from @@ -34,7 +34,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = 
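Note on the config layering used by these test files: Taurus deep-merges every YAML it loads, and a key prefixed with ~ (as in ~execution, ~services, ~reporting, ~compare_criteria) replaces the matching section inherited from global_config.yaml instead of merging into it. A minimal sketch with placeholder values, not numbers taken from this suite:

~execution:            # replaces the shared execution block outright
- concurrency: 1       # placeholder value for illustration
  ramp-up: 5s
  hold-for: 20s
  scenario: Inference  # test-local scenario instead of the shared scenario_0

An empty ~compare_criteria: key, as in the environment file above, accordingly nulls out the global compare criteria for that test.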
diff --git a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml index d00226470..ee8fdac7c 100644 --- a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml +++ b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml @@ -1,10 +1,11 @@ --- -execution: +~execution: - concurrency: 1 ramp-up: 5s hold-for: 20s scenario: Inference -scenarios: + +~scenarios: Inference: script: examples_local_monitoring.jmx @@ -13,15 +14,34 @@ modules: # metrics_monitoring_inproc and dependencies should be in python path class : metrics_monitoring_inproc.Monitor # monitoring class. -services: +~services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" + - "curl -s -O $INPUT_IMG_URL" + - "mkdir /tmp/ts_model_store" + - "ps aux | grep '$SERVER_PROCESS_NAME' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "$SERVER_CMD --start --ncs --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" + - "$SERVER_CMD --stop > /dev/null 2>&1" + - "rm $INPUT_IMG_PATH" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + env: + SERVER_CMD : ${SERVER_CMD} + ARTIFACTS_DIR : ${ARTIFACTS_DIR} + SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} + INPUT_IMG_URL : ${INPUT_IMG_URL} + INPUT_IMG_PATH : ${INPUT_IMG_PATH} + + RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} + RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} + SQZNET_URL: ${SQZNET_URL} + SQZNET_NAME: ${SQZNET_NAME} + RESNET_URL: ${RESNET_URL} + RESNET_NAME: ${RESNET_NAME} + - module: server_local_monitoring # should be added in modules section ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor - interval: 1s @@ -29,4 +49,10 @@ services: - cpu - disk-space - mem - - sum_workers_memory_percent \ No newline at end of file + - sum_workers_memory_percent + +~reporting: + - module: passfail + +~compare_criteria: + - diff --git a/tests/performance/tests/examples_remote_criteria/environments/xlarge.yaml b/tests/performance/tests/examples_remote_criteria/environments/xlarge.yaml index 674a6c1ff..6ad41860d 100644 --- a/tests/performance/tests/examples_remote_criteria/environments/xlarge.yaml +++ b/tests/performance/tests/examples_remote_criteria/environments/xlarge.yaml @@ -5,3 +5,6 @@ settings: P90 : 250ms AVG_RT : 1s TOTAL_WORKERS_FDS : 80 + TOTAL_WORKERS_FDS_DIFF : 35 + +~compare_criteria: \ No newline at end of file diff --git a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.jmx b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.jmx index 0d60d304c..618b88095 100644 --- a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.jmx +++ b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ cnn_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} = The url from where to fetch noop model from @@ -34,7 +34,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml 
b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml index 0c3c206d1..10b028895 100644 --- a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml +++ b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml @@ -1,47 +1,74 @@ -execution: +~execution: - concurrency: 4 ramp-up: 1s hold-for: 20s scenario: Inference -scenarios: +~scenarios: Inference: script: examples_remote_criteria.jmx -services: +~services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" + - "curl -s -O $INPUT_IMG_URL" + - "mkdir /tmp/ts_model_store" + - "ps aux | grep '$SERVER_PROCESS_NAME' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "$SERVER_CMD --start --ncs --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" + - "$SERVER_CMD --stop > /dev/null 2>&1" + - "rm $INPUT_IMG_PATH" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + env: + SERVER_CMD : ${SERVER_CMD} + ARTIFACTS_DIR : ${ARTIFACTS_DIR} + SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} + INPUT_IMG_URL : ${INPUT_IMG_URL} + INPUT_IMG_PATH : ${INPUT_IMG_PATH} + + RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} + RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} + SQZNET_URL: ${SQZNET_URL} + SQZNET_NAME: ${SQZNET_NAME} + RESNET_URL: ${RESNET_URL} + RESNET_NAME: ${RESNET_NAME} + - module: monitoring server-agent: - address: localhost:9009 # metric monitoring service address - label: mms-inference-server # if you specify label, it will be used in reports instead of ip:port + label: model-server # if you specify label, it will be used in reports instead of ip:port interval: 1s # polling interval logging: True # those logs will be saved to "SAlogs_192.168.0.1_9009.csv" in the artifacts dir metrics: # metrics should be supported by monitoring service - - sum_workers_cpu_percent # cpu percent used by all the mms server processes and workers + - sum_workers_cpu_percent # cpu percent used by all the Model Server processes and workers - sum_workers_memory_percent - sum_workers_file_descriptors - - total_workers # no of mms workers + - total_workers # number of Model Server workers -reporting: +~reporting: +- module: passfail # this is to enable passfail module +- module: junit-xml + data-source: pass-fail +- module: junit-xml + data-source: sample-labels +- module: final-stats + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv - module: passfail criteria: - fail >${FAIL}, stop as failed - p90 >${P90} , stop as failed - avg-rt >${AVG_RT} , stop as failed - class: bzt.modules.monitoring.MonitoringCriteria - subject: mms-inference-server/sum_workers_file_descriptors + subject: model-server/sum_workers_file_descriptors condition: '>' threshold: ${TOTAL_WORKERS_FDS} timeframe: 1s fail: true stop: true - diff_percent : 35 \ No newline at end of file + diff_percent_previous : ${TOTAL_WORKERS_FDS_DIFF} + diff --git a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.jmx b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.jmx index 0d60d304c..618b88095 100644 --- a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.jmx +++ b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ cnn_url - 
https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} = The url from where to fetch noop model from @@ -34,7 +34,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml index 235c3b803..e3b71cf4e 100644 --- a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml +++ b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml @@ -1,34 +1,56 @@ -execution: +~execution: - concurrency: 4 ramp-up: 1s hold-for: 20s scenario: Inference -scenarios: +~scenarios: Inference: script: examples_remote_monitoring.jmx -services: +~services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" + - "curl -s -O $INPUT_IMG_URL" + - "mkdir /tmp/ts_model_store" + - "ps aux | grep '$SERVER_PROCESS_NAME' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "$SERVER_CMD --start --ncs --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" + - "$SERVER_CMD --stop > /dev/null 2>&1" + - "rm $INPUT_IMG_PATH" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + env: + SERVER_CMD : ${SERVER_CMD} + ARTIFACTS_DIR : ${ARTIFACTS_DIR} + SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} + INPUT_IMG_URL : ${INPUT_IMG_URL} + INPUT_IMG_PATH : ${INPUT_IMG_PATH} + + RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} + RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} + SQZNET_URL: ${SQZNET_URL} + SQZNET_NAME: ${SQZNET_NAME} + RESNET_URL: ${RESNET_URL} + RESNET_NAME: ${RESNET_NAME} - module: monitoring server-agent: - address: localhost:9009 # metric monitoring service address - label: mms-inference-server # if you specify label, it will be used in reports instead of ip:port + label: model-server # if you specify label, it will be used in reports instead of ip:port interval: 1s # polling interval logging: True # those logs will be saved to "SAlogs_192.168.0.1_9009.csv" in the artifacts dir metrics: # metrics should be supported by monitoring service - - sum_all_cpu_percent # cpu percent used by all the mms server processes and workers + - sum_all_cpu_percent # cpu percent used by all the Model Server processes and workers - sum_workers_memory_percent - frontend_file_descriptors - - total_workers # no of mms workers + - total_workers # number of Model Server workers + +~reporting: + - module: passfail +~compare_criteria: diff --git a/tests/performance/tests/examples_starter/examples_starter.jmx b/tests/performance/tests/examples_starter/examples_starter.jmx index 0d60d304c..618b88095 100644 --- a/tests/performance/tests/examples_starter/examples_starter.jmx +++ b/tests/performance/tests/examples_starter/examples_starter.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ cnn_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} = The url from where to fetch noop model from @@ -34,7 +34,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = diff --git a/tests/performance/tests/examples_starter/examples_starter.yaml 
b/tests/performance/tests/examples_starter/examples_starter.yaml index ac6aaa50b..ee191afaf 100644 --- a/tests/performance/tests/examples_starter/examples_starter.yaml +++ b/tests/performance/tests/examples_starter/examples_starter.yaml @@ -1,20 +1,45 @@ --- -execution: -- concurrency: 1 - ramp-up: 1s - hold-for: 40s - scenario: Inference -scenarios: +~execution: + - concurrency: 1 + ramp-up: 1s + hold-for: 40s + scenario: Inference + +~scenarios: Inference: script: examples_starter.jmx -services: +~services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" + - "curl -s -O $INPUT_IMG_URL" + - "mkdir /tmp/ts_model_store" + - "ps aux | grep '$SERVER_PROCESS_NAME' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "$SERVER_CMD --start --ncs --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" + - "$SERVER_CMD --stop > /dev/null 2>&1" + - "rm $INPUT_IMG_PATH" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + env: + SERVER_CMD : ${SERVER_CMD} + ARTIFACTS_DIR : ${ARTIFACTS_DIR} + SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} + INPUT_IMG_URL : ${INPUT_IMG_URL} + INPUT_IMG_PATH : ${INPUT_IMG_PATH} + + RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} + RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} + SQZNET_URL: ${SQZNET_URL} + SQZNET_NAME: ${SQZNET_NAME} + RESNET_URL: ${RESNET_URL} + RESNET_NAME: ${RESNET_NAME} + + +~reporting: + - module: passfail +~compare_criteria: + - diff --git a/tests/performance/tests/global_config.yaml b/tests/performance/tests/global_config.yaml index 94731f4a3..7fd5acfc7 100644 --- a/tests/performance/tests/global_config.yaml +++ b/tests/performance/tests/global_config.yaml @@ -1,17 +1,74 @@ +--- +execution: +- concurrency: ${CONCURRENCY} + ramp-up: ${RAMP-UP} + hold-for: ${HOLD-FOR} + scenario: scenario_0 + +scenarios: + scenario_0: + script: ${SCRIPT} + modules: jmeter: # These are JMeter test case properties. These variables are used in jmx files. # Change the values as per your setup properties: - hostname : 127.0.0.1 # MMS properties - port : 8080 - management_port : 8081 - protocol : http - input_filepath : kitten.jpg # make sure jpg is available at this path + hostname: 127.0.0.1 # Model Server properties + port: 8080 + management_port: 8081 + protocol: http + input_filepath: kitten.jpg # make sure jpg is available at this path # if relative path is provided this will be relative to current working directory -# DO-NOT change properties below unless you know what you are doing. -# They are needed for performance test suite runner script. + server_local_monitoring: + # metrics_monitoring_inproc and dependencies should be in python path + class : metrics_monitoring_inproc.Monitor # monitoring class. 
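+ # How the values above reach the .jmx scripts: a sketch, assuming the
+ # runner forwards suite variables as JMeter properties (the equivalent
+ # of passing -JSQZNET_NAME=... on the JMeter command line). The explicit
+ # forwarding shown here is hypothetical, not part of this config:
+ #
+ # modules:
+ #   jmeter:
+ #     properties:
+ #       SQZNET_NAME: ${SQZNET_NAME}
+ #
+ # Inside a .jmx, ${__P(SQZNET_NAME,squeezenet1_1)} then resolves to that
+ # property and falls back to squeezenet1_1 when the property is unset.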
+ +services: + - module: shellexec + prepare: + - "curl -s -O ${INPUT_IMG_URL}" + - "mkdir /tmp/ts_model_store" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_CMD} --start --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "sleep 20s" + post-process: + - "${SERVER_CMD} --stop > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" + - "rm -r /tmp/ts_model_store" + - "mv logs ${ARTIFACTS_DIR}/model_server_logs" + + env: + SERVER_CMD : ${SERVER_CMD} + ARTIFACTS_DIR : ${ARTIFACTS_DIR} + SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} + INPUT_IMG_URL : ${INPUT_IMG_URL} + INPUT_IMG_PATH : ${INPUT_IMG_PATH} + + RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} + RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} + SQZNET_URL: ${SQZNET_URL} + SQZNET_NAME: ${SQZNET_NAME} + RESNET_URL: ${RESNET_URL} + RESNET_NAME: ${RESNET_NAME} + + + - module: server_local_monitoring # should be added in modules section + ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor + - interval: 1s + logging : True + metrics: + - sum_workers_memory_rss + - sum_workers_file_descriptors + - total_workers + - orphans + - zombies + - frontend_memory_rss + - sum_all_memory_rss + - total_processes + - sum_all_file_descriptors + reporting: - module: passfail # this is to enable passfail module - module: junit-xml @@ -19,8 +76,129 @@ reporting: - module: junit-xml data-source: sample-labels - module: final-stats - dump-csv : ${BASEDIR}/final_stats.csv + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv + +- module: passfail + criteria: + # API requests KPI criteria + - success of ${API_LABEL}<${API_SUCCESS} for 10s, stop as failed + - avg-rt of ${API_LABEL}>${API_AVG_RT}, ${STOP_ALIAS} as failed +# +# # Monitoring metrics criteria +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/total_workers +# condition: '>' +# threshold: ${TOTAL_WORKERS} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# diff_percent_previous: ${TOTAL_WORKERS_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_workers_memory_rss +# condition: '>' +# threshold: ${TOTAL_WORKERS_MEM} +# timeframe: 5s +# stop : ${STOP} +# fail : true +# diff_percent_previous : ${TOTAL_WORKERS_MEM_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_workers_file_descriptors +# condition: '>' +# threshold: ${TOTAL_WORKERS_FDS} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# diff_percent_previous: ${TOTAL_WORKERS_FDS_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_all_memory_rss +# condition: '>' +# threshold: ${TOTAL_MEM} +# timeframe: 5s +# stop : ${STOP} +# fail : true +# diff_percent_previous: ${TOTAL_MEM_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/total_processes +# condition: '>' +# threshold: ${TOTAL_PROCS} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_all_file_descriptors +# condition: '>' +# threshold: ${TOTAL_FDS} +# timeframe: 1s +# stop: ${STOP} +# fail: true +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/frontend_memory_rss +# condition: '>' +# threshold: ${FRNTEND_MEM} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# diff_percent_previous: ${FRNTEND_MEM_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/orphans +# condition: '>' +# 
threshold: ${TOTAL_ORPHANS} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# diff_percent_previous: ${TOTAL_ORPHANS_DIFF} +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/zombies +# condition: '>' +# threshold: ${TOTAL_ZOMBIES} +# timeframe: 5s +# stop: ${STOP} +# fail: true +# diff_percent_previous: ${TOTAL_ZOMBIES_DIFF} + +compare_criteria: + # Monitoring metrics criteria + - subject: ServerLocalClient/total_workers + diff_percent_previous: ${TOTAL_WORKERS_PREV_DIFF} + diff_percent_run: ${TOTAL_WORKERS_RUN_DIFF} + - subject: ServerLocalClient/sum_workers_memory_rss + diff_percent_previous: ${TOTAL_WORKERS_MEM_PREV_DIFF} + diff_percent_run : ${TOTAL_WORKERS_MEM_RUN_DIFF} + - subject: ServerLocalClient/sum_workers_file_descriptors + diff_percent_previous: ${TOTAL_WORKERS_FDS_PREV_DIFF} + diff_percent_run: ${TOTAL_WORKERS_FDS_RUN_DIFF} +# - subject: ServerLocalClient/sum_all_memory_rss +# diff_percent_previous: ${TOTAL_MEM_PREV_DIFF} +# diff_percent_run: ${TOTAL_MEM_RUN_DIFF} + - subject: ServerLocalClient/total_processes + diff_percent_previous: ${TOTAL_PROCS_PREV_DIFF} + diff_percent_run: ${TOTAL_PROCS_RUN_DIFF} + - subject: ServerLocalClient/sum_all_file_descriptors + diff_percent_previous : ${TOTAL_FDS_PREV_DIFF} + diff_percent_run : ${TOTAL_FDS_RUN_DIFF} +# - subject: ServerLocalClient/frontend_memory_rss +# diff_percent_previous: ${FRNTEND_MEM_PREV_DIFF} +# diff_percent_run: ${FRNTEND_MEM_RUN_DIFF} + - subject: ServerLocalClient/orphans + diff_percent_previous: ${TOTAL_ORPHANS_PREV_DIFF} + diff_percent_run: ${TOTAL_ORPHANS_RUN_DIFF} + - subject: ServerLocalClient/zombies + diff_percent_previous: ${TOTAL_ZOMBIES_PREV_DIFF} + diff_percent_run: ${TOTAL_ZOMBIES_RUN_DIFF} + settings: env: - BASEDIR : '.' + ARTIFACTS_DIR : '.' 
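+ # Note on the compare_criteria block above: it is not stock bzt
+ # configuration. Judging from the rest of this patch (runs/compare.py,
+ # runs/taurus/x2junit.py), it is consumed by the suite's own comparison
+ # step. The assumed reading: diff_percent_previous bounds the allowed
+ # percentage drift of a metric against the previous run of the same
+ # test, while diff_percent_run bounds the drift between the two runs
+ # (for example, two commits) being compared; the exact semantics are
+ # suite-defined rather than Taurus-defined.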
+ SERVER_CMD : "multi-model-server" + SERVER_PROCESS_NAME : "[c]om.amazonaws.ml.mms.ModelServer" + INPUT_IMG_URL: "https://s3.amazonaws.com/model-server/inputs/kitten.jpg" + INPUT_IMG_PATH: "kitten.jpg" + + RESNET_152_BATCH_URL : "https://s3.amazonaws.com/model-server/model_archive_1.0/examples/resnet-152-batching/resnet-152.mar" + RESNET_152_BATCH_NAME : "resnet-152-batch" + SQZNET_URL : "https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" + SQZNET_NAME : "squeezenet_v1.1" + RESNET_URL : "https://s3.amazonaws.com/model-server/model_archive_1.0/resnet-18.mar" + RESNET_NAME : "resnet-18" + diff --git a/tests/performance/tests/health_check/environments/xlarge.yaml b/tests/performance/tests/health_check/environments/xlarge.yaml index 689a5d66b..2a8e2332d 100644 --- a/tests/performance/tests/health_check/environments/xlarge.yaml +++ b/tests/performance/tests/health_check/environments/xlarge.yaml @@ -1,8 +1,49 @@ + --- settings: env: - HLTH_CHK_SUCC : 100% - HLTH_CHK_RT : 14ms + API_LABEL : HealthCheck + API_SUCCESS : 80% + API_AVG_RT : 30ms + + TOTAL_WORKERS: 0 + TOTAL_WORKERS_MEM: 0 + TOTAL_WORKERS_FDS: 0 + + TOTAL_MEM : 1500098304 TOTAL_PROCS : 1 - TOTAL_FDS : 67 - TOTAL_MEM : 750000000 #750MB \ No newline at end of file + TOTAL_FDS : 73 + + FRNTEND_MEM: 1500098304 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 0 + TOTAL_WORKERS_FDS_RUN_DIFF: 0 + TOTAL_MEM_RUN_DIFF: 200 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 200 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : health_check.jmx + + STOP : '' #possible values true, false. 
Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/health_check/health_check.jmx b/tests/performance/tests/health_check/health_check.jmx index 422c45cf9..dc699a6be 100644 --- a/tests/performance/tests/health_check/health_check.jmx +++ b/tests/performance/tests/health_check/health_check.jmx @@ -1,7 +1,7 @@ - + false true diff --git a/tests/performance/tests/health_check/health_check.yaml b/tests/performance/tests/health_check/health_check.yaml index 2c8785e3c..3f2d38636 100644 --- a/tests/performance/tests/health_check/health_check.yaml +++ b/tests/performance/tests/health_check/health_check.yaml @@ -1,66 +1,18 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: health_check - -scenarios: - health_check: - script: health_check.jmx - -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - -services: - - module: shellexec - prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss - reporting: - module: passfail criteria: - # Inbuilt Criteria - - success of HealthCheck<${HLTH_CHK_SUCC}, stop as failed - - avg-rt of HealthCheck>${HLTH_CHK_RT}, stop as failed # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} timeframe: 5s - stop : true + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file + +scenarios: + ~scenario_0: + script: health_check.jmx + + diff --git a/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml b/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml index 36b7dc0ad..c854de1cd 100644 --- a/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml +++ b/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml @@ -1,10 +1,51 @@ + --- settings: env: - INFR1_SUCC : 100% + API_LABEL : Inference1 + API_SUCCESS : 80% + API_AVG_RT : 30ms + INFR2_SUCC: 100% - INFR1_RT : 290ms INFR2_RT: 450ms - TOTAL_PROCS : 5 - TOTAL_FDS : 107 - TOTAL_MEM : 600000000 #600MB \ No newline at end of file + + TOTAL_WORKERS: 2 + TOTAL_WORKERS_MEM: 600000000 + TOTAL_WORKERS_FDS: 150 + + TOTAL_MEM : 1400000000 + TOTAL_PROCS : 3 + TOTAL_FDS : 150 + + FRNTEND_MEM: 800000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + TOTAL_WORKERS_PREV_DIFF: 30 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 30 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + 
TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 30 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 40 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : inference_multiple_models.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/inference_multiple_models/inference_multiple_models.jmx b/tests/performance/tests/inference_multiple_models/inference_multiple_models.jmx index 1ceeaf2c2..67a5d689a 100644 --- a/tests/performance/tests/inference_multiple_models/inference_multiple_models.jmx +++ b/tests/performance/tests/inference_multiple_models/inference_multiple_models.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,13 +16,13 @@ model1 - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model1 Name model2 - resnet-18 + ${__P(RESNET_NAME,resnet-18)} Model2 Name = diff --git a/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml b/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml index 3244c4d8f..047d4d168 100644 --- a/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml +++ b/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml @@ -1,74 +1,27 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: inference_multiple_models - scenarios: - inference_multiple_models: + scenario_0: script: inference_multiple_models.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/resnet-18.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=1&synchronous=true" - - "curl -s -X PUT http://localhost:8081/models/resnet-18?min_worker=1&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" + - "curl -s -X PUT http://localhost:8081/models/${RESNET_NAME}?min_worker=1&synchronous=true" reporting: - module: passfail criteria: # Inbuilt Criteria - - success of Inference1<${INFR1_SUCC}, stop as failed - - success of Inference2<${INFR2_SUCC}, stop as failed - - avg-rt of Inference1>${INFR1_RT}, stop as failed - - avg-rt of Inference2>${INFR2_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true + - success of Inference2<${INFR2_SUCC} for 10s, ${STOP_ALIAS} as failed + - avg-rt of Inference2>${INFR2_RT}, ${STOP_ALIAS} as failed - class: 
bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} timeframe: 5s - stop : true + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true diff --git a/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml b/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml index 5b9fd6c0a..7f7887242 100644 --- a/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml +++ b/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml @@ -1,8 +1,49 @@ + --- settings: env: - INFR_SUCC : 100% - INFR_RT : 140ms - TOTAL_PROCS : 6 - TOTAL_FDS : 126 - TOTAL_MEM : 750000000 #750MB \ No newline at end of file + API_LABEL : Inference + API_SUCCESS : 80% + API_AVG_RT : 140ms + + TOTAL_WORKERS: 4 + TOTAL_WORKERS_MEM: 600000000 + TOTAL_WORKERS_FDS: 40 + + TOTAL_MEM : 1400000000 + TOTAL_PROCS : 5 + TOTAL_FDS : 150 + + FRNTEND_MEM: 800000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 35 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 60 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : inference_multiple_worker.jmx + + STOP : '' #possible values true, false. 
Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.jmx b/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.jmx index 1251a56b8..5d0816ae2 100644 --- a/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.jmx +++ b/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model Name diff --git a/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.yaml b/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.yaml index 5d73624a6..5f98bbd2c 100644 --- a/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.yaml +++ b/tests/performance/tests/inference_multiple_worker/inference_multiple_worker.yaml @@ -1,71 +1,22 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 1m - iterations: 100 - scenario: inference_multiple_worker - scenarios: inference_multiple_worker: script: inference_multiple_worker.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=4&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=4&synchronous=true" reporting: - module: passfail criteria: - # Inbuilt Criteria - - success of Inference<${INFR_SUCC}, stop as failed - - avg-rt of Inference>${INFR_RT}, stop as failed # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 1s - stop : true + timeframe: 5s + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file diff --git a/tests/performance/tests/inference_single_worker/environments/xlarge.yaml b/tests/performance/tests/inference_single_worker/environments/xlarge.yaml index c945e1f91..f160a1bcf 100644 --- a/tests/performance/tests/inference_single_worker/environments/xlarge.yaml +++ b/tests/performance/tests/inference_single_worker/environments/xlarge.yaml @@ -1,8 +1,49 @@ + --- settings: env: - INFR_SUCC : 100% - INFR_RT : 290ms 
- TOTAL_PROCS : 3 - TOTAL_FDS : 90 - TOTAL_MEM : 290000000 #290MB \ No newline at end of file + API_LABEL : Inference + API_SUCCESS : 80% + API_AVG_RT : 140ms + + TOTAL_WORKERS: 1 + TOTAL_WORKERS_MEM: 300000000 + TOTAL_WORKERS_FDS: 150 + + TOTAL_MEM : 1000000000 + TOTAL_PROCS : 2 + TOTAL_FDS : 150 + + FRNTEND_MEM: 600000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 60 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 90 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : inference_single_worker.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/inference_single_worker/inference_single_worker.jmx b/tests/performance/tests/inference_single_worker/inference_single_worker.jmx index ea05cc1ef..5124dbc6e 100644 --- a/tests/performance/tests/inference_single_worker/inference_single_worker.jmx +++ b/tests/performance/tests/inference_single_worker/inference_single_worker.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model Name diff --git a/tests/performance/tests/inference_single_worker/inference_single_worker.yaml b/tests/performance/tests/inference_single_worker/inference_single_worker.yaml index ece9e5b74..1bf6cce30 100644 --- a/tests/performance/tests/inference_single_worker/inference_single_worker.yaml +++ b/tests/performance/tests/inference_single_worker/inference_single_worker.yaml @@ -1,71 +1,21 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 1m - iterations: 100 - scenario: inference_single_worker - scenarios: inference_single_worker: script: inference_single_worker.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=1&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" reporting: - module: passfail criteria: - # Inbuilt Criteria - - success of Inference<${INFR_SUCC}, stop as failed - - avg-rt of Inference>${INFR_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - class: 
bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 1s - stop : true + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true diff --git a/tests/performance/tests/list_models/environments/xlarge.yaml b/tests/performance/tests/list_models/environments/xlarge.yaml index 611624824..bed934bf9 100644 --- a/tests/performance/tests/list_models/environments/xlarge.yaml +++ b/tests/performance/tests/list_models/environments/xlarge.yaml @@ -1,8 +1,48 @@ --- settings: env: - LST_MODLS_SUCC : 100% - LST_MODLS_RT : 14ms + API_LABEL : ListModels + API_SUCCESS : 80% + API_AVG_RT : 14ms + + TOTAL_WORKERS: 2 + TOTAL_WORKERS_MEM: 600000000 + TOTAL_WORKERS_FDS: 40 + + TOTAL_MEM : 1400000000 TOTAL_PROCS : 3 - TOTAL_FDS : 86 - TOTAL_MEM : 185000000 #185MB \ No newline at end of file + TOTAL_FDS : 150 + + FRNTEND_MEM: 800000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 30 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 30 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : list_models.jmx + + STOP : '' #possible values true, false. 
Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/list_models/list_models.jmx b/tests/performance/tests/list_models/list_models.jmx index cd5490dc4..0323fcee8 100644 --- a/tests/performance/tests/list_models/list_models.jmx +++ b/tests/performance/tests/list_models/list_models.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name diff --git a/tests/performance/tests/list_models/list_models.yaml b/tests/performance/tests/list_models/list_models.yaml index 81dd8ada7..60d29551c 100644 --- a/tests/performance/tests/list_models/list_models.yaml +++ b/tests/performance/tests/list_models/list_models.yaml @@ -1,68 +1,21 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: list_models - scenarios: - list_models: + scenario_0: script: list_models.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/shufflenet.mar" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_URL}" reporting: - module: passfail criteria: - # Inbuilt Criteria - - success of ListModels<${LST_MODLS_SUCC}, stop as failed - - avg-rt of ListModels>${LST_MODLS_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 1s - stop : true + timeframe: 5s + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file diff --git a/tests/performance/tests/model_description/environments/xlarge.yaml b/tests/performance/tests/model_description/environments/xlarge.yaml index 00e0aac87..f62c5e282 100644 --- a/tests/performance/tests/model_description/environments/xlarge.yaml +++ b/tests/performance/tests/model_description/environments/xlarge.yaml @@ -1,8 +1,49 @@ + --- settings: env: - MODL_DESC_SUCC : 100% - MODL_DESC_RT : 14ms - TOTAL_PROCS : 3 - TOTAL_FDS : 90 - TOTAL_MEM : 300000000 #300MB \ No newline at end of file + API_LABEL : ModelDescription + API_SUCCESS : 80% + API_AVG_RT : 14ms + + TOTAL_WORKERS: 1 + TOTAL_WORKERS_MEM: 150205952 + TOTAL_WORKERS_FDS: 40 + + TOTAL_MEM : 1400000000 + TOTAL_PROCS : 2 + TOTAL_FDS : 150 + + FRNTEND_MEM: 800000000 + + TOTAL_ORPHANS : 0 + 
TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 50 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 30 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 30 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : model_description.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/model_description/model_description.jmx b/tests/performance/tests/model_description/model_description.jmx index 4d8898adb..5cb68f9ac 100644 --- a/tests/performance/tests/model_description/model_description.jmx +++ b/tests/performance/tests/model_description/model_description.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,7 +16,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name diff --git a/tests/performance/tests/model_description/model_description.yaml b/tests/performance/tests/model_description/model_description.yaml index 05358a53c..ef429dd5e 100644 --- a/tests/performance/tests/model_description/model_description.yaml +++ b/tests/performance/tests/model_description/model_description.yaml @@ -1,68 +1,21 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: model_description - scenarios: - model_description: + scenario_0: script: model_description.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=1&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" reporting: - module: passfail criteria: - # Inbuilt Criteria - - success of ModelDescription<${MODL_DESC_SUCC}, stop as failed - - avg-rt of ModelDescription>${MODL_DESC_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 1s - stop : true - fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS} timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 1s - stop : true + stop : ${STOP} fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/sum_all_memory_rss -# condition: '>' -# threshold: ${TOTAL_MEM} -# timeframe: 5s 
-# stop : true -# fail : true \ No newline at end of file diff --git a/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml b/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml index 1671213d1..704576848 100644 --- a/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml +++ b/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml @@ -1,12 +1,56 @@ + --- settings: env: - INFR1_SUCC : 100% - INFR2_SUCC: 100% - INFR1_RT : 290ms + API_LABEL : Inference1 + API_SUCCESS : 80% + API_AVG_RT : 290ms + + INFR2_SUCC: 80% INFR2_RT: 450ms - TOTAL_PROCS : 14 + SCALEUP1_RT : 500ms + SCALEUP2_RT : 500ms + SCALEDOWN1_RT : 100ms + SCALEDOWN2_RT : 100ms + + TOTAL_WORKERS: 9 + TOTAL_WORKERS_MEM: 2668554752 + TOTAL_WORKERS_FDS: 100 + + TOTAL_MEM : 2000000000 + TOTAL_PROCS : 11 TOTAL_FDS : 300 - TOTAL_MEM : 2000000000 #~2GB + + FRNTEND_MEM: 1000000000 + TOTAL_ORPHANS : 0 - FRNTEND_MEM : 1000000000 #~1GB \ No newline at end of file + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 120 + TOTAL_WORKERS_MEM_RUN_DIFF: 135 + TOTAL_WORKERS_FDS_RUN_DIFF: 130 + TOTAL_MEM_RUN_DIFF: 130 + TOTAL_PROCS_RUN_DIFF: 100 + TOTAL_FDS_RUN_DIFF: 40 + FRNTEND_MEM_RUN_DIFF: 130 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : multiple_inference_and_scaling.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.jmx b/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.jmx index cbff8debc..cbff660c4 100644 --- a/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.jmx +++ b/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,13 +16,13 @@ model1 - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model1 Name model2 - resnet-18 + ${__P(RESNET_NAME,resnet-18)} Model2 Name = @@ -117,7 +117,7 @@ - + @@ -148,7 +148,35 @@ - + + + + + ${__P(input_filepath)} + data + image/jpeg + + + + + + + + + + + /predictions/${model1} + POST + true + false + true + true + + + + + + @@ -234,7 +262,7 @@ - + @@ -265,7 +293,35 @@ - + + + + + ${__P(input_filepath)} + data + image/jpeg + + + + + + + + + + + /predictions/${model2} + POST + true + false + true + true + + + + + + diff --git a/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml b/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml index 8f3324f2a..ce5c9725b 100644 --- a/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml +++ b/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml @@ -1,83 +1,33 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 300s - scenario: inference_multiple_models - scenarios: - inference_multiple_models: + scenario_0: script: multiple_inference_and_scaling.jmx -modules: - 
server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/resnet-18.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=1&synchronous=true" - - "curl -s -X PUT http://localhost:8081/models/resnet-18?min_worker=1&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - sum_all_memory_rss - - frontend_memory_rss - - orphans + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" + - "curl -s -X PUT http://localhost:8081/models/${RESNET_NAME}?min_worker=1&synchronous=true" + reporting: - module: passfail criteria: # Inbuilt Criteria - - success of Inference1<${INFR1_SUCC}, stop as failed - - success of Inference2<${INFR2_SUCC}, stop as failed - - avg-rt of Inference1>${INFR1_RT}, stop as failed - - avg-rt of Inference2>${INFR2_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 10s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_memory_rss - condition: '>' - threshold: ${TOTAL_MEM} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/orphans - condition: '>' - threshold: ${TOTAL_ORPHANS} - timeframe: 1s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/frontend_memory_rss - condition: '>' - threshold: ${FRNTEND_MEM} - timeframe: 5s - stop : true - fail : true + - success of Inference2<${INFR2_SUCC} for 10s, stop as failed + - avg-rt of Inference2>${INFR2_RT}, ${STOP_ALIAS} as failed + - success of Inference11<${API_SUCCESS} for 10s, stop as failed + - success of Inference21<${INFR2_SUCC} for 10s, stop as failed + - avg-rt of Inference11>${API_AVG_RT}, ${STOP_ALIAS} as failed + - avg-rt of Inference21>${INFR2_RT}, ${STOP_ALIAS} as failed + - success of ScaleUp1<${INFR2_SUCC} for 10s, stop as failed + - avg-rt of ScaleUp1>${SCALEUP1_RT}, ${STOP_ALIAS} as failed + - success of ScaleUp2<${INFR2_SUCC} for 10s, stop as failed + - avg-rt of ScaleUp2>${SCALEUP2_RT}, ${STOP_ALIAS} as failed + - success of ScaleDown1<${INFR2_SUCC} for 10s, stop as failed + - avg-rt of ScaleDown1>${SCALEDOWN1_RT}, ${STOP_ALIAS} as failed + - success of ScaleDown2<${INFR2_SUCC} for 10s, stop as failed + - avg-rt of ScaleDown2>${SCALEDOWN2_RT}, ${STOP_ALIAS} as failed + diff --git a/tests/performance/tests/register_unregister/environments/xlarge.yaml 
b/tests/performance/tests/register_unregister/environments/xlarge.yaml index 0e099afed..1407c6336 100644 --- a/tests/performance/tests/register_unregister/environments/xlarge.yaml +++ b/tests/performance/tests/register_unregister/environments/xlarge.yaml @@ -1,11 +1,51 @@ --- settings: env: - REG_SUCC : 100% - UNREG_SUCC: 100% - REG_RT : 15s - UNREG_RT: 10ms - TOTAL_PROCS : 1 - TOTAL_FDS : 66 + API_LABEL : RegisterModel + API_SUCCESS : 80% + API_AVG_RT : 290ms + + UNREG_SUCC: 80% + UNREG_RT: 290ms + + TOTAL_WORKERS: 1 + TOTAL_WORKERS_MEM: 14054528 + TOTAL_WORKERS_FDS: 50 + + TOTAL_MEM : 1400000000 + TOTAL_PROCS : 2 + TOTAL_FDS : 100 + + FRNTEND_MEM: 1200000000 + TOTAL_ORPHANS : 0 - FRNTEND_MEM : 75000000 #75MB \ No newline at end of file + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 220 + TOTAL_WORKERS_FDS_RUN_DIFF: 220 + TOTAL_MEM_RUN_DIFF: 150 + TOTAL_PROCS_RUN_DIFF: 70 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 140 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 1 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : register_unregister.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/register_unregister/register_unregister.jmx b/tests/performance/tests/register_unregister/register_unregister.jmx index 504c3ae8c..aa420f88e 100644 --- a/tests/performance/tests/register_unregister/register_unregister.jmx +++ b/tests/performance/tests/register_unregister/register_unregister.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,13 +16,13 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name model_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} URL to model store on s3 = diff --git a/tests/performance/tests/register_unregister/register_unregister.yaml b/tests/performance/tests/register_unregister/register_unregister.yaml index 1feb22487..6892e3531 100644 --- a/tests/performance/tests/register_unregister/register_unregister.yaml +++ b/tests/performance/tests/register_unregister/register_unregister.yaml @@ -1,72 +1,11 @@ --- -execution: -- concurrency: 1 - ramp-up: 0s -# hold-for: 5h - iterations: 5 - scenario: register_unregister - scenarios: - register_unregister: + scenario_0: script: register_unregister.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - -services: - - module: shellexec - prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 10s" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - frontend_memory_rss - - orphans - reporting: - module: passfail criteria: # Inbuilt Criteria - - success of RegisterModel<${REG_SUCC}, stop as failed - - success of UnregisterModel<${UNREG_SUCC}, stop as failed - - avg-rt of
RegisterModel>${REG_RT}, stop as failed - - avg-rt of UnregisterModel>${UNREG_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/orphans - condition: '>' - threshold: ${TOTAL_ORPHANS} - timeframe: 1s - stop : true - fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/frontend_memory_rss -# condition: '>' -# threshold: ${FRNTEND_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file + - success of UnregisterModel<${UNREG_SUCC} for 10s, ${STOP_ALIAS} as failed + - avg-rt of UnregisterModel>${UNREG_RT}, ${STOP_ALIAS} as failed diff --git a/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml b/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml index 24c07f5cf..4affbc8ef 100644 --- a/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml +++ b/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml @@ -1,12 +1,55 @@ --- settings: env: - REG_SUCC : 100% - SCL_UP_SUCC: 100% - UNREG_SUCC: 100% - REG_RT : 15s + API_LABEL : RegisterModel + API_SUCCESS : 80% + API_AVG_RT : 15s + + SCL_UP_SUCC: 80% + UNREG_SUCC: 80% SCL_UP_RT: 1.5s UNREG_RT: 18ms - TOTAL_PROCS : 2 - TOTAL_FDS : 73 - FRNTEND_MEM : 120000000 #120MB \ No newline at end of file + + TOTAL_WORKERS: 4 + TOTAL_WORKERS_MEM: 100000000 + TOTAL_WORKERS_FDS: 200 + + + TOTAL_MEM : 2000000000 + TOTAL_PROCS : 5 + TOTAL_FDS : 200 + + FRNTEND_MEM: 100000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 200 + TOTAL_WORKERS_MEM_RUN_DIFF: 200 + TOTAL_WORKERS_FDS_RUN_DIFF: 200 + TOTAL_MEM_RUN_DIFF: 200 + TOTAL_PROCS_RUN_DIFF: 150 + TOTAL_FDS_RUN_DIFF: 200 + FRNTEND_MEM_RUN_DIFF: 200 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + + CONCURRENCY : 1 + RAMP-UP : 1s + HOLD-FOR : 300s + SCRIPT : register_unregister_multiple.jmx + + STOP : '' #possible values true, false. 
Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.jmx b/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.jmx index 1dac0d5fe..876e51513 100644 --- a/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.jmx +++ b/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.jmx @@ -1,7 +1,7 @@ - + false true @@ -16,13 +16,13 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name model_url - https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar + ${__P(SQZNET_URL,https://torchserve.s3.amazonaws.com/mar_files/squeezenet1_1.mar)} URL to model store on s3 = diff --git a/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml b/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml index 5c8fcb85e..def6b57e3 100644 --- a/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml +++ b/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml @@ -1,66 +1,28 @@ --- -execution: -- concurrency: 1 - ramp-up: 0s - iterations: 5 - scenario: register_unregister_multiple - scenarios: - register_unregister_multiple: + scenario_0: script: register_unregister_multiple.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor - services: - module: shellexec prepare: - - "curl -s -O https://s3.amazonaws.com/model-server/inputs/kitten.jpg" - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/resnet-18.mar" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - "rm kitten.jpg" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - sum_all_file_descriptors - - frontend_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${RESNET_URL}" + -reporting: +~reporting: +- module: passfail # this is to enable passfail module +- module: junit-xml + data-source: pass-fail +- module: junit-xml + data-source: sample-labels +- module: final-stats + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv - module: passfail criteria: # Inbuilt Criteria - - success of RegisterModel<${REG_SUCC}, stop as failed - - success of ScaleUp<${SCL_UP_SUCC}, stop as failed - - success of UnregisterModel<${UNREG_SUCC}, stop as failed - - avg-rt of RegisterModel>${REG_RT}, stop as failed - - avg-rt of ScaleUp>${SCL_UP_RT}, stop as failed - - avg-rt of UnregisterModel>${UNREG_RT}, stop as failed - # Custom Criteria - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/total_processes - condition: '>' - threshold: ${TOTAL_PROCS} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_all_file_descriptors - condition: '>' - threshold: ${TOTAL_FDS} - timeframe: 5s - stop : true - fail : true -# - class: bzt.modules.monitoring.MonitoringCriteria -# subject: ServerLocalClient/frontend_memory_rss -# condition: '>' -# threshold: ${FRNTEND_MEM} -# timeframe: 5s -# stop : true -# fail : true \ No newline at end of file + - success of ${API_LABEL}<${API_SUCCESS} for 10s, ${STOP_ALIAS} as failed + - avg-rt of 
${API_LABEL}>${API_AVG_RT}, ${STOP_ALIAS} as failed + - success of ScaleUp<${SCL_UP_SUCC} for 10s, ${STOP_ALIAS} as failed + - success of UnregisterModel<${UNREG_SUCC} for 10s, ${STOP_ALIAS} as failed + - avg-rt of ScaleUp>${SCL_UP_RT}, ${STOP_ALIAS} as failed + - avg-rt of UnregisterModel>${UNREG_RT}, ${STOP_ALIAS} as failed diff --git a/tests/performance/tests/scale_down_workers/environments/xlarge.yaml b/tests/performance/tests/scale_down_workers/environments/xlarge.yaml index 6d43899b3..e02d16bc0 100644 --- a/tests/performance/tests/scale_down_workers/environments/xlarge.yaml +++ b/tests/performance/tests/scale_down_workers/environments/xlarge.yaml @@ -1,15 +1,25 @@ --- settings: env: - SCL_DWN_SUCC : 100% + SCL_DWN_SUCC : 80% SCL_DWN_RT : 10ms - TOTAL_PROCS_B4_SCL_DWN : 6 - TOTAL_PROCS_AFTR_SCL_DWN : 4 + TOTAL_PROCS_B4_SCL_DWN : 5 + TOTAL_PROCS_AFTR_SCL_DWN : 3 TOTAL_WRKRS_B4_SCL_DWN : 4 TOTAL_WRKRS_AFTR_SCL_DWN : 2 FRNTEND_FDS : 78 TOTAL_WRKRS_FDS_B4_SCL_DWN: 38 - TOTAL_WRKRS_FDS_AFTR_SCL_DWN: 23 - FRNTEND_MEM : 290000000 #290MB - TOTAL_WRKRS_MEM_B4_SCL_DWN : 450000000 #450MB - TOTAL_WRKRS_MEM_AFTR_SCL_DWN : 210000000 #210MB \ No newline at end of file + FRNTEND_MEM : 1000000000 + TOTAL_WRKRS_MEM_B4_SCL_DWN : 650000000 + TOTAL_WRKRS_MEM_AFTR_SCL_DWN : 200000000 + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + + CONCURRENCY: 10 + RAMP-UP: 1s + HOLD-FOR: 300s + SCRIPT: scale_down_workers.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/scale_down_workers/scale_down_workers.jmx b/tests/performance/tests/scale_down_workers/scale_down_workers.jmx index 512444b07..c995a47fd 100644 --- a/tests/performance/tests/scale_down_workers/scale_down_workers.jmx +++ b/tests/performance/tests/scale_down_workers/scale_down_workers.jmx @@ -1,7 +1,7 @@ - + false true @@ -22,7 +22,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name diff --git a/tests/performance/tests/scale_down_workers/scale_down_workers.yaml b/tests/performance/tests/scale_down_workers/scale_down_workers.yaml index dc8cc1382..7e99ba2d6 100644 --- a/tests/performance/tests/scale_down_workers/scale_down_workers.yaml +++ b/tests/performance/tests/scale_down_workers/scale_down_workers.yaml @@ -1,113 +1,109 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: scaledown scenarios: - scaledown: + scenario_0: script: scale_down_workers.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor services: - module: shellexec prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=4&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - total_workers - - frontend_file_descriptors - - sum_workers_file_descriptors - - frontend_memory_rss - - sum_workers_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=4&synchronous=true" + - "sleep 10s" -reporting: +~reporting: +- module: passfail # this is to enable passfail module +- module: junit-xml + data-source: 
pass-fail +- module: junit-xml + data-source: sample-labels +- module: final-stats + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv - module: passfail criteria: # Inbuilt Criteria - - success of ScaleDown<${SCL_DWN_SUCC}, stop as failed - - avg-rt of ScaleDown>${SCL_DWN_RT}, stop as failed + - success of ScaleDown<${SCL_DWN_SUCC} for 10s, ${STOP_ALIAS} as failed + - avg-rt of ScaleDown>${SCL_DWN_RT}, ${STOP_ALIAS} as failed # Custom Criteria - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '>' threshold: ${TOTAL_PROCS_B4_SCL_DWN} timeframe: 1s - stop : true + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS_AFTR_SCL_DWN} - timeframe: 1s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_workers condition: '>' threshold: ${TOTAL_WRKRS_B4_SCL_DWN} - timeframe: 1s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_workers condition: '<' threshold: ${TOTAL_WRKRS_AFTR_SCL_DWN} - timeframe: 1s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/frontend_file_descriptors condition: '>' threshold: ${FRNTEND_FDS} - timeframe: 5s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/sum_workers_file_descriptors condition: '>' threshold: ${TOTAL_WRKRS_FDS_B4_SCL_DWN} - timeframe: 5s - stop : true - fail : true - - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_file_descriptors - condition: '<' - threshold: ${TOTAL_WRKRS_FDS_AFTR_SCL_DWN} - timeframe: 5s - stop : true + timeframe: 10s + stop : ${STOP} fail : true # - class: bzt.modules.monitoring.MonitoringCriteria # subject: ServerLocalClient/frontend_memory_rss # condition: '>' # threshold: ${FRNTEND_MEM} -# timeframe: 5s -# stop : true +# timeframe: 10s +# stop : ${STOP} +# fail : true +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_workers_memory_rss +# condition: '>' +# threshold: ${TOTAL_WRKRS_MEM_B4_SCL_DWN} +# timeframe: 10s +# stop : ${STOP} +# fail : true +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_workers_memory_rss +# condition: '<' +# threshold: ${TOTAL_WRKRS_MEM_AFTR_SCL_DWN} +# timeframe: 10s +# stop : ${STOP} # fail : true - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_memory_rss + subject: ServerLocalClient/orphans condition: '>' - threshold: ${TOTAL_WRKRS_MEM_B4_SCL_DWN} - timeframe: 5s - stop : true - fail : true + threshold: ${TOTAL_ORPHANS} + timeframe: 10s + stop: ${STOP} + fail: true + diff_percent_previous: ${TOTAL_ORPHANS_DIFF} - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_memory_rss - condition: '<' - threshold: ${TOTAL_WRKRS_MEM_AFTR_SCL_DWN} - timeframe: 5s - stop : true - fail : true \ No newline at end of file + subject: ServerLocalClient/zombies + condition: '>' + threshold: ${TOTAL_ZOMBIES} + timeframe: 10s + stop: ${STOP} + fail: true + +~compare_criteria: + - \ No newline at end of file diff --git a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml 
b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml index 9e3182c3e..83ef339b6 100644 --- a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml +++ b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml @@ -1,15 +1,25 @@ --- settings: env: - SCL_UP_SUCC : 100% + SCL_UP_SUCC : 80% SCL_UP_RT : 10ms - TOTAL_PROCS_AFTR_SCL_UP : 6 - TOTAL_PROCS_B4_SCL_UP : 3 + TOTAL_PROCS_AFTR_SCL_UP : 5 + TOTAL_PROCS_B4_SCL_UP : 2 TOTAL_WRKRS_AFTR_SCL_UP : 4 TOTAL_WRKRS_B4_SCL_UP : 1 FRNTEND_FDS : 88 TOTAL_WRKRS_FDS_AFTR_SCL_UP : 38 - TOTAL_WRKRS_FDS_B4_SCL_UP : 11 - FRNTEND_MEM : 290000000 #290MB - TOTAL_WRKRS_MEM_AFTR_SCL_UP : 450000000 #450MB - TOTAL_WRKRS_MEM_B4_SCL_UP : 115000000 #115MB \ No newline at end of file + FRNTEND_MEM : 1000000000 + TOTAL_WRKRS_MEM_AFTR_SCL_UP : 796492032 + TOTAL_WRKRS_MEM_B4_SCL_UP : 115000000 #115MB + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + + CONCURRENCY: 10 + RAMP-UP: 1s + HOLD-FOR: 300s + SCRIPT: scale_up_workers.jmx + + STOP : '' #possible values true, false. Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/scale_up_workers/scale_up_workers.jmx b/tests/performance/tests/scale_up_workers/scale_up_workers.jmx index 997547d66..d875872e8 100644 --- a/tests/performance/tests/scale_up_workers/scale_up_workers.jmx +++ b/tests/performance/tests/scale_up_workers/scale_up_workers.jmx @@ -1,7 +1,7 @@ - + false true @@ -22,7 +22,7 @@ model - squeezenet_v1.1 + ${__P(SQZNET_NAME,squeezenet1_1)} = Model name diff --git a/tests/performance/tests/scale_up_workers/scale_up_workers.yaml b/tests/performance/tests/scale_up_workers/scale_up_workers.yaml index 125aff830..051122d92 100644 --- a/tests/performance/tests/scale_up_workers/scale_up_workers.yaml +++ b/tests/performance/tests/scale_up_workers/scale_up_workers.yaml @@ -1,113 +1,109 @@ --- -execution: -- concurrency: 10 - ramp-up: 1s - hold-for: 30s - scenario: scaleup - scenarios: - scaleup: + scenario_0: script: scale_up_workers.jmx -modules: - server_local_monitoring: - class : metrics_monitoring_inproc.Monitor services: - module: shellexec prepare: - - "multi-model-server --start > /dev/null 2>&1" - - "sleep 20s" - - "curl -s -X POST http://localhost:8081/models?url=https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" - - "curl -s -X PUT http://localhost:8081/models/squeezenet_v1.1?min_worker=1&synchronous=true" - post-process: - - "multi-model-server --stop > /dev/null 2>&1" - - module: server_local_monitoring - ServerLocalClient: - - interval: 1s - logging : True - metrics: - - total_processes - - total_workers - - frontend_file_descriptors - - sum_workers_file_descriptors - - frontend_memory_rss - - sum_workers_memory_rss + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" + - "sleep 10s" + -reporting: +~reporting: +- module: passfail # this is to enable passfail module +- module: junit-xml + data-source: pass-fail +- module: junit-xml + data-source: sample-labels +- module: final-stats + dump-csv : ${ARTIFACTS_DIR}/final_stats.csv - module: passfail criteria: # Inbuilt Criteria - - success of ScaleUp<${SCL_UP_SUCC}, stop as failed - - avg-rt of ScaleUp>${SCL_UP_RT}, stop as failed + - success of ScaleUp<${SCL_UP_SUCC} for 10s, ${STOP_ALIAS} as failed + - avg-rt of ScaleUp>${SCL_UP_RT}, ${STOP_ALIAS} as failed # Custom Criteria - class: 
bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '>' threshold: ${TOTAL_PROCS_AFTR_SCL_UP} - timeframe: 1s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' threshold: ${TOTAL_PROCS_B4_SCL_UP} - timeframe: 1s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_workers condition: '>' threshold: ${TOTAL_WRKRS_AFTR_SCL_UP} - timeframe: 1s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_workers condition: '<' threshold: ${TOTAL_WRKRS_B4_SCL_UP} - timeframe: 1s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/frontend_file_descriptors condition: '>' threshold: ${FRNTEND_FDS} - timeframe: 5s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/sum_workers_file_descriptors condition: '>' threshold: ${TOTAL_WRKRS_FDS_AFTR_SCL_UP} - timeframe: 5s - stop : true + timeframe: 10s + stop : ${STOP} fail : true - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/sum_workers_file_descriptors condition: '<' threshold: ${TOTAL_WRKRS_FDS_B4_SCL_UP} - timeframe: 5s - stop : true + timeframe: 10s + stop : ${STOP} fail : true # - class: bzt.modules.monitoring.MonitoringCriteria # subject: ServerLocalClient/frontend_memory_rss # condition: '>' # threshold: ${FRNTEND_MEM} -# timeframe: 5s -# stop : true +# timeframe: 10s +# stop : ${STOP} +# fail : true +# - class: bzt.modules.monitoring.MonitoringCriteria +# subject: ServerLocalClient/sum_workers_memory_rss +# condition: '>' +# threshold: ${TOTAL_WRKRS_MEM_AFTR_SCL_UP} +# timeframe: 10s +# stop : ${STOP} # fail : true - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_memory_rss + subject: ServerLocalClient/orphans condition: '>' - threshold: ${TOTAL_WRKRS_MEM_AFTR_SCL_UP} - timeframe: 5s - stop : true - fail : true + threshold: ${TOTAL_ORPHANS} + timeframe: 10s + stop: ${STOP} + fail: true + diff_percent_previous: ${TOTAL_ORPHANS_DIFF} - class: bzt.modules.monitoring.MonitoringCriteria - subject: ServerLocalClient/sum_workers_memory_rss - condition: '<' - threshold: ${TOTAL_WRKRS_MEM_B4_SCL_UP} - timeframe: 5s - stop : true - fail : true \ No newline at end of file + subject: ServerLocalClient/zombies + condition: '>' + threshold: ${TOTAL_ZOMBIES} + timeframe: 10s + stop: ${STOP} + fail: true + +~compare_criteria: + - \ No newline at end of file diff --git a/tests/performance/utils/fs.py b/tests/performance/utils/fs.py index 2a2a73178..8ade8d5bb 100644 --- a/tests/performance/utils/fs.py +++ b/tests/performance/utils/fs.py @@ -23,7 +23,7 @@ logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) -def get_sub_dirs(dir, exclude_list=['comp_data'], include_pattern='*', exclude_pattern=None): +def get_sub_dirs(dir, exclude_list=[], include_pattern='*', exclude_pattern=None): """Utility method to get list of folders in a directory""" dir = dir.strip() if not os.path.exists(dir): @@ -32,8 +32,16 @@ def get_sub_dirs(dir, exclude_list=['comp_data'], include_pattern='*', exclude_p raise Exception(msg) pattern_list = glob.glob(dir + "/" + include_pattern) -
exclude_pattern_list = glob.glob(dir + "/" + exclude_pattern) if exclude_pattern is not None else [] - return list([x for x in os.listdir(dir) if os.path.isdir(dir + "/" + x) + exclude_pattern_list, exclude_pattern = (glob.glob(dir + "/" + exclude_pattern), exclude_pattern)\ if exclude_pattern is not None else ([], '') + skip_pattern = "/skip*" + skip_list = glob.glob(dir + skip_pattern) + + exclude_patterns = exclude_list + exclude_patterns.extend([skip_pattern, exclude_pattern]) + logger.info("Excluding the tests with name patterns '{}'.".format("','".join(exclude_patterns))) + return sorted(list([x for x in os.listdir(dir) if os.path.isdir(dir + "/" + x) and x not in exclude_list and dir + "/" + x in pattern_list - and dir + "/" + x not in exclude_pattern_list]) + and dir + "/" + x not in exclude_pattern_list + and dir + "/" + x not in skip_list])) diff --git a/tests/performance/utils/pyshell.py b/tests/performance/utils/pyshell.py index 108cb9253..6178fb2a9 100644 --- a/tests/performance/utils/pyshell.py +++ b/tests/performance/utils/pyshell.py @@ -38,9 +38,20 @@ def run_process(cmd, wait=True): if not line: break lines.append(line) + if len(lines) > 20: + lines = lines[1:] logger.info(line) - return process.returncode, '\n'.join(lines) + process.communicate() + code = process.returncode + error_msg = "" + if code: + error_msg = "Error (error_code={}) while executing command : {}. ".format(code, cmd) + logger.info(error_msg) + error_msg += "\n\n$$$$Here are the last 20 lines of the logs." \ " For more details refer to the log file.$$$$\n\n" + error_msg += '\n'.join(lines) + return code, error_msg else: process = subprocess.Popen(cmd, shell=True) return process.returncode, '' diff --git a/tests/performance/utils/timer.py b/tests/performance/utils/timer.py index 1fa47d086..8beb03c29 100644 --- a/tests/performance/utils/timer.py +++ b/tests/performance/utils/timer.py @@ -37,5 +37,9 @@ def __enter__(self): def __exit__(self, type, value, traceback): logger.info("%s: %ss", self.description, self.diff()) + # Returning False is needed so that the __exit__ method does not ignore the exception; + # otherwise exceptions are not reported + return False + def diff(self): return int(time.time()) - self.start From 3e638cdd0ef03c2355237ccaa9c198b4875c3ea2 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Tue, 14 Jul 2020 19:46:28 +0530 Subject: [PATCH 02/21] change S3 bucket name --- tests/performance/agents/config.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/agents/config.ini b/tests/performance/agents/config.ini index f2caaebcd..aacbe97e1 100644 --- a/tests/performance/agents/config.ini +++ b/tests/performance/agents/config.ini @@ -6,4 +6,4 @@ HOST = PORT = 9009 [suite] -s3_bucket = torchserve-performance-regression-reports \ No newline at end of file +s3_bucket = mms-performance-regression-reports \ No newline at end of file From 525afacbf2ad6e1634791b552cc38a35ab15ef12 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 09:04:54 +0530 Subject: [PATCH 03/21] fix for worker processes --- tests/performance/agents/metrics/__init__.py | 22 +++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/tests/performance/agents/metrics/__init__.py b/tests/performance/agents/metrics/__init__.py index 642976be5..b814e8c2d 100644 --- a/tests/performance/agents/metrics/__init__.py +++ b/tests/performance/agents/metrics/__init__.py @@ -155,7 +155,7 @@ def update_metric(metric_name, proc_type, stats): # Total processes result['total_processes'] =
len(worker_stats) + 1 - result['total_workers'] = max(len(worker_stats), 0) + result['total_workers'] = max(len(worker_stats) -1 , 0) result['orphans'] = len(list(filter(lambda p: p['ppid'] == 1, worker_stats))) result['zombies'] = len(zombie_children) @@ -169,3 +169,23 @@ def update_metric(metric_name, proc_type, stats): result['system_write_bytes'] = system_disk_io_counters.write_bytes return result + + +if __name__ == "__main__": + import logging + import sys + from agents.utils.process import * + from agents import configuration + + logger = logging.getLogger(__name__) + logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) + + PID_FILE = configuration.get('server', 'pid_file', 'model_server.pid') + server_pid = get_process_pid_from_file(get_server_pidfile(PID_FILE)) + server_process = get_server_processes(server_pid) + children = get_child_processes(server_process) + + metrics = get_metrics(server_process, children, logger) + + + print(metrics) \ No newline at end of file From e4e976b0fdd558e7ad1bc46db44c604ab82f473b Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 09:17:54 +0530 Subject: [PATCH 04/21] jmeter property --- tests/performance/tests/global_config.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/performance/tests/global_config.yaml b/tests/performance/tests/global_config.yaml index 7fd5acfc7..4f7a68bbc 100644 --- a/tests/performance/tests/global_config.yaml +++ b/tests/performance/tests/global_config.yaml @@ -21,6 +21,13 @@ modules: input_filepath: kitten.jpg # make sure jpg is available at this path # if relative path is provided this will be relative to current working directory + RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} + RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} + SQZNET_URL: ${SQZNET_URL} + SQZNET_NAME: ${SQZNET_NAME} + RESNET_URL: ${RESNET_URL} + RESNET_NAME: ${RESNET_NAME} + server_local_monitoring: # metrics_monitoring_inproc and dependencies should be in python path class : metrics_monitoring_inproc.Monitor # monitoring class. 
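PATCH 03 above changes how worker processes are counted: `total_workers` now excludes one tracked child that is not a model worker, and the new `__main__` block allows an ad-hoc check against a running server. For a quick standalone sanity check of the same counting logic, a minimal psutil sketch along the following lines can be run next to the server. The PID-file path below is hypothetical; the real suite resolves it through `configuration` and the `agents.utils.process` helpers.

```python
# Minimal sketch of the process counting behind PATCH 03. Assumes psutil is
# installed and that the model server PID is available in a file; the path
# used here is an assumption, not the suite's actual configuration.
import psutil

def count_processes(pid_file="/tmp/model_server.pid"):
    with open(pid_file) as f:
        frontend = psutil.Process(int(f.read().strip()))
    live, zombies = [], []
    for child in frontend.children(recursive=True):
        try:
            status = child.status()
        except psutil.ZombieProcess:
            zombies.append(child)  # macOS raises instead of returning a status
            continue
        except psutil.NoSuchProcess:
            continue  # process exited between enumeration and inspection
        (zombies if status == psutil.STATUS_ZOMBIE else live).append(child)
    return {
        "total_processes": len(live) + 1,        # live children plus the frontend
        "total_workers": max(len(live) - 1, 0),  # mirrors PATCH 03: one child is not a model worker
        "zombies": len(zombies),
    }

if __name__ == "__main__":
    print(count_processes())
```

This mirrors, rather than replaces, the agent's `get_metrics()`: the real monitor also tracks previously seen children across samples, which is how it can report orphans (workers re-parented to PID 1) that a single snapshot like this cannot see.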
From 7bc198257d9b820fc8743a180090fa9866cdd05c Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 09:34:34 +0530 Subject: [PATCH 05/21] modular --- .../tests/scale_down_workers/environments/xlarge.yaml | 4 ++-- .../tests/scale_up_workers/environments/xlarge.yaml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/performance/tests/scale_down_workers/environments/xlarge.yaml b/tests/performance/tests/scale_down_workers/environments/xlarge.yaml index e02d16bc0..e30b12c7b 100644 --- a/tests/performance/tests/scale_down_workers/environments/xlarge.yaml +++ b/tests/performance/tests/scale_down_workers/environments/xlarge.yaml @@ -3,8 +3,8 @@ settings: env: SCL_DWN_SUCC : 80% SCL_DWN_RT : 10ms - TOTAL_PROCS_B4_SCL_DWN : 5 - TOTAL_PROCS_AFTR_SCL_DWN : 3 + TOTAL_PROCS_B4_SCL_DWN : 6 + TOTAL_PROCS_AFTR_SCL_DWN : 4 TOTAL_WRKRS_B4_SCL_DWN : 4 TOTAL_WRKRS_AFTR_SCL_DWN : 2 FRNTEND_FDS : 78 diff --git a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml index 83ef339b6..8ed1da50e 100644 --- a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml +++ b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml @@ -3,7 +3,7 @@ settings: env: SCL_UP_SUCC : 80% SCL_UP_RT : 10ms - TOTAL_PROCS_AFTR_SCL_UP : 5 + TOTAL_PROCS_AFTR_SCL_UP : 6 TOTAL_PROCS_B4_SCL_UP : 2 TOTAL_WRKRS_AFTR_SCL_UP : 4 TOTAL_WRKRS_B4_SCL_UP : 1 From 0a904f913444556df56abac0cedde13de93c5884 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 12:54:29 +0530 Subject: [PATCH 06/21] fixes --- tests/performance/runs/compare.py | 16 ++++++------ tests/performance/runs/taurus/x2junit.py | 7 ++++-- .../examples_local_criteria.yaml | 24 ++++-------------- .../examples_local_monitoring.yaml | 24 ++++-------------- .../examples_remote_criteria.yaml | 24 ++++-------------- .../examples_remote_monitoring.yaml | 23 ++++------------- .../examples_starter/examples_starter.yaml | 25 ++++--------------- tests/performance/tests/global_config.yaml | 24 ++++-------------- .../scale_up_workers/environments/xlarge.yaml | 1 + 9 files changed, 44 insertions(+), 124 deletions(-) diff --git a/tests/performance/runs/compare.py b/tests/performance/runs/compare.py index ea5d4bf6a..982ea63c4 100644 --- a/tests/performance/runs/compare.py +++ b/tests/performance/runs/compare.py @@ -44,7 +44,7 @@ def __init__(self, path, env_name, local_run, compare_with): self.storage = storage_class(self.artifacts_dir, self.env_name, compare_with) self.junit_reporter = None self.pandas_result = None - self.pass_fail = True + self.pass_fail = True def gen(self): """Driver method to get comparison directory, do the comparison of it with current run directory @@ -53,7 +53,7 @@ def gen(self): compare_dir, compare_run_name = self.storage.get_dir_to_compare() if compare_run_name: self.junit_reporter, self.pandas_result = compare_artifacts(self.storage.artifacts_dir, compare_dir, - self.storage.current_run_name, compare_run_name) + self.storage.current_run_name, compare_run_name) self.pandas_result.to_csv(os.path.join(self.artifacts_dir, "comparison_result.csv")) else: logger.warning("The latest run not found for env.") @@ -108,8 +108,8 @@ def get_centile_val(df, agg_func, col): val = None if "metric_name" in df and agg_func in df: - val = df[df["metric_name"] == col][agg_func] - val = val[0] if len(val) else None + val = df[df["metric_name"] == col][agg_func] + val = val[0] if len(val) else None return val @@ -205,7 +205,7 @@ def 
compare_artifacts(dir1, dir2, run_name1, run_name2): if __name__ == "__main__": compare_artifacts( - "/Users/demo/git/serve/test/performance/run_artifacts/xlarge__45b6399__1594725947", - "/Users/demo/git/serve/test/performance/run_artifacts/xlarge__45b6399__1594725717", - "xlarge__45b6399__1594725947", "xlarge__45b6399__1594725717" - ) \ No newline at end of file + "./run_artifacts/xlarge__45b6399__1594725947", + "./run_artifacts/xlarge__45b6399__1594725717", + "xlarge__45b6399__1594725947", "xlarge__45b6399__1594725717" + ) diff --git a/tests/performance/runs/taurus/x2junit.py b/tests/performance/runs/taurus/x2junit.py index a209a7a5e..7d1185e0d 100644 --- a/tests/performance/runs/taurus/x2junit.py +++ b/tests/performance/runs/taurus/x2junit.py @@ -54,6 +54,10 @@ def add_compare_tests(self): for metric_values in compare_list: col = metric_values[0] diff_percent = metric_values[2] + try: + diff_percent = float(diff_percent) + except Exception as e: + diff_percent = None tc = TestCase("{}_diff_run > {}".format(col, diff_percent)) if diff_percent is None: tc.result = Skipped("diff_percent_run value is not mentioned") @@ -204,8 +208,7 @@ def __exit__(self, type, value, traceback): if __name__ == "__main__": from utils.timer import Timer with Timer("ads") as t: - test_folder = '/Users/demo/git/serve/test/performance/'\ - 'run_artifacts/xlarge__2dc700f__1594662587/scale_down_workers' + test_folder = './run_artifacts/xlarge__7bc1982__1594795786/scale_up_workers' x = X2Junit("test", test_folder, JUnitXml(), t, "xlarge") # x.update_metrics() diff --git a/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml b/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml index 3adbf0a48..dd9864ac1 100644 --- a/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml +++ b/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml @@ -17,31 +17,17 @@ modules: ~services: - module: shellexec prepare: - - "curl -s -O $INPUT_IMG_URL" + - "curl -s -O ${INPUT_IMG_URL}" - "mkdir /tmp/ts_model_store" - - "ps aux | grep '$SERVER_PROCESS_NAME' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - - "$SERVER_CMD --start --ncs --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" post-process: - - "$SERVER_CMD --stop > /dev/null 2>&1" - - "rm $INPUT_IMG_PATH" + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" - "rm -r /tmp/ts_model_store" - "mv logs ${ARTIFACTS_DIR}/model_server_logs" - env: - SERVER_CMD : ${SERVER_CMD} - ARTIFACTS_DIR : ${ARTIFACTS_DIR} - SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} - INPUT_IMG_URL : ${INPUT_IMG_URL} - INPUT_IMG_PATH : ${INPUT_IMG_PATH} - - RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} - RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} - SQZNET_URL: ${SQZNET_URL} - SQZNET_NAME: ${SQZNET_NAME} - RESNET_URL: ${RESNET_URL} - RESNET_NAME: ${RESNET_NAME} - - module: server_local_monitoring # should be added in modules section ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor - interval: 1s diff --git a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml index ee8fdac7c..da615c04d 100644 --- a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml 
+++ b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml @@ -17,31 +17,17 @@ modules: ~services: - module: shellexec prepare: - - "curl -s -O $INPUT_IMG_URL" + - "curl -s -O ${INPUT_IMG_URL}" - "mkdir /tmp/ts_model_store" - - "ps aux | grep '$SERVER_PROCESS_NAME' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - - "$SERVER_CMD --start --ncs --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" post-process: - - "$SERVER_CMD --stop > /dev/null 2>&1" - - "rm $INPUT_IMG_PATH" + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" - "rm -r /tmp/ts_model_store" - "mv logs ${ARTIFACTS_DIR}/model_server_logs" - env: - SERVER_CMD : ${SERVER_CMD} - ARTIFACTS_DIR : ${ARTIFACTS_DIR} - SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} - INPUT_IMG_URL : ${INPUT_IMG_URL} - INPUT_IMG_PATH : ${INPUT_IMG_PATH} - - RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} - RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} - SQZNET_URL: ${SQZNET_URL} - SQZNET_NAME: ${SQZNET_NAME} - RESNET_URL: ${RESNET_URL} - RESNET_NAME: ${RESNET_NAME} - - module: server_local_monitoring # should be added in modules section ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor - interval: 1s diff --git a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml index 10b028895..4fbbe31d0 100644 --- a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml +++ b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml @@ -12,31 +12,17 @@ ~services: - module: shellexec prepare: - - "curl -s -O $INPUT_IMG_URL" + - "curl -s -O ${INPUT_IMG_URL}" - "mkdir /tmp/ts_model_store" - - "ps aux | grep '$SERVER_PROCESS_NAME' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - - "$SERVER_CMD --start --ncs --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" post-process: - - "$SERVER_CMD --stop > /dev/null 2>&1" - - "rm $INPUT_IMG_PATH" + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" - "rm -r /tmp/ts_model_store" - "mv logs ${ARTIFACTS_DIR}/model_server_logs" - env: - SERVER_CMD : ${SERVER_CMD} - ARTIFACTS_DIR : ${ARTIFACTS_DIR} - SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} - INPUT_IMG_URL : ${INPUT_IMG_URL} - INPUT_IMG_PATH : ${INPUT_IMG_PATH} - - RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} - RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} - SQZNET_URL: ${SQZNET_URL} - SQZNET_NAME: ${SQZNET_NAME} - RESNET_URL: ${RESNET_URL} - RESNET_NAME: ${RESNET_NAME} - - module: monitoring server-agent: - address: localhost:9009 # metric monitoring service address diff --git a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml index e3b71cf4e..407a1e6ac 100644 --- a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml +++ b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml @@ -14,30 +14,17 @@ ~services: - module: shellexec prepare: - - "curl -s -O $INPUT_IMG_URL" + - "curl -s -O 
${INPUT_IMG_URL}" - "mkdir /tmp/ts_model_store" - - "ps aux | grep '$SERVER_PROCESS_NAME' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - - "$SERVER_CMD --start --ncs --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" post-process: - - "$SERVER_CMD --stop > /dev/null 2>&1" - - "rm $INPUT_IMG_PATH" + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" - "rm -r /tmp/ts_model_store" - "mv logs ${ARTIFACTS_DIR}/model_server_logs" - env: - SERVER_CMD : ${SERVER_CMD} - ARTIFACTS_DIR : ${ARTIFACTS_DIR} - SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} - INPUT_IMG_URL : ${INPUT_IMG_URL} - INPUT_IMG_PATH : ${INPUT_IMG_PATH} - - RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} - RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} - SQZNET_URL: ${SQZNET_URL} - SQZNET_NAME: ${SQZNET_NAME} - RESNET_URL: ${RESNET_URL} - RESNET_NAME: ${RESNET_NAME} - module: monitoring server-agent: - address: localhost:9009 # metric monitoring service address diff --git a/tests/performance/tests/examples_starter/examples_starter.yaml b/tests/performance/tests/examples_starter/examples_starter.yaml index ee191afaf..5a8ebe8db 100644 --- a/tests/performance/tests/examples_starter/examples_starter.yaml +++ b/tests/performance/tests/examples_starter/examples_starter.yaml @@ -12,32 +12,17 @@ ~services: - module: shellexec prepare: - - "curl -s -O $INPUT_IMG_URL" + - "curl -s -O ${INPUT_IMG_URL}" - "mkdir /tmp/ts_model_store" - - "ps aux | grep '$SERVER_PROCESS_NAME' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - - "$SERVER_CMD --start --ncs --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" post-process: - - "$SERVER_CMD --stop > /dev/null 2>&1" - - "rm $INPUT_IMG_PATH" + - "${SERVER_STOP_CMD} > /dev/null 2>&1" + - "rm ${INPUT_IMG_PATH}" - "rm -r /tmp/ts_model_store" - "mv logs ${ARTIFACTS_DIR}/model_server_logs" - env: - SERVER_CMD : ${SERVER_CMD} - ARTIFACTS_DIR : ${ARTIFACTS_DIR} - SERVER_PROCESS_NAME : ${SERVER_PROCESS_NAME} - INPUT_IMG_URL : ${INPUT_IMG_URL} - INPUT_IMG_PATH : ${INPUT_IMG_PATH} - - RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} - RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} - SQZNET_URL: ${SQZNET_URL} - SQZNET_NAME: ${SQZNET_NAME} - RESNET_URL: ${RESNET_URL} - RESNET_NAME: ${RESNET_NAME} - - ~reporting: - module: passfail diff --git a/tests/performance/tests/global_config.yaml b/tests/performance/tests/global_config.yaml index 4f7a68bbc..471a16a04 100644 --- a/tests/performance/tests/global_config.yaml +++ b/tests/performance/tests/global_config.yaml @@ -38,29 +38,14 @@ services: - "curl -s -O ${INPUT_IMG_URL}" - "mkdir /tmp/ts_model_store" - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - - "${SERVER_CMD} --start --model-store /tmp/ts_model_store > /dev/null 2>&1" + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" post-process: - - "${SERVER_CMD} --stop > /dev/null 2>&1" + - "${SERVER_STOP_CMD} > /dev/null 2>&1" - "rm ${INPUT_IMG_PATH}" - "rm -r /tmp/ts_model_store" - "mv logs ${ARTIFACTS_DIR}/model_server_logs" - env: - SERVER_CMD : ${SERVER_CMD} - ARTIFACTS_DIR : ${ARTIFACTS_DIR} - SERVER_PROCESS_NAME : 
${SERVER_PROCESS_NAME} - INPUT_IMG_URL : ${INPUT_IMG_URL} - INPUT_IMG_PATH : ${INPUT_IMG_PATH} - - RESNET_152_BATCH_URL: ${RESNET_152_BATCH_URL} - RESNET_152_BATCH_NAME: ${RESNET_152_BATCH_NAME} - SQZNET_URL: ${SQZNET_URL} - SQZNET_NAME: ${SQZNET_NAME} - RESNET_URL: ${RESNET_URL} - RESNET_NAME: ${RESNET_NAME} - - - module: server_local_monitoring # should be added in modules section ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor - interval: 1s @@ -197,13 +182,14 @@ compare_criteria: settings: env: ARTIFACTS_DIR : '.' - SERVER_CMD : "multi-model-server" + SERVER_START_CMD : "multi-model-server --start " + SERVER_STOP_CMD : "multi-model-server --stop " SERVER_PROCESS_NAME : "[c]om.amazonaws.ml.mms.ModelServer" INPUT_IMG_URL: "https://s3.amazonaws.com/model-server/inputs/kitten.jpg" INPUT_IMG_PATH: "kitten.jpg" RESNET_152_BATCH_URL : "https://s3.amazonaws.com/model-server/model_archive_1.0/examples/resnet-152-batching/resnet-152.mar" - RESNET_152_BATCH_NAME : "resnet-152-batch" + RESNET_152_BATCH_NAME : "resnet-152" SQZNET_URL : "https://s3.amazonaws.com/model-server/model_archive_1.0/squeezenet_v1.1.mar" SQZNET_NAME : "squeezenet_v1.1" RESNET_URL : "https://s3.amazonaws.com/model-server/model_archive_1.0/resnet-18.mar" diff --git a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml index 8ed1da50e..32004800e 100644 --- a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml +++ b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml @@ -14,6 +14,7 @@ settings: TOTAL_WRKRS_MEM_B4_SCL_UP : 115000000 #115MB TOTAL_ORPHANS : 0 TOTAL_ZOMBIES : 0 + TOTAL_WRKRS_FDS_B4_SCL_UP : 0 CONCURRENCY: 10 From 75a06fe08b80c346be5c365581dfdb68dd3765e6 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 14:11:40 +0530 Subject: [PATCH 07/21] pylint fixes --- tests/performance/README.md | 76 +++++++++++++++----- tests/performance/agents/metrics/__init__.py | 10 +-- tests/performance/agents/utils/process.py | 1 - tests/performance/run_performance_suite.py | 10 ++- tests/performance/runs/compare.py | 7 +- tests/performance/runs/context.py | 8 ++- tests/performance/runs/junit.py | 7 +- tests/performance/runs/storage.py | 4 +- tests/performance/runs/taurus/__init__.py | 15 ++-- tests/performance/runs/taurus/x2junit.py | 54 +++++++------- tests/performance/utils/fs.py | 10 +-- 11 files changed, 123 insertions(+), 79 deletions(-) diff --git a/tests/performance/README.md b/tests/performance/README.md index 88fd6e0bc..8f6258682 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -78,8 +78,8 @@ values which are specific to the execution environment. This is a mandatory para 2. Collects all the tests from test-dir satisfying the pattern, excluding exclude pattern and test starting with 'skip' 3. Executes the collected tests 4. Generates artifacts in the artifacts-dir against each test case. - 5. Generate Pass Fail report for test cases - 6. Generate comparison report for specified commit id + 5. Generates Pass/Fail report for test cases + 6. Generates comparison report for specified commit id 3. Check the console logs, $artifacts-dir$//performance_results.html report, comparison_result.csv, comparison_result.html and other artifacts. @@ -130,25 +130,63 @@ tests ``` 1. global_config.yaml - - It is a master template for all_comm the test cases and is shared across all the tests. - - It contains all the common yaml sections, criteria, monitoring metrics etc. 
- - It also contain variables in the format ${variable} for metric thresholds and other test specific attributes. + - It is a master store for common items across all the tests. + - It contains the common sections, criteria, monitoring metrics etc. + - It also contains variables in the format ${variable} for metric thresholds and other test-specific attributes. 2. environments/*.yaml - - A test case can have multiple environment files. If you have a environment dependent metrics you can create an environment - yaml file. For ex. macos_xlarge, ubuntu_xlarge etc. - - The environment file contains values for all the variables mentioned in global_config.yaml and test.yaml. - -3. test.yaml - - The test.yaml is main yaml for a test case. Note the name of the yaml should be same as the test folder. - - It inherits the master template global_config.yaml. - And it usually contains the scenario, specific pre-processing commands (if any), and special criteria (if any) applicable for that test case only. - - If you want a behavior other than defined in the master template, It is possible to override sections of global_config.yaml in the individual test case. - The global_config.yaml's top-level sections can be overridden, merged, or appended based on below rules: - 1. By default the dictionaries get merged. - 2. If the dictionary key is prepended with '~' it will get overridden. - 3. The list gets appended. + - It stores values specific to an environment. An environment reflects the underlying compute characteristics, e.g. macos_xlarge, ubuntu_xlarge. + - A test case can have multiple environments. + - The environment file can override variable values defined in global_config.yaml and test.yaml. + +3. test_name.yaml + - The central file for a test case. Note that the name of the yaml should be the same as the test folder. + - It contains the scenario, specific pre-processing commands (optional) and special criteria (optional) relevant to the test case. + - It inherits the settings defined in global_config.yaml. global_config.yaml's top-level sections can be overridden, merged, or appended based on the following rules: + 1. By default, the test case configuration gets merged with the global configuration. + 2. If the dictionary key is prepended with '~', it will get overridden. + 3. Lists in a yaml section get appended. + - Below are sample yamls that demonstrate the merging of the global_config and test_name yamls. The list in the "services" section of global_config will + be appended with the list in the 'services' section of the test yaml. The 'reporting' section will be replaced by the '~reporting' section from the test yaml.
Refer to the test case [tests/scale_down_workers/scale_down_workers.yaml](tests/scale_down_workers/scale_down_workers.yaml) and [global_config.yaml](tests/global_config.yaml) for more details. + + ```yaml + #global_config.yaml + + services: + - module: shellexec + prepare: + - "curl -s -O ${INPUT_IMG_URL}" + - "mkdir /tmp/ts_model_store" + + reporting: + - module: passfail + criteria: + # API requests KPI criteria + - success of ${API_LABEL}<${API_SUCCESS} for 10s, stop as failed + - avg-rt of ${API_LABEL}>${API_AVG_RT}, ${STOP_ALIAS} as failed + + ``` + + ```yaml + #test.yaml + + services: + - module: shellexec + prepare: + - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" + + + ~reporting: + - module: passfail + criteria: + # Inbuilt Criteria + - success of ScaleDown<${SCL_DWN_SUCC} for 10s, ${STOP_ALIAS} as failed + - avg-rt of ScaleDown>${SCL_DWN_RT}, ${STOP_ALIAS} as failed + ``` + 4. test.jmx + - The JMeter test scenario file. The test.yaml runs the scenario mentioned in the .jmx file. @@ -362,7 +400,7 @@ There are two types of compare criteria you can add for metrics: This criterion is used to check the percent difference between the first and last value of the metric for a run. In other words, it is used to verify that metric values are the same before and after the scenario run. 2. diff_percent_previous - Compare the metric aggregate values with previous run. Here we take aggregate min, max and avg of metric values for current run + Compares the first and last metric values of the current run against those of the previous run for the compare_with commit id, and checks that the percentage difference is not greater than diff_percent_previous. The percentage difference is computed as abs(value1 - value2)/((value1 + value2)/2) * 100; for example, values 9 and 11 differ by abs(9 - 11)/((9 + 11)/2) * 100 = 20%. diff --git a/tests/performance/agents/metrics/__init__.py b/tests/performance/agents/metrics/__init__.py index b814e8c2d..8ecd97ab0 100644 --- a/tests/performance/agents/metrics/__init__.py +++ b/tests/performance/agents/metrics/__init__.py @@ -10,6 +10,7 @@ # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either # express or implied. See the License for the specific language governing
+# pylint: disable=redefined-builtin, redefined-outer-name, broad-except, unused-variable from enum import Enum from statistics import mean @@ -118,8 +119,9 @@ def update_metric(metric_name, proc_type, stats): try: # as_dict() gets all stats in one shot processes_stats.append({'type': ProcessType.FRONTEND, 'stats': server_process.as_dict()}) - except: + except Exception as e: pass + for child in children | zombie_children: try: child_cmdline = child.cmdline() @@ -140,7 +142,6 @@ def update_metric(metric_name, proc_type, stats): if p in zombie_children: zombie_children.remove(p) - ### PROCESS METRICS ### worker_stats = list(map(lambda x: x['stats'], \ filter(lambda x: x['type'] == ProcessType.WORKER, processes_stats))) @@ -155,7 +156,7 @@ def update_metric(metric_name, proc_type, stats): # Total processes result['total_processes'] = len(worker_stats) + 1 - result['total_workers'] = max(len(worker_stats) -1 , 0) + result['total_workers'] = max(len(worker_stats) - 1, 0) result['orphans'] = len(list(filter(lambda p: p['ppid'] == 1, worker_stats))) result['zombies'] = len(zombie_children) @@ -187,5 +188,4 @@ def update_metric(metric_name, proc_type, stats): metrics = get_metrics(server_process, children, logger) - - print(metrics) \ No newline at end of file + print(metrics) diff --git a/tests/performance/agents/utils/process.py b/tests/performance/agents/utils/process.py index c9a8e98de..02cf5d528 100644 --- a/tests/performance/agents/utils/process.py +++ b/tests/performance/agents/utils/process.py @@ -17,7 +17,6 @@ import os import tempfile - import psutil diff --git a/tests/performance/run_performance_suite.py b/tests/performance/run_performance_suite.py index 66bc41713..7f4b89af6 100755 --- a/tests/performance/run_performance_suite.py +++ b/tests/performance/run_performance_suite.py @@ -1,5 +1,3 @@ - - # Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. # Licensed under the Apache License, Version 2.0 (the "License"). # You may not use this file except in compliance with the License. 
@@ -13,20 +11,20 @@ """ Run Performance Regression Test Cases and Generate Reports """ -# pylint: disable=redefined-builtin, no-value-for-parameter +# pylint: disable=redefined-builtin, no-value-for-parameter, unused-argument import logging import os import subprocess import sys import time +import pathlib import click -import pathlib -from runs.context import ExecutionEnv -from runs.taurus import get_taurus_options, x2junit, update_taurus_metric_files from tqdm import tqdm +from runs.context import ExecutionEnv +from runs.taurus import get_taurus_options, x2junit, update_taurus_metric_files from utils import run_process, Timer, get_sub_dirs logger = logging.getLogger(__name__) diff --git a/tests/performance/runs/compare.py b/tests/performance/runs/compare.py index 982ea63c4..a9ae49b0e 100644 --- a/tests/performance/runs/compare.py +++ b/tests/performance/runs/compare.py @@ -15,9 +15,6 @@ """ # pylint: disable=redefined-builtin, self-assigning-variable, broad-except - -import csv -import glob import logging import sys import os @@ -34,6 +31,7 @@ class CompareReportGenerator(): + """Wrapper class to generate the compare report""" def __init__(self, path, env_name, local_run, compare_with): self.artifacts_dir = path @@ -182,8 +180,9 @@ def compare_artifacts(dir1, dir2, run_name1, run_name2): for agg_func in aggregates: name = "{}_{}".format(agg_func, str(col)) + val1 = get_centile_val(metrics_from_file1, agg_func, col) val2 = get_centile_val(metrics_from_file2, agg_func, col) - val1 = get_centile_val(metrics_from_file2, agg_func, col) + diff, pass_fail, msg = compare_values(val1, val2, diff_percent, run_name1, run_name2) if over_all_pass: diff --git a/tests/performance/runs/context.py b/tests/performance/runs/context.py index 860b35454..c90d83dc6 100644 --- a/tests/performance/runs/context.py +++ b/tests/performance/runs/context.py @@ -35,7 +35,7 @@ def get_git_commit_id(compare_with): return subprocess.check_output('git rev-parse --short {}'.format(compare_with).split()).decode( - "utf-8")[:-1] + "utf-8")[:-1] class ExecutionEnv(object): @@ -52,7 +52,8 @@ def __init__(self, agent, artifacts_dir, env, local_run, compare_with, use=True, self.compare_with = get_git_commit_id(compare_with) self.check_model_server_status = check_model_server_status self.reporter = JUnitXml() - self.compare_reporter_generator = CompareReportGenerator(self.artifacts_dir, self.env, self.local_run, compare_with) + self.compare_reporter_generator = CompareReportGenerator(self.artifacts_dir, self.env, self.local_run, + compare_with) self.exit_code = 1 def __enter__(self): @@ -64,12 +65,14 @@ def __enter__(self): @staticmethod def open_report(file_path): + """Open html report in browser """ if os.path.exists(file_path): return webbrowser.open_new_tab('file://' + os.path.realpath(file_path)) return False @staticmethod def report_summary(reporter, suite_name): + """Create a report summary """ if reporter and os.path.exists(reporter.junit_html_path): status = reporter.junit_xml.errors or reporter.junit_xml.failures status, code, color = ("failed", 3, "red") if status else ("passed", 0, "green") @@ -108,4 +111,3 @@ def __exit__(self, type, value, traceback): # Return True needed so that __exit__ method do no ignore the exception # otherwise exception are not reported return False - diff --git a/tests/performance/runs/junit.py b/tests/performance/runs/junit.py index 8ff8f1951..fb3c41c41 100644 --- a/tests/performance/runs/junit.py +++ b/tests/performance/runs/junit.py @@ -19,13 +19,16 @@ import html import textwrap import 
tabulate -from utils import run_process from junitparser import JUnitXml +from utils import run_process + + header = ["suite_name", "test_case", "result", "message"] class JunitConverter(): + """Convert JUnit XML object to XML and HTML report""" def __init__(self, junit_xml, out_dir, report_name): self.junit_xml = junit_xml @@ -50,7 +53,7 @@ def pretty_text(data): def junit2array(junit_xml): """convert junit xml junitparser.JUnitXml object to 2d array """ rows = [header] - for i, suite in enumerate(junit_xml): + for _, suite in enumerate(junit_xml): if len(suite) == 0: rows.append([suite.name, "", "skipped", "No criteria specified or there is an error."]) diff --git a/tests/performance/runs/storage.py b/tests/performance/runs/storage.py index 1f7e0c421..648687b9c 100644 --- a/tests/performance/runs/storage.py +++ b/tests/performance/runs/storage.py @@ -20,11 +20,11 @@ import os import sys import shutil +import pathlib import boto3 -import pathlib -from agents import configuration +from agents import configuration from utils import run_process logger = logging.getLogger(__name__) diff --git a/tests/performance/runs/taurus/__init__.py b/tests/performance/runs/taurus/__init__.py index 973acf64d..d2dfd5012 100644 --- a/tests/performance/runs/taurus/__init__.py +++ b/tests/performance/runs/taurus/__init__.py @@ -21,8 +21,9 @@ import sys import logging -from .reader import get_mon_metrics_list from utils.pyshell import run_process +from .reader import get_mon_metrics_list + logger = logging.getLogger(__name__) logging.basicConfig(stream=sys.stdout, format="%(message)s", level=logging.INFO) @@ -64,20 +65,18 @@ def update_taurus_metric_files(suite_artifacts_dir, test_file): os.rename(metrics_log_file[0], metrics_new_file) else: - metrics_log_file = os.path.join(suite_artifacts_dir, "local_monitoring_logs.csv") - if os.path.exists(metrics_log_file): - os.rename(metrics_log_file, metrics_new_file) + metrics_log_file = os.path.join(suite_artifacts_dir, "local_monitoring_logs.csv") + if os.path.exists(metrics_log_file): + os.rename(metrics_log_file, metrics_new_file) KEEP_LINES = 10000 def handle_big_files(name): report_file = os.path.join(suite_artifacts_dir, name) report_tmp_file = os.path.join(suite_artifacts_dir, "{}_tmp".format(name)) - if os.path.exists(report_file) and os.stat(report_file).st_size > 1e+7: #10MB - logger.info("Keeping first {} records from file {} as it is >10MB".format(KEEP_LINES, report_file)) + if os.path.exists(report_file) and os.stat(report_file).st_size > 1e+7: # 10MB + logger.info("Keeping first %s records from file %s as it is >10MB", KEEP_LINES, report_file) run_process("head -{0} {1} > {2}; mv {2} {1};".format(KEEP_LINES, report_file, report_tmp_file)) handle_big_files("error.jtl") handle_big_files("kpi.jtl") - - diff --git a/tests/performance/runs/taurus/x2junit.py b/tests/performance/runs/taurus/x2junit.py index 7d1185e0d..37227870b 100644 --- a/tests/performance/runs/taurus/x2junit.py +++ b/tests/performance/runs/taurus/x2junit.py @@ -13,24 +13,26 @@ """ Convert the Taurus Test suite XML to Junit XML """ -# pylint: disable=redefined-builtin +# pylint: disable=redefined-builtin, unused-variable, broad-except import os -import pandas as pd -from runs.taurus.reader import get_compare_metric_list - import html + +import pandas as pd import tabulate from bzt.modules.passfail import DataCriterion from junitparser import TestCase, TestSuite, JUnitXml, Skipped, Error, Failure +from runs.taurus.reader import get_compare_metric_list + class X2Junit(object): """ Context 
Manager class to do convert Taurus Test suite XML report which is in Xunit specifications to JUnit XML report. """ + def __init__(self, name, artifacts_dir, junit_xml, timer, env_name): self.ts = TestSuite(name) self.name = name @@ -50,6 +52,9 @@ def __enter__(self): return self def add_compare_tests(self): + """Add compare tests for a run. + Compare actual percentage difference between first value and last value against provided difference.""" + compare_list = get_compare_metric_list(self.artifacts_dir, "") for metric_values in compare_list: col = metric_values[0] @@ -97,6 +102,7 @@ def add_compare_tests(self): @staticmethod def casename_to_criteria(test_name): + """Extract metric from Taurus pass/fail criteria string""" metric = None if ' of ' not in test_name: test_name = "label of {}".format(test_name) @@ -112,6 +118,7 @@ def casename_to_criteria(test_name): return metric def percentile_values(self, metric_name): + """Calculate percentile values for metric_name column in self.metrics pandas df""" values = {} if self.metrics is not None and metric_name is not None: metric_vals = getattr(self.metrics, metric_name, None) @@ -119,11 +126,12 @@ def percentile_values(self, metric_name): centile_values = [0, 0.5, 0.9, 0.95, 0.99, 0.999, 1] for centile in centile_values: val = getattr(metric_vals, 'quantile')(centile) - values.update({str(centile * 100)+"%": val}) + values.update({str(centile * 100) + "%": val}) return values def update_metrics(self): + """ Update self.metrics and self.metrics_agg_dict""" metrics_file = os.path.join(self.artifacts_dir, "metrics.csv") rows = [] agg_dict = {} @@ -134,24 +142,24 @@ def update_metrics(self): header_names.extend([str(colname * 100) + "%" for colname in centile_values]) header_names.extend(['first_value', 'last_value']) if self.metrics.size: - for col in self.metrics.columns: - row = [self.name, str(col)] - metric_vals = getattr(self.metrics, str(col), None) - for centile in centile_values: - row.append(getattr(metric_vals, 'quantile')(centile)) - row.extend([metric_vals.iloc[0], metric_vals.iloc[-1]]) - agg_dict.update({row[0]: dict(zip(header_names, row[1:]))}) - rows.append(row) - - dataframe = pd.DataFrame(rows, columns=header_names) - print("Metric percentile values:\n") - print(tabulate.tabulate(rows, headers=header_names, tablefmt="grid")) - dataframe.to_csv(os.path.join(self.artifacts_dir, "metrics_agg.csv"), index=False) + for col in self.metrics.columns: + row = [self.name, str(col)] + metric_vals = getattr(self.metrics, str(col), None) + for centile in centile_values: + row.append(getattr(metric_vals, 'quantile')(centile)) + row.extend([metric_vals.iloc[0], metric_vals.iloc[-1]]) + agg_dict.update({row[0]: dict(zip(header_names, row[1:]))}) + rows.append(row) + + dataframe = pd.DataFrame(rows, columns=header_names) + print("Metric percentile values:\n") + print(tabulate.tabulate(rows, headers=header_names, tablefmt="grid")) + dataframe.to_csv(os.path.join(self.artifacts_dir, "metrics_agg.csv"), index=False) self.metrics_agg_dict = agg_dict def __exit__(self, type, value, traceback): - print("error code is "+str(self.code)) + print("error code is " + str(self.code)) self.update_metrics() xunit_file = os.path.join(self.artifacts_dir, "xunit.xml") @@ -205,19 +213,17 @@ def __exit__(self, type, value, traceback): # otherwise exception are not reported return False + if __name__ == "__main__": from utils.timer import Timer + with Timer("ads") as t: test_folder = './run_artifacts/xlarge__7bc1982__1594795786/scale_up_workers' x = X2Junit("test", 
test_folder, JUnitXml(), t, "xlarge") # x.update_metrics() - # # x.add_compare_tests() x.__exit__(None, None, None) - x.ts - + print(x.ts) print("a") - - diff --git a/tests/performance/utils/fs.py b/tests/performance/utils/fs.py index 8ade8d5bb..eb0cce149 100644 --- a/tests/performance/utils/fs.py +++ b/tests/performance/utils/fs.py @@ -32,7 +32,7 @@ def get_sub_dirs(dir, exclude_list=[], include_pattern='*', exclude_pattern=None raise Exception(msg) pattern_list = glob.glob(dir + "/" + include_pattern) - exclude_pattern_list, exclude_pattern = (glob.glob(dir + "/" + exclude_pattern), exclude_pattern)\ + exclude_pattern_list, exclude_pattern = (glob.glob(dir + "/" + exclude_pattern), exclude_pattern) \ if exclude_pattern is not None else ([], '') skip_pattern = "/skip*" skip_list = glob.glob(dir + skip_pattern) @@ -41,7 +41,7 @@ def get_sub_dirs(dir, exclude_list=[], include_pattern='*', exclude_pattern=None exclude_patterns.extend([skip_pattern, exclude_pattern]) logger.info("Excluding the tests with name patterns '{}'.".format("','".join(exclude_patterns))) return sorted(list([x for x in os.listdir(dir) if os.path.isdir(dir + "/" + x) - and x not in exclude_list - and dir + "/" + x in pattern_list - and dir + "/" + x not in exclude_pattern_list - and dir + "/" + x not in skip_list])) + and x not in exclude_list + and dir + "/" + x in pattern_list + and dir + "/" + x not in exclude_pattern_list + and dir + "/" + x not in skip_list])) From fd211555593b47e5483f03bbea13c7b31541cfce Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 14:31:28 +0530 Subject: [PATCH 08/21] modular --- tests/performance/runs/compare.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/performance/runs/compare.py b/tests/performance/runs/compare.py index a9ae49b0e..01885fd8c 100.644 --- a/tests/performance/runs/compare.py +++ b/tests/performance/runs/compare.py @@ -54,7 +54,8 @@ def gen(self): self.storage.current_run_name, compare_run_name) self.pandas_result.to_csv(os.path.join(self.artifacts_dir, "comparison_result.csv")) else: - logger.warning("The latest run not found for env.") + logger.info("The latest run for comparison was not found for env='%s' and commit_id='%s'.", + self.env_name, self.compare_with) self.storage.store_results() return self.junit_reporter From 84299b58c3c6369b0198f8a7b1b463d2a185f6ee Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 14:32:01 +0530 Subject: [PATCH 09/21] compare_with logging --- tests/performance/runs/context.py | 8 ++++++-- tests/performance/runs/storage.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/performance/runs/context.py b/tests/performance/runs/context.py index c90d83dc6..ef0051061 100644 --- a/tests/performance/runs/context.py +++ b/tests/performance/runs/context.py @@ -34,8 +34,12 @@ def get_git_commit_id(compare_with): - return subprocess.check_output('git rev-parse --short {}'.format(compare_with).split()).decode( - "utf-8")[:-1] + """Get short commit id for compare_with commit, branch, tag""" + cmd = 'git rev-parse --short {}'.format(compare_with) + logger.info("Running command: %s", cmd) + commit_id = subprocess.check_output(cmd.split()).decode("utf-8")[:-1] + logger.info("Commit id for compare_with='%s' is '%s'", compare_with, commit_id) + return commit_id class ExecutionEnv(object): diff --git a/tests/performance/runs/storage.py b/tests/performance/runs/storage.py index 648687b9c..6098cb4b6 100644 --- a/tests/performance/runs/storage.py +++ 
b/tests/performance/runs/storage.py @@ -100,7 +100,7 @@ def get_dir_to_compare(self): latest_run = self.get_latest(run_names, self.env_name, self.current_run_name, self.compare_with) if not latest_run: - logger.info("No run found for env_id %s", self.env_name) + logger.info("No run artifacts folder found for env_id %s", self.env_name) return '', '' if not os.path.exists(comp_data_path): From 32e994989f6d1cdbc4aef315c0504dddcb9bf7d7 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 17:04:52 +0530 Subject: [PATCH 10/21] Fix on fail actual values --- tests/performance/runs/taurus/x2junit.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/runs/taurus/x2junit.py b/tests/performance/runs/taurus/x2junit.py index 37227870b..d8e5c864b 100644 --- a/tests/performance/runs/taurus/x2junit.py +++ b/tests/performance/runs/taurus/x2junit.py @@ -148,7 +148,7 @@ def update_metrics(self): for centile in centile_values: row.append(getattr(metric_vals, 'quantile')(centile)) row.extend([metric_vals.iloc[0], metric_vals.iloc[-1]]) - agg_dict.update({row[0]: dict(zip(header_names, row[1:]))}) + agg_dict.update({row[1]: dict(zip(header_names[2:], row[2:]))}) rows.append(row) dataframe = pd.DataFrame(rows, columns=header_names) From f3860381f723eba06a43b8ba565258c4a99dbc90 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 18:52:48 +0530 Subject: [PATCH 11/21] compare_with fix --- tests/performance/runs/context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/runs/context.py b/tests/performance/runs/context.py index ef0051061..d1d98383f 100644 --- a/tests/performance/runs/context.py +++ b/tests/performance/runs/context.py @@ -57,7 +57,7 @@ def __init__(self, agent, artifacts_dir, env, local_run, compare_with, use=True, self.check_model_server_status = check_model_server_status self.reporter = JUnitXml() self.compare_reporter_generator = CompareReportGenerator(self.artifacts_dir, self.env, self.local_run, - compare_with) + self.compare_with) self.exit_code = 1 def __enter__(self): From 5c35d9808518e8a5e1d22ce7c2fb2b59e1df0217 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 19:00:53 +0530 Subject: [PATCH 12/21] hold-for api description --- .../performance/tests/api_description/environments/xlarge.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/tests/api_description/environments/xlarge.yaml b/tests/performance/tests/api_description/environments/xlarge.yaml index be4e38930..24e9b4dc4 100644 --- a/tests/performance/tests/api_description/environments/xlarge.yaml +++ b/tests/performance/tests/api_description/environments/xlarge.yaml @@ -45,7 +45,7 @@ settings: CONCURRENCY : 10 RAMP-UP : 1s - HOLD-FOR : 30s + HOLD-FOR : 300s SCRIPT : api_description.jmx STOP : '' From e8b58ce5238b21f3b325a0aeef2bf724210baeaa Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 19:32:01 +0530 Subject: [PATCH 13/21] fix for compare --- tests/performance/runs/compare.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/performance/runs/compare.py b/tests/performance/runs/compare.py index 01885fd8c..28f9068c6 100644 --- a/tests/performance/runs/compare.py +++ b/tests/performance/runs/compare.py @@ -108,7 +108,7 @@ def get_centile_val(df, agg_func, col): val = None if "metric_name" in df and agg_func in df: val = df[df["metric_name"] == col][agg_func] - val = val[0] if len(val) else None + val = val.iloc[0] if len(val) >= 1 else None 
return val @@ -205,7 +205,7 @@ def compare_artifacts(dir1, dir2, run_name1, run_name2): if __name__ == "__main__": compare_artifacts( - "./run_artifacts/xlarge__45b6399__1594725947", - "./run_artifacts/xlarge__45b6399__1594725717", - "xlarge__45b6399__1594725947", "xlarge__45b6399__1594725717" + "./run_artifacts/xlarge__5c35d98__1594819866", + "./run_artifacts/xlarge__f386038__1594819700", + "xlarge__5c35d98__1594819866", "xlarge__f386038__1594819700" ) From 7428eac398939c8dbf398202ee5e81a5022ee6ce Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 20:50:06 +0530 Subject: [PATCH 14/21] handle error in compare report --- tests/performance/runs/context.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/performance/runs/context.py b/tests/performance/runs/context.py index d1d98383f..edd8d1a1b 100644 --- a/tests/performance/runs/context.py +++ b/tests/performance/runs/context.py @@ -101,11 +101,14 @@ def __exit__(self, type, value, traceback): junit_reporter = JunitConverter(self.reporter, self.artifacts_dir, 'performance_results') junit_reporter.generate_junit_report() - junit_compare = self.compare_reporter_generator.gen() junit_compare_reporter = None - if junit_compare: - junit_compare_reporter = JunitConverter(junit_compare, self.artifacts_dir, 'comparison_results') - junit_compare_reporter.generate_junit_report() + try: + junit_compare = self.compare_reporter_generator.gen() + if junit_compare: + junit_compare_reporter = JunitConverter(junit_compare, self.artifacts_dir, 'comparison_results') + junit_compare_reporter.generate_junit_report() + except Exception as e: + logger.info("Exception has occurred while comparing results", exc_info=1) compare_exit_code = ExecutionEnv.report_summary(junit_compare_reporter, "Comparison Test suite") exit_code = ExecutionEnv.report_summary(junit_reporter, "Performance Regression Test suite") From 0b133f2375ff4745136e6d319b0c7bfc6535f7c8 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 20:52:16 +0530 Subject: [PATCH 15/21] pylint --- tests/performance/runs/context.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/runs/context.py b/tests/performance/runs/context.py index edd8d1a1b..35905ba9b 100644 --- a/tests/performance/runs/context.py +++ b/tests/performance/runs/context.py @@ -13,7 +13,7 @@ """ Start and stop monitoring server """ -# pylint: disable=redefined-builtin +# pylint: disable=redefined-builtin, broad-except import logging import os From a4fbeb09edcb846c89fb3eb197909389b0e383ad Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Wed, 15 Jul 2020 22:18:17 +0530 Subject: [PATCH 16/21] modular --- tests/performance/runs/taurus/x2junit.py | 2 +- .../tests/api_description/api_description.yaml | 2 +- .../tests/api_description/environments/xlarge.yaml | 2 +- .../batch_and_single_inference.yaml | 2 +- .../examples_local_criteria.yaml | 2 +- .../examples_local_monitoring.yaml | 2 +- .../examples_remote_criteria.yaml | 2 +- .../examples_remote_monitoring.yaml | 2 +- .../tests/examples_starter/examples_starter.yaml | 2 +- tests/performance/tests/global_config.yaml | 4 ++-- .../inference_multiple_models.yaml | 2 +- .../multiple_inference_and_scaling.yaml | 14 +++++++------- .../register_unregister/register_unregister.yaml | 2 +- .../register_unregister_multiple.yaml | 6 +++--- .../scale_down_workers/environments/xlarge.yaml | 2 +- .../scale_down_workers/scale_down_workers.yaml | 2 +- .../scale_up_workers/environments/xlarge.yaml | 2 +- 
.../tests/scale_up_workers/scale_up_workers.yaml | 2 +- 18 files changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/performance/runs/taurus/x2junit.py b/tests/performance/runs/taurus/x2junit.py index d8e5c864b..71320a46d 100644 --- a/tests/performance/runs/taurus/x2junit.py +++ b/tests/performance/runs/taurus/x2junit.py @@ -163,7 +163,7 @@ def __exit__(self, type, value, traceback): self.update_metrics() xunit_file = os.path.join(self.artifacts_dir, "xunit.xml") - if self.code == 1: + if self.code not in [0, 3]: # 0-no error, 3-pass/fail tc = TestCase(self.name) tc.result = Error(self.err) self.ts.add_testcase(tc) diff --git a/tests/performance/tests/api_description/api_description.yaml b/tests/performance/tests/api_description/api_description.yaml index bb2ec1e88..873104ba6 100644 --- a/tests/performance/tests/api_description/api_description.yaml +++ b/tests/performance/tests/api_description/api_description.yaml @@ -4,7 +4,7 @@ reporting: criteria: # Inbuilt Criteria - success of ManagementAPIDescription<${MGMT_DESC_SUCC} for 10s, ${STOP_ALIAS} as failed - - avg-rt of ManagementAPIDescription>${MGMT_DESC_AVG_RT}, ${STOP_ALIAS} as failed + - avg-rt of ManagementAPIDescription>${MGMT_DESC_AVG_RT} for 10s, ${STOP_ALIAS} as failed # # Custom Criteria # - class: bzt.modules.monitoring.MonitoringCriteria # subject: ServerLocalClient/total_processes diff --git a/tests/performance/tests/api_description/environments/xlarge.yaml b/tests/performance/tests/api_description/environments/xlarge.yaml index 24e9b4dc4..be4e38930 100644 --- a/tests/performance/tests/api_description/environments/xlarge.yaml +++ b/tests/performance/tests/api_description/environments/xlarge.yaml @@ -45,7 +45,7 @@ settings: CONCURRENCY : 10 RAMP-UP : 1s - HOLD-FOR : 300s + HOLD-FOR : 30s SCRIPT : api_description.jmx STOP : '' diff --git a/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml b/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml index 54d6b8475..60373dcde 100644 --- a/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml +++ b/tests/performance/tests/batch_and_single_inference/batch_and_single_inference.yaml @@ -19,4 +19,4 @@ reporting: criteria: # Inbuilt Criteria - success of ManagementAPIDescription<${INF2_SUCC} for 10s, ${STOP_ALIAS} as failed - - avg-rt of ManagementAPIDescription>${INF2_AVG_RT}, ${STOP_ALIAS} as failed \ No newline at end of file + - avg-rt of ManagementAPIDescription>${INF2_AVG_RT} for 10s, ${STOP_ALIAS} as failed \ No newline at end of file diff --git a/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml b/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml index dd9864ac1..8234a8dd7 100644 --- a/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml +++ b/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml @@ -18,7 +18,7 @@ modules: - module: shellexec prepare: - "curl -s -O ${INPUT_IMG_URL}" - - "mkdir /tmp/ts_model_store" + - "mkdir -p /tmp/ts_model_store" - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" diff --git a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml index da615c04d..86ac99ac9 100644 --- 
a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml +++ b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml @@ -18,7 +18,7 @@ modules: - module: shellexec prepare: - "curl -s -O ${INPUT_IMG_URL}" - - "mkdir /tmp/ts_model_store" + - "mkdir -p /tmp/ts_model_store" - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" diff --git a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml index 4fbbe31d0..384f6bf2b 100644 --- a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml +++ b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml @@ -13,7 +13,7 @@ - module: shellexec prepare: - "curl -s -O ${INPUT_IMG_URL}" - - "mkdir /tmp/ts_model_store" + - "mkdir -p /tmp/ts_model_store" - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" diff --git a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml index 407a1e6ac..f899ec98f 100644 --- a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml +++ b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml @@ -15,7 +15,7 @@ - module: shellexec prepare: - "curl -s -O ${INPUT_IMG_URL}" - - "mkdir /tmp/ts_model_store" + - "mkdir -p /tmp/ts_model_store" - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" diff --git a/tests/performance/tests/examples_starter/examples_starter.yaml b/tests/performance/tests/examples_starter/examples_starter.yaml index 5a8ebe8db..a94ccc5c8 100644 --- a/tests/performance/tests/examples_starter/examples_starter.yaml +++ b/tests/performance/tests/examples_starter/examples_starter.yaml @@ -13,7 +13,7 @@ - module: shellexec prepare: - "curl -s -O ${INPUT_IMG_URL}" - - "mkdir /tmp/ts_model_store" + - "mkdir -p /tmp/ts_model_store" - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" diff --git a/tests/performance/tests/global_config.yaml b/tests/performance/tests/global_config.yaml index 471a16a04..c88e1a2e4 100644 --- a/tests/performance/tests/global_config.yaml +++ b/tests/performance/tests/global_config.yaml @@ -36,7 +36,7 @@ services: - module: shellexec prepare: - "curl -s -O ${INPUT_IMG_URL}" - - "mkdir /tmp/ts_model_store" + - "mkdir -p /tmp/ts_model_store" - "ps aux | grep '${SERVER_PROCESS_NAME}' | awk '{print $2}' | xargs kill -9 2> /dev/null || true" - "${SERVER_START_CMD} --model-store /tmp/ts_model_store > /dev/null 2>&1" - "sleep 20s" @@ -74,7 +74,7 @@ reporting: criteria: # API requests KPI crieteria - success of ${API_LABEL}<${API_SUCCESS} for 10s, stop as failed - - avg-rt of ${API_LABEL}>${API_AVG_RT}, ${STOP_ALIAS} as failed + - avg-rt of ${API_LABEL}>${API_AVG_RT} for 10s, ${STOP_ALIAS} as failed # # # Monitoring metrics criteria # - class: bzt.modules.monitoring.MonitoringCriteria diff --git 
a/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml b/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml index 047d4d168..92a8d155c 100644 --- a/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml +++ b/tests/performance/tests/inference_multiple_models/inference_multiple_models.yaml @@ -17,7 +17,7 @@ reporting: criteria: # Inbuilt Criteria - success of Inference2<${INFR2_SUCC} for 10s, ${STOP_ALIAS} as failed - - avg-rt of Inference2>${INFR2_RT}, ${STOP_ALIAS} as failed + - avg-rt of Inference2>${INFR2_RT} for 10s, ${STOP_ALIAS} as failed - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes condition: '<' diff --git a/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml b/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml index ce5c9725b..d5a38242b 100644 --- a/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml +++ b/tests/performance/tests/multiple_inference_and_scaling/multiple_inference_and_scaling.yaml @@ -17,17 +17,17 @@ reporting: criteria: # Inbuilt Criteria - success of Inference2<${INFR2_SUCC} for 10s, stop as failed - - avg-rt of Inference2>${INFR2_RT}, ${STOP_ALIAS} as failed + - avg-rt of Inference2>${INFR2_RT} for 10s, ${STOP_ALIAS} as failed - success of Inference11<${INFR1_SUCC} for 10s, stop as failed - success of Inference21<${INFR2_SUCC} for 10s, stop as failed - - avg-rt of Inference11>${INFR1_RT}, ${STOP_ALIAS} as failed - - avg-rt of Inference21>${INFR2_RT}, ${STOP_ALIAS} as failed + - avg-rt of Inference11>${INFR1_RT} for 10s, ${STOP_ALIAS} as failed + - avg-rt of Inference21>${INFR2_RT} for 10s, ${STOP_ALIAS} as failed - success of ScaleUp1<${INFR2_SUCC} for 10s, stop as failed - - avg-rt of ScaleUp1>${SCALEUP1_RT}, ${STOP_ALIAS} as failed + - avg-rt of ScaleUp1>${SCALEUP1_RT} for 10s, ${STOP_ALIAS} as failed - success of ScaleUp2<${INFR2_SUCC} for 10s, stop as failed - - avg-rt of ScaleUp2>${SCALEUP2_RT}, ${STOP_ALIAS} as failed + - avg-rt of ScaleUp2>${SCALEUP2_RT} for 10s, ${STOP_ALIAS} as failed - success of ScaleDown1<${INFR2_SUCC} for 10s, stop as failed - - avg-rt of ScaleDown1>${SCALEDOWN1_RT}, ${STOP_ALIAS} as failed + - avg-rt of ScaleDown1>${SCALEDOWN1_RT} for 10s, ${STOP_ALIAS} as failed - success of ScaleDown2<${INFR2_SUCC} for 10s, stop as failed - - avg-rt of ScaleDown2>${SCALEDOWN2_RT}, ${STOP_ALIAS} as failed + - avg-rt of ScaleDown2>${SCALEDOWN2_RT} for 10s, ${STOP_ALIAS} as failed diff --git a/tests/performance/tests/register_unregister/register_unregister.yaml b/tests/performance/tests/register_unregister/register_unregister.yaml index 6892e3531..35778217d 100644 --- a/tests/performance/tests/register_unregister/register_unregister.yaml +++ b/tests/performance/tests/register_unregister/register_unregister.yaml @@ -8,4 +8,4 @@ reporting: criteria: # Inbuilt Criteria - success of UnregisterModel<${UNREG_SUCC} for 10s, ${STOP_ALIAS} as failed - - avg-rt of UnregisterModel>${UNREG_RT}, ${STOP_ALIAS} as failed + - avg-rt of UnregisterModel>${UNREG_RT} for 10s, ${STOP_ALIAS} as failed diff --git a/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml b/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml index def6b57e3..b87aa88a9 100644 --- a/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml +++ 
b/tests/performance/tests/register_unregister_multiple/register_unregister_multiple.yaml @@ -21,8 +21,8 @@ services: criteria: # Inbuilt Criteria - success of ${API_LABEL}<${API_SUCCESS} for 10s, ${STOP_ALIAS} as failed - - avg-rt of ${API_LABEL}>${API_AVG_RT}, ${STOP_ALIAS} as failed + - avg-rt of ${API_LABEL}>${API_AVG_RT} for 10s, ${STOP_ALIAS} as failed - success of ScaleUp<${SCL_UP_SUCC} for 10s, ${STOP_ALIAS} as failed - success of UnregisterModel<${UNREG_SUCC} for 10s, ${STOP_ALIAS} as failed - - avg-rt of ScaleUp>${SCL_UP_RT}, ${STOP_ALIAS} as failed - - avg-rt of UnregisterModel>${UNREG_RT}, ${STOP_ALIAS} as failed + - avg-rt of ScaleUp>${SCL_UP_RT} for 10s, ${STOP_ALIAS} as failed + - avg-rt of UnregisterModel>${UNREG_RT} for 10s, ${STOP_ALIAS} as failed diff --git a/tests/performance/tests/scale_down_workers/environments/xlarge.yaml b/tests/performance/tests/scale_down_workers/environments/xlarge.yaml index e30b12c7b..2a0c9101a 100644 --- a/tests/performance/tests/scale_down_workers/environments/xlarge.yaml +++ b/tests/performance/tests/scale_down_workers/environments/xlarge.yaml @@ -18,7 +18,7 @@ settings: CONCURRENCY: 10 RAMP-UP: 1s - HOLD-FOR: 300s + HOLD-FOR: 30s SCRIPT: scale_down_workers.jmx STOP : '' #possible values true, false. Bug in bzt so for false use '' diff --git a/tests/performance/tests/scale_down_workers/scale_down_workers.yaml b/tests/performance/tests/scale_down_workers/scale_down_workers.yaml index 7e99ba2d6..13924e484 100644 --- a/tests/performance/tests/scale_down_workers/scale_down_workers.yaml +++ b/tests/performance/tests/scale_down_workers/scale_down_workers.yaml @@ -24,7 +24,7 @@ services: criteria: # Inbuilt Criteria - success of ScaleDown<${SCL_DWN_SUCC} for 10s, ${STOP_ALIAS} as failed - - avg-rt of ScaleDown>${SCL_DWN_RT}, ${STOP_ALIAS} as failed + - avg-rt of ScaleDown>${SCL_DWN_RT} for 10s, ${STOP_ALIAS} as failed # Custom Criteria - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes diff --git a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml index 32004800e..bb08a703b 100644 --- a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml +++ b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml @@ -19,7 +19,7 @@ settings: CONCURRENCY: 10 RAMP-UP: 1s - HOLD-FOR: 300s + HOLD-FOR: 30s SCRIPT: scale_up_workers.jmx STOP : '' #possible values true, false. 
Bug in bzt so for false use '' diff --git a/tests/performance/tests/scale_up_workers/scale_up_workers.yaml b/tests/performance/tests/scale_up_workers/scale_up_workers.yaml index 051122d92..da139df6d 100644 --- a/tests/performance/tests/scale_up_workers/scale_up_workers.yaml +++ b/tests/performance/tests/scale_up_workers/scale_up_workers.yaml @@ -24,7 +24,7 @@ services: criteria: # Inbuilt Criteria - success of ScaleUp<${SCL_UP_SUCC} for 10s, ${STOP_ALIAS} as failed - - avg-rt of ScaleUp>${SCL_UP_RT}, ${STOP_ALIAS} as failed + - avg-rt of ScaleUp>${SCL_UP_RT} for 10s, ${STOP_ALIAS} as failed # Custom Criteria - class: bzt.modules.monitoring.MonitoringCriteria subject: ServerLocalClient/total_processes From 0adf6ac6fbe10cf579d2613100b585e845582f79 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Thu, 16 Jul 2020 14:39:05 +0530 Subject: [PATCH 17/21] fix links --- tests/performance/README.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/tests/performance/README.md b/tests/performance/README.md index 8f6258682..5fbcfdb86 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -44,7 +44,7 @@ The building blocks of the performance regression suite and flow is captured in 3. Make sure that `git` is installed and the test suites are run from the Model Server working directory. ### B. Running the test suite -1. Make sure parameters set in [tests/global_config.yaml](tests/performance/tests/global_config.yaml) are correct. +1. Make sure parameters set in [tests/global_config.yaml](tests/global_config.yaml) are correct. 2. To run the test suite execute [run_performance_suite.py](run_performance_suite.py) with the following parameters @@ -92,7 +92,7 @@ cd $MODEL_SERVER_HOME/tests/performance # Note that Model server started and stopped by the individual test suite. # check variables such as Model server PORT etc -# vi tests/common/global_config.yaml +# vi tests/global_config.yaml #all tests python -m run_performance_suite -e xlarge @@ -205,11 +205,10 @@ Create a folder for the test under `test_dir` location. A test generally compris load scenario and a yaml file which contains test scenarios specifying the conditions for failure or success. The file-names should be identical to the folder name with their respective extensions. -An example [jmeter script](tests/examples_starter/examples_starter.jmx) -and a [scenario](tests/examples_starter/examples_starter.yaml) is provided as a template to get started. +An example [jmeter script](tests/examples_starter/examples_starter.jmx) and [scenario](tests/examples_starter/examples_starter.yaml) are provided as a template to get started. Please note that various global configuration settings used by examples_starter.jmx script are specified in -[tests/global_config.yaml](tests/performance/tests/global_config.yaml) file. +[tests/global_config.yaml](tests/global_config.yaml) file. ```tests/examples_starter/examples_starter.yaml execution: @@ -243,7 +242,7 @@ Specify the metrics of interest in the services/monitoring section of the yaml. 1. Standalone monitoring server Use this technique if Model Server and the tests execute on different machines. Before running the test cases, - please start the [metrics_monitoring_server.py](metrics_monitoring_server.py) script. It will communicate server + please start the [metrics_monitoring_server.py](agents/metrics_monitoring_server.py) script. It will communicate server
To start the monitoring server, run the following commands on the Model Server host: @@ -424,5 +423,3 @@ possible ways to achieve this * Alternatively, deploy the standalone monitoring agent on the Model Server instance and run the test cases against the remote server. Note that the standalone monitoring agent works on both Python 2/3. - - From 6cacaf43a268ba0ec2ee65a7c2d55a1b6aff5738 Mon Sep 17 00:00:00 2001 From: Mahesh Ambule Date: Thu, 16 Jul 2020 19:55:31 +0530 Subject: [PATCH 18/21] modular --- .../tests/inference_single_worker/inference_single_worker.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/tests/inference_single_worker/inference_single_worker.yaml b/tests/performance/tests/inference_single_worker/inference_single_worker.yaml index 1bf6cce30..fdba969e0 100644 --- a/tests/performance/tests/inference_single_worker/inference_single_worker.yaml +++ b/tests/performance/tests/inference_single_worker/inference_single_worker.yaml @@ -1,6 +1,6 @@ --- scenarios: - inference_single_worker: + scenario_0: script: inference_single_worker.jmx services: From 7866a95696dfeada79f1005469f747d8ac21aed7 Mon Sep 17 00:00:00 2001 From: Prashant Sail Date: Fri, 17 Jul 2020 17:46:11 +0530 Subject: [PATCH 19/21] AB support in PRT (#28) --- tests/performance/README.md | 31 ++++- tests/performance/run_performance_suite.py | 2 +- .../runs/taurus/override/__init__.py | 0 .../runs/taurus/override/apache_bench.py | 129 ++++++++++++++++++ .../override}/metrics_monitoring_inproc.py | 6 +- .../environments/xlarge.yaml | 48 +++++++ .../examples_apache_bench.yaml | 43 ++++++ tests/performance/tests/global_config.yaml | 3 + 8 files changed, 254 insertions(+), 8 deletions(-) create mode 100644 tests/performance/runs/taurus/override/__init__.py create mode 100644 tests/performance/runs/taurus/override/apache_bench.py rename tests/performance/{agents => runs/taurus/override}/metrics_monitoring_inproc.py (94%) create mode 100644 tests/performance/tests/examples_apache_bench/environments/xlarge.yaml create mode 100644 tests/performance/tests/examples_apache_bench/examples_apache_bench.yaml diff --git a/tests/performance/README.md b/tests/performance/README.md index 5fbcfdb86..89025dab8 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -24,6 +24,7 @@ environment, will have its own threshold values. * Specification of pass/fail criterion between two commits. For example, memory consumed by workers should not increase by more than 10% between two commits for the given test case. * Custom reporting of results. + * Apache Benchmark executor which supports GET, POST, PUT, OPTIONS, DELETE methods The building blocks of the performance regression suite and flow is captured in the following drawing @@ -201,6 +202,8 @@ Follow these three steps to add a new test case to the test suite. #### 1. Add scenario (a.k.a test suite) +> By default, all scenarios are triggered using _jmeter_ as the underlying executor. + Create a folder for the test under `test_dir` location. A test generally comprises of a jmeter file - containing the load scenario and a yaml file which contains test scenarios specifying the conditions for failure or success. The file-names should be identical to the folder name with their respective extensions. @@ -222,7 +225,6 @@ Please note that various global configuration settings used by examples_starter. 
script: examples_starter.jmx ``` - To execute this test suite, run the following command ```bash @@ -231,9 +233,30 @@ To execute this test suite, run the following command python -m run_performance_suite -p examples_starter -e xlarge ``` -**Note**: -Taurus provides support for different executors such as JMeter. Supported executor types can be found [here](https://gettaurus.org/docs/ExecutionSettings/). -Details about how to use an existing JMeter script are provided [here](https://gettaurus.org/docs/JMeter/). +**Using Apache Benchmark** + +To execute a scenario using _apache benchmark_ as the executor, make the following changes in the yaml - +1. Override the `execution` section and explicitly specify "apache_bench" as the value of `executor` +2. Override the `scenarios` section and specify the request details under the `requests` section + +``` +~execution: + - executor: apache_bench + concurrency: 10 + hold-for: 300s +~scenarios: + demo: + requests: + - url: http://127.0.0.1:8080/predictions/squeezenet1_1 + label: MyInference + method: POST + file-path: /Users/johndoe/demo/kitten.jpg +``` +Refer to [examples_apache_bench](tests/examples_apache_bench/examples_apache_bench.yaml) for the complete scenario. + +> **Note**: +> Taurus provides support for different executors such as JMeter, Apache Benchmark, etc. Supported executor types can be found [here](https://gettaurus.org/docs/ExecutionSettings/). +> Details about how to use an existing JMeter script are provided [here](https://gettaurus.org/docs/JMeter/). #### 2. Add metrics to monitor diff --git a/tests/performance/run_performance_suite.py b/tests/performance/run_performance_suite.py index 7f4b89af6..11d56d315 100755 --- a/tests/performance/run_performance_suite.py +++ b/tests/performance/run_performance_suite.py @@ -84,7 +84,7 @@ def run_test_suite(artifacts_dir, test_dir, pattern, exclude_pattern, logger.info("Collected tests %s", test_dirs) with ExecutionEnv(MONITORING_AGENT, artifacts_dir, env_name, compare_local, compare_with, monit) as prt: - pre_command = 'export PYTHONPATH={}:$PYTHONPATH;'.format(os.path.join(str(ROOT_PATH), "agents")) + pre_command = 'export PYTHONPATH={}:$PYTHONPATH;'.format(os.path.join(str(ROOT_PATH), "runs", "taurus", "override")) for suite_name in tqdm(test_dirs, desc="Test Suites"): with Timer("Test suite {} execution time".format(suite_name)) as t: suite_artifacts_dir = os.path.join(artifacts_dir, suite_name) diff --git a/tests/performance/runs/taurus/override/__init__.py b/tests/performance/runs/taurus/override/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/performance/runs/taurus/override/apache_bench.py b/tests/performance/runs/taurus/override/apache_bench.py new file mode 100644 index 000000000..026cbb119 --- /dev/null +++ b/tests/performance/runs/taurus/override/apache_bench.py @@ -0,0 +1,129 @@ +""" +Module adds support for POST, PUT, OPTIONS and DELETE methods to Apache Benchmark +""" +import mimetypes +import os + +from math import ceil +from distutils.version import LooseVersion + +from bzt import TaurusConfigError +from bzt.modules.aggregator import ConsolidatingAggregator +from bzt.six import iteritems +from bzt.utils import dehumanize_time +from bzt.modules.ab import ApacheBenchmarkExecutor, TSVDataReader + + +class ApacheBenchmarkExecutor(ApacheBenchmarkExecutor): + """ + Apache Benchmark executor module + """ + + def prepare(self): + super(ApacheBenchmarkExecutor, self).prepare() + self.scenario = self.get_scenario() + self.install_required_tools() + + self._tsv_file = 
self.engine.create_artifact("ab", ".tsv") + + self.stdout = open(self.engine.create_artifact("ab", ".out"), 'w') + self.stderr = open(self.engine.create_artifact("ab", ".err"), 'w') + + self.reader = TSVDataReader(self._tsv_file, self.log) + if isinstance(self.engine.aggregator, ConsolidatingAggregator): + self.engine.aggregator.add_underling(self.reader) + + def startup(self): + args = [self.tool.tool_path] + load = self.get_load() + load_iterations = load.iterations or 1 + load_concurrency = load.concurrency or 1 + + if load.hold: + hold = int(ceil(dehumanize_time(load.hold))) + args += ['-t', str(hold)] + else: + args += ['-n', str(load_iterations * load_concurrency)] # ab waits for total number of iterations + + timeout = self.get_scenario().get("timeout", None) + if timeout: + args += ['-s', str(ceil(dehumanize_time(timeout)))] + + args += ['-c', str(load_concurrency)] + args += ['-d'] # do not print 'Processed *00 requests' every 100 requests or so + args += ['-r'] # do not crash on socket level errors + + if self.tool.version and LooseVersion(self.tool.version) >= LooseVersion("2.4.7"): + args += ['-l'] # accept variable-len responses + + args += ['-g', str(self._tsv_file)] # dump stats to TSV file + + # add global scenario headers + for key, val in iteritems(self.scenario.get_headers()): + args += ['-H', "%s: %s" % (key, val)] + + requests = self.scenario.get_requests() + if not requests: + raise TaurusConfigError("You must specify at least one request for ab") + if len(requests) > 1: + self.log.warning("ab doesn't support multiple requests. Only first one will be used.") + request = self.__first_http_request() + if request is None: + raise TaurusConfigError("ab supports only HTTP requests, while scenario doesn't have any") + + # add request-specific headers + for key, val in iteritems(request.headers): + args += ['-H', "%s: %s" % (key, val)] + + # if request.method != 'GET': + # raise TaurusConfigError("ab supports only GET requests, but '%s' is found" % request.method) + + if request.method == 'HEAD': + args += ['-i'] + elif request.method in ['POST', 'PUT']: + options = {'POST': '-p', 'PUT': '-u'} + file_path = request.config['file-path'] + if not file_path: + file_path = os.devnull + self.log.warning("No file path specified, dev null will be used instead") + args += [options[request.method], file_path] + content_type = request.config['content-type'] or mimetypes.guess_type(file_path)[0] + if content_type: + args += ['-T', content_type] + else: # 'GET', 'OPTIONS', 'DELETE', etc + args += ['-m', request.method] + + if request.priority_option('keepalive', default=True): + args += ['-k'] + + args += [request.url] + + self.reader.setup(load_concurrency, request.label) + + self.log.info('Executing command : ' + ' '.join(arg for arg in args)) + self.process = self._execute(args) + + +class TSVDataReader(TSVDataReader): + def _read(self, last_pass=False): + lines = self.file.get_lines(size=1024 * 1024, last_pass=last_pass) + + for line in lines: + if not self.skipped_header: + self.skipped_header = True + continue + log_vals = [val.strip() for val in line.split('\t')] + + _error = None + # _rstatus = None + _rstatus = '' # Hack to trick taurus into computing aggregated stats + + _url = self.url_label + _concur = self.concurrency + _tstamp = int(log_vals[1]) # timestamp - moment of request sending + _con_time = float(log_vals[2]) / 1000.0 # connection time + _etime = float(log_vals[4]) / 1000.0 # elapsed time + _latency = float(log_vals[5]) / 1000.0 # latency (aka waittime) + _bytes = 
None + + yield _tstamp, _url, _concur, _etime, _con_time, _latency, _rstatus, _error, '', _bytes \ No newline at end of file diff --git a/tests/performance/agents/metrics_monitoring_inproc.py b/tests/performance/runs/taurus/override/metrics_monitoring_inproc.py similarity index 94% rename from tests/performance/agents/metrics_monitoring_inproc.py rename to tests/performance/runs/taurus/override/metrics_monitoring_inproc.py index ed5788187..e922dfb2a 100644 --- a/tests/performance/agents/metrics_monitoring_inproc.py +++ b/tests/performance/runs/taurus/override/metrics_monitoring_inproc.py @@ -24,9 +24,9 @@ from bzt.modules import monitoring from bzt.utils import dehumanize_time -import configuration -from metrics import get_metrics, AVAILABLE_METRICS as AVAILABLE_SERVER_METRICS -from utils.process import get_process_pid_from_file, get_server_processes, \ +from agents import configuration +from agents.metrics import get_metrics, AVAILABLE_METRICS as AVAILABLE_SERVER_METRICS +from agents.utils.process import get_process_pid_from_file, get_server_processes, \ get_child_processes, get_server_pidfile diff --git a/tests/performance/tests/examples_apache_bench/environments/xlarge.yaml b/tests/performance/tests/examples_apache_bench/environments/xlarge.yaml new file mode 100644 index 000000000..f06a0540b --- /dev/null +++ b/tests/performance/tests/examples_apache_bench/environments/xlarge.yaml @@ -0,0 +1,48 @@ + +--- +settings: + env: + API_LABEL : Inference + API_SUCCESS : 80% + API_AVG_RT : 140ms + + TOTAL_WORKERS: 1 + TOTAL_WORKERS_MEM: 300000000 + TOTAL_WORKERS_FDS: 150 + + TOTAL_MEM : 1000000000 + TOTAL_PROCS : 3 + TOTAL_FDS : 150 + + FRNTEND_MEM: 600000000 + + TOTAL_ORPHANS : 0 + TOTAL_ZOMBIES : 0 + + ## Percent diff values to do a compare across runs + TOTAL_WORKERS_PREV_DIFF: 0 + TOTAL_WORKERS_MEM_PREV_DIFF: 30 + TOTAL_WORKERS_FDS_PREV_DIFF: 30 + TOTAL_MEM_PREV_DIFF: 30 + TOTAL_PROCS_PREV_DIFF: 30 + TOTAL_FDS_PREV_DIFF: 30 + FRNTEND_MEM_PREV_DIFF: 30 + TOTAL_ORPHANS_PREV_DIFF: 0 + TOTAL_ZOMBIES_PREV_DIFF: 0 + + TOTAL_WORKERS_RUN_DIFF: 0 + TOTAL_WORKERS_MEM_RUN_DIFF: 30 + TOTAL_WORKERS_FDS_RUN_DIFF: 30 + TOTAL_MEM_RUN_DIFF: 60 + TOTAL_PROCS_RUN_DIFF: 30 + TOTAL_FDS_RUN_DIFF: 30 + FRNTEND_MEM_RUN_DIFF: 90 + TOTAL_ORPHANS_RUN_DIFF: 0 + TOTAL_ZOMBIES_RUN_DIFF: 0 + + CONCURRENCY : 10 + RAMP-UP : 1s + HOLD-FOR : 300s + + STOP : '' #possible values true, false. 
Bug in bzt so for false use '' + STOP_ALIAS: continue #possible values continue, stop \ No newline at end of file diff --git a/tests/performance/tests/examples_apache_bench/examples_apache_bench.yaml b/tests/performance/tests/examples_apache_bench/examples_apache_bench.yaml new file mode 100644 index 000000000..87af61a32 --- /dev/null +++ b/tests/performance/tests/examples_apache_bench/examples_apache_bench.yaml @@ -0,0 +1,43 @@ +~execution: + - executor: apache_bench + concurrency: ${CONCURRENCY} + ramp-up: ${RAMP-UP} + hold-for: ${HOLD-FOR} + scenario: scenario_0 + +~scenarios: + scenario_0: + requests: + - url: http://127.0.0.1:8080/predictions/${SQZNET_NAME} + label: ${API_LABEL} + method: POST + file-path: ${INPUT_IMG_PATH} + +services: + - module: shellexec + prepare: + - "curl -s -X POST http://localhost:8081/models?url=${SQZNET_URL}" + - "curl -s -X PUT http://localhost:8081/models/${SQZNET_NAME}?min_worker=1&synchronous=true" + + +reporting: +- module: passfail + criteria: + # Inbuilt Criteria - cannot be used with Apache Benchmark + # - success of MyLabel<${INFR_SUCC}, stop as failed + # - avg-rt of MyLabel>${INFR_RT}, stop as failed + # Custom Criteria + - class: bzt.modules.monitoring.MonitoringCriteria + subject: ServerLocalClient/total_processes + condition: '>' + threshold: ${TOTAL_PROCS} + timeframe: 1s + stop : true + fail : true + - class: bzt.modules.monitoring.MonitoringCriteria + subject: ServerLocalClient/total_processes + condition: '<' + threshold: ${TOTAL_PROCS} + timeframe: 1s + stop : true + fail : true \ No newline at end of file diff --git a/tests/performance/tests/global_config.yaml b/tests/performance/tests/global_config.yaml index c88e1a2e4..1b8da6de3 100644 --- a/tests/performance/tests/global_config.yaml +++ b/tests/performance/tests/global_config.yaml @@ -32,6 +32,9 @@ modules: # metrics_monitoring_inproc and dependencies should be in python path class : metrics_monitoring_inproc.Monitor # monitoring class. + apache_bench: + class: apache_bench.ApacheBenchmarkExecutor + services: - module: shellexec prepare: From 62ba5bf3ee6c2c72272e32b084491d4c1601f0e2 Mon Sep 17 00:00:00 2001 From: Prashant Sail Date: Wed, 22 Jul 2020 11:24:46 +0530 Subject: [PATCH 20/21] support for remote execution and logging (#29) --- tests/performance/README.md | 8 ++++---- tests/performance/run_performance_suite.py | 2 +- tests/performance/runs/taurus/__init__.py | 3 ++- ...monitoring_inproc.py => metrics_monitoring.py} | 15 +++++++++++++++ tests/performance/runs/taurus/reader.py | 6 +++--- .../examples_local_criteria.yaml | 10 +++++----- .../examples_local_monitoring.yaml | 10 +++++----- .../examples_remote_criteria.yaml | 4 ++-- .../examples_remote_monitoring.yaml | 4 ++-- tests/performance/tests/global_config.yaml | 10 +++++----- 10 files changed, 44 insertions(+), 28 deletions(-) rename tests/performance/runs/taurus/override/{metrics_monitoring_inproc.py => metrics_monitoring.py} (87%) diff --git a/tests/performance/README.md b/tests/performance/README.md index 89025dab8..b90125099 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -279,8 +279,8 @@ Specify the metrics of interest in the services/monitoring section of the yaml. 
```yaml services: - - module: monitoring - server-agent: + - module: server_monitoring + ServerRemoteClient: - address: :9009 # metric monitoring service address label: Model-Server-inference-server # Specified label will be used in reports instead of ip:port interval: 1s # polling interval @@ -308,12 +308,12 @@ Specify the metrics of interest in the services/monitoring section of the yaml. ```yaml modules: - server_local_monitoring: + server_monitoring: # metrics_monitoring_taurus and dependencies should be in python path class : metrics_monitoring_taurus.Monitor # monitoring class. services: - - module: server_local_monitoring # should be added in modules section + - module: server_monitoring # should be added in modules section ServerLocalClient: # keyword from metrics_monitoring_taurus.Monitor - interval: 1s metrics: diff --git a/tests/performance/run_performance_suite.py b/tests/performance/run_performance_suite.py index 11d56d315..4e0d05bfb 100755 --- a/tests/performance/run_performance_suite.py +++ b/tests/performance/run_performance_suite.py @@ -97,7 +97,7 @@ def run_test_suite(artifacts_dir, test_dir, pattern, exclude_pattern, GLOBAL_CONFIG_PATH, test_file, env_yaml_path)) - update_taurus_metric_files(suite_artifacts_dir, test_file) + update_taurus_metric_files(suite_artifacts_dir) sys.exit(prt.exit_code) diff --git a/tests/performance/runs/taurus/__init__.py b/tests/performance/runs/taurus/__init__.py index d2dfd5012..cadc5af82 100644 --- a/tests/performance/runs/taurus/__init__.py +++ b/tests/performance/runs/taurus/__init__.py @@ -42,13 +42,14 @@ def get_taurus_options(artifacts_dir, jmeter_path=None): return options_str -def update_taurus_metric_files(suite_artifacts_dir, test_file): +def update_taurus_metric_files(suite_artifacts_dir): """ It renames the server and local metric monitoring log files to metrics.csv. The order of the columns in header of server metric monitoring SALogs file generated by taurus is not inline with data. So as a work around this function rewrites the header based on order defined in the test yaml. 
""" + test_file = os.path.join(suite_artifacts_dir, "effective.yml") metrics_new_file = os.path.join(suite_artifacts_dir, "metrics.csv") server_metric_file_pattern = os.path.join(suite_artifacts_dir, "SAlogs_*") diff --git a/tests/performance/runs/taurus/override/metrics_monitoring_inproc.py b/tests/performance/runs/taurus/override/metrics_monitoring.py similarity index 87% rename from tests/performance/runs/taurus/override/metrics_monitoring_inproc.py rename to tests/performance/runs/taurus/override/metrics_monitoring.py index e922dfb2a..859b2ea3a 100644 --- a/tests/performance/runs/taurus/override/metrics_monitoring_inproc.py +++ b/tests/performance/runs/taurus/override/metrics_monitoring.py @@ -42,6 +42,7 @@ class Monitor(monitoring.Monitoring): def __init__(self): super(Monitor, self).__init__() self.client_classes.update({'ServerLocalClient': ServerLocalClient}) + self.client_classes.update({'ServerRemoteClient': ServerRemoteClient}) class ServerLocalClient(monitoring.LocalClient): @@ -86,6 +87,20 @@ def connect(self): logs_writer.writerow(metrics) +class ServerRemoteClient(monitoring.ServerAgentClient): + """Custom server remote client """ + def get_data(self): + result = super().get_data() + # Logging for custom metric values + msg = [] + for res in result: + for metric_name in self.config.get("metrics"): + metric_value = res[metric_name] + msg.append("{0} : {1}".format(metric_name, metric_value)) + self.log.info("{0}".format(" -- ".join(msg))) + return result + + class ServerLocalMonitor(monitoring.LocalMonitor): """Custom server local monitor""" diff --git a/tests/performance/runs/taurus/reader.py b/tests/performance/runs/taurus/reader.py index 7abfdf7cc..71ac05b48 100644 --- a/tests/performance/runs/taurus/reader.py +++ b/tests/performance/runs/taurus/reader.py @@ -21,13 +21,13 @@ def get_mon_metrics_list(test_yaml_path): - """Utility method to get list of server-agent metrics which are being monitored from a test yaml file""" + """Utility method to get list of ServerRemoteClient metrics which are being monitored from a test yaml file""" metrics = [] with open(test_yaml_path) as test_yaml: test_yaml = yaml.safe_load(test_yaml) for rep_section in test_yaml.get('services', []): - if rep_section.get('module', None) == 'monitoring' and "server-agent" in rep_section: - for mon_section in rep_section.get('server-agent', []): + if rep_section.get('module', None) == 'monitoring' and "ServerRemoteClient" in rep_section: + for mon_section in rep_section.get('ServerRemoteClient', []): if isinstance(mon_section, dict): metrics.extend(mon_section.get('metrics', [])) diff --git a/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml b/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml index 8234a8dd7..72a539d26 100644 --- a/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml +++ b/tests/performance/tests/examples_local_criteria/examples_local_criteria.yaml @@ -10,9 +10,9 @@ script: examples_local_criteria.jmx modules: - server_local_monitoring: - # metrics_monitoring_inproc and dependencies should be in python path - class : metrics_monitoring_inproc.Monitor # monitoring class. + server_monitoring: + # metrics_monitoring and dependencies should be in python path + class : metrics_monitoring.Monitor # monitoring class. 
~services: - module: shellexec @@ -28,8 +28,8 @@ modules: - "rm -r /tmp/ts_model_store" - "mv logs ${ARTIFACTS_DIR}/model_server_logs" - - module: server_local_monitoring # should be added in modules section - ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor + - module: server_monitoring # should be added in modules section + ServerLocalClient: # keyword from metrics_monitoring.Monitor - interval: 1s logging : True metrics: diff --git a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml index 86ac99ac9..b48ea0d81 100644 --- a/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml +++ b/tests/performance/tests/examples_local_monitoring/examples_local_monitoring.yaml @@ -10,9 +10,9 @@ script: examples_local_monitoring.jmx modules: - server_local_monitoring: - # metrics_monitoring_inproc and dependencies should be in python path - class : metrics_monitoring_inproc.Monitor # monitoring class. + server_monitoring: + # metrics_monitoring and dependencies should be in python path + class : metrics_monitoring.Monitor # monitoring class. ~services: - module: shellexec @@ -28,8 +28,8 @@ modules: - "rm -r /tmp/ts_model_store" - "mv logs ${ARTIFACTS_DIR}/model_server_logs" - - module: server_local_monitoring # should be added in modules section - ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor + - module: server_monitoring # should be added in modules section + ServerLocalClient: # keyword from metrics_monitoring.Monitor - interval: 1s metrics: - cpu diff --git a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml index 384f6bf2b..487c825ae 100644 --- a/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml +++ b/tests/performance/tests/examples_remote_criteria/examples_remote_criteria.yaml @@ -23,8 +23,8 @@ - "rm -r /tmp/ts_model_store" - "mv logs ${ARTIFACTS_DIR}/model_server_logs" - - module: monitoring - server-agent: + - module: server_monitoring + ServerRemoteClient: - address: localhost:9009 # metric monitoring service address label: model-server # if you specify label, it will be used in reports instead of ip:port interval: 1s # polling interval diff --git a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml index f899ec98f..bf531d571 100644 --- a/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml +++ b/tests/performance/tests/examples_remote_monitoring/examples_remote_monitoring.yaml @@ -25,8 +25,8 @@ - "rm -r /tmp/ts_model_store" - "mv logs ${ARTIFACTS_DIR}/model_server_logs" - - module: monitoring - server-agent: + - module: server_monitoring + ServerRemoteClient: - address: localhost:9009 # metric monitoring service address label: model-server # if you specify label, it will be used in reports instead of ip:port interval: 1s # polling interval diff --git a/tests/performance/tests/global_config.yaml b/tests/performance/tests/global_config.yaml index 1b8da6de3..98c343d86 100644 --- a/tests/performance/tests/global_config.yaml +++ b/tests/performance/tests/global_config.yaml @@ -28,9 +28,9 @@ modules: RESNET_URL: ${RESNET_URL} RESNET_NAME: ${RESNET_NAME} - server_local_monitoring: - # metrics_monitoring_inproc and dependencies should be in 
diff --git a/tests/performance/tests/global_config.yaml b/tests/performance/tests/global_config.yaml
index 1b8da6de3..98c343d86 100644
--- a/tests/performance/tests/global_config.yaml
+++ b/tests/performance/tests/global_config.yaml
@@ -28,9 +28,9 @@ modules:
       RESNET_URL: ${RESNET_URL}
       RESNET_NAME: ${RESNET_NAME}
 
-  server_local_monitoring:
-    # metrics_monitoring_inproc and dependencies should be in python path
-    class : metrics_monitoring_inproc.Monitor # monitoring class.
+  server_monitoring:
+    # metrics_monitoring and dependencies should be in python path
+    class : metrics_monitoring.Monitor # monitoring class.
 
   apache_bench:
     class: apache_bench.ApacheBenchmarkExecutor
@@ -49,8 +49,8 @@ services:
       - "rm -r /tmp/ts_model_store"
      - "mv logs ${ARTIFACTS_DIR}/model_server_logs"
 
-  - module: server_local_monitoring # should be added in modules section
-    ServerLocalClient: # keyword from metrics_monitoring_inproc.Monitor
+  - module: server_monitoring # should be added in modules section
+    ServerLocalClient: # keyword from metrics_monitoring.Monitor
     - interval: 1s
       logging : True
       metrics:

From 37877672b4b30cc9e60356d240af59100e3b92f6 Mon Sep 17 00:00:00 2001
From: Prashant Sail
Date: Wed, 22 Jul 2020 21:11:24 +0530
Subject: [PATCH 21/21] Perf cut2 (#30)

* corrected section - needed for header sequence in metrics.csv
* fixed workers count
---
 tests/performance/agents/metrics/__init__.py                  | 2 +-
 tests/performance/runs/taurus/reader.py                       | 2 +-
 .../tests/batch_and_single_inference/environments/xlarge.yaml | 2 +-
 .../tests/batch_inference/environments/xlarge.yaml            | 4 ++--
 .../tests/inference_multiple_models/environments/xlarge.yaml  | 4 ++--
 .../tests/inference_multiple_worker/environments/xlarge.yaml  | 4 ++--
 .../tests/inference_single_worker/environments/xlarge.yaml    | 4 ++--
 .../tests/model_description/environments/xlarge.yaml          | 4 ++--
 .../multiple_inference_and_scaling/environments/xlarge.yaml   | 4 ++--
 .../tests/register_unregister/environments/xlarge.yaml        | 4 ++--
 .../register_unregister_multiple/environments/xlarge.yaml     | 4 ++--
 .../tests/scale_down_workers/environments/xlarge.yaml         | 4 ++--
 .../tests/scale_up_workers/environments/xlarge.yaml           | 4 ++--
 13 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/tests/performance/agents/metrics/__init__.py b/tests/performance/agents/metrics/__init__.py
index 8ecd97ab0..21266ffd2 100644
--- a/tests/performance/agents/metrics/__init__.py
+++ b/tests/performance/agents/metrics/__init__.py
@@ -156,7 +156,7 @@ def update_metric(metric_name, proc_type, stats):
 
     # Total processes
     result['total_processes'] = len(worker_stats) + 1
-    result['total_workers'] = max(len(worker_stats) - 1, 0)
+    result['total_workers'] = len(worker_stats)
     result['orphans'] = len(list(filter(lambda p: p['ppid'] == 1, worker_stats)))
     result['zombies'] = len(zombie_children)
 
diff --git a/tests/performance/runs/taurus/reader.py b/tests/performance/runs/taurus/reader.py
index 71ac05b48..700039f04 100644
--- a/tests/performance/runs/taurus/reader.py
+++ b/tests/performance/runs/taurus/reader.py
@@ -26,7 +26,7 @@ def get_mon_metrics_list(test_yaml_path):
     with open(test_yaml_path) as test_yaml:
         test_yaml = yaml.safe_load(test_yaml)
         for rep_section in test_yaml.get('services', []):
-            if rep_section.get('module', None) == 'monitoring' and "ServerRemoteClient" in rep_section:
+            if rep_section.get('module', None) == 'server_monitoring' and "ServerRemoteClient" in rep_section:
                 for mon_section in rep_section.get('ServerRemoteClient', []):
                     if isinstance(mon_section, dict):
                         metrics.extend(mon_section.get('metrics', []))
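The `total_workers` fix above drops the off-by-one: the agent now reports every tracked worker process rather than `len(worker_stats) - 1`. As a rough, self-contained illustration of the counting convention (this assumes psutil is available and, as a simplification, that workers are child processes of the frontend; the real collection code in `tests/performance/agents` differs in detail):

```python
import psutil

def process_counts(frontend_pid):
    """Sketch of the counting convention used by the metrics agent."""
    children = psutil.Process(frontend_pid).children(recursive=True)
    worker_stats = [p for p in children if p.status() != psutil.STATUS_ZOMBIE]
    zombie_children = [p for p in children if p.status() == psutil.STATUS_ZOMBIE]
    return {
        "total_processes": len(worker_stats) + 1,  # workers plus the frontend itself
        "total_workers": len(worker_stats),        # fixed: no longer len(...) - 1
        "zombies": len(zombie_children),
    }
```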
diff --git a/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml b/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml
index cd22dcdb3..d42bbd30a 100644
--- a/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml
+++ b/tests/performance/tests/batch_and_single_inference/environments/xlarge.yaml
@@ -8,7 +8,7 @@ settings:
   INF2_SUCC: 80%
   INF2_AVG_RT: 30ms
 
-  TOTAL_WORKERS: 5
+  TOTAL_WORKERS: 6
   TOTAL_WORKERS_MEM: 999686400
   TOTAL_WORKERS_FDS: 60
 
diff --git a/tests/performance/tests/batch_inference/environments/xlarge.yaml b/tests/performance/tests/batch_inference/environments/xlarge.yaml
index 5ee95f963..c1f8df5df 100644
--- a/tests/performance/tests/batch_inference/environments/xlarge.yaml
+++ b/tests/performance/tests/batch_inference/environments/xlarge.yaml
@@ -5,12 +5,12 @@ settings:
   API_SUCCESS : 80%
   API_AVG_RT : 30ms
 
-  TOTAL_WORKERS: 4
+  TOTAL_WORKERS: 3
   TOTAL_WORKERS_MEM: 3000000000
   TOTAL_WORKERS_FDS: 400
 
   TOTAL_MEM : 4000000000
-  TOTAL_PROCS : 7
+  TOTAL_PROCS : 4
   TOTAL_FDS : 200
 
   FRNTEND_MEM: 1000000000
diff --git a/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml b/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml
index c854de1cd..b68f5c08e 100644
--- a/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml
+++ b/tests/performance/tests/inference_multiple_models/environments/xlarge.yaml
@@ -9,12 +9,12 @@ settings:
   INFR2_SUCC: 100%
   INFR2_RT: 450ms
 
-  TOTAL_WORKERS: 2
+  TOTAL_WORKERS: 4
   TOTAL_WORKERS_MEM: 600000000
   TOTAL_WORKERS_FDS: 150
 
   TOTAL_MEM : 1400000000
-  TOTAL_PROCS : 3
+  TOTAL_PROCS : 5
   TOTAL_FDS : 150
 
   FRNTEND_MEM: 800000000
diff --git a/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml b/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml
index 7f7887242..2fa0f5b6d 100644
--- a/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml
+++ b/tests/performance/tests/inference_multiple_worker/environments/xlarge.yaml
@@ -6,12 +6,12 @@ settings:
   API_SUCCESS : 80%
   API_AVG_RT : 140ms
 
-  TOTAL_WORKERS: 4
+  TOTAL_WORKERS: 5
   TOTAL_WORKERS_MEM: 600000000
   TOTAL_WORKERS_FDS: 40
 
   TOTAL_MEM : 1400000000
-  TOTAL_PROCS : 5
+  TOTAL_PROCS : 6
   TOTAL_FDS : 150
 
   FRNTEND_MEM: 800000000
diff --git a/tests/performance/tests/inference_single_worker/environments/xlarge.yaml b/tests/performance/tests/inference_single_worker/environments/xlarge.yaml
index f160a1bcf..f70048f05 100644
--- a/tests/performance/tests/inference_single_worker/environments/xlarge.yaml
+++ b/tests/performance/tests/inference_single_worker/environments/xlarge.yaml
@@ -6,12 +6,12 @@ settings:
   API_SUCCESS : 80%
   API_AVG_RT : 140ms
 
-  TOTAL_WORKERS: 1
+  TOTAL_WORKERS: 2
   TOTAL_WORKERS_MEM: 300000000
   TOTAL_WORKERS_FDS: 150
 
   TOTAL_MEM : 1000000000
-  TOTAL_PROCS : 2
+  TOTAL_PROCS : 3
   TOTAL_FDS : 150
 
   FRNTEND_MEM: 600000000
diff --git a/tests/performance/tests/model_description/environments/xlarge.yaml b/tests/performance/tests/model_description/environments/xlarge.yaml
index f62c5e282..370ba7324 100644
--- a/tests/performance/tests/model_description/environments/xlarge.yaml
+++ b/tests/performance/tests/model_description/environments/xlarge.yaml
@@ -6,12 +6,12 @@ settings:
   API_SUCCESS : 80%
   API_AVG_RT : 14ms
 
-  TOTAL_WORKERS: 1
+  TOTAL_WORKERS: 2
   TOTAL_WORKERS_MEM: 150205952
   TOTAL_WORKERS_FDS: 40
 
   TOTAL_MEM : 1400000000
-  TOTAL_PROCS : 2
+  TOTAL_PROCS : 3
   TOTAL_FDS : 150
 
   FRNTEND_MEM: 800000000
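Each `environments/xlarge.yaml` above only supplies threshold values; the pass/fail expressions that consume them live in the test yamls and are evaluated by Taurus. Purely to illustrate what these settings imply, here is a hand-rolled comparison with made-up sample values (the `<=` operator is an assumption about how each criterion is written):

```python
import yaml

# Threshold values in the shape of an environments/xlarge.yaml file.
ENV_YAML = """
settings:
  TOTAL_WORKERS: 2
  TOTAL_PROCS: 3
  TOTAL_MEM: 1400000000
"""

# Hypothetical monitoring sample for one poll interval.
sample = {"total_workers": 2, "total_processes": 3, "memory_bytes": 812000000}

limits = yaml.safe_load(ENV_YAML)["settings"]
ok = (sample["total_workers"] <= limits["TOTAL_WORKERS"]
      and sample["total_processes"] <= limits["TOTAL_PROCS"]
      and sample["memory_bytes"] <= limits["TOTAL_MEM"])
print("PASS" if ok else "FAIL")
```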
diff --git a/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml b/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml
index 704576848..21eaaee4b 100644
--- a/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml
+++ b/tests/performance/tests/multiple_inference_and_scaling/environments/xlarge.yaml
@@ -13,12 +13,12 @@ settings:
   SCALEDOWN1_RT : 100ms
   SCALEDOWN2_RT : 100ms
 
-  TOTAL_WORKERS: 9
+  TOTAL_WORKERS: 8
   TOTAL_WORKERS_MEM: 2668554752
   TOTAL_WORKERS_FDS: 100
 
   TOTAL_MEM : 2000000000
-  TOTAL_PROCS : 11
+  TOTAL_PROCS : 9
   TOTAL_FDS : 300
 
   FRNTEND_MEM: 1000000000
diff --git a/tests/performance/tests/register_unregister/environments/xlarge.yaml b/tests/performance/tests/register_unregister/environments/xlarge.yaml
index 1407c6336..cbe49892b 100644
--- a/tests/performance/tests/register_unregister/environments/xlarge.yaml
+++ b/tests/performance/tests/register_unregister/environments/xlarge.yaml
@@ -8,12 +8,12 @@ settings:
   UNREG_SUCC: 80%
   UNREG_RT: 290ms
 
-  TOTAL_WORKERS: 1
+  TOTAL_WORKERS: 0
   TOTAL_WORKERS_MEM: 14054528
   TOTAL_WORKERS_FDS: 50
 
   TOTAL_MEM : 1400000000
-  TOTAL_PROCS : 2
+  TOTAL_PROCS : 1
   TOTAL_FDS : 100
 
   FRNTEND_MEM: 1200000000
diff --git a/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml b/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml
index 4affbc8ef..eb4d0e024 100644
--- a/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml
+++ b/tests/performance/tests/register_unregister_multiple/environments/xlarge.yaml
@@ -10,13 +10,13 @@ settings:
   SCL_UP_RT: 1.5s
   UNREG_RT: 18ms
 
-  TOTAL_WORKERS: 4
+  TOTAL_WORKERS: 1
   TOTAL_WORKERS_MEM: 100000000
   TOTAL_WORKERS_FDS: 200
 
   TOTAL_MEM : 2000000000
-  TOTAL_PROCS : 5
+  TOTAL_PROCS : 2
   TOTAL_FDS : 200
 
   FRNTEND_MEM: 100000000
diff --git a/tests/performance/tests/scale_down_workers/environments/xlarge.yaml b/tests/performance/tests/scale_down_workers/environments/xlarge.yaml
index 2a0c9101a..913da23ee 100644
--- a/tests/performance/tests/scale_down_workers/environments/xlarge.yaml
+++ b/tests/performance/tests/scale_down_workers/environments/xlarge.yaml
@@ -5,8 +5,8 @@ settings:
   SCL_DWN_RT : 10ms
   TOTAL_PROCS_B4_SCL_DWN : 6
   TOTAL_PROCS_AFTR_SCL_DWN : 4
-  TOTAL_WRKRS_B4_SCL_DWN : 4
-  TOTAL_WRKRS_AFTR_SCL_DWN : 2
+  TOTAL_WRKRS_B4_SCL_DWN : 5
+  TOTAL_WRKRS_AFTR_SCL_DWN : 3
   FRNTEND_FDS : 78
   TOTAL_WRKRS_FDS_B4_SCL_DWN: 38
   FRNTEND_MEM : 1000000000
diff --git a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml
index bb08a703b..5beca12dc 100644
--- a/tests/performance/tests/scale_up_workers/environments/xlarge.yaml
+++ b/tests/performance/tests/scale_up_workers/environments/xlarge.yaml
@@ -5,8 +5,8 @@ settings:
   SCL_UP_RT : 10ms
   TOTAL_PROCS_AFTR_SCL_UP : 6
   TOTAL_PROCS_B4_SCL_UP : 2
-  TOTAL_WRKRS_AFTR_SCL_UP : 4
-  TOTAL_WRKRS_B4_SCL_UP : 1
+  TOTAL_WRKRS_AFTR_SCL_UP : 5
+  TOTAL_WRKRS_B4_SCL_UP : 3
   FRNTEND_FDS : 88
   TOTAL_WRKRS_FDS_AFTR_SCL_UP : 38
   FRNTEND_MEM : 1000000000
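For reference, the threshold updates in these environment files track the `total_workers` formula change earlier in this commit. The new value is exactly one higher than the old whenever at least one worker process is alive; thresholds that moved by a different amount reflect the expected worker counts of the individual tests rather than the formula alone. A trivial contrast of the two formulas:

```python
# Contrast of the old and new total_workers formulas from
# tests/performance/agents/metrics/__init__.py.
def old_total_workers(num_worker_procs):
    return max(num_worker_procs - 1, 0)

def new_total_workers(num_worker_procs):
    return num_worker_procs

for n in range(4):
    print(n, old_total_workers(n), new_total_workers(n))
# 0 0 0
# 1 0 1
# 2 1 2
# 3 2 3
```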