Skip to content

Commit 8cb5d42

Browse files
nv-braf and tgerdes authored
Add BLS composing model option to PA config (#664)
* Adding bls composing model options to PA config * fix a few issues * Fix type checking * Appending composing model config names to representation string * Ignore typing error --------- Co-authored-by: tgerdes <[email protected]>
1 parent f039395 commit 8cb5d42

File tree

5 files changed

+24
-14
lines changed

5 files changed

+24
-14
lines changed

docs/config.md

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -638,8 +638,7 @@ profile_models:
638638
cpu_only: true
639639
model_2:
640640
perf_analyzer_flags:
641-
percentile: 95
642-
latency-report-file: /path/to/latency/report/file
641+
percentile: 95
643642
```
644643

645644
The above config tells model analyzer to profile `model_1` on CPU only,
@@ -669,7 +668,6 @@ profile_models:
669668
batch_sizes: 4
670669
perf_analyzer_flags:
671670
percentile: 95
672-
latency-report-file: /path/to/latency/report/file
673671
```
674672

675673
### Model-specific options for Perf Analyzer
@@ -685,7 +683,6 @@ profile_models:
685683
model_1:
686684
perf_analyzer_flags:
687685
percentile: 95
688-
latency-report-file: /path/to/latency/report/file
689686
```
690687

691688
### Shape, Input-Data, and Streaming
@@ -931,7 +928,6 @@ profile_models:
931928
model_1:
932929
perf_analyzer_flags:
933930
percentile: 95
934-
latency-report-file: /path/to/latency/report/file
935931
model_config_parameters:
936932
max_batch_size: 2
937933
dynamic_batching:

model_analyzer/config/input/config_command.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from typing import Dict, List, Optional, Any
15+
from typing import Dict, List, Optional, Any, Union
1616
from model_analyzer.model_analyzer_exceptions \
1717
import TritonModelAnalyzerException
1818
import yaml
@@ -266,12 +266,11 @@ def _check_no_brute_search(self, args: Namespace,
266266

267267
def _check_no_multi_model(self, args: Namespace,
268268
yaml_config: Optional[Dict[str, List]]) -> None:
269-
profile_models = self._get_config_value('profile_models', args,
270-
yaml_config)
269+
profile_models: Union[Dict, List, str] = self._get_config_value(
270+
'profile_models', args, yaml_config) # type: ignore
271271

272-
profile_model_count = len(profile_models) if isinstance(
273-
profile_models, list) else len(
274-
profile_models.split(',')) # type: ignore
272+
profile_model_count = len(profile_models.split(',')) if isinstance(
273+
profile_models, str) else len(profile_models)
275274

276275
if profile_model_count > 1:
277276
raise TritonModelAnalyzerException(

model_analyzer/config/run/model_run_config.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,13 @@ def representation(self) -> str:
107107
Returns a representation string for the ModelRunConfig that can be used
108108
as a key to uniquely identify it
109109
"""
110+
repr = self.perf_config().representation()
110111

111-
return self.perf_config().representation()
112+
if self._composing_configs:
113+
repr += " " + (',').join(
114+
self.get_composing_config_names()) # type: ignore
115+
116+
return repr
112117

113118
def _check_for_client_vs_model_batch_size(self) -> bool:
114119
"""

model_analyzer/perf_analyzer/perf_analyzer.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
import os
5454
import csv
5555
import tempfile
56+
import glob
5657

5758
logger = logging.getLogger(LOGGER_NAME)
5859

@@ -446,7 +447,9 @@ def _parse_outputs(self, metrics):
446447
for perf_config in [
447448
mrc.perf_config() for mrc in self._config.model_run_configs()
448449
]:
449-
os.remove(perf_config['latency-report-file'])
450+
# Remove the latency file and all associated composing model latency files
451+
for f in glob.glob(f"*{perf_config['latency-report-file']}"):
452+
os.remove(f)
450453

451454
def _extract_perf_records_from_row(
452455
self, requested_metrics: List[Record],

model_analyzer/perf_analyzer/perf_config.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ class PerfAnalyzerConfig:
4141
'ssl-https-client-certificate-type',
4242
'ssl-https-client-certificate-file', 'ssl-https-private-key-type',
4343
'ssl-https-private-key-file', 'collect-metrics', 'metrics-url',
44-
'metrics-interval'
44+
'metrics-interval', 'bls-composing-models'
4545
]
4646

4747
input_to_options = [
@@ -177,6 +177,13 @@ def update_config_from_profile_config(self, model_name, profile_config):
177177
'metrics-interval': metrics_interval
178178
})
179179

180+
if profile_config.bls_composing_models:
181+
bls_composing_model_names = ','.join([
182+
bls_composing_model.model_name()
183+
for bls_composing_model in profile_config.bls_composing_models
184+
])
185+
params.update({'bls-composing-models': bls_composing_model_names})
186+
180187
self.update_config(params)
181188

182189
@classmethod

0 commit comments

Comments (0)