
Commit 7080291

tgerdesnv authored and mc-nv committed
use gpu metrics from PA (#520)
* support and use new CLI options
* Updates to use new gpu metrics
* working unit tests
* Update FreeMemory support
* Review feedback
* fix tests
* add fixme
1 parent c157d15 commit 7080291

6 files changed (+181 lines, -105 lines)

model_analyzer/constants.py

Lines changed: 1 addition & 0 deletions
@@ -49,6 +49,7 @@
 INTERVAL_SLEEP_TIME = 1
 PERF_ANALYZER_MEASUREMENT_WINDOW = 5000
 PERF_ANALYZER_MINIMUM_REQUEST_COUNT = 50
+SECONDS_TO_MILLISECONDS_MULTIPLIER = 1000
 
 # Triton Server
 SERVER_OUTPUT_TIMEOUT_SECS = 5
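
The new constant is consumed in perf_config.py (below) to convert model_analyzer's monitoring interval, configured in seconds, into the milliseconds expected by perf_analyzer's --metrics-interval option. A minimal illustration, using an example 1-second interval rather than the project default:

SECONDS_TO_MILLISECONDS_MULTIPLIER = 1000

monitoring_interval = 1.0  # seconds; example value only
metrics_interval = monitoring_interval * SECONDS_TO_MILLISECONDS_MULTIPLIER
assert metrics_interval == 1000  # passed to perf_analyzer as --metrics-interval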

model_analyzer/perf_analyzer/perf_analyzer.py

Lines changed: 64 additions & 29 deletions
@@ -38,7 +38,7 @@
 from model_analyzer.record.types.gpu_utilization import GPUUtilization
 from model_analyzer.record.types.gpu_power_usage import GPUPowerUsage
 from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory
-from model_analyzer.record.types.gpu_total_memory import GPUTotalMemory
+from model_analyzer.record.types.gpu_free_memory import GPUFreeMemory
 
 from model_analyzer.constants import \
     INTERVAL_SLEEP_TIME, LOGGER_NAME, MEASUREMENT_REQUEST_COUNT_STEP, \
@@ -86,10 +86,10 @@ class PerfAnalyzer:
     ]
 
     gpu_metric_table = [
-        ["gpu_utilization", "Avg GPU Utilizations", GPUUtilization],
-        ["gpu_power_usage", "Avg GPU Power Usages", GPUPowerUsage],
-        ["gpu_used_memory", "Max GPU Memory Usages", GPUUsedMemory],
-        ["gpu_total_memory", "Total GPU Memory Usages", GPUTotalMemory]
+        ["gpu_utilization", "Avg GPU Utilization", GPUUtilization, "0.01"],
+        ["gpu_power_usage", "Avg GPU Power Usage", GPUPowerUsage, "1"],
+        ["gpu_used_memory", "Max GPU Memory Usage", GPUUsedMemory, "1000000"],
+        ["gpu_free_memory", "Total GPU Memory", GPUFreeMemory, "1000000"]
     ]
     #yapf: enable
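
The table gains a fourth column: a reduction factor that each raw value reported by perf_analyzer is divided by before a record object is built (see _extract_gpu_records_from_row further down). The chosen factors suggest perf_analyzer reports utilization as a 0-1 fraction and memory sizes in bytes; a small hedged illustration with made-up raw values:

# Illustrative only: the raw values are invented, the divisions mirror the table above.
raw_utilization = 0.75           # "Avg GPU Utilization" as reported by perf_analyzer
raw_used_memory = 1500000000.0   # "Max GPU Memory Usage", presumably bytes

gpu_utilization = raw_utilization / float("0.01")     # ~75.0, i.e. a percent scale
gpu_used_memory = raw_used_memory / float("1000000")  # 1500.0 (MB, assuming bytes in)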

@@ -133,6 +133,7 @@ def __init__(self, path, config, max_retries, timeout, max_cpu_util):
         self._timeout = timeout
         self._output = ""
         self._perf_records = {}
+        self._gpu_records = []
         self._max_cpu_util = max_cpu_util
 
     def run(self, metrics, env=None):
@@ -183,11 +184,11 @@ def run(self, metrics, env=None):
 
         return self.PA_SUCCESS
 
-    def get_records(self):
+    def get_perf_records(self):
         """
         Returns
         -------
-        The records from the last perf_analyzer run
+        The perf records from the last perf_analyzer run
         """
 
         if self._perf_records:
@@ -196,6 +197,15 @@ def get_records(self):
             "Attempted to get perf_analyzer results"
             "without calling run first.")
 
+    def get_gpu_records(self):
+        """
+        Returns
+        -------
+        The gpu records from the last perf_analyzer run
+        """
+
+        return self._gpu_records
+
     def output(self):
         """
         Returns
@@ -331,7 +341,16 @@ def _get_process_output(self):
         self._cmd_log.seek(0)
         tmp_output = self._cmd_log.read()
         self._cmd_log.close()
-        return tmp_output.decode('utf-8')
+
+        # PA has occasionally output non-UTF-8 bytes which would cause MA
+        # to assert. In that case, just ignore the result instead of asserting
+        result = ""
+        try:
+            result = tmp_output.decode('utf-8')
+        except:
+            pass
+
+        return result
 
     def _auto_adjust_parameters(self, process):
         """
@@ -419,28 +438,17 @@ def _parse_outputs(self, metrics):
 
             for row in csv_reader:
                 self._perf_records[perf_config[
-                    'model-name']] = self._extract_metrics_from_row(
+                    'model-name']] = self._extract_perf_records_from_row(
                         metrics, row)
+                self._gpu_records = self._extract_gpu_records_from_row(
+                    metrics, row)
 
         for perf_config in [
                 mrc.perf_config() for mrc in self._config.model_run_configs()
         ]:
             os.remove(perf_config['latency-report-file'])
 
-    def _extract_metrics_from_row(self, requested_metrics: List[Record],
-                                  row_metrics: Dict[str, str]) -> List[Record]:
-        """
-        Extracts the requested metrics from the CSV's row and creates a list of Records
-        """
-        perf_records = self._create_records_from_perf_metrics(
-            requested_metrics, row_metrics)
-
-        gpu_records = self._create_records_from_gpu_metrics(
-            requested_metrics, row_metrics)
-
-        return perf_records + gpu_records
-
-    def _create_records_from_perf_metrics(
+    def _extract_perf_records_from_row(
             self, requested_metrics: List[Record],
             row_metrics: Dict[str, str]) -> List[Record]:
         perf_records: List[Record] = []
@@ -459,7 +467,7 @@ def _create_records_from_perf_metrics(
 
         return perf_records
 
-    def _create_records_from_gpu_metrics(
+    def _extract_gpu_records_from_row(
            self, requested_metrics: List[Record],
            row_metrics: Dict[str, str]) -> List[Record]:
         # GPU metrics have the following format: UUID0:value0;UUID1:value1;...
@@ -484,14 +492,41 @@
             for gpu_metric_string_tuple in gpu_metric_string_tuples:
                 gpu_metric_tuple = gpu_metric_string_tuple.split(':')
 
-                gpu_records.append(gpu_metric[PerfAnalyzer.RECORD_CLASS](
-                    value=float(
-                        gpu_metric_tuple[PerfAnalyzer.GPU_METRIC_VALUE]),
-                    device_uuid=gpu_metric_tuple[
-                        PerfAnalyzer.GPU_METRIC_UUID]))  # type: ignore
+                uuid = gpu_metric_tuple[PerfAnalyzer.GPU_METRIC_UUID]
+                tmp_value = float(
+                    gpu_metric_tuple[PerfAnalyzer.GPU_METRIC_VALUE])
+                reduction_factor = float(
+                    str(gpu_metric[PerfAnalyzer.REDUCTION_FACTOR]))
+                value = tmp_value / reduction_factor
+
+                record = gpu_metric[PerfAnalyzer.RECORD_CLASS](
+                    value=value, device_uuid=uuid)  # type: ignore
 
+                gpu_records.append(record)
+
+        self._cleanup_gpu_records(gpu_records)
         return gpu_records
 
+    def _cleanup_gpu_records(self, gpu_records):
+        # Recalculate GPUFreeMemory by removing the value of the associated GPUUsedMemory
+        # Remove any GPUFreeMemory records that don't have a matching GPUUsedMemory
+        indexes_to_remove = []
+        for i, record in enumerate(gpu_records):
+            if type(record) == GPUFreeMemory:
+                # Find matching UUID UsedMemory
+                found = False
+                for other_record in gpu_records:
+                    if type(other_record
+                           ) == GPUUsedMemory and record.device_uuid(
+                           ) == other_record.device_uuid():
+                        found = True
+                        record._value = record.value() - other_record.value()
+                        break
+                if not found:
+                    indexes_to_remove.append(i)
+        for i in reversed(indexes_to_remove):
+            del gpu_records[i]
+
     def _is_metric_requested_and_in_row(self, metric: List[object],
                                         requested_metrics: List[Record],
                                         row_metrics: Dict[str, str]) -> bool:
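
As the context above notes, perf_analyzer emits each GPU metric column as a UUID0:value0;UUID1:value1;... string, and the new _cleanup_gpu_records step turns PA's "Total GPU Memory" readings into free-memory records by subtracting the matching used-memory value per UUID. A standalone, hedged sketch of that flow using plain dicts instead of the Record classes (cell contents and UUIDs are invented):

def parse_gpu_cell(cell, reduction_factor):
    # Parse one 'UUID0:value0;UUID1:value1' CSV cell into {uuid: scaled value}.
    values = {}
    for pair in cell.split(';'):
        uuid, value = pair.split(':')
        values[uuid] = float(value) / reduction_factor
    return values

used = parse_gpu_cell("GPU-abc:1500000000", 1000000.0)    # used memory, bytes -> MB
total = parse_gpu_cell("GPU-abc:16000000000", 1000000.0)  # total memory, bytes -> MB

# GPUFreeMemory is derived: keep total - used where the UUIDs match,
# and drop totals that have no matching used-memory reading.
free = {uuid: total[uuid] - used[uuid] for uuid in total if uuid in used}
print(free)  # {'GPU-abc': 14500.0}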

model_analyzer/perf_analyzer/perf_config.py

Lines changed: 13 additions & 2 deletions
@@ -15,6 +15,7 @@
 from model_analyzer.model_analyzer_exceptions \
     import TritonModelAnalyzerException
 from model_analyzer.config.input.config_defaults import DEFAULT_MEASUREMENT_MODE
+from model_analyzer.constants import SECONDS_TO_MILLISECONDS_MULTIPLIER
 
 
 class PerfAnalyzerConfig:
@@ -39,7 +40,8 @@ class PerfAnalyzerConfig:
         'ssl-https-verify-host', 'ssl-https-ca-certificates-file',
         'ssl-https-client-certificate-type',
         'ssl-https-client-certificate-file', 'ssl-https-private-key-type',
-        'ssl-https-private-key-file'
+        'ssl-https-private-key-file', 'collect-metrics', 'metrics-url',
+        'metrics-interval'
     ]
 
     input_to_options = [
@@ -52,7 +54,8 @@ class PerfAnalyzerConfig:
     additive_args = ['input-data', 'shape']
 
     boolean_args = [
-        'streaming', 'async', 'sync', 'binary-search', 'ssl-grpc-use-ssl'
+        'streaming', 'async', 'sync', 'binary-search', 'ssl-grpc-use-ssl',
+        'collect-metrics'
     ]
 
     def __init__(self):
@@ -166,6 +169,14 @@ def update_config_from_profile_config(self, model_name, profile_config):
             'protocol': profile_config.client_protocol,
             'url': url
         })
+
+        metrics_interval = profile_config.monitoring_interval * SECONDS_TO_MILLISECONDS_MULTIPLIER
+        params.update({
+            'collect-metrics': 'True',
+            'metrics-url': profile_config.triton_metrics_url,
+            'metrics-interval': metrics_interval
+        })
+
         self.update_config(params)
 
     @classmethod
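
Because 'collect-metrics' is registered in boolean_args, it is rendered as a bare flag while the other two new params carry values. Roughly, they end up on the perf_analyzer command line as in the hedged sketch below (the flag rendering is paraphrased from this class's argument handling, and the URL and interval are example values, not repository defaults):

# Hedged sketch of how the new params surface as CLI flags; values are examples.
params = {
    'collect-metrics': 'True',                        # boolean arg -> bare flag
    'metrics-url': 'http://localhost:8002/metrics',   # profile_config.triton_metrics_url
    'metrics-interval': 1 * 1000,                     # monitoring_interval (s) -> ms
}

flags = []
for key, value in params.items():
    flags.append(f'--{key}' if key == 'collect-metrics' else f'--{key}={value}')
print(' '.join(flags))
# --collect-metrics --metrics-url=http://localhost:8002/metrics --metrics-interval=1000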

model_analyzer/record/metrics_manager.py

Lines changed: 23 additions & 12 deletions
@@ -142,8 +142,7 @@ def _categorize_metrics(metric_tags, collect_cpu_metrics=False):
         gpu_metrics, perf_metrics, cpu_metrics = [], [], []
         # Separates metrics and objectives into related lists
         for metric in MetricsManager.get_metric_types(metric_tags):
-            if metric in DCGMMonitor.model_analyzer_to_dcgm_field or metric in RemoteMonitor.gpu_metrics.values(
-            ):
+            if metric in PerfAnalyzer.get_gpu_metrics():
                 gpu_metrics.append(metric)
             elif metric in PerfAnalyzer.get_perf_metrics():
                 perf_metrics.append(metric)
@@ -223,17 +222,16 @@ def profile_models(self, run_config):
 
         self._start_monitors(cpu_only=cpu_only)
 
-        perf_analyzer_metrics = self._run_perf_analyzer(run_config,
-                                                        perf_output_writer)
+        perf_analyzer_metrics, model_gpu_metrics = self._run_perf_analyzer(
+            run_config, perf_output_writer)
 
         if not perf_analyzer_metrics:
             self._stop_monitors(cpu_only=cpu_only)
             self._destroy_monitors(cpu_only=cpu_only)
             return None
 
         # Get metrics for model inference and combine metrics that do not have GPU UUID
-        model_gpu_metrics = {}
-        if not cpu_only:
+        if not cpu_only and not model_gpu_metrics:
             model_gpu_metrics = self._get_gpu_inference_metrics()
         model_cpu_metrics = self._get_cpu_inference_metrics()
 
@@ -369,6 +367,7 @@ def _start_monitors(self, cpu_only=False):
         Start any metrics monitors
         """
 
+        self._gpu_monitor = None
         if not cpu_only:
             try:
                 if self._config.use_local_gpu_monitor:
@@ -446,7 +445,8 @@ def _run_perf_analyzer(self, run_config, perf_output_writer):
             timeout=self._config.perf_analyzer_timeout,
             max_cpu_util=self._config.perf_analyzer_cpu_util)
 
-        status = perf_analyzer.run(self._perf_metrics, env=perf_analyzer_env)
+        metrics_to_gather = self._perf_metrics + self._gpu_metrics
+        status = perf_analyzer.run(metrics_to_gather, env=perf_analyzer_env)
 
         if perf_output_writer:
             perf_output_writer.write(
@@ -459,16 +459,23 @@ def _run_perf_analyzer(self, run_config, perf_output_writer):
 
         # PerfAnalyzer run was not succesful
         if status == 1:
-            return None
+            return (None, None)
+
+        perf_records = perf_analyzer.get_perf_records()
+        gpu_records = perf_analyzer.get_gpu_records()
 
-        per_model_perf_records = perf_analyzer.get_records()
+        aggregated_perf_records = self._aggregate_perf_records(perf_records)
+        aggregated_gpu_records = self._aggregate_gpu_records(gpu_records)
 
-        for (model, perf_records) in per_model_perf_records.items():
+        return aggregated_perf_records, aggregated_gpu_records
+
+    def _aggregate_perf_records(self, perf_records):
+        per_model_perf_records = {}
+        for (model, records) in perf_records.items():
             perf_record_aggregator = RecordAggregator()
-            perf_record_aggregator.insert_all(perf_records)
+            perf_record_aggregator.insert_all(records)
 
             per_model_perf_records[model] = perf_record_aggregator.aggregate()
-
         return per_model_perf_records
 
     def _get_gpu_inference_metrics(self):
@@ -485,6 +492,10 @@ def _get_gpu_inference_metrics(self):
         # Stop and destroy DCGM monitor
         gpu_records = self._gpu_monitor.stop_recording_metrics()
 
+        gpu_metrics = self._aggregate_gpu_records(gpu_records)
+        return gpu_metrics
+
+    def _aggregate_gpu_records(self, gpu_records):
         # Insert all records into aggregator and get aggregated DCGM records
         gpu_record_aggregator = RecordAggregator()
         gpu_record_aggregator.insert_all(gpu_records)
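
Both collection paths now end in the same aggregation helpers: the DCGM monitor path via _get_gpu_inference_metrics and the new perf_analyzer path via _run_perf_analyzer. A hedged sketch of that shared step; the RecordAggregator import path and the shape of aggregate()'s return value are assumptions based on the context lines above, not verified against the repository:

# Hedged sketch of the shared aggregation step (RecordAggregator import path assumed).
from model_analyzer.record.record_aggregator import RecordAggregator
from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory

gpu_records = [
    GPUUsedMemory(value=1500.0, device_uuid='GPU-abc'),  # illustrative values
    GPUUsedMemory(value=1600.0, device_uuid='GPU-abc'),
]

gpu_record_aggregator = RecordAggregator()
gpu_record_aggregator.insert_all(gpu_records)
gpu_metrics = gpu_record_aggregator.aggregate()  # aggregated per record type; exact shape not shown in this diff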

tests/common/test_utils.py

Lines changed: 7 additions & 2 deletions
@@ -25,11 +25,12 @@
 from model_analyzer.record.metrics_manager import MetricsManager
 from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
 from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
-
+from model_analyzer.constants import SECONDS_TO_MILLISECONDS_MULTIPLIER
 from model_analyzer.config.input.config_defaults import \
     DEFAULT_BATCH_SIZES, DEFAULT_TRITON_LAUNCH_MODE, DEFAULT_CLIENT_PROTOCOL, \
     DEFAULT_MEASUREMENT_MODE, DEFAULT_TRITON_GRPC_ENDPOINT, DEFAULT_TRITON_HTTP_ENDPOINT, \
-    DEFAULT_TRITON_INSTALL_PATH, DEFAULT_OUTPUT_MODEL_REPOSITORY
+    DEFAULT_TRITON_INSTALL_PATH, DEFAULT_OUTPUT_MODEL_REPOSITORY, DEFAULT_TRITON_METRICS_URL, \
+    DEFAULT_MONITORING_INTERVAL
 
 import os
 
@@ -221,6 +222,10 @@ def construct_perf_analyzer_config(model_name='my-model',
         pa_config._args['triton-server-directory'] = DEFAULT_TRITON_INSTALL_PATH
         pa_config._args['model-repository'] = DEFAULT_OUTPUT_MODEL_REPOSITORY
     else:
+        pa_config._args['collect-metrics'] = 'True'
+        pa_config._args['metrics-url'] = DEFAULT_TRITON_METRICS_URL
+        pa_config._args[
+            'metrics-interval'] = SECONDS_TO_MILLISECONDS_MULTIPLIER * DEFAULT_MONITORING_INTERVAL
         pa_config._options['-i'] = client_protocol
         if client_protocol == 'http':
             pa_config._options['-u'] = DEFAULT_TRITON_HTTP_ENDPOINT
