Commit aab9495

Concurrency sweep early backoff (#537)
* Implemented early backoff
* Adding get_last_results method to ConfigGeneratorInterface
* Ignore type check error
* Removing get_last_measurements
* Reverting formatting changes
* Fixing type checking
1 parent 62c89bf commit aab9495
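For readers skimming the diff: "early backoff" means a sweep stops as soon as the last few throughput measurements stop improving meaningfully. A minimal sketch of that rule on plain floats (the real helper below takes Optional[RunConfigMeasurement] objects; min_tries=4 and min_gain=0.05 mirror the values used in the updated unit test, while the production defaults come from the THROUGHPUT_MINIMUM_* constants):

```python
# Sketch of the early-backoff rule on plain floats, assuming a 4-sample
# window and a 5% minimum relative gain (the values the updated test uses).
from typing import List


def gain_still_valid(throughputs: List[float],
                     min_tries: int = 4,
                     min_gain: float = 0.05) -> bool:
    if len(throughputs) < min_tries:
        return True  # not enough samples yet; keep sweeping
    window = throughputs[-min_tries:]
    first, best = window[0], max(window)
    # Relative gain of the best recent sample over the oldest in the window.
    return (best - first) / first > min_gain


assert gain_still_valid([50.0, 100.0, 150.0, 200.0])       # still improving
assert not gain_still_valid([100.0, 102.0, 103.0, 103.0])  # 3% gain: plateau
```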

3 files changed, +53 -35 lines changed

model_analyzer/config/generate/perf_analyzer_config_generator.py

Lines changed: 38 additions & 32 deletions
@@ -36,12 +36,9 @@ class PerfAnalyzerConfigGenerator(ConfigGeneratorInterface):
     earlier depending on results that it receives
     """
 
-    def __init__(self,
-                 cli_config: ConfigCommandProfile,
-                 model_name: str,
-                 model_perf_analyzer_flags: dict,
-                 model_parameters: dict,
-                 early_exit_enable: bool) -> None:
+    def __init__(self, cli_config: ConfigCommandProfile, model_name: str,
+                 model_perf_analyzer_flags: dict, model_parameters: dict,
+                 early_exit_enable: bool) -> None:
         """
         Parameters
         ----------
@@ -77,8 +74,8 @@ def __init__(self,
         self._generator_started = False
 
         self._last_results: List[RunConfigMeasurement] = []
-        self._concurrency_results: List[RunConfigMeasurement] = []
-        self._batch_size_results: List[RunConfigMeasurement] = []
+        self._concurrency_results: List[Optional[RunConfigMeasurement]] = []
+        self._batch_size_results: List[Optional[RunConfigMeasurement]] = []
 
         self._model_name = model_name
         self._perf_analyzer_flags = model_perf_analyzer_flags
@@ -91,6 +88,33 @@ def __init__(self,
 
         self._generate_perf_configs()
 
+    @staticmethod
+    def throughput_gain_valid_helper(
+            throughputs: List[Optional[RunConfigMeasurement]],
+            min_tries: int = THROUGHPUT_MINIMUM_CONSECUTIVE_CONCURRENCY_TRIES,
+            min_gain: float = THROUGHPUT_MINIMUM_GAIN) -> bool:
+        if len(throughputs) < min_tries:
+            return True
+
+        tputs_in_range = [
+            PerfAnalyzerConfigGenerator.get_throughput(throughputs[x])
+            for x in range(-min_tries, 0)
+        ]
+
+        first = tputs_in_range[0]
+        best = max(tputs_in_range)
+
+        gain = (best - first) / first
+
+        return gain > min_gain
+
+    @staticmethod
+    def get_throughput(measurement: Optional[RunConfigMeasurement]) -> float:
+        if measurement:
+            return measurement.get_non_gpu_metric_value('perf_throughput')
+        else:
+            return 0.0
+
     def _is_done(self) -> bool:
         """ Returns true if this generator is done generating configs """
         return self._generator_started and self._done_walking()
@@ -111,7 +135,8 @@ def get_configs(self) -> Generator[PerfAnalyzerConfig, None, None]:
 
         self._step()
 
-    def set_last_results(self, measurements: List[Optional[RunConfigMeasurement]]) -> None:
+    def set_last_results(
+            self, measurements: List[Optional[RunConfigMeasurement]]) -> None:
         """
         Given the results from the last PerfAnalyzerConfig, make decisions
         about future configurations to generate
@@ -182,7 +207,8 @@ def _step(self) -> None:
 
     def _add_best_throughput_to_batch_sizes(self) -> None:
         if self._concurrency_results:
-            best = max(self._concurrency_results)
+            # type is List[Optional[RCM]]
+            best = max(self._concurrency_results)  #type: ignore
             self._batch_size_results.append(best)
 
     def _reset_concurrencies(self) -> None:
@@ -231,34 +257,14 @@ def _last_results_erroneous(self) -> bool:
 
     def _concurrency_throughput_gain_valid(self) -> bool:
         """ Check if any of the last X concurrency results resulted in valid gain """
-        return self._throughput_gain_valid_helper(
+        return PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
             throughputs=self._concurrency_results,
             min_tries=THROUGHPUT_MINIMUM_CONSECUTIVE_CONCURRENCY_TRIES,
             min_gain=THROUGHPUT_MINIMUM_GAIN)
 
     def _batch_size_throughput_gain_valid(self) -> bool:
         """ Check if any of the last X batch_size results resulted in valid gain """
-        return self._throughput_gain_valid_helper(
+        return PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
             throughputs=self._batch_size_results,
             min_tries=THROUGHPUT_MINIMUM_CONSECUTIVE_BATCH_SIZE_TRIES,
             min_gain=THROUGHPUT_MINIMUM_GAIN)
-
-    def _throughput_gain_valid_helper(self,
-                                      throughputs: List[RunConfigMeasurement],
-                                      min_tries: int, min_gain: float) -> bool:
-        if len(throughputs) < min_tries:
-            return True
-
-        tputs_in_range = [
-            self._get_throughput(throughputs[x]) for x in range(-min_tries, 0)
-        ]
-
-        first = tputs_in_range[0]
-        best = max(tputs_in_range)
-
-        gain = (best - first) / first
-
-        return gain > min_gain
-
-    def _get_throughput(self, measurement: RunConfigMeasurement) -> float:
-        return measurement.get_non_gpu_metric_value('perf_throughput')
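Because both helpers are now staticmethods, callers can exercise them without constructing a generator. A hedged usage sketch: MagicMock stands in for RunConfigMeasurement, as in this repo's own tests, and fake_measurement is an illustrative helper, not a repo API.

```python
from unittest.mock import MagicMock

from model_analyzer.config.generate.perf_analyzer_config_generator import \
    PerfAnalyzerConfigGenerator


def fake_measurement(throughput: float) -> MagicMock:
    # Mimics just enough of RunConfigMeasurement for get_throughput().
    m = MagicMock()
    m.get_non_gpu_metric_value.return_value = throughput
    return m


measurements = [fake_measurement(t) for t in (100.0, 101.0, 102.0)]
measurements.append(None)  # a failed run now contributes 0.0 instead of raising

print(PerfAnalyzerConfigGenerator.get_throughput(measurements[-1]))  # 0.0
print(PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
    throughputs=measurements, min_tries=4, min_gain=0.05))  # False: plateaued
```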

model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py

Lines changed: 13 additions & 0 deletions
@@ -20,6 +20,7 @@
 from model_analyzer.config.generate.brute_run_config_generator import BruteRunConfigGenerator
 from model_analyzer.config.generate.quick_run_config_generator import QuickRunConfigGenerator
 from model_analyzer.config.generate.model_variant_name_manager import ModelVariantNameManager
+from model_analyzer.config.generate.perf_analyzer_config_generator import PerfAnalyzerConfigGenerator
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.triton.client.client import TritonClient
 from model_analyzer.device.gpu_device import GPUDevice
@@ -78,6 +79,7 @@ def __init__(self, search_config: SearchConfig,
 
     def set_last_results(
             self, measurements: List[Optional[RunConfigMeasurement]]) -> None:
+        self._last_measurement = measurements[-1]
         self._rcg.set_last_results(measurements)
 
     def get_configs(self) -> Generator[RunConfig, None, None]:
@@ -127,10 +129,21 @@ def _sweep_concurrency_over_top_results(
 
         for count, result in enumerate(top_results):
             run_config = deepcopy(result.run_config())
+
+            run_config_measurements = []
             for concurrency in (2**i for i in range(0, 10)):
                 run_config = self._set_concurrency(run_config, concurrency)
                 yield run_config
 
+                run_config_measurements.append(self._last_measurement)
+
+                if not PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
+                        throughputs=run_config_measurements):
+                    logger.info(
+                        "Terminating concurrency sweep - throughput is decreasing"
+                    )
+                    break
+
     def _set_concurrency(self, run_config: RunConfig,
                          concurrency: int) -> RunConfig:
         for model_run_config in run_config.model_run_configs():
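The handshake above is split across two methods: get_configs yields a config, the caller profiles it and reports back through set_last_results, and the loop then decides whether to keep sweeping. A simplified, self-contained model of that control flow (sweep_concurrencies and measure are hypothetical stand-ins, not repo APIs; the window and threshold mirror the helper's logic):

```python
from typing import Callable, List, Optional


def sweep_concurrencies(measure: Callable[[int], Optional[float]],
                        min_tries: int = 4,
                        min_gain: float = 0.05) -> List[Optional[float]]:
    results: List[Optional[float]] = []
    for concurrency in (2**i for i in range(0, 10)):  # 1, 2, 4, ..., 512
        # In the real generator this is a yielded RunConfig that the caller
        # profiles; here measure() returns the throughput directly.
        results.append(measure(concurrency))
        if len(results) < min_tries:
            continue
        window = [r if r is not None else 0.0 for r in results[-min_tries:]]
        if (max(window) - window[0]) / window[0] <= min_gain:
            print("Terminating concurrency sweep - throughput is decreasing")
            break
    return results


# Throughput saturates at concurrency 8, so the sweep backs off early
# instead of walking all the way to concurrency 512.
print(sweep_concurrencies(lambda c: float(min(c, 8) * 100)))
# -> [100.0, 200.0, 400.0, 800.0, 800.0, 800.0, 800.0]
```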

tests/test_perf_analyzer_config_generator.py

Lines changed: 2 additions & 3 deletions
@@ -567,9 +567,8 @@ def _test_throughput_gain_valid_helper(self, throughput_values,
             MagicMock(),
             early_exit_enable=False)
 
-        result = pacg._throughput_gain_valid_helper(throughputs=throughputs,
-                                                    min_tries=4,
-                                                    min_gain=0.05)
+        result = PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
+            throughputs=throughputs, min_tries=4, min_gain=0.05)
 
         self.assertEqual(result, expected_result)
