Add Binary Parameter Search to Brute (#681)

nv-braf · web-flow · commit 5af535e46af5 · 2023-05-09T11:44:48.000-07:00
* Initial changes - still need to add parameter support

* Moved checking for request rate

* ConcurrencySearch to ParameterSearch

* Adding request rate binary search to brute

* Fixing QL errors

* Making is_request_rate_specified a class member function

* Fixing check of when BCS can occur
diff --git a/model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py b/model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py
@@ -0,0 +1,160 @@
+# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import List, Optional, Generator, Dict
+
+from .config_generator_interface import ConfigGeneratorInterface
+
+from model_analyzer.config.generate.brute_run_config_generator import BruteRunConfigGenerator
+from model_analyzer.config.generate.model_variant_name_manager import ModelVariantNameManager
+from model_analyzer.config.run.run_config import RunConfig
+from model_analyzer.triton.client.client import TritonClient
+from model_analyzer.device.gpu_device import GPUDevice
+from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
+from model_analyzer.result.result_manager import ResultManager
+from model_analyzer.result.run_config_measurement import RunConfigMeasurement
+from model_analyzer.result.parameter_search import ParameterSearch
+
+from model_analyzer.constants import LOGGER_NAME
+
+from copy import deepcopy
+
+import logging
+
+logger = logging.getLogger(LOGGER_NAME)
+
+
+class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface
+                                                      ):
+    """
+    First run BruteRunConfigGenerator for a brute search, then for 
+    automatic searches use ParameterSearch to perform a binary search
+    """
+
+    def __init__(self, config: ConfigCommandProfile, gpus: List[GPUDevice],
+                 models: List[ModelProfileSpec], client: TritonClient,
+                 result_manager: ResultManager,
+                 model_variant_name_manager: ModelVariantNameManager):
+        """
+        Parameters
+        ----------
+        config: ConfigCommandProfile
+            Profile configuration information
+        gpus: List of GPUDevices
+        models: List of ModelProfileSpec
+            List of models to profile
+        client: TritonClient
+        result_manager: ResultManager
+            The object that handles storing and sorting the results from the perf analyzer
+        model_variant_name_manager: ModelVariantNameManager
+            Maps model variants to config names
+        """
+        self._config = config
+        self._gpus = gpus
+        self._models = models
+        self._client = client
+        self._result_manager = result_manager
+        self._model_variant_name_manager = model_variant_name_manager
+
+    def set_last_results(
+            self, measurements: List[Optional[RunConfigMeasurement]]) -> None:
+        self._last_measurement = measurements[-1]
+        self._rcg.set_last_results(measurements)
+
+    def get_configs(self) -> Generator[RunConfig, None, None]:
+        """
+        Returns
+        -------
+        RunConfig
+            The next RunConfig generated by this class
+        """
+
+        logger.info("")
+        logger.info("Starting brute mode search")
+        logger.info("")
+        yield from self._execute_brute_search()
+        logger.info("")
+        logger.info("Done with brute mode search.")
+        logger.info("")
+
+        if self._can_binary_search_top_results():
+            yield from self._binary_search_over_top_results()
+            logger.info("")
+            logger.info(
+                "Done gathering concurrency sweep measurements for reports")
+            logger.info("")
+
+    def _execute_brute_search(self) -> Generator[RunConfig, None, None]:
+        self._rcg: ConfigGeneratorInterface = self._create_brute_run_config_generator(
+        )
+
+        yield from self._rcg.get_configs()
+
+    def _create_brute_run_config_generator(self) -> BruteRunConfigGenerator:
+        return BruteRunConfigGenerator(
+            config=self._config,
+            gpus=self._gpus,
+            models=self._models,
+            client=self._client,
+            model_variant_name_manager=self._model_variant_name_manager)
+
+    def _can_binary_search_top_results(self) -> bool:
+        for model in self._models:
+            if model.parameters()['concurrency'] or model.parameters(
+            )['request_rate']:
+                return False
+
+        return True
+
+    def _binary_search_over_top_results(
+            self) -> Generator[RunConfig, None, None]:
+        for model_name in self._result_manager.get_model_names():
+            top_results = self._result_manager.top_n_results(
+                model_name=model_name,
+                n=self._config.num_configs_per_model,
+                include_default=True)
+
+            for result in top_results:
+                run_config = deepcopy(result.run_config())
+                model_parameters = self._get_model_parameters(model_name)
+                parameter_search = ParameterSearch(
+                    config=self._config,
+                    model_parameters=model_parameters,
+                    skip_parameter_sweep=True)
+                for parameter in parameter_search.search_parameters():
+                    run_config = self._set_parameter(run_config,
+                                                     model_parameters,
+                                                     parameter)
+                    yield run_config
+                    parameter_search.add_run_config_measurement(
+                        self._last_measurement)
+
+    def _get_model_parameters(self, model_name: str) -> Dict:
+        for model in self._models:
+            if model_name == model.model_name():
+                return model.parameters()
+
+        return {}
+
+    def _set_parameter(self, run_config: RunConfig, model_parameters: Dict,
+                       parameter: int) -> RunConfig:
+        for model_run_config in run_config.model_run_configs():
+            perf_config = model_run_config.perf_config()
+            if self._config.is_request_rate_specified(model_parameters):
+                perf_config.update_config({'request-rate-range': parameter})
+            else:
+                perf_config.update_config({'concurrency-range': parameter})
+
+        return run_config
diff --git a/model_analyzer/config/generate/perf_analyzer_config_generator.py b/model_analyzer/config/generate/perf_analyzer_config_generator.py
@@ -19,7 +19,7 @@
 from .config_generator_interface import ConfigGeneratorInterface
 from .generator_utils import GeneratorUtils as utils
 
-from model_analyzer.constants import LOGGER_NAME, THROUGHPUT_MINIMUM_GAIN, THROUGHPUT_MINIMUM_CONSECUTIVE_CONCURRENCY_TRIES, THROUGHPUT_MINIMUM_CONSECUTIVE_BATCH_SIZE_TRIES
+from model_analyzer.constants import LOGGER_NAME, THROUGHPUT_MINIMUM_GAIN, THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES, THROUGHPUT_MINIMUM_CONSECUTIVE_BATCH_SIZE_TRIES
 from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
 from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 
@@ -90,7 +90,7 @@ def __init__(self, cli_config: ConfigCommandProfile, model_name: str,
     @staticmethod
     def throughput_gain_valid_helper(
             throughputs: List[Optional[RunConfigMeasurement]],
-            min_tries: int = THROUGHPUT_MINIMUM_CONSECUTIVE_CONCURRENCY_TRIES,
+            min_tries: int = THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES,
             min_gain: float = THROUGHPUT_MINIMUM_GAIN) -> bool:
         if len(throughputs) < min_tries:
             return True
@@ -159,17 +159,11 @@ def _create_parameter_list(self) -> List[int]:
         # The two possible parameters are request rate or concurrency
         # Concurrency is the default and will be used unless the user specifies
         # request rate, either as a model parameter or a config option
-        if self._config_specifies_request_rate():
+        if self._cli_config.is_request_rate_specified(self._model_parameters):
             return self._create_request_rate_list()
         else:
             return self._create_concurrency_list()
 
-    def _config_specifies_request_rate(self) -> bool:
-        return self._model_parameters['request_rate'] or \
-               self._cli_config.request_rate_search_enable or \
-               self._cli_config.get_config()['run_config_search_min_request_rate'].is_set_by_user() or \
-               self._cli_config.get_config()['run_config_search_max_request_rate'].is_set_by_user()
-
     def _create_request_rate_list(self) -> List[int]:
         if self._model_parameters['request_rate']:
             return sorted(self._model_parameters['request_rate'])
@@ -205,7 +199,8 @@ def _generate_perf_configs(self) -> None:
 
                 new_perf_config.update_config(params)
 
-                if self._config_specifies_request_rate():
+                if self._cli_config.is_request_rate_specified(
+                        self._model_parameters):
                     new_perf_config.update_config(
                         {'request-rate-range': parameter})
                 else:
@@ -259,7 +254,8 @@ def _done_walking_parameters(self) -> bool:
         if self._early_exit_enable and not self._parameter_throughput_gain_valid(
         ):
             if not self._parameter_warning_printed:
-                if self._config_specifies_request_rate():
+                if self._cli_config.is_request_rate_specified(
+                        self._model_parameters):
                     logger.info(
                         "No longer increasing request rate as throughput has plateaued"
                     )
@@ -292,7 +288,7 @@ def _parameter_throughput_gain_valid(self) -> bool:
         """ Check if any of the last X parameter results resulted in valid gain """
         return PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
             throughputs=self._parameter_results,
-            min_tries=THROUGHPUT_MINIMUM_CONSECUTIVE_CONCURRENCY_TRIES,
+            min_tries=THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES,
             min_gain=THROUGHPUT_MINIMUM_GAIN)
 
     def _batch_size_throughput_gain_valid(self) -> bool:
diff --git a/model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py b/model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py
@@ -27,7 +27,7 @@
 from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
 from model_analyzer.result.result_manager import ResultManager
 from model_analyzer.result.run_config_measurement import RunConfigMeasurement
-from model_analyzer.result.concurrency_search import ConcurrencySearch
+from model_analyzer.result.parameter_search import ParameterSearch
 
 from model_analyzer.constants import LOGGER_NAME
 
@@ -42,7 +42,8 @@
 class QuickPlusConcurrencySweepRunConfigGenerator(ConfigGeneratorInterface):
     """
     First run QuickRunConfigGenerator for a hill climbing search, then use 
-    Brute for a concurrency sweep of the default and Top N results
+    ParameterSearch for a concurrency sweep + binary search of the default 
+    and Top N results
     """
 
     def __init__(self, search_config: SearchConfig,
@@ -68,8 +69,6 @@ def __init__(self, search_config: SearchConfig,
             The object that handles storing and sorting the results from the perf analyzer
         model_variant_name_manager: ModelVariantNameManager
             Maps model variants to config names
-        
-        model_variant_name_manager: ModelVariantNameManager
         """
         self._search_config = search_config
         self._config = config
@@ -133,11 +132,11 @@ def _sweep_concurrency_over_top_results(
 
             for result in top_results:
                 run_config = deepcopy(result.run_config())
-                concurrency_search = ConcurrencySearch(self._config)
-                for concurrency in concurrency_search.search_concurrencies():
+                parameter_search = ParameterSearch(self._config)
+                for concurrency in parameter_search.search_parameters():
                     run_config = self._set_concurrency(run_config, concurrency)
                     yield run_config
-                    concurrency_search.add_run_config_measurement(
+                    parameter_search.add_run_config_measurement(
                         self._last_measurement)
 
     def _set_concurrency(self, run_config: RunConfig,
diff --git a/model_analyzer/config/generate/run_config_generator_factory.py b/model_analyzer/config/generate/run_config_generator_factory.py
@@ -25,6 +25,7 @@
 from model_analyzer.result.result_manager import ResultManager
 from .brute_run_config_generator import BruteRunConfigGenerator
 from .quick_plus_concurrency_sweep_run_config_generator import QuickPlusConcurrencySweepRunConfigGenerator
+from .brute_plus_binary_parameter_search_run_config_generator import BrutePlusBinaryParameterSearchRunConfigGenerator
 from .search_dimensions import SearchDimensions
 from .search_dimension import SearchDimension
 from .search_config import SearchConfig
@@ -84,28 +85,31 @@ def create_run_config_generator(
                 result_manager=result_manager,
                 model_variant_name_manager=model_variant_name_manager)
         elif (command_config.run_config_search_mode == "brute"):
-            return RunConfigGeneratorFactory._create_brute_run_config_generator(
+            return RunConfigGeneratorFactory._create_brute_plus_binary_parameter_search_run_config_generator(
                 command_config=command_config,
                 gpus=gpus,
                 models=new_models,
                 client=client,
+                result_manager=result_manager,
                 model_variant_name_manager=model_variant_name_manager)
         else:
             raise TritonModelAnalyzerException(
                 f"Unexpected search mode {command_config.run_config_search_mode}"
             )
 
     @staticmethod
-    def _create_brute_run_config_generator(
+    def _create_brute_plus_binary_parameter_search_run_config_generator(
         command_config: ConfigCommandProfile, gpus: List[GPUDevice],
         models: List[ModelProfileSpec], client: TritonClient,
+        result_manager: ResultManager,
         model_variant_name_manager: ModelVariantNameManager
     ) -> ConfigGeneratorInterface:
-        return BruteRunConfigGenerator(
+        return BrutePlusBinaryParameterSearchRunConfigGenerator(
             config=command_config,
             gpus=gpus,
             models=models,
             client=client,
+            result_manager=result_manager,
             model_variant_name_manager=model_variant_name_manager)
 
     @staticmethod
diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py
@@ -1239,4 +1239,13 @@ def _are_models_using_request_rate(self) -> bool:
             raise TritonModelAnalyzerException("Parameters in all profiled models must use request-rate-range. "\
                 "Model Analyzer does not support mixing concurrency-range and request-rate-range.")
         else:
-            return model_using_request_rate
+            return model_using_request_rate
+
+    def is_request_rate_specified(self, model_parameters: dict) -> bool:
+        """
+        Returns true if either the model or the config specified request rate
+        """
+        return 'request_rate' in model_parameters and model_parameters['request_rate'] or \
+            self.request_rate_search_enable or \
+            self.get_config()['run_config_search_min_request_rate'].is_set_by_user() or \
+            self.get_config()['run_config_search_max_request_rate'].is_set_by_user()
diff --git a/model_analyzer/constants.py b/model_analyzer/constants.py
@@ -30,7 +30,7 @@
 
 # Run Search
 THROUGHPUT_MINIMUM_GAIN = 0.05
-THROUGHPUT_MINIMUM_CONSECUTIVE_CONCURRENCY_TRIES = 4
+THROUGHPUT_MINIMUM_CONSECUTIVE_PARAMETER_TRIES = 4
 THROUGHPUT_MINIMUM_CONSECUTIVE_BATCH_SIZE_TRIES = 4
 
 # Quick search algorithm constants
diff --git a/model_analyzer/result/parameter_search.py b/model_analyzer/result/parameter_search.py
diff --git a/tests/test_parameter_search.py b/tests/test_parameter_search.py