Commit f69b439

Update QRCG to set concurrency for default config (#820)
* Updating QRCG to use calculated concurrency value for default config
* Adding new method to calculate default instance count with unit testing
* Removing unused import
* Creating a new private method to decouple string parsing logic
1 parent 4844973 commit f69b439

4 files changed (+89 -8 lines changed)


model_analyzer/config/generate/quick_run_config_generator.py

Lines changed: 16 additions & 1 deletion
@@ -33,6 +33,7 @@
 from model_analyzer.config.generate.neighborhood import Neighborhood
 from model_analyzer.config.generate.search_config import SearchConfig
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
 from model_analyzer.config.run.model_run_config import ModelRunConfig
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.constants import LOGGER_NAME
@@ -704,13 +705,27 @@ def _create_default_perf_analyzer_config(
             model_config.get_field("name"), self._config
         )
 
-        perf_config_params = {"batch-size": 1, "concurrency-range": 1}
+        default_concurrency = self._calculate_default_concurrency(model_config)
+
+        perf_config_params = {
+            "batch-size": DEFAULT_BATCH_SIZES,
+            "concurrency-range": default_concurrency,
+        }
         default_perf_analyzer_config.update_config(perf_config_params)
 
         default_perf_analyzer_config.update_config(model.perf_analyzer_flags())
 
         return default_perf_analyzer_config
 
+    def _calculate_default_concurrency(self, model_config: ModelConfig) -> int:
+        default_max_batch_size = model_config.max_batch_size()
+        default_instance_count = model_config.instance_group_count(
+            system_gpu_count=len(self._gpus)
+        )
+        default_concurrency = 2 * default_max_batch_size * default_instance_count
+
+        return default_concurrency
+
     def _print_debug_logs(
         self, measurements: List[Union[RunConfigMeasurement, None]]
     ) -> None:
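
The hard-coded defaults (batch size 1, concurrency 1) are now derived from the model itself: the default concurrency is twice the model's maximum batch size multiplied by its total instance count. A minimal standalone sketch of the heuristic; the helper name below is illustrative, not a Model Analyzer API:

    # Sketch of the default-concurrency heuristic added above; it mirrors
    # QuickRunConfigGenerator._calculate_default_concurrency without its imports.
    def default_concurrency(max_batch_size: int, instance_count: int) -> int:
        # Allow two in-flight requests per instance at the maximum batch size.
        return 2 * max_batch_size * instance_count

    # For example, a model with max_batch_size=4 served by a single instance
    # gets a default concurrency of 2 * 4 * 1 = 8.
    assert default_concurrency(4, 1) == 8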

model_analyzer/triton/model/model_config.py

Lines changed: 27 additions & 6 deletions
@@ -486,15 +486,41 @@ def dynamic_batching_string(self) -> str:
         else:
             return "Disabled"
 
+    def instance_group_count(self, system_gpu_count: int) -> int:
+        """
+        Returns:
+            int: The total number of instance groups (cpu + gpu)
+        """
+
+        kind_to_count = self._get_instance_groups(system_gpu_count)
+        instance_group_count = sum([count for count in kind_to_count.values()])
+
+        return instance_group_count
+
     def instance_group_string(self, system_gpu_count: int) -> str:
         """
         Returns
         -------
         str
             representation of the instance group used
             to generate this result
+
+            Format is "GPU:<count> + CPU:<count>"
         """
 
+        kind_to_count = self._get_instance_groups(system_gpu_count)
+
+        ret_str = ""
+        for k, v in kind_to_count.items():
+            if ret_str != "":
+                ret_str += " + "
+            ret_str += f"{v}:{k}"
+        return ret_str
+
+    def _get_instance_groups(self, system_gpu_count: int) -> Dict[str, int]:
+        """
+        Returns a dictionary with type of instance (GPU/CPU) and its count
+        """
         model_config = self.get_config()
 
         # TODO change when remote mode is fixed
@@ -527,9 +553,4 @@ def instance_group_string(self, system_gpu_count: int) -> str:
                 kind_to_count[group_kind] = 0
             kind_to_count[group_kind] += group_total_count
 
-        ret_str = ""
-        for k, v in kind_to_count.items():
-            if ret_str != "":
-                ret_str += " + "
-            ret_str += f"{v}:{k}"
-        return ret_str
+        return kind_to_count
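
The config parsing that builds the kind-to-count mapping moves out of instance_group_string() into the private _get_instance_groups() helper, so the new instance_group_count() and the existing string method can share it. A sketch of the two consumers, using plain Python stand-ins rather than ModelConfig itself (function names here are illustrative only):

    from typing import Dict

    # Stand-in for instance_group_count(): total instances across all kinds.
    def count_from_groups(kind_to_count: Dict[str, int]) -> int:
        return sum(kind_to_count.values())

    # Stand-in for instance_group_string(): joins entries as "<count>:<kind>".
    def string_from_groups(kind_to_count: Dict[str, int]) -> str:
        return " + ".join(f"{count}:{kind}" for kind, count in kind_to_count.items())

    groups = {"GPU": 8, "CPU": 3}          # the mapping behind the existing string test case
    print(count_from_groups(groups))       # 11
    print(string_from_groups(groups))      # 8:GPU + 3:CPU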

tests/test_model_config.py

Lines changed: 41 additions & 0 deletions
@@ -316,6 +316,47 @@ def _test_helper(config_dict, expected_result, gpu_count=None):
         }
         _test_helper(model_config_dict, "8:GPU + 3:CPU", gpu_count=4)
 
+    def test_instance_group_count(self):
+        """Test out all corner cases of instance_group_count()"""
+
+        def _test_helper(config_dict, expected_result, gpu_count=None):
+            model_config = ModelConfig.create_from_dictionary(config_dict)
+            instance_group_count = model_config.instance_group_count(gpu_count)
+            self.assertEqual(instance_group_count, expected_result)
+
+        # No instance group info in model_config_dict:
+        # - default to 1 per GPU
+        model_config_dict = {}
+        _test_helper(model_config_dict, 1, gpu_count=1)
+
+        # No instance group info in model_config_dict:
+        # - 1 per GPU -- if 2 gpus then 2 total
+        model_config_dict = {}
+        _test_helper(model_config_dict, 2, gpu_count=2)
+
+        # 2 per GPU, 3 gpus in the system = 6 total
+        model_config_dict = {
+            "instance_group": [
+                {
+                    "count": 2,
+                    "kind": "KIND_GPU",
+                }
+            ]
+        }
+        _test_helper(model_config_dict, 6, gpu_count=3)
+
+        # 1 on ALL gpus + 2 each on [1 and 3] + 3 more on CPUs
+        # with 4 GPUs in the system:
+        # 8 on GPU and 3 on CPU
+        model_config_dict = {
+            "instance_group": [
+                {"count": 1, "kind": "KIND_GPU"},
+                {"count": 2, "kind": "KIND_GPU", "gpus": [1, 3]},
+                {"count": 3, "kind": "KIND_CPU"},
+            ]
+        }
+        _test_helper(model_config_dict, 11, gpu_count=4)
+
     def test_is_ensemble(self):
         """Test that we recognize if the platform is ensemble"""
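
The expected value of 11 in the final case follows from how instance groups expand against the 4 available GPUs: a KIND_GPU group with no explicit gpus list places one instance on every GPU, the second group places two instances on each of GPUs 1 and 3, and the KIND_CPU group adds three more. The arithmetic, spelled out (variable names are illustrative):

    gpu_count = 4
    group_one = 1 * gpu_count    # KIND_GPU, all GPUs -> 4 instances
    group_two = 2 * len([1, 3])  # KIND_GPU, GPUs 1 and 3 only -> 4 instances
    group_three = 3              # KIND_CPU -> 3 instances
    assert group_one + group_two + group_three == 11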

tests/test_quick_run_config_generator.py

Lines changed: 5 additions & 1 deletion
@@ -533,12 +533,16 @@ def test_default_config_generation(self):
 
         sc = SearchConfig(dimensions=dims, radius=5, min_initialized=2)
         qrcg = QuickRunConfigGenerator(
-            sc, config, MagicMock(), models, {}, MagicMock(), ModelVariantNameManager()
+            sc, config, ["GPU0"], models, {}, MagicMock(), ModelVariantNameManager()
         )
 
         default_run_config = qrcg._create_default_run_config()
 
         self.assertIn("--percentile=96", default_run_config.representation())
+        self.assertIn(
+            "--concurrency-range=8",
+            default_run_config.model_run_configs()[0].perf_config().representation(),
+        )
 
     def test_default_ensemble_config_generation(self):
         """

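The swap from MagicMock() to ["GPU0"] for the GPU list argument (stored as self._gpus) is required by the new code path: _calculate_default_concurrency() calls len(self._gpus), and a bare MagicMock reports a length of 0 rather than raising, so the generator would see a system with no GPUs. A quick standard-library illustration:

    from unittest.mock import MagicMock

    print(len(MagicMock()))  # 0 -- MagicMock's __len__ defaults to 0
    print(len(["GPU0"]))     # 1 -- one system GPU for instance_group_count()

With a single GPU, the fixture's model configuration makes 2 * max_batch_size * instance_count come out to 8, which is what the new --concurrency-range=8 assertion checks.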