Commit f69b439

Update QRCG to set concurrency for default config (#820)
* Updating QRCG to use calculated concurrency value for default config
* Adding new method to calculate default instance count with unit testing
* Removing unused import
* Creating a new private method to decouple string parsing logic
1 parent 4844973 commit f69b439

4 files changed (+89 -8 lines changed)


model_analyzer/config/generate/quick_run_config_generator.py

Lines changed: 16 additions & 1 deletion
@@ -33,6 +33,7 @@
 from model_analyzer.config.generate.neighborhood import Neighborhood
 from model_analyzer.config.generate.search_config import SearchConfig
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
 from model_analyzer.config.run.model_run_config import ModelRunConfig
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.constants import LOGGER_NAME
@@ -704,13 +705,27 @@ def _create_default_perf_analyzer_config(
             model_config.get_field("name"), self._config
         )
 
-        perf_config_params = {"batch-size": 1, "concurrency-range": 1}
+        default_concurrency = self._calculate_default_concurrency(model_config)
+
+        perf_config_params = {
+            "batch-size": DEFAULT_BATCH_SIZES,
+            "concurrency-range": default_concurrency,
+        }
         default_perf_analyzer_config.update_config(perf_config_params)
 
         default_perf_analyzer_config.update_config(model.perf_analyzer_flags())
 
         return default_perf_analyzer_config
 
+    def _calculate_default_concurrency(self, model_config: ModelConfig) -> int:
+        default_max_batch_size = model_config.max_batch_size()
+        default_instance_count = model_config.instance_group_count(
+            system_gpu_count=len(self._gpus)
+        )
+        default_concurrency = 2 * default_max_batch_size * default_instance_count
+
+        return default_concurrency
+
     def _print_debug_logs(
         self, measurements: List[Union[RunConfigMeasurement, None]]
     ) -> None:
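
The hard-coded defaults (batch size 1, concurrency 1) are now derived from the model itself: the default concurrency is twice the model's maximum batch size multiplied by its total instance count. A minimal standalone sketch of the heuristic; the helper name below is illustrative, not a Model Analyzer API:

    # Sketch of the default-concurrency heuristic added above; it mirrors
    # QuickRunConfigGenerator._calculate_default_concurrency without its imports.
    def default_concurrency(max_batch_size: int, instance_count: int) -> int:
        # Allow two in-flight requests per instance at the maximum batch size.
        return 2 * max_batch_size * instance_count

    # For example, a model with max_batch_size=4 served by a single instance
    # gets a default concurrency of 2 * 4 * 1 = 8.
    assert default_concurrency(4, 1) == 8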

model_analyzer/triton/model/model_config.py

Lines changed: 27 additions & 6 deletions
@@ -486,15 +486,41 @@ def dynamic_batching_string(self) -> str:
         else:
             return "Disabled"
 
+    def instance_group_count(self, system_gpu_count: int) -> int:
+        """
+        Returns:
+            int: The total number of instance groups (cpu + gpu)
+        """
+
+        kind_to_count = self._get_instance_groups(system_gpu_count)
+        instance_group_count = sum([count for count in kind_to_count.values()])
+
+        return instance_group_count
+
     def instance_group_string(self, system_gpu_count: int) -> str:
         """
         Returns
         -------
         str
             representation of the instance group used
             to generate this result
+
+            Format is "GPU:<count> + CPU:<count>"
         """
 
+        kind_to_count = self._get_instance_groups(system_gpu_count)
+
+        ret_str = ""
+        for k, v in kind_to_count.items():
+            if ret_str != "":
+                ret_str += " + "
+            ret_str += f"{v}:{k}"
+        return ret_str
+
+    def _get_instance_groups(self, system_gpu_count: int) -> Dict[str, int]:
+        """
+        Returns a dictionary with type of instance (GPU/CPU) and its count
+        """
         model_config = self.get_config()
 
         # TODO change when remote mode is fixed
@@ -527,9 +553,4 @@ def instance_group_string(self, system_gpu_count: int) -> str:
                 kind_to_count[group_kind] = 0
             kind_to_count[group_kind] += group_total_count
 
-        ret_str = ""
-        for k, v in kind_to_count.items():
-            if ret_str != "":
-                ret_str += " + "
-            ret_str += f"{v}:{k}"
-        return ret_str
+        return kind_to_count
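
The config parsing that builds the kind-to-count mapping moves out of instance_group_string() into the private _get_instance_groups() helper, so the new instance_group_count() and the existing string method can share it. A sketch of the two consumers, using plain Python stand-ins rather than ModelConfig itself (function names here are illustrative only):

    from typing import Dict

    # Stand-in for instance_group_count(): total instances across all kinds.
    def count_from_groups(kind_to_count: Dict[str, int]) -> int:
        return sum(kind_to_count.values())

    # Stand-in for instance_group_string(): joins entries as "<count>:<kind>".
    def string_from_groups(kind_to_count: Dict[str, int]) -> str:
        return " + ".join(f"{count}:{kind}" for kind, count in kind_to_count.items())

    groups = {"GPU": 8, "CPU": 3}          # the mapping behind the existing string test case
    print(count_from_groups(groups))       # 11
    print(string_from_groups(groups))      # 8:GPU + 3:CPU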

tests/test_model_config.py

Lines changed: 41 additions & 0 deletions
@@ -316,6 +316,47 @@ def _test_helper(config_dict, expected_result, gpu_count=None):
         }
         _test_helper(model_config_dict, "8:GPU + 3:CPU", gpu_count=4)
 
+    def test_instance_group_count(self):
+        """Test out all corner cases of instance_group_count()"""
+
+        def _test_helper(config_dict, expected_result, gpu_count=None):
+            model_config = ModelConfig.create_from_dictionary(config_dict)
+            instance_group_count = model_config.instance_group_count(gpu_count)
+            self.assertEqual(instance_group_count, expected_result)
+
+        # No instance group info in model_config_dict:
+        # - default to 1 per GPU
+        model_config_dict = {}
+        _test_helper(model_config_dict, 1, gpu_count=1)
+
+        # No instance group info in model_config_dict:
+        # - 1 per GPU -- if 2 gpus then 2 total
+        model_config_dict = {}
+        _test_helper(model_config_dict, 2, gpu_count=2)
+
+        # 2 per GPU, 3 gpus in the system = 6 total
+        model_config_dict = {
+            "instance_group": [
+                {
+                    "count": 2,
+                    "kind": "KIND_GPU",
+                }
+            ]
+        }
+        _test_helper(model_config_dict, 6, gpu_count=3)
+
+        # 1 on ALL gpus + 2 each on [1 and 3] + 3 more on CPUs
+        # with 4 GPUs in the system:
+        # 8 on GPU and 3 on CPU
+        model_config_dict = {
+            "instance_group": [
+                {"count": 1, "kind": "KIND_GPU"},
+                {"count": 2, "kind": "KIND_GPU", "gpus": [1, 3]},
+                {"count": 3, "kind": "KIND_CPU"},
+            ]
+        }
+        _test_helper(model_config_dict, 11, gpu_count=4)
+
     def test_is_ensemble(self):
         """Test that we recognize if the platform is ensemble"""
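
The expected value of 11 in the final case follows from how instance groups expand against the 4 available GPUs: a KIND_GPU group with no explicit gpus list places one instance on every GPU, the second group places two instances on each of GPUs 1 and 3, and the KIND_CPU group adds three more. The arithmetic, spelled out (variable names are illustrative):

    gpu_count = 4
    group_one = 1 * gpu_count    # KIND_GPU, all GPUs -> 4 instances
    group_two = 2 * len([1, 3])  # KIND_GPU, GPUs 1 and 3 only -> 4 instances
    group_three = 3              # KIND_CPU -> 3 instances
    assert group_one + group_two + group_three == 11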

tests/test_quick_run_config_generator.py

Lines changed: 5 additions & 1 deletion
@@ -533,12 +533,16 @@ def test_default_config_generation(self):
 
         sc = SearchConfig(dimensions=dims, radius=5, min_initialized=2)
         qrcg = QuickRunConfigGenerator(
-            sc, config, MagicMock(), models, {}, MagicMock(), ModelVariantNameManager()
+            sc, config, ["GPU0"], models, {}, MagicMock(), ModelVariantNameManager()
         )
 
         default_run_config = qrcg._create_default_run_config()
 
         self.assertIn("--percentile=96", default_run_config.representation())
+        self.assertIn(
+            "--concurrency-range=8",
+            default_run_config.model_run_configs()[0].perf_config().representation(),
+        )
 
     def test_default_ensemble_config_generation(self):
         """

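The swap from MagicMock() to ["GPU0"] for the GPU list argument (stored as self._gpus) is required by the new code path: _calculate_default_concurrency() calls len(self._gpus), and a bare MagicMock reports a length of 0 rather than raising, so the generator would see a system with no GPUs. A quick standard-library illustration:

    from unittest.mock import MagicMock

    print(len(MagicMock()))  # 0 -- MagicMock's __len__ defaults to 0
    print(len(["GPU0"]))     # 1 -- one system GPU for instance_group_count()

With a single GPU, the fixture's model configuration makes 2 * max_batch_size * instance_count come out to 8, which is what the new --concurrency-range=8 assertion checks.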