Skip to content

Commit ca96f10

Browse files
authored
Add cpu_only option for ensemble composing models (#683)
* Adding config option to specify cpu only composing models
* Adding config option to specify cpu only composing models
* Changing composing model to be cpu only w/ unit testing
* Updating documentation
* Remove duplicated test
* Enabling cpu_only for BLS composing models
* Removing ensemble from description
1 parent b5e901f commit ca96f10

File tree

7 files changed

+78
-19
lines changed

7 files changed

+78
-19
lines changed

docs/config.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ profile_models: <comma-delimited-string-list>
9292
# List of composing models for BLS models
9393
bls_composing_models: <comma-delimited-string-list>
9494
95+
# List of composing models that should be profiled using CPU instances only
96+
cpu_only_composing_models: <comma-delimited-string-list>
97+
9598
# Full path to directory to which to read and write checkpoints and profile data
9699
[ checkpoint_directory: <string> | default: './checkpoints' ]
97100
@@ -273,6 +276,9 @@ profile_models: <comma-delimited-string-list|list|profile_model>
273276
# List of composing models for BLS models
274277
bls_composing_models: <comma-delimited-string-list>
275278
279+
# List of composing models that should be profiled using CPU instances only
280+
cpu_only_composing_models: <comma-delimited-string-list>
281+
276282
# List of constraints placed on the config search results
277283
[ constraints: <constraint> ]
278284

model_analyzer/config/generate/model_profile_spec.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ def __init__(self, spec: ConfigModelProfileSpec,
3434
self._default_model_config = ModelConfig.create_model_config_dict(
3535
config, client, gpus, config.model_repository, spec.model_name())
3636

37+
if spec.model_name() in config.cpu_only_composing_models:
38+
self._cpu_only = True
39+
3740
def get_default_config(self) -> dict:
3841
""" Returns the default configuration for this model """
3942
return deepcopy(self._default_model_config)

model_analyzer/config/input/config_command_profile.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,14 @@ def _add_profile_models_configs(self):
483483
required=True),
484484
default_value=[],
485485
description='List of the models to be profiled'))
486+
self._add_config(
487+
ConfigField(
488+
'cpu_only_composing_models',
489+
field_type=ConfigListString(),
490+
flags=['--cpu-only-composing-models'],
491+
description=
492+
("A list of strings representing composing models that should be profiled using CPU instances only. "
493+
)))
486494

487495
def _add_client_configs(self):
488496
"""

model_analyzer/model_manager.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,11 @@ def _check_for_ensemble_model_incompatibility(
179179
)
180180
else:
181181
self._config.run_config_search_mode = 'quick'
182+
elif not self._config.bls_composing_models:
183+
if len(self._config.cpu_only_composing_models) > 0:
184+
raise TritonModelAnalyzerException(
185+
f'\nCan only specify --cpu-only-composing-models for ensemble or BLS models.'
186+
)
182187

183188
def _init_state(self):
184189
"""

tests/test_cli.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,7 @@ def get_test_options():
156156
OptionStruct("noop", "profile", "--model-repository"),
157157
OptionStruct("noop", "profile", "--profile-models"),
158158
OptionStruct("noop", "profile", "--bls-composing-models"),
159+
OptionStruct("noop", "profile", "--cpu-only-composing-models"),
159160

160161
OptionStruct("noop", "report", "--report-model-configs"),
161162
OptionStruct("noop", "report", "--output-formats", "-o", ["pdf", "csv", "png"], "pdf", "SHOULD_FAIL"),

tests/test_model_manager.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,41 @@ def test_ensemble_makes_quick_default(self, *args):
10951095

10961096
self.assertEqual(config.run_config_search_mode, "quick")
10971097

1098+
@patch('model_analyzer.triton.model.model_config.ModelConfig.is_ensemble',
1099+
return_value=False)
1100+
def test_cpu_only_composing_models_error(self, *args):
1101+
"""
1102+
Test that --cpu-only-composing-models errors when
1103+
set for non-ensemble/BLS models
1104+
"""
1105+
yaml_str = ("""
1106+
profile_models: test_model
1107+
""")
1108+
1109+
args = self._args.copy()
1110+
args.append('--cpu-only-composing-models')
1111+
args.append('composing_modelA,composing_modelB')
1112+
1113+
self.mock_model_config = MockModelConfig(self._model_config_protobuf)
1114+
self.mock_model_config.start()
1115+
config = evaluate_mock_config(args, yaml_str, subcommand="profile")
1116+
1117+
state_manager = AnalyzerStateManager(config, MagicMock())
1118+
metrics_manager = MetricsManagerSubclass(config, MagicMock(),
1119+
MagicMock(), MagicMock(),
1120+
MagicMock(), state_manager)
1121+
model_manager = ModelManager(config, MagicMock(), MagicMock(),
1122+
MagicMock(), metrics_manager, MagicMock(),
1123+
state_manager, MagicMock())
1124+
1125+
# RunConfigSearch check
1126+
models = [
1127+
ConfigModelProfileSpec('test_model'),
1128+
]
1129+
with self.assertRaises(TritonModelAnalyzerException):
1130+
model_manager._check_for_ensemble_model_incompatibility(models)
1131+
self.mock_model_config.stop()
1132+
10981133
def _test_model_manager(self, yaml_content, expected_ranges, args=None):
10991134
"""
11001135
Test helper function that passes the given yaml_str into

tests/test_quick_run_config_generator.py

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -899,22 +899,23 @@ def _get_next_run_config_ensemble(self,
899899
Test that get_next_run_config() creates a proper RunConfig for ensemble
900900
901901
Sets up a case where the coordinate is [1,2,4,5], which corresponds to
902-
- composing model 1 max_batch_size = 2
903-
- composing model 1 instance_count = 3
904-
- composing model 1 concurrency = 2*3*2 = 12
905-
- composing model 2 max_batch_size = 16
906-
- composing model 2 instance_count = 6
907-
- composing model 2 concurrency = 16*6*2 = 192
902+
- composing model A max_batch_size = 2
903+
- composing model A instance_count = 3
904+
- composing model A concurrency = 2*3*2 = 12
905+
- composing model B max_batch_size = 16
906+
- composing model B instance_count = 6
907+
- composing model B concurrency = 16*6*2 = 192
908908
- ensemble model concurrency = 12 (minimum value of [12, 192])
909909
910910
Also,
911-
- sequence batching should be on for model 1
912-
- dynamic batching should be on for model 2
911+
- sequence batching should be on for model A
912+
- dynamic batching should be on for model B
913+
- cpu_only should be set for model B
913914
- existing values from the base model config should persist if they aren't overwritten
914915
- existing values for perf-analyzer config should persist if they aren't overwritten
915916
"""
916917

917-
additional_args = []
918+
additional_args = ['--cpu-only-composing-models', 'fake_model_B']
918919
if max_concurrency:
919920
additional_args.append('--run-config-search-max-concurrency')
920921
additional_args.append(f'{max_concurrency}')
@@ -923,7 +924,7 @@ def _get_next_run_config_ensemble(self,
923924
additional_args.append(f'{min_concurrency}')
924925

925926
#yapf: disable
926-
expected_model_config0 = {
927+
expected_model_A_config_0 = {
927928
'cpu_only': False,
928929
'instanceGroup': [{
929930
'count': 3,
@@ -939,12 +940,12 @@ def _get_next_run_config_ensemble(self,
939940
}]
940941
}
941942

942-
expected_model_config1 = {
943-
'cpu_only': False,
943+
expected_model_B_config_0 = {
944+
'cpu_only': True,
944945
'dynamicBatching': {},
945946
'instanceGroup': [{
946947
'count': 6,
947-
'kind': 'KIND_GPU',
948+
'kind': 'KIND_CPU',
948949
}],
949950
'maxBatchSize': 16,
950951
'name': 'fake_model_B_config_0',
@@ -1004,15 +1005,15 @@ def _get_next_run_config_ensemble(self,
10041005

10051006
model_config = run_config.model_run_configs()[0].model_config()
10061007
perf_config = run_config.model_run_configs()[0].perf_config()
1007-
composing_model_config0 = run_config.model_run_configs(
1008+
composing_model_A_config_0 = run_config.model_run_configs(
10081009
)[0].composing_configs()[0]
1009-
composing_model_config1 = run_config.model_run_configs(
1010+
composing_model_B_config_0 = run_config.model_run_configs(
10101011
)[0].composing_configs()[1]
10111012

1012-
self.assertEqual(composing_model_config0.to_dict(),
1013-
expected_model_config0)
1014-
self.assertEqual(composing_model_config1.to_dict(),
1015-
expected_model_config1)
1013+
self.assertEqual(composing_model_A_config_0.to_dict(),
1014+
expected_model_A_config_0)
1015+
self.assertEqual(composing_model_B_config_0.to_dict(),
1016+
expected_model_B_config_0)
10161017

10171018
if max_concurrency:
10181019
self.assertEqual(perf_config['concurrency-range'], max_concurrency)

0 commit comments

Comments
 (0)