
Commit 24e2e78

tgerdesnv authored and mc-nv committed
Support for quick search of models that don't support batching (#506)
* initial no batching support for quick search
* fix issue where dynamic batching should not be enabled
* fix quick search for no batching
* Rework no batching support
* more cleanup
* more cleanup
* remove needless comment
* fix type checking
* Clean up and document new class
* add more type checking
1 parent b7b1b78 commit 24e2e78

15 files changed: +267 −167 lines

model_analyzer/config/generate/automatic_model_config_generator.py

Lines changed: 12 additions & 20 deletions
@@ -15,23 +15,25 @@
 from .base_model_config_generator import BaseModelConfigGenerator
 
 from model_analyzer.constants import LOGGER_NAME, DEFAULT_CONFIG_PARAMS
-import logging
 from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+from .model_profile_spec import ModelProfileSpec
+import logging
 
 
 logger = logging.getLogger(LOGGER_NAME)
 
 
 class AutomaticModelConfigGenerator(BaseModelConfigGenerator):
     """ Given a model, generates model configs in automatic search mode """
 
-    def __init__(self, config, gpus, model, client, model_variant_name_manager,
-                 default_only, early_exit_enable):
+    def __init__(self, config, gpus, model: ModelProfileSpec, client,
+                 model_variant_name_manager, default_only, early_exit_enable):
         """
         Parameters
         ----------
         config: ModelAnalyzerConfig
         gpus: List of GPUDevices
-        model: The model to generate ModelConfigs for
+        model: ModelProfileSpec
+            The model to generate ModelConfigs for
         client: TritonClient
         model_variant_name_manager: ModelVariantNameManager
         default_only: Bool
@@ -54,9 +56,6 @@ def __init__(self, config, gpus, model, client, model_variant_name_manager,
         self._curr_instance_count = self._min_instance_count
         self._curr_max_batch_size = 0
 
-        self._max_batch_size_disabled = self._determine_max_batch_size_disabled(
-        )
-
         self._reset_max_batch_size()
 
         if not self._early_exit_enable:
@@ -102,10 +101,10 @@ def _max_batch_size_limit_reached(self):
     def _reset_max_batch_size(self):
         super()._reset_max_batch_size()
 
-        if self._max_batch_size_disabled:
-            self._curr_max_batch_size = self._max_model_batch_size
-        else:
+        if self._base_model.supports_batching():
             self._curr_max_batch_size = self._min_model_batch_size
+        else:
+            self._curr_max_batch_size = self._max_model_batch_size
 
     def _get_next_model_config(self):
         param_combo = self._get_curr_param_combo()
@@ -123,17 +122,10 @@ def _get_curr_param_combo(self):
             }]
         }
 
-        if not self._max_batch_size_disabled:
+        if self._base_model.supports_batching():
             config['max_batch_size'] = self._curr_max_batch_size
+
+        if self._base_model.supports_dynamic_batching():
             config['dynamic_batching'] = {}
 
         return config
-
-    def _determine_max_batch_size_disabled(self):
-        config = BaseModelConfigGenerator.get_base_model_config_dict(
-            self._config, self._client, self._gpus, self._model_repository,
-            self._base_model_name)
-        max_batch_size_disabled = False
-        if "max_batch_size" not in config or config['max_batch_size'] == 0:
-            max_batch_size_disabled = True
-        return max_batch_size_disabled
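
Net effect of this file's change: the batching-related keys in the generated param combo are now gated on the model's default config rather than on a separately computed _max_batch_size_disabled flag. A minimal runnable sketch of that behavior, using a hypothetical stub in place of ModelProfileSpec (the instance_group shape here is illustrative, not taken from the commit):

# Hypothetical stand-in for ModelProfileSpec, mimicking the two predicates
# the diff above introduces.
class StubModel:

    def __init__(self, default_config: dict):
        self._cfg = default_config

    def supports_batching(self) -> bool:
        return self._cfg.get("max_batch_size", 0) != 0

    def supports_dynamic_batching(self) -> bool:
        return self.supports_batching() and "sequence_batching" not in self._cfg


def build_param_combo(model: StubModel, instance_count: int,
                      max_batch_size: int) -> dict:
    # Mirrors the post-commit _get_curr_param_combo(): batching keys are
    # emitted only when the model's default config allows them.
    combo = {"instance_group": [{"count": instance_count, "kind": "KIND_GPU"}]}
    if model.supports_batching():
        combo["max_batch_size"] = max_batch_size
    if model.supports_dynamic_batching():
        combo["dynamic_batching"] = {}
    return combo


# A model whose config.pbtxt sets max_batch_size: 0 gets neither batching key.
print(build_param_combo(StubModel({"max_batch_size": 0}), 1, 8))
# -> {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}
print(build_param_combo(StubModel({"max_batch_size": 4}), 1, 8))
# -> adds both 'max_batch_size' and 'dynamic_batching'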

model_analyzer/config/generate/base_model_config_generator.py

Lines changed: 13 additions & 41 deletions
@@ -12,11 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 from .config_generator_interface import ConfigGeneratorInterface
-
+from typing import List, Optional
 from model_analyzer.constants import LOGGER_NAME
 from model_analyzer.triton.model.model_config import ModelConfig
-
+from .model_profile_spec import ModelProfileSpec
 import abc
 import logging
 
@@ -26,14 +27,15 @@
 class BaseModelConfigGenerator(ConfigGeneratorInterface):
     """ Base class for generating model configs """
 
-    def __init__(self, config, gpus, model, client, model_variant_name_manager,
-                 default_only, early_exit_enable):
+    def __init__(self, config, gpus, model: ModelProfileSpec, client,
+                 model_variant_name_manager, default_only, early_exit_enable):
         """
         Parameters
         ----------
         config: ModelAnalyzerConfig
         gpus: List of GPUDevices
-        model: The model to generate ModelConfigs for
+        model: ModelProfileSpec
+            The model to generate ModelConfigs for
         client: TritonClient
         model_variant_name_manager: ModelVariantNameManager
         default_only: Bool
@@ -43,10 +45,8 @@ def __init__(self, config, gpus, model, client, model_variant_name_manager,
         If true, the generator can early exit if throughput plateaus
         """
         self._config = config
-        self._gpus = gpus
         self._client = client
         self._model_variant_name_manager = model_variant_name_manager
-        self._model_repository = config.model_repository
         self._base_model = model
         self._base_model_name = model.model_name()
         self._remote_mode = config.triton_launch_mode == 'remote'
@@ -56,11 +56,11 @@ def __init__(self, config, gpus, model, client, model_variant_name_manager,
         self._model_name_index = 0
         self._generator_started = False
         self._max_batch_size_warning_printed = False
-        self._last_results = []
+        self._last_results: List[RunConfigMeasurement] = []
         # Contains the max throughput from each provided list of measurements
         # since the last time we stepped max_batch_size
         #
-        self._curr_max_batch_size_throughputs = []
+        self._curr_max_batch_size_throughputs: List[float] = []
 
     def _is_done(self):
         """ Returns true if this generator is done generating configs """
@@ -119,7 +119,7 @@ def _last_results_increased_throughput(self):
             lastest_throughput > prev_throughput
             for prev_throughput in self._curr_max_batch_size_throughputs[:-1])
 
-    def _get_last_results_max_throughput(self):
+    def _get_last_results_max_throughput(self) -> Optional[float]:
         throughputs = [
             m.get_non_gpu_metric_value('perf_throughput')
             for m in self._last_results
@@ -144,16 +144,12 @@ def _make_remote_model_config(self):
     def _make_direct_mode_model_config(self, param_combo):
         return BaseModelConfigGenerator.make_model_config(
             param_combo=param_combo,
-            config=self._config,
-            client=self._client,
-            gpus=self._gpus,
             model=self._base_model,
-            model_repository=self._model_repository,
             model_variant_name_manager=self._model_variant_name_manager)
 
     @staticmethod
-    def make_model_config(param_combo, config, client, gpus, model,
-                          model_repository, model_variant_name_manager):
+    def make_model_config(param_combo, model: ModelProfileSpec,
+                          model_variant_name_manager):
         """
         Loads the base model config from the model repository, and then applies the
         parameters in the param_combo on top to create and return a new model config
@@ -173,8 +169,7 @@ def make_model_config(param_combo, config, client, gpus, model,
         """
         model_name = model.model_name()
 
-        model_config_dict = BaseModelConfigGenerator.get_base_model_config_dict(
-            config, client, gpus, model_repository, model_name)
+        model_config_dict = model.get_default_config()
 
         logger_str = []
         if param_combo is not None:
@@ -208,29 +203,6 @@ def make_model_config(param_combo, config, client, gpus, model,
 
         return model_config
 
-    @classmethod
-    def get_base_model_config_dict(cls, config, client, gpus, model_repository,
-                                   model_name):
-        """
-        Attempts to create a base model config dict from config.pbtxt, if one exists
-        If the config.pbtxt is not present, we will load a Triton Server with the
-        base model and have it create a default config for MA, if possible
-
-        Parameters:
-        -----------
-        config: ModelAnalyzerConfig
-        client: TritonClient
-        gpus: List of GPUDevices
-        model_repository: str
-            path to the model repository on the file system
-        model_name: str
-            name of the base model
-        """
-        model_config_dict = ModelConfig.create_model_config_dict(
-            config, client, gpus, model_repository, model_name)
-
-        return model_config_dict
-
     def _reset_max_batch_size(self):
         self._max_batch_size_warning_printed = False
         self._curr_max_batch_size_throughputs = []
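
make_model_config now starts from model.get_default_config() and applies the param_combo on top of the returned dict. Since that dict gets mutated, it matters that the spec hands out a copy each time. A runnable mini-version of the idea (MiniSpec is a hypothetical stand-in, not the commit's class):

from copy import deepcopy

class MiniSpec:

    def __init__(self, default_model_config: dict):
        self._default_model_config = default_model_config

    def get_default_config(self) -> dict:
        # Return a deepcopy so callers can overlay params without
        # corrupting the cached default config.
        return deepcopy(self._default_model_config)


spec = MiniSpec({'max_batch_size': 4, 'instance_group': [{'count': 1}]})

model_config_dict = spec.get_default_config()
model_config_dict['max_batch_size'] = 64          # overlay a param_combo value

assert spec.get_default_config()['max_batch_size'] == 4  # default untouched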

model_analyzer/config/generate/brute_run_config_generator.py

Lines changed: 4 additions & 4 deletions
@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Optional
+from typing import List
 
 from .config_generator_interface import ConfigGeneratorInterface
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
+from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
 from model_analyzer.config.generate.model_run_config_generator import ModelRunConfigGenerator
 from model_analyzer.config.generate.model_variant_name_manager import ModelVariantNameManager
-from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 
 
 class BruteRunConfigGenerator(ConfigGeneratorInterface):
@@ -30,9 +30,9 @@ class BruteRunConfigGenerator(ConfigGeneratorInterface):
     def __init__(self,
                  config,
                  gpus,
-                 models,
+                 models: List[ModelProfileSpec],
                  client,
-                 model_variant_name_manager,
+                 model_variant_name_manager: ModelVariantNameManager,
                  skip_default_config: bool = False):
         """
         Parameters
model_analyzer/config/generate/model_profile_spec.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
+# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from copy import deepcopy
+from typing import List
+from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
+from model_analyzer.config.input.objects.config_model_profile_spec import ConfigModelProfileSpec
+from model_analyzer.triton.client.client import TritonClient
+from model_analyzer.triton.model.model_config import ModelConfig
+from model_analyzer.device.gpu_device import GPUDevice
+
+
+class ModelProfileSpec(ConfigModelProfileSpec):
+    """
+    The profile configuration and default model config for a single model to be profiled
+    """
+
+    def __init__(self, spec: ConfigModelProfileSpec,
+                 config: ConfigCommandProfile, client: TritonClient,
+                 gpus: List[GPUDevice]):
+        self.__dict__ = deepcopy(spec.__dict__)
+
+        self._default_model_config = ModelConfig.create_model_config_dict(
+            config, client, gpus, config.model_repository, spec.model_name())
+
+    def get_default_config(self) -> dict:
+        """ Returns the default configuration for this model """
+        return deepcopy(self._default_model_config)
+
+    def supports_batching(self) -> bool:
+        """ Returns True if this model supports batching. Else False """
+        if "max_batch_size" not in self._default_model_config or self._default_model_config[
+                'max_batch_size'] == 0:
+            return False
+        return True
+
+    def supports_dynamic_batching(self) -> bool:
+        """ Returns True if this model supports dynamic batching. Else False """
+        supports_dynamic_batching = self.supports_batching()
+
+        if "sequence_batching" in self._default_model_config:
+            supports_dynamic_batching = False
+        return supports_dynamic_batching
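
In a Triton model config, a max_batch_size of 0 (or an absent field) means the model does not support batching, and sequence (stateful) models use the sequence batcher instead of the dynamic batcher. A small self-check restating the two predicates above against hypothetical default-config dicts (a real ModelProfileSpec derives its dict via ModelConfig.create_model_config_dict from the model repository):

# (config dict, expected supports_batching, expected supports_dynamic_batching)
cases = [
    ({"max_batch_size": 8}, True, True),    # batchable, dynamic batching OK
    ({"max_batch_size": 0}, False, False),  # batching explicitly disabled
    ({}, False, False),                     # no max_batch_size field at all
    ({"max_batch_size": 8, "sequence_batching": {}}, True, False),  # sequence model
]

for cfg, want_batching, want_dynamic in cases:
    got_batching = "max_batch_size" in cfg and cfg["max_batch_size"] != 0
    got_dynamic = got_batching and "sequence_batching" not in cfg
    assert (got_batching, got_dynamic) == (want_batching, want_dynamic)
print("all cases match")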

model_analyzer/config/generate/perf_analyzer_config_generator.py

Lines changed: 2 additions & 2 deletions
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import List, Union
+from typing import List, Union, Optional
 
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
 
@@ -121,7 +121,7 @@ def set_last_results(self, measurements: List[Union[RunConfigMeasurement,
         # Remove 'NONE' cases, and find single max measurement from the list
         valid_measurements = [m for m in measurements if m]
 
-        measurement: List[Union[RunConfigMeasurement, None]] = [None]
+        measurement: List[Optional[RunConfigMeasurement]] = [None]
         if valid_measurements:
             measurement = [max(valid_measurements)]
 
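
For reference, this annotation change is purely cosmetic: typing.Optional[X] is defined as Union[X, None]. A quick demonstration:

from typing import List, Optional, Union

# Optional[X] is shorthand for Union[X, None]; the two annotations compare equal.
assert Optional[int] == Union[int, None]

measurement: List[Optional[int]] = [None]  # identical runtime meaning as before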

model_analyzer/config/generate/quick_plus_concurrency_sweep_run_config_generator.py

Lines changed: 9 additions & 4 deletions
@@ -24,7 +24,7 @@
 from model_analyzer.triton.client.client import TritonClient
 from model_analyzer.device.gpu_device import GPUDevice
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
-from model_analyzer.config.input.objects.config_model_profile_spec import ConfigModelProfileSpec
+from model_analyzer.config.generate.model_profile_spec import ModelProfileSpec
 from model_analyzer.result.result_manager import ResultManager
 from model_analyzer.result.run_config_measurement import RunConfigMeasurement
 from model_analyzer.result.run_config_result import RunConfigResult
@@ -47,7 +47,7 @@ class QuickPlusConcurrencySweepRunConfigGenerator(ConfigGeneratorInterface):
 
     def __init__(self, search_config: SearchConfig,
                  config: ConfigCommandProfile, gpus: List[GPUDevice],
-                 models: List[ConfigModelProfileSpec], client: TritonClient,
+                 models: List[ModelProfileSpec], client: TritonClient,
                  result_manager: ResultManager,
                  model_variant_name_manager: ModelVariantNameManager):
         """
@@ -171,8 +171,13 @@ def _set_parameters(self, result: RunConfigResult,
     # We will need to create a yaml config to set each models
     # batch size/instances seperately
     def _find_batch_size(self, result: RunConfigResult) -> int:
-        return result.run_config().model_run_configs()[0].model_config(
-        ).get_config()['max_batch_size']
+        mc = result.run_config().model_run_configs()[0].model_config(
+        ).get_config()
+
+        batch_size = 1
+        if 'max_batch_size' in mc:
+            batch_size = mc['max_batch_size']
+        return batch_size
 
     def _find_instance_count(self, result: RunConfigResult) -> int:
         return result.run_config().model_run_configs()[0].model_config(
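
The new _find_batch_size fallback defaults to 1 when the model config has no max_batch_size field, so quick search no longer raises a KeyError on non-batching models. As a sketch (not what the commit ships), dict.get expresses the same lookup compactly:

# 'mc' is a hypothetical model-config dict for a non-batching model.
mc = {"instance_group": [{"count": 2, "kind": "KIND_GPU"}]}  # no max_batch_size

batch_size = mc.get("max_batch_size", 1)  # equivalent to the fallback above
assert batch_size == 1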
