Commit 08590a2

Add request rate parameter for optuna search (#915)
* Add request rate parameter for optuna search
* Fix pre-commit error
* Remove unused variable
* Fix comments
* Modify TestOptunaRunConfigGenerator to support search based on request rate
* Add search by request rate to OptunaRunConfigGenerator
* Changes
* Fix CodeQL errors
* Modification to disable concurrency sweep when request rate is specified
* Modify test to check if concurrency-range is in perf_config
* Fix CodeQL errors
* Fix CodeQL errors
* Fix comments
* Fix error
1 parent 2dabb62 commit 08590a2
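For context, the user-facing shape of the new option: with run_config_search_mode set to optuna, a per-model request_rate list can now drive the search. A minimal profile YAML sketch, mirroring the configuration exercised in the new test_search_parameters test (the model name mult_div and the value list come from that test; the surrounding keys are Model Analyzer's standard profile config schema):

    run_config_search_mode: optuna
    profile_models:
      mult_div:
        parameters:
          request_rate: [1, 8, 64, 256]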

File tree

4 files changed: +203 -39 lines changed

model_analyzer/config/generate/optuna_run_config_generator.py
model_analyzer/config/generate/search_parameters.py
tests/test_optuna_run_config_generator.py
tests/test_search_parameters.py

model_analyzer/config/generate/optuna_run_config_generator.py

Lines changed: 39 additions & 22 deletions
@@ -37,7 +37,10 @@
 )
 from model_analyzer.config.generate.search_parameters import SearchParameters
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
-from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
+from model_analyzer.config.input.config_defaults import (
+    DEFAULT_BATCH_SIZES,
+    DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
+)
 from model_analyzer.config.run.model_run_config import ModelRunConfig
 from model_analyzer.config.run.run_config import RunConfig
 from model_analyzer.constants import LOGGER_NAME
@@ -76,6 +79,7 @@ class OptunaRunConfigGenerator(ConfigGeneratorInterface):
         "instance_group",
         "concurrency",
         "max_queue_delay_microseconds",
+        "request_rate",
     ]

     # TODO: TMA-1927: Figure out the correct value for this
@@ -380,7 +384,6 @@ def _create_trial_objectives(self, trial: optuna.Trial) -> AllTrialObjectives:
                 parameter = self._search_parameters[model_name].get_parameter(
                     parameter_name
                 )
-
                 if parameter:
                     objective_name = self._create_trial_objective_name(
                         model_name=model_name, parameter_name=parameter_name
@@ -668,14 +671,21 @@ def _create_default_perf_analyzer_config(
             model_config.get_field("name"), self._config
         )

-        default_concurrency = self._calculate_default_concurrency(model_config)
-
-        perf_config_params = {
-            "batch-size": DEFAULT_BATCH_SIZES,
-            "concurrency-range": default_concurrency,
-        }
+        if self._search_parameters[model_config.get_field("name")].get_parameter(
+            "request_rate"
+        ):
+            perf_config_params = {
+                "batch-size": DEFAULT_BATCH_SIZES,
+                "request-rate-range": DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
+            }
+            self._config.concurrency_sweep_disable = True
+        else:
+            default_concurrency = self._calculate_default_concurrency(model_config)
+            perf_config_params = {
+                "batch-size": DEFAULT_BATCH_SIZES,
+                "concurrency-range": default_concurrency,
+            }
         default_perf_analyzer_config.update_config(perf_config_params)
-
         default_perf_analyzer_config.update_config(model.perf_analyzer_flags())

         return default_perf_analyzer_config
@@ -712,16 +722,10 @@ def _create_model_run_config(
         composing_model_config_variants: List[ModelConfigVariant],
         trial_objectives: ModelTrialObjectives,
     ) -> ModelRunConfig:
-        trial_batch_sizes = (
-            int(trial_objectives["batch_sizes"])
-            if "batch_sizes" in trial_objectives
-            else DEFAULT_BATCH_SIZES
-        )
         perf_analyzer_config = self._create_perf_analyzer_config(
             model_name=model.model_name(),
             model=model,
-            concurrency=int(trial_objectives["concurrency"]),
-            batch_sizes=trial_batch_sizes,
+            trial_objectives=trial_objectives,
         )
         model_run_config = ModelRunConfig(
             model.model_name(), model_config_variant, perf_analyzer_config
@@ -738,17 +742,30 @@ def _create_perf_analyzer_config(
         self,
         model_name: str,
         model: ModelProfileSpec,
-        concurrency: int,
-        batch_sizes: int,
+        trial_objectives: ModelTrialObjectives,
     ) -> PerfAnalyzerConfig:
         perf_analyzer_config = PerfAnalyzerConfig()

         perf_analyzer_config.update_config_from_profile_config(model_name, self._config)

-        perf_config_params = {
-            "batch-size": batch_sizes,
-            "concurrency-range": concurrency,
-        }
+        batch_sizes = (
+            int(trial_objectives["batch_sizes"])
+            if "batch_sizes" in trial_objectives
+            else DEFAULT_BATCH_SIZES
+        )
+
+        perf_config_params = {"batch-size": batch_sizes}
+
+        if "concurrency" in trial_objectives:
+            perf_config_params["concurrency-range"] = int(
+                trial_objectives["concurrency"]
+            )
+        elif "request_rate" in trial_objectives:
+            perf_config_params["request-rate-range"] = int(
+                trial_objectives["request_rate"]
+            )
+            self._config.concurrency_sweep_disable = True
+
         perf_analyzer_config.update_config(perf_config_params)

         perf_analyzer_config.update_config(model.perf_analyzer_flags())
model_analyzer/config/generate/search_parameters.py

Lines changed: 25 additions & 4 deletions
@@ -31,15 +31,20 @@ class SearchParameters:

     # These map to the run-config-search fields
     # See github.com/triton-inference-server/model_analyzer/blob/main/docs/config.md
-    exponential_rcs_parameters = ["max_batch_size", "batch_sizes", "concurrency"]
+    exponential_rcs_parameters = [
+        "max_batch_size",
+        "batch_sizes",
+        "concurrency",
+        "request_rate",
+    ]
     linear_rcs_parameters = ["instance_group"]

     model_parameters = [
         "max_batch_size",
         "instance_group",
         "max_queue_delay_microseconds",
     ]
-    runtime_parameters = ["batch_sizes", "concurrency"]
+    runtime_parameters = ["batch_sizes", "concurrency", "request_rate"]

     def __init__(
         self,
@@ -129,8 +134,10 @@ def _populate_parameters(self) -> None:
         self._populate_batch_sizes()

         if not self._is_composing_model:
-            self._populate_concurrency()
-            # TODO: Populate request rate - TMA-1903
+            if self._config.is_request_rate_specified(self._parameters):
+                self._populate_request_rate()
+            else:
+                self._populate_concurrency()

     def _populate_model_config_parameters(self) -> None:
         self._populate_max_batch_size()
@@ -161,6 +168,20 @@ def _populate_concurrency(self) -> None:
                 rcs_parameter_max_value=self._config.run_config_search_max_concurrency,
             )

+    def _populate_request_rate(self) -> None:
+        if self._parameters and self._parameters["request_rate"]:
+            self._populate_list_parameter(
+                parameter_name="request_rate",
+                parameter_list=self._parameters["request_rate"],
+                parameter_category=ParameterCategory.INT_LIST,
+            )
+        else:
+            self._populate_rcs_parameter(
+                parameter_name="request_rate",
+                rcs_parameter_min_value=self._config.run_config_search_min_request_rate,
+                rcs_parameter_max_value=self._config.run_config_search_max_request_rate,
+            )
+
     def _populate_max_batch_size(self) -> None:
         # Example config format:
         # model_config_parameters:
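The new _populate_request_rate mirrors _populate_concurrency: an explicit request_rate list on the model becomes an enumerated INT_LIST runtime parameter, otherwise the run-config-search bounds define the range, which is swept on an exponential scale because request_rate is listed in exponential_rcs_parameters. A rough standalone sketch of that decision follows; the bound values are assumptions standing in for run_config_search_min_request_rate / run_config_search_max_request_rate, not the repository's defaults.

    from typing import Optional


    def describe_request_rate_search(
        parameters: Optional[dict], rcs_min: int = 16, rcs_max: int = 8192
    ) -> dict:
        if parameters and parameters.get("request_rate"):
            # Explicit user-supplied values -> enumerated list (INT_LIST).
            return {"category": "INT_LIST", "values": parameters["request_rate"]}
        # Otherwise sweep between the run-config-search bounds (exponential scale).
        return {"category": "EXPONENTIAL", "min": rcs_min, "max": rcs_max}


    print(describe_request_rate_search({"request_rate": [1, 8, 64, 256]}))
    print(describe_request_rate_search(None))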

tests/test_optuna_run_config_generator.py

Lines changed: 95 additions & 12 deletions
@@ -25,7 +25,10 @@
     OptunaRunConfigGenerator,
 )
 from model_analyzer.config.generate.search_parameters import SearchParameters
-from model_analyzer.config.input.config_defaults import DEFAULT_BATCH_SIZES
+from model_analyzer.config.input.config_defaults import (
+    DEFAULT_BATCH_SIZES,
+    DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE,
+)
 from model_analyzer.config.input.objects.config_model_profile_spec import (
     ConfigModelProfileSpec,
 )
@@ -174,9 +177,9 @@ def test_min_number_of_configs_to_search_both(self):
         # Since both are specified we will use the larger of the two (trials=6)
         self.assertEqual(min_configs_to_search, 6)

-    def test_create_default_run_config(self):
+    def test_create_default_run_config_with_concurrency(self):
         """
-        Test that a default run config is properly created
+        Test that a default run config with concurrency is properly created
         """
         default_run_config = self._rcg._create_default_run_config()

@@ -190,9 +193,50 @@ def test_create_default_run_config(self):
             perf_config["concurrency-range"], 2 * self._default_max_batch_size
         )

-    def test_create_objective_based_run_config(self):
+    def test_create_default_run_config_with_request_rate(self):
+        """
+        Test that a default run config with request rate is properly created
+        """
+        config = self._create_config(["--request-rate-search-enable"])
+        mock_model_config = MockModelConfig()
+        mock_model_config.start()
+        model = ModelProfileSpec(
+            config.profile_models[0], config, MagicMock(), MagicMock()
+        )
+        mock_model_config.stop()
+        search_parameters = SearchParameters(
+            model=model,
+            config=config,
+        )
+
+        rcg = OptunaRunConfigGenerator(
+            config=config,
+            state_manager=MagicMock(),
+            gpu_count=1,
+            models=self._mock_models,
+            composing_models=[],
+            model_variant_name_manager=ModelVariantNameManager(),
+            search_parameters={"add_sub": search_parameters},
+            composing_search_parameters={},
+            user_seed=100,
+        )
+
+        default_run_config = rcg._create_default_run_config()
+        self.assertEqual(len(default_run_config.model_run_configs()), 1)
+
+        model_config = default_run_config.model_run_configs()[0].model_config()
+        perf_config = default_run_config.model_run_configs()[0].perf_config()
+
+        self.assertEqual(model_config.to_dict()["name"], self._test_config_dict["name"])
+        self.assertEqual(perf_config["batch-size"], DEFAULT_BATCH_SIZES)
+        self.assertEqual(
+            perf_config["request-rate-range"], DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE
+        )
+        self.assertEqual(perf_config["concurrency-range"], None)
+
+    def test_create_objective_based_run_config_with_concurrency(self):
         """
-        Test that an objective based run config is properly created
+        Test that an objective based run config with concurrency is properly created
         """
         trial = self._rcg._study.ask()
         trial_objectives = self._rcg._create_trial_objectives(trial)
@@ -215,6 +259,47 @@ def test_create_objective_based_run_config(self):
         self.assertEqual(perf_config["batch-size"], DEFAULT_BATCH_SIZES)
         self.assertEqual(perf_config["concurrency-range"], 64)

+    def test_create_objective_based_run_config_with_request_rate(self):
+        """
+        Test that an objective based run config with request rate is properly created
+        """
+        config = self._create_config(["--request-rate", "1024,2048"])
+        mock_model_config = MockModelConfig()
+        mock_model_config.start()
+        model = ModelProfileSpec(
+            config.profile_models[0], config, MagicMock(), MagicMock()
+        )
+        mock_model_config.stop()
+        search_parameters = SearchParameters(
+            model=model,
+            config=config,
+        )
+
+        rcg = OptunaRunConfigGenerator(
+            config=config,
+            state_manager=MagicMock(),
+            gpu_count=1,
+            models=self._mock_models,
+            composing_models=[],
+            model_variant_name_manager=ModelVariantNameManager(),
+            search_parameters={"add_sub": search_parameters},
+            composing_search_parameters={},
+            user_seed=100,
+        )
+
+        trial = rcg._study.ask()
+        trial_objectives = rcg._create_trial_objectives(trial)
+        run_config = rcg._create_objective_based_run_config(trial_objectives, None)
+
+        model_config = run_config.model_run_configs()[0].model_config()
+        perf_config = run_config.model_run_configs()[0].perf_config()
+
+        # These values are the result of using a fixed seed of 100
+        self.assertEqual(model_config.to_dict()["name"], self._test_config_dict["name"])
+        self.assertEqual(perf_config["batch-size"], DEFAULT_BATCH_SIZES)
+        self.assertEqual(perf_config["request-rate-range"], 2048)
+        self.assertEqual(perf_config["concurrency-range"], None)
+
     def test_create_run_config_with_concurrency_formula(self):
         """
         Tests that the concurrency formula option is used correctly
@@ -284,12 +369,10 @@ def test_create_run_bls_config(self):
             config=config,
         )
         add_search_parameters = SearchParameters(
-            model=add_model,
-            config=config,
+            model=add_model, config=config, is_composing_model=True
         )
         sub_search_parameters = SearchParameters(
-            model=sub_model,
-            config=config,
+            model=sub_model, config=config, is_composing_model=True
         )
         rcg = OptunaRunConfigGenerator(
             config=config,
@@ -331,16 +414,16 @@ def test_create_run_bls_config(self):
         self.assertEqual(add_model_config.to_dict()["instanceGroup"][0]["count"], 3)
         self.assertEqual(
             add_model_config.to_dict()["dynamicBatching"]["maxQueueDelayMicroseconds"],
-            "300",
+            "100",
         )

         # SUB (composing model)
         # =====================================================================
         self.assertEqual(sub_model_config.to_dict()["name"], "sub")
-        self.assertEqual(sub_model_config.to_dict()["instanceGroup"][0]["count"], 5)
+        self.assertEqual(sub_model_config.to_dict()["instanceGroup"][0]["count"], 4)
         self.assertEqual(
             sub_model_config.to_dict()["dynamicBatching"]["maxQueueDelayMicroseconds"],
-            "500",
+            "400",
         )

     def test_create_run_multi_model_config(self):
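The two new generator tests cover the two ways request-rate search is triggered: the --request-rate-search-enable flag (default sweep starting at DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE) and an explicit --request-rate value list (sampled as trial objectives). Assuming the suite runs under the standard unittest runner, the new tests could be invoked in isolation with something like:

    python -m unittest tests.test_optuna_run_config_generator.TestOptunaRunConfigGenerator.test_create_default_run_config_with_request_rate
    python -m unittest tests.test_optuna_run_config_generator.TestOptunaRunConfigGenerator.test_create_objective_based_run_config_with_request_rate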

tests/test_search_parameters.py

Lines changed: 44 additions & 1 deletion
@@ -98,6 +98,7 @@ def test_exponential_parameter(self):
         Test exponential parameter, accessing dataclass directly
         """

+        # concurrency
         parameter = self.search_parameters.get_parameter("concurrency")

         self.assertEqual(ParameterUsage.RUNTIME, parameter.usage)
@@ -322,10 +323,10 @@ def test_search_parameter_creation_multi_model_non_default(self):
             mult_div:
                 parameters:
                     concurrency: [1, 8, 64, 256]
+
         """

         config = TestConfig()._evaluate_config(args, yaml_content)
-
         analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock())
         mock_model_config = MockModelConfig()
         mock_model_config.start()
@@ -418,6 +419,48 @@ def test_search_parameter_creation_multi_model_non_default(self):
             default.DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT, instance_group.max_range
         )

+    def test_search_parameter_request_rate(self):
+        """
+        Test that request rate is correctly set in
+        a non-default optuna case
+        """
+
+        args = [
+            "model-analyzer",
+            "profile",
+            "--model-repository",
+            "cli-repository",
+            "-f",
+            "path-to-config-file",
+            "--run-config-search-mode",
+            "optuna",
+        ]
+
+        yaml_content = """
+        run_config_search_mode: optuna
+        profile_models:
+            mult_div:
+                parameters:
+                    request_rate: [1, 8, 64, 256]
+
+        """
+        config = TestConfig()._evaluate_config(args, yaml_content)
+        analyzer = Analyzer(config, MagicMock(), MagicMock(), MagicMock())
+        mock_model_config = MockModelConfig()
+        mock_model_config.start()
+        analyzer._populate_search_parameters(MagicMock(), MagicMock())
+        mock_model_config.stop()
+
+        # request_rate
+        # ===================================================================
+
+        request_rate = analyzer._search_parameters["mult_div"].get_parameter(
+            "request_rate"
+        )
+        self.assertEqual(ParameterUsage.RUNTIME, request_rate.usage)
+        self.assertEqual(ParameterCategory.INT_LIST, request_rate.category)
+        self.assertEqual([1, 8, 64, 256], request_rate.enumerated_list)
+
     def test_number_of_configs_range(self):
         """
         Test number of configs for a range (INTEGER/EXPONENTIAL)
