Skip to content

Commit 985fc23

Browse files
nv-braftgerdesnv
authored and committed
New config/CLI options for request-rate-range (#665)
* Added config options for request-rate-range (RRR)
* Unit testing
* Adding in missing request-rate-range
1 parent 9df1539 commit 985fc23

File tree

6 files changed

+181
-8
lines changed

6 files changed

+181
-8
lines changed

docs/config.md

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ bls_composing_models: <comma-delimited-string-list>
110110
# Batch size values to be used
111111
[ batch_sizes: <comma-delimited-string|list|range> | default: 1 ]
112112
113+
# Request rate range values to be used
114+
[ request_rate_range: <comma-delimited-string|list|range> ]
115+
113116
# Specifies the maximum number of retries for any retry attempt
114117
[ client_max_retries: <int> | default: 50 ]
115118
@@ -185,30 +188,39 @@ bls_composing_models: <comma-delimited-string-list>
185188
# Search mode. Options are "brute" and "quick"
186189
[ run_config_search_mode: <string> | default: brute]
187190
188-
# Minimum concurrency used for the automatic config search
191+
# Minimum concurrency used for the automatic/quick config search
189192
[ run_config_search_min_concurrency: <int> | default: 1 ]
190193
191-
# Maximum concurrency used for the automatic config search
194+
# Maximum concurrency used for the automatic/quick config search
192195
[ run_config_search_max_concurrency: <int> | default: 1024 ]
193196
194-
# Minimum max_batch_size used for the automatic config search
197+
# Minimum max_batch_size used for the automatic/quick config search
195198
[ run_config_search_min_model_batch_size: <int> | default: 1 ]
196199
197-
# Maximum max_batch_size used for the automatic config search
200+
# Maximum max_batch_size used for the automatic/quick config search
198201
[ run_config_search_max_model_batch_size: <int> | default: 128 ]
199202
200-
# Minimum instance group count used for the automatic config search
203+
# Minimum instance group count used for the automatic/quick config search
201204
[ run_config_search_min_instance_count: <int> | default: 1 ]
202205
203-
# Maximum instance group count used for the automatic config search
206+
# Maximum instance group count used for the automatic/quick config search
204207
[ run_config_search_max_instance_count: <int> | default: 5 ]
205208
209+
# Minimum request rate range used for the automatic/quick config search
210+
[ run_config_search_min_request_rate_range: <int> | default: 1 ]
211+
212+
# Maximum request rate range used for the automatic/quick config search
213+
[ run_config_search_max_request_rate_range: <int> | default: 1024 ]
214+
206215
# Disables automatic config search
207216
[ run_config_search_disable: <bool> | default: false ]
208217
209218
# Enables the profiling of all supplied models concurrently
210219
[ run_config_profile_models_concurrently_enable: <bool> | default: false]
211220
221+
# Enables the searching of request rate range (instead of concurrency)
222+
[ request_rate_range_search_enable: <bool> | default: false]
223+
212224
# Skips the generation of summary reports and tables
213225
[ skip_summary_reports: <bool> | default: false]
214226

model_analyzer/config/input/config_command.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -120,6 +120,8 @@ def _check_for_illegal_config_settings(
120120
self._check_for_multi_model_incompatibility(args, yaml_config)
121121
self._check_for_quick_search_incompatibility(args, yaml_config)
122122
self._check_for_bls_incompatibility(args, yaml_config)
123+
self._check_for_concurrency_rate_request_conflicts(args, yaml_config)
124+
self._check_for_config_search_rate_request_conflicts(args, yaml_config)
123125

124126
def _set_field_values(self, args: Namespace,
125127
yaml_config: Optional[Dict[str, List]]) -> None:
@@ -288,6 +290,63 @@ def _check_no_concurrent_search(
288290
'\nPlease remove `--run-config-profile-models-concurrently-enable from the config/CLI.'
289291
)
290292

293+
def _check_for_concurrency_rate_request_conflicts(
        self, args: Namespace, yaml_config: Optional[Dict[str,
                                                          List]]) -> None:
    """
    Rejects configurations that specify `concurrency` together with
    any of the request rate options.

    Parameters
    ----------
    args : Namespace
        Parsed CLI arguments, forwarded to `_get_config_value`
    yaml_config : Optional[Dict[str, List]]
        Parsed YAML config, forwarded to `_get_config_value`

    Raises
    ------
    TritonModelAnalyzerException
        If `concurrency` and a request rate option both have a truthy
        value. Only the first conflicting option (in the order listed
        below) is named in the message.
    """
    if not self._get_config_value('concurrency', args, yaml_config):
        return

    # Order matters: options are looked up one at a time and the first
    # one that is set is the one reported to the user.
    request_rate_options = (
        'request_rate_range_search_enable',
        'request_rate_range',
        'run_config_search_min_request_rate_range',
        'run_config_search_max_request_rate_range',
    )
    for option in request_rate_options:
        if self._get_config_value(option, args, yaml_config):
            # The CLI flag is the config key with dashes for underscores
            flag = option.replace('_', '-')
            raise TritonModelAnalyzerException(
                f'\nCannot have both `{flag}` and `concurrency` specified in the config/CLI.'
            )
319+
320+
def _check_for_config_search_rate_request_conflicts(
        self, args: Namespace, yaml_config: Optional[Dict[str,
                                                          List]]) -> None:
    """
    Rejects configurations that specify a min/max concurrency for the
    run config search together with any of the request rate options.

    Parameters
    ----------
    args : Namespace
        Parsed CLI arguments, forwarded to `_get_config_value`
    yaml_config : Optional[Dict[str, List]]
        Parsed YAML config, forwarded to `_get_config_value`

    Raises
    ------
    TritonModelAnalyzerException
        If `run_config_search_max_concurrency` or
        `run_config_search_min_concurrency` has a truthy value and a
        request rate option does too. Only the first conflicting option
        (in the order listed below) is named in the message.
    """
    # `or` short-circuits: min is only looked up when max is not set
    concurrency_search_set = (
        self._get_config_value('run_config_search_max_concurrency', args,
                               yaml_config) or
        self._get_config_value('run_config_search_min_concurrency', args,
                               yaml_config))
    if not concurrency_search_set:
        return

    # Order matters: options are looked up one at a time and the first
    # one that is set is the one reported to the user.
    request_rate_options = (
        'request_rate_range_search_enable',
        'request_rate_range',
        'run_config_search_min_request_rate_range',
        'run_config_search_max_request_rate_range',
    )
    for option in request_rate_options:
        if self._get_config_value(option, args, yaml_config):
            # The CLI flag is the config key with dashes for underscores
            flag = option.replace('_', '-')
            raise TritonModelAnalyzerException(
                f'\nCannot have both `{flag}` and `run-config-search-min/max-concurrency` specified in the config/CLI.'
            )
349+
291350
def _preprocess_and_verify_arguments(self):
292351
"""
293352
Enforces some rules on the config.

model_analyzer/config/input/config_command_profile.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@
3434
DEFAULT_OUTPUT_MODEL_REPOSITORY, DEFAULT_OVERRIDE_OUTPUT_REPOSITORY_FLAG, \
3535
DEFAULT_PERF_ANALYZER_CPU_UTIL, DEFAULT_PERF_ANALYZER_PATH, DEFAULT_PERF_MAX_AUTO_ADJUSTS, \
3636
DEFAULT_PERF_OUTPUT_FLAG, DEFAULT_RUN_CONFIG_MAX_CONCURRENCY, DEFAULT_RUN_CONFIG_MIN_CONCURRENCY, \
37+
DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE_RANGE, DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE_RANGE, \
3738
DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE, DEFAULT_RUN_CONFIG_SEARCH_MODE, \
39+
DEFAULT_REQUEST_RATE_RANGE_SEARCH_ENABLE, \
3840
DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT, DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT, \
3941
DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE, DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE, \
4042
DEFAULT_RUN_CONFIG_SEARCH_DISABLE, DEFAULT_TRITON_DOCKER_IMAGE, DEFAULT_TRITON_GRPC_ENDPOINT, \
@@ -440,6 +442,14 @@ def _add_profile_models_configs(self):
440442
description=
441443
"Comma-delimited list of concurrency values or ranges <start:end:step>"
442444
" to be used during profiling"))
445+
self._add_config(
446+
ConfigField(
447+
'request_rate_range',
448+
flags=['-rrr', '--request-rate-range'],
449+
field_type=ConfigListNumeric(int),
450+
description=
451+
"Comma-delimited list of request rate range values or ranges <start:end:step>"
452+
" to be used during profiling"))
443453
self._add_config(
444454
ConfigField(
445455
'reload_model_disable',
@@ -509,7 +519,7 @@ def _add_run_search_configs(self):
509519
default_value=False,
510520
flags=['--early-exit-enable'],
511521
description=
512-
'Flag to indicate if Model Analyzer can skip some configurations when manually searching concurrency or max_batch_size'
522+
'Flag to indicate if Model Analyzer can skip some configurations when manually searching concurrency/request rate range, or max_batch_size'
513523
))
514524
self._add_config(
515525
ConfigField(
@@ -529,6 +539,24 @@ def _add_run_search_configs(self):
529539
description=
530540
"Min concurrency value that run config search should start with."
531541
))
542+
self._add_config(
543+
ConfigField(
544+
'run_config_search_max_request_rate_range',
545+
flags=['--run-config-search-max-request-rate-range'],
546+
field_type=ConfigPrimitive(int),
547+
default_value=DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE_RANGE,
548+
description=
549+
"Max request rate range value that run config search should not go beyond that."
550+
))
551+
self._add_config(
552+
ConfigField(
553+
'run_config_search_min_request_rate_range',
554+
flags=['--run-config-search-min-request-rate-range'],
555+
field_type=ConfigPrimitive(int),
556+
default_value=DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE_RANGE,
557+
description=
558+
"Min request rate range value that run config search should start with."
559+
))
532560
self._add_config(
533561
ConfigField(
534562
'run_config_search_max_instance_count',
@@ -595,6 +623,16 @@ def _add_run_search_configs(self):
595623
DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE,
596624
description=
597625
"Enable the profiling of all supplied models concurrently."))
626+
self._add_config(
627+
ConfigField(
628+
'request_rate_range_search_enable',
629+
flags=['--request-rate-range-search-enable'],
630+
field_type=ConfigPrimitive(bool),
631+
parser_args={'action': 'store_true'},
632+
default_value=DEFAULT_REQUEST_RATE_RANGE_SEARCH_ENABLE,
633+
description=
634+
"Enables the searching of request rate range (instead of concurrency)."
635+
))
598636

599637
def _add_triton_configs(self):
600638
"""

model_analyzer/config/input/config_defaults.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,13 +41,16 @@
4141
DEFAULT_CLIENT_PROTOCOL = 'grpc'
4242
DEFAULT_RUN_CONFIG_MAX_CONCURRENCY = 1024
4343
DEFAULT_RUN_CONFIG_MIN_CONCURRENCY = 1
44+
DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE_RANGE = 1024
45+
DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE_RANGE = 1
4446
DEFAULT_RUN_CONFIG_MAX_INSTANCE_COUNT = 5
4547
DEFAULT_RUN_CONFIG_MIN_INSTANCE_COUNT = 1
4648
DEFAULT_RUN_CONFIG_MIN_MODEL_BATCH_SIZE = 1
4749
DEFAULT_RUN_CONFIG_MAX_MODEL_BATCH_SIZE = 128
4850
DEFAULT_RUN_CONFIG_SEARCH_DISABLE = False
4951
DEFAULT_RUN_CONFIG_SEARCH_MODE = 'brute'
5052
DEFAULT_RUN_CONFIG_PROFILE_MODELS_CONCURRENTLY_ENABLE = False
53+
DEFAULT_REQUEST_RATE_RANGE_SEARCH_ENABLE = False
5154
DEFAULT_TRITON_LAUNCH_MODE = 'local'
5255
DEFAULT_TRITON_DOCKER_IMAGE = 'nvcr.io/nvidia/tritonserver:23.04-py3'
5356
DEFAULT_TRITON_HTTP_ENDPOINT = 'localhost:8000'

tests/test_cli.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def get_test_options():
6363
OptionStruct("bool", "profile","--perf-output"),
6464
OptionStruct("bool", "profile","--run-config-search-disable"),
6565
OptionStruct("bool", "profile","--run-config-profile-models-concurrently-enable"),
66+
OptionStruct("bool", "profile","--request-rate-range-search-enable"),
6667
OptionStruct("bool", "profile","--reload-model-disable"),
6768
OptionStruct("bool", "profile","--early-exit-enable"),
6869
OptionStruct("bool", "profile","--skip-summary-reports"),
@@ -78,6 +79,8 @@ def get_test_options():
7879
OptionStruct("int", "profile", "--perf-analyzer-max-auto-adjusts", None, "100", "10"),
7980
OptionStruct("int", "profile", "--run-config-search-min-concurrency", None, "2", "1"),
8081
OptionStruct("int", "profile", "--run-config-search-max-concurrency", None, "100", "1024"),
82+
OptionStruct("int", "profile", "--run-config-search-min-request-rate-range", None, "2", "1"),
83+
OptionStruct("int", "profile", "--run-config-search-max-request-rate-range", None, "100", "1024"),
8184
OptionStruct("int", "profile", "--run-config-search-min-model-batch-size", None, "100", "1"),
8285
OptionStruct("int", "profile", "--run-config-search-max-model-batch-size", None, "100", "128"),
8386
OptionStruct("int", "profile", "--run-config-search-min-instance-count", None, "2", "1"),
@@ -132,6 +135,7 @@ def get_test_options():
132135
# expected_default_value
133136
OptionStruct("intlist", "profile", "--batch-sizes", "-b", "2, 4, 6", "1"),
134137
OptionStruct("intlist", "profile", "--concurrency", "-c", "1, 2, 3", None),
138+
OptionStruct("intlist", "profile", "--request-rate-range", "-rrr", "1, 2, 3", None),
135139
OptionStruct("stringlist", "profile", "--triton-docker-mounts", None, "a:b:c, d:e:f", None, extra_commands=["--triton-launch-mode", "docker"]),
136140
OptionStruct("stringlist", "profile", "--gpus", None, "a, b, c", "all"),
137141
OptionStruct("stringlist", "profile", "--inference-output-fields", None, "a, b, c",

tests/test_config.py

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from .mocks.mock_numba import MockNumba
1919
from .mocks.mock_os import MockOSMethods
2020

21-
from typing import Dict, List, Optional
21+
from typing import Dict, List, Optional, Any
2222
from argparse import Namespace
2323

2424
from .common import test_result_collector as trc
@@ -1980,6 +1980,63 @@ def test_bls_illegal_config_combinations(self):
19801980
with self.assertRaises(TritonModelAnalyzerException):
19811981
self._evaluate_config(args, yaml_content)
19821982

1983+
def test_concurrency_rate_request_config_combinations(self):
1984+
"""
1985+
Test that specifying concurrency (-c) conflicts with each request rate option
1986+
"""
1987+
base_args = [
1988+
'model-analyzer', 'profile', '--model-repository', 'cli-repository',
1989+
'--profile-models', 'modelA', '-c', '1,2,3'
1990+
]
1991+
yaml_content = ''
1992+
1993+
self._test_request_rate_config_conflicts(base_args, yaml_content)
1994+
1995+
def test_config_search_min_rate_request_config_combinations(self):
1996+
"""
1997+
Test that --run-config-search-min-concurrency conflicts with each request rate option
1998+
"""
1999+
base_args = [
2000+
'model-analyzer', 'profile', '--model-repository', 'cli-repository',
2001+
'--profile-models', 'modelA', '--run-config-search-min-concurrency',
2002+
'1'
2003+
]
2004+
yaml_content = ''
2005+
2006+
self._test_request_rate_config_conflicts(base_args, yaml_content)
2007+
2008+
def test_config_search_max_rate_request_config_combinations(self):
2009+
"""
2010+
Test that --run-config-search-max-concurrency conflicts with each request rate option
2011+
"""
2012+
base_args = [
2013+
'model-analyzer', 'profile', '--model-repository', 'cli-repository',
2014+
'--profile-models', 'modelA', '--run-config-search-max-concurrency',
2015+
'1'
2016+
]
2017+
yaml_content = ''
2018+
2019+
self._test_request_rate_config_conflicts(base_args, yaml_content)
2020+
2021+
def _test_request_rate_config_conflicts(self, base_args: List[Any],
                                        yaml_content: str) -> None:
    """
    Checks that each request rate option conflicts with the base arguments.

    Parameters
    ----------
    base_args : List[Any]
        Command line that already contains the option under test
    yaml_content : str
        YAML config contents passed through to `_test_arg_conflict`
    """
    conflicting_options: List[List[Any]] = [
        ['--request-rate-range-search-enable'],
        ['--request-rate-range', '1,2,3'],
        ['--run-config-search-min-request-rate-range', '1'],
        ['--run-config-search-max-request-rate-range', '1'],
    ]
    for new_args in conflicting_options:
        # subTest reports every option that fails to conflict, rather
        # than stopping at the first one
        with self.subTest(option=new_args[0]):
            self._test_arg_conflict(base_args, yaml_content, new_args)
2033+
2034+
def _test_arg_conflict(self, base_args: List[Any], yaml_content: str,
                       new_args: List[Any]) -> None:
    """
    Asserts that evaluating `base_args` extended with `new_args` raises
    a TritonModelAnalyzerException.

    Parameters
    ----------
    base_args : List[Any]
        Base command line; left unmodified
    yaml_content : str
        YAML config contents passed to `_evaluate_config`
    new_args : List[Any]
        Extra command line arguments expected to conflict with `base_args`
    """
    # List concatenation already builds a new list, so base_args is not
    # mutated and no explicit copy is needed
    args = base_args + new_args
    with self.assertRaises(TritonModelAnalyzerException):
        self._evaluate_config(args, yaml_content)
2039+
19832040

19842041
if __name__ == '__main__':
19852042
unittest.main()

0 commit comments

Comments
 (0)