Skip to content

Commit 6de64a3

Browse files
nv-braf and tgerdesnv
authored and committed
Brute Force Search for Request Rate Range (#666)
* Changed concurrency to parameter list
* Generalized concurrency into parameter in PACG
* Added unit testing
* Fixing config options (removing range).
* Adding request rate as a parameter
* Unit test to check for request rate in parameters
* Refactoring create_parameter_list
* Adding check for both request rate and model parameters specified
1 parent 985fc23 commit 6de64a3

14 files changed

+378
-166
lines changed

docs/config.md

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -110,8 +110,8 @@ bls_composing_models: <comma-delimited-string-list>
110110
# Batch size values to be used
111111
[ batch_sizes: <comma-delimited-string|list|range> | default: 1 ]
112112
113-
# Request rate range values to be used
114-
[ request_rate_range: <comma-delimited-string|list|range> ]
113+
# Request rate values to be used
114+
[ request_rate: <comma-delimited-string|list|range> ]
115115
116116
# Specifies the maximum number of retries for any retry attempt
117117
[ client_max_retries: <int> | default: 50 ]
@@ -206,20 +206,20 @@ bls_composing_models: <comma-delimited-string-list>
206206
# Maximum instance group count used for the automatic/quick config search
207207
[ run_config_search_max_instance_count: <int> | default: 5 ]
208208
209-
# Minimum request rate range used for the automatic/quick config search
210-
[ run_config_search_min_request_rate_range: <int> | default: 1 ]
209+
# Minimum request rate used for the automatic/quick config search
210+
[ run_config_search_min_request_rate: <int> | default: 16 ]
211211
212-
# Maximum request rate range used for the automatic/quick config search
213-
[ run_config_search_max_request_rate_range: <int> | default: 1024 ]
212+
# Maximum request rate used for the automatic/quick config search
213+
[ run_config_search_max_request_rate: <int> | default: 8192 ]
214214
215215
# Disables automatic config search
216216
[ run_config_search_disable: <bool> | default: false ]
217217
218218
# Enables the profiling of all supplied models concurrently
219219
[ run_config_profile_models_concurrently_enable: <bool> | default: false ]
220220
221-
# Enables the searching of request rate range (instead of concurrency)
222-
[ request_rate_range_search_enable: <bool> | default: false]
221+
# Enables the searching of request rate (instead of concurrency)
222+
[ request_rate_search_enable: <bool> | default: false ]
223223
224224
# Skips the generation of summary reports and tables
225225
[ skip_summary_reports: <bool> | default: false ]

model_analyzer/config/generate/perf_analyzer_config_generator.py

Lines changed: 76 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -52,40 +52,39 @@ def __init__(self, cli_config: ConfigCommandProfile, model_name: str,
5252
custom perf analyzer configuration
5353
5454
model_parameters: Dict
55-
model constraints for batch_sizes and/or concurrency
55+
model constraints for batch_sizes, concurrency and/or request rate
5656
5757
early_exit_enable: Bool
58-
If true, this class can early exit during search of concurrency
58+
If true, this class can early exit during search of concurrency/request rate
5959
"""
6060

6161
self._early_exit_enable = early_exit_enable
6262

6363
# All configs are pregenerated in _configs[][]
6464
# Indexed as follows:
65-
# _configs[_curr_batch_size_index][_curr_concurrency_index]
65+
# _configs[_curr_batch_size_index][_curr_parameter_index]
6666
#
67-
self._curr_concurrency_index = 0
67+
self._curr_parameter_index = 0
6868
self._curr_batch_size_index = 0
6969
self._configs: List[List[PerfAnalyzerConfig]] = []
70-
self._concurrency_warning_printed = False
70+
self._parameter_warning_printed = False
7171

7272
# Flag to indicate we have started to return results
7373
#
7474
self._generator_started = False
7575

7676
self._last_results: List[RunConfigMeasurement] = []
77-
self._concurrency_results: List[Optional[RunConfigMeasurement]] = []
77+
self._parameter_results: List[Optional[RunConfigMeasurement]] = []
7878
self._batch_size_results: List[Optional[RunConfigMeasurement]] = []
7979

8080
self._model_name = model_name
8181
self._perf_analyzer_flags = model_perf_analyzer_flags
8282

8383
self._batch_sizes = sorted(model_parameters['batch_sizes'])
84-
self._concurrencies = self._create_concurrency_list(
85-
cli_config, model_parameters)
86-
8784
self._cli_config = cli_config
8885

86+
self._model_parameters = model_parameters
87+
self._parameters = self._create_parameter_list()
8988
self._generate_perf_configs()
9089

9190
@staticmethod
@@ -96,13 +95,13 @@ def throughput_gain_valid_helper(
9695
if len(throughputs) < min_tries:
9796
return True
9897

99-
tputs_in_range = [
98+
throughputs_in_range = [
10099
PerfAnalyzerConfigGenerator.get_throughput(throughputs[x])
101100
for x in range(-min_tries, 0)
102101
]
103102

104-
first = tputs_in_range[0]
105-
best = max(tputs_in_range)
103+
first = throughputs_in_range[0]
104+
best = max(throughputs_in_range)
106105

107106
gain = (best - first) / first
108107

@@ -127,7 +126,7 @@ def get_configs(self) -> Generator[PerfAnalyzerConfig, None, None]:
127126

128127
self._generator_started = True
129128
config = self._configs[self._curr_batch_size_index][
130-
self._curr_concurrency_index]
129+
self._curr_parameter_index]
131130
yield (config)
132131

133132
if self._last_results_erroneous():
@@ -154,87 +153,116 @@ def set_last_results(
154153
measurement = [max(valid_measurements)]
155154

156155
self._last_results = measurement
157-
self._concurrency_results.extend(measurement)
156+
self._parameter_results.extend(measurement)
157+
158+
def _create_parameter_list(self) -> List[int]:
159+
# The two possible parameters are request rate or concurrency
160+
# Concurrency is the default and will be used unless the user specifies
161+
# request rate, either as a model parameter or a config option
162+
if self._config_specifies_request_rate():
163+
return self._create_request_rate_list()
164+
else:
165+
return self._create_concurrency_list()
166+
167+
def _config_specifies_request_rate(self) -> bool:
168+
return self._model_parameters['request_rate'] or \
169+
self._cli_config.request_rate_search_enable or \
170+
self._cli_config.get_config()['run_config_search_min_request_rate'].is_set_by_user() or \
171+
self._cli_config.get_config()['run_config_search_max_request_rate'].is_set_by_user()
172+
173+
def _create_request_rate_list(self) -> List[int]:
174+
if self._model_parameters['request_rate']:
175+
return sorted(self._model_parameters['request_rate'])
176+
elif self._cli_config.run_config_search_disable:
177+
return [1]
178+
else:
179+
return utils.generate_doubled_list(
180+
self._cli_config.run_config_search_min_request_rate,
181+
self._cli_config.run_config_search_max_request_rate)
158182

159-
def _create_concurrency_list(self, cli_config: ConfigCommandProfile,
160-
model_parameters: dict) -> List[int]:
161-
if model_parameters['concurrency']:
162-
return sorted(model_parameters['concurrency'])
163-
elif cli_config.run_config_search_disable:
183+
def _create_concurrency_list(self) -> List[int]:
184+
if self._model_parameters['concurrency']:
185+
return sorted(self._model_parameters['concurrency'])
186+
elif self._cli_config.run_config_search_disable:
164187
return [1]
165188
else:
166189
return utils.generate_doubled_list(
167-
cli_config.run_config_search_min_concurrency,
168-
cli_config.run_config_search_max_concurrency)
190+
self._cli_config.run_config_search_min_concurrency,
191+
self._cli_config.run_config_search_max_concurrency)
169192

170193
def _generate_perf_configs(self) -> None:
171-
perf_config_non_concurrency_params = self._create_non_concurrency_perf_config_params(
194+
perf_config_non_parameter_values = self._create_non_parameter_perf_config_values(
172195
)
173196

174197
for params in utils.generate_parameter_combinations(
175-
perf_config_non_concurrency_params):
198+
perf_config_non_parameter_values):
176199
configs_with_concurrency = []
177-
for concurrency in self._concurrencies:
200+
for parameter in self._parameters:
178201
new_perf_config = PerfAnalyzerConfig()
179202

180203
new_perf_config.update_config_from_profile_config(
181204
self._model_name, self._cli_config)
182205

183206
new_perf_config.update_config(params)
184-
new_perf_config.update_config(
185-
{'concurrency-range': concurrency})
207+
208+
if self._config_specifies_request_rate():
209+
new_perf_config.update_config(
210+
{'request-rate-range': parameter})
211+
else:
212+
new_perf_config.update_config(
213+
{'concurrency-range': parameter})
186214

187215
# User provided flags can override the search parameters
188216
new_perf_config.update_config(self._perf_analyzer_flags)
189217

190218
configs_with_concurrency.append(new_perf_config)
191219
self._configs.append(configs_with_concurrency)
192220

193-
def _create_non_concurrency_perf_config_params(self) -> dict:
194-
perf_config_params = {
221+
def _create_non_parameter_perf_config_values(self) -> dict:
222+
perf_config_values = {
195223
'batch-size': self._batch_sizes,
196224
}
197225

198-
return perf_config_params
226+
return perf_config_values
199227

200228
def _step(self) -> None:
201-
self._step_concurrency()
229+
self._step_parameter()
202230

203-
if self._done_walking_concurrencies():
231+
if self._done_walking_parameters():
204232
self._add_best_throughput_to_batch_sizes()
205-
self._reset_concurrencies()
233+
self._reset_parameters()
206234
self._step_batch_size()
207235

208236
def _add_best_throughput_to_batch_sizes(self) -> None:
209-
if self._concurrency_results:
237+
if self._parameter_results:
210238
# type is List[Optional[RCM]]
211-
best = max(self._concurrency_results) #type: ignore
239+
best = max(self._parameter_results) #type: ignore
212240
self._batch_size_results.append(best)
213241

214-
def _reset_concurrencies(self) -> None:
215-
self._curr_concurrency_index = 0
216-
self._concurrency_warning_printed = False
217-
self._concurrency_results = []
242+
def _reset_parameters(self) -> None:
243+
self._curr_parameter_index = 0
244+
self._parameter_warning_printed = False
245+
self._parameter_results = []
218246

219-
def _step_concurrency(self) -> None:
220-
self._curr_concurrency_index += 1
247+
def _step_parameter(self) -> None:
248+
self._curr_parameter_index += 1
221249

222250
def _step_batch_size(self) -> None:
223251
self._curr_batch_size_index += 1
224252

225253
def _done_walking(self) -> bool:
226254
return self._done_walking_batch_sizes()
227255

228-
def _done_walking_concurrencies(self) -> bool:
229-
if len(self._concurrencies) == self._curr_concurrency_index:
256+
def _done_walking_parameters(self) -> bool:
257+
if len(self._parameters) == self._curr_parameter_index:
230258
return True
231-
if self._early_exit_enable and not self._concurrency_throughput_gain_valid(
259+
if self._early_exit_enable and not self._parameter_throughput_gain_valid(
232260
):
233-
if not self._concurrency_warning_printed:
261+
if not self._parameter_warning_printed:
234262
logger.info(
235263
"No longer increasing concurrency as throughput has plateaued"
236264
)
237-
self._concurrency_warning_printed = True
265+
self._parameter_warning_printed = True
238266
return True
239267
return False
240268

@@ -255,10 +283,10 @@ def _done_walking_batch_sizes(self) -> bool:
255283
def _last_results_erroneous(self) -> bool:
256284
return not self._last_results or self._last_results[-1] is None
257285

258-
def _concurrency_throughput_gain_valid(self) -> bool:
259-
""" Check if any of the last X concurrency results resulted in valid gain """
286+
def _parameter_throughput_gain_valid(self) -> bool:
287+
""" Check if any of the last X parameter results resulted in valid gain """
260288
return PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
261-
throughputs=self._concurrency_results,
289+
throughputs=self._parameter_results,
262290
min_tries=THROUGHPUT_MINIMUM_CONSECUTIVE_CONCURRENCY_TRIES,
263291
min_gain=THROUGHPUT_MINIMUM_GAIN)
264292

model_analyzer/config/generate/quick_run_config_generator.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -451,10 +451,10 @@ def _calculate_model_batch_size(
451451
batch_size = int(dimension_values.get("max_batch_size", 1))
452452

453453
min_batch_size_is_set_by_config = self._config.get_config(
454-
)['run_config_search_min_model_batch_size'].is_set_by_config()
454+
)['run_config_search_min_model_batch_size'].is_set_by_user()
455455

456456
max_batch_size_is_set_by_config = self._config.get_config(
457-
)['run_config_search_max_model_batch_size'].is_set_by_config()
457+
)['run_config_search_max_model_batch_size'].is_set_by_user()
458458

459459
if min_batch_size_is_set_by_config and batch_size < self._config.run_config_search_min_model_batch_size:
460460
return self._config.run_config_search_min_model_batch_size
@@ -469,10 +469,10 @@ def _calculate_instance_count(
469469
instance_count = int(dimension_values.get("instance_count", 1))
470470

471471
min_instance_count_is_set_by_config = self._config.get_config(
472-
)['run_config_search_min_instance_count'].is_set_by_config()
472+
)['run_config_search_min_instance_count'].is_set_by_user()
473473

474474
max_instance_count_is_set_by_config = self._config.get_config(
475-
)['run_config_search_max_instance_count'].is_set_by_config()
475+
)['run_config_search_max_instance_count'].is_set_by_user()
476476

477477
if min_instance_count_is_set_by_config and instance_count < self._config.run_config_search_min_instance_count:
478478
return self._config.run_config_search_min_instance_count
@@ -489,10 +489,10 @@ def _calculate_concurrency(
489489
concurrency = 2 * model_batch_size * instance_count
490490

491491
min_concurrency_is_set_by_config = self._config.get_config(
492-
)['run_config_search_min_concurrency'].is_set_by_config()
492+
)['run_config_search_min_concurrency'].is_set_by_user()
493493

494494
max_concurrency_is_set_by_config = self._config.get_config(
495-
)['run_config_search_max_concurrency'].is_set_by_config()
495+
)['run_config_search_max_concurrency'].is_set_by_user()
496496

497497
if min_concurrency_is_set_by_config and concurrency < self._config.run_config_search_min_concurrency:
498498
return self._config.run_config_search_min_concurrency

0 commit comments

Comments
 (0)