@@ -52,40 +52,39 @@ def __init__(self, cli_config: ConfigCommandProfile, model_name: str,
5252 custom perf analyzer configuration
5353
5454 model_parameters: Dict
55- model constraints for batch_sizes and/or concurrency
55+ model constraints for batch_sizes, concurrency and/or request rate
5656
5757 early_exit_enable: Bool
58- If true, this class can early exit during search of concurrency
58+ If true, this class can early exit during search of concurrency/request rate
5959 """
6060
6161 self ._early_exit_enable = early_exit_enable
6262
6363 # All configs are pregenerated in _configs[][]
6464 # Indexed as follows:
65- # _configs[_curr_batch_size_index][_curr_concurrency_index ]
65+ # _configs[_curr_batch_size_index][_curr_parameter_index ]
6666 #
67- self ._curr_concurrency_index = 0
67+ self ._curr_parameter_index = 0
6868 self ._curr_batch_size_index = 0
6969 self ._configs : List [List [PerfAnalyzerConfig ]] = []
70- self ._concurrency_warning_printed = False
70+ self ._parameter_warning_printed = False
7171
7272 # Flag to indicate we have started to return results
7373 #
7474 self ._generator_started = False
7575
7676 self ._last_results : List [RunConfigMeasurement ] = []
77- self ._concurrency_results : List [Optional [RunConfigMeasurement ]] = []
77+ self ._parameter_results : List [Optional [RunConfigMeasurement ]] = []
7878 self ._batch_size_results : List [Optional [RunConfigMeasurement ]] = []
7979
8080 self ._model_name = model_name
8181 self ._perf_analyzer_flags = model_perf_analyzer_flags
8282
8383 self ._batch_sizes = sorted (model_parameters ['batch_sizes' ])
84- self ._concurrencies = self ._create_concurrency_list (
85- cli_config , model_parameters )
86-
8784 self ._cli_config = cli_config
8885
86+ self ._model_parameters = model_parameters
87+ self ._parameters = self ._create_parameter_list ()
8988 self ._generate_perf_configs ()
9089
9190 @staticmethod
@@ -96,13 +95,13 @@ def throughput_gain_valid_helper(
9695 if len (throughputs ) < min_tries :
9796 return True
9897
99- tputs_in_range = [
98+ throughputs_in_range = [
10099 PerfAnalyzerConfigGenerator .get_throughput (throughputs [x ])
101100 for x in range (- min_tries , 0 )
102101 ]
103102
104- first = tputs_in_range [0 ]
105- best = max (tputs_in_range )
103+ first = throughputs_in_range [0 ]
104+ best = max (throughputs_in_range )
106105
107106 gain = (best - first ) / first
108107
@@ -127,7 +126,7 @@ def get_configs(self) -> Generator[PerfAnalyzerConfig, None, None]:
127126
128127 self ._generator_started = True
129128 config = self ._configs [self ._curr_batch_size_index ][
130- self ._curr_concurrency_index ]
129+ self ._curr_parameter_index ]
131130 yield (config )
132131
133132 if self ._last_results_erroneous ():
@@ -154,87 +153,116 @@ def set_last_results(
154153 measurement = [max (valid_measurements )]
155154
156155 self ._last_results = measurement
157- self ._concurrency_results .extend (measurement )
156+ self ._parameter_results .extend (measurement )
157+
158+ def _create_parameter_list (self ) -> List [int ]:
159+ # The two possible parameters are request rate or concurrency
160+ # Concurrency is the default and will be used unless the user specifies
161+ # request rate, either as a model parameter or a config option
162+ if self ._config_specifies_request_rate ():
163+ return self ._create_request_rate_list ()
164+ else :
165+ return self ._create_concurrency_list ()
166+
167+ def _config_specifies_request_rate (self ) -> bool :
168+ return self ._model_parameters ['request_rate' ] or \
169+ self ._cli_config .request_rate_search_enable or \
170+ self ._cli_config .get_config ()['run_config_search_min_request_rate' ].is_set_by_user () or \
171+ self ._cli_config .get_config ()['run_config_search_max_request_rate' ].is_set_by_user ()
172+
173+ def _create_request_rate_list (self ) -> List [int ]:
174+ if self ._model_parameters ['request_rate' ]:
175+ return sorted (self ._model_parameters ['request_rate' ])
176+ elif self ._cli_config .run_config_search_disable :
177+ return [1 ]
178+ else :
179+ return utils .generate_doubled_list (
180+ self ._cli_config .run_config_search_min_request_rate ,
181+ self ._cli_config .run_config_search_max_request_rate )
158182
159- def _create_concurrency_list (self , cli_config : ConfigCommandProfile ,
160- model_parameters : dict ) -> List [int ]:
161- if model_parameters ['concurrency' ]:
162- return sorted (model_parameters ['concurrency' ])
163- elif cli_config .run_config_search_disable :
183+ def _create_concurrency_list (self ) -> List [int ]:
184+ if self ._model_parameters ['concurrency' ]:
185+ return sorted (self ._model_parameters ['concurrency' ])
186+ elif self ._cli_config .run_config_search_disable :
164187 return [1 ]
165188 else :
166189 return utils .generate_doubled_list (
167- cli_config .run_config_search_min_concurrency ,
168- cli_config .run_config_search_max_concurrency )
190+ self . _cli_config .run_config_search_min_concurrency ,
191+ self . _cli_config .run_config_search_max_concurrency )
169192
def _generate_perf_configs(self) -> None:
    """
    Pregenerate every perf analyzer config and store them in self._configs.

    One inner list is appended to self._configs per combination returned by
    utils.generate_parameter_combinations(); each inner list holds one
    PerfAnalyzerConfig per entry of self._parameters, in the same order.
    """
    perf_config_non_parameter_values = (
        self._create_non_parameter_perf_config_values())

    # The search mode cannot change while the configs are being built, so
    # pick the perf analyzer option once instead of once per config.
    if self._config_specifies_request_rate():
        parameter_option = 'request-rate-range'
    else:
        parameter_option = 'concurrency-range'

    for params in utils.generate_parameter_combinations(
            perf_config_non_parameter_values):
        configs_with_parameter = []
        for parameter in self._parameters:
            new_perf_config = PerfAnalyzerConfig()

            new_perf_config.update_config_from_profile_config(
                self._model_name, self._cli_config)

            new_perf_config.update_config(params)
            new_perf_config.update_config({parameter_option: parameter})

            # User provided flags can override the search parameters
            new_perf_config.update_config(self._perf_analyzer_flags)

            configs_with_parameter.append(new_perf_config)
        self._configs.append(configs_with_parameter)
192220
193- def _create_non_concurrency_perf_config_params (self ) -> dict :
194- perf_config_params = {
221+ def _create_non_parameter_perf_config_values (self ) -> dict :
222+ perf_config_values = {
195223 'batch-size' : self ._batch_sizes ,
196224 }
197225
198- return perf_config_params
226+ return perf_config_values
199227
200228 def _step (self ) -> None :
201- self ._step_concurrency ()
229+ self ._step_parameter ()
202230
203- if self ._done_walking_concurrencies ():
231+ if self ._done_walking_parameters ():
204232 self ._add_best_throughput_to_batch_sizes ()
205- self ._reset_concurrencies ()
233+ self ._reset_parameters ()
206234 self ._step_batch_size ()
207235
208236 def _add_best_throughput_to_batch_sizes (self ) -> None :
209- if self ._concurrency_results :
237+ if self ._parameter_results :
210238 # type is List[Optional[RCM]]
211- best = max (self ._concurrency_results ) #type: ignore
239+ best = max (self ._parameter_results ) #type: ignore
212240 self ._batch_size_results .append (best )
213241
214- def _reset_concurrencies (self ) -> None :
215- self ._curr_concurrency_index = 0
216- self ._concurrency_warning_printed = False
217- self ._concurrency_results = []
242+ def _reset_parameters (self ) -> None :
243+ self ._curr_parameter_index = 0
244+ self ._parameter_warning_printed = False
245+ self ._parameter_results = []
218246
219- def _step_concurrency (self ) -> None :
220- self ._curr_concurrency_index += 1
247+ def _step_parameter (self ) -> None :
248+ self ._curr_parameter_index += 1
221249
222250 def _step_batch_size (self ) -> None :
223251 self ._curr_batch_size_index += 1
224252
225253 def _done_walking (self ) -> bool :
226254 return self ._done_walking_batch_sizes ()
227255
228- def _done_walking_concurrencies (self ) -> bool :
229- if len (self ._concurrencies ) == self ._curr_concurrency_index :
256+ def _done_walking_parameters (self ) -> bool :
257+ if len (self ._parameters ) == self ._curr_parameter_index :
230258 return True
231- if self ._early_exit_enable and not self ._concurrency_throughput_gain_valid (
259+ if self ._early_exit_enable and not self ._parameter_throughput_gain_valid (
232260 ):
233- if not self ._concurrency_warning_printed :
261+ if not self ._parameter_warning_printed :
234262 logger .info (
235263 "No longer increasing concurrency as throughput has plateaued"
236264 )
237- self ._concurrency_warning_printed = True
265+ self ._parameter_warning_printed = True
238266 return True
239267 return False
240268
@@ -255,10 +283,10 @@ def _done_walking_batch_sizes(self) -> bool:
255283 def _last_results_erroneous (self ) -> bool :
256284 return not self ._last_results or self ._last_results [- 1 ] is None
257285
def _parameter_throughput_gain_valid(self) -> bool:
    """
    Check if any of the last X parameter results resulted in valid gain,
    where X is THROUGHPUT_MINIMUM_CONSECUTIVE_CONCURRENCY_TRIES and the
    required gain is THROUGHPUT_MINIMUM_GAIN
    """
    gain_is_valid = PerfAnalyzerConfigGenerator.throughput_gain_valid_helper(
        throughputs=self._parameter_results,
        min_tries=THROUGHPUT_MINIMUM_CONSECUTIVE_CONCURRENCY_TRIES,
        min_gain=THROUGHPUT_MINIMUM_GAIN)

    return gain_is_valid
264292
0 commit comments