Skip to content

Commit 47ec159

Browse files
committed
brute working
1 parent 4844973 commit 47ec159

File tree

6 files changed

+52
-22
lines changed

6 files changed

+52
-22
lines changed

model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,11 @@ def _binary_search_over_top_results(self) -> Generator[RunConfig, None, None]:
132132
for result in top_results:
133133
run_config = deepcopy(result.run_config())
134134
model_parameters = self._get_model_parameters(model_name)
135+
perf_analyzer_flags = self._get_model_perf_analyzer_flags(model_name)
135136
parameter_search = ParameterSearch(
136137
config=self._config,
137138
model_parameters=model_parameters,
139+
perf_analyzer_flags=perf_analyzer_flags,
138140
skip_parameter_sweep=True,
139141
)
140142
for parameter in parameter_search.search_parameters():
@@ -151,6 +153,11 @@ def _get_model_parameters(self, model_name: str) -> Dict:
151153

152154
return {}
153155

156+
def _get_model_perf_analyzer_flags(self, model_name: str) -> Dict:
    """
    Look up the perf_analyzer flags configured for the named model.

    Parameters
    ----------
    model_name: str
        Name compared against model_name() of each entry in self._models

    Returns
    -------
    Dict
        The perf_analyzer_flags() of the first model whose name matches,
        or an empty dict when no model in self._models has that name
    """
    for model in self._models:
        if model_name == model.model_name():
            return model.perf_analyzer_flags()

    # No match: return an empty dict (as _get_model_parameters does) rather
    # than falling through to an implicit None, so callers can safely run
    # membership tests such as `"request-intervals" in flags`.
    return {}
160+
154161
def _set_parameter(
155162
self, run_config: RunConfig, model_parameters: Dict, parameter: int
156163
) -> RunConfig:

model_analyzer/config/generate/perf_analyzer_config_generator.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,9 @@ def _create_parameter_list(self) -> List[int]:
172172
# The possible parameters are a request-intervals file, request rate, or concurrency
173173
# Concurrency is the default and will be used unless the user specifies
174174
# request rate, either as a model parameter or a config option
175-
if self._cli_config.is_request_rate_specified(self._model_parameters):
175+
if "request-intervals" in self._perf_analyzer_flags:
176+
return [self._perf_analyzer_flags["request-intervals"]]
177+
elif self._cli_config.is_request_rate_specified(self._model_parameters):
176178
return self._create_request_rate_list()
177179
else:
178180
return self._create_concurrency_list()
@@ -207,6 +209,7 @@ def _generate_perf_configs(self) -> None:
207209
for params in utils.generate_parameter_combinations(
208210
perf_config_non_parameter_values
209211
):
212+
# FIXME: rename configs_with_concurrency; it can also hold request-rate or request-intervals configs
210213
configs_with_concurrency = []
211214
for parameter in self._parameters:
212215
new_perf_config = PerfAnalyzerConfig()
@@ -217,7 +220,9 @@ def _generate_perf_configs(self) -> None:
217220

218221
new_perf_config.update_config(params)
219222

220-
if self._cli_config.is_request_rate_specified(self._model_parameters):
223+
if "request-intervals" in self._perf_analyzer_flags:
224+
pass
225+
elif self._cli_config.is_request_rate_specified(self._model_parameters):
221226
new_perf_config.update_config({"request-rate-range": parameter})
222227
else:
223228
new_perf_config.update_config({"concurrency-range": parameter})

model_analyzer/perf_analyzer/perf_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ def extract_model_specific_parameters(self):
273273
"batch-size": self._options["-b"],
274274
"concurrency-range": self._args["concurrency-range"],
275275
"request-rate-range": self._args["request-rate-range"],
276+
"request-intervals": self._args["request-intervals"],
276277
}
277278

278279
@classmethod

model_analyzer/plots/detailed_plot.py

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,6 @@ def __init__(self, name, title, bar_width=0.5):
8989
self._fig.set_figheight(8)
9090
self._fig.set_figwidth(12)
9191

92-
self._ax_latency.set_xlabel("Concurrent Client Requests")
9392
self._ax_latency.set_ylabel(latency_axis_label)
9493
self._ax_throughput.set_ylabel(throughput_axis_label)
9594

@@ -144,6 +143,18 @@ def add_run_config_measurement(self, run_config_measurement):
144143
]
145144
)
146145

146+
if (
147+
"request-intervals" in run_config_measurement.model_specific_pa_params()[0]
148+
and run_config_measurement.model_specific_pa_params()[0][
149+
"request-intervals"
150+
]
151+
):
152+
self._data["request-intervals"].append(
153+
run_config_measurement.model_specific_pa_params()[0][
154+
"request-intervals"
155+
]
156+
)
157+
147158
self._data["perf_throughput"].append(
148159
run_config_measurement.get_non_gpu_metric_value(tag="perf_throughput")
149160
)
@@ -164,19 +175,20 @@ def plot_data(self):
164175
on this plot's Axes object
165176
"""
166177

167-
# Need to change the default x-axis plot title for request rates
168-
if "request_rate" in self._data and self._data["request_rate"][0]:
178+
# Update the x-axis plot title
179+
if "request-intervals" in self._data and self._data["request-intervals"][0]:
180+
self._ax_latency.set_xlabel("Request Intervals File")
181+
sort_indices_key = "request-intervals"
182+
elif "request_rate" in self._data and self._data["request_rate"][0]:
169183
self._ax_latency.set_xlabel("Client Request Rate")
170-
171-
# Sort the data by request rate or concurrency
172-
if "request_rate" in self._data and self._data["request_rate"][0]:
173-
sort_indices = list(
174-
zip(*sorted(enumerate(self._data["request_rate"]), key=lambda x: x[1]))
175-
)[0]
184+
sort_indices_key = "request_rate"
176185
else:
177-
sort_indices = list(
178-
zip(*sorted(enumerate(self._data["concurrency"]), key=lambda x: x[1]))
179-
)[0]
186+
self._ax_latency.set_xlabel("Concurrent Client Requests")
187+
sort_indices_key = "concurrency"
188+
189+
sort_indices = list(
190+
zip(*sorted(enumerate(self._data[sort_indices_key]), key=lambda x: x[1]))
191+
)[0]
180192

181193
sorted_data = {
182194
key: [data_list[i] for i in sort_indices]
@@ -197,10 +209,7 @@ def plot_data(self):
197209
)
198210
bottoms = None
199211

200-
if "request_rate" in self._data:
201-
sorted_data["indices"] = list(map(str, sorted_data["request_rate"]))
202-
else:
203-
sorted_data["indices"] = list(map(str, sorted_data["concurrency"]))
212+
sorted_data["indices"] = list(map(str, sorted_data[sort_indices_key]))
204213

205214
# Plot latency breakdown with the x-axis values cast to strings to make the x-axis uniform
206215
for metric, label in labels.items():

model_analyzer/record/metrics_manager.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,11 @@ def _get_triton_metrics_gpus(self):
753753
def _print_run_config_info(self, run_config):
754754
for model_run_config in run_config.model_run_configs():
755755
perf_config = model_run_config.perf_config()
756-
if perf_config["request-rate-range"]:
756+
if perf_config["request-intervals"]:
757+
logger.info(
758+
f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-intervals={perf_config['request-intervals']}"
759+
)
760+
elif perf_config["request-rate-range"]:
757761
logger.info(
758762
f"Profiling {model_run_config.model_variant_name()}: client batch size={perf_config['batch-size']}, request-rate-range={perf_config['request-rate-range']}"
759763
)

model_analyzer/result/parameter_search.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def __init__(
4545
self,
4646
config: ConfigCommandProfile,
4747
model_parameters: dict = {},
48+
perf_analyzer_flags: dict = {},
4849
skip_parameter_sweep: bool = False,
4950
) -> None:
5051
"""
@@ -60,6 +61,8 @@ def __init__(
6061
model_parameters
6162
)
6263

64+
self._inference_load_is_custom = "request-intervals" in perf_analyzer_flags
65+
6366
if self._parameter_is_request_rate:
6467
self._min_parameter_index = int(
6568
log2(config.run_config_search_min_request_rate)
@@ -98,10 +101,11 @@ def search_parameters(self) -> Generator[int, None, None]:
98101
a binary parameter search around the point where the constraint
99102
violated
100103
"""
101-
yield from self._perform_parameter_sweep()
104+
if not self._inference_load_is_custom:
105+
yield from self._perform_parameter_sweep()
102106

103-
if self._was_constraint_violated():
104-
yield from self._perform_binary_parameter_search()
107+
if self._was_constraint_violated():
108+
yield from self._perform_binary_parameter_search()
105109

106110
def _perform_parameter_sweep(self) -> Generator[int, None, None]:
107111
for parameter in (

0 commit comments

Comments
 (0)