Skip to content

Commit 8169200

Browse files
authored
Sweep experiments script (#504)
* Experiment updates * PR feedback
1 parent 5a75a83 commit 8169200

8 files changed

+357
-36
lines changed

experiments/checkpoint_experiment_data.py

Lines changed: 56 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from experiments.experiment_data import ExperimentData
1616
from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
1717
from unittest.mock import MagicMock
18+
from copy import deepcopy
1819

1920

2021
class CheckpointExperimentData(ExperimentData):
@@ -24,18 +25,31 @@ class CheckpointExperimentData(ExperimentData):
2425

2526
def __init__(self, config):
2627
super().__init__()
28+
self._default_run_config = None
2729
self._load_checkpoint(config)
2830

31+
def get_default_config_dict(self):
    """
    Return an independent copy of the default model config as a dict

    The dict comes from the model config of the first model run config
    held by the stored default run config. It is deep-copied before being
    modified, and the "cpu_only" entry is removed from the copy that is
    returned.
    """
    first_model_run_config = self._default_run_config.model_run_configs()[0]
    config_dict = deepcopy(first_model_run_config.model_config().to_dict())

    del config_dict["cpu_only"]
    return config_dict
37+
2938
def _load_checkpoint(self, config):
3039
state_manager = AnalyzerStateManager(config, MagicMock())
3140
state_manager.load_checkpoint(checkpoint_required=True)
3241

3342
results = state_manager.get_state_variable('ResultManager.results')
43+
3444
model_name = ",".join([x.model_name() for x in config.profile_models])
3545
model_measurements = results.get_model_measurements_dict(model_name)
3646
for (run_config,
3747
run_config_measurements) in model_measurements.values():
3848

49+
if run_config.model_variants_name(
50+
) == model_name + "_config_default":
51+
self._default_run_config = run_config
52+
3953
# Due to the way that data is stored in the AnalyzerStateManager, the
4054
# run_config only represents the model configuration used. The
4155
# perf_analyzer information for each measurement associated with it
@@ -46,6 +60,46 @@ def _load_checkpoint(self, config):
4660
for (perf_analyzer_string,
4761
run_config_measurement) in run_config_measurements.items():
4862

63+
run_config_measurement.set_model_config_constraints(
64+
model_config_constraints=[config.constraints])
65+
run_config_measurement.set_metric_weightings(
66+
metric_objectives=[config.objectives])
4967
pa_key = self._make_pa_key_from_cli_string(perf_analyzer_string)
50-
self._add_run_config_measurement_from_keys(
51-
ma_key, pa_key, run_config, run_config_measurement)
68+
69+
existing_measurement = self._get_run_config_measurement_from_keys(
70+
ma_key, pa_key, skip_warn=True)
71+
if not existing_measurement or run_config_measurement > existing_measurement:
72+
self._add_run_config_measurement_from_keys(
73+
ma_key, pa_key, run_config, run_config_measurement)
74+
75+
if self._default_run_config is None:
76+
print(f"No default config for {model_name}")
77+
exit(1)
78+
79+
self._print_map()
80+
81+
def _print_map(self):
    """
    Print a 10x10 table of throughput and p99 latency values

    One row is printed per max_batch_size (1, 2, 4, ... 512). Each row holds
    one tab-prefixed cell per instance_count (1 through 10), formatted as
    "<perf_throughput>:<perf_latency_p99>". The measurement for a cell is
    looked up under the key built from
    2 * instance_count * max_batch_size after it has been passed through
    _clamp_to_power_of_two. Cells with no measurement print as 0.0:0.0.
    """
    for batch_exponent in range(10):
        max_batch_size = 2**batch_exponent
        cells = []

        for instance_count in range(1, 11):
            ma_key = f"instance_count={instance_count},max_batch_size={max_batch_size}"
            pa_key = str(
                self._clamp_to_power_of_two(2 * instance_count *
                                            max_batch_size))

            measurement = self._get_run_config_measurement_from_keys(
                ma_key, pa_key, skip_warn=True)

            if measurement:
                tput = measurement.get_non_gpu_metric_value('perf_throughput')
                lat = measurement.get_non_gpu_metric_value('perf_latency_p99')
            else:
                # Nothing stored for this cell; show zeros
                tput = lat = 0

            cells.append(f"\t{tput:4.1f}:{lat:4.1f}")

        print("".join(cells))

experiments/config_command_experiment.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,16 @@ class ConfigCommandExperiment(ConfigCommandProfile):
2424

2525
def _fill_config(self):
2626
super()._fill_config()
27+
self._add_config(
28+
ConfigField(
29+
'exponential_inst_count',
30+
field_type=ConfigPrimitive(bool),
31+
flags=['--exponential-inst-count'],
32+
parser_args={'action': 'store_true'},
33+
default_value=False,
34+
description=
35+
'Whether or not the inst count dimension should be linear or exponential'
36+
))
2737
self._add_config(
2838
ConfigField('radius',
2939
field_type=ConfigPrimitive(int),

experiments/evaluate_config_generator.py

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from experiment_data import ExperimentData
1919
from checkpoint_experiment_data import CheckpointExperimentData
2020
from experiment_file_writer import ExperimentFileWriter
21+
from unittest.mock import MagicMock, patch
2122

2223

2324
class EvaluateConfigGenerator:
@@ -35,6 +36,13 @@ def __init__(self, model_name, data_path, output_path, other_args):
3536
self._checkpoint_data = CheckpointExperimentData(self._config_command)
3637
self._profile_data = ExperimentData()
3738

39+
self._default_config_dict = self._checkpoint_data.get_default_config_dict(
40+
)
41+
p = patch(
42+
'model_analyzer.config.generate.base_model_config_generator.BaseModelConfigGenerator.get_base_model_config_dict',
43+
MagicMock(return_value=self._default_config_dict))
44+
p.start()
45+
3846
def execute_generator(self, generator_name):
3947

4048
generator = GeneratorExperimentFactory.create_generator(
@@ -59,11 +67,14 @@ def _run_generator(self, cg):
5967
for run_config in cg.get_configs():
6068
run_config_measurement = self._checkpoint_data.get_run_config_measurement(
6169
run_config)
62-
self._profile_data.add_run_config_measurement(
63-
run_config, run_config_measurement)
6470

6571
if run_config_measurement:
72+
run_config_measurement.set_metric_weightings(
73+
metric_objectives=[self._config_command.objectives])
6674
run_config_measurement.set_model_config_constraints(
6775
model_config_constraints=[self._config_command.constraints])
6876

77+
self._profile_data.add_run_config_measurement(
78+
run_config, run_config_measurement)
79+
6980
cg.set_last_results([run_config_measurement])

experiments/experiment_config_command_creator.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from tests.common.test_utils import convert_to_bytes
1615
from tests.mocks.mock_config import MockConfig
1716
from tests.mocks.mock_model_config import MockModelConfig
17+
from tests.common.test_utils import convert_to_bytes
1818
from model_analyzer.cli.cli import CLI
1919
from config_command_experiment import ConfigCommandExperiment
20+
import re
2021

2122

2223
class ExperimentConfigCommandCreator:
@@ -26,22 +27,35 @@ class ExperimentConfigCommandCreator:
2627

2728
@staticmethod
2829
def make_config(data_path, model_name, other_args):
29-
mock_model_config = MockModelConfig("")
30-
mock_model_config.start()
3130

32-
checkpoint_dir = f"{data_path}/{model_name}"
31+
ckpt = re.search('(.+)\/(\d\.ckpt)', data_path)
32+
if ckpt:
33+
checkpoint_dir = ckpt.group(1)
34+
else:
35+
checkpoint_dir = f"{data_path}/{model_name}"
3336

3437
#yapf: disable
3538
args = [
3639
'model-analyzer', 'profile',
3740
'--profile-models', model_name,
3841
'--model-repository', data_path,
39-
'--checkpoint-directory', checkpoint_dir,
40-
'-f', 'path-to-config-file'
42+
'--checkpoint-directory', checkpoint_dir
4143
]
4244
args += other_args
4345

44-
yaml_content = convert_to_bytes("")
46+
if '-f' not in args and '--config-file' not in args:
47+
args += ['-f', 'path-to-config-file']
48+
yaml_content = convert_to_bytes("")
49+
else:
50+
index = args.index('-f') if '-f' in args else args.index('--config-file')
51+
yaml_file = args[index + 1]
52+
53+
with open(yaml_file, 'r') as f:
54+
yaml_content = f.read()
55+
yaml_content = convert_to_bytes(yaml_content)
56+
57+
mock_model_config = MockModelConfig("")
58+
mock_model_config.start()
4559

4660
mock_config = MockConfig(args, yaml_content)
4761
mock_config.start()

experiments/experiment_data.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -87,23 +87,27 @@ def _add_run_config_measurement_from_keys(self, ma_key, pa_key, run_config,
8787
curr_dict[ma_key][pa_key] = run_config_measurement
8888

8989
def _update_best_trackers(self, run_config, run_config_measurement):
    """
    Record the given measurement as the best one seen, if it qualifies

    The measurement replaces the tracked best only when it passes its
    constraints and either no best exists yet or it compares greater than
    the current best. The matching run config is stored alongside it.
    """
    if not run_config_measurement.is_passing_constraints():
        return

    current_best = self._best_run_config_measurement
    if current_best and not (run_config_measurement > current_best):
        return

    self._best_run_config_measurement = run_config_measurement
    self._best_run_config = run_config
9695

97-
def _get_run_config_measurement_from_keys(self, ma_key, pa_key):
96+
def _get_run_config_measurement_from_keys(self,
                                          ma_key,
                                          pa_key,
                                          skip_warn=False):
    """
    Look up the measurement stored under a model config key and a
    perf analyzer key

    Returns the stored measurement, or None when either key is absent.
    Unless skip_warn is set, an absent key prints a warning and bumps the
    missing-measurement counter.
    """
    warning = None
    if ma_key not in self._data:
        warning = f"WARNING: Model config {ma_key} not in results"
    elif pa_key not in self._data[ma_key]:
        warning = f"WARNING: Model config {ma_key}, concurrency={pa_key} not in results"

    if warning is None:
        return self._data[ma_key][pa_key]

    # NOTE(review): the counter increment is assumed to be guarded by
    # skip_warn together with the print -- confirm against the real file
    if not skip_warn:
        print(warning)
        self._missing_measurement_count += 1
    return None

experiments/experiment_evaluator.py

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,16 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from experiment_data import ExperimentData
16+
1517

1618
class ExperimentEvaluator:
1719
"""
1820
Class to compare the results of a config generator execution against
1921
a checkpoint of raw data
2022
"""
2123

22-
def __init__(self, raw_data, profile_data):
24+
def __init__(self, raw_data: ExperimentData, profile_data: ExperimentData):
2325
self._raw_data = raw_data
2426
self._profile_data = profile_data
2527

@@ -43,6 +45,9 @@ def print_results(self):
4345
print(
4446
f"Overall best throughput: {overall_best_measurement.get_non_gpu_metric_value('perf_throughput')}"
4547
)
48+
print(
49+
f"Overall best latency: {overall_best_measurement.get_non_gpu_metric_value('perf_latency_p99')}"
50+
)
4651
print()
4752
print(
4853
f"Generator num measurements: {self._profile_data.get_run_config_measurement_count()}"
@@ -56,14 +61,29 @@ def print_results(self):
5661
print(
5762
f"Generator best config: {self._run_config_to_string(generator_best_run_config)}"
5863
)
59-
print(
60-
f"Generator best throughput: {generator_best_measurement.get_non_gpu_metric_value('perf_throughput')}"
61-
)
64+
65+
if generator_best_measurement:
66+
best_throughput = generator_best_measurement.get_non_gpu_metric_value(
67+
'perf_throughput')
68+
best_latency = generator_best_measurement.get_non_gpu_metric_value(
69+
'perf_latency_p99')
70+
overall_best_throughput = overall_best_measurement.get_non_gpu_metric_value(
71+
'perf_throughput')
72+
percentile = round(best_throughput / overall_best_throughput, 2)
73+
else:
74+
best_throughput = None
75+
best_latency = None
76+
percentile = None
77+
78+
print(f"Generator best throughput: {best_throughput}")
79+
print(f"Generator best latency: {best_latency}")
80+
print(f"Percentile: {percentile}")
6281
print()
6382

6483
def _run_config_to_string(self, run_config):
    """
    Render a run config as text, one model config per line

    Parameters
    ----------
    run_config
        Object exposing model_run_configs(); may be None or otherwise falsy

    Returns
    -------
    str or None
        The get_config() output of each model run config's model config,
        joined with newlines, or None when run_config is falsy
    """
    if not run_config:
        # Explicit rather than falling off the end of the function
        return None

    # No local named `str`: that would shadow the builtin
    return "\n".join(f"{model_run_config.model_config().get_config()}"
                     for model_run_config in run_config.model_run_configs())

experiments/generator_experiment_factory.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
from model_analyzer.config.generate.brute_run_config_generator import BruteRunConfigGenerator
16+
from model_analyzer.config.generate.model_variant_name_manager import ModelVariantNameManager
1617
from model_analyzer.config.generate.quick_run_config_generator import QuickRunConfigGenerator
1718
from model_analyzer.config.generate.search_config import SearchConfig
1819
from model_analyzer.config.generate.search_dimension import SearchDimension
@@ -53,10 +54,17 @@ def create_generator(generator_name, config_command):
5354

5455
#yapf: disable
5556
for i, _ in enumerate(config_command.profile_models):
56-
dimensions.add_dimensions(i, [
57-
SearchDimension(f"max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL),
58-
SearchDimension(f"instance_count", SearchDimension.DIMENSION_TYPE_LINEAR)
59-
])
57+
if config_command.exponential_inst_count:
58+
dimensions.add_dimensions(i, [
59+
SearchDimension(f"max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL),
60+
SearchDimension(f"instance_count", SearchDimension.DIMENSION_TYPE_EXPONENTIAL)
61+
])
62+
else:
63+
dimensions.add_dimensions(i, [
64+
SearchDimension(f"max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL),
65+
SearchDimension(f"instance_count", SearchDimension.DIMENSION_TYPE_LINEAR)
66+
])
67+
6068
#yapf: enable
6169

6270
search_config = SearchConfig(
@@ -65,10 +73,11 @@ def create_generator(generator_name, config_command):
6573
step_magnitude=config_command.magnitude,
6674
min_initialized=config_command.min_initialized)
6775

76+
mvn = ModelVariantNameManager()
6877
generator = QuickRunConfigGenerator(search_config, config_command,
6978
MagicMock(),
7079
config_command.profile_models,
71-
MagicMock())
80+
MagicMock(), mvn)
7281
return generator
7382
else:
7483
raise Exception(f"Unknown generator {generator_name}")

0 commit comments

Comments
 (0)