Skip to content

Commit cdb5f77

Browse files
authored
More experiment sweep changes (#509)
* Changes to sweeping experiments of quick search
* Minor PR feedback
* Fix constructor after rebase
1 parent 35bca92 commit cdb5f77

File tree

9 files changed

+452
-230
lines changed

9 files changed

+452
-230
lines changed

experiments/checkpoint_experiment_data.py

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,16 @@
1616
from model_analyzer.state.analyzer_state_manager import AnalyzerStateManager
1717
from unittest.mock import MagicMock
1818
from copy import deepcopy
19+
import re
1920

2021

2122
class CheckpointExperimentData(ExperimentData):
2223
"""
2324
Extends ExperimentData to be able to preload data from a checkpoint
2425
"""
2526

27+
LOAD_ONLY_VISABLE = True
28+
2629
def __init__(self, config):
2730
super().__init__()
2831
self._default_run_config = None
@@ -66,6 +69,10 @@ def _load_checkpoint(self, config):
6669
metric_objectives=[config.objectives])
6770
pa_key = self._make_pa_key_from_cli_string(perf_analyzer_string)
6871

72+
if CheckpointExperimentData.LOAD_ONLY_VISABLE:
73+
if not self._are_keys_visable_to_algorithm(ma_key, pa_key):
74+
continue
75+
6976
existing_measurement = self._get_run_config_measurement_from_keys(
7077
ma_key, pa_key, skip_warn=True)
7178
if not existing_measurement or run_config_measurement > existing_measurement:
@@ -79,10 +86,18 @@ def _load_checkpoint(self, config):
7986
self._print_map()
8087

8188
def _print_map(self):
82-
for i in range(0, 10):
83-
row_str = ""
89+
row_str = "\t\t"
90+
for j in range(0, 10):
91+
row_str += f" [{j}]\t\t"
92+
print(row_str)
93+
94+
has_exponential_inst_count = self._has_exponential_inst_count()
95+
96+
for i in range(0, 100):
97+
row_has_data = False
98+
row_str = f"\t[{i}]"
8499
for j in range(0, 10):
85-
instance_count = j + 1
100+
instance_count = 2**j if has_exponential_inst_count else j + 1
86101
max_batch_size = 2**i
87102

88103
ma_key = f"instance_count={instance_count},max_batch_size={max_batch_size}"
@@ -97,9 +112,38 @@ def _print_map(self):
97112
tput = 0
98113
lat = 0
99114
if measurement:
115+
row_has_data = True
100116
tput = measurement.get_non_gpu_metric_value(
101117
'perf_throughput')
102118
lat = measurement.get_non_gpu_metric_value(
103119
'perf_latency_p99')
104120
row_str += f"\t{tput:4.1f}:{lat:4.1f}"
105-
print(row_str)
121+
122+
# Print at least the first 7 rows, and then stop when we hit an empty row
123+
# (Some databases don't have data for the first 6 rows)
124+
if row_has_data or i < 7:
125+
print(row_str)
126+
else:
127+
break
128+
129+
def _has_exponential_inst_count(self) -> bool:
130+
# See if instance count of 3 is in the database. If not, it is exponential (1,2,4,8)
131+
ret = False
132+
ma_key = f"instance_count=3,max_batch_size=1"
133+
pa_key = "8"
134+
measurement = self._get_run_config_measurement_from_keys(ma_key,
135+
pa_key,
136+
skip_warn=True)
137+
if not measurement:
138+
ret = True
139+
return ret
140+
141+
def _are_keys_visable_to_algorithm(self, ma_key, pa_key) -> bool:
142+
# The quick algorithm can only see measurements where the
143+
# concurrency is 2 * inst_count * max_batch_size.
144+
results = re.search("instance_count=(\d+),max_batch_size=(\d+)", ma_key)
145+
inst_count = int(results.group(1))
146+
max_batch_size = int(results.group(2))
147+
expected_pa_key = self._clamp_to_power_of_two(2 * inst_count *
148+
max_batch_size)
149+
return expected_pa_key == int(pa_key)

experiments/config_command_experiment.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,13 @@ def _fill_config(self):
4646
flags=['--magnitude'],
4747
default_value=20,
4848
description='The size of each step'))
49+
self._add_config(
50+
ConfigField('min_mbs_index',
51+
field_type=ConfigPrimitive(int),
52+
flags=['--min-mbs-index'],
53+
default_value=0,
54+
description='The minimum index for max batch size'))
55+
4956
self._add_config(
5057
ConfigField(
5158
'min_initialized',

experiments/evaluate_config_generator.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
from checkpoint_experiment_data import CheckpointExperimentData
2020
from experiment_file_writer import ExperimentFileWriter
2121
from unittest.mock import MagicMock, patch
22+
from model_analyzer.state.analyzer_state import AnalyzerState
23+
from model_analyzer.config.generate.model_variant_name_manager import ModelVariantNameManager
2224

2325

2426
class EvaluateConfigGenerator:
@@ -28,6 +30,9 @@ class EvaluateConfigGenerator:
2830
"""
2931

3032
def __init__(self, model_name, data_path, output_path, other_args):
33+
34+
self._patch_checkpoint_load()
35+
3136
self._output_path = output_path
3237
self._model_name = model_name
3338
self._config_command = ExperimentConfigCommandCreator.make_config(
@@ -43,16 +48,17 @@ def __init__(self, model_name, data_path, output_path, other_args):
4348
MagicMock(return_value=self._default_config_dict))
4449
p.start()
4550

46-
def execute_generator(self, generator_name):
51+
def execute_generator(self):
4752

4853
generator = GeneratorExperimentFactory.create_generator(
49-
generator_name, self._config_command)
54+
self._config_command)
5055

5156
self._run_generator(generator)
5257

5358
def print_results(self):
5459
result_evaluator = ExperimentEvaluator(self._checkpoint_data,
55-
self._profile_data)
60+
self._profile_data,
61+
self._config_command)
5662
result_evaluator.print_results()
5763

5864
def store_results(self):
@@ -78,3 +84,18 @@ def _run_generator(self, cg):
7884
run_config, run_config_measurement)
7985

8086
cg.set_last_results([run_config_measurement])
87+
88+
def _patch_checkpoint_load(self):
89+
90+
old_fn = AnalyzerState.from_dict
91+
92+
def patched_analyzer_state_from_dict(state_dict):
93+
if 'ModelManager.model_variant_name_manager' not in state_dict:
94+
state_dict[
95+
"ModelManager.model_variant_name_manager"] = ModelVariantNameManager(
96+
)
97+
return old_fn(state_dict)
98+
99+
p = patch('model_analyzer.state.analyzer_state.AnalyzerState.from_dict',
100+
patched_analyzer_state_from_dict)
101+
p.start()

experiments/experiment_evaluator.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
from experiment_data import ExperimentData
16+
from experiments.config_command_experiment import ConfigCommandExperiment
1617

1718

1819
class ExperimentEvaluator:
@@ -21,10 +22,15 @@ class ExperimentEvaluator:
2122
a checkpoint of raw data
2223
"""
2324

24-
def __init__(self, raw_data: ExperimentData, profile_data: ExperimentData):
25+
def __init__(self, raw_data: ExperimentData, profile_data: ExperimentData,
26+
config_command: ConfigCommandExperiment):
2527
self._raw_data = raw_data
2628
self._profile_data = profile_data
2729

30+
self._maximize_throughput = True
31+
if config_command.min_throughput is not None:
32+
self._maximize_throughput = False
33+
2834
def print_results(self):
2935
overall_best_measurement = self._raw_data.get_best_run_config_measurement(
3036
)
@@ -69,7 +75,13 @@ def print_results(self):
6975
'perf_latency_p99')
7076
overall_best_throughput = overall_best_measurement.get_non_gpu_metric_value(
7177
'perf_throughput')
72-
percentile = round(best_throughput / overall_best_throughput, 2)
78+
overall_best_latency = overall_best_measurement.get_non_gpu_metric_value(
79+
'perf_latency_p99')
80+
81+
if self._maximize_throughput:
82+
percentile = round(best_throughput / overall_best_throughput, 2)
83+
else:
84+
percentile = round(overall_best_latency / best_latency, 2)
7385
else:
7486
best_throughput = None
7587
best_latency = None

experiments/generator_experiment_factory.py

Lines changed: 49 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,15 @@
1919
from model_analyzer.config.generate.search_dimension import SearchDimension
2020
from model_analyzer.config.generate.search_dimensions import SearchDimensions
2121
from unittest.mock import MagicMock, patch
22+
from model_analyzer.config.generate.run_config_generator_factory import RunConfigGeneratorFactory
2223

2324

2425
class GeneratorExperimentFactory:
2526

27+
command_config = None
28+
2629
@staticmethod
27-
def create_generator(generator_name, config_command):
30+
def create_generator(config_command):
2831
"""
2932
Create and return a RunConfig generator of the requested name
3033
@@ -39,45 +42,53 @@ def create_generator(generator_name, config_command):
3942
The config for model analyzer algorithm experiment
4043
"""
4144

42-
if generator_name == "BruteRunConfigGenerator":
43-
generator = BruteRunConfigGenerator(config_command, MagicMock(),
44-
config_command.profile_models,
45-
MagicMock())
46-
p = patch(
47-
'model_analyzer.config.generate.brute_run_config_generator.BruteRunConfigGenerator.determine_triton_server_env'
48-
)
49-
p.start()
50-
51-
return generator
52-
elif generator_name == "QuickRunConfigGenerator":
53-
dimensions = SearchDimensions()
45+
GeneratorExperimentFactory.config_command = config_command
5446

55-
#yapf: disable
56-
for i, _ in enumerate(config_command.profile_models):
57-
if config_command.exponential_inst_count:
58-
dimensions.add_dimensions(i, [
59-
SearchDimension(f"max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL),
60-
SearchDimension(f"instance_count", SearchDimension.DIMENSION_TYPE_EXPONENTIAL)
61-
])
62-
else:
63-
dimensions.add_dimensions(i, [
64-
SearchDimension(f"max_batch_size", SearchDimension.DIMENSION_TYPE_EXPONENTIAL),
65-
SearchDimension(f"instance_count", SearchDimension.DIMENSION_TYPE_LINEAR)
66-
])
47+
p1 = patch(
48+
'model_analyzer.config.generate.run_config_generator_factory.RunConfigGeneratorFactory._get_batching_supported_dimensions',
49+
GeneratorExperimentFactory.get_batching_supported_dimensions)
50+
p2 = patch(
51+
'model_analyzer.config.generate.run_config_generator_factory.RunConfigGeneratorFactory._get_batching_not_supported_dimensions',
52+
GeneratorExperimentFactory.get_batching_not_supported_dimensions)
53+
p1.start()
54+
p2.start()
55+
mvn = ModelVariantNameManager()
56+
generator = RunConfigGeneratorFactory.create_run_config_generator(
57+
config_command, MagicMock(), config_command.profile_models,
58+
MagicMock(), MagicMock(), mvn)
59+
return generator
6760

68-
#yapf: enable
61+
@staticmethod
62+
def get_batching_supported_dimensions():
63+
mbs_min = GeneratorExperimentFactory.config_command.min_mbs_index
64+
ret = [
65+
SearchDimension(f"max_batch_size",
66+
SearchDimension.DIMENSION_TYPE_EXPONENTIAL, mbs_min)
67+
]
68+
if GeneratorExperimentFactory.config_command.exponential_inst_count:
69+
ret.append(
70+
SearchDimension(f"instance_count",
71+
SearchDimension.DIMENSION_TYPE_EXPONENTIAL))
72+
else:
73+
ret.append(
74+
SearchDimension(f"instance_count",
75+
SearchDimension.DIMENSION_TYPE_LINEAR))
76+
return ret
6977

70-
search_config = SearchConfig(
71-
dimensions=dimensions,
72-
radius=config_command.radius,
73-
step_magnitude=config_command.magnitude,
74-
min_initialized=config_command.min_initialized)
78+
@staticmethod
79+
def get_batching_not_supported_dimensions():
80+
mbs_min = GeneratorExperimentFactory.config_command.min_mbs_index
7581

76-
mvn = ModelVariantNameManager()
77-
generator = QuickRunConfigGenerator(search_config, config_command,
78-
MagicMock(),
79-
config_command.profile_models,
80-
MagicMock(), mvn)
81-
return generator
82+
ret = [
83+
SearchDimension(f"concurrency",
84+
SearchDimension.DIMENSION_TYPE_EXPONENTIAL, mbs_min)
85+
]
86+
if GeneratorExperimentFactory.config_command.exponential_inst_count:
87+
ret.append(
88+
SearchDimension(f"instance_count",
89+
SearchDimension.DIMENSION_TYPE_EXPONENTIAL))
8290
else:
83-
raise Exception(f"Unknown generator {generator_name}")
91+
ret.append(
92+
SearchDimension(f"instance_count",
93+
SearchDimension.DIMENSION_TYPE_LINEAR))
94+
return ret

experiments/main.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,6 @@
4949
required=False,
5050
default="./output",
5151
help="The path to the output csv files")
52-
parser.add_argument("--generator",
53-
type=str,
54-
required=True,
55-
help="The name of the config generator to evaluate")
5652
args, other_args = parser.parse_known_args()
5753

5854
if args.verbose:
@@ -63,7 +59,7 @@
6359

6460
ecg = EvaluateConfigGenerator(args.model_name, args.data_path, args.output_path,
6561
other_args)
66-
ecg.execute_generator(args.generator)
62+
ecg.execute_generator()
6763
ecg.print_results()
6864

6965
if args.save:
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
objectives:
16+
- perf_latency_p99

0 commit comments

Comments (0)