Commit 6019852

Revert LLM changes (#818)
* Revert "Add MVP LLM support to MA (#783)". This reverts commit f15427e.
* Fixing merge conflicts
1 parent 9cde160 commit 6019852

32 files changed: +463 -1820 lines changed

model_analyzer/analyzer.py

Lines changed: 2 additions & 8 deletions
@@ -136,14 +136,8 @@ def profile(
 
         if not self._config.skip_summary_reports:
             self._create_summary_tables(verbose)
-
-            # TODO TMA-1401: need to figure out summary reporting for LLMs
-            if not self._config.is_llm_model():
-                self._create_summary_reports(mode)
-
-            # TODO TMA-1443: need to figure out detailed reporting for LLMs
-            if not self._config.is_llm_model():
-                self._create_detailed_reports(mode)
+            self._create_summary_reports(mode)
+            self._create_detailed_reports(mode)
 
         self._check_for_perf_analyzer_errors()
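Net effect of this hunk: with the LLM gating reverted, report creation is unconditional again whenever summary reports are not skipped. A condensed, self-contained sketch of the resulting profile() tail; the stub class below is hypothetical and only the method names mirror the diff:

    # Hypothetical stub (not repository code); only the method names mirror the diff.
    class AnalyzerStub:
        def __init__(self, skip_summary_reports: bool = False):
            self._skip_summary_reports = skip_summary_reports

        def profile_tail(self, mode: str = "online", verbose: bool = False) -> None:
            if not self._skip_summary_reports:
                self._create_summary_tables(verbose)
                self._create_summary_reports(mode)    # no longer gated on is_llm_model()
                self._create_detailed_reports(mode)   # no longer gated on is_llm_model()
            self._check_for_perf_analyzer_errors()

        def _create_summary_tables(self, verbose: bool) -> None: print("summary tables")
        def _create_summary_reports(self, mode: str) -> None: print("summary reports")
        def _create_detailed_reports(self, mode: str) -> None: print("detailed reports")
        def _check_for_perf_analyzer_errors(self) -> None: print("perf analyzer error check")

    AnalyzerStub().profile_tail()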

model_analyzer/config/generate/automatic_model_config_generator.py

Lines changed: 5 additions & 8 deletions
@@ -79,7 +79,10 @@ def __init__(
             logger.info("")
             AutomaticModelConfigGenerator._log_first_run = True
 
-        self._set_min_max_search_values(config)
+        self._max_instance_count = config.run_config_search_max_instance_count
+        self._min_instance_count = config.run_config_search_min_instance_count
+        self._max_model_batch_size = config.run_config_search_max_model_batch_size
+        self._min_model_batch_size = config.run_config_search_min_model_batch_size
 
         self._instance_kind = "KIND_CPU" if self._cpu_only else "KIND_GPU"
 
@@ -88,7 +91,7 @@ def __init__(
 
         self._reset_max_batch_size()
 
-        if not self._early_exit_enable and not self._config.is_llm_model():
+        if not self._early_exit_enable:
             raise TritonModelAnalyzerException(
                 "Early exit disable is not supported in automatic model config generator"
             )
@@ -159,9 +162,3 @@ def _get_curr_param_combo(self) -> Dict:
             config["dynamic_batching"] = {}
 
         return config
-
-    def _set_min_max_search_values(self, config: ConfigCommandProfile) -> None:
-        self._max_instance_count = config.run_config_search_max_instance_count
-        self._min_instance_count = config.run_config_search_min_instance_count
-        self._max_model_batch_size = config.run_config_search_max_model_batch_size
-        self._min_model_batch_size = config.run_config_search_min_model_batch_size

model_analyzer/config/generate/brute_plus_binary_search_run_config_generator.py renamed to model_analyzer/config/generate/brute_plus_binary_parameter_search_run_config_generator.py

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
from model_analyzer.config.run.run_config import RunConfig
3030
from model_analyzer.constants import LOGGER_NAME
3131
from model_analyzer.device.gpu_device import GPUDevice
32-
from model_analyzer.result.inference_load_search import InferenceLoadSearch
32+
from model_analyzer.result.parameter_search import ParameterSearch
3333
from model_analyzer.result.result_manager import ResultManager
3434
from model_analyzer.result.run_config_measurement import RunConfigMeasurement
3535
from model_analyzer.triton.client.client import TritonClient
@@ -39,10 +39,10 @@
3939
logger = logging.getLogger(LOGGER_NAME)
4040

4141

42-
class BrutePlusBinarySearchRunConfigGenerator(ConfigGeneratorInterface):
42+
class BrutePlusBinaryParameterSearchRunConfigGenerator(ConfigGeneratorInterface):
4343
"""
4444
First run BruteRunConfigGenerator for a brute search, then for
45-
automatic searches use InferenceLoadSearch to perform a binary search
45+
automatic searches use ParameterSearch to perform a binary search
4646
"""
4747

4848
def __init__(
@@ -116,11 +116,7 @@ def _create_brute_run_config_generator(self) -> BruteRunConfigGenerator:
116116

117117
def _can_binary_search_top_results(self) -> bool:
118118
for model in self._models:
119-
if (
120-
model.parameters()["concurrency"]
121-
or model.parameters()["request_rate"]
122-
or self._config.is_llm_model()
123-
):
119+
if model.parameters()["concurrency"] or model.parameters()["request_rate"]:
124120
return False
125121

126122
return True
@@ -136,19 +132,17 @@ def _binary_search_over_top_results(self) -> Generator[RunConfig, None, None]:
136132
for result in top_results:
137133
run_config = deepcopy(result.run_config())
138134
model_parameters = self._get_model_parameters(model_name)
139-
inference_load_search = InferenceLoadSearch(
135+
parameter_search = ParameterSearch(
140136
config=self._config,
141137
model_parameters=model_parameters,
142-
skip_inference_load_sweep=True,
138+
skip_parameter_sweep=True,
143139
)
144-
for inference_load in inference_load_search.search_inference_loads():
145-
run_config = self._set_inference_load(
146-
run_config, model_parameters, inference_load
140+
for parameter in parameter_search.search_parameters():
141+
run_config = self._set_parameter(
142+
run_config, model_parameters, parameter
147143
)
148144
yield run_config
149-
inference_load_search.add_run_config_measurement(
150-
self._last_measurement
151-
)
145+
parameter_search.add_run_config_measurement(self._last_measurement)
152146

153147
def _get_model_parameters(self, model_name: str) -> Dict:
154148
for model in self._models:
@@ -157,14 +151,14 @@ def _get_model_parameters(self, model_name: str) -> Dict:
157151

158152
return {}
159153

160-
def _set_inference_load(
161-
self, run_config: RunConfig, model_parameters: Dict, inference_load: int
154+
def _set_parameter(
155+
self, run_config: RunConfig, model_parameters: Dict, parameter: int
162156
) -> RunConfig:
163157
for model_run_config in run_config.model_run_configs():
164158
perf_config = model_run_config.perf_config()
165159
if self._config.is_request_rate_specified(model_parameters):
166-
perf_config.update_config({"request-rate-range": inference_load})
160+
perf_config.update_config({"request-rate-range": parameter})
167161
else:
168-
perf_config.update_config({"concurrency-range": inference_load})
162+
perf_config.update_config({"concurrency-range": parameter})
169163

170164
return run_config
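The loop in _binary_search_over_top_results follows a yield-then-report-back pattern: each candidate concurrency or request rate is applied to a run config and yielded, and the resulting measurement is handed back to the search before the next candidate is produced. A minimal sketch of that interplay; ParameterSearch's internals are not part of this diff, so the stand-in class below is hypothetical and only the method names search_parameters and add_run_config_measurement mirror the code above:

    # Hypothetical stand-in for ParameterSearch; only the two method names mirror
    # the diff. Real candidate selection (a binary search over top results) is
    # simplified to a fixed list for illustration.
    class StandInParameterSearch:
        def __init__(self, candidates=(1, 2, 4, 8)):
            self._candidates = list(candidates)
            self._measurements = []

        def search_parameters(self):
            for parameter in self._candidates:
                yield parameter  # caller profiles with this concurrency / request rate

        def add_run_config_measurement(self, measurement) -> None:
            # A real implementation would use this feedback to steer the search.
            self._measurements.append(measurement)

    search = StandInParameterSearch()
    for parameter in search.search_parameters():
        throughput = parameter * 100  # placeholder for an actual profiling run
        search.add_run_config_measurement(throughput)
        print(f"concurrency {parameter}: throughput {throughput}")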

model_analyzer/config/generate/brute_run_config_generator.py

Lines changed: 1 addition & 1 deletion
@@ -80,7 +80,7 @@ def __init__(
         self._curr_results: List = [[] for n in range(self._num_models)]
         self._curr_generators: Dict[int, ConfigGeneratorInterface] = {}
 
-        self._skip_default_config = skip_default_config or config.is_llm_model()
+        self._skip_default_config = skip_default_config
 
     def set_last_results(
         self, measurements: List[Optional[RunConfigMeasurement]]

model_analyzer/config/generate/generator_utils.py

Lines changed: 3 additions & 39 deletions
@@ -15,7 +15,7 @@
 # limitations under the License.
 
 from itertools import product
-from typing import Dict, List, Optional
+from typing import Dict, List
 
 
 class GeneratorUtils:
@@ -80,8 +80,8 @@ def generate_combinations(value: object) -> List:
     @staticmethod
     def generate_parameter_combinations(params: Dict) -> List[Dict]:
         """
-        Generate a list of all possible sub-dictionaries
-        from given dictionary. The sub-dictionaries will
+        Generate a list of all possible subdictionaries
+        from given dictionary. The subdictionaries will
         have all the same keys, but only one value from
         each key.
 
@@ -108,45 +108,9 @@ def generate_doubled_list(min_value: int, max_value: int) -> List[int]:
         The value that the generated list will not exceed
         """
 
-        assert min_value <= max_value
-
         list = []
         val = 1 if min_value == 0 else min_value
         while val <= max_value:
             list.append(val)
             val *= 2
         return list
-
-    @staticmethod
-    def extract_value_from_request_parameter(request_parameter: Optional[str]) -> int:
-        if not request_parameter:
-            return 0
-
-        # Format is: <parameter>:<value>:<type>
-        # Example: max_tokens:10:int
-        _, value, _ = request_parameter.split(":")
-
-        # this catches the case for non-LLM models where the user has specified request parameters
-        try:
-            int(value)
-        except ValueError as _:
-            return 0
-
-        return int(value)
-
-    @staticmethod
-    def extract_text_input_length_from_input_data(input_data: Optional[str]) -> int:
-        if not input_data:
-            return 0
-
-        # format is input-data-<num>.json
-        _, _, text_input_length = input_data.split("-")
-        text_input_length, _ = text_input_length.split(".")
-
-        # this catches the case for non-LLM models where the user has specified input data
-        try:
-            int(text_input_length)
-        except ValueError as _:
-            return 0
-
-        return int(text_input_length)
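The retained generate_doubled_list helper (its body is visible in the context lines above, minus the removed assert) walks from min_value to max_value in powers of two and treats a minimum of 0 as 1. A standalone copy of the same logic, for quick reference:

    from typing import List

    # Standalone copy of the doubling logic kept by this hunk; the name mirrors
    # GeneratorUtils.generate_doubled_list, and the removed assert is omitted.
    def generate_doubled_list(min_value: int, max_value: int) -> List[int]:
        values: List[int] = []
        val = 1 if min_value == 0 else min_value
        while val <= max_value:
            values.append(val)
            val *= 2
        return values

    print(generate_doubled_list(0, 16))  # [1, 2, 4, 8, 16]
    print(generate_doubled_list(3, 20))  # [3, 6, 12]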

model_analyzer/config/generate/model_run_config_generator.py

Lines changed: 2 additions & 10 deletions
@@ -150,13 +150,5 @@ def _determine_early_exit_enables(
         concurrency_specified = model.parameters()["concurrency"]
         config_parameters_exist = model.model_config_parameters()
 
-        if config.is_llm_model():
-            self._pacg_early_exit_enable = False
-            self._mcg_early_exit_enable = False
-        else:
-            self._pacg_early_exit_enable = (
-                early_exit_enable or not concurrency_specified
-            )
-            self._mcg_early_exit_enable = (
-                early_exit_enable or not config_parameters_exist
-            )
+        self._pacg_early_exit_enable = early_exit_enable or not concurrency_specified
+        self._mcg_early_exit_enable = early_exit_enable or not config_parameters_exist
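With the LLM branch gone, the two early-exit flags depend only on the user's settings again. A small illustration of how the restored expressions resolve; the wrapper function and the inputs below are illustrative only, while the two boolean expressions mirror the diff:

    # Illustrative evaluation of the restored early-exit expressions.
    def early_exit_flags(early_exit_enable, concurrency_specified, config_parameters_exist):
        pacg_early_exit_enable = early_exit_enable or not concurrency_specified
        mcg_early_exit_enable = early_exit_enable or not config_parameters_exist
        return pacg_early_exit_enable, mcg_early_exit_enable

    # No explicit concurrency and no model-config parameters: both searches may exit early.
    print(early_exit_flags(False, [], {}))        # (True, True)
    # Concurrency values supplied and early exit not requested: the parameter sweep runs in full.
    print(early_exit_flags(False, [16, 32], {}))  # (False, True)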
