diff --git a/Dockerfile b/Dockerfile
index fea1cd97e..ebd1f6469 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -71,17 +71,10 @@ RUN chmod +x /opt/triton-model-analyzer/nvidia_entrypoint.sh
 RUN chmod +x build_wheel.sh && \
     ./build_wheel.sh perf_analyzer true && \
     rm -f perf_analyzer
 
-RUN python3 -m pip install nvidia-pyindex && \
-    python3 -m pip install wheels/triton_model_analyzer-*-manylinux*.whl
-# Install other pip packages
-RUN python3 -m pip install coverage
-RUN python3 -m pip install mypy
-RUN python3 -m pip install types-PyYAML
-RUN python3 -m pip install types-requests
-RUN python3 -m pip install types-protobuf
-RUN python3 -m pip install mkdocs
-RUN python3 -m pip install mkdocs-htmlproofer-plugin==0.10.3
-RUN python3 -m pip install yapf==0.32.0
+
+# Install model analyzer and development dependencies
+RUN python3 -m pip install --no-cache-dir wheels/triton_model_analyzer-*-manylinux*.whl && \
+    python3 -m pip install --no-cache-dir -r requirements-dev.txt
 
 RUN apt-get install -y wkhtmltopdf
diff --git a/experiments/experiment_config_command_creator.py b/experiments/experiment_config_command_creator.py
index 998f1c3e4..e9eee8f10 100755
--- a/experiments/experiment_config_command_creator.py
+++ b/experiments/experiment_config_command_creator.py
@@ -37,23 +37,26 @@ def make_config(data_path, model_name, other_args):
     else:
         checkpoint_dir = f"{data_path}/{model_name}"
 
-    # yapf: disable
     args = [
-        'model-analyzer', 'profile',
-        '--profile-models', model_name,
-        '--model-repository', data_path,
-        '--checkpoint-directory', checkpoint_dir
+        "model-analyzer",
+        "profile",
+        "--profile-models",
+        model_name,
+        "--model-repository",
+        data_path,
+        "--checkpoint-directory",
+        checkpoint_dir,
     ]
 
     args += other_args
-    if '-f' not in args and '--config-file' not in args:
-        args += ['-f', 'path-to-config-file']
+    if "-f" not in args and "--config-file" not in args:
+        args += ["-f", "path-to-config-file"]
         yaml_content = convert_to_bytes("")
     else:
-        index = args.index('-f') if '-f' in args else args.index('--config-file')
+        index = args.index("-f") if "-f" in args else args.index("--config-file")
         yaml_file = args[index + 1]
 
-        with open(yaml_file, 'r') as f:
+        with open(yaml_file, "r") as f:
             yaml_content = f.read()
             yaml_content = convert_to_bytes(yaml_content)
 
@@ -65,10 +68,11 @@ def make_config(data_path, model_name, other_args):
     config = ConfigCommandExperiment()
     cli = CLI()
     cli.add_subcommand(
-        cmd='profile',
-        help='Run model inference profiling based on specified CLI or '
-        'config options.',
-        config=config)
+        cmd="profile",
+        help="Run model inference profiling based on specified CLI or "
+        "config options.",
+        config=config,
+    )
     cli.parse()
     mock_config.stop()
diff --git a/experiments/experiment_file_writer.py b/experiments/experiment_file_writer.py
index dc1fd2cec..f19431e35 100755
--- a/experiments/experiment_file_writer.py
+++ b/experiments/experiment_file_writer.py
@@ -56,21 +56,20 @@ def write(self, checkpoint_data, profile_data, radius, min_initialized):
             )
             quick_best_measurement = profile_data.get_best_run_config_measurement()
 
-            # yapf: disable
-            writer.writerow({
-                "overall_num_measurements":
-                    checkpoint_data.get_run_config_measurement_count(),
-                "overall_best_throughput":
-                    overall_best_measurement.get_non_gpu_metric_value("perf_throughput"),
-                "quick_num_measurements":
-                    profile_data.get_run_config_measurement_count(),
-                "missing_num_measurements":
-                    checkpoint_data.get_missing_measurement_count(),
-                "quick_throughput":
-                    quick_best_measurement.get_non_gpu_metric_value("perf_throughput"),
-                "radius": radius,
-                "min_initialized": min_initialized
-            })
-            # yapf: enable
+            writer.writerow(
+                {
+                    "overall_num_measurements": checkpoint_data.get_run_config_measurement_count(),
+                    "overall_best_throughput": overall_best_measurement.get_non_gpu_metric_value(
+                        "perf_throughput"
+                    ),
+                    "quick_num_measurements": profile_data.get_run_config_measurement_count(),
+                    "missing_num_measurements": checkpoint_data.get_missing_measurement_count(),
+                    "quick_throughput": quick_best_measurement.get_non_gpu_metric_value(
+                        "perf_throughput"
+                    ),
+                    "radius": radius,
+                    "min_initialized": min_initialized,
+                }
+            )
         except OSError as e:
             raise TritonModelAnalyzerException(e)
diff --git a/experiments/scripts/test_pa.py b/experiments/scripts/test_pa.py
index 2d94a8211..20105af3d 100755
--- a/experiments/scripts/test_pa.py
+++ b/experiments/scripts/test_pa.py
@@ -343,15 +343,21 @@ def _add_results(self, config, results):
         self._results.append((config, results))
 
     def _get_cmd(self, config: RunConfigData):
-        # yapf: disable
         cmd = [
-            "/usr/local/bin/perf_analyzer", "-v",
-            "-i", config.protocol,
-            "--measurement-mode", config.measurement_mode,
-            "-m", config.model,
-            "-b", str(config.batch_size),
-            "--concurrency-range", str(config.concurrency),
-            "--max-threads", str(config.max_threads)
+            "/usr/local/bin/perf_analyzer",
+            "-v",
+            "-i",
+            config.protocol,
+            "--measurement-mode",
+            config.measurement_mode,
+            "-m",
+            config.model,
+            "-b",
+            str(config.batch_size),
+            "--concurrency-range",
+            str(config.concurrency),
+            "--max-threads",
+            str(config.max_threads),
         ]
         if config.protocol == "http":
             cmd += ["-u", "localhost:8000"]
@@ -362,7 +368,6 @@ def _get_cmd(self, config: RunConfigData):
             cmd += ["--async"]
         else:
             cmd += ["--sync"]
-        # yapf: enable
         return cmd
 
     def _get_dict_combos(self, config: dict):
diff --git a/format.py b/format.py
deleted file mode 100755
index da17728ee..000000000
--- a/format.py
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import argparse
-import os
-import subprocess
-
-import yapf
-
-FLAGS = None
-FORMAT_EXTS = ("proto", "cc", "cu", "h")
-SKIP_PATHS = ("tools",)
-
-
-def visit(path):
-    if FLAGS.verbose:
-        print("visiting " + path)
-
-    valid_ext = False
-    python_file = False
-    for ext in FORMAT_EXTS:
-        if path.endswith("." + ext):
-            valid_ext = True
-            break
-    if path.endswith(".py"):
-        valid_ext = True
-        python_file = True
-    if not valid_ext:
-        if FLAGS.verbose:
-            print("skipping due to extension: " + path)
-        return True
-
-    for skip in SKIP_PATHS:
-        if path.startswith(skip):
-            if FLAGS.verbose:
-                print("skipping due to path prefix: " + path)
-            return True
-
-    if python_file:
-        yapf.yapflib.yapf_api.FormatFile(path, in_place=True, style_config="google")
-        return True
-    else:
-        args = ["clang-format-6.0", "--style=file", "-i"]
-        if FLAGS.verbose:
-            args.append("-verbose")
-        args.append(path)
-
-        ret = subprocess.call(args)
-        if ret != 0:
-            print("format failed for " + path)
-            return False
-
-    return True
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        action="store_true",
-        required=False,
-        default=False,
-        help="Enable verbose output",
-    )
-    parser.add_argument(
-        "paths",
-        type=str,
-        nargs="*",
-        default=None,
-        help="Directories or files to format",
-    )
-    FLAGS = parser.parse_args()
-
-    # Check the version of yapf. Needs a consistent version
-    # of yapf to prevent unnecessary changes in the code.
-    if yapf.__version__ != "0.32.0":
-        print("Needs yapf 0.32.0, but got yapf {}".format(yapf.__version__))
-
-    if (FLAGS.paths is None) or (len(FLAGS.paths) == 0):
-        parser.print_help()
-        exit(1)
-
-    ret = True
-    for path in FLAGS.paths:
-        if not os.path.isdir(path):
-            if not visit(path):
-                ret = False
-        else:
-            for root, dirs, files in os.walk(path):
-                for name in files:
-                    if not visit(os.path.join(root, name)):
-                        ret = False
-
-    exit(0 if ret else 1)
diff --git a/model_analyzer/perf_analyzer/perf_analyzer.py b/model_analyzer/perf_analyzer/perf_analyzer.py
index 66590e6e0..3053d7a6a 100755
--- a/model_analyzer/perf_analyzer/perf_analyzer.py
+++ b/model_analyzer/perf_analyzer/perf_analyzer.py
@@ -89,54 +89,162 @@ class PerfAnalyzer:
     GPU_METRIC_UUID = 0
     GPU_METRIC_VALUE = 1
 
-    # yapf: disable
     PA_SUCCESS, PA_FAIL, PA_RETRY = 0, 1, 2
 
-    METRIC_TAG, CSV_STRING, RECORD_CLASS, REDUCTION_FACTOR  = 0, 1, 2, 3
+    METRIC_TAG, CSV_STRING, RECORD_CLASS, REDUCTION_FACTOR = 0, 1, 2, 3
 
     perf_metric_table = [
-        ["perf_latency_avg", "Avg latency", PerfLatencyAvg, "1000"],
-        ["perf_latency_p90", "p90 latency", PerfLatencyP90, "1000"],
-        ["perf_latency_p95", "p95 latency", PerfLatencyP95, "1000"],
-        ["perf_latency_p99", "p99 latency", PerfLatencyP99, "1000"],
-        ["perf_throughput", "Inferences/Second", PerfThroughput, "1"],
-        ["perf_client_send_recv", "request/response", PerfClientSendRecv, "1000"],
-        ["perf_client_send_recv", "send/recv", PerfClientSendRecv, "1000"],
-        ["perf_client_response_wait", "response wait", PerfClientResponseWait, "1000"],
-        ["perf_server_queue", "Server Queue", PerfServerQueue, "1000"],
-        ["perf_server_compute_infer", "Server Compute Infer", PerfServerComputeInfer, "1000"],
-        ["perf_server_compute_input", "Server Compute Input", PerfServerComputeInput, "1000"],
-        ["perf_server_compute_output", "Server Compute Output", PerfServerComputeOutput, "1000"]
+        ["perf_latency_avg", "Avg latency", PerfLatencyAvg, "1000"],
+        ["perf_latency_p90", "p90 latency", PerfLatencyP90, "1000"],
+        ["perf_latency_p95", "p95 latency", PerfLatencyP95, "1000"],
+        ["perf_latency_p99", "p99 latency", PerfLatencyP99, "1000"],
+        ["perf_throughput", "Inferences/Second", PerfThroughput, "1"],
+        ["perf_client_send_recv", "request/response", PerfClientSendRecv, "1000"],
+        ["perf_client_send_recv", "send/recv", PerfClientSendRecv, "1000"],
+        ["perf_client_response_wait", "response wait", PerfClientResponseWait, "1000"],
"response wait", PerfClientResponseWait, "1000"], + ["perf_server_queue", "Server Queue", PerfServerQueue, "1000"], + [ + "perf_server_compute_infer", + "Server Compute Infer", + PerfServerComputeInfer, + "1000", + ], + [ + "perf_server_compute_input", + "Server Compute Input", + PerfServerComputeInput, + "1000", + ], + [ + "perf_server_compute_output", + "Server Compute Output", + PerfServerComputeOutput, + "1000", + ], ] gpu_metric_table = [ - ["gpu_utilization", "Avg GPU Utilization", GPUUtilization, "0.01"], - ["gpu_power_usage", "Avg GPU Power Usage", GPUPowerUsage, "1"], - ["gpu_used_memory", "Max GPU Memory Usage", GPUUsedMemory, "1000000"], - ["gpu_free_memory", "Total GPU Memory", GPUFreeMemory, "1000000"] + ["gpu_utilization", "Avg GPU Utilization", GPUUtilization, "0.01"], + ["gpu_power_usage", "Avg GPU Power Usage", GPUPowerUsage, "1"], + ["gpu_used_memory", "Max GPU Memory Usage", GPUUsedMemory, "1000000"], + ["gpu_free_memory", "Total GPU Memory", GPUFreeMemory, "1000000"], ] llm_metric_table = [ - ["time_to_first_token_avg", "Time To First Token (ns) avg", TimeToFirstTokenAvg, "1000"], - ["time_to_first_token_min", "Time To First Token (ns) min", TimeToFirstTokenMin, "1000"], - ["time_to_first_token_max", "Time To First Token (ns) max", TimeToFirstTokenMax, "1000"], - ["time_to_first_token_p99", "Time To First Token (ns) p99", TimeToFirstTokenP99, "1000"], - ["time_to_first_token_p95", "Time To First Token (ns) p95", TimeToFirstTokenP95, "1000"], - ["time_to_first_token_p90", "Time To First Token (ns) p90", TimeToFirstTokenP90, "1000"], - ["time_to_first_token_p75", "Time To First Token (ns) p75", TimeToFirstTokenP75, "1000"], - ["time_to_first_token_p50", "Time To First Token (ns) p50", TimeToFirstTokenP50, "1000"], - ["time_to_first_token_p25", "Time To First Token (ns) p25", TimeToFirstTokenP25, "1000"], - ["inter_token_latency_avg", "Inter Token Latency (ns) avg", InterTokenLatencyAvg, "1000"], - ["inter_token_latency_min", "Inter Token Latency (ns) min", InterTokenLatencyMin, "1000"], - ["inter_token_latency_max", "Inter Token Latency (ns) max", InterTokenLatencyMax, "1000"], - ["inter_token_latency_p99", "Inter Token Latency (ns) p99", InterTokenLatencyP99, "1000"], - ["inter_token_latency_p95", "Inter Token Latency (ns) p95", InterTokenLatencyP95, "1000"], - ["inter_token_latency_p90", "Inter Token Latency (ns) p90", InterTokenLatencyP90, "1000"], - ["inter_token_latency_p75", "Inter Token Latency (ns) p75", InterTokenLatencyP75, "1000"], - ["inter_token_latency_p50", "Inter Token Latency (ns) p50", InterTokenLatencyP50, "1000"], - ["inter_token_latency_p25", "Inter Token Latency (ns) p25", InterTokenLatencyP25, "1000"], - ["output_token_throughput", "Output Token Throughput (per sec) avg", OutputTokenThroughput, "1"] + [ + "time_to_first_token_avg", + "Time To First Token (ns) avg", + TimeToFirstTokenAvg, + "1000", + ], + [ + "time_to_first_token_min", + "Time To First Token (ns) min", + TimeToFirstTokenMin, + "1000", + ], + [ + "time_to_first_token_max", + "Time To First Token (ns) max", + TimeToFirstTokenMax, + "1000", + ], + [ + "time_to_first_token_p99", + "Time To First Token (ns) p99", + TimeToFirstTokenP99, + "1000", + ], + [ + "time_to_first_token_p95", + "Time To First Token (ns) p95", + TimeToFirstTokenP95, + "1000", + ], + [ + "time_to_first_token_p90", + "Time To First Token (ns) p90", + TimeToFirstTokenP90, + "1000", + ], + [ + "time_to_first_token_p75", + "Time To First Token (ns) p75", + TimeToFirstTokenP75, + "1000", + ], + [ + 
"time_to_first_token_p50", + "Time To First Token (ns) p50", + TimeToFirstTokenP50, + "1000", + ], + [ + "time_to_first_token_p25", + "Time To First Token (ns) p25", + TimeToFirstTokenP25, + "1000", + ], + [ + "inter_token_latency_avg", + "Inter Token Latency (ns) avg", + InterTokenLatencyAvg, + "1000", + ], + [ + "inter_token_latency_min", + "Inter Token Latency (ns) min", + InterTokenLatencyMin, + "1000", + ], + [ + "inter_token_latency_max", + "Inter Token Latency (ns) max", + InterTokenLatencyMax, + "1000", + ], + [ + "inter_token_latency_p99", + "Inter Token Latency (ns) p99", + InterTokenLatencyP99, + "1000", + ], + [ + "inter_token_latency_p95", + "Inter Token Latency (ns) p95", + InterTokenLatencyP95, + "1000", + ], + [ + "inter_token_latency_p90", + "Inter Token Latency (ns) p90", + InterTokenLatencyP90, + "1000", + ], + [ + "inter_token_latency_p75", + "Inter Token Latency (ns) p75", + InterTokenLatencyP75, + "1000", + ], + [ + "inter_token_latency_p50", + "Inter Token Latency (ns) p50", + InterTokenLatencyP50, + "1000", + ], + [ + "inter_token_latency_p25", + "Inter Token Latency (ns) p25", + InterTokenLatencyP25, + "1000", + ], + [ + "output_token_throughput", + "Output Token Throughput (per sec) avg", + OutputTokenThroughput, + "1", + ], ] - # yapf: enable @staticmethod def get_perf_metrics(): diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 000000000..10c33dc97 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,30 @@ +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# Development and testing dependencies
+# Install with: pip install -r requirements-dev.txt
+
+# Testing
+coverage>=7.0.0
+unittest-parallel>=1.6.0
+
+# Documentation
+mkdocs>=1.5.0
+mkdocs-htmlproofer-plugin==0.10.3
+
+# Type checking
+mypy>=1.9.0
+types-protobuf>=4.24.0
+types-PyYAML>=6.0.0
+types-requests>=2.31.0
diff --git a/setup.py b/setup.py
index 0fd1e06c3..44b29144d 100755
--- a/setup.py
+++ b/setup.py
@@ -35,18 +35,21 @@
 def version(filename="VERSION"):
     with open(os.path.join(filename)) as f:
-        project_version = f.read()
+        project_version = f.read().strip()
     return project_version
 
 
 def req_file(filename):
     with open(os.path.join(filename)) as f:
         content = f.readlines()
-    return [x.strip() for x in content if not x.startswith("#")]
+    return [x.strip() for x in content if not x.startswith("#") and x.strip()]
 
 
 project_version = version()
 install_requires = req_file("requirements.txt")
+dev_requires = (
+    req_file("requirements-dev.txt") if os.path.exists("requirements-dev.txt") else []
+)
 
 try:
     from wheel.bdist_wheel import bdist_wheel as _bdist_wheel
@@ -114,6 +117,9 @@ def get_tag(self):
         "console_scripts": ["model-analyzer = model_analyzer.entrypoint:main"]
     },
     install_requires=install_requires,
+    extras_require={
+        "dev": dev_requires,
+    },
     dependency_links=["https://pypi.ngc.nvidia.com/tritonclient"],
     packages=find_packages(exclude=("tests",)),
     zip_safe=False,
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 612afed5f..10949f9f5 100755
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -49,28 +49,27 @@ def get_test_options():
     """
     Returns the list of OptionStructs that are used for testing.
     """
-    # yapf: disable
     options = [
-        #Boolean options
+        # Boolean options
         # Options format:
        #   (bool, MA step, long_option)
-        OptionStruct("bool", "profile","--override-output-model-repository"),
-        OptionStruct("bool", "profile","--collect-cpu-metrics"),
-        OptionStruct("bool", "profile","--perf-output"),
-        OptionStruct("bool", "profile","--run-config-search-disable"),
-        OptionStruct("bool", "profile","--run-config-profile-models-concurrently-enable"),
-        OptionStruct("bool", "profile","--request-rate-search-enable"),
-        OptionStruct("bool", "profile","--reload-model-disable"),
-        OptionStruct("bool", "profile","--early-exit-enable"),
-        OptionStruct("bool", "profile","--skip-summary-reports"),
-        OptionStruct("bool", "profile","--skip-detailed-reports"),
-        OptionStruct("bool", "profile","--always-report-gpu-metrics"),
-        OptionStruct("bool", "profile","--use-concurrency-formula"),
-        OptionStruct("bool", "profile","--concurrency-sweep-disable"),
-        OptionStruct("bool", "profile","--dcgm-disable"),
-
-
-        #Int/Float options
+        OptionStruct("bool", "profile", "--override-output-model-repository"),
+        OptionStruct("bool", "profile", "--collect-cpu-metrics"),
+        OptionStruct("bool", "profile", "--perf-output"),
+        OptionStruct("bool", "profile", "--run-config-search-disable"),
+        OptionStruct(
+            "bool", "profile", "--run-config-profile-models-concurrently-enable"
+        ),
+        OptionStruct("bool", "profile", "--request-rate-search-enable"),
+        OptionStruct("bool", "profile", "--reload-model-disable"),
+        OptionStruct("bool", "profile", "--early-exit-enable"),
+        OptionStruct("bool", "profile", "--skip-summary-reports"),
+        OptionStruct("bool", "profile", "--skip-detailed-reports"),
+        OptionStruct("bool", "profile", "--always-report-gpu-metrics"),
+        OptionStruct("bool", "profile", "--use-concurrency-formula"),
+        OptionStruct("bool", "profile", "--concurrency-sweep-disable"),
+        OptionStruct("bool", "profile", "--dcgm-disable"),
"--dcgm-disable"), + # Int/Float options # Options format: # (int/float, MA step, long_option, short_option, test_value, expected_default_value) # The following options can be None: @@ -79,29 +78,81 @@ def get_test_options(): OptionStruct("int", "profile", "--client-max-retries", "-r", "125", "50"), OptionStruct("int", "profile", "--duration-seconds", "-d", "10", "3"), OptionStruct("int", "profile", "--perf-analyzer-timeout", None, "100", "600"), - OptionStruct("int", "profile", "--perf-analyzer-max-auto-adjusts", None, "100", "10"), - OptionStruct("int", "profile", "--run-config-search-min-concurrency", None, "2", "1"), - OptionStruct("int", "profile", "--run-config-search-max-concurrency", None, "100", "1024"), - OptionStruct("int", "profile", "--run-config-search-min-request-rate", None, "2", "16"), - OptionStruct("int", "profile", "--run-config-search-max-request-rate", None, "100", "8192"), - OptionStruct("int", "profile", "--run-config-search-min-model-batch-size", None, "100", "1"), - OptionStruct("int", "profile", "--run-config-search-max-model-batch-size", None, "100", "128"), - OptionStruct("int", "profile", "--run-config-search-min-instance-count", None, "2", "1"), - OptionStruct("int", "profile", "--run-config-search-max-instance-count", None, "10", "5"), - OptionStruct("int", "profile", "--run-config-search-max-binary-search-steps", None, "10", "5"), - OptionStruct("int", "profile", "--min-percentage-of-search-space", None, "10", "5"), - OptionStruct("int", "profile", "--max-percentage-of-search-space", None, "5", "10"), + OptionStruct( + "int", "profile", "--perf-analyzer-max-auto-adjusts", None, "100", "10" + ), + OptionStruct( + "int", "profile", "--run-config-search-min-concurrency", None, "2", "1" + ), + OptionStruct( + "int", "profile", "--run-config-search-max-concurrency", None, "100", "1024" + ), + OptionStruct( + "int", "profile", "--run-config-search-min-request-rate", None, "2", "16" + ), + OptionStruct( + "int", + "profile", + "--run-config-search-max-request-rate", + None, + "100", + "8192", + ), + OptionStruct( + "int", + "profile", + "--run-config-search-min-model-batch-size", + None, + "100", + "1", + ), + OptionStruct( + "int", + "profile", + "--run-config-search-max-model-batch-size", + None, + "100", + "128", + ), + OptionStruct( + "int", "profile", "--run-config-search-min-instance-count", None, "2", "1" + ), + OptionStruct( + "int", "profile", "--run-config-search-max-instance-count", None, "10", "5" + ), + OptionStruct( + "int", + "profile", + "--run-config-search-max-binary-search-steps", + None, + "10", + "5", + ), + OptionStruct( + "int", "profile", "--min-percentage-of-search-space", None, "10", "5" + ), + OptionStruct( + "int", "profile", "--max-percentage-of-search-space", None, "5", "10" + ), OptionStruct("int", "profile", "--optuna-min-trials", None, "10", "20"), OptionStruct("int", "profile", "--optuna-max-trials", None, "5", "200"), - OptionStruct("int", "profile", "--optuna-early-exit-threshold", None, "5", "10"), + OptionStruct( + "int", "profile", "--optuna-early-exit-threshold", None, "5", "10" + ), OptionStruct("float", "profile", "--monitoring-interval", "-i", "10.0", "1.0"), - OptionStruct("float", "profile", "--perf-analyzer-cpu-util", None, "10.0", str(psutil.cpu_count() * 80.0)), + OptionStruct( + "float", + "profile", + "--perf-analyzer-cpu-util", + None, + "10.0", + str(psutil.cpu_count() * 80.0), + ), OptionStruct("int", "profile", "--num-configs-per-model", None, "10", "3"), OptionStruct("int", "profile", 
"--num-top-model-configs", None, "10", "0"), OptionStruct("int", "profile", "--latency-budget", None, "200", None), OptionStruct("int", "profile", "--min-throughput", None, "300", None), - - #String options + # String options # Options format: # (string, MA step, long_flag, short_flag, test_value, expected_default_value, expected_failing_value, extra_commands) # The following options can be None: @@ -110,34 +161,190 @@ def get_test_options(): # expected_failing_value # For options with choices, list the test_values in a list of strings OptionStruct("string", "profile", "--config-file", "-f", "baz", None, None), - OptionStruct("string", "profile", "--checkpoint-directory", "-s", "./test_dir", os.path.join(os.getcwd(), "checkpoints"), None), - OptionStruct("string", "profile", "--output-model-repository-path", None, "./test_dir", os.path.join(os.getcwd(), "output_model_repository"), None), - OptionStruct("string", "profile", "--client-protocol", None, ["http", "grpc"], "grpc", "SHOULD_FAIL"), - OptionStruct("string", "profile", "--perf-analyzer-path", None, ".", "perf_analyzer", None), + OptionStruct( + "string", + "profile", + "--checkpoint-directory", + "-s", + "./test_dir", + os.path.join(os.getcwd(), "checkpoints"), + None, + ), + OptionStruct( + "string", + "profile", + "--output-model-repository-path", + None, + "./test_dir", + os.path.join(os.getcwd(), "output_model_repository"), + None, + ), + OptionStruct( + "string", + "profile", + "--client-protocol", + None, + ["http", "grpc"], + "grpc", + "SHOULD_FAIL", + ), + OptionStruct( + "string", + "profile", + "--perf-analyzer-path", + None, + ".", + "perf_analyzer", + None, + ), OptionStruct("string", "profile", "--perf-output-path", None, ".", None, None), - OptionStruct("string", "profile", "--triton-docker-image", None, "test_image", DEFAULT_TRITON_DOCKER_IMAGE, None), - OptionStruct("string", "profile", "--triton-http-endpoint", None, "localhost:4000", "localhost:8000", None), - OptionStruct("string", "profile", "--triton-grpc-endpoint", None, "localhost:4001", "localhost:8001", None), - OptionStruct("string", "profile", "--triton-metrics-url", None, "localhost:4002", "http://localhost:8002/metrics", None), - OptionStruct("string", "profile", "--triton-server-path", None, "test_path", "tritonserver", None), - OptionStruct("string", "profile", "--triton-output-path", None, "test_path", None, None), - OptionStruct("string", "profile", "--triton-launch-mode", None, ["local", "docker", "remote","c_api"], "local", "SHOULD_FAIL"), - OptionStruct("string", "profile", "--triton-install-path", None, "test_path", "/opt/tritonserver", None), - OptionStruct("string", "profile", "--checkpoint-directory", "-s", "./test_dir", os.path.join(os.getcwd(), "checkpoints"), None), - OptionStruct("string", "profile", "--export-path", "-e", "./test_dir", os.getcwd(), None), - OptionStruct("string", "profile", "--filename-model-inference", None, "foo", "metrics-model-inference.csv", None), - OptionStruct("string", "profile", "--filename-model-gpu", None, "foo", "metrics-model-gpu.csv", None), - OptionStruct("string", "profile", "--filename-server-only", None, "foo", "metrics-server-only.csv", None), + OptionStruct( + "string", + "profile", + "--triton-docker-image", + None, + "test_image", + DEFAULT_TRITON_DOCKER_IMAGE, + None, + ), + OptionStruct( + "string", + "profile", + "--triton-http-endpoint", + None, + "localhost:4000", + "localhost:8000", + None, + ), + OptionStruct( + "string", + "profile", + "--triton-grpc-endpoint", + None, + "localhost:4001", + 
"localhost:8001", + None, + ), + OptionStruct( + "string", + "profile", + "--triton-metrics-url", + None, + "localhost:4002", + "http://localhost:8002/metrics", + None, + ), + OptionStruct( + "string", + "profile", + "--triton-server-path", + None, + "test_path", + "tritonserver", + None, + ), + OptionStruct( + "string", "profile", "--triton-output-path", None, "test_path", None, None + ), + OptionStruct( + "string", + "profile", + "--triton-launch-mode", + None, + ["local", "docker", "remote", "c_api"], + "local", + "SHOULD_FAIL", + ), + OptionStruct( + "string", + "profile", + "--triton-install-path", + None, + "test_path", + "/opt/tritonserver", + None, + ), + OptionStruct( + "string", + "profile", + "--checkpoint-directory", + "-s", + "./test_dir", + os.path.join(os.getcwd(), "checkpoints"), + None, + ), + OptionStruct( + "string", "profile", "--export-path", "-e", "./test_dir", os.getcwd(), None + ), + OptionStruct( + "string", + "profile", + "--filename-model-inference", + None, + "foo", + "metrics-model-inference.csv", + None, + ), + OptionStruct( + "string", + "profile", + "--filename-model-gpu", + None, + "foo", + "metrics-model-gpu.csv", + None, + ), + OptionStruct( + "string", + "profile", + "--filename-server-only", + None, + "foo", + "metrics-server-only.csv", + None, + ), OptionStruct("string", "profile", "--config-file", "-f", "baz", None, None), - - OptionStruct("string", "report", "--checkpoint-directory", "-s", "./test_dir", os.path.join(os.getcwd(), "checkpoints"), None), - OptionStruct("string", "report", "--export-path", "-e", "./test_dir", os.getcwd(), None), + OptionStruct( + "string", + "report", + "--checkpoint-directory", + "-s", + "./test_dir", + os.path.join(os.getcwd(), "checkpoints"), + None, + ), + OptionStruct( + "string", "report", "--export-path", "-e", "./test_dir", os.getcwd(), None + ), OptionStruct("string", "report", "--config-file", "-f", "baz", None, None), - OptionStruct("string", "profile", "--triton-docker-shm-size", None, "1G", None, extra_commands=["--triton-launch-mode", "docker"]), - OptionStruct("string", "profile","--run-config-search-mode", None, ["quick", "brute"], "brute", "SHOULD_FAIL"), - OptionStruct("string", "profile", "--model-type", None, ["generic", "LLM"], "generic", None), - - #List Options: + OptionStruct( + "string", + "profile", + "--triton-docker-shm-size", + None, + "1G", + None, + extra_commands=["--triton-launch-mode", "docker"], + ), + OptionStruct( + "string", + "profile", + "--run-config-search-mode", + None, + ["quick", "brute"], + "brute", + "SHOULD_FAIL", + ), + OptionStruct( + "string", + "profile", + "--model-type", + None, + ["generic", "LLM"], + "generic", + None, + ), + # List Options: # Options format: # (intlist/stringlist, MA step, long_flag, short_flag, test_value, expected_default_value, extra_commands) # The following options can be None: @@ -146,15 +353,40 @@ def get_test_options(): OptionStruct("intlist", "profile", "--batch-sizes", "-b", "2, 4, 6", "1"), OptionStruct("intlist", "profile", "--concurrency", "-c", "1, 2, 3", None), OptionStruct("intlist", "profile", "--request-rate", None, "1, 2, 3", None), - OptionStruct("stringlist", "profile", "--triton-docker-mounts", None, "a:b:c, d:e:f", None, extra_commands=["--triton-launch-mode", "docker"]), + OptionStruct( + "stringlist", + "profile", + "--triton-docker-mounts", + None, + "a:b:c, d:e:f", + None, + extra_commands=["--triton-launch-mode", "docker"], + ), OptionStruct("stringlist", "profile", "--gpus", None, "a, b, c", "all"), - 
-        OptionStruct("stringlist", "profile", "--inference-output-fields", None, "a, b, c",
-                     "model_name,batch_size,concurrency,model_config_path,instance_group,max_batch_size,satisfies_constraints,perf_throughput,perf_latency_p99"),
-        OptionStruct("stringlist", "profile", "--gpu-output-fields", None, "a, b, c",
-                     "model_name,gpu_uuid,batch_size,concurrency,model_config_path,instance_group,satisfies_constraints,gpu_used_memory,gpu_utilization,gpu_power_usage"),
-        OptionStruct("stringlist", "profile", "--server-output-fields", None, "a, b, c",
-                     "model_name,gpu_uuid,gpu_used_memory,gpu_utilization,gpu_power_usage"),
-
+        OptionStruct(
+            "stringlist",
+            "profile",
+            "--inference-output-fields",
+            None,
+            "a, b, c",
+            "model_name,batch_size,concurrency,model_config_path,instance_group,max_batch_size,satisfies_constraints,perf_throughput,perf_latency_p99",
+        ),
+        OptionStruct(
+            "stringlist",
+            "profile",
+            "--gpu-output-fields",
+            None,
+            "a, b, c",
+            "model_name,gpu_uuid,batch_size,concurrency,model_config_path,instance_group,satisfies_constraints,gpu_used_memory,gpu_utilization,gpu_power_usage",
+        ),
+        OptionStruct(
+            "stringlist",
+            "profile",
+            "--server-output-fields",
+            None,
+            "a, b, c",
+            "model_name,gpu_uuid,gpu_used_memory,gpu_utilization,gpu_power_usage",
+        ),
         # No OP Options:
         # Option format:
         #   (noop, any MA step, long_flag)
@@ -166,9 +398,16 @@ def get_test_options():
         OptionStruct("noop", "profile", "--profile-models"),
         OptionStruct("noop", "profile", "--bls-composing-models"),
         OptionStruct("noop", "profile", "--cpu-only-composing-models"),
-
         OptionStruct("noop", "report", "--report-model-configs"),
-        OptionStruct("noop", "report", "--output-formats", "-o", ["pdf", "csv", "png"], "pdf", "SHOULD_FAIL"),
+        OptionStruct(
+            "noop",
+            "report",
+            "--output-formats",
+            "-o",
+            ["pdf", "csv", "png"],
+            "pdf",
+            "SHOULD_FAIL",
+        ),
         OptionStruct("noop", "yaml_profile", "constraints"),
         OptionStruct("noop", "yaml_profile", "objectives"),
         OptionStruct("noop", "yaml_profile", "weighting"),
@@ -178,9 +417,8 @@ def get_test_options():
         OptionStruct("noop", "yaml_profile", "triton_docker_labels"),
         OptionStruct("noop", "yaml_profile", "triton_server_environment"),
         OptionStruct("noop", "yaml_profile", "triton_docker_args"),
-        OptionStruct("noop", "yaml_profile", "plots")
+        OptionStruct("noop", "yaml_profile", "plots"),
     ]
-    # yapf: enable
 
     return options
 
@@ -210,16 +448,14 @@ class CLIConfigProfileStruct:
     """
 
     def __init__(self):
-        # yapf: disable
         self.args = [
-            '/usr/local/bin/model-analyzer',
-            'profile',
-            '--model-repository',
-            'foo',
-            '--profile-models',
-            'bar'
+            "/usr/local/bin/model-analyzer",
+            "profile",
+            "--model-repository",
+            "foo",
+            "--profile-models",
+            "bar",
         ]
-        # yapf: enable
         config_profile = ConfigCommandProfile()
         self.cli = CLISubclass()
         self.cli.add_subcommand(cmd="profile", help="", config=config_profile)
@@ -234,14 +470,12 @@ class CLIConfigReportStruct:
     """
 
    def __init__(self):
-        # yapf: disable
         self.args = [
-            '/usr/local/bin/model-analyzer',
-            'report',
-            '--report-model-configs',
-            'a, b, c'
+            "/usr/local/bin/model-analyzer",
+            "report",
+            "--report-model-configs",
+            "a, b, c",
         ]
-        # yapf: enable
         config_report = ConfigCommandReport()
         self.cli = CLISubclass()
         self.cli.add_subcommand(cmd="report", help="", config=config_report)
diff --git a/tests/test_coordinate_data.py b/tests/test_coordinate_data.py
index a668fd3d1..97a68f6e1 100755
--- a/tests/test_coordinate_data.py
+++ b/tests/test_coordinate_data.py
@@ -29,12 +29,9 @@ def _construct_rcm(
     ):
         model_config_name = [config_name]
-        # yapf: disable
-        non_gpu_metric_values = [{
-            "perf_throughput": throughput,
-            "perf_latency_avg": latency
-        }]
-        # yapf: enable
+        non_gpu_metric_values = [
+            {"perf_throughput": throughput, "perf_latency_avg": latency}
+        ]
 
         metric_objectives = [{"perf_throughput": 1}]
         weights = [1]
diff --git a/tests/test_model_config_generator.py b/tests/test_model_config_generator.py
index 3717c04be..c17e3d265 100755
--- a/tests/test_model_config_generator.py
+++ b/tests/test_model_config_generator.py
@@ -49,55 +49,213 @@ def test_direct_no_params(self):
         and default config (empty dict) will be included
         """
 
-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
             profile_models:
                 - my-model
-            """)
+            """
 
         expected_configs = [
-            {'dynamic_batching': {}, 'max_batch_size': 1, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 1, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 1, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 1, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 1, 'instance_group': [{'count': 5, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 2, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 2, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 2, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 2, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 2, 'instance_group': [{'count': 5, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 4, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 4, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 4, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 4, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 4, 'instance_group': [{'count': 5, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 5, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 16, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 16, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 16, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 16, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 16, 'instance_group': [{'count': 5, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 32, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 32, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 32, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 32, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 32, 'instance_group': [{'count': 5, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 64, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 64, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 64, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 64, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 64, 'instance_group': [{'count': 5, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 128, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 128, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 128, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 128, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 128, 'instance_group': [{'count': 5, 'kind': 'KIND_GPU'}]}
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 1,
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 1,
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 1,
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 1,
+                "instance_group": [{"count": 4, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 1,
+                "instance_group": [{"count": 5, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 2,
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 2,
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 2,
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 2,
+                "instance_group": [{"count": 4, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 2,
+                "instance_group": [{"count": 5, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 4,
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 4,
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 4,
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 4,
+                "instance_group": [{"count": 4, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 4,
+                "instance_group": [{"count": 5, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 8,
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 8,
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 8,
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 8,
+                "instance_group": [{"count": 4, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 8,
+                "instance_group": [{"count": 5, "kind": "KIND_GPU"}],
[{"count": 5, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 16, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 16, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 16, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 16, + "instance_group": [{"count": 4, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 16, + "instance_group": [{"count": 5, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 32, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 32, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 32, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 32, + "instance_group": [{"count": 4, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 32, + "instance_group": [{"count": 5, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 64, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 64, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 64, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 64, + "instance_group": [{"count": 4, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 64, + "instance_group": [{"count": 5, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 128, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 128, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 128, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 128, + "instance_group": [{"count": 4, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 128, + "instance_group": [{"count": 5, "kind": "KIND_GPU"}], + }, ] - # yapf: enable self._run_and_test_model_config_generator(yaml_str, expected_configs) @@ -108,15 +266,13 @@ def test_direct_no_params_search_disable(self): This will just return a single empty config, since there are no parameters to combine """ - # yapf: disable - yaml_str = (""" + yaml_str = """ run_config_search_disable: True profile_models: - my-model - """) + """ - expected_configs = [{'max_batch_size': 8}] - # yapf: enable + expected_configs = [{"max_batch_size": 8}] self._run_and_test_model_config_generator(yaml_str, expected_configs) @@ -127,8 +283,7 @@ def test_direct_yes_params_search_disable(self): This will just combine all model_config_parameters """ - # yapf: disable - yaml_str = (""" + yaml_str = """ run_config_search_max_instance_count: 16 run_config_search_disable: True profile_models: @@ -139,17 +294,22 @@ def test_direct_yes_params_search_disable(self): - kind: KIND_GPU count: [1,2] - """) + """ expected_configs = [ - {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}], 'max_batch_size': 1}, - {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}], 'max_batch_size': 4}, - {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}], 'max_batch_size': 16}, - 
-            {'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}], 'max_batch_size': 1},
-            {'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}], 'max_batch_size': 4},
-            {'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}], 'max_batch_size': 16}
+            {"instance_group": [{"count": 1, "kind": "KIND_GPU"}], "max_batch_size": 1},
+            {"instance_group": [{"count": 1, "kind": "KIND_GPU"}], "max_batch_size": 4},
+            {
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+                "max_batch_size": 16,
+            },
+            {"instance_group": [{"count": 2, "kind": "KIND_GPU"}], "max_batch_size": 1},
+            {"instance_group": [{"count": 2, "kind": "KIND_GPU"}], "max_batch_size": 4},
+            {
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+                "max_batch_size": 16,
+            },
         ]
 
         self._run_and_test_model_config_generator(yaml_str, expected_configs)
 
@@ -159,30 +319,76 @@ def test_run_config_search_options(self):
         and run_config_search_max_model_batch_size and run_config_search_min_model_batch_size
         """
-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_instance_count: 3
            run_config_search_min_model_batch_size: 2
            run_config_search_max_model_batch_size: 16
            profile_models:
                - my-model
-            """)
+            """
 
         expected_configs = [
-            {'dynamic_batching': {}, 'max_batch_size': 2, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 2, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 2, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 4, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 4, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 4, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 16, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 16, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'dynamic_batching': {}, 'max_batch_size': 16, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]}
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 2,
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 2,
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 2,
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 4,
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 4,
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 4,
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 8,
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 8,
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 8,
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "dynamic_batching": {},
+                "max_batch_size": 16,
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
[{"count": 1, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 16, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 16, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + }, ] - # yapf: enable self._run_and_test_model_config_generator(yaml_str, expected_configs) @@ -191,25 +397,47 @@ def test_run_config_search_min_instance_counts(self): Test that ModelConfigGenerator will honor run_config_search_min_instance_count """ - # yapf: disable - yaml_str = (""" + yaml_str = """ run_config_search_min_instance_count: 2 run_config_search_max_instance_count: 3 run_config_search_min_model_batch_size: 2 run_config_search_max_model_batch_size: 8 profile_models: - my-model - """) + """ expected_configs = [ - {'dynamic_batching': {}, 'max_batch_size': 2, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 2, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 4, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 4, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]} + { + "dynamic_batching": {}, + "max_batch_size": 2, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 2, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 4, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 4, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 8, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 8, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + }, ] - # yapf: enable self._run_and_test_model_config_generator(yaml_str, expected_configs) @@ -219,24 +447,46 @@ def test_non_power_of_two_max_batch_size(self): input values that aren't a power of 2 """ - # yapf: disable - yaml_str = (""" + yaml_str = """ run_config_search_max_instance_count: 2 run_config_search_min_model_batch_size: 3 run_config_search_max_model_batch_size: 15 profile_models: - my-model - """) + """ expected_configs = [ - {'dynamic_batching': {}, 'max_batch_size': 3, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 3, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 6, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 6, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 12, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 12, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]} + { + "dynamic_batching": {}, + "max_batch_size": 3, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 3, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 6, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 6, + 
"instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 12, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 12, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + }, ] - # yapf: enable self._run_and_test_model_config_generator(yaml_str, expected_configs) @@ -248,8 +498,7 @@ def test_direct_yes_params_specified(self): default config (None) will be included """ - # yapf: disable - yaml_str = (""" + yaml_str = """ run_config_search_max_instance_count: 16 run_config_search_disable: False profile_models: @@ -260,17 +509,22 @@ def test_direct_yes_params_specified(self): - kind: KIND_GPU count: [1,2] - """) + """ expected_configs = [ - {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}], 'max_batch_size': 1}, - {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}], 'max_batch_size': 4}, - {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}], 'max_batch_size': 16}, - {'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}], 'max_batch_size': 1}, - {'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}], 'max_batch_size': 4}, - {'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}], 'max_batch_size': 16} + {"instance_group": [{"count": 1, "kind": "KIND_GPU"}], "max_batch_size": 1}, + {"instance_group": [{"count": 1, "kind": "KIND_GPU"}], "max_batch_size": 4}, + { + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + "max_batch_size": 16, + }, + {"instance_group": [{"count": 2, "kind": "KIND_GPU"}], "max_batch_size": 1}, + {"instance_group": [{"count": 2, "kind": "KIND_GPU"}], "max_batch_size": 4}, + { + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + "max_batch_size": 16, + }, ] - # yapf: enable self._run_and_test_model_config_generator(yaml_str, expected_configs) @@ -279,23 +533,37 @@ def test_direct_cpu_only(self): Test direct mode with cpu_only=true """ - # yapf: disable - yaml_str = (""" + yaml_str = """ run_config_search_max_instance_count: 2 run_config_search_min_model_batch_size: 8 run_config_search_max_model_batch_size: 16 profile_models: - my-model: cpu_only: True - """) + """ expected_configs = [ - {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 1, 'kind': 'KIND_CPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 8, 'instance_group': [{'count': 2, 'kind': 'KIND_CPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 16, 'instance_group': [{'count': 1, 'kind': 'KIND_CPU'}]}, - {'dynamic_batching': {}, 'max_batch_size': 16, 'instance_group': [{'count': 2, 'kind': 'KIND_CPU'}]} + { + "dynamic_batching": {}, + "max_batch_size": 8, + "instance_group": [{"count": 1, "kind": "KIND_CPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 8, + "instance_group": [{"count": 2, "kind": "KIND_CPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 16, + "instance_group": [{"count": 1, "kind": "KIND_CPU"}], + }, + { + "dynamic_batching": {}, + "max_batch_size": 16, + "instance_group": [{"count": 2, "kind": "KIND_CPU"}], + }, ] - # yapf: enable self._run_and_test_model_config_generator(yaml_str, expected_configs) @@ -306,7 +574,6 @@ def test_direct_max_batch_size_0(self): max_batch_size and dynamic_batching should not be part of the resulting configs """ - # yapf: disable protobuf = """ platform: "fake_platform" max_batch_size: 0 @@ -318,21 +585,32 @@ def test_direct_max_batch_size_0(self): ] """ - yaml_str = (""" + yaml_str = """ run_config_search_max_instance_count: 4 run_config_search_min_model_batch_size: 8 
            run_config_search_max_model_batch_size: 8
            profile_models:
                - my-model
-            """)
+            """
 
         expected_configs = [
-            {'platform': "fake_platform", 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]}
+            {
+                "platform": "fake_platform",
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "instance_group": [{"count": 4, "kind": "KIND_GPU"}],
+            },
         ]
-        # yapf: enable
 
         self._run_and_test_model_config_generator(yaml_str, expected_configs, protobuf)
 
@@ -343,7 +621,6 @@ def test_direct_max_batch_size_unspecified(self):
         max_batch_size and dynamic_batching should not be part of the resulting configs
         """
-        # yapf: disable
         protobuf = """
            platform: "fake_platform"
            instance_group [
            {
                count: 1
                kind: KIND_CPU
            }
            ]
        """
 
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_instance_count: 4
            run_config_search_min_model_batch_size: 8
            run_config_search_max_model_batch_size: 8
            profile_models:
                - my-model
-            """)
+            """
 
         expected_configs = [
-            {'platform': "fake_platform", 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]}
+            {
+                "platform": "fake_platform",
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "instance_group": [{"count": 4, "kind": "KIND_GPU"}],
+            },
         ]
-        # yapf: enable
 
         self._run_and_test_model_config_generator(yaml_str, expected_configs, protobuf)
 
@@ -379,7 +667,6 @@ def test_direct_sequence_batching(self):
         dynamic_batching should not be part of the resulting configs
         """
-        # yapf: disable
         protobuf = """
            platform: "fake_platform"
            max_batch_size: 4
            sequence_batching { }
            instance_group [
            {
                count: 1
                kind: KIND_CPU
            }
            ]
        """
 
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_instance_count: 4
            run_config_search_min_model_batch_size: 8
            run_config_search_max_model_batch_size: 16
            profile_models:
                - my-model
-            """)
+            """
 
         expected_configs = [
-            {'platform': "fake_platform", 'max_batch_size': 8, 'sequence_batching': {}, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'max_batch_size': 8, 'sequence_batching': {}, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'max_batch_size': 8, 'sequence_batching': {}, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'max_batch_size': 8, 'sequence_batching': {}, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'max_batch_size': 16, 'sequence_batching': {}, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'max_batch_size': 16, 'sequence_batching': {}, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'max_batch_size': 16, 'sequence_batching': {}, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]},
-            {'platform': "fake_platform", 'max_batch_size': 16, 'sequence_batching': {}, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}]}
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 8,
+                "sequence_batching": {},
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 8,
+                "sequence_batching": {},
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 8,
+                "sequence_batching": {},
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 8,
+                "sequence_batching": {},
+                "instance_group": [{"count": 4, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 16,
+                "sequence_batching": {},
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 16,
+                "sequence_batching": {},
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 16,
+                "sequence_batching": {},
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+            },
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 16,
+                "sequence_batching": {},
+                "instance_group": [{"count": 4, "kind": "KIND_GPU"}],
+            },
         ]
-        # yapf: enable

         self._run_and_test_model_config_generator(yaml_str, expected_configs, protobuf)

@@ -422,7 +748,6 @@ def test_direct_nonempty_default_config(self):
         any values that are part of the search
         """

-        # yapf: disable
        protobuf = """
            platform: "fake_platform"
            max_batch_size: 4
@@ -434,21 +759,40 @@ def test_direct_nonempty_default_config(self):
            ]
            """

-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_instance_count: 4
            run_config_search_min_model_batch_size: 8
            run_config_search_max_model_batch_size: 8
            profile_models:
            - my-model
-            """)
+            """

         expected_configs = [
-            {'platform': "fake_platform", 'max_batch_size': 8, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}],'dynamic_batching': {}},
-            {'platform': "fake_platform", 'max_batch_size': 8, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}],'dynamic_batching': {}},
-            {'platform': "fake_platform", 'max_batch_size': 8, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}],'dynamic_batching': {}},
-            {'platform': "fake_platform", 'max_batch_size': 8, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 8,
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+                "dynamic_batching": {},
+            },
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 8,
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+                "dynamic_batching": {},
+            },
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 8,
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+                "dynamic_batching": {},
+            },
+            {
+                "platform": "fake_platform",
+                "max_batch_size": 8,
+                "instance_group": [{"count": 4, "kind": "KIND_GPU"}],
+                "dynamic_batching": {},
+            },
         ]
-        # yapf: enable

         self._run_and_test_model_config_generator(yaml_str, expected_configs, protobuf)

@@ -457,8 +801,7 @@ def test_remote_yes_params_specified(self):
         Test remote mode with model_config_parameters specified
         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            triton_launch_mode: remote
            run_config_search_max_instance_count: 16
            profile_models:
@@ -469,17 +812,22 @@ def test_remote_yes_params_specified(self):
                        -
                            kind: KIND_GPU
                            count: [1,2]
-            """)
+            """

         expected_configs = [
-            {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}], 'max_batch_size': 1},
-            {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}], 'max_batch_size': 4},
-            {'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}], 'max_batch_size': 16},
-            {'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}], 'max_batch_size': 1},
-            {'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}], 'max_batch_size': 4},
-            {'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}], 'max_batch_size': 16}
+            {"instance_group": [{"count": 1, "kind": "KIND_GPU"}], "max_batch_size": 1},
+            {"instance_group": [{"count": 1, "kind": "KIND_GPU"}], "max_batch_size": 4},
+            {
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+                "max_batch_size": 16,
+            },
+            {"instance_group": [{"count": 2, "kind": "KIND_GPU"}], "max_batch_size": 1},
+            {"instance_group": [{"count": 2, "kind": "KIND_GPU"}], "max_batch_size": 4},
+            {
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+                "max_batch_size": 16,
+            },
         ]
-        # yapf: enable

         self._run_and_test_model_config_generator(yaml_str, expected_configs)

@@ -488,23 +836,37 @@ def test_remote_no_params_specified(self):
         Test remote mode with no model_config_parameters specified
         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            triton_launch_mode: remote
            run_config_search_max_instance_count: 4
            run_config_search_min_model_batch_size: 8
            run_config_search_max_model_batch_size: 8
            profile_models:
            - my-model
-            """)
+            """

         expected_configs = [
-            {'max_batch_size': 8, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}],'dynamic_batching': {}},
-            {'max_batch_size': 8, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}],'dynamic_batching': {}},
-            {'max_batch_size': 8, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}],'dynamic_batching': {}},
-            {'max_batch_size': 8, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}
+            {
+                "max_batch_size": 8,
+                "instance_group": [{"count": 1, "kind": "KIND_GPU"}],
+                "dynamic_batching": {},
+            },
+            {
+                "max_batch_size": 8,
+                "instance_group": [{"count": 2, "kind": "KIND_GPU"}],
+                "dynamic_batching": {},
+            },
+            {
+                "max_batch_size": 8,
+                "instance_group": [{"count": 3, "kind": "KIND_GPU"}],
+                "dynamic_batching": {},
+            },
+            {
+                "max_batch_size": 8,
+                "instance_group": [{"count": 4, "kind": "KIND_GPU"}],
+                "dynamic_batching": {},
+            },
         ]
-        # yapf: enable

         self._run_and_test_model_config_generator(yaml_str, expected_configs)

@@ -517,7 +879,6 @@ def test_search_subparameter(self):
         sibling property (cpu_execution_accelerator) being overwritten
         """

-        # yapf: disable
        protobuf = """
            max_batch_size: 4
            instance_group [
@@ -546,7 +907,7 @@ def test_search_subparameter(self):
            }}
            """

-        yaml_str = ("""
+        yaml_str = """
            profile_models:
                my-model:
                    model_config_parameters:
@@ -557,45 +918,38 @@ def test_search_subparameter(self):
                        parameters:
                            param1:
                                string_value: ["foo", "bar"]
-            """)
+            """

         expected_configs = [
             {
-                'optimization':
-                {
-                    'execution_accelerators':
-                    {
-                        'cpu_execution_accelerator': [{'name': 'new_cpu_accelerator'}],
-                        'gpu_execution_accelerator': [{'name': 'fake_gpu_accelerator'}]
+                "optimization": {
+                    "execution_accelerators": {
+                        "cpu_execution_accelerator": [{"name": "new_cpu_accelerator"}],
+                        "gpu_execution_accelerator": [{"name": "fake_gpu_accelerator"}],
                     }
                 },
-                'parameters':
-                {
-                    'param1': {'string_value': 'foo'},
-                    'param2': {'string_value': 'param2_value'}
+                "parameters": {
+                    "param1": {"string_value": "foo"},
+                    "param2": {"string_value": "param2_value"},
"param2_value"}, }, - 'max_batch_size': 4, - 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}] + "max_batch_size": 4, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], }, { - 'optimization': - { - 'execution_accelerators': - { - 'cpu_execution_accelerator': [{'name': 'new_cpu_accelerator'}], - 'gpu_execution_accelerator': [{'name': 'fake_gpu_accelerator'}] + "optimization": { + "execution_accelerators": { + "cpu_execution_accelerator": [{"name": "new_cpu_accelerator"}], + "gpu_execution_accelerator": [{"name": "fake_gpu_accelerator"}], } }, - 'parameters': - { - 'param1': {'string_value': 'bar'}, - 'param2': {'string_value': 'param2_value'} + "parameters": { + "param1": {"string_value": "bar"}, + "param2": {"string_value": "param2_value"}, }, - 'max_batch_size': 4, - 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}] + "max_batch_size": 4, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], }, ] - # yapf: enable self._run_and_test_model_config_generator(yaml_str, expected_configs, protobuf) @@ -608,7 +962,6 @@ def test_search_dynamic_batching_subparameter(self): unchanged in all model configs generated by the manual search """ - # yapf: disable protobuf = """ max_batch_size: 4 instance_group [ @@ -622,21 +975,36 @@ def test_search_dynamic_batching_subparameter(self): } """ - yaml_str = (""" + yaml_str = """ run_config_search_max_instance_count: 4 run_config_search_min_model_batch_size: 8 run_config_search_max_model_batch_size: 8 profile_models: - my-model - """) + """ expected_configs = [ - {'max_batch_size': 8, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}],'dynamic_batching': { 'max_queue_delay_microseconds': '100'}}, - {'max_batch_size': 8, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}],'dynamic_batching': { 'max_queue_delay_microseconds': '100'}}, - {'max_batch_size': 8, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}],'dynamic_batching': { 'max_queue_delay_microseconds': '100'}}, - {'max_batch_size': 8, 'instance_group': [{'count': 4, 'kind': 'KIND_GPU'}],'dynamic_batching': { 'max_queue_delay_microseconds': '100'}} + { + "max_batch_size": 8, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + "dynamic_batching": {"max_queue_delay_microseconds": "100"}, + }, + { + "max_batch_size": 8, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + "dynamic_batching": {"max_queue_delay_microseconds": "100"}, + }, + { + "max_batch_size": 8, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + "dynamic_batching": {"max_queue_delay_microseconds": "100"}, + }, + { + "max_batch_size": 8, + "instance_group": [{"count": 4, "kind": "KIND_GPU"}], + "dynamic_batching": {"max_queue_delay_microseconds": "100"}, + }, ] - # yapf: enable self._run_and_test_model_config_generator(yaml_str, expected_configs, protobuf) @@ -677,14 +1045,12 @@ def test_early_exit_off_automatic_asserts(self): Test that passing early_exit=False for automatic search raises an assert """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) + """ expected_configs = [] - # yapf: enable with self.assertRaises(TritonModelAnalyzerException): self._run_and_test_model_config_generator( @@ -695,46 +1061,91 @@ def test_early_exit_on_automatic(self): """ Test that automatic mode will early exit max_batch_size when throughput plateaus """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model run_config_search_max_instance_count: 3 run_config_search_max_model_batch_size: 8 - """) + """ expected_configs = [ - {'max_batch_size': 1, 
'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}, - {'max_batch_size': 2, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}, - {'max_batch_size': 4, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}, - {'max_batch_size': 8, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}, - {'max_batch_size': 1, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}, - {'max_batch_size': 2, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}, - {'max_batch_size': 1, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}, - {'max_batch_size': 2, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}, - {'max_batch_size': 4, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}, - {'max_batch_size': 8, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}],'dynamic_batching': {}}, + { + "max_batch_size": 1, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + "dynamic_batching": {}, + }, + { + "max_batch_size": 2, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + "dynamic_batching": {}, + }, + { + "max_batch_size": 4, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + "dynamic_batching": {}, + }, + { + "max_batch_size": 8, + "instance_group": [{"count": 1, "kind": "KIND_GPU"}], + "dynamic_batching": {}, + }, + { + "max_batch_size": 1, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + "dynamic_batching": {}, + }, + { + "max_batch_size": 2, + "instance_group": [{"count": 2, "kind": "KIND_GPU"}], + "dynamic_batching": {}, + }, + { + "max_batch_size": 1, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + "dynamic_batching": {}, + }, + { + "max_batch_size": 2, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + "dynamic_batching": {}, + }, + { + "max_batch_size": 4, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + "dynamic_batching": {}, + }, + { + "max_batch_size": 8, + "instance_group": [{"count": 3, "kind": "KIND_GPU"}], + "dynamic_batching": {}, + }, ] - with patch.object(TestModelConfigGenerator, - "_get_next_fake_throughput") as mock_method: + with patch.object( + TestModelConfigGenerator, "_get_next_fake_throughput" + ) as mock_method: mock_method.side_effect = [ - 1, 2, 4, 8, # 1 instance - 1, 1, # 2 instances. Yes backoff - 1, 2, 4, 8 # 3 instances + 1, + 2, + 4, + 8, # 1 instance + 1, + 1, # 2 instances. 
Yes backoff + 1, + 2, + 4, + 8, # 3 instances ] - self._run_and_test_model_config_generator(yaml_str, - expected_configs, - early_exit_enable=True) - # yapf: enable + self._run_and_test_model_config_generator( + yaml_str, expected_configs, early_exit_enable=True + ) def test_early_exit_off_manual(self): """ Test that manual mode will not early exit despite throughput plateauing despite because early_exit_enable=False """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: test_model: model_config_parameters: @@ -743,19 +1154,18 @@ def test_early_exit_off_manual(self): - kind: KIND_GPU count: [1,2] - """) + """ expected_configs = [ - {'max_batch_size': 1, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 2, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 4, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 8, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 1, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 2, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 4, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 8, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, + {"max_batch_size": 1, "instance_group": [{"count": 1, "kind": "KIND_GPU"}]}, + {"max_batch_size": 2, "instance_group": [{"count": 1, "kind": "KIND_GPU"}]}, + {"max_batch_size": 4, "instance_group": [{"count": 1, "kind": "KIND_GPU"}]}, + {"max_batch_size": 8, "instance_group": [{"count": 1, "kind": "KIND_GPU"}]}, + {"max_batch_size": 1, "instance_group": [{"count": 2, "kind": "KIND_GPU"}]}, + {"max_batch_size": 2, "instance_group": [{"count": 2, "kind": "KIND_GPU"}]}, + {"max_batch_size": 4, "instance_group": [{"count": 2, "kind": "KIND_GPU"}]}, + {"max_batch_size": 8, "instance_group": [{"count": 2, "kind": "KIND_GPU"}]}, ] - # yapf: enable with patch.object( TestModelConfigGenerator, "_get_next_fake_throughput" @@ -770,8 +1180,7 @@ def test_early_exit_on_manual(self): Test that manual mode will early exit when throughput plateaus when early_exit_enable=True """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: test_model: model_config_parameters: @@ -780,32 +1189,39 @@ def test_early_exit_on_manual(self): - kind: KIND_GPU count: [1,2,3] - """) + """ expected_configs = [ - {'max_batch_size': 1, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 2, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 3, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 4, 'instance_group': [{'count': 1, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 1, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 2, 'instance_group': [{'count': 2, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 1, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 2, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 3, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]}, - {'max_batch_size': 4, 'instance_group': [{'count': 3, 'kind': 'KIND_GPU'}]}, + {"max_batch_size": 1, "instance_group": [{"count": 1, "kind": "KIND_GPU"}]}, + {"max_batch_size": 2, "instance_group": [{"count": 1, "kind": "KIND_GPU"}]}, + {"max_batch_size": 3, "instance_group": [{"count": 1, "kind": "KIND_GPU"}]}, + {"max_batch_size": 4, "instance_group": [{"count": 1, "kind": "KIND_GPU"}]}, + {"max_batch_size": 1, "instance_group": 
[{"count": 2, "kind": "KIND_GPU"}]}, + {"max_batch_size": 2, "instance_group": [{"count": 2, "kind": "KIND_GPU"}]}, + {"max_batch_size": 1, "instance_group": [{"count": 3, "kind": "KIND_GPU"}]}, + {"max_batch_size": 2, "instance_group": [{"count": 3, "kind": "KIND_GPU"}]}, + {"max_batch_size": 3, "instance_group": [{"count": 3, "kind": "KIND_GPU"}]}, + {"max_batch_size": 4, "instance_group": [{"count": 3, "kind": "KIND_GPU"}]}, ] - with patch.object(TestModelConfigGenerator, - "_get_next_fake_throughput") as mock_method: + with patch.object( + TestModelConfigGenerator, "_get_next_fake_throughput" + ) as mock_method: mock_method.side_effect = [ - 1, 2, 4, 8, # 1 instance - 1, 1, # 2 instances. Yes backoff - 1, 2, 4, 8 # 3 instances + 1, + 2, + 4, + 8, # 1 instance + 1, + 1, # 2 instances. Yes backoff + 1, + 2, + 4, + 8, # 3 instances ] - self._run_and_test_model_config_generator(yaml_str, - expected_configs, - early_exit_enable=True) - # yapf: enable + self._run_and_test_model_config_generator( + yaml_str, expected_configs, early_exit_enable=True + ) def test_extract_model_name_from_variant_name(self): input_output_pairs = {} diff --git a/tests/test_model_manager.py b/tests/test_model_manager.py index b489a277f..d503f5db6 100755 --- a/tests/test_model_manager.py +++ b/tests/test_model_manager.py @@ -400,61 +400,81 @@ def _test_early_exit_client_batch_size_helper(self, early_exit): with patch.object( MetricsManagerSubclass, "_get_next_perf_throughput_value" ) as mock_method: - # yapf: disable side_effect = [ # Default config, bs=1, concurrency 1,2,4,8 # Will early exit for concurrency # "Best" result for bs early exit is 5 - 5, 5, 5, 5, - + 5, + 5, + 5, + 5, # Default config, bs=2, concurrency 1,2,4,8 # Will early exit for concurrency # "Best" result for bs early exit is 4 - 4, 4, 4, 4, - + 4, + 4, + 4, + 4, # Default config, bs=3, concurrency 1,2,4,8 # Will early exit for concurrency # "Best" result for bs early exit is 6 - 6, 6, 6, 6, - + 6, + 6, + 6, + 6, # Default config, bs=4, concurrency 1,2,4,8 # Will early exit for concurrency # "Best" result for bs early exit is 5 # We will not early exit batch size - 5, 5, 5, 5, - + 5, + 5, + 5, + 5, # Default config, bs=7, concurrency 1,2,4,8 # Will not early exit for concurrency # "Best" result for bs early exit is 1 # We are done sweeping batch size - 1, 1, 1, 1, - - + 1, + 1, + 1, + 1, # 1 instance, bs=1, concurrency 1,2,4,8,16 # Will not early exit for concurrency # "Best" result for bs early exit is 10 - 1, 1, 10, 1, 1, - + 1, + 1, + 10, + 1, + 1, # 1 instance, bs=2, concurrency 1,2,4,8,16 # Will not early exit for concurrency # "Best" result for bs early exit is 9 - 1, 9, 1, 1, 2, - + 1, + 9, + 1, + 1, + 2, # 1 instance, bs=3, concurrency 1,2,4,8,16 # Will not early exit for concurrency # "Best" result for bs early exit is 8 - 1, 1, 1, 8, 3, - + 1, + 1, + 1, + 8, + 3, # 1 instance, bs=4, concurrency 1,2,4,8,16 # Will not early exit for concurrency # "Best" result for bs early exit is 7 # Will early exit batch size now - 1, 1, 7, 1, 4 + 1, + 1, + 7, + 1, + 4, ] # Add a bunch of extra results for the no-early-exit case - side_effect.extend([1]*100) + side_effect.extend([1] * 100) mock_method.side_effect = side_effect - # yapf: enable self._test_model_manager(yaml_content, expected_ranges, args=args) @@ -1014,18 +1034,32 @@ def test_lower_throughput_early_batch_size_exit(self): with patch.object( MetricsManagerSubclass, "_get_next_perf_throughput_value" ) as mock_method: - # yapf: disable mock_method.side_effect = [ - 1, 2, 4, # Default config, 
concurrency 1,2,4 - 1, 2, 4, # 1 Instance, Batch size 8, concurrency 1,2,4 - 2, 4, 8, # 1 Instance, Batch size 16, concurrency 1,2,4 - 2, 4, 8, # 1 Instance, Batch size 32, concurrency 1,2,4 - 1, 2, 4, # 1 Instance, Batch size 8, concurrency 1,2,4 - 8, 4, 2, # 1 Instance, Batch size 16, concurrency 1,2,4 - 4, 8, 16, # 1 Instance, Batch size 32, concurrency 1,2,4 - 4, 8, 16 # 1 Instance, Batch size 64, concurrency 1,2,4 + 1, + 2, + 4, # Default config, concurrency 1,2,4 + 1, + 2, + 4, # 1 Instance, Batch size 8, concurrency 1,2,4 + 2, + 4, + 8, # 1 Instance, Batch size 16, concurrency 1,2,4 + 2, + 4, + 8, # 1 Instance, Batch size 32, concurrency 1,2,4 + 1, + 2, + 4, # 1 Instance, Batch size 8, concurrency 1,2,4 + 8, + 4, + 2, # 1 Instance, Batch size 16, concurrency 1,2,4 + 4, + 8, + 16, # 1 Instance, Batch size 32, concurrency 1,2,4 + 4, + 8, + 16, # 1 Instance, Batch size 64, concurrency 1,2,4 ] - # yapf: enable mock_method.return_value = None self._test_model_manager(yaml_str, expected_ranges) diff --git a/tests/test_neighborhood.py b/tests/test_neighborhood.py index ad96899fa..d0fda30e6 100755 --- a/tests/test_neighborhood.py +++ b/tests/test_neighborhood.py @@ -53,12 +53,9 @@ def tearDown(self): def _construct_rcm(self, throughput: float, latency: float): model_config_name = ["modelA_config_0"] - # yapf: disable - non_gpu_metric_values = [{ - "perf_throughput": throughput, - "perf_latency_p99": latency - }] - # yapf: enable + non_gpu_metric_values = [ + {"perf_throughput": throughput, "perf_latency_p99": latency} + ] metric_objectives = [{"perf_throughput": 1}] weights = [1] diff --git a/tests/test_optuna_run_config_generator.py b/tests/test_optuna_run_config_generator.py index 54232f426..cbe7e6ee2 100755 --- a/tests/test_optuna_run_config_generator.py +++ b/tests/test_optuna_run_config_generator.py @@ -511,16 +511,14 @@ def _create_config(self, additional_args=[]): for arg in additional_args: args.append(arg) - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: add_sub: model_config_parameters: dynamic_batching: max_queue_delay_microseconds: [100, 200, 300] - """) - # yapf: enable + """ config = TestConfig()._evaluate_config(args, yaml_str) @@ -539,8 +537,7 @@ def _create_bls_config(self, additional_args=[]): for arg in additional_args: args.append(arg) - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: bls bls_composing_models: add: @@ -552,8 +549,7 @@ def _create_bls_config(self, additional_args=[]): dynamic_batching: max_queue_delay_microseconds: [400, 500, 600] - """) - # yapf: enable + """ config = TestConfig()._evaluate_config(args, yaml_str) @@ -572,8 +568,7 @@ def _create_multi_model_config(self, additional_args=[]): for arg in additional_args: args.append(arg) - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: add_sub: model_config_parameters: @@ -583,8 +578,7 @@ def _create_multi_model_config(self, additional_args=[]): model_config_parameters: dynamic_batching: max_queue_delay_microseconds: [400, 500, 600] - """) - # yapf: enable + """ config = TestConfig()._evaluate_config(args, yaml_str) diff --git a/tests/test_perf_analyzer.py b/tests/test_perf_analyzer.py index 77e6f90c2..398ad0708 100755 --- a/tests/test_perf_analyzer.py +++ b/tests/test_perf_analyzer.py @@ -843,19 +843,32 @@ def test_get_cmd_multi_model(self): max_cpu_util=50, ) - # yapf: disable expected_cmd = [ - 'mpiexec', '--allow-run-as-root', '--tag-output', - '-n', '1', 'perf_analyzer', '--enable-mpi', - '-m', 'MultiModel1', - '--measurement-interval', '1000', 
- '--measurement-request-count', '50', - ':', '-n', '1', 'perf_analyzer', '--enable-mpi', - '-m', 'MultiModel2', - '-b', '16', - '--concurrency-range', '1024' + "mpiexec", + "--allow-run-as-root", + "--tag-output", + "-n", + "1", + "perf_analyzer", + "--enable-mpi", + "-m", + "MultiModel1", + "--measurement-interval", + "1000", + "--measurement-request-count", + "50", + ":", + "-n", + "1", + "perf_analyzer", + "--enable-mpi", + "-m", + "MultiModel2", + "-b", + "16", + "--concurrency-range", + "1024", ] - # yapf: enable self.assertEqual(pa._get_cmd(), expected_cmd) diff --git a/tests/test_perf_analyzer_config_generator.py b/tests/test_perf_analyzer_config_generator.py index e9852356e..c4f135293 100755 --- a/tests/test_perf_analyzer_config_generator.py +++ b/tests/test_perf_analyzer_config_generator.py @@ -77,12 +77,10 @@ def test_default(self): will be generated by the auto-search """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ concurrencies = utils.generate_doubled_list( 1, DEFAULT_RUN_CONFIG_MAX_CONCURRENCY @@ -102,12 +100,10 @@ def test_search_disabled(self): and concurrency will be set to 1 """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ expected_configs = [construct_perf_analyzer_config()] @@ -124,12 +120,10 @@ def test_c_api(self): and only one config will be generated """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ concurrencies = utils.generate_doubled_list( 1, DEFAULT_RUN_CONFIG_MAX_CONCURRENCY @@ -152,12 +146,10 @@ def test_http(self): and only one config will be generated """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ concurrencies = utils.generate_doubled_list( 1, DEFAULT_RUN_CONFIG_MAX_CONCURRENCY @@ -182,12 +174,10 @@ def test_batch_size_search_disabled(self): and 3 configs will be generated """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ batch_sizes = [1, 2, 4] expected_configs = [ @@ -209,12 +199,10 @@ def test_batch_size_search_enabled(self): Concurrency: log2(DEFAULT_RUN_CONFIG_MAX_CONCURRENCY)+1 """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ batch_sizes = [1, 2, 4] concurrencies = utils.generate_doubled_list( @@ -242,12 +230,10 @@ def test_concurrency(self): and 4 configs will be generated """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ concurrencies = [1, 2, 3, 4] expected_configs = [ @@ -277,12 +263,10 @@ def test_batch_size_and_concurrency(self): 12 configs will be generated """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ batch_sizes = [1, 2, 4] concurrencies = [1, 2, 3, 4] @@ -313,12 +297,10 @@ def test_max_concurrency(self): and 5 configs (log2(16)+1) will be generated """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ concurrencies = utils.generate_doubled_list(1, 16) expected_configs = [ @@ -339,12 +321,10 @@ def test_min_concurrency(self): 2 configs [5, 10] will be generated """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ concurrencies = [5, 10] expected_configs = [ @@ -372,12 +352,10 @@ def 
test_request_rate_list(self): and 4 configs will be generated """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ request_rates = [1, 2, 3, 4] expected_configs = [ @@ -395,14 +373,12 @@ def test_request_rate_list(self): yaml_str, expected_configs, pa_cli_args ) - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: my-model: parameters: request_rate: 1,2,3,4 - """) - # yapf: enable + """ pa_cli_args = [] self._run_and_test_perf_analyzer_config_generator( @@ -419,12 +395,10 @@ def test_request_rate_enable(self): Default (1) value will be used for batch size """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ request_rates = utils.generate_doubled_list( DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE, DEFAULT_RUN_CONFIG_MAX_REQUEST_RATE @@ -448,12 +422,10 @@ def test_max_request_rate(self): Default (1) value will be used for batch size """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ request_rates = utils.generate_doubled_list( DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE, @@ -481,12 +453,10 @@ def test_min_request_rate(self): Default (1) value will be used for batch size """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ request_rates = utils.generate_doubled_list( DEFAULT_RUN_CONFIG_MIN_REQUEST_RATE * 2, @@ -516,14 +486,12 @@ def test_perf_analyzer_flags(self): will be generated by the auto-search """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model: perf_analyzer_flags: percentile: 96 - """) - # yapf: enable + """ concurrencies = utils.generate_doubled_list( 1, DEFAULT_RUN_CONFIG_MAX_CONCURRENCY @@ -543,8 +511,7 @@ def test_perf_analyzer_config_ssl_options(self): - No CLI options specified """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model: perf_analyzer_flags: @@ -558,8 +525,7 @@ def test_perf_analyzer_config_ssl_options(self): ssl-https-client-certificate-file: f ssl-https-private-key-type: g ssl-https-private-key-file: h - """) - # yapf: enable + """ concurrencies = utils.generate_doubled_list( 1, DEFAULT_RUN_CONFIG_MAX_CONCURRENCY @@ -590,12 +556,10 @@ def test_early_exit_on_no_plateau(self): Test if early_exit is true but the throughput is still increasing, we do not early exit """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ concurrencies = utils.generate_doubled_list(1, 64) expected_configs = [ @@ -612,12 +576,10 @@ def test_early_exit_on_yes_plateau(self): Test if early_exit is true and the throughput plateaus, we do early exit """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ concurrencies = utils.generate_doubled_list(1, 32) expected_configs = [ @@ -638,12 +600,10 @@ def test_early_exit_off_yes_plateau(self): Test if early_exit is off and the throughput plateaus, we do not early exit """ - # yapf: disable - yaml_str = (""" + yaml_str = """ profile_models: - my-model - """) - # yapf: enable + """ concurrencies = utils.generate_doubled_list(1, 64) expected_configs = [ diff --git a/tests/test_quick_run_config_generator.py b/tests/test_quick_run_config_generator.py index 914a78133..0ca6e8bf6 100755 --- a/tests/test_quick_run_config_generator.py +++ b/tests/test_quick_run_config_generator.py @@ -191,15 +191,16 @@ def setUp(self): def 
         """Test that get_starting_coordinate() works for non-zero values"""
-        # yapf: disable
         dims = SearchDimensions()
-        dims.add_dimensions(0, [
+        dims.add_dimensions(
+            0,
+            [
                 SearchDimension("x", SearchDimension.DIMENSION_TYPE_EXPONENTIAL, min=2),
                 SearchDimension("y", SearchDimension.DIMENSION_TYPE_LINEAR, min=1),
-                SearchDimension("z", SearchDimension.DIMENSION_TYPE_EXPONENTIAL, min=3)
-        ])
-        sc = SearchConfig(dimensions=dims,radius=2, min_initialized=2)
-        # yapf: enable
+                SearchDimension("z", SearchDimension.DIMENSION_TYPE_EXPONENTIAL, min=3),
+            ],
+        )
+        sc = SearchConfig(dimensions=dims, radius=2, min_initialized=2)
         qrcg = QuickRunConfigGenerator(
             sc,
             MagicMock(),
@@ -298,7 +299,6 @@ def test_get_next_run_config(self):
                 }
             },
         }
-        # yapf: enable

         rc = qrcg._get_next_run_config()

@@ -329,57 +329,43 @@ def test_get_next_run_config_multi_model(self):
        - existing values for perf-analyzer config should persist if they aren't overwritten
        """

-        # yapf: disable
         fake_base_config1 = {
             "name": "fake_model_name1",
-            "input": [{
-                "name": "INPUT__0",
-                "dataType": "TYPE_FP32",
-                "dims": [16]
-            }],
+            "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": [16]}],
             "max_batch_size": 4,
-            "sequence_batching": {}
+            "sequence_batching": {},
         }
         fake_base_config2 = {
             "name": "fake_model_name2",
-            "input": [{
-                "name": "INPUT__2",
-                "dataType": "TYPE_FP16",
-                "dims": [32]
-            }],
-            "max_batch_size": 8
+            "input": [{"name": "INPUT__2", "dataType": "TYPE_FP16", "dims": [32]}],
+            "max_batch_size": 8,
         }

         expected_model_config1 = {
-            'instanceGroup': [{
-                'count': 3,
-                'kind': 'KIND_GPU',
-            }],
-            'maxBatchSize': 2,
-            'sequenceBatching': {},
-            'name': 'fake_model_name1',
-            'input': [{
-                "name": "INPUT__0",
-                "dataType": "TYPE_FP32",
-                "dims": ['16']
-            }]
+            "instanceGroup": [
+                {
+                    "count": 3,
+                    "kind": "KIND_GPU",
+                }
+            ],
+            "maxBatchSize": 2,
+            "sequenceBatching": {},
+            "name": "fake_model_name1",
+            "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
         }
         expected_model_config2 = {
-            'dynamicBatching': {},
-            'instanceGroup': [{
-                'count': 6,
-                'kind': 'KIND_GPU',
-            }],
-            'maxBatchSize': 16,
-            'name': 'fake_model_name2',
-            'input': [{
-                "name": "INPUT__2",
-                "dataType": "TYPE_FP16",
-                "dims": ['32']
-            }]
+            "dynamicBatching": {},
+            "instanceGroup": [
+                {
+                    "count": 6,
+                    "kind": "KIND_GPU",
+                }
+            ],
+            "maxBatchSize": 16,
+            "name": "fake_model_name2",
+            "input": [{"name": "INPUT__2", "dataType": "TYPE_FP16", "dims": ["32"]}],
         }
-        # yapf: enable

         mock_models = []
         with patch(
@@ -487,14 +473,12 @@ def test_default_config_generation(self):
             "/tmp/my_config.yml",
         ]

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            profile_models:
            - my-model:
                perf_analyzer_flags:
                    percentile: 96
-            """)
-        # yapf: enable
+            """

         config = evaluate_mock_config(args, yaml_str, subcommand="profile")

@@ -553,14 +537,12 @@ def test_default_ensemble_config_generation(self):
             "/tmp/my_config.yml",
         ]

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            profile_models:
            - my-model:
                perf_analyzer_flags:
                    percentile: 96
-            """)
-        # yapf: enable
+            """

         config = evaluate_mock_config(args, yaml_str, subcommand="profile")

@@ -636,14 +618,12 @@ def test_default_bls_config_generation(self):
             "bls_composing_modelA,bls_composing_modelB",
         ]

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            profile_models:
            - my-model:
                perf_analyzer_flags:
                    percentile: 96
-            """)
-        # yapf: enable
+            """

         config = evaluate_mock_config(args, yaml_str, subcommand="profile")

@@ -770,7 +750,6 @@ def test_get_next_run_config_max_batch_size(self):
             "name": "fake_model_name",
             "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
         }
-        # yapf: enable

         rc = qrcg._get_next_run_config()

@@ -822,7 +801,6 @@ def test_get_next_run_config_max_instance_count(self):
             "name": "fake_model_name",
             "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
         }
-        # yapf: enable

         rc = qrcg._get_next_run_config()

@@ -874,7 +852,6 @@ def test_get_next_run_config_min_batch_size(self):
             "name": "fake_model_name",
             "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
         }
-        # yapf: enable

         rc = qrcg._get_next_run_config()

@@ -926,7 +903,6 @@ def test_get_next_run_config_min_instance_count(self):
             "name": "fake_model_name",
             "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
         }
-        # yapf: enable

         rc = qrcg._get_next_run_config()

@@ -994,37 +970,31 @@ def _get_next_run_config_ensemble(self, max_concurrency=0, min_concurrency=0):
             additional_args.append("--run-config-search-min-concurrency")
             additional_args.append(f"{min_concurrency}")

-        # yapf: disable
         expected_model_A_config_0 = {
-            'instanceGroup': [{
-                'count': 3,
-                'kind': 'KIND_GPU',
-            }],
-            'maxBatchSize': 2,
-            'sequenceBatching': {},
-            'name': 'fake_model_A',
-            'input': [{
-                "name": "INPUT__0",
-                "dataType": "TYPE_FP32",
-                "dims": ['16']
-            }]
+            "instanceGroup": [
+                {
+                    "count": 3,
+                    "kind": "KIND_GPU",
+                }
+            ],
+            "maxBatchSize": 2,
+            "sequenceBatching": {},
+            "name": "fake_model_A",
+            "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
         }
         expected_model_B_config_0 = {
-            'dynamicBatching': {},
-            'instanceGroup': [{
-                'count': 6,
-                'kind': 'KIND_CPU',
-            }],
-            'maxBatchSize': 16,
-            'name': 'fake_model_B',
-            'input': [{
-                "name": "INPUT__2",
-                "dataType": "TYPE_FP16",
-                "dims": ['32']
-            }]
+            "dynamicBatching": {},
+            "instanceGroup": [
+                {
+                    "count": 6,
+                    "kind": "KIND_CPU",
+                }
+            ],
+            "maxBatchSize": 16,
+            "name": "fake_model_B",
+            "input": [{"name": "INPUT__2", "dataType": "TYPE_FP16", "dims": ["32"]}],
         }
-        # yapf: enable

         config = self._create_config(additional_args)

@@ -1158,53 +1128,45 @@ def _get_next_run_config_bls(self, max_concurrency=0, min_concurrency=0):
             additional_args.append("--run-config-search-min-concurrency")
             additional_args.append(f"{min_concurrency}")

-        # yapf: disable
         expected_model_config = {
-            'instanceGroup': [{
-                'count': 3,
-                'kind': 'KIND_GPU',
-            }],
-            'maxBatchSize': 2,
-            'dynamicBatching': {},
-            'name': 'my-model',
-            'platform': 'bls',
-            'input': [{
-                "name": "INPUT__0",
-                "dataType": "TYPE_FP32",
-                "dims": ['16']
-            }]
+            "instanceGroup": [
+                {
+                    "count": 3,
+                    "kind": "KIND_GPU",
+                }
+            ],
+            "maxBatchSize": 2,
+            "dynamicBatching": {},
+            "name": "my-model",
+            "platform": "bls",
+            "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
         }
         expected_composing_model_config0 = {
-            'instanceGroup': [{
-                'count': 5,
-                'kind': 'KIND_GPU',
-            }],
-            'maxBatchSize': 8,
-            'sequenceBatching': {},
-            'name': 'fake_model_A',
-            'input': [{
-                "name": "INPUT__0",
-                "dataType": "TYPE_FP32",
-                "dims": ['16']
-            }]
+            "instanceGroup": [
+                {
+                    "count": 5,
+                    "kind": "KIND_GPU",
+                }
+            ],
+            "maxBatchSize": 8,
+            "sequenceBatching": {},
+            "name": "fake_model_A",
+            "input": [{"name": "INPUT__0", "dataType": "TYPE_FP32", "dims": ["16"]}],
        }
         expected_composing_model_config1 = {
-            'dynamicBatching': {},
-            'instanceGroup': [{
-                'count': 7,
-                'kind': 'KIND_GPU',
-            }],
-            'maxBatchSize': 32,
-            'name': 'fake_model_B',
-            'input': [{
-                "name": "INPUT__2",
-                "dataType": "TYPE_FP16",
-                "dims": ['32']
-            }]
+            "dynamicBatching": {},
+            "instanceGroup": [
+                {
+                    "count": 7,
+                    "kind": "KIND_GPU",
+                }
+            ],
+            "maxBatchSize": 32,
+            "name": "fake_model_B",
+            "input": [{"name": "INPUT__2", "dataType": "TYPE_FP16", "dims": ["32"]}],
         }
-        # yapf: enable

         config = self._create_config(additional_args)

@@ -1328,12 +1290,10 @@ def _create_config(self, additional_args=[]):
         for arg in additional_args:
             args.append(arg)

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            profile_models:
            - my-model
-            """)
-        # yapf: enable
+            """

         config = evaluate_mock_config(args, yaml_str, subcommand="profile")

diff --git a/tests/test_run_config_generator.py b/tests/test_run_config_generator.py
index b58850b9f..ed7c837fe 100755
--- a/tests/test_run_config_generator.py
+++ b/tests/test_run_config_generator.py
@@ -57,12 +57,10 @@ def test_default_config_single_model(self):
         total = (num_PAC * num_MC) will be generated by the auto-search
         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            profile_models:
            - my-model
-            """)
-        # yapf: enable
+            """

         expected_pa_configs = len(
             utils.generate_doubled_list(1, DEFAULT_RUN_CONFIG_MAX_CONCURRENCY)
@@ -94,8 +92,7 @@ def test_two_models(self):
         total = default_step + model_total * model_total = 68
         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 2
            run_config_search_max_instance_count: 2
            run_config_search_max_concurrency: 2
            profile_models:
@@ -103,8 +100,7 @@ def test_two_models(self):
            - my-model
            - my-modelB

-            """)
-        # yapf: enable
+            """

         expected_num_of_configs = 68

@@ -147,8 +143,7 @@ def test_two_uneven_models(self):
         total = default_step + modelA_total * modelB_total = 150
         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 2
            run_config_search_max_instance_count: 2
            run_config_search_max_concurrency: 2
            profile_models:
@@ -163,8 +158,7 @@ def test_two_uneven_models(self):
                        -
                            kind: KIND_GPU
                            count: [1,2]

-            """)
-        # yapf: enable
+            """

         expected_num_of_configs = 150

@@ -196,8 +190,7 @@ def test_three_uneven_models(self):
         total = default_step + modelA_total * modelB_total * modelC_total = 1164
         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 2
            run_config_search_max_instance_count: 2
            run_config_search_max_concurrency: 2
            profile_models:
@@ -217,8 +210,7 @@ def test_three_uneven_models(self):
                        -
                            kind: KIND_GPU
                            count: [1,2,3]

-            """)
-        # yapf: enable
+            """

         expected_num_of_configs = 1164
         # All 2 times that the leaf generator is done with default config will also pass results to root generator

@@ -270,16 +262,14 @@ def test_early_backoff_leaf_model(self):

         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 8
            run_config_search_max_instance_count: 2
            run_config_search_max_concurrency: 2
            profile_models:
            - my-model
            - my-modelB

-            """)
-        # yapf: enable
+            """

         expected_num_of_configs = 256

@@ -318,16 +308,14 @@ def test_early_backoff_root_model(self):

         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 8
            run_config_search_max_instance_count: 2
            run_config_search_max_concurrency: 2
            profile_models:
            - my-model
            - my-modelB

-            """)
-        # yapf: enable
+            """

         expected_num_of_configs = 196

@@ -365,16 +353,14 @@ def test_measurement_list(self):

         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 8
            run_config_search_max_instance_count: 2
            run_config_search_max_concurrency: 2
            profile_models:
            - my-model
            - my-modelB

-            """)
-        # yapf: enable
+            """

         expected_num_of_configs = 260

@@ -401,8 +387,7 @@ def test_matching_triton_server_env(self):
         Test that we don't assert if triton server environments match:
         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 2
            run_config_search_max_instance_count: 2
            run_config_search_max_concurrency: 2
            profile_models:
@@ -417,8 +402,7 @@ def test_matching_triton_server_env(self):
                    triton_server_environment:
                        'LD_PRELOAD': fake_preload_1,
                        'LD_LIBRARY_PATH': fake_library_path_1
-            """)
+            """

         expected_num_of_configs = 68
         self._run_and_test_run_config_generator(
@@ -430,8 +414,7 @@ def test_mismatching_triton_server_env(self):
         Test that we assert if triton server environments don't match:
         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 2
            run_config_search_max_instance_count: 2
            run_config_search_max_concurrency: 2
            profile_models:
@@ -446,8 +429,7 @@ def test_mismatching_triton_server_env(self):
                    triton_server_environment:
                        'LD_PRELOAD': fake_preload_2,
                        'LD_LIBRARY_PATH': fake_library_path_2
-            """)
-        # yapf: enable
+            """

         with self.assertRaises(TritonModelAnalyzerException):
             expected_num_of_configs = 100

@@ -466,27 +448,33 @@ def test_none_result_in_max_batch_size(self):

         expected_num_of_configs = 17

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 4
            run_config_search_max_instance_count: 3
            run_config_search_max_concurrency: 2
            profile_models:
            - my-model
-            """)
+            """

         perf_throughput_values = [
-            1, 2,   # Default, Concurrency 1 and 2
-            1, 2,   # Instances=1, MaxBatch=1, Concurrency 1 and 2
-            3, 4,   # Instances=1, MaxBatch=2, Concurrency 1 and 2
-            5, 6,   # Instances=1, MaxBatch=4, Concurrency 1 and 2
-            7, 8,   # Instances=2, MaxBatch=1, Concurrency 1 and 2
+            1,
+            2,  # Default, Concurrency 1 and 2
+            1,
+            2,  # Instances=1, MaxBatch=1, Concurrency 1 and 2
+            3,
+            4,  # Instances=1, MaxBatch=2, Concurrency 1 and 2
+            5,
+            6,  # Instances=1, MaxBatch=4, Concurrency 1 and 2
+            7,
+            8,  # Instances=2, MaxBatch=1, Concurrency 1 and 2
             None,  # Instances=2, MaxBatch=2, Concurrency 1.
-            9,10,   # Instances=3, MaxBatch=1, Concurrency 1 and 2
-            11,12,  # Instances=3, MaxBatch=2, Concurrency 1 and 2
-            13,14   # Instances=3, MaxBatch=4, Concurrency 1 and 2
+            9,
+            10,  # Instances=3, MaxBatch=1, Concurrency 1 and 2
+            11,
+            12,  # Instances=3, MaxBatch=2, Concurrency 1 and 2
+            13,
+            14,  # Instances=3, MaxBatch=4, Concurrency 1 and 2
         ]
-        # yapf: enable

         with patch.object(
             TestRunConfigGenerator, "_get_next_perf_throughput_value"
@@ -508,21 +496,43 @@ def test_none_result_before_threshold(self):
         #

         expected_num_of_configs = 27

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 2
            run_config_search_max_instance_count: 1
            run_config_search_max_concurrency: 2048
            profile_models:
            - my-model
-            """)
+            """

         perf_throughput_values = [
-            1,2,4,8,16,32,64,128,256,512,1024,2048,  # Default config
-            1,2,None,                                # Batch size 1
-            1,2,4,8,16,32,64,128,256,512,1024,2048   # Batch size 2
+            1,
+            2,
+            4,
+            8,
+            16,
+            32,
+            64,
+            128,
+            256,
+            512,
+            1024,
+            2048,  # Default config
+            1,
+            2,
+            None,  # Batch size 1
+            1,
+            2,
+            4,
+            8,
+            16,
+            32,
+            64,
+            128,
+            256,
+            512,
+            1024,
+            2048,  # Batch size 2
         ]
-        # yapf: enable

         with patch.object(
             TestRunConfigGenerator, "_get_next_perf_throughput_value"
@@ -544,21 +554,51 @@ def test_none_result_after_threshold(self):
         #

         expected_num_of_configs = 35

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 2
            run_config_search_max_instance_count: 1
            run_config_search_max_concurrency: 2048
            profile_models:
            - my-model
-            """)
+            """

         perf_throughput_values = [
-            1,2,4,8,16,32,64,128,256,512,1024,2048,  # Default config
-            1,2,4,8,16,32,64,128,256,512,None,       # Batch size 1
-            1,2,4,8,16,32,64,128,256,512,1024,2048   # Batch size 2
+            1,
+            2,
+            4,
+            8,
+            16,
+            32,
+            64,
+            128,
+            256,
+            512,
+            1024,
+            2048,  # Default config
+            1,
+            2,
+            4,
+            8,
+            16,
+            32,
+            64,
+            128,
+            256,
+            512,
+            None,  # Batch size 1
+            1,
+            2,
+            4,
+            8,
+            16,
+            32,
+            64,
+            128,
+            256,
+            512,
+            1024,
+            2048,  # Batch size 2
         ]
-        # yapf: enable

         with patch.object(
             TestRunConfigGenerator, "_get_next_perf_throughput_value"
@@ -584,8 +624,7 @@ def test_variant_naming(self):
         modelB_config_4: 2 instance, max_batch_size=4
         """

-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_model_batch_size: 4
            run_config_search_max_instance_count: 2
            run_config_search_max_concurrency: 1
            profile_models:
@@ -600,7 +639,7 @@ def test_variant_naming(self):
                            kind: KIND_GPU
                            count: [1]
            - modelB
-            """)
+            """

         expected_modelB_name_order = [
             "modelB_config_default",
             "modelB_config_0",
             "modelB_config_1",
             "modelB_config_2",
             "modelB_config_3",
             "modelB_config_4",
             "modelB_config_0",
             "modelB_config_1",
-            "modelB_config_5", # This was skipped, and needs to be created on second pass
+            "modelB_config_5",  # This was skipped, and needs to be created on second pass
             "modelB_config_2",
             "modelB_config_3",
             "modelB_config_4",
         ]

         perf_throughput_values = [
-            1,       # Default config
-            1,None,  # A: 1 Instance, BS=1  B: 1 Instance, BS=1,2
-            2,3,4,   # A: 1 Instance, BS=1  B: 2 Instance, BS=1,2,4
-            5,6,7,   # A: 1 Instance, BS=2  B: 1 Instance, BS=1,2,4
-            8,9,10   # A: 1 Instance, BS=2  B: 2 Instance, BS=1,2,4
+            1,  # Default config
+            1,
+            None,  # A: 1 Instance, BS=1  B: 1 Instance, BS=1,2
+            2,
+            3,
+            4,  # A: 1 Instance, BS=1  B: 2 Instance, BS=1,2,4
+            5,
+            6,
+            7,  # A: 1 Instance, BS=2  B: 1 Instance, BS=1,2,4
+            8,
+            9,
+            10,  # A: 1 Instance, BS=2  B: 2 Instance, BS=1,2,4
         ]

         expected_num_of_configs = 12

diff --git a/tests/test_yaml_options.py b/tests/test_yaml_options.py
index 1c7ce9a4d..820e51b4c 100755
--- a/tests/test_yaml_options.py
+++ b/tests/test_yaml_options.py
@@ -66,8 +66,7 @@ def test_multiple_options(self):
         )

     def test_valid_yaml_file(self):
-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run_config_search_max_instance_count: 16
            run_config_search_disable: True
            profile_models:
@@ -78,8 +77,7 @@ def test_valid_yaml_file(self):
                        -
                            kind: KIND_GPU
                            count: [1,2]

-            """)
-        # yapf: enable
+            """

         yaml_config = self._load_config_file(yaml_str)
         YamlConfigValidator.validate(yaml_config)

@@ -89,8 +87,7 @@ def test_invalid_yaml_file(self):
         Raises an exception because run-config-search-max-instance-count: 16
         uses hyphens instead of the required underscores
         """
-        # yapf: disable
-        yaml_str = ("""
+        yaml_str = """
            run-config-search-max-instance-count: 16
            run_config_search_disable: True
            profile_models:
@@ -101,8 +98,7 @@ def test_invalid_yaml_file(self):
                        -
                            kind: KIND_GPU
                            count: [1,2]

-            """)
-        # yapf: enable
+            """

         yaml_config = self._load_config_file(yaml_str)
         with self.assertRaises(TritonModelAnalyzerException):