Support custom artifacts directory and improve default artifacts directory (#636)

nv-hwoo · mc-nv · commit 41a3ac2f5f44 · 2024-05-13T14:09:09.000-07:00
* Add artifacts dir option and more descriptive profile export filename

* Clean up

* Fix input data path

* Add tests

* Create a one-to-one plot directory for each profile run

* Change the artifact directory layout

* Add helper method
diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py
@@ -39,14 +39,15 @@ class OutputFormat(Enum):
     TENSORRTLLM = auto()
     VLLM = auto()
 
+    def to_lowercase(self):
+        return self.name.lower()
+
 
 class LlmInputs:
     """
     A library of methods that control the generation of LLM Inputs
     """
 
-    OUTPUT_FILENAME = DEFAULT_INPUT_DATA_JSON
-
     OPEN_ORCA_URL = "https://datasets-server.huggingface.co/rows?dataset=Open-Orca%2FOpenOrca&config=default&split=train"
     CNN_DAILYMAIL_URL = "https://datasets-server.huggingface.co/rows?dataset=cnn_dailymail&config=1.0.0&split=train"
 
@@ -92,6 +93,7 @@ def create_llm_inputs(
         add_stream: bool = False,
         tokenizer: Tokenizer = get_tokenizer(DEFAULT_TOKENIZER),
         extra_inputs: Optional[Dict] = None,
+        output_dir: Path = Path(""),
     ) -> Dict:
         """
         Given an input type, input format, and output type. Output a string of LLM Inputs
@@ -193,7 +195,7 @@ def create_llm_inputs(
             output_tokens_deterministic,
             model_name,
         )
-        cls._write_json_to_file(json_in_pa_format)
+        cls._write_json_to_file(json_in_pa_format, output_dir)
 
         return json_in_pa_format
 
@@ -540,8 +542,9 @@ def _convert_generic_json_to_trtllm_format(
         return pa_json
 
     @classmethod
-    def _write_json_to_file(cls, json_in_pa_format: Dict) -> None:
-        with open(DEFAULT_INPUT_DATA_JSON, "w") as f:
+    def _write_json_to_file(cls, json_in_pa_format: Dict, output_dir: Path) -> None:
+        filename = output_dir / DEFAULT_INPUT_DATA_JSON
+        with open(str(filename), "w") as f:
             f.write(json.dumps(json_in_pa_format, indent=2))
 
     @classmethod
diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_metrics.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_metrics.py
@@ -35,7 +35,6 @@
 
 import numpy as np
 import pandas as pd
-from genai_perf.constants import DEFAULT_ARTIFACT_DIR
 from genai_perf.tokenizer import Tokenizer
 from genai_perf.utils import load_json, remove_sse_prefix
 from rich.console import Console
@@ -377,15 +376,17 @@ def export_to_csv(self, csv_filename: str) -> None:
             for row in singular_metric_rows:
                 csv_writer.writerow(row)
 
-    def export_parquet(self, parquet_filename: str) -> None:
+    def export_parquet(self, artifact_dir: Path, filename: str) -> None:
         max_length = -1
         col_index = 0
         filler_list = []
         df = pd.DataFrame()
+
         # Data frames require all columns of the same length
         # find the max length column
         for key, value in self._metrics.data.items():
             max_length = max(max_length, len(value))
+
         # Insert None for shorter columns to match longest column
         for key, value in self._metrics.data.items():
             if len(value) < max_length:
@@ -395,9 +396,9 @@ def export_parquet(self, parquet_filename: str) -> None:
             diff = 0
             filler_list = []
             col_index = col_index + 1
-        df.to_parquet(
-            f"{DEFAULT_ARTIFACT_DIR}/data/{parquet_filename}.gzip", compression="gzip"
-        )
+
+        filepath = artifact_dir / f"{filename}.gzip"
+        df.to_parquet(filepath, compression="gzip")
 
 
 class ProfileDataParser:
diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/main.py b/src/c++/perf_analyzer/genai-perf/genai_perf/main.py
@@ -26,32 +26,27 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 import os
-import shutil
 import sys
 import traceback
 from argparse import Namespace
 from pathlib import Path
 
 import genai_perf.logging as logging
 from genai_perf import parser
-from genai_perf.constants import DEFAULT_ARTIFACT_DIR, DEFAULT_PARQUET_FILE
+from genai_perf.constants import DEFAULT_PARQUET_FILE
 from genai_perf.exceptions import GenAIPerfException
 from genai_perf.llm_inputs.llm_inputs import LlmInputs
-from genai_perf.llm_metrics import LLMProfileDataParser, Statistics
+from genai_perf.llm_metrics import LLMProfileDataParser
 from genai_perf.plots.plot_config_parser import PlotConfigParser
 from genai_perf.plots.plot_manager import PlotManager
 from genai_perf.tokenizer import Tokenizer, get_tokenizer
 
 
-def init_logging() -> None:
-    logging.init_logging()
-
-
-def create_artifacts_dirs(generate_plots: bool) -> None:
-    if not os.path.exists(f"{DEFAULT_ARTIFACT_DIR}"):
-        os.mkdir(f"{DEFAULT_ARTIFACT_DIR}")
-        os.mkdir(f"{DEFAULT_ARTIFACT_DIR}/data")
-        os.mkdir(f"{DEFAULT_ARTIFACT_DIR}/plots")
+def create_artifacts_dirs(args: Namespace) -> None:
+    # TMA-1911: support plots CLI option
+    plot_dir = args.artifact_dir / "plots"
+    os.makedirs(args.artifact_dir, exist_ok=True)
+    os.makedirs(plot_dir, exist_ok=True)
 
 
 def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
@@ -82,6 +77,7 @@ def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
         add_stream=args.streaming,
         tokenizer=tokenizer,
         extra_inputs=extra_input_dict,
+        output_dir=args.artifact_dir,
     )
 
 
@@ -93,12 +89,12 @@ def calculate_metrics(args: Namespace, tokenizer: Tokenizer) -> LLMProfileDataPa
 
 
 def report_output(data_parser: LLMProfileDataParser, args: Namespace) -> None:
-    if "concurrency_range" in args:
+    if args.concurrency:
         infer_mode = "concurrency"
-        load_level = args.concurrency_range
-    elif "request_rate_range" in args:
+        load_level = f"{args.concurrency}"
+    elif args.request_rate:
         infer_mode = "request_rate"
-        load_level = args.request_rate_range
+        load_level = f"{args.request_rate}"
     else:
         raise GenAIPerfException("No valid infer mode specified")
 
@@ -107,56 +103,48 @@ def report_output(data_parser: LLMProfileDataParser, args: Namespace) -> None:
         args.profile_export_file.stem + "_genai_perf.csv"
     )
     stats.export_to_csv(export_csv_name)
-    stats.export_parquet(DEFAULT_PARQUET_FILE)
+    stats.export_parquet(args.artifact_dir, DEFAULT_PARQUET_FILE)
     stats.pretty_print()
     if args.generate_plots:
-        create_plots(args.profile_export_file)
+        create_plots(args)
 
 
-def create_plots(filename: Path) -> None:
-    output_dir = Path(f"{DEFAULT_ARTIFACT_DIR}/plots")
-    PlotConfigParser.create_init_yaml_config([filename], output_dir)
-    config_parser = PlotConfigParser(output_dir / "config.yaml")
+def create_plots(args: Namespace) -> None:
+    # TMA-1911: support plots CLI option
+    plot_dir = args.artifact_dir / "plots"
+    PlotConfigParser.create_init_yaml_config(
+        filenames=[args.profile_export_file],  # single run
+        output_dir=plot_dir,
+    )
+    config_parser = PlotConfigParser(plot_dir / "config.yaml")
     plot_configs = config_parser.generate_configs()
     plot_manager = PlotManager(plot_configs)
     plot_manager.generate_plots()
 
 
-def finalize(profile_export_file: Path):
-    shutil.move("llm_inputs.json", f"{DEFAULT_ARTIFACT_DIR}/data/llm_inputs.json")
-    shutil.move(
-        profile_export_file, f"{DEFAULT_ARTIFACT_DIR}/data/{profile_export_file}"
-    )
-    profile_export_file_csv = profile_export_file.stem + "_genai_perf.csv"
-    shutil.move(
-        profile_export_file_csv,
-        f"{DEFAULT_ARTIFACT_DIR}/data/{profile_export_file_csv}",
-    )
-
-
 # Separate function that can raise exceptions used for testing
 # to assert correct errors and messages.
 def run():
     try:
         # TMA-1900: refactor CLI handler
-        init_logging()
+        logging.init_logging()
         args, extra_args = parser.parse_args()
         if args.subcommand == "compare":
             args.func(args)
         else:
-            create_artifacts_dirs(args.generate_plots)
+            create_artifacts_dirs(args)
             tokenizer = get_tokenizer(args.tokenizer)
             generate_inputs(args, tokenizer)
             args.func(args, extra_args)
             data_parser = calculate_metrics(args, tokenizer)
             report_output(data_parser, args)
-            finalize(args.profile_export_file)
     except Exception as e:
         raise GenAIPerfException(e)
 
 
 def main():
-    # Interactive use will catch exceptions and log formatted errors rather than tracebacks.
+    # Interactive use will catch exceptions and log formatted errors rather than
+    # tracebacks.
     try:
         run()
     except Exception as e:
diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py b/src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
@@ -31,7 +31,12 @@
 
 import genai_perf.logging as logging
 import genai_perf.utils as utils
-from genai_perf.constants import CNN_DAILY_MAIL, DEFAULT_COMPARE_DIR, OPEN_ORCA
+from genai_perf.constants import (
+    CNN_DAILY_MAIL,
+    DEFAULT_ARTIFACT_DIR,
+    DEFAULT_COMPARE_DIR,
+    OPEN_ORCA,
+)
 from genai_perf.llm_inputs.llm_inputs import LlmInputs, OutputFormat, PromptSource
 from genai_perf.plots.plot_config_parser import PlotConfigParser
 from genai_perf.plots.plot_manager import PlotManager
@@ -119,19 +124,42 @@ def _check_conditional_args(
     return args
 
 
-def _update_load_manager_args(args: argparse.Namespace) -> argparse.Namespace:
+def _check_load_manager_args(args: argparse.Namespace) -> argparse.Namespace:
     """
-    Update genai-perf load manager attributes to PA format
+    Check inference load args
     """
-    for attr_key in ["concurrency", "request_rate"]:
-        attr_val = getattr(args, attr_key)
-        if attr_val is not None:
-            setattr(args, f"{attr_key}_range", f"{attr_val}")
-            delattr(args, attr_key)
-            return args
-
     # If no concurrency or request rate is set, default to 1
-    setattr(args, "concurrency_range", "1")
+    if not args.concurrency and not args.request_rate:
+        args.concurrency = 1
+    return args
+
+
+def _set_artifact_paths(args: argparse.Namespace) -> argparse.Namespace:
+    """
+    Set paths for all the artifacts.
+    """
+    if args.artifact_dir == Path(DEFAULT_ARTIFACT_DIR):
+        name = [f"{args.model}"]
+        if args.service_kind == "openai":
+            name += [f"{args.service_kind}-{args.endpoint_type}"]
+        elif args.service_kind == "triton":
+            name += [f"{args.service_kind}-{args.backend.to_lowercase()}"]
+        else:
+            raise ValueError(f"Unknown service kind '{args.service_kind}'.")
+
+        if args.concurrency:
+            name += [f"concurrency{args.concurrency}"]
+        elif args.request_rate:
+            name += [f"request_rate{args.request_rate}"]
+        args.artifact_dir = args.artifact_dir / Path("-".join(name))
+
+    if args.profile_export_file.parent != Path(""):
+        raise ValueError(
+            "Please use --artifact-dir option to define intermediary paths to "
+            "the profile export file."
+        )
+
+    args.profile_export_file = args.artifact_dir / args.profile_export_file
     return args
 
 
@@ -367,24 +395,29 @@ def _add_endpoint_args(parser):
 
 def _add_output_args(parser):
     output_group = parser.add_argument_group("Output")
-
     output_group.add_argument(
         "--generate-plots",
         action="store_true",
         required=False,
         help="An option to enable the generation of plots.",
     )
-
     output_group.add_argument(
         "--profile-export-file",
         type=Path,
-        default="profile_export.json",
+        default=Path("profile_export.json"),
         help="The path where the perf_analyzer profile export will be "
         "generated. By default, the profile export will be to profile_export.json. "
         "The genai-perf file will be exported to <profile_export_file>_genai_perf.csv. "
         "For example, if the profile export file is profile_export.json, the genai-perf file will be "
         "exported to profile_export_genai_perf.csv.",
     )
+    output_group.add_argument(
+        "--artifact-dir",
+        type=Path,
+        default=Path(DEFAULT_ARTIFACT_DIR),
+        help="The directory to store all the (output) artifacts generated by "
+        "GenAI-Perf and Perf Analyzer.",
+    )
 
 
 def _add_other_args(parser):
@@ -547,6 +580,7 @@ def parse_args():
     args = _check_model_args(parser, args)
     args = _check_conditional_args(parser, args)
     args = _check_compare_args(compare_parser, args)
-    args = _update_load_manager_args(args)
+    args = _check_load_manager_args(args)
+    args = _set_artifact_paths(args)
 
     return args, argv[passthrough_index + 1 :]
diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py b/src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py
@@ -37,8 +37,8 @@
 
 class Profiler:
     @staticmethod
-    def add_protocol_args(args: Namespace):
-        cmd = [""]
+    def add_protocol_args(args: Namespace) -> list[str]:
+        cmd = []
         if args.service_kind == "triton":
             cmd += ["-i", "grpc", "--streaming"]
             if args.u is None:  # url
@@ -49,6 +49,15 @@ def add_protocol_args(args: Namespace):
             cmd += ["-i", "http"]
         return cmd
 
+    @staticmethod
+    def add_inference_load_args(args: Namespace) -> list[str]:
+        cmd = []
+        if args.concurrency:
+            cmd += ["--concurrency-range", f"{args.concurrency}"]
+        elif args.request_rate:
+            cmd += ["--request-rate-range", f"{args.request_rate}"]
+        return cmd
+
     @staticmethod
     def build_cmd(args: Namespace, extra_args: list[str] | None = None) -> list[str]:
         skip_args = [
@@ -77,6 +86,9 @@ def build_cmd(args: Namespace, extra_args: list[str] | None = None) -> list[str]
             "endpoint_type",
             "generate_plots",
             "subcommand",
+            "concurrency",
+            "request_rate",
+            "artifact_dir",
         ]
 
         utils.remove_file(args.profile_export_file)
@@ -87,7 +99,7 @@ def build_cmd(args: Namespace, extra_args: list[str] | None = None) -> list[str]
             f"{args.model}",
             f"--async",
             f"--input-data",
-            f"{DEFAULT_INPUT_DATA_JSON}",
+            f"{args.artifact_dir / DEFAULT_INPUT_DATA_JSON}",
         ]
         for arg, value in vars(args).items():
             if arg in skip_args:
@@ -109,6 +121,7 @@ def build_cmd(args: Namespace, extra_args: list[str] | None = None) -> list[str]
                     cmd += [f"--{arg}", f"{value}"]
 
         cmd += Profiler.add_protocol_args(args)
+        cmd += Profiler.add_inference_load_args(args)
 
         if extra_args is not None:
             for arg in extra_args:
diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_cli.py b/src/c++/perf_analyzer/genai-perf/tests/test_cli.py
diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_wrapper.py b/src/c++/perf_analyzer/genai-perf/tests/test_wrapper.py