2828
2929import csv
3030import json
31+ from enum import Enum , auto
3132from itertools import pairwise
33+ from pathlib import Path
3234from typing import List
3335
3436import numpy as np
3537import pandas as pd
3638from genai_perf .constants import DEFAULT_ARTIFACT_DIR
37- from genai_perf .llm_inputs .llm_inputs import OutputFormat
3839from genai_perf .tokenizer import Tokenizer
3940from genai_perf .utils import load_json , remove_sse_prefix
4041from rich .console import Console
4142from rich .table import Table
4243
43- _OPENAI_CHAT_COMPLETIONS = OutputFormat .OPENAI_CHAT_COMPLETIONS
44- _OPENAI_COMPLETIONS = OutputFormat .OPENAI_COMPLETIONS
44+
class ResponseFormat(Enum):
    """Format of the inference responses recorded in a profile export.

    Inferred from the profile metadata endpoint (or, as a fallback, by
    sniffing the response bodies) rather than supplied by the caller.
    """

    OPENAI_CHAT_COMPLETIONS = auto()  # OpenAI v1/chat/completions responses
    OPENAI_COMPLETIONS = auto()  # OpenAI v1/completions responses
    TRITON = auto()  # Triton inference server responses
4650
4751class Metrics :
@@ -401,10 +405,36 @@ class ProfileDataParser:
401405 extract core metrics and calculate various performance statistics.
402406 """
403407
404- def __init__ (self , filename : str ) -> None :
408+ def __init__ (self , filename : Path ) -> None :
405409 data = load_json (filename )
410+ self ._get_profile_metadata (data )
406411 self ._parse_profile_data (data )
407412
413+ def _get_profile_metadata (self , data : dict ) -> None :
414+ self ._service_kind = data ["service_kind" ]
415+ if self ._service_kind == "openai" :
416+ if data ["endpoint" ] == "v1/chat/completions" :
417+ self ._response_format = ResponseFormat .OPENAI_CHAT_COMPLETIONS
418+ elif data ["endpoint" ] == "v1/completions" :
419+ self ._response_format = ResponseFormat .OPENAI_COMPLETIONS
420+ else :
421+ # TPA-66: add PA metadata to handle this case
422+ # When endpoint field is either empty or custom endpoint, fall
423+ # back to parsing the response to extract the response format.
424+ request = data ["experiments" ][0 ]["requests" ][0 ]
425+ response = request ["response_outputs" ][0 ]["response" ]
426+ if "chat.completion" in response :
427+ self ._response_format = ResponseFormat .OPENAI_CHAT_COMPLETIONS
428+ elif "text_completion" in response :
429+ self ._response_format = ResponseFormat .OPENAI_COMPLETIONS
430+ else :
431+ raise RuntimeError ("Unknown OpenAI response format." )
432+
433+ elif self ._service_kind == "triton" :
434+ self ._response_format = ResponseFormat .TRITON
435+ else :
436+ raise ValueError (f"Unknown service kind: { self ._service_kind } " )
437+
408438 def _parse_profile_data (self , data : dict ) -> None :
409439 """Parse through the entire profile data to collect statistics."""
410440 self ._profile_results = {}
@@ -429,6 +459,10 @@ def get_statistics(self, infer_mode: str, load_level: str) -> Statistics:
429459 raise KeyError (f"Profile with { infer_mode } ={ load_level } does not exist." )
430460 return self ._profile_results [(infer_mode , load_level )]
431461
462+ def get_profile_load_info (self ) -> list [tuple [str , str ]]:
463+ """Return available (infer_mode, load_level) tuple keys."""
464+ return [k for k , _ in self ._profile_results .items ()]
465+
432466
433467class LLMProfileDataParser (ProfileDataParser ):
434468 """A class that calculates and aggregates all the LLM performance statistics
@@ -447,7 +481,6 @@ class LLMProfileDataParser(ProfileDataParser):
447481 >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
448482 >>> pd = LLMProfileDataParser(
449483 >>> filename="profile_export.json",
450- >>> service_kind="triton",
451484 >>> tokenizer=tokenizer,
452485 >>> )
453486 >>> stats = pd.get_statistics(infer_mode="concurrency", level=10)
@@ -458,14 +491,10 @@ class LLMProfileDataParser(ProfileDataParser):
458491
459492 def __init__ (
460493 self ,
461- filename : str ,
462- service_kind : str ,
463- output_format : OutputFormat ,
494+ filename : Path ,
464495 tokenizer : Tokenizer ,
465496 ) -> None :
466497 self ._tokenizer = tokenizer
467- self ._service_kind = service_kind
468- self ._output_format = output_format
469498 super ().__init__ (filename )
470499
471500 def _parse_requests (self , requests : dict ) -> LLMMetrics :
@@ -591,9 +620,9 @@ def _tokenize_triton_request_input(self, req_inputs: dict) -> list[int]:
591620 def _tokenize_openai_request_input (self , req_inputs : dict ) -> list [int ]:
592621 """Tokenize the OpenAI request input texts."""
593622 payload = json .loads (req_inputs ["payload" ])
594- if self ._output_format == _OPENAI_CHAT_COMPLETIONS :
623+ if self ._response_format == ResponseFormat . OPENAI_CHAT_COMPLETIONS :
595624 input_text = payload ["messages" ][0 ]["content" ]
596- elif self ._output_format == _OPENAI_COMPLETIONS :
625+ elif self ._response_format == ResponseFormat . OPENAI_COMPLETIONS :
597626 input_text = payload ["prompt" ]
598627 else :
599628 raise ValueError (
0 commit comments