2828
2929import csv
3030import json
31+ from enum import Enum , auto
3132from itertools import pairwise
33+ from pathlib import Path
3234from typing import List
3335
3436import numpy as np
3537import pandas as pd
3638from genai_perf .constants import DEFAULT_ARTIFACT_DIR
37- from genai_perf .llm_inputs .llm_inputs import OutputFormat
3839from genai_perf .tokenizer import Tokenizer
3940from genai_perf .utils import load_json , remove_sse_prefix
4041from rich .console import Console
4142from rich .table import Table
4243
43- _OPENAI_CHAT_COMPLETIONS = OutputFormat .OPENAI_CHAT_COMPLETIONS
44- _OPENAI_COMPLETIONS = OutputFormat .OPENAI_COMPLETIONS
44+
class ResponseFormat(Enum):
    """Format of the inference responses recorded in a profile export.

    Inferred from the profile metadata endpoint (or, as a fallback, by
    sniffing the response bodies) rather than supplied by the caller.
    """

    OPENAI_CHAT_COMPLETIONS = auto()  # OpenAI v1/chat/completions responses
    OPENAI_COMPLETIONS = auto()  # OpenAI v1/completions responses
    TRITON = auto()  # Triton inference server responses
4650
4751class Metrics :
@@ -401,10 +405,36 @@ class ProfileDataParser:
401405 extract core metrics and calculate various performance statistics.
402406 """
403407
404- def __init__ (self , filename : str ) -> None :
408+ def __init__ (self , filename : Path ) -> None :
405409 data = load_json (filename )
410+ self ._get_profile_metadata (data )
406411 self ._parse_profile_data (data )
407412
413+ def _get_profile_metadata (self , data : dict ) -> None :
414+ self ._service_kind = data ["service_kind" ]
415+ if self ._service_kind == "openai" :
416+ if data ["endpoint" ] == "v1/chat/completions" :
417+ self ._response_format = ResponseFormat .OPENAI_CHAT_COMPLETIONS
418+ elif data ["endpoint" ] == "v1/completions" :
419+ self ._response_format = ResponseFormat .OPENAI_COMPLETIONS
420+ else :
421+ # TPA-66: add PA metadata to handle this case
422+ # When endpoint field is either empty or custom endpoint, fall
423+ # back to parsing the response to extract the response format.
424+ request = data ["experiments" ][0 ]["requests" ][0 ]
425+ response = request ["response_outputs" ][0 ]["response" ]
426+ if "chat.completion" in response :
427+ self ._response_format = ResponseFormat .OPENAI_CHAT_COMPLETIONS
428+ elif "text_completion" in response :
429+ self ._response_format = ResponseFormat .OPENAI_COMPLETIONS
430+ else :
431+ raise RuntimeError ("Unknown OpenAI response format." )
432+
433+ elif self ._service_kind == "triton" :
434+ self ._response_format = ResponseFormat .TRITON
435+ else :
436+ raise ValueError (f"Unknown service kind: { self ._service_kind } " )
437+
408438 def _parse_profile_data (self , data : dict ) -> None :
409439 """Parse through the entire profile data to collect statistics."""
410440 self ._profile_results = {}
@@ -429,6 +459,10 @@ def get_statistics(self, infer_mode: str, load_level: str) -> Statistics:
429459 raise KeyError (f"Profile with { infer_mode } ={ load_level } does not exist." )
430460 return self ._profile_results [(infer_mode , load_level )]
431461
462+ def get_profile_load_info (self ) -> list [tuple [str , str ]]:
463+ """Return available (infer_mode, load_level) tuple keys."""
464+ return [k for k , _ in self ._profile_results .items ()]
465+
432466
433467class LLMProfileDataParser (ProfileDataParser ):
434468 """A class that calculates and aggregates all the LLM performance statistics
@@ -447,7 +481,6 @@ class LLMProfileDataParser(ProfileDataParser):
447481 >>> tokenizer = AutoTokenizer.from_pretrained("gpt2")
448482 >>> pd = LLMProfileDataParser(
449483 >>> filename="profile_export.json",
450- >>> service_kind="triton",
451484 >>> tokenizer=tokenizer,
452485 >>> )
453486 >>> stats = pd.get_statistics(infer_mode="concurrency", level=10)
@@ -458,14 +491,10 @@ class LLMProfileDataParser(ProfileDataParser):
458491
459492 def __init__ (
460493 self ,
461- filename : str ,
462- service_kind : str ,
463- output_format : OutputFormat ,
494+ filename : Path ,
464495 tokenizer : Tokenizer ,
465496 ) -> None :
466497 self ._tokenizer = tokenizer
467- self ._service_kind = service_kind
468- self ._output_format = output_format
469498 super ().__init__ (filename )
470499
471500 def _parse_requests (self , requests : dict ) -> LLMMetrics :
@@ -591,9 +620,9 @@ def _tokenize_triton_request_input(self, req_inputs: dict) -> list[int]:
591620 def _tokenize_openai_request_input (self , req_inputs : dict ) -> list [int ]:
592621 """Tokenize the OpenAI request input texts."""
593622 payload = json .loads (req_inputs ["payload" ])
594- if self ._output_format == _OPENAI_CHAT_COMPLETIONS :
623+ if self ._response_format == ResponseFormat . OPENAI_CHAT_COMPLETIONS :
595624 input_text = payload ["messages" ][0 ]["content" ]
596- elif self ._output_format == _OPENAI_COMPLETIONS :
625+ elif self ._response_format == ResponseFormat . OPENAI_COMPLETIONS :
597626 input_text = payload ["prompt" ]
598627 else :
599628 raise ValueError (
0 commit comments