Skip to content

Commit 41a3ac2

Browse files
nv-hwoo authored and mc-nv committed
Support custom artifacts directory and improve default artifacts directory (#636)
* Add artifacts dir option and more descriptive profile export filename
* Clean up
* fix input data path
* Add tests
* create one to one plot dir for each profile run
* change the directory look
* add helper method
1 parent 87b9d07 commit 41a3ac2

File tree

7 files changed: +188 additions, -71 deletions (lines changed)

src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -39,14 +39,15 @@ class OutputFormat(Enum):
3939
TENSORRTLLM = auto()
4040
VLLM = auto()
4141

42+
def to_lowercase(self):
43+
return self.name.lower()
44+
4245

4346
class LlmInputs:
4447
"""
4548
A library of methods that control the generation of LLM Inputs
4649
"""
4750

48-
OUTPUT_FILENAME = DEFAULT_INPUT_DATA_JSON
49-
5051
OPEN_ORCA_URL = "https://datasets-server.huggingface.co/rows?dataset=Open-Orca%2FOpenOrca&config=default&split=train"
5152
CNN_DAILYMAIL_URL = "https://datasets-server.huggingface.co/rows?dataset=cnn_dailymail&config=1.0.0&split=train"
5253

@@ -92,6 +93,7 @@ def create_llm_inputs(
9293
add_stream: bool = False,
9394
tokenizer: Tokenizer = get_tokenizer(DEFAULT_TOKENIZER),
9495
extra_inputs: Optional[Dict] = None,
96+
output_dir: Path = Path(""),
9597
) -> Dict:
9698
"""
9799
Given an input type, input format, and output type. Output a string of LLM Inputs
@@ -193,7 +195,7 @@ def create_llm_inputs(
193195
output_tokens_deterministic,
194196
model_name,
195197
)
196-
cls._write_json_to_file(json_in_pa_format)
198+
cls._write_json_to_file(json_in_pa_format, output_dir)
197199

198200
return json_in_pa_format
199201

@@ -540,8 +542,9 @@ def _convert_generic_json_to_trtllm_format(
540542
return pa_json
541543

542544
@classmethod
543-
def _write_json_to_file(cls, json_in_pa_format: Dict) -> None:
544-
with open(DEFAULT_INPUT_DATA_JSON, "w") as f:
545+
def _write_json_to_file(cls, json_in_pa_format: Dict, output_dir: Path) -> None:
546+
filename = output_dir / DEFAULT_INPUT_DATA_JSON
547+
with open(str(filename), "w") as f:
545548
f.write(json.dumps(json_in_pa_format, indent=2))
546549

547550
@classmethod

src/c++/perf_analyzer/genai-perf/genai_perf/llm_metrics.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535

3636
import numpy as np
3737
import pandas as pd
38-
from genai_perf.constants import DEFAULT_ARTIFACT_DIR
3938
from genai_perf.tokenizer import Tokenizer
4039
from genai_perf.utils import load_json, remove_sse_prefix
4140
from rich.console import Console
@@ -377,15 +376,17 @@ def export_to_csv(self, csv_filename: str) -> None:
377376
for row in singular_metric_rows:
378377
csv_writer.writerow(row)
379378

380-
def export_parquet(self, parquet_filename: str) -> None:
379+
def export_parquet(self, artifact_dir: Path, filename: str) -> None:
381380
max_length = -1
382381
col_index = 0
383382
filler_list = []
384383
df = pd.DataFrame()
384+
385385
# Data frames require all columns of the same length
386386
# find the max length column
387387
for key, value in self._metrics.data.items():
388388
max_length = max(max_length, len(value))
389+
389390
# Insert None for shorter columns to match longest column
390391
for key, value in self._metrics.data.items():
391392
if len(value) < max_length:
@@ -395,9 +396,9 @@ def export_parquet(self, parquet_filename: str) -> None:
395396
diff = 0
396397
filler_list = []
397398
col_index = col_index + 1
398-
df.to_parquet(
399-
f"{DEFAULT_ARTIFACT_DIR}/data/{parquet_filename}.gzip", compression="gzip"
400-
)
399+
400+
filepath = artifact_dir / f"{filename}.gzip"
401+
df.to_parquet(filepath, compression="gzip")
401402

402403

403404
class ProfileDataParser:

src/c++/perf_analyzer/genai-perf/genai_perf/main.py

Lines changed: 26 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -26,32 +26,27 @@
2626
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2727

2828
import os
29-
import shutil
3029
import sys
3130
import traceback
3231
from argparse import Namespace
3332
from pathlib import Path
3433

3534
import genai_perf.logging as logging
3635
from genai_perf import parser
37-
from genai_perf.constants import DEFAULT_ARTIFACT_DIR, DEFAULT_PARQUET_FILE
36+
from genai_perf.constants import DEFAULT_PARQUET_FILE
3837
from genai_perf.exceptions import GenAIPerfException
3938
from genai_perf.llm_inputs.llm_inputs import LlmInputs
40-
from genai_perf.llm_metrics import LLMProfileDataParser, Statistics
39+
from genai_perf.llm_metrics import LLMProfileDataParser
4140
from genai_perf.plots.plot_config_parser import PlotConfigParser
4241
from genai_perf.plots.plot_manager import PlotManager
4342
from genai_perf.tokenizer import Tokenizer, get_tokenizer
4443

4544

46-
def init_logging() -> None:
47-
logging.init_logging()
48-
49-
50-
def create_artifacts_dirs(generate_plots: bool) -> None:
51-
if not os.path.exists(f"{DEFAULT_ARTIFACT_DIR}"):
52-
os.mkdir(f"{DEFAULT_ARTIFACT_DIR}")
53-
os.mkdir(f"{DEFAULT_ARTIFACT_DIR}/data")
54-
os.mkdir(f"{DEFAULT_ARTIFACT_DIR}/plots")
45+
def create_artifacts_dirs(args: Namespace) -> None:
46+
# TMA-1911: support plots CLI option
47+
plot_dir = args.artifact_dir / "plots"
48+
os.makedirs(args.artifact_dir, exist_ok=True)
49+
os.makedirs(plot_dir, exist_ok=True)
5550

5651

5752
def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
@@ -82,6 +77,7 @@ def generate_inputs(args: Namespace, tokenizer: Tokenizer) -> None:
8277
add_stream=args.streaming,
8378
tokenizer=tokenizer,
8479
extra_inputs=extra_input_dict,
80+
output_dir=args.artifact_dir,
8581
)
8682

8783

@@ -93,12 +89,12 @@ def calculate_metrics(args: Namespace, tokenizer: Tokenizer) -> LLMProfileDataPa
9389

9490

9591
def report_output(data_parser: LLMProfileDataParser, args: Namespace) -> None:
96-
if "concurrency_range" in args:
92+
if args.concurrency:
9793
infer_mode = "concurrency"
98-
load_level = args.concurrency_range
99-
elif "request_rate_range" in args:
94+
load_level = f"{args.concurrency}"
95+
elif args.request_rate:
10096
infer_mode = "request_rate"
101-
load_level = args.request_rate_range
97+
load_level = f"{args.request_rate}"
10298
else:
10399
raise GenAIPerfException("No valid infer mode specified")
104100

@@ -107,56 +103,48 @@ def report_output(data_parser: LLMProfileDataParser, args: Namespace) -> None:
107103
args.profile_export_file.stem + "_genai_perf.csv"
108104
)
109105
stats.export_to_csv(export_csv_name)
110-
stats.export_parquet(DEFAULT_PARQUET_FILE)
106+
stats.export_parquet(args.artifact_dir, DEFAULT_PARQUET_FILE)
111107
stats.pretty_print()
112108
if args.generate_plots:
113-
create_plots(args.profile_export_file)
109+
create_plots(args)
114110

115111

116-
def create_plots(filename: Path) -> None:
117-
output_dir = Path(f"{DEFAULT_ARTIFACT_DIR}/plots")
118-
PlotConfigParser.create_init_yaml_config([filename], output_dir)
119-
config_parser = PlotConfigParser(output_dir / "config.yaml")
112+
def create_plots(args: Namespace) -> None:
113+
# TMA-1911: support plots CLI option
114+
plot_dir = args.artifact_dir / "plots"
115+
PlotConfigParser.create_init_yaml_config(
116+
filenames=[args.profile_export_file], # single run
117+
output_dir=plot_dir,
118+
)
119+
config_parser = PlotConfigParser(plot_dir / "config.yaml")
120120
plot_configs = config_parser.generate_configs()
121121
plot_manager = PlotManager(plot_configs)
122122
plot_manager.generate_plots()
123123

124124

125-
def finalize(profile_export_file: Path):
126-
shutil.move("llm_inputs.json", f"{DEFAULT_ARTIFACT_DIR}/data/llm_inputs.json")
127-
shutil.move(
128-
profile_export_file, f"{DEFAULT_ARTIFACT_DIR}/data/{profile_export_file}"
129-
)
130-
profile_export_file_csv = profile_export_file.stem + "_genai_perf.csv"
131-
shutil.move(
132-
profile_export_file_csv,
133-
f"{DEFAULT_ARTIFACT_DIR}/data/{profile_export_file_csv}",
134-
)
135-
136-
137125
# Separate function that can raise exceptions used for testing
138126
# to assert correct errors and messages.
139127
def run():
140128
try:
141129
# TMA-1900: refactor CLI handler
142-
init_logging()
130+
logging.init_logging()
143131
args, extra_args = parser.parse_args()
144132
if args.subcommand == "compare":
145133
args.func(args)
146134
else:
147-
create_artifacts_dirs(args.generate_plots)
135+
create_artifacts_dirs(args)
148136
tokenizer = get_tokenizer(args.tokenizer)
149137
generate_inputs(args, tokenizer)
150138
args.func(args, extra_args)
151139
data_parser = calculate_metrics(args, tokenizer)
152140
report_output(data_parser, args)
153-
finalize(args.profile_export_file)
154141
except Exception as e:
155142
raise GenAIPerfException(e)
156143

157144

158145
def main():
159-
# Interactive use will catch exceptions and log formatted errors rather than tracebacks.
146+
# Interactive use will catch exceptions and log formatted errors rather than
147+
# tracebacks.
160148
try:
161149
run()
162150
except Exception as e:

src/c++/perf_analyzer/genai-perf/genai_perf/parser.py

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,12 @@
3131

3232
import genai_perf.logging as logging
3333
import genai_perf.utils as utils
34-
from genai_perf.constants import CNN_DAILY_MAIL, DEFAULT_COMPARE_DIR, OPEN_ORCA
34+
from genai_perf.constants import (
35+
CNN_DAILY_MAIL,
36+
DEFAULT_ARTIFACT_DIR,
37+
DEFAULT_COMPARE_DIR,
38+
OPEN_ORCA,
39+
)
3540
from genai_perf.llm_inputs.llm_inputs import LlmInputs, OutputFormat, PromptSource
3641
from genai_perf.plots.plot_config_parser import PlotConfigParser
3742
from genai_perf.plots.plot_manager import PlotManager
@@ -119,19 +124,42 @@ def _check_conditional_args(
119124
return args
120125

121126

122-
def _update_load_manager_args(args: argparse.Namespace) -> argparse.Namespace:
127+
def _check_load_manager_args(args: argparse.Namespace) -> argparse.Namespace:
123128
"""
124-
Update genai-perf load manager attributes to PA format
129+
Check inference load args
125130
"""
126-
for attr_key in ["concurrency", "request_rate"]:
127-
attr_val = getattr(args, attr_key)
128-
if attr_val is not None:
129-
setattr(args, f"{attr_key}_range", f"{attr_val}")
130-
delattr(args, attr_key)
131-
return args
132-
133131
# If no concurrency or request rate is set, default to 1
134-
setattr(args, "concurrency_range", "1")
132+
if not args.concurrency and not args.request_rate:
133+
args.concurrency = 1
134+
return args
135+
136+
137+
def _set_artifact_paths(args: argparse.Namespace) -> argparse.Namespace:
138+
"""
139+
Set paths for all the artifacts.
140+
"""
141+
if args.artifact_dir == Path(DEFAULT_ARTIFACT_DIR):
142+
name = [f"{args.model}"]
143+
if args.service_kind == "openai":
144+
name += [f"{args.service_kind}-{args.endpoint_type}"]
145+
elif args.service_kind == "triton":
146+
name += [f"{args.service_kind}-{args.backend.to_lowercase()}"]
147+
else:
148+
raise ValueError(f"Unknown service kind '{args.service_kind}'.")
149+
150+
if args.concurrency:
151+
name += [f"concurrency{args.concurrency}"]
152+
elif args.request_rate:
153+
name += [f"request_rate{args.request_rate}"]
154+
args.artifact_dir = args.artifact_dir / Path("-".join(name))
155+
156+
if args.profile_export_file.parent != Path(""):
157+
raise ValueError(
158+
"Please use --artifact-dir option to define intermediary paths to "
159+
"the profile export file."
160+
)
161+
162+
args.profile_export_file = args.artifact_dir / args.profile_export_file
135163
return args
136164

137165

@@ -367,24 +395,29 @@ def _add_endpoint_args(parser):
367395

368396
def _add_output_args(parser):
369397
output_group = parser.add_argument_group("Output")
370-
371398
output_group.add_argument(
372399
"--generate-plots",
373400
action="store_true",
374401
required=False,
375402
help="An option to enable the generation of plots.",
376403
)
377-
378404
output_group.add_argument(
379405
"--profile-export-file",
380406
type=Path,
381-
default="profile_export.json",
407+
default=Path("profile_export.json"),
382408
help="The path where the perf_analyzer profile export will be "
383409
"generated. By default, the profile export will be to profile_export.json. "
384410
"The genai-perf file will be exported to <profile_export_file>_genai_perf.csv. "
385411
"For example, if the profile export file is profile_export.json, the genai-perf file will be "
386412
"exported to profile_export_genai_perf.csv.",
387413
)
414+
output_group.add_argument(
415+
"--artifact-dir",
416+
type=Path,
417+
default=Path(DEFAULT_ARTIFACT_DIR),
418+
help="The directory to store all the (output) artifacts generated by "
419+
"GenAI-Perf and Perf Analyzer.",
420+
)
388421

389422

390423
def _add_other_args(parser):
@@ -547,6 +580,7 @@ def parse_args():
547580
args = _check_model_args(parser, args)
548581
args = _check_conditional_args(parser, args)
549582
args = _check_compare_args(compare_parser, args)
550-
args = _update_load_manager_args(args)
583+
args = _check_load_manager_args(args)
584+
args = _set_artifact_paths(args)
551585

552586
return args, argv[passthrough_index + 1 :]

src/c++/perf_analyzer/genai-perf/genai_perf/wrapper.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@
3737

3838
class Profiler:
3939
@staticmethod
40-
def add_protocol_args(args: Namespace):
41-
cmd = [""]
40+
def add_protocol_args(args: Namespace) -> list[str]:
41+
cmd = []
4242
if args.service_kind == "triton":
4343
cmd += ["-i", "grpc", "--streaming"]
4444
if args.u is None: # url
@@ -49,6 +49,15 @@ def add_protocol_args(args: Namespace):
4949
cmd += ["-i", "http"]
5050
return cmd
5151

52+
@staticmethod
53+
def add_inference_load_args(args: Namespace) -> list[str]:
54+
cmd = []
55+
if args.concurrency:
56+
cmd += ["--concurrency-range", f"{args.concurrency}"]
57+
elif args.request_rate:
58+
cmd += ["--request-rate-range", f"{args.request_rate}"]
59+
return cmd
60+
5261
@staticmethod
5362
def build_cmd(args: Namespace, extra_args: list[str] | None = None) -> list[str]:
5463
skip_args = [
@@ -77,6 +86,9 @@ def build_cmd(args: Namespace, extra_args: list[str] | None = None) -> list[str]
7786
"endpoint_type",
7887
"generate_plots",
7988
"subcommand",
89+
"concurrency",
90+
"request_rate",
91+
"artifact_dir",
8092
]
8193

8294
utils.remove_file(args.profile_export_file)
@@ -87,7 +99,7 @@ def build_cmd(args: Namespace, extra_args: list[str] | None = None) -> list[str]
8799
f"{args.model}",
88100
f"--async",
89101
f"--input-data",
90-
f"{DEFAULT_INPUT_DATA_JSON}",
102+
f"{args.artifact_dir / DEFAULT_INPUT_DATA_JSON}",
91103
]
92104
for arg, value in vars(args).items():
93105
if arg in skip_args:
@@ -109,6 +121,7 @@ def build_cmd(args: Namespace, extra_args: list[str] | None = None) -> list[str]
109121
cmd += [f"--{arg}", f"{value}"]
110122

111123
cmd += Profiler.add_protocol_args(args)
124+
cmd += Profiler.add_inference_load_args(args)
112125

113126
if extra_args is not None:
114127
for arg in extra_args:

0 commit comments

Comments
 (0)