
Commit fbee5b9

the-david-oymc-nv authored and committed

Support Python 3.8 in GenAI-Perf (#643)

1 parent 41f0459 commit fbee5b9
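
This commit makes GenAI-Perf run on Python 3.8 by replacing four constructs introduced in newer interpreters: built-in generic subscripting such as list[int] (PEP 585, Python 3.9), the int | float union syntax (PEP 604, Python 3.10), itertools.pairwise (Python 3.10), and str.removeprefix (Python 3.9). The annotation-only changes could also have been handled with from __future__ import annotations, but that would not cover the runtime uses of pairwise and removeprefix, so the commit rewrites all four uniformly. A minimal reference sketch of the version gates (the REPLACED table is illustrative, not part of the diff):

import sys

# Each construct this commit removes, mapped to the minimum Python
# version that supports it.
REPLACED = {
    "builtin generics such as list[int] (PEP 585)": (3, 9),
    "union syntax int | float (PEP 604)": (3, 10),
    "itertools.pairwise": (3, 10),
    "str.removeprefix": (3, 9),
}

for feature, minimum in REPLACED.items():
    available = sys.version_info >= minimum
    print(f"{feature}: needs {minimum[0]}.{minimum[1]}+ (available here: {available})")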

File tree

12 files changed: +62 −42 lines

.github/workflows/python-package-genai.yml

Lines changed: 1 addition & 1 deletion

@@ -39,7 +39,7 @@ jobs:
       fail-fast: false
       matrix:
         os: ["ubuntu-22.04"]
-        python-version: ["3.10"]
+        python-version: ["3.8", "3.10"]

     steps:
     - uses: actions/checkout@v3
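
Adding "3.8" to the matrix means each entry in the os × python-version cross product becomes its own CI job, so the test suite now runs on both interpreters. A toy model of the expansion (itertools.product stands in for the Actions matrix logic; the variable names are illustrative):

from itertools import product

# Toy expansion of the Actions build matrix after this commit.
matrix = {"os": ["ubuntu-22.04"], "python-version": ["3.8", "3.10"]}
jobs = [dict(zip(matrix, combo)) for combo in product(*matrix.values())]
print(jobs)
# [{'os': 'ubuntu-22.04', 'python-version': '3.8'},
#  {'os': 'ubuntu-22.04', 'python-version': '3.10'}]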

src/c++/perf_analyzer/genai-perf/genai_perf/llm_metrics.py

Lines changed: 26 additions & 18 deletions

@@ -29,9 +29,9 @@
 import csv
 import json
 from enum import Enum, auto
-from itertools import pairwise
+from itertools import tee
 from pathlib import Path
-from typing import List
+from typing import Dict, List, Tuple, Union

 import numpy as np
 import pandas as pd

@@ -115,7 +115,7 @@ def __init__(
         request_throughputs: List[float] = [],
         request_latencies: List[int] = [],
         time_to_first_tokens: List[int] = [],
-        inter_token_latencies: List[list[int]] = [[]],
+        inter_token_latencies: List[List[int]] = [[]],
         output_token_throughputs: List[float] = [],
         output_token_throughputs_per_request: List[int] = [],
         num_output_tokens: List[int] = [],

@@ -170,7 +170,7 @@ def __init__(self, metrics: Metrics):
             self._calculate_minmax(data, attr)
             self._calculate_std(data, attr)

-    def _preprocess_data(self, data: list, attr: str) -> list[int | float]:
+    def _preprocess_data(self, data: List, attr: str) -> List[Union[int, float]]:
         new_data = []
         if attr == "inter_token_latency":
             # flatten inter token latencies to 1D

@@ -180,11 +180,11 @@ def _preprocess_data(self, data: list, attr: str) -> list[int | float]:
             new_data = data
         return new_data

-    def _calculate_mean(self, data: list[int | float], attr: str) -> None:
+    def _calculate_mean(self, data: List[Union[int, float]], attr: str) -> None:
         avg = np.mean(data)
         setattr(self, "avg_" + attr, avg)

-    def _calculate_percentiles(self, data: list[int | float], attr: str) -> None:
+    def _calculate_percentiles(self, data: List[Union[int, float]], attr: str) -> None:
         p25, p50, p75 = np.percentile(data, [25, 50, 75])
         p90, p95, p99 = np.percentile(data, [90, 95, 99])
         setattr(self, "p25_" + attr, p25)

@@ -194,12 +194,12 @@ def _calculate_percentiles(self, data: list[int | float], attr: str) -> None:
         setattr(self, "p95_" + attr, p95)
         setattr(self, "p99_" + attr, p99)

-    def _calculate_minmax(self, data: list[int | float], attr: str) -> None:
+    def _calculate_minmax(self, data: List[Union[int, float]], attr: str) -> None:
         min, max = np.min(data), np.max(data)
         setattr(self, "min_" + attr, min)
         setattr(self, "max_" + attr, max)

-    def _calculate_std(self, data: list[int | float], attr: str) -> None:
+    def _calculate_std(self, data: List[Union[int, float]], attr: str) -> None:
         std = np.std(data)
         setattr(self, "std_" + attr, std)

@@ -460,7 +460,7 @@ def get_statistics(self, infer_mode: str, load_level: str) -> Statistics:
             raise KeyError(f"Profile with {infer_mode}={load_level} does not exist.")
         return self._profile_results[(infer_mode, load_level)]

-    def get_profile_load_info(self) -> list[tuple[str, str]]:
+    def get_profile_load_info(self) -> List[Tuple[str, str]]:
         """Return available (infer_mode, load_level) tuple keys."""
         return [k for k, _ in self._profile_results.items()]

@@ -547,7 +547,9 @@ def _parse_requests(self, requests: dict) -> LLMMetrics:

             # inter token latency
             itl_per_request = []
-            for (t1, _), (t2, n2) in pairwise(zip(res_timestamps, num_output_tokens)):
+            for (t1, _), (t2, n2) in self._pairwise(
+                zip(res_timestamps, num_output_tokens)
+            ):
                 # TMA-1676: handle empty first/last responses
                 # if the latter response has zero token (e.g. empty string),
                 # then set it default to one for the sake of inter token latency

@@ -572,8 +574,14 @@ def _parse_requests(self, requests: dict) -> LLMMetrics:
                 num_input_tokens,
             )

+    def _pairwise(self, iterable):
+        """Generate pairs of consecutive elements from the given iterable."""
+        a, b = tee(iterable)
+        next(b, None)
+        return zip(a, b)
+
     def _preprocess_response(
-        self, res_timestamps: list[int], res_outputs: list[dict[str, str]]
+        self, res_timestamps: List[int], res_outputs: List[Dict[str, str]]
     ) -> None:
         """Helper function to preprocess responses of a request."""
         if self._service_kind == "openai":

@@ -604,7 +612,7 @@ def _preprocess_response(
             res_timestamps.pop()
             res_outputs.pop()

-    def _tokenize_request_inputs(self, req_inputs: dict) -> list[int]:
+    def _tokenize_request_inputs(self, req_inputs: dict) -> List[int]:
         """Deserialize the request input and return tokenized inputs."""
         if self._service_kind == "triton":
             return self._tokenize_triton_request_input(req_inputs)

@@ -613,12 +621,12 @@ def _tokenize_request_inputs(self, req_inputs: dict) -> list[int]:
         else:
             raise ValueError(f"Unknown service kind: '{self._service_kind}'.")

-    def _tokenize_triton_request_input(self, req_inputs: dict) -> list[int]:
+    def _tokenize_triton_request_input(self, req_inputs: dict) -> List[int]:
         """Tokenize the Triton request input texts."""
         encodings = self._tokenizer(req_inputs["text_input"])
         return encodings.data["input_ids"]

-    def _tokenize_openai_request_input(self, req_inputs: dict) -> list[int]:
+    def _tokenize_openai_request_input(self, req_inputs: dict) -> List[int]:
         """Tokenize the OpenAI request input texts."""
         payload = json.loads(req_inputs["payload"])
         if self._response_format == ResponseFormat.OPENAI_CHAT_COMPLETIONS:

@@ -632,7 +640,7 @@ def _tokenize_openai_request_input(self, req_inputs: dict) -> list[int]:
         encodings = self._tokenizer(input_text)
         return encodings.data["input_ids"]

-    def _tokenize_response_outputs(self, res_outputs: dict) -> list[list[int]]:
+    def _tokenize_response_outputs(self, res_outputs: dict) -> List[List[int]]:
         """Deserialize the response output and return tokenized outputs."""
         if self._service_kind == "triton":
             return self._tokenize_triton_response_output(res_outputs)

@@ -641,22 +649,22 @@ def _tokenize_response_outputs(self, res_outputs: dict) -> list[list[int]]:
         else:
             raise ValueError(f"Unknown service kind: '{self._service_kind}'.")

-    def _tokenize_triton_response_output(self, res_outputs: dict) -> list[list[int]]:
+    def _tokenize_triton_response_output(self, res_outputs: dict) -> List[List[int]]:
         """Tokenize the Triton response output texts."""
         output_texts = []
         for output in res_outputs:
             output_texts.append(output["text_output"])
         return self._run_tokenizer(output_texts)

-    def _tokenize_openai_response_output(self, res_outputs: dict) -> list[list[int]]:
+    def _tokenize_openai_response_output(self, res_outputs: dict) -> List[List[int]]:
         """Tokenize the OpenAI response output texts."""
         output_texts = []
         for output in res_outputs:
             text = self._extract_openai_text_output(output["response"])
             output_texts.append(text)
         return self._run_tokenizer(output_texts)

-    def _run_tokenizer(self, output_texts: list[str]) -> list[list[int]]:
+    def _run_tokenizer(self, output_texts: List[str]) -> List[List[int]]:
         # exclamation mark trick forces the llama tokenization to consistently
         # start each output with a specific token which allows us to safely skip
         # the first token of every tokenized output and get only the ones that
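
The new _pairwise method is the standard tee-based recipe equivalent to itertools.pairwise, which only exists on Python 3.10+. A small self-contained sketch of the equivalence, with made-up timestamps mirroring how _parse_requests computes inter token latencies from consecutive response times:

from itertools import tee

def pairwise_backport(iterable):
    # Same recipe the commit adds as LLMProfileDataParser._pairwise:
    # duplicate the iterator, advance one copy by a single element, and
    # zip the two so each element is paired with its successor.
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

res_timestamps = [100, 140, 190, 250]  # illustrative values
print([t2 - t1 for t1, t2 in pairwise_backport(res_timestamps)])  # [40, 50, 60]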

src/c++/perf_analyzer/genai-perf/genai_perf/plots/base_plot.py

Lines changed: 2 additions & 1 deletion

@@ -26,6 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 from pathlib import Path
+from typing import List

 import pandas as pd
 from genai_perf.exceptions import GenAIPerfException

@@ -38,7 +39,7 @@ class BasePlot:
     Base class for plots
     """

-    def __init__(self, data: list[ProfileRunData]) -> None:
+    def __init__(self, data: List[ProfileRunData]) -> None:
         self._profile_data = data

     def create_plot(
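
The remaining plots modules below (box_plot, heat_map, plot_config, plot_config_parser, plot_manager, scatter_plot) receive the same mechanical substitution: typing-module spellings in place of PEP 585/604 syntax. A sketch of the equivalence, with illustrative alias names that are not from the codebase:

from typing import List, Sequence, Union

# Python 3.8-compatible spellings, as adopted throughout this commit:
Metric = Sequence[Union[int, float]]
Runs = List[dict]

# The replaced spellings fail at runtime on older interpreters:
#   Sequence[int | float]   # TypeError before Python 3.10 (PEP 604)
#   list[dict]              # TypeError before Python 3.9 (PEP 585)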

src/c++/perf_analyzer/genai-perf/genai_perf/plots/box_plot.py

Lines changed: 2 additions & 1 deletion

@@ -26,6 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 from pathlib import Path
+from typing import List

 import plotly.graph_objects as go
 from genai_perf.plots.base_plot import BasePlot

@@ -37,7 +38,7 @@ class BoxPlot(BasePlot):
     Generate a box plot in jpeg and html format.
     """

-    def __init__(self, data: list[ProfileRunData]) -> None:
+    def __init__(self, data: List[ProfileRunData]) -> None:
         super().__init__(data)

     def create_plot(

src/c++/perf_analyzer/genai-perf/genai_perf/plots/heat_map.py

Lines changed: 2 additions & 1 deletion

@@ -26,6 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 from pathlib import Path
+from typing import List

 import plotly.graph_objects as go
 from genai_perf.plots.base_plot import BasePlot

@@ -38,7 +39,7 @@ class HeatMap(BasePlot):
     Generate a heat map in jpeg and html format.
     """

-    def __init__(self, data: list[ProfileRunData]) -> None:
+    def __init__(self, data: List[ProfileRunData]) -> None:
         super().__init__(data)

     def create_plot(

src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_config.py

Lines changed: 4 additions & 3 deletions

@@ -29,6 +29,7 @@
 from dataclasses import dataclass
 from enum import Enum, auto
 from pathlib import Path
+from typing import List, Sequence, Union


 class PlotType(Enum):

@@ -40,14 +41,14 @@ class PlotType(Enum):
 @dataclass
 class ProfileRunData:
     name: str
-    x_metric: Sequence[int | float]
-    y_metric: Sequence[int | float]
+    x_metric: Sequence[Union[int, float]]
+    y_metric: Sequence[Union[int, float]]


 @dataclass
 class PlotConfig:
     title: str
-    data: list[ProfileRunData]
+    data: List[ProfileRunData]
     x_label: str
     y_label: str
     width: int

src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_config_parser.py

Lines changed: 6 additions & 5 deletions

@@ -26,6 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 from pathlib import Path
+from typing import List, Union

 import genai_perf.logging as logging

@@ -46,7 +47,7 @@ class PlotConfigParser:
     def __init__(self, filename: Path) -> None:
         self._filename = filename

-    def generate_configs(self) -> list[PlotConfig]:
+    def generate_configs(self) -> List[PlotConfig]:
         """Load YAML configuration file and convert to PlotConfigs."""
         logger.info(
             f"Generating plot configurations by parsing {self._filename}. "

@@ -57,7 +58,7 @@ def generate_configs(self) -> list[PlotConfig]:
         plot_configs = []
         for _, config in configs.items():
             # Collect profile run data
-            profile_data: list[ProfileRunData] = []
+            profile_data: List[ProfileRunData] = []
             for filepath in config["paths"]:
                 stats = self._get_statistics(filepath)
                 profile_data.append(

@@ -103,7 +104,7 @@ def _get_run_name(self, filepath: Path) -> str:
             return filepath.parent.name + "/" + filepath.stem
         return filepath.stem

-    def _get_metric(self, stats: Statistics, name: str) -> list[int | float]:
+    def _get_metric(self, stats: Statistics, name: str) -> List[Union[int, float]]:
         if not name:  # no metric
             return []
         elif name == "inter_token_latencies":

@@ -113,7 +114,7 @@ def _get_metric(self, stats: Statistics, name: str) -> list[int | float]:
             itl_flatten += request_itls
             return [scale(x, (1 / 1e6)) for x in itl_flatten]  # ns to ms
         elif name == "token_positions":
-            token_positions: list[int | float] = []
+            token_positions: List[Union[int, float]] = []
             for request_itls in stats.metrics.data["inter_token_latencies"]:
                 token_positions += list(range(1, len(request_itls) + 1))
             return token_positions

@@ -141,7 +142,7 @@ def _get_plot_type(self, plot_type: str) -> PlotType:
         )

     @staticmethod
-    def create_init_yaml_config(filenames: list[Path], output_dir: Path) -> None:
+    def create_init_yaml_config(filenames: List[Path], output_dir: Path) -> None:
         config_str = f"""
       plot1:
         title: Time to First Token

src/c++/perf_analyzer/genai-perf/genai_perf/plots/plot_manager.py

Lines changed: 3 additions & 1 deletion

@@ -25,6 +25,8 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+from typing import List
+
 import genai_perf.logging as logging
 from genai_perf.plots.box_plot import BoxPlot
 from genai_perf.plots.heat_map import HeatMap

@@ -39,7 +41,7 @@ class PlotManager:
     Manage details around plots generated
     """

-    def __init__(self, plot_configs: list[PlotConfig]) -> None:
+    def __init__(self, plot_configs: List[PlotConfig]) -> None:
         self._plot_configs = plot_configs

     def _generate_filename(self, title: str) -> str:

src/c++/perf_analyzer/genai-perf/genai_perf/plots/scatter_plot.py

Lines changed: 2 additions & 1 deletion

@@ -26,6 +26,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 from pathlib import Path
+from typing import List

 import plotly.graph_objects as go
 from genai_perf.plots.base_plot import BasePlot

@@ -37,7 +38,7 @@ class ScatterPlot(BasePlot):
     Generate a scatter plot in jpeg and html format.
     """

-    def __init__(self, data: list[ProfileRunData]) -> None:
+    def __init__(self, data: List[ProfileRunData]) -> None:
         super().__init__(data)

     def create_plot(

src/c++/perf_analyzer/genai-perf/genai_perf/utils.py

Lines changed: 7 additions & 4 deletions

@@ -27,15 +27,18 @@
 import json
 from enum import Enum
 from pathlib import Path
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Type

 # Skip type checking to avoid mypy error
 # Issue: https://github.com/python/mypy/issues/10632
 import yaml  # type: ignore


 def remove_sse_prefix(msg: str) -> str:
-    return msg.removeprefix("data: ").strip()
+    prefix = "data: "
+    if msg.startswith(prefix):
+        return msg[len(prefix) :].strip()
+    return msg.strip()


 def load_yaml(filepath: Path) -> Dict[str, Any]:

@@ -58,14 +61,14 @@ def convert_option_name(name: str) -> str:
     return name.replace("_", "-")


-def get_enum_names(enum: type[Enum]) -> List:
+def get_enum_names(enum: Type[Enum]) -> List:
     names = []
     for e in enum:
         names.append(e.name.lower())
     return names


-def get_enum_entry(name: str, enum: type[Enum]) -> Optional[Enum]:
+def get_enum_entry(name: str, enum: Type[Enum]) -> Optional[Enum]:
     for e in enum:
         if e.name.lower() == name.lower():
             return e
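
str.removeprefix only exists on Python 3.9+, so remove_sse_prefix gains an explicit startswith check with identical behavior. A quick sketch of the replacement on SSE-framed and unframed messages (the payloads are made up):

def remove_sse_prefix(msg: str) -> str:
    # 3.8-compatible equivalent of msg.removeprefix("data: ").strip():
    # drop the SSE framing only when it is actually present.
    prefix = "data: "
    if msg.startswith(prefix):
        return msg[len(prefix) :].strip()
    return msg.strip()

print(remove_sse_prefix('data: {"choices": []}'))  # {"choices": []}
print(remove_sse_prefix("[DONE]"))                 # [DONE]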
