Skip to content

Commit 27ae4b9

Browse files
committed
Add end-to-end model inference throughput comparison plot
1 parent 0f8477c commit 27ae4b9

File tree

1 file changed

+28
-0
lines changed

1 file changed

+28
-0
lines changed

compare_performance_metrics.py

Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,10 @@
1+
import json
2+
13
import matplotlib.pyplot as plt
24
import pandas as pd
35

46
from compare_code_metrics import _BACKSLASH_CHAR
7+
from run_experiments import ALL_MAX_NEW_TOKENS, BACKENDS
58

69
if __name__ == "__main__":
710
plt.rcParams["figure.dpi"] = 600
@@ -20,3 +23,28 @@
2023
plt.grid(False)
2124
plt.tight_layout()
2225
plt.savefig("performance-metrics.png")
26+
27+
data = {"Output Length": [], "NineToothed": [], "Triton": [], "PyTorch": []}
28+
29+
for max_new_tokens in ALL_MAX_NEW_TOKENS:
30+
data["Output Length"].append(max_new_tokens)
31+
32+
for backend in BACKENDS:
33+
with open(f"infer_{max_new_tokens}_{backend}.json") as f:
34+
num_tokens_per_second = json.load(f)["num_tokens_per_second"]
35+
36+
if backend == "ninetoothed":
37+
data["NineToothed"].append(num_tokens_per_second)
38+
elif backend == "triton":
39+
data["Triton"].append(num_tokens_per_second)
40+
elif backend == "torch":
41+
data["PyTorch"].append(num_tokens_per_second)
42+
43+
df = pd.DataFrame(data)
44+
45+
df.set_index("Output Length").plot(kind="bar", rot=0)
46+
plt.ylabel("Throughput (TPS)")
47+
plt.xlabel("Output Length")
48+
plt.grid(False)
49+
plt.tight_layout()
50+
plt.savefig("end-to-end-performance-metrics.png")

0 commit comments

Comments
 (0)