Skip to content

Commit baa92c2

Browse files
committed
add db population script.
1 parent 1122cad commit baa92c2

File tree

3 files changed

+136
-3
lines changed

3 files changed

+136
-3
lines changed

.github/workflows/benchmark.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ env:
1111
HF_HOME: /mnt/cache
1212
OMP_NUM_THREADS: 8
1313
MKL_NUM_THREADS: 8
14+
BASE_PATH: benchmark_outputs
1415

1516
jobs:
1617
torch_models_cuda_benchmark_tests:
@@ -43,8 +44,7 @@ jobs:
4344
python utils/print_env.py
4445
- name: Diffusers Benchmarking
4546
env:
46-
HF_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
47-
BASE_PATH: benchmark_outputs
47+
HF_TOKEN: ${{ secrets.DIFFUSERS_BOT_TOKEN }}
4848
run: |
4949
cd benchmarks && python run_all.py && python push_results.py
5050
mkdir ${BASE_PATH} && mv *.csv ${BASE_PATH}
@@ -56,6 +56,14 @@ jobs:
5656
name: benchmark_test_reports
5757
path: benchmarks/benchmark_outputs
5858

59+
- name: Update benchmarking results to DB
60+
env:
61+
PGDATABASE: metrics
62+
PGHOST: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGHOST }}
63+
PGUSER: transformers_benchmarks
64+
PGPASSWORD: ${{ secrets.TRANSFORMERS_BENCHMARKS_PGPASSWORD }}
65+
run: cd benchmarks && python populate_into_db.py
66+
5967
- name: Report success status
6068
if: ${{ success() }}
6169
run: |

benchmarks/populate_into_db.py

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
import os
2+
3+
import pandas as pd
4+
import psycopg2
5+
import psycopg2.extras
6+
7+
8+
FINAL_CSV_FILENAME = "benchmark_outputs/collated_results.csv"
TABLE_NAME = "diffusers_benchmarks"

# Single source of truth for the table layout: (column name, SQL type, cast kind).
# The CREATE TABLE statement, the INSERT statement and the per-row casting are
# all derived from this so they cannot drift apart.
_COLUMNS = (
    ("scenario", "TEXT", "text"),
    ("model_cls", "TEXT", "text"),
    ("num_params_M", "REAL", "float"),
    ("flops_M", "REAL", "float"),
    ("time_plain_s", "REAL", "float"),
    ("mem_plain_GB", "REAL", "float"),
    ("time_compile_s", "REAL", "float"),
    ("mem_compile_GB", "REAL", "float"),
    ("fullgraph", "BOOLEAN", "bool"),
    ("mode", "TEXT", "text"),
    ("github_sha", "TEXT", "text"),
)


def _cast_value(val, dtype: str):
    """Convert a raw CSV cell into a plain Python value, or None for SQL NULL.

    Args:
        val: The cell value as read by pandas (may be NaN/None).
        dtype: One of "text", "float" or "bool"; any other value returns
            ``val`` unchanged.

    Returns:
        A ``str``, ``float`` or ``bool`` matching ``dtype``, or ``None`` when
        the cell is missing or cannot be interpreted as the requested type.
    """
    if pd.isna(val):
        return None

    if dtype == "text":
        return str(val).strip()

    if dtype == "float":
        try:
            return float(val)
        except (TypeError, ValueError):
            # Unparseable numbers are stored as NULL rather than aborting the upload.
            return None

    if dtype == "bool":
        s = str(val).strip().lower()
        if s in ("true", "t", "yes", "1"):
            return True
        if s in ("false", "f", "no", "0"):
            return False
        # Numeric encodings such as 1.0 / 0.0 stringify to "1.0" / "0.0",
        # which the string checks above do not match.
        if val in (1, 1.0):
            return True
        if val in (0, 0.0):
            return False
        return None

    return val


def build_create_table_sql(table_name: str = TABLE_NAME) -> str:
    """Return the ``CREATE TABLE IF NOT EXISTS`` statement for ``table_name``."""
    column_defs = ",\n    ".join(f"{name} {sql_type}" for name, sql_type, _ in _COLUMNS)
    return f"CREATE TABLE IF NOT EXISTS {table_name} (\n    {column_defs}\n);"


def build_insert_sql(table_name: str = TABLE_NAME) -> str:
    """Return the parameterized ``INSERT`` statement for ``table_name``.

    The statement targets the same table that ``build_create_table_sql``
    creates; values are passed separately as ``%s`` parameters.
    """
    column_names = ",\n    ".join(name for name, _, _ in _COLUMNS)
    placeholders = ", ".join(["%s"] * len(_COLUMNS))
    return f"INSERT INTO {table_name} (\n    {column_names}\n)\nVALUES ({placeholders});"


def build_rows(df) -> list:
    """Convert a results DataFrame into a list of tuples ready for insertion.

    Each tuple follows the column order of ``_COLUMNS``. Columns absent from
    ``df`` (for example ``github_sha``) yield ``None`` for every row.
    """
    return [
        tuple(_cast_value(row.get(name), kind) for name, _, kind in _COLUMNS)
        for _, row in df.iterrows()
    ]


def main() -> None:
    """Create the benchmarks table if needed and upload the collated CSV.

    Connection settings are read from the ``PGHOST``, ``PGDATABASE``,
    ``PGUSER`` and ``PGPASSWORD`` environment variables.
    """
    conn = psycopg2.connect(
        host=os.getenv("PGHOST"),
        database=os.getenv("PGDATABASE"),
        user=os.getenv("PGUSER"),
        password=os.getenv("PGPASSWORD"),
    )
    try:
        # The cursor context manager closes the cursor even if a step fails.
        with conn.cursor() as cur:
            cur.execute(build_create_table_sql())
            conn.commit()

            df = pd.read_csv(FINAL_CSV_FILENAME)
            rows_to_insert = build_rows(df)

            # Batch-insert all rows (with NULL for any None)
            psycopg2.extras.execute_batch(cur, build_insert_sql(), rows_to_insert)
            conn.commit()
    finally:
        # Always release the connection, including on failure.
        conn.close()


if __name__ == "__main__":
    main()

benchmarks/requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
pandas
22
peft
33
torchprofile
4-
bitsandbytes
4+
bitsandbytes
5+
psycopg2==2.9.9

0 commit comments

Comments
 (0)