Skip to content

Commit c921c8a

Browse files
authored
chore: update benchmark sorting logic, run environment, and add join benchmarks. (#768)
1 parent 11ed16b commit c921c8a

File tree

6 files changed

+83
-3
lines changed

6 files changed

+83
-3
lines changed

noxfile.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -818,7 +818,7 @@ def notebook(session: nox.Session):
818818
_print_performance_report("notebooks/")
819819

820820

821-
@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS)
821+
@nox.session(python=DEFAULT_PYTHON_VERSION)
822822
def benchmark(session: nox.Session):
823823
session.install("-e", ".[all]")
824824
base_path = os.path.join("scripts", "benchmark")
@@ -855,7 +855,7 @@ def _print_performance_report(path: str):
855855
"""
856856
print("---BIGQUERY USAGE REPORT---")
857857
results_dict = {}
858-
bytes_reports = sorted(Path(path).rglob("*.bytesprocessed"), key=lambda x: x.name)
858+
bytes_reports = sorted(Path(path).rglob("*.bytesprocessed"))
859859
for bytes_report in bytes_reports:
860860
with open(bytes_report, "r") as bytes_file:
861861
filename = bytes_report.relative_to(path).with_suffix("")
@@ -865,7 +865,7 @@ def _print_performance_report(path: str):
865865
results_dict[filename] = [query_count, total_bytes]
866866
os.remove(bytes_report)
867867

868-
millis_reports = sorted(Path(path).rglob("*.slotmillis"), key=lambda x: x.name)
868+
millis_reports = sorted(Path(path).rglob("*.slotmillis"))
869869
for millis_report in millis_reports:
870870
with open(millis_report, "r") as millis_file:
871871
filename = millis_report.relative_to(path).with_suffix("")
Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/join-pandas.py
2+
3+
import bigframes.pandas as bpd
4+
5+
print("Join benchmark 1: small inner on int")
6+
7+
x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_NA_0_0")
8+
small = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_1e3_0_0")
9+
10+
ans = x.merge(small, on="id1")
11+
print(ans.shape)
12+
13+
chk = [ans["v1"].sum(), ans["v2"].sum()]
14+
print(chk)
15+
16+
bpd.reset_session()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/join-pandas.py
2+
3+
import bigframes.pandas as bpd
4+
5+
print("Join benchmark 2: medium inner on int")
6+
7+
x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_NA_0_0")
8+
medium = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_1e6_0_0")
9+
10+
ans = x.merge(medium, on="id2")
11+
print(ans.shape)
12+
13+
chk = [ans["v1"].sum(), ans["v2"].sum()]
14+
print(chk)
15+
16+
bpd.reset_session()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/join-pandas.py
2+
3+
import bigframes.pandas as bpd
4+
5+
print("Join benchmark 3: medium outer on int")
6+
7+
x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_NA_0_0")
8+
medium = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_1e6_0_0")
9+
10+
ans = x.merge(medium, how="left", on="id2")
11+
print(ans.shape)
12+
13+
chk = [ans["v1"].sum(), ans["v2"].sum()]
14+
print(chk)
15+
16+
bpd.reset_session()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/join-pandas.py
2+
3+
import bigframes.pandas as bpd
4+
5+
print("Join benchmark 4: medium inner on factor")
6+
7+
x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_NA_0_0")
8+
medium = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_1e6_0_0")
9+
10+
ans = x.merge(medium, on="id5")
11+
print(ans.shape)
12+
13+
chk = [ans["v1"].sum(), ans["v2"].sum()]
14+
print(chk)
15+
16+
bpd.reset_session()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/join-pandas.py
2+
3+
import bigframes.pandas as bpd
4+
5+
print("Join benchmark 5: big inner on int")
6+
7+
x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_NA_0_0")
8+
big = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.J1_1e9_1e9_0_0")
9+
10+
ans = x.merge(big, on="id3")
11+
print(ans.shape)
12+
13+
chk = [ans["v1"].sum(), ans["v2"].sum()]
14+
print(chk)
15+
16+
bpd.reset_session()

0 commit comments

Comments
 (0)