Skip to content

Commit e627896

Browse files
authored
chore: add q4-7 for groupby benchmark (#765)
* chore: add q4-7 for groupby benchmark * update benchmark print message.
1 parent c921c8a commit e627896

File tree

5 files changed

+66
-0
lines changed

5 files changed

+66
-0
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py
2+
3+
import bigframes.pandas as bpd
4+
5+
# Groupby benchmark q4: mean of v1, v2 and v3 for every id4 group.
print("Groupby benchmark 4: mean v1:v3 by id4")

x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0")

# The same three value columns drive both the aggregation and the checksum.
value_columns = ("v1", "v2", "v3")
mean_spec = {column: "mean" for column in value_columns}

grouped = x.groupby("id4", as_index=False, dropna=False)
ans = grouped.agg(mean_spec)
print(ans.shape)

# Checksum: the column-wise sum of each aggregated value column.
chk = [ans[column].sum() for column in value_columns]
print(chk)

bpd.reset_session()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py
2+
3+
import bigframes.pandas as bpd
4+
5+
# Groupby benchmark q5: sum of v1, v2 and v3 for every id6 group.
print("Groupby benchmark 5: sum v1:v3 by id6")

x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0")

# The same three value columns drive both the aggregation and the checksum.
value_columns = ("v1", "v2", "v3")
sum_spec = {column: "sum" for column in value_columns}

grouped = x.groupby("id6", as_index=False, dropna=False)
ans = grouped.agg(sum_spec)
print(ans.shape)

# Checksum: the column-wise sum of each aggregated value column.
chk = [ans[column].sum() for column in value_columns]
print(chk)

bpd.reset_session()
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py
2+
3+
import bigframes.pandas as bpd
4+
5+
# Groupby benchmark q6: median and standard deviation of v3 per (id4, id5).
print("Groupby benchmark 6: median v3 sd v3 by id4 id5")

x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0")

# Two statistics are requested for the single value column v3.
v3_statistics = ["median", "std"]
group_keys = ["id4", "id5"]

grouped = x.groupby(group_keys, as_index=False, dropna=False)
ans = grouped.agg({"v3": v3_statistics})
print(ans.shape)

# Checksum: sum of each statistic, selected as ans["v3"][<statistic>].
chk = [ans["v3"][statistic].sum() for statistic in v3_statistics]
print(chk)

bpd.reset_session()
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Contains code from https://github.com/duckdblabs/db-benchmark/blob/master/pandas/groupby-pandas.py
2+
3+
import bigframes.pandas as bpd
4+
5+
# Groupby benchmark q7: (max of v1) minus (min of v2) for every id3 group.
print("Groupby benchmark 7: max v1 - min v2 by id3")

x = bpd.read_gbq("bigframes-dev-perf.dbbenchmark.G1_1e9_1e2_5_0")

# Step 1: per-group extremes of the two value columns.
extremes = x.groupby("id3", as_index=False, dropna=False).agg(
    {"v1": "max", "v2": "min"}
)
# Step 2: derive the range column from the aggregated frame.
with_range = extremes.assign(
    range_v1_v2=lambda frame: frame["v1"] - frame["v2"]
)
# Step 3: keep only the group key and the derived range.
ans = with_range[["id3", "range_v1_v2"]]
print(ans.shape)

# Checksum: sum of the derived range column.
chk = [ans["range_v1_v2"].sum()]
print(chk)

bpd.reset_session()

scripts/benchmark/db-benchmark/sort

Whitespace-only changes.

0 commit comments

Comments
 (0)