Skip to content

Commit b1e4172

Browse files
authored
chore: update tpch q21 (#1225)
1 parent 4c3548f commit b1e4172

File tree

1 file changed

+25
-28
lines changed
  • third_party/bigframes_vendored/tpch/queries

1 file changed

+25
-28
lines changed
Lines changed: 25 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
1-
# Contains code from https://github.com/pola-rs/tpch/blob/main/queries/duckdb/q21.py
2-
3-
import typing
1+
# Contains code from https://github.com/pola-rs/tpch/blob/main/queries/polars/q21.py
4 | 2

5 | 3
import bigframes
6 | 4
import bigframes.pandas as bpd
@@ -24,39 +22,38 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
24 | 22
index_col=bigframes.enums.DefaultIndexKind.NULL,
25 | 23
)
26 | 24

27-
nation = nation[nation["N_NAME"] == "SAUDI ARABIA"]
28-
orders = orders[orders["O_ORDERSTATUS"] == "F"]
29-
30-
l1 = lineitem[lineitem["L_RECEIPTDATE"] > lineitem["L_COMMITDATE"]][
31-
["L_ORDERKEY", "L_SUPPKEY"]
32-
]
25+
var1 = "SAUDI ARABIA"
33 | 26

34-
l2 = lineitem.groupby("L_ORDERKEY", as_index=False).agg(
35-
NUNIQUE_COL=bpd.NamedAgg(column="L_SUPPKEY", aggfunc="nunique")
27+
q1 = lineitem.groupby("L_ORDERKEY", as_index=False).agg(
28+
N_SUPP_BY_ORDER=bpd.NamedAgg(column="L_SUPPKEY", aggfunc="size")
36 | 29
)
37-
l2 = l2[l2["NUNIQUE_COL"] > 1][["L_ORDERKEY"]]
30+
q1 = q1[q1["N_SUPP_BY_ORDER"] > 1]
38 | 31

39-
l3 = l1.groupby("L_ORDERKEY", as_index=False).agg(
40-
NUNIQUE_COL=bpd.NamedAgg(column="L_SUPPKEY", aggfunc="nunique")
41-
)
42-
l3 = l3[l3["NUNIQUE_COL"] == 1][["L_ORDERKEY"]]
32+
lineitem_filtered = lineitem[lineitem["L_RECEIPTDATE"] > lineitem["L_COMMITDATE"]]
43 | 33

44-
l1 = l1.merge(l2, on="L_ORDERKEY", how="inner").merge(
45-
l3, on="L_ORDERKEY", how="inner"
34+
q1 = q1.merge(lineitem_filtered, on="L_ORDERKEY")
35+
36+
q_final = q1.groupby("L_ORDERKEY", as_index=False).agg(
37+
N_SUPP_BY_ORDER_FINAL=bpd.NamedAgg(column="L_SUPPKEY", aggfunc="size")
46 | 38
)
47 | 39

48-
merged = supplier.merge(nation, left_on="S_NATIONKEY", right_on="N_NATIONKEY")
49-
merged = merged.merge(l1, left_on="S_SUPPKEY", right_on="L_SUPPKEY")
50-
merged = merged.merge(orders, left_on="L_ORDERKEY", right_on="O_ORDERKEY")
40+
q_final = q_final.merge(q1, on="L_ORDERKEY")
41+
q_final = q_final.merge(supplier, left_on="L_SUPPKEY", right_on="S_SUPPKEY")
42+
q_final = q_final.merge(nation, left_on="S_NATIONKEY", right_on="N_NATIONKEY")
43+
q_final = q_final.merge(orders, left_on="L_ORDERKEY", right_on="O_ORDERKEY")
44+
45+
q_final = q_final[
46+
(q_final["N_SUPP_BY_ORDER_FINAL"] == 1)
47+
& (q_final["N_NAME"] == var1)
48+
& (q_final["O_ORDERSTATUS"] == "F")
49+
]
51 | 50

52-
result = merged.groupby("S_NAME", as_index=False).agg(
51+
q_final = q_final.groupby("S_NAME", as_index=False).agg(
53 | 52
NUMWAIT=bpd.NamedAgg(column="L_SUPPKEY", aggfunc="size")
54 | 53
)
55 | 54

56-
result = (
57-
typing.cast(bpd.DataFrame, result)
58-
.sort_values(["NUMWAIT", "S_NAME"], ascending=[False, True])
59-
.head(100)
60-
)
55+
q_final = q_final.sort_values(
56+
by=["NUMWAIT", "S_NAME"], ascending=[False, True]
57+
).head(100)
61 | 58

62-
result.to_gbq()
59+
q_final.to_gbq()

0 commit comments

Comments
 (0)