Skip to content

Commit 534f7b4

Browse files
authored
chore: update tpch code (#1389)
* chore: update tpch code
* update q5
* add 14, 17, 20
* add q22
* add q21
* reverse q7 q10 q21
1 parent 34d01b2 commit 534f7b4

File tree

6 files changed: 27 additions (+27) and 36 deletions (-36).

6 files changed: 27 additions (+27) and 36 deletions (-36).

third_party/bigframes_vendored/tpch/queries/q14.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
1818
var1 = date(1995, 9, 1)
1919
var2 = date(1995, 10, 1)
2020

21-
merged = lineitem.merge(part, left_on="L_PARTKEY", right_on="P_PARTKEY")
21+
merged = part.merge(lineitem, left_on="P_PARTKEY", right_on="L_PARTKEY")
2222

2323
filtered = merged[(merged["L_SHIPDATE"] >= var1) & (merged["L_SHIPDATE"] < var2)]
2424

third_party/bigframes_vendored/tpch/queries/q17.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
1919

2020
filtered_part = part[(part["P_BRAND"] == VAR1) & (part["P_CONTAINER"] == VAR2)]
2121
q1 = bpd.merge(
22-
filtered_part, lineitem, how="left", left_on="P_PARTKEY", right_on="L_PARTKEY"
22+
lineitem, filtered_part, how="right", left_on="L_PARTKEY", right_on="P_PARTKEY"
2323
)
2424

2525
grouped = (

third_party/bigframes_vendored/tpch/queries/q20.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -49,8 +49,10 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
4949
partsupp, left_on="P_PARTKEY", right_on="PS_PARTKEY"
5050
)
5151

52-
final_join = joined_parts.merge(
53-
q1, left_on=["PS_SUPPKEY", "P_PARTKEY"], right_on=["L_SUPPKEY", "L_PARTKEY"]
52+
final_join = q1.merge(
53+
joined_parts,
54+
left_on=["L_SUPPKEY", "L_PARTKEY"],
55+
right_on=["PS_SUPPKEY", "P_PARTKEY"],
5456
)
5557
final_filtered = final_join[final_join["PS_AVAILQTY"] > final_join["SUM_QUANTITY"]]
5658

third_party/bigframes_vendored/tpch/queries/q22.py

Lines changed: 9 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -15,37 +15,24 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
1515
)
1616

1717
country_codes = ["13", "31", "23", "29", "30", "18", "17"]
18-
1918
customer["CNTRYCODE"] = customer["C_PHONE"].str.slice(0, 2)
19+
customer = customer[customer["CNTRYCODE"].isin(country_codes)]
2020

2121
avg_acctbal = (
22-
customer[
23-
(customer["CNTRYCODE"].isin(country_codes)) & (customer["C_ACCTBAL"] > 0)
24-
][["C_ACCTBAL"]]
22+
customer[customer["C_ACCTBAL"] > 0.0][["C_ACCTBAL"]]
2523
.mean()
2624
.rename("AVG_ACCTBAL")
2725
)
28-
29-
orders_unique = orders["O_CUSTKEY"].unique(keep_order=False).to_frame()
30-
31-
matched_customers = customer.merge(
32-
orders_unique, left_on="C_CUSTKEY", right_on="O_CUSTKEY"
33-
)
34-
matched_customers["IS_IN_ORDERS"] = True
35-
36-
customer = customer.merge(
37-
matched_customers[["C_CUSTKEY", "IS_IN_ORDERS"]], on="C_CUSTKEY", how="left"
38-
)
39-
customer["IS_IN_ORDERS"] = customer["IS_IN_ORDERS"].fillna(False)
4026
customer = customer.merge(avg_acctbal, how="cross")
4127

42-
filtered_customers = customer[
43-
(customer["CNTRYCODE"].isin(country_codes))
44-
& (customer["C_ACCTBAL"] > customer["AVG_ACCTBAL"])
45-
& (~customer["IS_IN_ORDERS"])
46-
]
28+
filtered_customer = customer[customer["C_ACCTBAL"] > customer["AVG_ACCTBAL"]]
4729

48-
result = filtered_customers.groupby("CNTRYCODE", as_index=False).agg(
30+
orders_unique = orders["O_CUSTKEY"].unique(keep_order=False).to_frame()
31+
filtered_customer = filtered_customer.merge(
32+
orders_unique, left_on="C_CUSTKEY", right_on="O_CUSTKEY", how="left"
33+
)
34+
filtered_customer = filtered_customer[filtered_customer["O_CUSTKEY"].isnull()]
35+
result = filtered_customer.groupby("CNTRYCODE", as_index=False).agg(
4936
NUMCUST=bpd.NamedAgg(column="C_CUSTKEY", aggfunc="count"),
5037
TOTACCTBAL=bpd.NamedAgg(column="C_ACCTBAL", aggfunc="sum"),
5138
)

third_party/bigframes_vendored/tpch/queries/q3.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,11 +23,13 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
2323

2424
fcustomer = customer[customer["C_MKTSEGMENT"] == "BUILDING"]
2525

26-
jn1 = fcustomer.merge(orders, left_on="C_CUSTKEY", right_on="O_CUSTKEY")
27-
jn2 = jn1.merge(lineitem, left_on="O_ORDERKEY", right_on="L_ORDERKEY")
26+
filtered_orders = orders[orders["O_ORDERDATE"] < date_var]
27+
filtered_lineitem = lineitem[lineitem["L_SHIPDATE"] > date_var]
2828

29-
jn2 = jn2[jn2["O_ORDERDATE"] < date_var]
30-
jn2 = jn2[jn2["L_SHIPDATE"] > date_var]
29+
jn1 = filtered_lineitem.merge(
30+
filtered_orders, left_on="L_ORDERKEY", right_on="O_ORDERKEY"
31+
)
32+
jn2 = fcustomer.merge(jn1, left_on="C_CUSTKEY", right_on="O_CUSTKEY")
3133
jn2["REVENUE"] = jn2["L_EXTENDEDPRICE"] * (1 - jn2["L_DISCOUNT"])
3234

3335
gb = jn2.groupby(["O_ORDERKEY", "O_ORDERDATE", "O_SHIPPRIORITY"], as_index=False)

third_party/bigframes_vendored/tpch/queries/q5.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -35,20 +35,20 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
3535
var2 = date(1994, 1, 1)
3636
var3 = date(1995, 1, 1)
3737

38+
region = region[region["R_NAME"] == var1]
39+
orders = orders[(orders["O_ORDERDATE"] >= var2) & (orders["O_ORDERDATE"] < var3)]
40+
lineitem["REVENUE"] = lineitem["L_EXTENDEDPRICE"] * (1.0 - lineitem["L_DISCOUNT"])
41+
3842
jn1 = region.merge(nation, left_on="R_REGIONKEY", right_on="N_REGIONKEY")
3943
jn2 = jn1.merge(customer, left_on="N_NATIONKEY", right_on="C_NATIONKEY")
40-
jn3 = jn2.merge(orders, left_on="C_CUSTKEY", right_on="O_CUSTKEY")
41-
jn4 = jn3.merge(lineitem, left_on="O_ORDERKEY", right_on="L_ORDERKEY")
44+
jn3 = orders.merge(jn2, left_on="O_CUSTKEY", right_on="C_CUSTKEY")
45+
jn4 = lineitem.merge(jn3, left_on="L_ORDERKEY", right_on="O_ORDERKEY")
4246
jn5 = jn4.merge(
4347
supplier,
4448
left_on=["L_SUPPKEY", "N_NATIONKEY"],
4549
right_on=["S_SUPPKEY", "S_NATIONKEY"],
4650
)
4751

48-
jn5 = jn5[jn5["R_NAME"] == var1]
49-
jn5 = jn5[(jn5["O_ORDERDATE"] >= var2) & (jn5["O_ORDERDATE"] < var3)]
50-
jn5["REVENUE"] = jn5["L_EXTENDEDPRICE"] * (1.0 - jn5["L_DISCOUNT"])
51-
5252
gb = jn5.groupby("N_NAME", as_index=False)["REVENUE"].sum()
5353
result_df = gb.sort_values("REVENUE", ascending=False)
5454

0 commit comments

Comments
 (0)