Skip to content

Commit a914a5f

Browse files
chore: Use isin() in tpch benchmarks (#1400)
1 parent 50ad3a5 commit a914a5f

File tree

4 files changed

+16
-27
lines changed

4 files changed

+16
-27
lines changed

third_party/bigframes_vendored/tpch/queries/q16.py

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -20,22 +20,16 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
2020

2121
var1 = "Brand#45"
2222

23-
supplier = (
24-
supplier[
25-
~supplier["S_COMMENT"].str.contains("Customer.*Complaints", regex=True)
26-
]["S_SUPPKEY"]
27-
.unique(keep_order=False)
28-
.to_frame()
29-
)
23+
supplier = supplier[
24+
~supplier["S_COMMENT"].str.contains("Customer.*Complaints", regex=True)
25+
]["S_SUPPKEY"]
3026

3127
q_filtered = part.merge(partsupp, left_on="P_PARTKEY", right_on="PS_PARTKEY")
3228
q_filtered = q_filtered[q_filtered["P_BRAND"] != var1]
3329
q_filtered = q_filtered[~q_filtered["P_TYPE"].str.contains("MEDIUM POLISHED")]
3430
q_filtered = q_filtered[q_filtered["P_SIZE"].isin([49, 14, 23, 45, 19, 3, 36, 9])]
3531

36-
final_df = q_filtered.merge(
37-
supplier, left_on=["PS_SUPPKEY"], right_on=["S_SUPPKEY"]
38-
)
32+
final_df = q_filtered[q_filtered["PS_SUPPKEY"].isin(supplier)]
3933

4034
grouped = final_df.groupby(["P_BRAND", "P_TYPE", "P_SIZE"], as_index=False)
4135
result = grouped.agg(

third_party/bigframes_vendored/tpch/queries/q18.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,13 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
2222

2323
var1 = 300
2424

25+
# orders whose total line-item quantity (sum of L_QUANTITY) exceeds 300
2526
q1 = lineitem.groupby("L_ORDERKEY", as_index=False).agg(
2627
SUM_QUANTITY=bpd.NamedAgg(column="L_QUANTITY", aggfunc="sum")
2728
)
2829
q1 = q1[q1["SUM_QUANTITY"] > var1]
2930

30-
filtered_orders = orders.merge(
31-
q1, left_on="O_ORDERKEY", right_on="L_ORDERKEY", how="inner"
32-
)
31+
filtered_orders = orders[orders["O_ORDERKEY"].isin(q1["L_ORDERKEY"])]
3332

3433
result = filtered_orders.merge(
3534
lineitem, left_on="O_ORDERKEY", right_on="L_ORDERKEY"

third_party/bigframes_vendored/tpch/queries/q20.py

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,21 +44,19 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
4444

4545
filtered_parts = part[part["P_NAME"].str.startswith(var4)]
4646

47-
filtered_parts = filtered_parts["P_PARTKEY"].unique(keep_order=False).to_frame()
48-
joined_parts = filtered_parts.merge(
49-
partsupp, left_on="P_PARTKEY", right_on="PS_PARTKEY"
50-
)
47+
filtered_parts = filtered_parts["P_PARTKEY"]
48+
joined_parts = partsupp[partsupp["PS_PARTKEY"].isin(filtered_parts)]
5149

5250
final_join = q1.merge(
5351
joined_parts,
5452
left_on=["L_SUPPKEY", "L_PARTKEY"],
55-
right_on=["PS_SUPPKEY", "P_PARTKEY"],
53+
right_on=["PS_SUPPKEY", "PS_PARTKEY"],
5654
)
57-
final_filtered = final_join[final_join["PS_AVAILQTY"] > final_join["SUM_QUANTITY"]]
58-
59-
final_filtered = final_filtered["PS_SUPPKEY"].unique(keep_order=False).to_frame()
55+
final_filtered = final_join[final_join["PS_AVAILQTY"] > final_join["SUM_QUANTITY"]][
56+
"PS_SUPPKEY"
57+
]
6058

61-
final_result = final_filtered.merge(q3, left_on="PS_SUPPKEY", right_on="S_SUPPKEY")
59+
final_result = q3[q3["S_SUPPKEY"].isin(final_filtered)]
6260
final_result = final_result[["S_NAME", "S_ADDRESS"]].sort_values(by="S_NAME")
6361

6462
next(final_result.to_pandas_batches(max_results=1500))

third_party/bigframes_vendored/tpch/queries/q22.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,9 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
2727

2828
filtered_customer = customer[customer["C_ACCTBAL"] > customer["AVG_ACCTBAL"]]
2929

30-
orders_unique = orders["O_CUSTKEY"].unique(keep_order=False).to_frame()
31-
filtered_customer = filtered_customer.merge(
32-
orders_unique, left_on="C_CUSTKEY", right_on="O_CUSTKEY", how="left"
33-
)
34-
filtered_customer = filtered_customer[filtered_customer["O_CUSTKEY"].isnull()]
30+
filtered_customer = filtered_customer[
31+
~filtered_customer["C_CUSTKEY"].isin(orders["O_CUSTKEY"])
32+
]
3533
result = filtered_customer.groupby("CNTRYCODE", as_index=False).agg(
3634
NUMCUST=bpd.NamedAgg(column="C_CUSTKEY", aggfunc="count"),
3735
TOTACCTBAL=bpd.NamedAgg(column="C_ACCTBAL", aggfunc="sum"),

0 commit comments

Comments
 (0)