Skip to content

Commit 0413964

Browse files
authored
chore: add tpch q14-18 (#928)
1 parent 2f6cd9f commit 0413964

File tree

10 files changed

+342
-0
lines changed

10 files changed

+342
-0
lines changed

tests/benchmark/tpch/q14.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import pathlib
15+
16+
import benchmark.utils as utils
17+
import bigframes_vendored.tpch.queries.q14 as vendored_tpch_q14
18+
19+
if __name__ == "__main__":
    # Script entry point: obtain the TPC-H run configuration, then pass the
    # vendored q14 query function to `utils.get_execution_time` (presumably
    # the shared timing helper; its implementation is not visible here).
    tpch_config = utils.get_tpch_configuration()
    dataset_id, session, suffix = tpch_config

    # Absolute path of this script, forwarded to the helper as-is.
    script_path = pathlib.Path(__file__).absolute()

    utils.get_execution_time(
        vendored_tpch_q14.q,
        script_path,
        suffix,
        dataset_id,
        session,
    )

tests/benchmark/tpch/q15.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import pathlib
15+
16+
import benchmark.utils as utils
17+
import bigframes_vendored.tpch.queries.q15 as vendored_tpch_q15
18+
19+
if __name__ == "__main__":
    # Script entry point: obtain the TPC-H run configuration, then pass the
    # vendored q15 query function to `utils.get_execution_time` (presumably
    # the shared timing helper; its implementation is not visible here).
    tpch_config = utils.get_tpch_configuration()
    dataset_id, session, suffix = tpch_config

    # Absolute path of this script, forwarded to the helper as-is.
    script_path = pathlib.Path(__file__).absolute()

    utils.get_execution_time(
        vendored_tpch_q15.q,
        script_path,
        suffix,
        dataset_id,
        session,
    )

tests/benchmark/tpch/q16.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import pathlib
15+
16+
import benchmark.utils as utils
17+
import bigframes_vendored.tpch.queries.q16 as vendored_tpch_q16
18+
19+
if __name__ == "__main__":
    # Script entry point: obtain the TPC-H run configuration, then pass the
    # vendored q16 query function to `utils.get_execution_time` (presumably
    # the shared timing helper; its implementation is not visible here).
    tpch_config = utils.get_tpch_configuration()
    dataset_id, session, suffix = tpch_config

    # Absolute path of this script, forwarded to the helper as-is.
    script_path = pathlib.Path(__file__).absolute()

    utils.get_execution_time(
        vendored_tpch_q16.q,
        script_path,
        suffix,
        dataset_id,
        session,
    )

tests/benchmark/tpch/q17.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import pathlib
15+
16+
import benchmark.utils as utils
17+
import bigframes_vendored.tpch.queries.q17 as vendored_tpch_q17
18+
19+
if __name__ == "__main__":
    # Script entry point: obtain the TPC-H run configuration, then pass the
    # vendored q17 query function to `utils.get_execution_time` (presumably
    # the shared timing helper; its implementation is not visible here).
    tpch_config = utils.get_tpch_configuration()
    dataset_id, session, suffix = tpch_config

    # Absolute path of this script, forwarded to the helper as-is.
    script_path = pathlib.Path(__file__).absolute()

    utils.get_execution_time(
        vendored_tpch_q17.q,
        script_path,
        suffix,
        dataset_id,
        session,
    )

tests/benchmark/tpch/q18.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Copyright 2024 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import pathlib
15+
16+
import benchmark.utils as utils
17+
import bigframes_vendored.tpch.queries.q18 as vendored_tpch_q18
18+
19+
if __name__ == "__main__":
    # Script entry point: obtain the TPC-H run configuration, then pass the
    # vendored q18 query function to `utils.get_execution_time` (presumably
    # the shared timing helper; its implementation is not visible here).
    tpch_config = utils.get_tpch_configuration()
    dataset_id, session, suffix = tpch_config

    # Absolute path of this script, forwarded to the helper as-is.
    script_path = pathlib.Path(__file__).absolute()

    utils.get_execution_time(
        vendored_tpch_q18.q,
        script_path,
        suffix,
        dataset_id,
        session,
    )
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
# Contains code from https://github.com/pola-rs/tpch/blob/main/queries/polars/q14.py
2+
3+
from datetime import date
4+
5+
import bigframes
6+
7+
8+
def q(dataset_id: str, session: bigframes.Session):
    """Compute the "PROMO" share of discounted revenue for one shipping month.

    Joins LINEITEM to PART on the part key, keeps rows whose ``L_SHIPDATE``
    falls in ``[1995-09-01, 1995-10-01)``, and expresses the discounted
    revenue of rows whose ``P_TYPE`` contains ``"PROMO"`` as a percentage of
    the discounted revenue of all rows in that window.

    Args:
        dataset_id: Dataset inside the ``bigframes-dev-perf`` project that
            holds the LINEITEM and PART tables.
        session: BigFrames session used to read the tables.

    Returns:
        None. The rounded percentage is computed and then discarded.
    """
    null_index = bigframes.enums.DefaultIndexKind.NULL
    lineitem = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.LINEITEM", index_col=null_index
    )
    part = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.PART", index_col=null_index
    )

    # Half-open shipping window: start inclusive, end exclusive.
    window_start = date(1995, 9, 1)
    window_end = date(1995, 10, 1)

    joined = lineitem.merge(part, left_on="L_PARTKEY", right_on="P_PARTKEY")

    in_window = (joined["L_SHIPDATE"] >= window_start) & (
        joined["L_SHIPDATE"] < window_end
    )
    shipped = joined[in_window]

    # The match result is cast to Int64 so that multiplying by it keeps the
    # revenue of matching rows and zeroes out the rest.
    discounted_price = shipped["L_EXTENDEDPRICE"] * (1 - shipped["L_DISCOUNT"])
    promo_flag = shipped["P_TYPE"].str.contains("PROMO").astype("Int64")
    shipped["CONDI_REVENUE"] = discounted_price * promo_flag

    total_revenue = (
        shipped["L_EXTENDEDPRICE"] * (1 - shipped["L_DISCOUNT"])
    ).sum()
    promo_revenue = shipped["CONDI_REVENUE"].sum()

    promo_share = 100.00 * promo_revenue / total_revenue

    # Only the computation matters here; the value itself is not returned.
    _ = round(promo_share, 2)
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Contains code from https://github.com/pola-rs/tpch/blob/main/queries/polars/q15.py
2+
3+
from datetime import date
4+
5+
import bigframes
6+
import bigframes.pandas as bpd
7+
8+
9+
def q(dataset_id: str, session: bigframes.Session):
    """Find the supplier(s) with the highest discounted revenue in a date window.

    Sums ``L_EXTENDEDPRICE * (1 - L_DISCOUNT)`` per ``L_SUPPKEY`` over
    LINEITEM rows whose ``L_SHIPDATE`` falls in ``[1996-01-01, 1996-04-01)``,
    joins the totals to SUPPLIER, keeps the rows whose total equals the
    maximum, rounds the total to two decimals, and writes the selected
    supplier columns (sorted by ``S_SUPPKEY``) with ``to_gbq()``.

    Args:
        dataset_id: Dataset inside the ``bigframes-dev-perf`` project that
            holds the LINEITEM and SUPPLIER tables.
        session: BigFrames session used to read the tables.

    Returns:
        None. The result is materialized through ``to_gbq()`` with no
        explicit destination.
    """
    null_index = bigframes.enums.DefaultIndexKind.NULL
    lineitem = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.LINEITEM", index_col=null_index
    )
    supplier = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.SUPPLIER", index_col=null_index
    )

    # Half-open shipping window: start inclusive, end exclusive.
    window_start = date(1996, 1, 1)
    window_end = date(1996, 4, 1)

    in_window = (lineitem["L_SHIPDATE"] >= window_start) & (
        lineitem["L_SHIPDATE"] < window_end
    )
    shipped = lineitem[in_window]
    shipped["REVENUE"] = shipped["L_EXTENDEDPRICE"] * (1 - shipped["L_DISCOUNT"])

    per_supplier = shipped.groupby("L_SUPPKEY", as_index=False).agg(
        TOTAL_REVENUE=bpd.NamedAgg(column="REVENUE", aggfunc="sum")
    )
    per_supplier = per_supplier.rename(columns={"L_SUPPKEY": "SUPPLIER_NO"})

    supplier_revenue = bpd.merge(
        supplier, per_supplier, left_on="S_SUPPKEY", right_on="SUPPLIER_NO"
    )

    # Keep every supplier tied for the maximum total.
    top_revenue = supplier_revenue["TOTAL_REVENUE"].max()
    is_top = supplier_revenue["TOTAL_REVENUE"] == top_revenue
    top_suppliers = supplier_revenue[is_top]

    top_suppliers["TOTAL_REVENUE"] = top_suppliers["TOTAL_REVENUE"].round(2)

    output_columns = ["S_SUPPKEY", "S_NAME", "S_ADDRESS", "S_PHONE", "TOTAL_REVENUE"]
    q_final = top_suppliers[output_columns].sort_values("S_SUPPKEY")
    q_final.to_gbq()
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Contains code from https://github.com/pola-rs/tpch/blob/main/queries/polars/q16.py
2+
3+
import bigframes
4+
import bigframes.pandas as bpd
5+
6+
7+
def q(dataset_id: str, session: bigframes.Session):
    """Count distinct suppliers per (brand, type, size) after exclusions.

    Joins PART to PARTSUPP, drops parts of brand ``"Brand#45"``, parts whose
    ``P_TYPE`` contains ``"MEDIUM POLISHED"``, and parts whose ``P_SIZE`` is
    not in a fixed list, then drops rows whose supplier has an ``S_COMMENT``
    matching ``Customer.*Complaints``. The remaining rows are grouped by
    ``P_BRAND``/``P_TYPE``/``P_SIZE`` with a distinct count of
    ``PS_SUPPKEY``, sorted, and written with ``to_gbq()``.

    Args:
        dataset_id: Dataset inside the ``bigframes-dev-perf`` project that
            holds the PART, PARTSUPP and SUPPLIER tables.
        session: BigFrames session used to read the tables.

    Returns:
        None. The result is materialized through ``to_gbq()`` with no
        explicit destination.
    """
    null_index = bigframes.enums.DefaultIndexKind.NULL
    part = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.PART", index_col=null_index
    )
    partsupp = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.PARTSUPP", index_col=null_index
    )
    supplier = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.SUPPLIER", index_col=null_index
    )

    excluded_brand = "Brand#45"

    # Supplier keys whose comment matches the complaint pattern.
    has_complaint = supplier["S_COMMENT"].str.contains(
        "Customer.*Complaints", regex=True
    )
    complaint_suppkeys = supplier[has_complaint]["S_SUPPKEY"]

    # Part-level filters, applied one after another on the joined frame.
    candidates = part.merge(partsupp, left_on="P_PARTKEY", right_on="PS_PARTKEY")
    candidates = candidates[candidates["P_BRAND"] != excluded_brand]
    candidates = candidates[~candidates["P_TYPE"].str.contains("MEDIUM POLISHED")]
    candidates = candidates[
        candidates["P_SIZE"].isin([49, 14, 23, 45, 19, 3, 36, 9])
    ]

    eligible = candidates[~candidates["PS_SUPPKEY"].isin(complaint_suppkeys)]

    supplier_counts = eligible.groupby(
        ["P_BRAND", "P_TYPE", "P_SIZE"], as_index=False
    ).agg(SUPPLIER_CNT=bpd.NamedAgg(column="PS_SUPPKEY", aggfunc="nunique"))

    # Largest supplier count first; ties broken by brand, type, then size.
    q_final = supplier_counts.sort_values(
        by=["SUPPLIER_CNT", "P_BRAND", "P_TYPE", "P_SIZE"],
        ascending=[False, True, True, True],
    )

    q_final.to_gbq()
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
# Contains code from https://github.com/pola-rs/tpch/blob/main/queries/polars/q17.py
2+
3+
import bigframes
4+
import bigframes.pandas as bpd
5+
6+
7+
def q(dataset_id: str, session: bigframes.Session):
    """Compute ``AVG_YEARLY`` for small-quantity line items of selected parts.

    Selects PART rows with brand ``"Brand#23"`` and container ``"MED BOX"``,
    left-joins them to LINEITEM, and derives a per-part threshold of 0.2
    times the mean ``L_QUANTITY``. Line items below their part's threshold
    are kept; the sum of their ``L_EXTENDEDPRICE`` divided by 7.0 and rounded
    to two decimals is written as a one-row frame with ``to_gbq()``.

    Args:
        dataset_id: Dataset inside the ``bigframes-dev-perf`` project that
            holds the LINEITEM and PART tables.
        session: BigFrames session used to read the tables.

    Returns:
        None. The result is materialized through ``to_gbq()`` with no
        explicit destination.
    """
    null_index = bigframes.enums.DefaultIndexKind.NULL
    lineitem = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.LINEITEM", index_col=null_index
    )
    part = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.PART", index_col=null_index
    )

    target_brand = "Brand#23"
    target_container = "MED BOX"

    is_target_part = (part["P_BRAND"] == target_brand) & (
        part["P_CONTAINER"] == target_container
    )
    selected_parts = part[is_target_part]

    part_lines = bpd.merge(
        selected_parts,
        lineitem,
        how="left",
        left_on="P_PARTKEY",
        right_on="L_PARTKEY",
    )

    # Per-part threshold: 20% of the mean ordered quantity.
    threshold = part_lines.groupby("P_PARTKEY", as_index=False).agg(
        AVG_QUANTITY=bpd.NamedAgg(column="L_QUANTITY", aggfunc="mean")
    )
    threshold = threshold.rename(columns={"P_PARTKEY": "KEY"})
    threshold["AVG_QUANTITY"] = threshold["AVG_QUANTITY"] * 0.2

    with_threshold = bpd.merge(
        threshold, part_lines, left_on="KEY", right_on="P_PARTKEY"
    )

    below_threshold = with_threshold[
        with_threshold["L_QUANTITY"] < with_threshold["AVG_QUANTITY"]
    ]

    avg_yearly = (below_threshold["L_EXTENDEDPRICE"].sum() / 7.0).round(2)
    q_final = bpd.DataFrame({"AVG_YEARLY": [avg_yearly]})

    q_final.to_gbq()
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Contains code from https://github.com/pola-rs/tpch/blob/main/queries/polars/q18.py
2+
3+
import typing
4+
5+
import bigframes
6+
import bigframes.pandas as bpd
7+
8+
9+
def q(dataset_id: str, session: bigframes.Session):
    """List the top 100 orders whose total line-item quantity exceeds 300.

    Sums ``L_QUANTITY`` per ``L_ORDERKEY`` and keeps orders whose sum is
    greater than 300. Those orders are joined to ORDERS, back to LINEITEM,
    and to CUSTOMER, then aggregated per customer/order with the summed
    quantity in ``COL6``. ``O_ORDERDATE`` is renamed to ``O_ORDERDAT``; rows
    are sorted by ``O_TOTALPRICE`` descending then ``O_ORDERDAT`` ascending,
    and the first 100 are written with ``to_gbq()``.

    Args:
        dataset_id: Dataset inside the ``bigframes-dev-perf`` project that
            holds the CUSTOMER, LINEITEM and ORDERS tables.
        session: BigFrames session used to read the tables.

    Returns:
        None. The result is materialized through ``to_gbq()`` with no
        explicit destination.
    """
    null_index = bigframes.enums.DefaultIndexKind.NULL
    customer = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.CUSTOMER", index_col=null_index
    )
    lineitem = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.LINEITEM", index_col=null_index
    )
    orders = session.read_gbq(
        f"bigframes-dev-perf.{dataset_id}.ORDERS", index_col=null_index
    )

    min_total_quantity = 300

    # Total quantity per order, keeping only orders above the cutoff.
    order_quantity = lineitem.groupby("L_ORDERKEY", as_index=False).agg(
        SUM_QUANTITY=bpd.NamedAgg(column="L_QUANTITY", aggfunc="sum")
    )
    large_orders = order_quantity[order_quantity["SUM_QUANTITY"] > min_total_quantity]

    matching_orders = orders.merge(
        large_orders, left_on="O_ORDERKEY", right_on="L_ORDERKEY", how="inner"
    )

    detail = matching_orders.merge(
        lineitem, left_on="O_ORDERKEY", right_on="L_ORDERKEY"
    )
    detail = detail.merge(customer, left_on="O_CUSTKEY", right_on="C_CUSTKEY")

    group_keys = ["C_NAME", "C_CUSTKEY", "O_ORDERKEY", "O_ORDERDATE", "O_TOTALPRICE"]
    summary = detail.groupby(group_keys, as_index=False).agg(
        COL6=bpd.NamedAgg(column="L_QUANTITY", aggfunc="sum")
    )

    summary = summary.rename(columns={"O_ORDERDATE": "O_ORDERDAT"})

    # The cast has no runtime effect; it only tells the type checker that
    # `summary` is a DataFrame.
    summary = typing.cast(bpd.DataFrame, summary).sort_values(
        ["O_TOTALPRICE", "O_ORDERDAT"], ascending=[False, True]
    )

    q_final = summary.head(100)
    q_final.to_gbq()

0 commit comments

Comments
 (0)