Skip to content

Commit 26c6838

Browse files
chore: Amend tpch q8 to reduce join (#1328)
1 parent a1cafa2 commit 26c6838

File tree

1 file changed

+5
-11
lines changed
  • third_party/bigframes_vendored/tpch/queries

1 file changed

+5
-11
lines changed

third_party/bigframes_vendored/tpch/queries/q8.py

Lines changed: 5 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -62,17 +62,11 @@ def q(project_id: str, dataset_id: str, session: bigframes.Session):
62 62
jn7["VOLUME"] = jn7["L_EXTENDEDPRICE"] * (1.0 - jn7["L_DISCOUNT"])
63 63
jn7 = jn7.rename(columns={"N_NAME": "NATION"})
64 64

65-
denominator = jn7.groupby("O_YEAR")["VOLUME"].sum().rename("DENOMINATOR")
66-
numerator = (
67-
jn7[jn7["NATION"] == var1]
68-
.groupby(jn7["O_YEAR"])["VOLUME"]
69-
.sum()
70-
.rename("NUMERATOR")
71-
)
72-
jn8 = denominator.to_frame().join(numerator.to_frame(), how="left")
65+
jn7["numerator"] = jn7["VOLUME"].where(jn7["NATION"] == var1, 0)
66+
jn7["denominator"] = jn7["VOLUME"]
73 67

74-
# ValueError: Caching with offsets only supported in strictly ordered mode.
75-
jn8["MKT_SHARE"] = (jn8["NUMERATOR"] / jn8["DENOMINATOR"]).round(2)
68+
sums = jn7.groupby("O_YEAR")[["numerator", "denominator"]].sum()
69+
sums["MKT_SHARE"] = (sums["numerator"] / sums["denominator"]).round(2)
76 70

77-
result_df = jn8["MKT_SHARE"].sort_index().rename("MKT_SHARE").reset_index()
71+
result_df = sums["MKT_SHARE"].sort_index().rename("MKT_SHARE").reset_index()
78 72
result_df.to_gbq()

0 commit comments

Comments
 (0)