Skip to content

Commit 37869a7

Browse files
add windowizable prop
1 parent 60ba975 commit 37869a7

File tree

3 files changed

+18
-0
lines changed

3 files changed

+18
-0
lines changed

bigframes/core/array_value.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,9 @@ def compute_general_reduction(
306306
multiple input rows into a single scalar value per group. If grouping keys are
307307
provided, the operation is performed per group; otherwise, it is a global reduction.
308308
309+
Note: Intermediate aggregations (those that are inputs to further aggregations)
310+
must be windowizable. Notably excluded are the approx quantile and approx top count ops.
311+
309312
Args:
310313
assignments (Sequence[ex.Expression]): A sequence of aggregation expressions
311314
to be calculated.

bigframes/core/expression_factoring.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,8 @@ def windowize(
134134
) -> expression.Expression:
135135
def windowize_local(expr: expression.Expression):
136136
if isinstance(expr, agg_expressions.Aggregation):
137+
if expr.op.can_be_windowized:
138+
raise ValueError(f"Op: {expr.op} cannot be windowized.")
137139
return agg_expressions.WindowExpression(expr, window)
138140
if isinstance(expr, agg_expressions.WindowExpression):
139141
raise ValueError(f"Expression {expr} already windowed!")

bigframes/operations/aggregations.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,11 @@ def order_independent(self):
6868
def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionType:
6969
...
7070

71+
@property
72+
def can_be_windowized(self):
73+
# This is really an engine property, but we treat feasibility in BigQuery SQL as the source of truth.
74+
return True
75+
7176

7277
@dataclasses.dataclass(frozen=True)
7378
class NullaryWindowOp(WindowOp):
@@ -257,6 +262,10 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
257262
raise TypeError(f"Type {input_types[0]} is not orderable")
258263
return input_types[0]
259264

265+
@property
266+
def can_be_windowized(self):
267+
return False
268+
260269

261270
@dataclasses.dataclass(frozen=True)
262271
class ApproxTopCountOp(UnaryAggregateOp):
@@ -274,6 +283,10 @@ def output_type(self, *input_types: dtypes.ExpressionType) -> dtypes.ExpressionT
274283
]
275284
return pd.ArrowDtype(pa.list_(pa.struct(fields)))
276285

286+
@property
287+
def can_be_windowized(self):
288+
return False
289+
277290

278291
@dataclasses.dataclass(frozen=True)
279292
class MeanOp(UnaryAggregateOp):

0 commit comments

Comments
 (0)