Skip to content

Commit 68d73a9

Browse files
refactor: Preprocess expression trees to pull up ordering (#1321)
1 parent 2bb068f commit 68d73a9

File tree

19 files changed

+815
-1516
lines changed

19 files changed

+815
-1516
lines changed

bigframes/core/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,9 @@ def from_table(
120120
if offsets_col:
121121
ordering = orderings.TotalOrdering.from_offset_col(offsets_col)
122122
elif primary_key:
123-
ordering = orderings.TotalOrdering.from_primary_key(primary_key)
123+
ordering = orderings.TotalOrdering.from_primary_key(
124+
[ids.ColumnId(key_part) for key_part in primary_key]
125+
)
124126

125127
# Scan all columns by default, we define this list as it can be pruned while preserving source_def
126128
scan_list = nodes.ScanList(

bigframes/core/blocks.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1410,7 +1410,7 @@ def grouped_head(
14101410

14111411
block, result_id = self.apply_window_op(
14121412
value_columns[0],
1413-
agg_ops.rank_op,
1413+
agg_ops.count_op,
14141414
window_spec=window_spec,
14151415
)
14161416

bigframes/core/compile/aggregate_compiler.py

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -55,7 +55,7 @@ def compile_aggregate(
5555
return compile_nullary_agg(aggregate.op)
5656
if isinstance(aggregate, ex.UnaryAggregation):
5757
input = scalar_compiler.compile_expression(aggregate.arg, bindings=bindings)
58-
if aggregate.op.can_order_by:
58+
if not aggregate.op.order_independent:
5959
return compile_ordered_unary_agg(aggregate.op, input, order_by=order_by) # type: ignore
6060
else:
6161
return compile_unary_agg(aggregate.op, input) # type: ignore
@@ -150,6 +150,11 @@ def _(op: agg_ops.SizeOp, window=None) -> ibis_types.NumericValue:
150150
return _apply_window_if_present(ibis_ops.count(1), window)
151151

152152

153+
@compile_unary_agg.register
154+
def _(op: agg_ops.SizeUnaryOp, _, window=None) -> ibis_types.NumericValue:
155+
return _apply_window_if_present(ibis_ops.count(1), window)
156+
157+
153158
@compile_unary_agg.register
154159
@numeric_op
155160
def _(
@@ -171,13 +176,6 @@ def _(
171176
column: ibis_types.NumericColumn,
172177
window=None,
173178
) -> ibis_types.NumericValue:
174-
# PERCENTILE_CONT has very few allowed windows. For example, "window
175-
# framing clause is not allowed for analytic function percentile_cont".
176-
if window is not None:
177-
raise NotImplementedError(
178-
f"Median with windowing is not supported. {constants.FEEDBACK_LINK}"
179-
)
180-
181179
# TODO(swast): Allow switching between exact and approximate median.
182180
# For now, the best we can do is an approximate median when we're doing
183181
# an aggregation, as PERCENTILE_CONT is only an analytic function.

bigframes/core/compile/api.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -24,9 +24,7 @@
2424
import bigframes.core.ordering
2525
import bigframes.core.schema
2626

27-
_STRICT_COMPILER = compiler.Compiler(
28-
strict=True, enable_pruning=True, enable_densify_ids=True
29-
)
27+
_STRICT_COMPILER = compiler.Compiler(strict=True)
3028

3129

3230
class SQLCompiler:
@@ -72,9 +70,7 @@ def compile_raw(
7270
def test_only_try_evaluate(node: bigframes.core.nodes.BigFrameNode):
7371
"""Use only for unit testing paths - not fully featured. Will throw exception if fails."""
7472
node = _STRICT_COMPILER._preprocess(node)
75-
ibis = _STRICT_COMPILER.compile_ordered_ir(node)._to_ibis_expr(
76-
ordering_mode="unordered"
77-
)
73+
ibis = _STRICT_COMPILER.compile_node(node)._to_ibis_expr()
7874
return ibis.pandas.connect({}).execute(ibis)
7975

8076

@@ -83,7 +79,7 @@ def test_only_ibis_inferred_schema(node: bigframes.core.nodes.BigFrameNode):
8379
import bigframes.core.schema
8480

8581
node = _STRICT_COMPILER._preprocess(node)
86-
compiled = _STRICT_COMPILER.compile_unordered_ir(node)
82+
compiled = _STRICT_COMPILER.compile_node(node)
8783
items = tuple(
8884
bigframes.core.schema.SchemaItem(name, compiled.get_column_type(ibis_id))
8985
for name, ibis_id in zip(node.schema.names, compiled.column_ids)

0 commit comments

Comments
 (0)