
Commit 2d98a5a

refactor: unify row operators to same interface (#100)
Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly:
- [ ] Make sure to open an issue as a [bug/issue](https://togithub.com/googleapis/python-bigquery-dataframes/issues/new/choose) before writing your code! That way we can discuss the change, evaluate designs, and agree on the general idea
- [ ] Ensure the tests and linter pass
- [ ] Code coverage does not decrease (if any source code was changed)
- [ ] Appropriate docs were updated (if necessary)

Fixes #<issue_number_goes_here> 🦕
1 parent dafbc1b commit 2d98a5a
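
For orientation before the per-file diffs: the three arity-specific projection nodes are replaced by one node that takes its inputs as a tuple of column ids. A before/after sketch using placeholder ids and the constructor keywords visible in the bigframes/core/__init__.py diff below; `nodes`, `node`, and `op` stand for the real module, child node, and operation and are not defined here:

# Before: one node class per arity.
nodes.ProjectUnaryOpNode(child=node, input_id="a", op=op, output_id="out")
nodes.ProjectBinaryOpNode(
    child=node, left_input_id="a", right_input_id="b", op=op, output_id="out"
)
nodes.ProjectTernaryOpNode(
    child=node, input_id1="a", input_id2="b", input_id3="c", op=op, output_id="out"
)

# After: a single ProjectRowOpNode, whatever the arity.
nodes.ProjectRowOpNode(child=node, input_ids=("a", "b", "c"), op=op, output_id="out")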

File tree

15 files changed: +1534 -1242 lines changed


bigframes/core/__init__.py

Lines changed: 6 additions & 9 deletions

@@ -157,8 +157,8 @@ def project_unary_op(
     ) -> ArrayValue:
         """Creates a new expression based on this expression with unary operation applied to one column."""
         return ArrayValue(
-            nodes.ProjectUnaryOpNode(
-                child=self.node, input_id=column_name, op=op, output_id=output_name
+            nodes.ProjectRowOpNode(
+                child=self.node, input_ids=(column_name,), op=op, output_id=output_name
             )
         )

@@ -171,10 +171,9 @@ def project_binary_op(
     ) -> ArrayValue:
         """Creates a new expression based on this expression with binary operation applied to two columns."""
         return ArrayValue(
-            nodes.ProjectBinaryOpNode(
+            nodes.ProjectRowOpNode(
                 child=self.node,
-                left_input_id=left_column_id,
-                right_input_id=right_column_id,
+                input_ids=(left_column_id, right_column_id),
                 op=op,
                 output_id=output_column_id,
             )

@@ -190,11 +189,9 @@ def project_ternary_op(
     ) -> ArrayValue:
         """Creates a new expression based on this expression with ternary operation applied to three columns."""
         return ArrayValue(
-            nodes.ProjectTernaryOpNode(
+            nodes.ProjectRowOpNode(
                 child=self.node,
-                input_id1=col_id_1,
-                input_id2=col_id_2,
-                input_id3=col_id_3,
+                input_ids=(col_id_1, col_id_2, col_id_3),
                 op=op,
                 output_id=output_column_id,
             )
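
The definition of `ProjectRowOpNode` lives in bigframes/core/nodes.py and is not part of this excerpt; judging from the constructor calls above, it is roughly shaped like the following hedged sketch (field types and the base class are assumptions):

from dataclasses import dataclass
from typing import Optional, Tuple


# Hypothetical stand-ins for types defined elsewhere in bigframes.
class BigFrameNode: ...
class RowOp: ...


@dataclass(frozen=True)
class ProjectRowOpNode(BigFrameNode):
    """One projection node for any arity: the former input_id / left_input_id /
    input_id1..3 fields collapse into a single positional tuple of column ids."""

    child: BigFrameNode
    input_ids: Tuple[str, ...]
    op: RowOp
    output_id: Optional[str] = None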

bigframes/core/block_transforms.py

Lines changed: 1 addition & 1 deletion

@@ -45,7 +45,7 @@ def equals(block1: blocks.Block, block2: blocks.Block) -> bool:
         lcolmapped = lmap[lcol]
         rcolmapped = rmap[rcol]
         joined_block, result_id = joined_block.apply_binary_op(
-            lcolmapped, rcolmapped, ops.eq_nulls_match_op
+            lcolmapped, rcolmapped, ops.eq_null_match_op
         )
         joined_block, result_id = joined_block.apply_unary_op(
             result_id, ops.partial_right(ops.fillna_op, False)

bigframes/core/blocks.py

Lines changed: 6 additions & 6 deletions

@@ -581,12 +581,12 @@ def _split(
         # Create an ordering col and convert to string
         block, ordering_col = block.promote_offsets()
         block, string_ordering_col = block.apply_unary_op(
-            ordering_col, ops.AsTypeOp("string[pyarrow]")
+            ordering_col, ops.AsTypeOp(to_type="string[pyarrow]")
         )

         # Apply hash method to sum col and order by it.
         block, string_sum_col = block.apply_binary_op(
-            string_ordering_col, random_state_col, ops.concat_op
+            string_ordering_col, random_state_col, ops.strconcat_op
         )
         block, hash_string_sum_col = block.apply_unary_op(string_sum_col, ops.hash_op)
         block = block.order_by([ordering.OrderingColumnReference(hash_string_sum_col)])

@@ -1232,8 +1232,8 @@ def add_prefix(self, prefix: str, axis: str | int | None = None) -> Block:
         if axis_number == 0:
             expr = self._expr
             for index_col in self._index_columns:
-                expr = expr.project_unary_op(index_col, ops.AsTypeOp("string"))
-                prefix_op = ops.BinopPartialLeft(ops.add_op, prefix)
+                expr = expr.project_unary_op(index_col, ops.AsTypeOp(to_type="string"))
+                prefix_op = ops.ApplyLeft(base_op=ops.add_op, left_scalar=prefix)
                 expr = expr.project_unary_op(index_col, prefix_op)
             return Block(
                 expr,

@@ -1251,8 +1251,8 @@ def add_suffix(self, suffix: str, axis: str | int | None = None) -> Block:
         if axis_number == 0:
             expr = self._expr
             for index_col in self._index_columns:
-                expr = expr.project_unary_op(index_col, ops.AsTypeOp("string"))
-                prefix_op = ops.BinopPartialRight(ops.add_op, suffix)
+                expr = expr.project_unary_op(index_col, ops.AsTypeOp(to_type="string"))
+                prefix_op = ops.ApplyRight(base_op=ops.add_op, right_scalar=suffix)
                 expr = expr.project_unary_op(index_col, prefix_op)
             return Block(
                 expr,
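
`ops.ApplyLeft` and `ops.ApplyRight` (replacing `BinopPartialLeft` / `BinopPartialRight`) bind one operand of a binary op to a scalar so the result fits wherever a unary row op is expected. The ops module itself is not in this diff; a minimal, self-contained sketch of the idea, assuming only the keyword arguments visible above:

from dataclasses import dataclass
from typing import Any, Callable

# Stand-in for the real bigframes binary-op protocol (assumption).
BinaryOp = Callable[[Any, Any], Any]


@dataclass(frozen=True)
class ApplyLeft:
    """Unary adapter: evaluates base_op(left_scalar, x)."""

    base_op: BinaryOp
    left_scalar: Any

    def __call__(self, x: Any) -> Any:
        return self.base_op(self.left_scalar, x)


@dataclass(frozen=True)
class ApplyRight:
    """Unary adapter: evaluates base_op(x, right_scalar)."""

    base_op: BinaryOp
    right_scalar: Any

    def __call__(self, x: Any) -> Any:
        return self.base_op(x, self.right_scalar)


def add_op(a: Any, b: Any) -> Any:
    return a + b

# add_prefix binds the prefix on the left; add_suffix binds the suffix on the right.
assert ApplyLeft(base_op=add_op, left_scalar="idx_")("0") == "idx_0"
assert ApplyRight(base_op=add_op, right_scalar="_col")("a") == "a_col"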

bigframes/core/compile/compiled.py

Lines changed: 17 additions & 40 deletions

@@ -27,6 +27,7 @@
 import pandas

 import bigframes.constants as constants
+import bigframes.core.compile.scalar_op_compiler as op_compilers
 import bigframes.core.guid
 from bigframes.core.ordering import (
     encode_order_string,

@@ -43,8 +44,11 @@
 ORDER_ID_COLUMN = "bigframes_ordering_id"
 PREDICATE_COLUMN = "bigframes_predicate"

+
 T = typing.TypeVar("T", bound="BaseIbisIR")

+op_compiler = op_compilers.scalar_op_compiler
+

 class BaseIbisIR(abc.ABC):
     """Implementation detail, contains common logic between ordered and unordered IR"""

@@ -147,49 +151,20 @@ def _reproject_to_table(self: T) -> T:
         """
         ...

-    def project_unary_op(
+    def project_row_op(
         self: T,
-        input_column_id: str,
-        op: ops.UnaryOp,
+        input_column_ids: typing.Sequence[str],
+        op: ops.RowOp,
         output_column_id: typing.Optional[str] = None,
     ) -> T:
         """Creates a new expression based on this expression with unary operation applied to one column."""
         result_id = (
-            output_column_id or input_column_id
+            output_column_id or input_column_ids[0]
         )  # overwrite input if not output id provided
-        value = op._as_ibis(self._get_ibis_column(input_column_id)).name(result_id)
+        inputs = tuple(self._get_ibis_column(col) for col in input_column_ids)
+        value = op_compiler.compile_row_op(op, inputs).name(result_id)
         return self._set_or_replace_by_id(result_id, value)

-    def project_binary_op(
-        self: T,
-        left_column_id: str,
-        right_column_id: str,
-        op: ops.BinaryOp,
-        output_column_id: str,
-    ) -> T:
-        """Creates a new expression based on this expression with binary operation applied to two columns."""
-        value = op(
-            self._get_ibis_column(left_column_id),
-            self._get_ibis_column(right_column_id),
-        ).name(output_column_id)
-        return self._set_or_replace_by_id(output_column_id, value)
-
-    def project_ternary_op(
-        self: T,
-        col_id_1: str,
-        col_id_2: str,
-        col_id_3: str,
-        op: ops.TernaryOp,
-        output_column_id: str,
-    ) -> T:
-        """Creates a new expression based on this expression with ternary operation applied to three columns."""
-        value = op(
-            self._get_ibis_column(col_id_1),
-            self._get_ibis_column(col_id_2),
-            self._get_ibis_column(col_id_3),
-        ).name(output_column_id)
-        return self._set_or_replace_by_id(output_column_id, value)
-
     def assign(self: T, source_id: str, destination_id: str) -> T:
         return self._set_or_replace_by_id(
             destination_id, self._get_ibis_column(source_id)

@@ -454,7 +429,9 @@ def unpivot(
             None, force_dtype=col_dtype
         )
         ibis_values = [
-            ops.AsTypeOp(col_dtype)._as_ibis(unpivot_table[col])
+            op_compiler.compile_row_op(
+                ops.AsTypeOp(col_dtype), (unpivot_table[col],)
+            )
             if col is not None
             else null_value
             for col in source_cols

@@ -521,9 +498,7 @@ def aggregate(
         expr = OrderedIR(result, columns=columns, ordering=ordering)
         if dropna:
             for column_id in by_column_ids:
-                expr = expr._filter(
-                    ops.notnull_op._as_ibis(expr._get_ibis_column(column_id))
-                )
+                expr = expr._filter(expr._get_ibis_column(column_id).notnull())
             # Can maybe remove this as Ordering id is redundant as by_column is unique after aggregation
             return expr._project_offsets()
         else:

@@ -982,7 +957,9 @@ def unpivot(
             None, force_dtype=col_dtype
         )
         ibis_values = [
-            ops.AsTypeOp(col_dtype)._as_ibis(unpivot_table[col])
+            op_compiler.compile_row_op(
+                ops.AsTypeOp(col_dtype), (unpivot_table[col],)
+            )
            if col is not None
            else null_value
            for col in source_cols
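
The `op_compiler.compile_row_op(op, inputs)` calls above replace the per-op `_as_ibis` methods; the new bigframes/core/compile/scalar_op_compiler.py module is not included in this excerpt. Conceptually it maps a row op plus its already-compiled input columns to a single expression. A hedged, self-contained illustration of that registry shape (class names and the registration API are assumptions, and plain Python values stand in for ibis expressions):

from typing import Callable, Dict, Sequence, Type


# Hypothetical stand-in for bigframes.operations.RowOp.
class RowOp: ...


class ScalarOpCompiler:
    """Registry mapping a RowOp type to a function of the op and its compiled inputs."""

    def __init__(self) -> None:
        self._registry: Dict[Type[RowOp], Callable[..., object]] = {}

    def register(self, op_type: Type[RowOp]):
        def decorator(fn: Callable[..., object]) -> Callable[..., object]:
            self._registry[op_type] = fn
            return fn
        return decorator

    def compile_row_op(self, op: RowOp, inputs: Sequence[object]) -> object:
        # Look up the implementation for this op type and apply it to the inputs,
        # regardless of whether the op is unary, binary, or ternary.
        return self._registry[type(op)](op, *inputs)


scalar_op_compiler = ScalarOpCompiler()


class AddOp(RowOp): ...

@scalar_op_compiler.register(AddOp)
def _(op: AddOp, left: object, right: object) -> object:
    return left + right  # in bigframes this would build an ibis expression

assert scalar_op_compiler.compile_row_op(AddOp(), (2, 3)) == 5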

bigframes/core/compile/compiler.py

Lines changed: 3 additions & 17 deletions

@@ -143,23 +143,9 @@ def compile_reversed(node: nodes.ReversedNode, ordered: bool = True):


 @_compile_node.register
-def compile_project_unary(node: nodes.ProjectUnaryOpNode, ordered: bool = True):
-    return compile_node(node.child, ordered).project_unary_op(
-        node.input_id, node.op, node.output_id
-    )
-
-
-@_compile_node.register
-def compile_project_binary(node: nodes.ProjectBinaryOpNode, ordered: bool = True):
-    return compile_node(node.child, ordered).project_binary_op(
-        node.left_input_id, node.right_input_id, node.op, node.output_id
-    )
-
-
-@_compile_node.register
-def compile_project_ternary(node: nodes.ProjectTernaryOpNode, ordered: bool = True):
-    return compile_node(node.child, ordered).project_ternary_op(
-        node.input_id1, node.input_id2, node.input_id3, node.op, node.output_id
+def compile_project(node: nodes.ProjectRowOpNode, ordered: bool = True):
+    return compile_node(node.child, ordered).project_row_op(
+        node.input_ids, node.op, node.output_id
     )
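
With the three projection nodes collapsed into one, the `@_compile_node.register` handlers above reduce to a single registration on what appears to be a functools-style single-dispatch compiler. A self-contained sketch of that pattern (the node class and the compiled output are simplified stand-ins, not the real bigframes types):

import functools
from dataclasses import dataclass
from typing import Tuple


@dataclass(frozen=True)
class ProjectRowOpNode:
    # Simplified stand-in for nodes.ProjectRowOpNode.
    input_ids: Tuple[str, ...]
    output_id: str


@functools.singledispatch
def _compile_node(node, ordered: bool = True):
    raise ValueError(f"No compiler registered for {type(node)}")


@_compile_node.register
def compile_project(node: ProjectRowOpNode, ordered: bool = True):
    # One handler now covers unary, binary, and ternary projections.
    return f"project({', '.join(node.input_ids)}) -> {node.output_id}"


print(_compile_node(ProjectRowOpNode(("a", "b"), "out")))  # project(a, b) -> out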