|
26 | 26 | import ibis.expr.types as ibis_types
|
27 | 27 | import pandas
|
28 | 28 |
|
29 |
| -import bigframes.constants as constants |
30 | 29 | import bigframes.core.compile.scalar_op_compiler as op_compilers
|
31 |
| -import bigframes.core.expression as expressions |
| 30 | +import bigframes.core.expression as ex |
32 | 31 | import bigframes.core.guid
|
33 | 32 | from bigframes.core.ordering import (
|
34 | 33 | encode_order_string,
|
@@ -96,16 +95,6 @@ def _reduced_predicate(self) -> typing.Optional[ibis_types.BooleanValue]:
|
96 | 95 | else None
|
97 | 96 | )
|
98 | 97 |
|
99 |
| - @abc.abstractmethod |
100 |
| - def select_columns(self: T, column_ids: typing.Sequence[str]) -> T: |
101 |
| - """Creates a new expression based on this expression with new columns.""" |
102 |
| - ... |
103 |
| - |
104 |
| - def drop_columns(self: T, columns: Iterable[str]) -> T: |
105 |
| - return self.select_columns( |
106 |
| - [col for col in self.column_ids if col not in columns] |
107 |
| - ) |
108 |
| - |
109 | 98 | @abc.abstractmethod
|
110 | 99 | def filter(self: T, predicate_id: str, keep_null: bool = False) -> T:
|
111 | 100 | """Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression."""
|
@@ -152,40 +141,26 @@ def _reproject_to_table(self: T) -> T:
|
152 | 141 | """
|
153 | 142 | ...
|
154 | 143 |
|
155 |
| - def project_expression( |
| 144 | + def projection( |
156 | 145 | self: T,
|
157 |
| - expression: expressions.Expression, |
158 |
| - output_column_id: typing.Optional[str] = None, |
| 146 | + expression_id_pairs: typing.Tuple[typing.Tuple[ex.Expression, str], ...], |
159 | 147 | ) -> T:
|
160 | 148 | """Compile each (expression, output id) pair against the current columns and keep only the resulting columns."""
|
161 |
| - result_id = ( |
162 |
| - output_column_id or expression.unbound_variables[0] |
163 |
| - ) # overwrite input if not output id provided |
164 |
| - bindings = { |
165 |
| - col: self._get_ibis_column(col) for col in expression.unbound_variables |
166 |
| - } |
167 |
| - value = op_compiler.compile_expression(expression, bindings).name(result_id) |
168 |
| - return self._set_or_replace_by_id(result_id, value) |
| 149 | + bindings = {col: self._get_ibis_column(col) for col in self.column_ids} |
| 150 | + values = [ |
| 151 | + op_compiler.compile_expression(expression, bindings).name(id) |
| 152 | + for expression, id in expression_id_pairs |
| 153 | + ] |
| 154 | + result = self._select(tuple(values)) # type: ignore |
169 | 155 |
|
170 |
| - def assign(self: T, source_id: str, destination_id: str) -> T: |
171 |
| - return self._set_or_replace_by_id( |
172 |
| - destination_id, self._get_ibis_column(source_id) |
173 |
| - ) |
| 156 | + # Need to reproject to convert ibis Scalar to ibis Column object |
| 157 | + if any(exp_id[0].is_const for exp_id in expression_id_pairs): |
| 158 | + result = result._reproject_to_table() |
| 159 | + return result |
174 | 160 |
|
175 |
| - def assign_constant( |
176 |
| - self: T, |
177 |
| - destination_id: str, |
178 |
| - value: typing.Any, |
179 |
| - dtype: typing.Optional[bigframes.dtypes.Dtype], |
180 |
| - ) -> T: |
181 |
| - # TODO(b/281587571): Solve scalar constant aggregation problem w/Ibis. |
182 |
| - ibis_value = bigframes.dtypes.literal_to_ibis_scalar(value, dtype) |
183 |
| - if ibis_value is None: |
184 |
| - raise NotImplementedError( |
185 |
| - f"Type not supported as scalar value {type(value)}. {constants.FEEDBACK_LINK}" |
186 |
| - ) |
187 |
| - expr = self._set_or_replace_by_id(destination_id, ibis_value) |
188 |
| - return expr._reproject_to_table() |
| 161 | + @abc.abstractmethod |
| 162 | + def _select(self: T, values: typing.Tuple[ibis_types.Value]) -> T: |
| 163 | + ... |
189 | 164 |
|
190 | 165 | @abc.abstractmethod
|
191 | 166 | def _set_or_replace_by_id(self: T, id: str, new_value: ibis_types.Value) -> T:
|
@@ -330,14 +305,6 @@ def _to_ibis_expr(
|
330 | 305 | table = table.filter(ibis.random() < ibis.literal(fraction))
|
331 | 306 | return table
|
332 | 307 |
|
333 |
| - def select_columns(self, column_ids: typing.Sequence[str]) -> UnorderedIR: |
334 |
| - """Creates a new expression based on this expression with new columns.""" |
335 |
| - columns = [self._get_ibis_column(col_id) for col_id in column_ids] |
336 |
| - builder = self.builder() |
337 |
| - builder.columns = list(columns) |
338 |
| - new_expr = builder.build() |
339 |
| - return new_expr |
340 |
| - |
341 | 308 | def filter(self, predicate_id: str, keep_null: bool = False) -> UnorderedIR:
|
342 | 309 | condition = typing.cast(
|
343 | 310 | ibis_types.BooleanValue, self._get_ibis_column(predicate_id)
|
@@ -577,6 +544,11 @@ def _set_or_replace_by_id(
|
577 | 544 | builder.columns = [*self.columns, new_value.name(id)]
|
578 | 545 | return builder.build()
|
579 | 546 |
|
| 547 | + def _select(self, values: typing.Tuple[ibis_types.Value]) -> UnorderedIR: |
| 548 | + builder = self.builder() |
| 549 | + builder.columns = values |
| 550 | + return builder.build() |
| 551 | + |
580 | 552 | def _reproject_to_table(self) -> UnorderedIR:
|
581 | 553 | """
|
582 | 554 | Internal operator that projects the internal representation into a
|
@@ -816,20 +788,6 @@ def promote_offsets(self, col_id: str) -> OrderedIR:
|
816 | 788 | ]
|
817 | 789 | return expr_builder.build()
|
818 | 790 |
|
819 |
| - def select_columns(self, column_ids: typing.Sequence[str]) -> OrderedIR: |
820 |
| - """Creates a new expression based on this expression with new columns.""" |
821 |
| - columns = [self._get_ibis_column(col_id) for col_id in column_ids] |
822 |
| - expr = self |
823 |
| - for ordering_column in set(self.column_ids).intersection( |
824 |
| - [col_ref.column_id for col_ref in self._ordering.ordering_value_columns] |
825 |
| - ): |
826 |
| - # Need to hide ordering columns that are being dropped. Alternatively, could project offsets |
827 |
| - expr = expr._hide_column(ordering_column) |
828 |
| - builder = expr.builder() |
829 |
| - builder.columns = list(columns) |
830 |
| - new_expr = builder.build() |
831 |
| - return new_expr |
832 |
| - |
833 | 791 | ## Methods that only work with ordering
|
834 | 792 | def project_window_op(
|
835 | 793 | self,
|
@@ -1221,6 +1179,29 @@ def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> Ordered
|
1221 | 1179 | builder.columns = [*self.columns, new_value.name(id)]
|
1222 | 1180 | return builder.build()
|
1223 | 1181 |
|
| 1182 | + def _select(self, values: typing.Tuple[ibis_types.Value]) -> OrderedIR: |
| 1183 | + """Select the given values as the new columns, hiding any ordering column that is dropped or changed.""" |
| 1184 | + # TODO: Split into explicit set and replace methods |
| 1185 | + ordering_col_ids = [ |
| 1186 | + col_ref.column_id for col_ref in self._ordering.ordering_value_columns |
| 1187 | + ] |
| 1188 | + ir = self |
| 1189 | + mappings = {value.name: value for value in values} |
| 1190 | + for ordering_id in ordering_col_ids: |
| 1191 | + # Drop case |
| 1192 | + if (ordering_id not in mappings) and (ordering_id in ir.column_ids): |
| 1193 | + # id is being dropped, hide it first |
| 1194 | + ir = ir._hide_column(ordering_id) |
| 1195 | + # Mutate case |
| 1196 | + elif (ordering_id in mappings) and not mappings[ordering_id].equals( |
| 1197 | + ir._get_any_column(ordering_id) |
| 1198 | + ): |
| 1199 | + ir = ir._hide_column(ordering_id) |
| 1200 | + |
| 1201 | + builder = ir.builder() |
| 1202 | + builder.columns = list(values) |
| 1203 | + return builder.build() |
| 1204 | + |
1224 | 1205 | ## Ordering specific helpers
|
1225 | 1206 | def _get_any_column(self, key: str) -> ibis_types.Value:
|
1226 | 1207 | """Gets the Ibis expression for a given column. Will also get hidden columns."""
|
|
0 commit comments