|
26 | 26 | import ibis.expr.types as ibis_types |
27 | 27 | import pandas |
28 | 28 |
|
29 | | -import bigframes.constants as constants |
30 | 29 | import bigframes.core.compile.scalar_op_compiler as op_compilers |
31 | | -import bigframes.core.expression as expressions |
| 30 | +import bigframes.core.expression as ex |
32 | 31 | import bigframes.core.guid |
33 | 32 | from bigframes.core.ordering import ( |
34 | 33 | encode_order_string, |
@@ -96,16 +95,6 @@ def _reduced_predicate(self) -> typing.Optional[ibis_types.BooleanValue]: |
96 | 95 | else None |
97 | 96 | ) |
98 | 97 |
|
99 | | - @abc.abstractmethod |
100 | | - def select_columns(self: T, column_ids: typing.Sequence[str]) -> T: |
101 | | - """Creates a new expression based on this expression with new columns.""" |
102 | | - ... |
103 | | - |
104 | | - def drop_columns(self: T, columns: Iterable[str]) -> T: |
105 | | - return self.select_columns( |
106 | | - [col for col in self.column_ids if col not in columns] |
107 | | - ) |
108 | | - |
109 | 98 | @abc.abstractmethod |
110 | 99 | def filter(self: T, predicate_id: str, keep_null: bool = False) -> T: |
111 | 100 | """Filter the table on a given expression, the predicate must be a boolean series aligned with the table expression.""" |
@@ -152,40 +141,26 @@ def _reproject_to_table(self: T) -> T: |
152 | 141 | """ |
153 | 142 | ... |
154 | 143 |
|
155 | | - def project_expression( |
| 144 | + def projection( |
156 | 145 | self: T, |
157 | | - expression: expressions.Expression, |
158 | | - output_column_id: typing.Optional[str] = None, |
| 146 | + expression_id_pairs: typing.Tuple[typing.Tuple[ex.Expression, str], ...], |
159 | 147 | ) -> T: |
160 | 148 | """Apply each expression to the ArrayValue and select the outputs as the resulting columns, each named by its paired id.""" |
161 | | - result_id = ( |
162 | | - output_column_id or expression.unbound_variables[0] |
163 | | - ) # overwrite input if not output id provided |
164 | | - bindings = { |
165 | | - col: self._get_ibis_column(col) for col in expression.unbound_variables |
166 | | - } |
167 | | - value = op_compiler.compile_expression(expression, bindings).name(result_id) |
168 | | - return self._set_or_replace_by_id(result_id, value) |
| 149 | + bindings = {col: self._get_ibis_column(col) for col in self.column_ids} |
| 150 | + values = [ |
| 151 | + op_compiler.compile_expression(expression, bindings).name(id) |
| 152 | + for expression, id in expression_id_pairs |
| 153 | + ] |
| 154 | + result = self._select(tuple(values)) # type: ignore |
169 | 155 |
|
170 | | - def assign(self: T, source_id: str, destination_id: str) -> T: |
171 | | - return self._set_or_replace_by_id( |
172 | | - destination_id, self._get_ibis_column(source_id) |
173 | | - ) |
| 156 | + # Need to reproject to convert ibis Scalar to ibis Column object |
| 157 | + if any(exp_id[0].is_const for exp_id in expression_id_pairs): |
| 158 | + result = result._reproject_to_table() |
| 159 | + return result |
174 | 160 |
|
175 | | - def assign_constant( |
176 | | - self: T, |
177 | | - destination_id: str, |
178 | | - value: typing.Any, |
179 | | - dtype: typing.Optional[bigframes.dtypes.Dtype], |
180 | | - ) -> T: |
181 | | - # TODO(b/281587571): Solve scalar constant aggregation problem w/Ibis. |
182 | | - ibis_value = bigframes.dtypes.literal_to_ibis_scalar(value, dtype) |
183 | | - if ibis_value is None: |
184 | | - raise NotImplementedError( |
185 | | - f"Type not supported as scalar value {type(value)}. {constants.FEEDBACK_LINK}" |
186 | | - ) |
187 | | - expr = self._set_or_replace_by_id(destination_id, ibis_value) |
188 | | - return expr._reproject_to_table() |
| 161 | + @abc.abstractmethod |
| 162 | + def _select(self: T, values: typing.Tuple[ibis_types.Value]) -> T: |
| 163 | + ... |
189 | 164 |
|
190 | 165 | @abc.abstractmethod |
191 | 166 | def _set_or_replace_by_id(self: T, id: str, new_value: ibis_types.Value) -> T: |
@@ -330,14 +305,6 @@ def _to_ibis_expr( |
330 | 305 | table = table.filter(ibis.random() < ibis.literal(fraction)) |
331 | 306 | return table |
332 | 307 |
|
333 | | - def select_columns(self, column_ids: typing.Sequence[str]) -> UnorderedIR: |
334 | | - """Creates a new expression based on this expression with new columns.""" |
335 | | - columns = [self._get_ibis_column(col_id) for col_id in column_ids] |
336 | | - builder = self.builder() |
337 | | - builder.columns = list(columns) |
338 | | - new_expr = builder.build() |
339 | | - return new_expr |
340 | | - |
341 | 308 | def filter(self, predicate_id: str, keep_null: bool = False) -> UnorderedIR: |
342 | 309 | condition = typing.cast( |
343 | 310 | ibis_types.BooleanValue, self._get_ibis_column(predicate_id) |
@@ -577,6 +544,11 @@ def _set_or_replace_by_id( |
577 | 544 | builder.columns = [*self.columns, new_value.name(id)] |
578 | 545 | return builder.build() |
579 | 546 |
|
| 547 | + def _select(self, values: typing.Tuple[ibis_types.Value]) -> UnorderedIR: |
| 548 | + builder = self.builder() |
| 549 | + builder.columns = values |
| 550 | + return builder.build() |
| 551 | + |
580 | 552 | def _reproject_to_table(self) -> UnorderedIR: |
581 | 553 | """ |
582 | 554 | Internal operators that projects the internal representation into a |
@@ -816,20 +788,6 @@ def promote_offsets(self, col_id: str) -> OrderedIR: |
816 | 788 | ] |
817 | 789 | return expr_builder.build() |
818 | 790 |
|
819 | | - def select_columns(self, column_ids: typing.Sequence[str]) -> OrderedIR: |
820 | | - """Creates a new expression based on this expression with new columns.""" |
821 | | - columns = [self._get_ibis_column(col_id) for col_id in column_ids] |
822 | | - expr = self |
823 | | - for ordering_column in set(self.column_ids).intersection( |
824 | | - [col_ref.column_id for col_ref in self._ordering.ordering_value_columns] |
825 | | - ): |
826 | | - # Need to hide ordering columns that are being dropped. Alternatively, could project offsets |
827 | | - expr = expr._hide_column(ordering_column) |
828 | | - builder = expr.builder() |
829 | | - builder.columns = list(columns) |
830 | | - new_expr = builder.build() |
831 | | - return new_expr |
832 | | - |
833 | 791 | ## Methods that only work with ordering |
834 | 792 | def project_window_op( |
835 | 793 | self, |
@@ -1221,6 +1179,29 @@ def _set_or_replace_by_id(self, id: str, new_value: ibis_types.Value) -> Ordered |
1221 | 1179 | builder.columns = [*self.columns, new_value.name(id)] |
1222 | 1180 | return builder.build() |
1223 | 1181 |
|
| 1182 | + def _select(self, values: typing.Tuple[ibis_types.Value]) -> OrderedIR: |
| 1183 | + """Select the given values as this expression's columns while maintaining ordering integrity.""" |
| 1184 | + # TODO: Split into explicit set and replace methods |
| 1185 | + ordering_col_ids = [ |
| 1186 | + col_ref.column_id for col_ref in self._ordering.ordering_value_columns |
| 1187 | + ] |
| 1188 | + ir = self |
| 1189 | + mappings = {value.name: value for value in values} |
| 1190 | + for ordering_id in ordering_col_ids: |
| 1191 | + # Drop case: an ordering column is not among the selected values |
| 1192 | + if (ordering_id not in mappings) and (ordering_id in ir.column_ids): |
| 1193 | + # id is being dropped, hide it first |
| 1194 | + ir = ir._hide_column(ordering_id) |
| 1195 | + # Mutate case: the id is selected, but its value differs from the current ordering column |
| 1196 | + elif (ordering_id in mappings) and not mappings[ordering_id].equals( |
| 1197 | + ir._get_any_column(ordering_id) |
| 1198 | + ): |
| 1199 | + ir = ir._hide_column(ordering_id) |
| 1200 | + |
| 1201 | + builder = ir.builder() |
| 1202 | + builder.columns = list(values) |
| 1203 | + return builder.build() |
| 1204 | + |
1224 | 1205 | ## Ordering specific helpers |
1225 | 1206 | def _get_any_column(self, key: str) -> ibis_types.Value: |
1226 | 1207 | """Gets the Ibis expression for a given column. Will also get hidden columns.""" |
|
0 commit comments