Skip to content

Commit 56dd591

Browse files
refactor: Remove ibis usage to define new dataframe objects (#783)
1 parent d5ae680 commit 56dd591

File tree

6 files changed

+29
-147
lines changed

6 files changed

+29
-147
lines changed

bigframes/core/__init__.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
import warnings
2424

2525
import google.cloud.bigquery
26-
import ibis.expr.types as ibis_types
2726
import pandas
2827
import pyarrow as pa
2928
import pyarrow.feather as pa_feather
@@ -60,30 +59,6 @@ class ArrayValue:
6059

6160
node: nodes.BigFrameNode
6261

63-
# DO NOT use, on deprecation path
64-
@classmethod
65-
def from_ibis(
66-
cls,
67-
session: Session,
68-
table: ibis_types.Table,
69-
columns: Sequence[ibis_types.Value],
70-
hidden_ordering_columns: Sequence[ibis_types.Value],
71-
ordering: orderings.ExpressionOrdering,
72-
):
73-
import bigframes.core.compile.ibis_types
74-
75-
node = nodes.ReadGbqNode(
76-
table=table,
77-
table_session=session,
78-
columns=tuple(
79-
bigframes.core.compile.ibis_types.ibis_value_to_canonical_type(column)
80-
for column in columns
81-
),
82-
hidden_ordering_columns=tuple(hidden_ordering_columns),
83-
ordering=ordering,
84-
)
85-
return cls(node)
86-
8762
@classmethod
8863
def from_pyarrow(cls, arrow_table: pa.Table, session: Session):
8964
adapted_table = local_data.adapt_pa_table(arrow_table)

bigframes/core/blocks.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2345,7 +2345,6 @@ def _get_rows_as_json_values(self) -> Block:
23452345
# TODO(shobs): Replace direct SQL manipulation by structured expression
23462346
# manipulation
23472347
ordering_column_name = guid.generate_guid()
2348-
self.session._cache_with_offsets(self.expr)
23492348
expr = self.expr.promote_offsets(ordering_column_name)
23502349
expr_sql = self.session._to_sql(expr)
23512350

@@ -2415,17 +2414,31 @@ def _get_rows_as_json_values(self) -> Block:
24152414
)
24162415
SELECT {select_columns_csv} FROM T1
24172416
"""
2418-
ibis_table = self.session.ibis_client.sql(json_sql)
2419-
order_for_ibis_table = ordering.ExpressionOrdering.from_offset_col(
2420-
ordering_column_name
2421-
)
2422-
expr = core.ArrayValue.from_ibis(
2423-
self.session,
2424-
ibis_table,
2425-
[ibis_table[col] for col in select_columns if col != ordering_column_name],
2426-
hidden_ordering_columns=[ibis_table[ordering_column_name]],
2427-
ordering=order_for_ibis_table,
2417+
# The only way this code is used is through the df.apply(axis=1) code path
2418+
destination, query_job = self.session._query_to_destination(
2419+
json_sql, index_cols=[ordering_column_name], api_name="apply"
2420+
)
2421+
if not destination:
2422+
raise ValueError(f"Query job {query_job} did not produce result table")
2423+
2424+
new_schema = (
2425+
self.expr.schema.select([*self.index_columns])
2426+
.append(
2427+
bf_schema.SchemaItem(
2428+
row_json_column_name, bigframes.dtypes.STRING_DTYPE
2429+
)
2430+
)
2431+
.append(
2432+
bf_schema.SchemaItem(ordering_column_name, bigframes.dtypes.INT_DTYPE)
2433+
)
24282434
)
2435+
2436+
expr = core.ArrayValue.from_table(
2437+
self.session.bqclient.get_table(destination),
2438+
schema=new_schema,
2439+
session=self.session,
2440+
offsets_col=ordering_column_name,
2441+
).drop_columns([ordering_column_name])
24292442
block = Block(
24302443
expr,
24312444
index_columns=self.index_columns,

bigframes/core/compile/compiler.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -223,22 +223,6 @@ def compile_read_table_ordered(node: nodes.ReadTableNode):
223223
)
224224

225225

226-
@_compile_node.register
227-
def compile_readgbq(node: nodes.ReadGbqNode, ordered: bool = True):
228-
if ordered:
229-
return compiled.OrderedIR(
230-
node.table,
231-
node.columns,
232-
node.hidden_ordering_columns,
233-
node.ordering,
234-
)
235-
else:
236-
return compiled.UnorderedIR(
237-
node.table,
238-
node.columns,
239-
)
240-
241-
242226
@_compile_node.register
243227
def compile_promote_offsets(node: nodes.PromoteOffsetsNode, ordered: bool = True):
244228
result = compile_ordered_ir(node.child).promote_offsets(node.col_id)

bigframes/core/nodes.py

Lines changed: 0 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@
3434
import bigframes.operations.aggregations as agg_ops
3535

3636
if typing.TYPE_CHECKING:
37-
import ibis.expr.types as ibis_types
38-
3937
import bigframes.core.ordering as orderings
4038
import bigframes.session
4139

@@ -302,54 +300,6 @@ def transform_children(
302300
return self
303301

304302

305-
# TODO: Refactor to take raw gbq object reference
306-
@dataclass(frozen=True)
307-
class ReadGbqNode(BigFrameNode):
308-
table: ibis_types.Table = field()
309-
table_session: bigframes.session.Session = field()
310-
columns: Tuple[ibis_types.Value, ...] = field()
311-
hidden_ordering_columns: Tuple[ibis_types.Value, ...] = field()
312-
ordering: orderings.ExpressionOrdering = field()
313-
314-
@property
315-
def session(self):
316-
return self.table_session
317-
318-
def __hash__(self):
319-
return self._node_hash
320-
321-
@property
322-
def roots(self) -> typing.Set[BigFrameNode]:
323-
return {self}
324-
325-
@functools.cached_property
326-
def schema(self) -> schemata.ArraySchema:
327-
from bigframes.core.compile.ibis_types import ibis_dtype_to_bigframes_dtype
328-
329-
items = tuple(
330-
schemata.SchemaItem(
331-
value.get_name(),
332-
ibis_dtype_to_bigframes_dtype(value.type()),
333-
)
334-
for value in self.columns
335-
)
336-
return schemata.ArraySchema(items)
337-
338-
@functools.cached_property
339-
def variables_introduced(self) -> int:
340-
return len(self.columns) + len(self.hidden_ordering_columns)
341-
342-
@property
343-
def relation_ops_created(self) -> int:
344-
# Assume the worst case, where readgbq actually has a baked-in analytic operation to generate the index
345-
return 2
346-
347-
def transform_children(
348-
self, t: Callable[[BigFrameNode], BigFrameNode]
349-
) -> BigFrameNode:
350-
return self
351-
352-
353303
## Put ordering in here or just add order_by node above?
354304
@dataclass(frozen=True)
355305
class ReadTableNode(BigFrameNode):

tests/unit/resources.py

Lines changed: 5 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import google.cloud.bigquery
2121
import ibis
2222
import pandas
23+
import pyarrow as pa
2324
import pytest
2425

2526
import bigframes
@@ -130,18 +131,9 @@ def create_arrayvalue(
130131
df: pandas.DataFrame, total_ordering_columns: List[str]
131132
) -> core.ArrayValue:
132133
session = create_pandas_session({"test_table": df})
133-
ibis_table = session.ibis_client.table("test_table")
134-
columns = tuple(ibis_table[key] for key in ibis_table.columns)
135-
ordering = bigframes.core.ordering.ExpressionOrdering(
136-
tuple(
137-
[core.orderings.ascending_over(column) for column in total_ordering_columns]
138-
),
139-
total_ordering_columns=frozenset(total_ordering_columns),
140-
)
141-
return core.ArrayValue.from_ibis(
134+
return core.ArrayValue.from_pyarrow(
135+
arrow_table=pa.Table.from_pandas(df, preserve_index=False),
142136
session=session,
143-
table=ibis_table,
144-
columns=columns,
145-
hidden_ordering_columns=(),
146-
ordering=ordering,
137+
).order_by(
138+
[bigframes.core.ordering.ascending_over(col) for col in total_ordering_columns]
147139
)

tests/unit/test_core.py

Lines changed: 0 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -15,45 +15,13 @@
1515
import ibis.expr.types as ibis_types
1616
import pandas
1717

18-
import bigframes.core as core
1918
import bigframes.core.expression as ex
20-
import bigframes.core.ordering as order
2119
import bigframes.operations as ops
2220
import bigframes.operations.aggregations as agg_ops
2321

2422
from . import resources
2523

2624

27-
def test_arrayvalue_constructor_from_ibis_table_adds_all_columns():
28-
session = resources.create_pandas_session(
29-
{
30-
"test_table": pandas.DataFrame(
31-
{
32-
"col1": [1, 2, 3],
33-
"not_included": [True, False, True],
34-
"col2": ["a", "b", "c"],
35-
"col3": [0.1, 0.2, 0.3],
36-
}
37-
)
38-
}
39-
)
40-
ibis_table = session.ibis_client.table("test_table")
41-
columns = (ibis_table["col1"], ibis_table["col2"], ibis_table["col3"])
42-
ordering = order.ExpressionOrdering(
43-
tuple([order.ascending_over("col1")]),
44-
total_ordering_columns=frozenset(["col1"]),
45-
)
46-
actual = core.ArrayValue.from_ibis(
47-
session=session,
48-
table=ibis_table,
49-
columns=columns,
50-
ordering=ordering,
51-
hidden_ordering_columns=(),
52-
)
53-
assert actual._compile_ordered()._table is ibis_table
54-
assert len(actual.column_ids) == 3
55-
56-
5725
def test_arrayvalue_with_get_column_type():
5826
value = resources.create_arrayvalue(
5927
pandas.DataFrame(

0 commit comments

Comments
 (0)