Skip to content

Commit 8e04c38

Browse files
refactor: Respect session default index in merge and reset_index methods (#862)
1 parent: f23de1a; commit: 8e04c38

File tree

2 files changed

+51
-10
lines changed

2 files changed

+51
-10
lines changed

bigframes/core/blocks.py

Lines changed: 29 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -327,18 +327,30 @@ def reset_index(self, drop: bool = True) -> Block:
327327
A new Block because dropping index columns can break references
328328
from Index classes that point to this block.
329329
"""
330-
new_index_col_id = guid.generate_guid()
331-
expr = self._expr.promote_offsets(new_index_col_id)
330+
expr = self._expr
331+
if (
332+
self.session._default_index_type
333+
== bigframes.enums.DefaultIndexKind.SEQUENTIAL_INT64
334+
):
335+
new_index_col_id = guid.generate_guid()
336+
expr = expr.promote_offsets(new_index_col_id)
337+
new_index_cols = [new_index_col_id]
338+
elif self.session._default_index_type == bigframes.enums.DefaultIndexKind.NULL:
339+
new_index_cols = []
340+
else:
341+
raise ValueError(
342+
f"Unrecognized default index kind: {self.session._default_index_type}"
343+
)
344+
332345
if drop:
333346
# Even though the index might be part of the ordering, keep that
334347
# ordering expression as reset_index shouldn't change the row
335348
# order.
336349
expr = expr.drop_columns(self.index_columns)
337350
return Block(
338351
expr,
339-
index_columns=[new_index_col_id],
352+
index_columns=new_index_cols,
340353
column_labels=self.column_labels,
341-
index_labels=[None],
342354
)
343355
else:
344356
# Add index names to column index
@@ -362,9 +374,8 @@ def reset_index(self, drop: bool = True) -> Block:
362374

363375
return Block(
364376
expr,
365-
index_columns=[new_index_col_id],
377+
index_columns=new_index_cols,
366378
column_labels=column_labels_modified,
367-
index_labels=[None],
368379
)
369380

370381
def set_index(
@@ -2096,13 +2107,17 @@ def merge(
20962107
#
20972108
# This keeps us from generating an index if the user joins a large
20982109
# BigQuery table against small local data, for example.
2099-
if len(self._index_columns) > 0 and len(other._index_columns) > 0:
2110+
if (
2111+
self.index.is_null
2112+
or other.index.is_null
2113+
or self.session._default_index_type == bigframes.enums.DefaultIndexKind.NULL
2114+
):
2115+
expr = joined_expr
2116+
index_columns = []
2117+
else:
21002118
offset_index_id = guid.generate_guid()
21012119
expr = joined_expr.promote_offsets(offset_index_id)
21022120
index_columns = [offset_index_id]
2103-
else:
2104-
expr = joined_expr
2105-
index_columns = []
21062121

21072122
return Block(expr, index_columns=index_columns, column_labels=labels)
21082123

@@ -2604,6 +2619,10 @@ def column_ids(self) -> Sequence[str]:
26042619
"""Column(s) to use as row labels."""
26052620
return self._block._index_columns
26062621

2622+
@property
2623+
def is_null(self) -> bool:
2624+
return len(self._block._index_columns) == 0
2625+
26072626
def to_pandas(self, *, ordered: Optional[bool] = None) -> pd.Index:
26082627
"""Executes deferred operations and downloads the results."""
26092628
if len(self.column_ids) == 0:

tests/system/small/test_unordered.py

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -116,6 +116,28 @@ def test_unordered_drop_duplicates(unordered_session, keep):
116116
assert_pandas_df_equal(bf_result.to_pandas(), pd_result, ignore_order=True)
117117

118118

119+
def test_unordered_reset_index(unordered_session):
120+
pd_df = pd.DataFrame({"a": [1, 1, 3], "b": [4, 4, 6]}, dtype=pd.Int64Dtype())
121+
bf_df = bpd.DataFrame(pd_df, session=unordered_session)
122+
123+
bf_result = bf_df.set_index("b").reset_index(drop=False)
124+
pd_result = pd_df.set_index("b").reset_index(drop=False)
125+
126+
assert_pandas_df_equal(bf_result.to_pandas(), pd_result)
127+
128+
129+
def test_unordered_merge(unordered_session):
130+
pd_df = pd.DataFrame(
131+
{"a": [1, 1, 3], "b": [4, 4, 6], "c": [1, 2, 3]}, dtype=pd.Int64Dtype()
132+
)
133+
bf_df = bpd.DataFrame(pd_df, session=unordered_session)
134+
135+
bf_result = bf_df.merge(bf_df, left_on="a", right_on="c")
136+
pd_result = pd_df.merge(pd_df, left_on="a", right_on="c")
137+
138+
assert_pandas_df_equal(bf_result.to_pandas(), pd_result, ignore_order=True)
139+
140+
119141
@pytest.mark.parametrize(
120142
("function"),
121143
[

0 commit comments

Comments (0)