Skip to content

Commit 9662745

Browse files
fix: Remove itertools.pairwise usage (#1638)
1 parent e84f232 commit 9662745

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

bigframes/core/local_data.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,7 @@ def _(
212212
value_generator = iter_array(
213213
array.flatten(), bigframes.dtypes.get_array_inner_type(dtype)
214214
)
215-
for (start, end) in itertools.pairwise(array.offsets):
215+
for (start, end) in _pairwise(array.offsets):
216216
arr_size = end.as_py() - start.as_py()
217217
yield list(itertools.islice(value_generator, arr_size))
218218

@@ -389,3 +389,16 @@ def _physical_type_replacements(dtype: pa.DataType) -> pa.DataType:
389389
if dtype in _ARROW_MANAGED_STORAGE_OVERRIDES:
390390
return _ARROW_MANAGED_STORAGE_OVERRIDES[dtype]
391391
return dtype
392+
393+
394+
def _pairwise(iterable):
395+
do_yield = False
396+
a = None
397+
b = None
398+
for item in iterable:
399+
a = b
400+
b = item
401+
if do_yield:
402+
yield (a, b)
403+
else:
404+
do_yield = True

tests/unit/test_local_data.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Copyright 2025 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
import pandas as pd
15+
import pandas.testing
16+
import pyarrow as pa
17+
18+
from bigframes import dtypes
19+
from bigframes.core import local_data
20+
21+
# Raw input frame built from plain Python objects: integers, variable-length
# lists (one of them empty), and dicts whose keys differ from row to row.
pd_data = pd.DataFrame(
    data={
        "ints": [10, 20, 30, 40],
        "nested_ints": [[1, 2], [3, 4, 5], [], [20, 30]],
        "structs": [{"a": 100}, {}, {"b": 200}, {"b": 300}],
    },
)
28+
29+
# Same values as ``pd_data``, but with explicit dtypes per column: the
# bigframes integer dtype, an Arrow list of int64, and an Arrow struct with
# int64 fields "a" and "b".
pd_data_normalized = pd.DataFrame(
    data={
        "ints": pd.Series(data=[10, 20, 30, 40], dtype=dtypes.INT_DTYPE),
        "nested_ints": pd.Series(
            data=[[1, 2], [3, 4, 5], [], [20, 30]],
            dtype=pd.ArrowDtype(pa.list_(pa.int64())),
        ),
        "structs": pd.Series(
            data=[{"a": 100}, {}, {"b": 200}, {"b": 300}],
            dtype=pd.ArrowDtype(
                pa.struct([("a", pa.int64()), ("b", pa.int64())])
            ),
        ),
    },
)
41+
42+
43+
def test_local_data_well_formed_round_trip():
    """Load ``pd_data`` into a ManagedArrowTable and read it back row by row.

    The rows from ``itertuples()`` are reassembled into a DataFrame with the
    original column labels and compared against ``pd_data_normalized``;
    dtype differences are ignored (``check_dtype=False``).
    """
    managed_table = local_data.ManagedArrowTable.from_pandas(pd_data)
    rows = managed_table.itertuples()
    round_tripped = pd.DataFrame(rows, columns=pd_data.columns)
    pandas.testing.assert_frame_equal(
        pd_data_normalized, round_tripped, check_dtype=False
    )

0 commit comments

Comments
 (0)