Skip to content

Commit 199bf20

Browse files
authored
REF: Remove BlockManager.arrays in favor of BlockManager.blocks usage (#58804)
* REF: Remove BlockManager.arrays in favor of BlockManager.blocks usage
* Add back arrays
* Whitespace
1 parent 9f71476 commit 199bf20

File tree

21 files changed

+74
-65
lines changed

21 files changed

+74
-65
lines changed

pandas/_testing/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -531,8 +531,8 @@ def shares_memory(left, right) -> bool:
531531
left._mask, right._mask
532532
)
533533

534-
if isinstance(left, DataFrame) and len(left._mgr.arrays) == 1:
535-
arr = left._mgr.arrays[0]
534+
if isinstance(left, DataFrame) and len(left._mgr.blocks) == 1:
535+
arr = left._mgr.blocks[0].values
536536
return shares_memory(arr, right)
537537

538538
raise NotImplementedError(type(left), type(right))

pandas/core/frame.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1046,7 +1046,7 @@ def _is_homogeneous_type(self) -> bool:
10461046
False
10471047
"""
10481048
# The "<" part of "<=" here is for empty DataFrame cases
1049-
return len({arr.dtype for arr in self._mgr.arrays}) <= 1
1049+
return len({block.values.dtype for block in self._mgr.blocks}) <= 1
10501050

10511051
@property
10521052
def _can_fast_transpose(self) -> bool:
@@ -5726,7 +5726,6 @@ def shift(
57265726
periods = cast(int, periods)
57275727

57285728
ncols = len(self.columns)
5729-
arrays = self._mgr.arrays
57305729
if axis == 1 and periods != 0 and ncols > 0 and freq is None:
57315730
if fill_value is lib.no_default:
57325731
# We will infer fill_value to match the closest column
@@ -5752,12 +5751,12 @@ def shift(
57525751

57535752
result.columns = self.columns.copy()
57545753
return result
5755-
elif len(arrays) > 1 or (
5754+
elif len(self._mgr.blocks) > 1 or (
57565755
# If we only have one block and we know that we can't
57575756
# keep the same dtype (i.e. the _can_hold_element check)
57585757
# then we can go through the reindex_indexer path
57595758
# (and avoid casting logic in the Block method).
5760-
not can_hold_element(arrays[0], fill_value)
5759+
not can_hold_element(self._mgr.blocks[0].values, fill_value)
57615760
):
57625761
# GH#35488 we need to watch out for multi-block cases
57635762
# We only get here with fill_value not-lib.no_default
@@ -11453,7 +11452,7 @@ def _get_data() -> DataFrame:
1145311452
if numeric_only:
1145411453
df = _get_data()
1145511454
if axis is None:
11456-
dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
11455+
dtype = find_common_type([block.values.dtype for block in df._mgr.blocks])
1145711456
if isinstance(dtype, ExtensionDtype):
1145811457
df = df.astype(dtype)
1145911458
arr = concat_compat(list(df._iter_column_arrays()))
@@ -11478,7 +11477,9 @@ def _get_data() -> DataFrame:
1147811477

1147911478
# kurtosis excluded since groupby does not implement it
1148011479
if df.shape[1] and name != "kurt":
11481-
dtype = find_common_type([arr.dtype for arr in df._mgr.arrays])
11480+
dtype = find_common_type(
11481+
[block.values.dtype for block in df._mgr.blocks]
11482+
)
1148211483
if isinstance(dtype, ExtensionDtype):
1148311484
# GH 54341: fastpath for EA-backed axis=1 reductions
1148411485
# This flattens the frame into a single 1D array while keeping
@@ -11552,8 +11553,8 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series:
1155211553
else:
1155311554
raise NotImplementedError(name)
1155411555

11555-
for arr in self._mgr.arrays:
11556-
middle = func(arr, axis=0, skipna=skipna)
11556+
for blocks in self._mgr.blocks:
11557+
middle = func(blocks.values, axis=0, skipna=skipna)
1155711558
result = ufunc(result, middle)
1155811559

1155911560
res_ser = self._constructor_sliced(result, index=self.index, copy=False)

pandas/core/generic.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6373,7 +6373,7 @@ def astype(
63736373
# TODO(EA2D): special case not needed with 2D EAs
63746374
dtype = pandas_dtype(dtype)
63756375
if isinstance(dtype, ExtensionDtype) and all(
6376-
arr.dtype == dtype for arr in self._mgr.arrays
6376+
block.values.dtype == dtype for block in self._mgr.blocks
63776377
):
63786378
return self.copy(deep=False)
63796379
# GH 18099/22869: columnwise conversion to extension dtype
@@ -11148,9 +11148,9 @@ def _logical_func(
1114811148
if (
1114911149
self.ndim > 1
1115011150
and axis == 1
11151-
and len(self._mgr.arrays) > 1
11151+
and len(self._mgr.blocks) > 1
1115211152
# TODO(EA2D): special-case not needed
11153-
and all(x.ndim == 2 for x in self._mgr.arrays)
11153+
and all(block.values.ndim == 2 for block in self._mgr.blocks)
1115411154
and not kwargs
1115511155
):
1115611156
# Fastpath avoiding potentially expensive transpose

pandas/core/indexing.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1804,10 +1804,10 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc") -> None:
18041804

18051805
# if there is only one block/type, still have to take split path
18061806
# unless the block is one-dimensional or it can hold the value
1807-
if not take_split_path and len(self.obj._mgr.arrays) and self.ndim > 1:
1807+
if not take_split_path and len(self.obj._mgr.blocks) and self.ndim > 1:
18081808
# in case of dict, keys are indices
18091809
val = list(value.values()) if isinstance(value, dict) else value
1810-
arr = self.obj._mgr.arrays[0]
1810+
arr = self.obj._mgr.blocks[0].values
18111811
take_split_path = not can_hold_element(
18121812
arr, extract_array(val, extract_numpy=True)
18131813
)

pandas/core/internals/managers.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -353,6 +353,8 @@ def arrays(self) -> list[ArrayLike]:
353353
Warning! The returned arrays don't handle Copy-on-Write, so this should
354354
be used with caution (only in read-mode).
355355
"""
356+
# TODO: Deprecate, usage in Dask
357+
# https://github.com/dask/dask/blob/484fc3f1136827308db133cd256ba74df7a38d8c/dask/base.py#L1312
356358
return [blk.values for blk in self.blocks]
357359

358360
def __repr__(self) -> str:
@@ -2068,7 +2070,7 @@ def array(self) -> ArrayLike:
20682070
"""
20692071
Quick access to the backing array of the Block.
20702072
"""
2071-
return self.arrays[0]
2073+
return self.blocks[0].values
20722074

20732075
# error: Cannot override writeable attribute with read-only property
20742076
@property

pandas/tests/apply/test_str.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -287,7 +287,7 @@ def test_transform_groupby_kernel_frame(request, float_frame, op):
287287
# same thing, but ensuring we have multiple blocks
288288
assert "E" not in float_frame.columns
289289
float_frame["E"] = float_frame["A"].copy()
290-
assert len(float_frame._mgr.arrays) > 1
290+
assert len(float_frame._mgr.blocks) > 1
291291

292292
ones = np.ones(float_frame.shape[0])
293293
gb2 = float_frame.groupby(ones)

pandas/tests/extension/base/casting.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30,8 +30,9 @@ def test_astype_object_frame(self, all_data):
3030
blk = result._mgr.blocks[0]
3131
assert isinstance(blk, NumpyBlock), type(blk)
3232
assert blk.is_object
33-
assert isinstance(result._mgr.arrays[0], np.ndarray)
34-
assert result._mgr.arrays[0].dtype == np.dtype(object)
33+
arr = result._mgr.blocks[0].values
34+
assert isinstance(arr, np.ndarray)
35+
assert arr.dtype == np.dtype(object)
3536

3637
# check that we can compare the dtypes
3738
comp = result.dtypes == df.dtypes

pandas/tests/extension/base/constructors.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -69,15 +69,15 @@ def test_dataframe_constructor_from_dict(self, data, from_series):
6969
assert result.shape == (len(data), 1)
7070
if hasattr(result._mgr, "blocks"):
7171
assert isinstance(result._mgr.blocks[0], EABackedBlock)
72-
assert isinstance(result._mgr.arrays[0], ExtensionArray)
72+
assert isinstance(result._mgr.blocks[0].values, ExtensionArray)
7373

7474
def test_dataframe_from_series(self, data):
7575
result = pd.DataFrame(pd.Series(data))
7676
assert result.dtypes[0] == data.dtype
7777
assert result.shape == (len(data), 1)
7878
if hasattr(result._mgr, "blocks"):
7979
assert isinstance(result._mgr.blocks[0], EABackedBlock)
80-
assert isinstance(result._mgr.arrays[0], ExtensionArray)
80+
assert isinstance(result._mgr.blocks[0].values, ExtensionArray)
8181

8282
def test_series_given_mismatched_index_raises(self, data):
8383
msg = r"Length of values \(3\) does not match length of index \(5\)"

pandas/tests/extension/base/getitem.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -450,7 +450,7 @@ def test_loc_len1(self, data):
450450
df = pd.DataFrame({"A": data})
451451
res = df.loc[[0], "A"]
452452
assert res.ndim == 1
453-
assert res._mgr.arrays[0].ndim == 1
453+
assert res._mgr.blocks[0].ndim == 1
454454
if hasattr(res._mgr, "blocks"):
455455
assert res._mgr._block.ndim == 1
456456

pandas/tests/extension/base/reshaping.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def test_concat(self, data, in_frame):
2929
assert dtype == data.dtype
3030
if hasattr(result._mgr, "blocks"):
3131
assert isinstance(result._mgr.blocks[0], EABackedBlock)
32-
assert isinstance(result._mgr.arrays[0], ExtensionArray)
32+
assert isinstance(result._mgr.blocks[0].values, ExtensionArray)
3333

3434
@pytest.mark.parametrize("in_frame", [True, False])
3535
def test_concat_all_na_block(self, data_missing, in_frame):

0 commit comments

Comments
 (0)