Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1209,6 +1209,7 @@ Conversion
- Bug in :func:`to_datetime` was not respecting ``exact`` argument when ``format`` was an ISO8601 format (:issue:`12649`)
- Bug in :meth:`TimedeltaArray.astype` raising ``TypeError`` when converting to a pyarrow duration type (:issue:`49795`)
- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` raising for extension array dtypes (:issue:`29618`, :issue:`50261`, :issue:`31913`)
- Bug in the :class:`Series` constructor not copying data when created from an :class:`Index` and the passed ``dtype`` is equal to the ``dtype`` of the :class:`Index` (:issue:`52008`)

Strings
^^^^^^^
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/internals.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ class BlockManager:

class BlockValuesRefs:
referenced_blocks: list[weakref.ref]
def __init__(self, blk: SharedBlock) -> None: ...
def __init__(self, blk: SharedBlock | None = ...) -> None: ...
def add_reference(self, blk: SharedBlock) -> None: ...
def add_index_reference(self, index: object) -> None: ...
def has_reference(self) -> bool: ...
7 changes: 5 additions & 2 deletions pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -877,8 +877,11 @@ cdef class BlockValuesRefs:
cdef:
public list referenced_blocks

def __cinit__(self, blk: SharedBlock) -> None:
self.referenced_blocks = [weakref.ref(blk)]
def __cinit__(self, blk: SharedBlock | None = None) -> None:
if blk is not None:
self.referenced_blocks = [weakref.ref(blk)]
else:
self.referenced_blocks = []

def add_reference(self, blk: SharedBlock) -> None:
"""Adds a new reference to our reference collection.
Expand Down
7 changes: 5 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
index as libindex,
lib,
)
from pandas._libs.internals import BlockValuesRefs
import pandas._libs.join as libjoin
from pandas._libs.lib import (
is_datetime_array,
Expand Down Expand Up @@ -652,9 +653,11 @@ def _simple_new(cls, values: ArrayLike, name: Hashable = None, refs=None) -> Sel
result._name = name
result._cache = {}
result._reset_identity()
result._references = refs
if refs is not None:
refs.add_index_reference(result)
result._references = refs
else:
result._references = BlockValuesRefs()
result._references.add_index_reference(result)

return result

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,8 @@ def inferred_type(self) -> str:
"""Return a string of the type inferred from the values"""
return "interval"

@Appender(Index.memory_usage.__doc__)
# Cannot determine type of "memory_usage"
@Appender(Index.memory_usage.__doc__) # type: ignore[has-type]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this related?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like a mypy bug.

Adding

from pandas._libs.internals import BlockValuesRefs

to the base file causes this

def memory_usage(self, deep: bool = False) -> int:
# we don't use an explicit engine
# so return the bytes here
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -1255,7 +1255,8 @@ def f(level) -> bool:

return any(f(level) for level in self._inferred_type_levels)

@doc(Index.memory_usage)
# Cannot determine type of "memory_usage"
@doc(Index.memory_usage) # type: ignore[has-type]
def memory_usage(self, deep: bool = False) -> int:
# we are overwriting our base class to avoid
# computing .values here which could materialize
Expand Down
11 changes: 8 additions & 3 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
internals as libinternals,
lib,
)
from pandas._libs.internals import BlockPlacement
from pandas._libs.internals import (
BlockPlacement,
BlockValuesRefs,
)
from pandas.errors import PerformanceWarning
from pandas.util._decorators import cache_readonly
from pandas.util._exceptions import find_stack_level
Expand Down Expand Up @@ -1877,11 +1880,13 @@ def from_blocks(
return cls(blocks[0], axes[0], verify_integrity=False)

@classmethod
def from_array(cls, array: ArrayLike, index: Index) -> SingleBlockManager:
def from_array(
cls, array: ArrayLike, index: Index, refs: BlockValuesRefs | None = None
) -> SingleBlockManager:
"""
Constructor for if we have an array that is not yet a Block.
"""
block = new_block(array, placement=slice(0, len(index)), ndim=1)
block = new_block(array, placement=slice(0, len(index)), ndim=1, refs=refs)
return cls(block, index)

def to_2d_mgr(self, columns: Index) -> BlockManager:
Expand Down
11 changes: 8 additions & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,10 +429,15 @@ def __init__(
raise NotImplementedError(
"initializing a Series from a MultiIndex is not supported"
)

refs = None
if isinstance(data, Index):
if dtype is not None:
# astype copies
data = data.astype(dtype)
data = data.astype(dtype, copy=False)

if using_copy_on_write():
refs = data._references
data = data._values
else:
# GH#24096 we need to ensure the index remains immutable
data = data._values.copy()
Expand Down Expand Up @@ -496,7 +501,7 @@ def __init__(

manager = get_option("mode.data_manager")
if manager == "block":
data = SingleBlockManager.from_array(data, index)
data = SingleBlockManager.from_array(data, index, refs=refs)
elif manager == "array":
data = SingleArrayManager.from_array(data, index)

Expand Down
36 changes: 36 additions & 0 deletions pandas/tests/copy_view/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@

from pandas import (
DataFrame,
DatetimeIndex,
Index,
Period,
PeriodIndex,
Series,
Timedelta,
TimedeltaIndex,
Timestamp,
)
import pandas._testing as tm
from pandas.tests.copy_view.util import get_array
Expand Down Expand Up @@ -82,6 +89,35 @@ def test_series_from_series_with_reindex(using_copy_on_write):
assert not result._mgr.blocks[0].refs.has_reference()


@pytest.mark.parametrize(
    "idx",
    [
        Index([1, 2]),
        DatetimeIndex([Timestamp("2019-12-31"), Timestamp("2020-12-31")]),
        PeriodIndex([Period("2019-12-31"), Period("2020-12-31")]),
        TimedeltaIndex([Timedelta("1 days"), Timedelta("2 days")]),
    ],
)
def test_series_from_index(using_copy_on_write, idx):
    """
    Check that writing to a Series built from an Index never alters the Index.

    Parameters
    ----------
    using_copy_on_write : bool
        Selects which memory-sharing expectation is asserted.
    idx : Index
        The index the Series is constructed from (one case per index type
        listed in the parametrization).
    """
    ser = Series(idx)
    # Independent snapshot taken before any write; ``idx`` is compared
    # against it at the end.
    expected = idx.copy(deep=True)
    if using_copy_on_write:
        # The Series is expected to reuse the Index's buffer, and its block
        # must report that something else references the same values.
        assert np.shares_memory(get_array(ser), get_array(idx))
        assert not ser._mgr._has_no_reference(0)
    else:
        # Without copy-on-write the constructor is expected to copy eagerly.
        assert not np.shares_memory(get_array(ser), get_array(idx))
    # Performed in both modes: the write must not be visible through ``idx``.
    ser.iloc[0] = ser.iloc[1]
    tm.assert_index_equal(idx, expected)


def test_series_from_index_different_dtypes(using_copy_on_write):
    """
    Check that building a Series from an Index with another dtype gives new data.

    Parameters
    ----------
    using_copy_on_write : bool
        When true, additionally assert that the resulting block is not
        tracked as sharing its values with anything else.
    """
    idx = Index([1, 2, 3], dtype="int64")
    ser = Series(idx, dtype="int32")
    # The int64 -> int32 conversion must produce a separate buffer.
    assert not np.shares_memory(get_array(ser), get_array(idx))
    if using_copy_on_write:
        # The converted values are new, so no reference should be recorded.
        assert ser._mgr._has_no_reference(0)


@pytest.mark.parametrize("func", [lambda x: x, lambda x: x._mgr])
@pytest.mark.parametrize("columns", [None, ["a"]])
def test_dataframe_constructor_mgr_or_df(using_copy_on_write, columns, func):
Expand Down
11 changes: 8 additions & 3 deletions pandas/tests/copy_view/util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from pandas import Series
from pandas import (
Index,
Series,
)
from pandas.core.arrays import BaseMaskedArray


Expand All @@ -10,7 +13,9 @@ def get_array(obj, col=None):
which triggers tracking references / CoW (and we might be testing that
this is done by some other operation).
"""
if isinstance(obj, Series) and (col is None or obj.name == col):
if isinstance(obj, Index):
arr = obj._values
elif isinstance(obj, Series) and (col is None or obj.name == col):
arr = obj._values
else:
assert col is not None
Expand All @@ -19,4 +24,4 @@ def get_array(obj, col=None):
arr = obj._get_column_array(icol)
if isinstance(arr, BaseMaskedArray):
return arr._data
return arr
return getattr(arr, "_ndarray", arr)
8 changes: 8 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2056,6 +2056,14 @@ def test_series_constructor_ea_all_na(self):
)
tm.assert_series_equal(result, expected)

def test_series_from_index_dtype_equal_does_not_copy(self):
    """
    Check that writing to a Series built from an Index leaves the Index intact
    when the requested dtype matches the Index's dtype.

    Despite the name, the assertion is about isolation: the Index must be
    unchanged after the Series is modified.
    """
    # GH#52008
    # The dtype is spelled out so the "dtype is equal" precondition does not
    # depend on what integer dtype would otherwise be inferred for the list.
    idx = Index([1, 2, 3], dtype="int64")
    expected = idx.copy(deep=True)
    ser = Series(idx, dtype="int64")
    ser.iloc[0] = 100
    tm.assert_index_equal(idx, expected)


class TestSeriesConstructorIndexCoercion:
def test_series_constructor_datetimelike_index_coercion(self):
Expand Down