Skip to content

Commit a15bc15

Browse files
committed
BUG: Passing original properties of DataFrame and Series to subclasses' constructors
1 parent 89bc204 commit a15bc15

File tree

7 files changed

+137
-36
lines changed

7 files changed

+137
-36
lines changed

doc/source/development/extending.rst

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -402,23 +402,50 @@ To let original data structures have additional properties, you should let ``pan
402402

403403
1. Define ``_internal_names`` and ``_internal_names_set`` for temporary properties which WILL NOT be passed to manipulation results.
404404
2. Define ``_metadata`` for normal properties which will be passed to manipulation results.
405+
If ``_metadata`` is used, a ``Series`` subclass must also be defined with the same ``_metadata`` entries, and the first parameter of both constructors must be the data.
406+
Avoid the following names for your normal properties: ``data``, ``index``, ``columns``, ``dtype``, ``copy``, ``name``, ``_name`` and ``fastpath``.
405407

406408
Below is an example that defines two original properties: "internal_cache" as a temporary property and "added_property" as a normal property.
407409

408410
.. code-block:: python
409411
410-
class SubclassedDataFrame2(pd.DataFrame):
412+
class SubclassedDataFrame(pd.DataFrame):
411413
412-
# temporary properties
413-
_internal_names = pd.DataFrame._internal_names + ["internal_cache"]
414-
_internal_names_set = set(_internal_names)
414+
# temporary properties
415+
_internal_names = pd.DataFrame._internal_names + ["internal_cache"]
416+
_internal_names_set = set(_internal_names)
415417
416-
# normal properties
417-
_metadata = ["added_property"]
418+
# normal properties
419+
_metadata = ["added_property"]
418420
419-
@property
420-
def _constructor(self):
421-
return SubclassedDataFrame2
421+
def __init__(self, data=None, added_property=None, *args, **kwargs):
422+
super().__init__(data, *args, **kwargs)
423+
self.added_property = added_property
424+
425+
@property
426+
def _constructor(self):
427+
return SubclassedDataFrame
428+
429+
@property
430+
def _constructor_sliced(self):
431+
return SubclassedSeries
432+
433+
class SubclassedSeries(pd.Series):
434+
435+
# normal properties
436+
_metadata = ["original_property"]
437+
438+
def __init__(self, data=None, original_property=None, *args, **kwargs):
439+
super().__init__(data, *args, **kwargs)
440+
self.original_property = original_property
441+
442+
@property
443+
def _constructor(self):
444+
return SubclassedSeries
445+
446+
@property
447+
def _constructor_expanddim(self):
448+
return SubclassedDataFrame
422449
423450
.. code-block:: python
424451

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -843,6 +843,7 @@ Other
843843
- Bug in the DataFrame Interchange Protocol implementation returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
844844
- Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`)
845845
- Bug in ``Series.list`` methods not preserving the original name. (:issue:`60522`)
846+
- Bug in :class:`DataFrame` and :class:`Series` subclasses where the original properties listed in ``_metadata`` were not passed to the new object for some operations (e.g. ``concat``) (:issue:`34177`)
846847
- Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`)
847848
- Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`)
848849
- Fixed regression in :meth:`DataFrame.from_records` not initializing subclasses properly (:issue:`57008`)

pandas/_testing/__init__.py

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -325,32 +325,35 @@ def to_array(obj):
325325

326326

327327
class SubclassedSeries(Series):
328-
_metadata = ["testattr", "name"]
328+
_metadata = ["testattr"]
329+
330+
def __init__(self, data=None, testattr=None, *args, **kwargs):
331+
super().__init__(data, *args, **kwargs)
332+
self.testattr = testattr
329333

330334
@property
331335
def _constructor(self):
332-
# For testing, those properties return a generic callable, and not
333-
# the actual class. In this case that is equivalent, but it is to
334-
# ensure we don't rely on the property returning a class
335-
# See https://github.com/pandas-dev/pandas/pull/46018 and
336-
# https://github.com/pandas-dev/pandas/issues/32638 and linked issues
337-
return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs)
336+
return SubclassedSeries
338337

339338
@property
340339
def _constructor_expanddim(self):
341-
return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs)
340+
return SubclassedDataFrame
342341

343342

344343
class SubclassedDataFrame(DataFrame):
345344
_metadata = ["testattr"]
346345

346+
def __init__(self, data=None, testattr=None, *args, **kwargs):
347+
super().__init__(data, *args, **kwargs)
348+
self.testattr = testattr
349+
347350
@property
348351
def _constructor(self):
349-
return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs)
352+
return SubclassedDataFrame
350353

351354
@property
352355
def _constructor_sliced(self):
353-
return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs)
356+
return SubclassedSeries
354357

355358

356359
def convert_rows_list_to_csv_str(rows_list: list[str]) -> str:

pandas/core/frame.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -675,7 +675,8 @@ def _constructor_from_mgr(self, mgr, axes) -> DataFrame:
675675

676676
# We assume that the subclass __init__ knows how to handle a
677677
# pd.DataFrame object.
678-
return self._constructor(df)
678+
metadata = {k: getattr(self, k) for k in self._metadata}
679+
return self._constructor(df, **metadata)
679680

680681
_constructor_sliced: Callable[..., Series] = Series
681682

@@ -690,7 +691,8 @@ def _constructor_sliced_from_mgr(self, mgr, axes) -> Series:
690691

691692
# We assume that the subclass __init__ knows how to handle a
692693
# pd.Series object.
693-
return self._constructor_sliced(ser)
694+
metadata = {k: getattr(self, k) for k in self._metadata}
695+
return self._constructor_sliced(ser, **metadata)
694696

695697
# ----------------------------------------------------------------------
696698
# Constructors

pandas/core/series.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -339,7 +339,7 @@ class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc]
339339
_HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)
340340

341341
_name: Hashable
342-
_metadata: list[str] = ["_name"]
342+
_metadata: list[str] = []
343343
_internal_names_set = {"index", "name"} | NDFrame._internal_names_set
344344
_accessors = {"dt", "cat", "str", "sparse"}
345345
_hidden_attrs = (
@@ -372,6 +372,9 @@ def __init__(
372372
copy: bool | None = None,
373373
) -> None:
374374
allow_mgr = False
375+
if "_name" not in self._metadata:
376+
# Subclass overrides _metadata, see @540db96b
377+
self._metadata.append("_name")
375378
if (
376379
isinstance(data, SingleBlockManager)
377380
and index is None
@@ -610,7 +613,9 @@ def _constructor_from_mgr(self, mgr, axes):
610613

611614
# We assume that the subclass __init__ knows how to handle a
612615
# pd.Series object.
613-
return self._constructor(ser)
616+
self._metadata.remove("_name")
617+
metadata = {k: getattr(self, k) for k in self._metadata}
618+
return self._constructor(ser, **metadata)
614619

615620
@property
616621
def _constructor_expanddim(self) -> Callable[..., DataFrame]:
@@ -634,7 +639,9 @@ def _constructor_expanddim_from_mgr(self, mgr, axes):
634639

635640
# We assume that the subclass __init__ knows how to handle a
636641
# pd.DataFrame object.
637-
return self._constructor_expanddim(df)
642+
self._metadata.remove("_name")
643+
metadata = {k: getattr(self, k) for k in self._metadata}
644+
return self._constructor_expanddim(df, **metadata)
638645

639646
# types
640647
@property

pandas/tests/frame/test_arithmetic.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
timezone,
55
)
66
from enum import Enum
7-
import functools
87
import operator
98
import re
109

@@ -2139,6 +2138,12 @@ def test_frame_op_subclass_nonclass_constructor():
21392138
# GH#43201 subclass._constructor is a function, not the subclass itself
21402139

21412140
class SubclassedSeries(Series):
2141+
_metadata = ["my_extra_data"]
2142+
2143+
def __init__(self, data=None, my_extra_data=None, *args, **kwargs) -> None:
2144+
self.my_extra_data = my_extra_data
2145+
super().__init__(data, *args, **kwargs)
2146+
21422147
@property
21432148
def _constructor(self):
21442149
return SubclassedSeries
@@ -2150,21 +2155,25 @@ def _constructor_expanddim(self):
21502155
class SubclassedDataFrame(DataFrame):
21512156
_metadata = ["my_extra_data"]
21522157

2153-
def __init__(self, my_extra_data, *args, **kwargs) -> None:
2158+
def __init__(self, data=None, my_extra_data=None, *args, **kwargs) -> None:
21542159
self.my_extra_data = my_extra_data
2155-
super().__init__(*args, **kwargs)
2160+
super().__init__(data, *args, **kwargs)
21562161

21572162
@property
21582163
def _constructor(self):
2159-
return functools.partial(type(self), self.my_extra_data)
2164+
return SubclassedDataFrame
21602165

21612166
@property
21622167
def _constructor_sliced(self):
21632168
return SubclassedSeries
21642169

2165-
sdf = SubclassedDataFrame("some_data", {"A": [1, 2, 3], "B": [4, 5, 6]})
2170+
sdf = SubclassedDataFrame(
2171+
my_extra_data="some_data", data={"A": [1, 2, 3], "B": [4, 5, 6]}
2172+
)
21662173
result = sdf * 2
2167-
expected = SubclassedDataFrame("some_data", {"A": [2, 4, 6], "B": [8, 10, 12]})
2174+
expected = SubclassedDataFrame(
2175+
my_extra_data="some_data", data={"A": [2, 4, 6], "B": [8, 10, 12]}
2176+
)
21682177
tm.assert_frame_equal(result, expected)
21692178

21702179
result = sdf + sdf

pandas/tests/frame/test_subclass.py

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
Index,
88
MultiIndex,
99
Series,
10+
concat,
1011
)
1112
import pandas._testing as tm
1213

@@ -742,16 +743,67 @@ def test_equals_subclass(self):
742743
assert df1.equals(df2)
743744
assert df2.equals(df1)
744745

746+
def test_original_property_is_preserved_when_subclassing(self):
747+
original_property = "original_property"
748+
749+
class SubclassedSeries(Series):
750+
_metadata = [original_property]
751+
752+
def __init__(self, data=None, original_property=None, *args, **kwargs):
753+
super().__init__(data, *args, **kwargs)
754+
self.original_property = original_property
755+
756+
@property
757+
def _constructor(self):
758+
return SubclassedSeries
759+
760+
@property
761+
def _constructor_expanddim(self):
762+
return SubclassedDataFrame
763+
764+
class SubclassedDataFrame(DataFrame):
765+
_metadata = ["original_property"]
766+
767+
def __init__(self, data=None, original_property=None, *args, **kwargs):
768+
super().__init__(data, *args, **kwargs)
769+
self.original_property = original_property
770+
771+
@property
772+
def _constructor(self):
773+
return SubclassedDataFrame
774+
775+
@property
776+
def _constructor_sliced(self):
777+
return SubclassedSeries
778+
779+
data = {"key": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]}
780+
df = SubclassedDataFrame(data, original_property="original_property")
781+
tm.assert_equal(df.original_property, original_property)
782+
tm.assert_equal(df[df["value"] == 1].original_property, original_property)
783+
tm.assert_equal(df.loc[df["key"] == "foo"].original_property, original_property)
784+
tm.assert_equal(df["value"].original_property, original_property)
785+
786+
tm.assert_equal(concat([df, df]).original_property, original_property)
787+
788+
df1 = SubclassedDataFrame(
789+
{"lkey": ["foo", "bar", "baz", "foo"], "value": [1, 2, 3, 5]},
790+
original_property="original_property",
791+
)
792+
df2 = SubclassedDataFrame(
793+
{"rkey": ["foo", "bar", "baz", "foo"], "value": [5, 6, 7, 8]},
794+
original_property="original_property",
795+
)
796+
merged_df = df1.merge(df2, left_on="lkey", right_on="rkey")
797+
tm.assert_equal(merged_df.original_property, original_property)
798+
plus = df1 + df2
799+
tm.assert_equal(plus.original_property, original_property)
800+
745801

746802
class MySubclassWithMetadata(DataFrame):
747803
_metadata = ["my_metadata"]
748804

749-
def __init__(self, *args, **kwargs) -> None:
750-
super().__init__(*args, **kwargs)
751-
752-
my_metadata = kwargs.pop("my_metadata", None)
753-
if args and isinstance(args[0], MySubclassWithMetadata):
754-
my_metadata = args[0].my_metadata # type: ignore[has-type]
805+
def __init__(self, data=None, my_metadata=None, *args, **kwargs) -> None:
806+
super().__init__(data, *args, **kwargs)
755807
self.my_metadata = my_metadata
756808

757809
@property

0 commit comments

Comments
 (0)