-
Notifications
You must be signed in to change notification settings - Fork 79
✨ add pandas.Series
#68
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 18 commits
89a2c38
b726b19
24bf87a
1bb8a71
bc37f7f
43be7f6
50b0547
16aa656
0d899c9
33f45f2
98e58d6
b4a35ac
1660ca7
b1e04f0
2ac57cd
339c71b
7129b91
c9c4f12
cc9dac0
501c173
09e073d
b15b3d0
2718471
8e408bc
7b4433f
6cff2d7
3d7b805
b751633
d78fc5a
6a55cd2
ba7a941
933540a
25da211
811d664
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
The `Series` class provides support for working with pandas Series objects. | ||
|
||
```py | ||
import pandas as pd | ||
from pydantic import BaseModel | ||
|
||
from pydantic_extra_types.pandas_types import Series | ||
|
||
|
||
class MyData(BaseModel): | ||
numbers: Series | ||
|
||
|
||
data = {"numbers": pd.Series([1, 2, 3, 4, 5])} | ||
my_data = MyData(**data) | ||
|
||
print(my_data.numbers) | ||
# > 0 1 | ||
# > 1 2 | ||
# > 2 3 | ||
# > 3 4 | ||
# > 4 5 | ||
# > dtype: int64 | ||
``` |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
from typing import Any, List, Tuple, Type, TypeVar, Union | ||
|
||
from pydantic import GetCoreSchemaHandler | ||
from pydantic_core import core_schema | ||
|
||
# pandas is an optional dependency of this module: fail with an actionable
# message when it is missing.
try:
    import pandas as pd
except ModuleNotFoundError as exc:  # pragma: no cover
    # Chain explicitly so the traceback shows the failed import as the direct cause.
    raise RuntimeError(
        '`Series` requires "pandas" to be installed. You can install it with "pip install pandas"'
    ) from exc

# Constrained type variable listing the scalar types accepted as the right-hand
# operand in the `Series.__add__` annotation.
T = TypeVar('T', str, bytes, bool, int, float, complex, pd.Timestamp, pd.Timedelta, pd.Period)
|
||
|
||
class Series:
    """Wrapper around ``pandas.Series`` that can be used as a pydantic field type.

    The wrapped series is stored on ``value``. Regular (non-dunder) attributes
    that are not defined on the wrapper itself are looked up on ``value``.
    """

    def __init__(self, value: Any) -> None:
        """Build the wrapper from ``value``.

        Args:
            value: Data passed to ``pandas.Series``, or another wrapper
                instance, whose underlying series is then used as the data.
        """
        if isinstance(value, Series):
            # Unwrap instead of asking pandas to interpret the wrapper object.
            value = value.value
        self.value = pd.Series(value)

    @classmethod
    def __get_pydantic_core_schema__(
        cls, source: 'Type[Any]', handler: 'GetCoreSchemaHandler'
    ) -> 'core_schema.AfterValidatorFunctionSchema':
        """Return the core schema: accept any input, then run ``_validate`` on it."""
        # NOTE(review): a reviewer questioned whether `any_schema()` is the best
        # inner schema that can be used here ("I don't think this is the best we
        # can do. Is it?"); left unchanged pending that discussion.
        return core_schema.general_after_validator_function(
            cls._validate,
            core_schema.any_schema(),
        )

    @classmethod
    def _validate(cls, __input_value: Any, _: 'core_schema.ValidationInfo') -> 'Series':
        """Convert the raw input into a wrapper instance.

        ``__init__`` already passes its argument through ``pandas.Series``, so
        inputs that are already series need no separate branch.
        """
        return cls(__input_value)

    def __repr__(self) -> str:
        """Return the repr of the wrapped series."""
        return repr(self.value)

    def __getattr__(self, name: str) -> Any:
        """Look up ``name`` on the wrapped series.

        Python only calls this hook after normal attribute lookup has failed.

        Raises:
            AttributeError: for ``value`` itself (it is missing only on an
                instance that has not been initialised, and looking it up
                again here would recurse without bound) and for dunder names
                (forwarding those makes ``copy``/``pickle`` pick up the
                pandas implementations and lose the wrapper type).
        """
        if name == 'value' or (name.startswith('__') and name.endswith('__')):
            raise AttributeError(f'{type(self).__name__!r} object has no attribute {name!r}')
        return getattr(self.value, name)

    def __eq__(self, __value: object) -> bool:
        """Return True when ``__value`` is a series holding the same data.

        Both wrapper instances and plain ``pandas.Series`` objects are
        accepted; the comparison itself is done by ``pandas.Series.equals``.
        Any other type compares unequal.
        """
        if isinstance(__value, Series):
            other = __value.value
        elif isinstance(__value, pd.Series):
            other = __value
        else:
            return False
        return bool(self.value.equals(other))

    def __add__(self, other: 'Union[Series, List[Any], Tuple[Any, ...], T]') -> 'Series':
        """Add ``other`` to the wrapped series and return the result as a new wrapper.

        A wrapper operand is unwrapped first; anything else is handed to
        pandas' ``+`` unchanged, so errors raised by pandas propagate.
        """
        operand = other.value if isinstance(other, Series) else other
        return Series(self.value + operand)
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -39,14 +39,15 @@ classifiers = [ | |||||
] | ||||||
requires-python = '>=3.7' | ||||||
dependencies = [ | ||||||
'pydantic>=2.0b3', | ||||||
'pydantic>=2.0', | ||||||
|
'pydantic>=2.0', | |
'pydantic>=2.0b3', |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not `pandas>=2.0`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Because `pandas>=2.0` requires Python >=3.8.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,3 +4,4 @@ annotated-types | |
black | ||
pyupgrade | ||
ruff | ||
pandas-stubs | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Likely not needed on pandas >= 2. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,25 @@ | ||
# | ||
# This file is autogenerated by pip-compile with Python 3.11 | ||
# This file is autogenerated by pip-compile with Python 3.10 | ||
# by the following command: | ||
# | ||
# pip-compile --extra=all --output-file=requirements/pyproject.txt --resolver=backtracking pyproject.toml | ||
# | ||
annotated-types==0.5.0 | ||
# via pydantic | ||
phonenumbers==8.13.13 | ||
pandas==1.3.5 | ||
# via pydantic-extra-types (pyproject.toml) | ||
phonenumbers==8.13.15 | ||
# via pydantic-extra-types (pyproject.toml) | ||
pycountry==22.3.5 | ||
# via pydantic-extra-types (pyproject.toml) | ||
pydantic==2.0b2 | ||
pydantic==2.0 | ||
# via pydantic-extra-types (pyproject.toml) | ||
pydantic-core==0.38.0 | ||
# via pydantic | ||
typing-extensions==4.6.3 | ||
pydantic-core==2.0.1 | ||
# via pydantic | ||
typing-extensions==4.7.0 | ||
# via | ||
# pydantic | ||
# pydantic-core | ||
|
||
# The following packages are considered to be unsafe in a requirements file: | ||
# setuptools |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
import pandas as pd | ||
import pytest | ||
from pydantic import BaseModel | ||
|
||
from pydantic_extra_types.pandas_types import Series | ||
|
||
|
||
@pytest.fixture(scope='session', name='SeriesModel')
def series_model_fixture():
    """Provide a pydantic model class with a single ``Series`` field named ``data``."""

    class SeriesModel(BaseModel):
        # The only field; annotated with the wrapper type under test.
        data: Series

    return SeriesModel
|
||
|
||
@pytest.mark.parametrize(
    'data, expected',
    [
        ([1, 2, 3], [1, 2, 3]),
        ([], []),
        ([10, 20, 30, 40], [10, 20, 30, 40]),
    ],
)
def test_series_creation(data, expected):
    """Constructing the wrapper yields a ``pd.Series`` holding the given values."""
    # Compare the version numerically: a plain string comparison is
    # lexicographic and would, for example, order '1.10.0' before '1.5.3'.
    major_minor = tuple(int(part) for part in pd.__version__.split('.')[:2])
    if major_minor < (2, 0) and data == []:
        # On pandas 1.x the empty case is swapped for a one-element series.
        # NOTE(review): presumably because building an empty series without a
        # dtype warns on those versions -- confirm.
        s = Series([1])
        expected = [1]
    else:
        s = Series(data)
    assert isinstance(s, Series)
    assert isinstance(s.value, pd.Series)
    assert s.value.tolist() == expected
|
||
|
||
def test_series_repr():
    """The wrapper's repr is the repr of the equivalent pandas series."""
    values = [1, 2, 3]
    wrapped = Series(values)
    plain = pd.Series(values)
    assert repr(wrapped) == repr(plain)
|
||
|
||
def test_series_attribute_access():
    """Methods of the underlying pandas series are reachable through the wrapper."""
    values = [1, 2, 3]
    expected_total = pd.Series(values).sum()
    assert Series(values).sum() == expected_total
|
||
|
||
def test_series_equality():
    """Wrappers built from the same data compare equal to each other and to pandas."""
    values = [1, 2, 3]
    left, right = Series(values), Series(values)
    assert left == right
    assert right == pd.Series(values)
|
||
|
||
def test_series_addition():
    """Adding two wrappers gives a wrapper holding the element-wise sum."""
    left = Series([1, 2, 3])
    right = Series([4, 5, 6])
    total = left + right
    assert isinstance(total, Series)
    assert total.value.tolist() == [5, 7, 9]
|
||
|
||
@pytest.mark.parametrize(
    'data, other, expected',
    [
        ([1, 2, 3], [4, 5, 6], [5, 7, 9]),
        ([10, 20, 30], (1, 2, 3), [11, 22, 33]),
        ([5, 10, 15], pd.Series([1, 2, 3]), [6, 12, 18]),
    ],
)
def test_series_addition_with_types(data, other, expected):
    """Lists, tuples and plain pandas series are all accepted as the right operand."""
    total = Series(data) + other
    assert isinstance(total, Series)
    assert total.value.tolist() == expected
|
||
|
||
@pytest.mark.parametrize(
    'data, other',
    [
        ([1, 2, 3], 'invalid'),  # a string operand
        ([1, 2, 3], {'a': 1, 'b': 2}),  # a dict operand
    ],
)
def test_series_addition_invalid_type_error(data, other) -> None:
    """Adding an operand of an unsupported type raises ``TypeError``."""
    wrapped = Series(data)
    with pytest.raises(TypeError):
        wrapped + other
|
||
|
||
@pytest.mark.parametrize(
    'data, other',
    [
        ([1, 2, 3], []),
    ],
)
def test_series_addition_invalid_value_error(data, other) -> None:
    """Adding an empty list to a three-element series raises ``ValueError``."""
    wrapped = Series(data)
    with pytest.raises(ValueError):
        wrapped + other
|
||
|
||
def test_valid_series_model(SeriesModel) -> None:
    """A plain list passed to the model field is validated into a wrapper."""
    values = [1, 2, 4]
    instance = SeriesModel(data=values)
    assert isinstance(instance.data, Series)
    assert instance.data == pd.Series([1, 2, 4])
|
||
|
||
def test_valid_series_model_with_pd_series(SeriesModel) -> None:
    """A pandas series passed to the model field is validated into a wrapper."""
    source = pd.Series([1, 2, 4])
    instance = SeriesModel(data=source)
    assert isinstance(instance.data, Series)
    assert instance.data == source
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is this "value"? `pd.Series` accepts many arguments. 🤔