Skip to content

Commit b9c8212

Browse files
committed
Initial code commit
1 parent 3763d6c commit b9c8212

File tree

7 files changed

+473
-0
lines changed

7 files changed

+473
-0
lines changed

.gitignore

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
share/python-wheels/
24+
*.egg-info/
25+
.installed.cfg
26+
*.egg
27+
MANIFEST
28+
29+
# PyInstaller
30+
# Usually these files are written by a python script from a template
31+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
32+
*.manifest
33+
*.spec
34+
35+
# Installer logs
36+
pip-log.txt
37+
pip-delete-this-directory.txt
38+
39+
# Unit test / coverage reports
40+
htmlcov/
41+
.tox/
42+
.nox/
43+
.coverage
44+
.coverage.*
45+
.cache
46+
nosetests.xml
47+
coverage.xml
48+
*.cover
49+
*.py,cover
50+
.hypothesis/
51+
.pytest_cache/
52+
cover/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
.pybuilder/
76+
target/
77+
78+
# Jupyter Notebook
79+
.ipynb_checkpoints
80+
81+
# IPython
82+
profile_default/
83+
ipython_config.py
84+
85+
# pyenv
86+
# For a library or package, you might want to ignore these files since the code is
87+
# intended to run in multiple environments; otherwise, check them in:
88+
# .python-version
89+
90+
# pipenv
91+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
93+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
94+
# install all needed dependencies.
95+
#Pipfile.lock
96+
97+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
98+
__pypackages__/
99+
100+
# Celery stuff
101+
celerybeat-schedule
102+
celerybeat.pid
103+
104+
# SageMath parsed files
105+
*.sage.py
106+
107+
# Environments
108+
.env
109+
.venv
110+
env/
111+
venv/
112+
ENV/
113+
env.bak/
114+
venv.bak/
115+
116+
# Spyder project settings
117+
.spyderproject
118+
.spyproject
119+
120+
# Rope project settings
121+
.ropeproject
122+
123+
# mkdocs documentation
124+
/site
125+
126+
# mypy
127+
.mypy_cache/
128+
.dmypy.json
129+
dmypy.json
130+
131+
# Pyre type checker
132+
.pyre/
133+
134+
# pytype static type analyzer
135+
.pytype/
136+
137+
# Cython debug symbols
138+
cython_debug/

dev-requirements.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
flake8
2+
pytest
3+
pytest-cov

pandas_select/__init__.py

Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
import inspect
2+
import operator
3+
from functools import wraps
4+
5+
import pandas as pd
6+
from pandas.core.accessor import CachedAccessor
7+
from pandas.core.indexes.accessors import (CombinedDatetimelikeProperties,
8+
DatetimeProperties,
9+
PeriodProperties)
10+
from pandas.core.strings import StringMethods
11+
from pandas.util._decorators import doc
12+
13+
# Public ``.str`` method names that the select accessor lets through; any
# other public routine of ``StringMethods`` is rejected by
# ``StringSelectMethods.__getattribute__``.
_str_boolean_methods = {
    'contains',
    'endswith',
    'isalnum',
    'isalpha',
    'isdecimal',
    'isdigit',
    'islower',
    'isnumeric',
    'isspace',
    'istitle',
    'isupper',
    'match',
    'startswith',
}

# ``.dt`` attribute names whose result is used as a row mask by
# ``SelectDatetimeProperties.__getattribute__``.
_date_boolean_methods = {
    'is_leap_year',
    'is_month_end',
    'is_month_start',
    'is_quarter_end',
    'is_quarter_start',
    'is_year_end',
    'is_year_start',
}
38+
39+
40+
class StringSelectMethods(StringMethods):
    """``select.str`` accessor: string predicates that return selected rows.

    Subclasses pandas' ``StringMethods`` so that the wrapped result of a
    string method is used to index the parent object via ``.loc`` rather
    than being returned as-is. Public routines that are not listed in
    ``_str_boolean_methods`` are rejected on attribute access.
    """

    def __init__(self, *args, **kwargs):
        """Bind to a select accessor exposing ``_parent`` and ``_series``.

        The first positional argument is the accessor object; any further
        positional and keyword arguments are forwarded to ``StringMethods``.
        """
        accessor = args[0]
        forwarded = args[1:]

        # The superclass overrides ``_parent``, so the object to be indexed
        # is stored under a separate attribute name.
        self._parent_frame = accessor._parent
        self._series = accessor._series
        super().__init__(self._series, *forwarded, **kwargs)

    def __getattribute__(self, attr):
        """Return ``attr``, refusing public string routines that are not allowed.

        Raises
        ------
        NotImplementedError
            If ``attr`` is a public routine of ``StringMethods`` that is not
            in ``_str_boolean_methods``.
        """
        is_public = not attr.startswith("_")
        if (is_public
                and attr not in _str_boolean_methods
                and inspect.isroutine(getattr(StringMethods, attr, None))):
            raise NotImplementedError("Boolean selection with this method "
                                      "does not make sense.")
        return super().__getattribute__(attr)

    def _wrap_result(self, *args, **kwargs):
        """Wrap the result as the superclass does, then select rows with it."""
        mask = super()._wrap_result(*args, **kwargs)
        return self._parent_frame.loc[mask]
62+
63+
64+
class SelectDatetimeProperties(DatetimeProperties):
    """``select.dt`` properties for datetime data that return selected rows."""

    def __init__(self, parent, *args, **kwargs):
        """Remember ``parent`` for indexing and forward the rest to pandas.

        ``DatetimeProperties`` keeps its own ``_parent`` attribute, so the
        frame (or series) to be indexed is stored as ``_parent_frame``.
        """
        self._parent_frame = parent
        super().__init__(*args, **kwargs)

    def __getattribute__(self, attr):
        """Resolve ``attr``, turning whitelisted boolean attributes into selections.

        Raises
        ------
        NotImplementedError
            If ``attr`` is a public routine of ``DatetimeProperties`` that is
            not in ``_date_boolean_methods``.
        """
        if attr in _date_boolean_methods:
            # Use the boolean result as a row mask on the parent object.
            mask = super().__getattribute__(attr)
            return self._parent_frame.loc[mask]

        if (not attr.startswith("_")
                and inspect.isroutine(getattr(DatetimeProperties, attr, None))):
            raise NotImplementedError("Boolean selection with this method "
                                      "does not make sense.")

        return super().__getattribute__(attr)
82+
83+
84+
class SelectPeriodProperties(PeriodProperties):
    """``select.dt`` properties for period data that return selected rows."""

    def __init__(self, parent, *args, **kwargs):
        """Store ``parent`` for indexing; remaining arguments go to pandas."""
        self._parent_frame = parent
        super().__init__(*args, **kwargs)

    @property
    def is_leap_year(self):
        """Rows of the parent object selected by the ``is_leap_year`` mask."""
        locator = self._parent_frame.loc
        leap_mask = super().is_leap_year
        return locator[leap_mask]
92+
93+
94+
class DateSelectMethods(CombinedDatetimelikeProperties):
    """Factory for the ``select.dt`` accessor.

    ``__new__`` lets pandas pick the properties class for the underlying
    series and then returns the matching selecting subclass, so instances of
    this class itself are never produced.
    """

    def __new__(cls, series):
        """Return selecting datetime/period properties for a select accessor.

        Parameters
        ----------
        series
            Select accessor exposing ``_series`` (the data to inspect) and
            ``_parent`` (the object whose rows are selected).

        Raises
        ------
        AttributeError
            If pandas resolves the data to anything other than datetime or
            period properties.
        """
        properties = super().__new__(cls, series._series)
        if isinstance(properties, DatetimeProperties):
            return SelectDatetimeProperties(
                series._parent,
                properties._parent,
                properties.orig)
        if isinstance(properties, PeriodProperties):
            # The accessor stores the object to index as ``_parent`` (the
            # same attribute the datetime branch uses); it has no ``_frame``.
            return SelectPeriodProperties(
                series._parent,
                properties._parent,
                properties.orig)
        raise AttributeError("Can only use select.dt accessor on "
                             "datetimelike and periodlike values.")
110+
111+
112+
def selector_wrapper(klass, method_name):
    """Build an accessor method that selects rows using ``klass.<method_name>``.

    The returned function calls the named method of ``klass`` on the
    accessor's ``_series`` and uses the result to index the accessor's
    ``_parent`` through ``.loc``. It carries the metadata of the wrapped
    method via ``functools.wraps``.
    """
    wrapped = getattr(klass, method_name)

    def selector(self, *args, **kwargs):
        # On a series accessor ``_series`` and ``_parent`` are the same
        # object; on a frame accessor ``_parent`` is the frame being indexed.
        func = getattr(klass, method_name)
        mask = func(self._series, *args, **kwargs)
        return self._parent.loc[mask]

    return wraps(wrapped)(selector)
123+
124+
125+
class SelectableIndex:
    """Wrapper around a parent's index whose predicates return selected rows.

    Comparison operators and ``isna``/``isnull``/``isin`` compute a mask from
    ``parent.index`` and return ``parent.loc[mask]``. Any other attribute is
    delegated to the index captured at construction time.
    """

    def __init__(self, parent):
        """Wrap ``parent`` (an object exposing ``index`` and ``loc``)."""
        self._parent = parent
        self._index = parent.index

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped index."""
        # ``__getattr__`` only runs when normal lookup fails. If our own
        # state is missing (instance created without ``__init__``, as the
        # copy/pickle protocols do), reading ``self._index`` below would
        # re-enter this method forever, so fail with AttributeError instead.
        if attr in ("_parent", "_index"):
            raise AttributeError(attr)
        return getattr(self._index, attr)

    def __repr__(self):
        # NOTE(review): ``self`` is not an Index; this appears to rely on
        # ``__getattr__`` delegation for whatever ``Index.__repr__`` reads.
        return pd.Index.__repr__(self)

    def _compare(self, op, cmp):
        """Apply ``op(parent.index, cmp)`` and select the matching rows."""
        idx = op(self._parent.index, cmp)
        return self._parent.loc[idx]

    def __lt__(self, cmp):
        return self._compare(operator.lt, cmp)

    def __le__(self, cmp):
        return self._compare(operator.le, cmp)

    def __eq__(self, cmp):
        return self._compare(operator.eq, cmp)

    def __ne__(self, cmp):
        return self._compare(operator.ne, cmp)

    def __gt__(self, cmp):
        return self._compare(operator.gt, cmp)

    def __ge__(self, cmp):
        return self._compare(operator.ge, cmp)

    @doc(pd.Index.isna)
    def isna(self):
        return self._parent.loc[self._parent.index.isna()]

    @doc(pd.Index.isnull)
    def isnull(self):
        return self._parent.loc[self._parent.index.isnull()]

    @doc(pd.Index.isin)
    def isin(self, values, levels=None):
        idx = self._parent.index.isin(values, levels)
        return self._parent.loc[idx]
170+
171+
172+
@pd.api.extensions.register_series_accessor("select")
class SelectableColumn:
    """``select`` accessor for a series, or for one column of a frame.

    Comparisons and ``isna``/``isnull``/``isin`` are evaluated on the wrapped
    series and the result is used to index ``_parent`` via ``.loc``. Other
    attributes are delegated to the wrapped series.
    """

    # Selecting variants of the ``.str`` and ``.dt`` accessors.
    str = CachedAccessor("str", StringSelectMethods)
    dt = CachedAccessor("dt", DateSelectMethods)

    __lt__ = selector_wrapper(pd.Series, "__lt__")
    __le__ = selector_wrapper(pd.Series, "__le__")
    __eq__ = selector_wrapper(pd.Series, "__eq__")
    __ne__ = selector_wrapper(pd.Series, "__ne__")
    __gt__ = selector_wrapper(pd.Series, "__gt__")
    __ge__ = selector_wrapper(pd.Series, "__ge__")
    isna = selector_wrapper(pd.Series, "isna")
    isnull = selector_wrapper(pd.Series, "isnull")
    isin = selector_wrapper(pd.Series, "isin")

    def __init__(self, parent, series=None):
        """Bind the accessor.

        Parameters
        ----------
        parent
            The object whose rows are selected. When used as the series
            accessor this is the series itself; when returned by the frame
            accessor it is the frame.
        series : optional
            The series the predicates are evaluated on. Defaults to
            ``parent``.
        """
        if series is None:
            series = parent
        self._parent = parent
        self._series = series

    def __getattr__(self, attr):
        """Delegate unknown attributes to the wrapped series."""
        # ``__getattr__`` only runs when normal lookup fails. If our own
        # state is missing (instance created without ``__init__``), reading
        # ``self._series`` below would re-enter this method forever, so fail
        # with AttributeError instead.
        if attr in ("_parent", "_series"):
            raise AttributeError(attr)
        return getattr(self._series, attr)

    def __repr__(self):
        # NOTE(review): ``self`` is not a Series; this appears to rely on
        # ``__getattr__`` delegation for whatever ``Series.__repr__`` reads.
        return pd.Series.__repr__(self)

    @property
    def index(self):
        """Selectable view of the parent's index."""
        return SelectableIndex(self._parent)
204+
205+
206+
@pd.api.extensions.register_dataframe_accessor('select')
class DataFrameSelectAccessor:
    """``select`` accessor for a frame.

    Column names resolve to ``SelectableColumn`` objects bound to the frame,
    ``index`` resolves to a ``SelectableIndex``, and any other attribute is
    delegated to the frame itself.
    """

    def __init__(self, frame):
        self._frame = frame

    def __repr__(self):
        # NOTE(review): ``self`` is not a DataFrame; this appears to rely on
        # ``__getattr__`` delegation for whatever ``DataFrame.__repr__`` reads.
        return pd.DataFrame.__repr__(self)

    def __dir__(self):
        """List the frame's column names plus ``index``."""
        return self._frame.columns.tolist() + ['index']

    def __getattr__(self, attr):
        """Return a selectable column for column names, else delegate to the frame."""
        # ``__getattr__`` only runs when normal lookup fails. If ``_frame``
        # itself is missing (instance created without ``__init__``), reading
        # it below would re-enter this method forever, so fail with
        # AttributeError instead.
        if attr == "_frame":
            raise AttributeError(attr)
        if attr in self._frame.columns:
            return SelectableColumn(self._frame, self._frame[attr])
        return getattr(self._frame, attr)

    def __getitem__(self, key):
        """Item-style access equivalent to attribute access.

        Raises
        ------
        KeyError
            If ``key`` cannot be resolved as an attribute.
        """
        try:
            # Return the resolved object; without the ``return`` every
            # lookup evaluated to ``None``.
            return getattr(self, key)
        except AttributeError as err:
            raise KeyError(f"{key}") from err

    @property
    def index(self):
        """Selectable view of the frame's index."""
        return SelectableIndex(self._frame)

pandas_select/tests/conftest.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pytest
4+
5+
6+
@pytest.fixture
def dta():
    """Frame with 15 rows: integers (A), dates (B), strings (C), categoricals (D)."""
    labels = ['A', 'B', 'C'] * 5
    columns = {
        'A': np.arange(1, 16),
        'B': pd.date_range('2020-01-01', periods=15),
        'C': labels,
        'D': pd.Categorical(labels),
    }
    return pd.DataFrame.from_dict(columns)

0 commit comments

Comments
 (0)