Skip to content

Commit 3d17e56

Browse files
committed
ENH: Introduce pandas.col
1 parent 5774290 commit 3d17e56

File tree

2 files changed

+187
-0
lines changed

2 files changed

+187
-0
lines changed

pandas/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@
105105
Series,
106106
DataFrame,
107107
)
108+
from pandas.core.col import col
108109

109110
from pandas.core.dtypes.dtypes import SparseDtype
110111

@@ -281,6 +282,7 @@
281282
"array",
282283
"arrays",
283284
"bdate_range",
285+
"col",
284286
"concat",
285287
"crosstab",
286288
"cut",

pandas/core/col.py

Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
from __future__ import annotations
2+
3+
from collections.abc import (
4+
Callable,
5+
Hashable,
6+
)
7+
from typing import (
8+
TYPE_CHECKING,
9+
Any,
10+
)
11+
12+
from pandas.core.series import Series
13+
14+
if TYPE_CHECKING:
15+
from pandas import DataFrame
16+
17+
18+
def parse_args(df: DataFrame, *args: Any) -> tuple[Any, ...]:
    """
    Resolve deferred expressions among positional arguments.

    Parameters
    ----------
    df : DataFrame
        Object handed to every ``Expr`` found in ``args``.
    *args
        Positional arguments; each may be an ``Expr`` or any other value.

    Returns
    -------
    tuple
        Same length and order as ``args``. Each ``Expr`` is replaced by the
        result of calling it with ``df``; every other value is passed through
        unchanged.
    """
    return tuple(arg(df) if isinstance(arg, Expr) else arg for arg in args)
20+
21+
22+
def parse_kwargs(df: DataFrame, **kwargs: Any) -> dict[str, Any]:
    """
    Resolve deferred expressions among keyword arguments.

    Parameters
    ----------
    df : DataFrame
        Object handed to every ``Expr`` found among the keyword values.
    **kwargs
        Keyword arguments; each value may be an ``Expr`` or any other value.

    Returns
    -------
    dict
        Same keys as ``kwargs``. Each ``Expr`` value is replaced by the result
        of calling it with ``df``; every other value is passed through
        unchanged.
    """
    return {
        name: value(df) if isinstance(value, Expr) else value
        for name, value in kwargs.items()
    }
26+
27+
28+
class Expr:
    """
    Deferred computation that is evaluated by calling it with a DataFrame.

    An ``Expr`` wraps a callable taking a DataFrame. Arithmetic and comparison
    operators, accessor namespaces, and arbitrary method calls all build a new
    ``Expr`` instead of computing anything; the work happens when the final
    ``Expr`` is called.

    Parameters
    ----------
    func : callable
        Function invoked with the DataFrame when the expression is evaluated.
    """

    def __init__(self, func: Callable[[DataFrame], Series]) -> None:
        self._func = func

    def __call__(self, df: DataFrame) -> Series:
        """Evaluate the expression against ``df``."""
        return self._func(df)

    def _binary_op(self, method: str, other: Any) -> Expr:
        """
        Build the ``Expr`` for one binary operator.

        ``method`` is the name of the special method looked up on the
        evaluated left-hand result. When ``other`` is itself an ``Expr`` it is
        evaluated against the same DataFrame first; otherwise it is passed to
        the method as-is.
        """
        if isinstance(other, Expr):
            return Expr(lambda df: getattr(self(df), method)(other(df)))
        return Expr(lambda df: getattr(self(df), method)(other))

    # namespaces
    @property
    def dt(self) -> NamespaceExpr:
        return NamespaceExpr(self, "dt")

    @property
    def str(self) -> NamespaceExpr:
        return NamespaceExpr(self, "str")

    @property
    def cat(self) -> NamespaceExpr:
        return NamespaceExpr(self, "cat")

    @property
    def list(self) -> NamespaceExpr:
        return NamespaceExpr(self, "list")

    @property
    def sparse(self) -> NamespaceExpr:
        return NamespaceExpr(self, "sparse")

    @property
    def struct(self) -> NamespaceExpr:
        return NamespaceExpr(self, "struct")

    # Binary ops

    def __add__(self, other: Any) -> Expr:
        return self._binary_op("__add__", other)

    def __radd__(self, other: Any) -> Expr:
        return self._binary_op("__radd__", other)

    def __sub__(self, other: Any) -> Expr:
        return self._binary_op("__sub__", other)

    def __rsub__(self, other: Any) -> Expr:
        return self._binary_op("__rsub__", other)

    def __mul__(self, other: Any) -> Expr:
        return self._binary_op("__mul__", other)

    def __rmul__(self, other: Any) -> Expr:
        return self._binary_op("__rmul__", other)

    def __truediv__(self, other: Any) -> Expr:
        return self._binary_op("__truediv__", other)

    def __rtruediv__(self, other: Any) -> Expr:
        return self._binary_op("__rtruediv__", other)

    def __floordiv__(self, other: Any) -> Expr:
        return self._binary_op("__floordiv__", other)

    def __rfloordiv__(self, other: Any) -> Expr:
        return self._binary_op("__rfloordiv__", other)

    def __ge__(self, other: Any) -> Expr:
        return self._binary_op("__ge__", other)

    def __gt__(self, other: Any) -> Expr:
        return self._binary_op("__gt__", other)

    def __le__(self, other: Any) -> Expr:
        return self._binary_op("__le__", other)

    def __lt__(self, other: Any) -> Expr:
        return self._binary_op("__lt__", other)

    def __eq__(self, other: Any) -> Expr:  # type: ignore[override]
        return self._binary_op("__eq__", other)

    # The special method behind ``!=`` is ``__ne__``. Under any other name the
    # interpreter falls back to negating ``__eq__``, which yields a plain bool
    # instead of a deferred expression.
    def __ne__(self, other: Any) -> Expr:  # type: ignore[override]
        return self._binary_op("__ne__", other)

    def __mod__(self, other: Any) -> Expr:
        return self._binary_op("__mod__", other)

    # Everything else

    def __getattr__(self, attr: str) -> Callable[..., Expr]:
        """
        Return a factory that defers a call to ``attr`` on the evaluated result.

        Calling the returned factory with ``*args`` / ``**kwargs`` produces an
        ``Expr``. When that ``Expr`` is evaluated, any ``Expr`` among the
        arguments is evaluated against the same DataFrame, and ``attr`` is
        then looked up on this expression's result and called.

        Raises
        ------
        AttributeError
            If ``attr`` is a double-underscore name that was not found through
            normal lookup.
        """
        # Protocol probes (copy/pickle hooks, ``__array__``, ...) must see a
        # missing attribute as missing. Answering them with a factory makes
        # ``hasattr`` always true and breaks ``copy.copy``.
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}"
            )

        def func(df: DataFrame, *args: Any, **kwargs: Any) -> Series:
            resolved_args = parse_args(df, *args)
            resolved_kwargs = parse_kwargs(df, **kwargs)
            return getattr(self(df), attr)(*resolved_args, **resolved_kwargs)

        return lambda *args, **kwargs: Expr(lambda df: func(df, *args, **kwargs))
156+
157+
158+
class NamespaceExpr:
    """
    Deferred access to one accessor namespace of an evaluated expression.

    Parameters
    ----------
    func : callable
        Called with the DataFrame to produce the object whose accessor is used.
    namespace : str
        Name of the accessor attribute, looked up both on ``Series`` (to decide
        how ``attr`` behaves) and on the evaluated result (to do the work).
    """

    def __init__(self, func: Callable[[DataFrame], Series], namespace: str) -> None:
        self._func = func
        self._namespace = namespace

    def __getattr__(self, attr: str) -> Any:
        """
        Defer ``attr`` of the accessor namespace.

        Returns an ``Expr`` directly when ``attr`` is a property of the
        accessor found on ``Series``. Otherwise returns a factory that takes
        the call arguments and produces the ``Expr``; any ``Expr`` among those
        arguments is evaluated against the same DataFrame.

        Raises
        ------
        AttributeError
            If ``attr`` is a double-underscore name that was not found through
            normal lookup, or if the accessor on ``Series`` has no such
            attribute.
        """
        # Reject protocol probes before touching instance state. On an
        # instance whose ``__dict__`` is still empty (as ``copy.copy`` creates
        # before restoring state), reading ``self._namespace`` here would
        # re-enter ``__getattr__`` without end.
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {attr!r}"
            )

        if isinstance(getattr(getattr(Series, self._namespace), attr), property):

            def func(df):
                return getattr(getattr(self._func(df), self._namespace), attr)

            return Expr(func)

        def func(df, *args, **kwargs):
            resolved_args = parse_args(df, *args)
            resolved_kwargs = parse_kwargs(df, **kwargs)
            return getattr(getattr(self._func(df), self._namespace), attr)(
                *resolved_args, **resolved_kwargs
            )

        return lambda *args, **kwargs: Expr(lambda df: func(df, *args, **kwargs))
179+
180+
181+
def col(col_name: Hashable) -> Expr:
    """
    Create a deferred expression that selects ``col_name`` from a DataFrame.

    Parameters
    ----------
    col_name : Hashable
        Key used to index the DataFrame when the expression is evaluated.

    Returns
    -------
    Expr
        Expression that returns ``df[col_name]`` when called with ``df``.

    Raises
    ------
    TypeError
        If ``col_name`` is not hashable.
    """
    if isinstance(col_name, Hashable):
        return Expr(lambda df: df[col_name])

    msg = f"Expected Hashable, got: {type(col_name)}"
    raise TypeError(msg)

0 commit comments

Comments
 (0)