Skip to content

Commit ab7330a

Browse files
committed
feat(expr-ir): Add a concrete impl for cs.by_name
Adding `parse_into_selector_ir` will require calling this a lot I'd rather skip using `re` when a more performant option is there
1 parent 040d377 commit ab7330a

File tree

2 files changed

+58
-7
lines changed

2 files changed

+58
-7
lines changed

narwhals/_plan/expressions/selectors.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,30 @@ def matches_column(self, name: str, dtype: DType) -> bool:
9696
return dtype in self.dtypes
9797

9898

99+
class ByName(Selector):
100+
# NOTE: `polars` allows this and `by_index` to redefine schema order in a `select`
101+
# > Matching columns are returned in the order in which they are declared in
102+
# > the selector, not the underlying schema order.
103+
# If you wanna support that (later), then a `frozenset` won't work
104+
__slots__ = ("names",)
105+
names: frozenset[str]
106+
107+
def __repr__(self) -> str:
108+
els = ", ".join(f"{nm!r}" for nm in sorted(self.names))
109+
return f"ncs.by_name({els})"
110+
111+
@staticmethod
112+
def from_names(*names: OneOrIterable[str]) -> ByName:
113+
return ByName(names=frozenset(flatten_hash_safe(names)))
114+
115+
@staticmethod
116+
def from_name(name: str, /) -> ByName:
117+
return ByName(names=frozenset((name,)))
118+
119+
def matches_column(self, name: str, dtype: DType) -> bool:
120+
return name in self.names
121+
122+
99123
class Categorical(Selector):
100124
def __repr__(self) -> str:
101125
return "ncs.categorical()"
@@ -192,12 +216,6 @@ class Matches(Selector):
192216
def from_string(pattern: str, /) -> Matches:
193217
return Matches(pattern=re.compile(pattern))
194218

195-
@staticmethod
196-
def from_names(*names: OneOrIterable[str]) -> Matches:
197-
"""Implements `cs.by_name` to support `__r<op>__` with column selections."""
198-
it = flatten_hash_safe(names)
199-
return Matches.from_string(f"^({'|'.join(re.escape(name) for name in it)})$")
200-
201219
def __repr__(self) -> str:
202220
return f"ncs.matches(pattern={self.pattern.pattern!r})"
203221

@@ -245,7 +263,11 @@ def by_dtype(*dtypes: OneOrIterable[DType | type[DType]]) -> expr.Selector:
245263

246264

247265
def by_name(*names: OneOrIterable[str]) -> expr.Selector:
248-
return Matches.from_names(*names).to_selector().to_narwhals()
266+
if len(names) == 1 and isinstance(names[0], str):
267+
sel = ByName.from_name(names[0])
268+
else:
269+
sel = ByName.from_names(*names)
270+
return sel.to_selector().to_narwhals()
249271

250272

251273
def boolean() -> expr.Selector:

tests/plan/over_test.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,21 @@ def data() -> Data:
2727
}
2828

2929

30+
@pytest.fixture
31+
def data_with_null(data: Data) -> Data:
32+
return data | {"b": [1, 2, None, 5, 3]}
33+
34+
3035
@pytest.fixture
3136
def data_alt() -> Data:
3237
return {"a": [3, 5, 1, 2, None], "b": [0, 1, 3, 2, 1], "c": [9, 1, 2, 1, 1]}
3338

3439

40+
XFAIL_REQUIRES_PARTITION_BY = pytest.mark.xfail(
41+
reason="Native group_by isn't enough", raises=InvalidOperationError
42+
)
43+
44+
3545
@pytest.mark.parametrize(
3646
"partition_by",
3747
[
@@ -100,6 +110,25 @@ def test_over_multiple(data: Data, partition_by: OneOrIterable[IntoExprColumn])
100110
assert_equal_data(result, expected)
101111

102112

113+
@XFAIL_REQUIRES_PARTITION_BY
114+
def test_over_cum_sum(data_with_null: Data) -> None: # pragma: no cover
115+
df = dataframe(data_with_null)
116+
expected = {
117+
"a": ["a", "a", "b", "b", "b"],
118+
"b": [1, 2, None, 5, 3],
119+
"c": [5, 4, 3, 2, 1],
120+
"b_cum_sum": [1, 3, None, 5, 8],
121+
"c_cum_sum": [5, 9, 3, 5, 6],
122+
}
123+
124+
result = (
125+
df.with_columns(nwp.col("b", "c").cum_sum().over("a").name.suffix("_cum_sum"))
126+
.sort("i")
127+
.drop("i")
128+
)
129+
assert_equal_data(result, expected)
130+
131+
103132
def test_over_std_var(data: Data) -> None:
104133
expected = {
105134
"a": ["a", "a", "b", "b", "b"],

0 commit comments

Comments
 (0)