Skip to content

Commit 50bcb9c

Browse files
committed
refactor: Finish sort_by_ir removal
1 parent cc780de commit 50bcb9c

File tree

7 files changed

+73
-17
lines changed

7 files changed

+73
-17
lines changed

narwhals/_plan/arrow/dataframe.py

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,6 @@
1616
from narwhals._plan.compliant.dataframe import EagerDataFrame
1717
from narwhals._plan.compliant.typing import namespace
1818
from narwhals._plan.expressions import NamedIR
19-
from narwhals._plan.typing import Seq
20-
from narwhals._typing_compat import deprecated
2119
from narwhals._utils import Implementation, Version
2220
from narwhals.schema import Schema
2321

@@ -30,7 +28,7 @@
3028
from narwhals._plan.arrow.namespace import ArrowNamespace
3129
from narwhals._plan.expressions import ExprIR, NamedIR
3230
from narwhals._plan.options import SortMultipleOptions
33-
from narwhals._plan.typing import NonCrossJoinStrategy, Seq
31+
from narwhals._plan.typing import NonCrossJoinStrategy
3432
from narwhals.dtypes import DType
3533
from narwhals.typing import IntoSchema
3634

@@ -100,13 +98,6 @@ def sort(self, by: Sequence[str], options: SortMultipleOptions) -> Self:
10098
indices = pc.sort_indices(native.select(list(by)), options=options.to_arrow(by))
10199
return self._with_native(native.take(indices))
102100

103-
# TODO @dangotbanned: Finish scoping out what breaks if removed
104-
@deprecated("Use `DataFrame.sort` instead", category=None)
105-
def sort_by_ir(self, by: Seq[NamedIR], options: SortMultipleOptions) -> Self:
106-
df_by = self.select(by)
107-
indices = pc.sort_indices(df_by.native, options=options.to_arrow(df_by.columns))
108-
return self._with_native(self.native.take(indices))
109-
110101
def with_row_index(self, name: str) -> Self:
111102
return self._with_native(self.native.add_column(0, name, fn.int_range(len(self))))
112103

narwhals/_plan/arrow/expr.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
from narwhals._plan.compliant.expr import EagerExpr
1616
from narwhals._plan.compliant.scalar import EagerScalar
1717
from narwhals._plan.compliant.typing import namespace
18-
from narwhals._plan.expressions import NamedIR
1918
from narwhals._utils import Implementation, Version, _StoresNative, not_implemented
2019
from narwhals.exceptions import InvalidOperationError, ShapeError
2120

@@ -363,10 +362,10 @@ def over_ordered(
363362
raise NotImplementedError(msg)
364363

365364
# NOTE: Converting `over(order_by=..., options=...)` into the right shape for `DataFrame.sort`
366-
sort_by = tuple(NamedIR.from_ir(e) for e in node.order_by)
367-
options = node.sort_options.to_multiple(len(node.order_by))
365+
sort_by = tuple(node.order_by_names())
366+
options = node.sort_options.to_multiple(len(sort_by))
368367
idx_name = temp.column_name(frame)
369-
sorted_context = frame.with_row_index(idx_name).sort_by_ir(sort_by, options)
368+
sorted_context = frame.with_row_index(idx_name).sort(sort_by, options)
370369
evaluated = node.expr.dispatch(self, sorted_context.drop([idx_name]), name)
371370
if isinstance(evaluated, ArrowScalar):
372371
# NOTE: We're already sorted, defer broadcasting to the outer context

narwhals/_plan/exceptions.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,24 @@ def over_row_separable_error(
139139
return InvalidOperationError(msg)
140140

141141

142+
def over_order_by_names_error(
143+
expr: ir.OrderedWindowExpr, by: ir.ExprIR
144+
) -> InvalidOperationError:
145+
if by.meta.is_column_selection(allow_aliasing=True):
146+
# narwhals dev error
147+
msg = (
148+
f"Cannot use `{type(expr).__name__}.order_by_names()` before expression expansion.\n"
149+
f"Found unresolved selection {by!r}, in:\n{expr!r}"
150+
)
151+
else:
152+
# user error
153+
msg = (
154+
f"Only column selection expressions are supported in `over(order_by=...)`.\n"
155+
f"Found {by!r}, in:\n{expr!r}"
156+
)
157+
return InvalidOperationError(msg)
158+
159+
142160
def invalid_into_expr_error(
143161
first_input: Iterable[IntoExpr],
144162
more_inputs: tuple[Any, ...],

narwhals/_plan/expressions/expr.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,10 @@
77

88
from narwhals._plan._expr_ir import ExprIR, SelectorIR
99
from narwhals._plan.common import replace
10-
from narwhals._plan.exceptions import function_expr_invalid_operation_error
10+
from narwhals._plan.exceptions import (
11+
function_expr_invalid_operation_error,
12+
over_order_by_names_error,
13+
)
1114
from narwhals._plan.expressions import selectors as cs
1215
from narwhals._plan.options import ExprIROptions
1316
from narwhals._plan.typing import (
@@ -383,6 +386,19 @@ def iter_root_names(self) -> t.Iterator[ExprIR]:
383386
yield from e.iter_left()
384387
yield self
385388

389+
def order_by_names(self) -> Iterator[str]:
390+
"""Yield the names resolved from expanding `order_by`.
391+
392+
Raises:
393+
InvalidOperationError: If used *before* expansion, or
394+
`order_by` contains expressions that do more than select.
395+
"""
396+
for by in self.order_by:
397+
if isinstance(by, Column):
398+
yield by.name
399+
else:
400+
raise over_order_by_names_error(self, by)
401+
386402

387403
class Len(ExprIR, config=ExprIROptions.namespaced()):
388404
@property

narwhals/_plan/options.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ def to_multiple(self, n_repeat: int = 1, /) -> SortMultipleOptions:
167167
if n_repeat == 1:
168168
desc: Seq[bool] = (self.descending,)
169169
nulls: Seq[bool] = (self.nulls_last,)
170-
else: # pragma: no cover
170+
else:
171171
desc = tuple(repeat(self.descending, n_repeat))
172172
nulls = tuple(repeat(self.nulls_last, n_repeat))
173173
return SortMultipleOptions(descending=desc, nulls_last=nulls)

tests/plan/expr_expansion_test.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -726,3 +726,16 @@ def test_expand_binary_expr_combination_invalid(df_1: Frame) -> None:
726726
)
727727
with pytest.raises(MultiOutputExpressionError, match=pattern):
728728
df_1.project(ten_to_nine)
729+
730+
731+
def test_over_order_by_names() -> None:
732+
expr = nwp.col("a").first().over(order_by=ncs.string())
733+
e_ir = expr._ir
734+
assert isinstance(e_ir, ir.OrderedWindowExpr)
735+
with pytest.raises(
736+
InvalidOperationError,
737+
match=re_compile(
738+
r"cannot use.+order_by_names.+before.+expansion.+ncs.string\(\)"
739+
),
740+
):
741+
list(e_ir.order_by_names())

tests/plan/over_test.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import narwhals._plan as nwp
1111
from narwhals._plan import selectors as ncs
1212
from narwhals.exceptions import InvalidOperationError
13-
from tests.plan.utils import assert_equal_data, dataframe
13+
from tests.plan.utils import assert_equal_data, dataframe, re_compile
1414

1515
if TYPE_CHECKING:
1616
from narwhals._plan.typing import IntoExprColumn, OneOrIterable
@@ -225,3 +225,22 @@ def test_shift_kitchen_sink(data_alt: Data) -> None:
225225
)
226226
expected = {"b": [0, 5, 10, 15, 500], "c": [5, 5, 10, 45, 500]}
227227
assert_equal_data(result, expected)
228+
229+
230+
def test_over_order_by_expr(data_alt: Data) -> None:
231+
df = dataframe(data_alt)
232+
result = df.select(
233+
nwp.all()
234+
+ nwp.all().last().over(order_by=[nwp.nth(1), ncs.first()], descending=True)
235+
)
236+
expected = {"a": [6, 8, 4, 5, None], "b": [0, 1, 3, 2, 1], "c": [18, 10, 11, 10, 10]}
237+
assert_equal_data(result, expected)
238+
239+
240+
def test_over_order_by_expr_invalid(data_alt: Data) -> None:
241+
df = dataframe(data_alt)
242+
with pytest.raises(
243+
InvalidOperationError,
244+
match=re_compile(r"only.+column.+selection.+in.+order_by.+found.+sort"),
245+
):
246+
df.select(nwp.col("a").first().over(order_by=nwp.col("b").sort()))

0 commit comments

Comments
 (0)