Skip to content

Commit cc780de

Browse files
committed
refactor: Start transitioning DataFrame.sort to selectors-only
Need to rewrite `ArrowExpr.over_ordered` to make use of the simplification Maybe something like loop `order_by` and use `meta.as_selector`?
1 parent 20ac1d3 commit cc780de

File tree

4 files changed

+14
-6
lines changed

4 files changed

+14
-6
lines changed

narwhals/_plan/arrow/dataframe.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from narwhals._plan.compliant.typing import namespace
1818
from narwhals._plan.expressions import NamedIR
1919
from narwhals._plan.typing import Seq
20+
from narwhals._typing_compat import deprecated
2021
from narwhals._utils import Implementation, Version
2122
from narwhals.schema import Schema
2223

@@ -94,7 +95,14 @@ def _evaluate_irs(self, nodes: Iterable[NamedIR[ExprIR]], /) -> Iterator[Series]
9495
from_named_ir = ns._expr.from_named_ir
9596
yield from ns._expr.align(from_named_ir(e, self) for e in nodes)
9697

97-
def sort(self, by: Seq[NamedIR], options: SortMultipleOptions) -> Self:
98+
def sort(self, by: Sequence[str], options: SortMultipleOptions) -> Self:
99+
native = self.native
100+
indices = pc.sort_indices(native.select(list(by)), options=options.to_arrow(by))
101+
return self._with_native(native.take(indices))
102+
103+
# TODO @dangotbanned: Finish scoping out what breaks if removed
104+
@deprecated("Use `DataFrame.sort` instead", category=None)
105+
def sort_by_ir(self, by: Seq[NamedIR], options: SortMultipleOptions) -> Self:
98106
df_by = self.select(by)
99107
indices = pc.sort_indices(df_by.native, options=options.to_arrow(df_by.columns))
100108
return self._with_native(self.native.take(indices))

narwhals/_plan/arrow/expr.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,7 @@ def over_ordered(
366366
sort_by = tuple(NamedIR.from_ir(e) for e in node.order_by)
367367
options = node.sort_options.to_multiple(len(node.order_by))
368368
idx_name = temp.column_name(frame)
369-
sorted_context = frame.with_row_index(idx_name).sort(sort_by, options)
369+
sorted_context = frame.with_row_index(idx_name).sort_by_ir(sort_by, options)
370370
evaluated = node.expr.dispatch(self, sorted_context.drop([idx_name]), name)
371371
if isinstance(evaluated, ArrowScalar):
372372
# NOTE: We're already sorted, defer broadcasting to the outer context

narwhals/_plan/compliant/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def rename(self, mapping: Mapping[str, str]) -> Self: ...
6464
def schema(self) -> Mapping[str, DType]: ...
6565
def select(self, irs: Seq[NamedIR]) -> Self: ...
6666
def select_names(self, *column_names: str) -> Self: ...
67-
def sort(self, by: Seq[NamedIR], options: SortMultipleOptions) -> Self: ...
67+
def sort(self, by: Sequence[str], options: SortMultipleOptions) -> Self: ...
6868
def with_columns(self, irs: Seq[NamedIR]) -> Self: ...
6969

7070

narwhals/_plan/dataframe.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -105,10 +105,10 @@ def sort(
105105
descending: OneOrIterable[bool] = False,
106106
nulls_last: OneOrIterable[bool] = False,
107107
) -> Self:
108-
sort = _parse.parse_sort_by_into_seq_of_expr_ir(by, *more_by)
108+
s_irs = _parse.parse_into_seq_of_selector_ir(by, *more_by)
109+
names = expand_selector_irs_names(s_irs, schema=self)
109110
opts = SortMultipleOptions.parse(descending=descending, nulls_last=nulls_last)
110-
named_irs, _ = prepare_projection(sort, schema=self)
111-
return self._with_compliant(self._compliant.sort(named_irs, opts))
111+
return self._with_compliant(self._compliant.sort(names, opts))
112112

113113
def drop(
114114
self, *columns: OneOrIterable[ColumnNameOrSelector], strict: bool = True

0 commit comments

Comments
 (0)