|
2 | 2 |
|
3 | 3 | import operator |
4 | 4 | from io import BytesIO |
5 | | -from typing import TYPE_CHECKING, Any, Literal, cast |
| 5 | +from typing import TYPE_CHECKING, Any, cast |
6 | 6 |
|
7 | 7 | import ibis |
8 | 8 | import ibis.expr.types as ir |
9 | 9 |
|
10 | | -from narwhals._ibis.utils import evaluate_exprs, native_to_narwhals_dtype |
| 10 | +from narwhals._ibis.expr import IbisExpr |
| 11 | +from narwhals._ibis.utils import evaluate_exprs, lit, native_to_narwhals_dtype |
11 | 12 | from narwhals._sql.dataframe import SQLLazyFrame |
12 | 13 | from narwhals._utils import ( |
13 | 14 | Implementation, |
14 | 15 | ValidateBackendVersion, |
15 | 16 | Version, |
| 17 | + generate_temporary_column_name, |
16 | 18 | not_implemented, |
17 | 19 | parse_columns_to_drop, |
18 | 20 | to_pyarrow_table, |
19 | 21 | zip_strict, |
20 | 22 | ) |
21 | | -from narwhals.exceptions import ColumnNotFoundError, InvalidOperationError |
| 23 | +from narwhals.exceptions import InvalidOperationError |
22 | 24 |
|
23 | 25 | if TYPE_CHECKING: |
24 | 26 | from collections.abc import Iterable, Iterator, Mapping, Sequence |
|
31 | 33 | from typing_extensions import Self, TypeAlias, TypeIs |
32 | 34 |
|
33 | 35 | from narwhals._compliant.typing import CompliantDataFrameAny |
34 | | - from narwhals._ibis.expr import IbisExpr |
35 | 36 | from narwhals._ibis.group_by import IbisGroupBy |
36 | 37 | from narwhals._ibis.namespace import IbisNamespace |
37 | 38 | from narwhals._ibis.series import IbisInterchangeSeries |
|
40 | 41 | from narwhals.dataframe import LazyFrame |
41 | 42 | from narwhals.dtypes import DType |
42 | 43 | from narwhals.stable.v1 import DataFrame as DataFrameV1 |
43 | | - from narwhals.typing import AsofJoinStrategy, JoinStrategy, LazyUniqueKeepStrategy |
| 44 | + from narwhals.typing import AsofJoinStrategy, JoinStrategy, UniqueKeepStrategy |
44 | 45 |
|
45 | 46 | JoinPredicates: TypeAlias = "Sequence[ir.BooleanColumn] | Sequence[str]" |
46 | 47 |
|
@@ -320,21 +321,33 @@ def collect_schema(self) -> dict[str, DType]: |
320 | 321 | } |
321 | 322 |
|
def unique(
    self,
    subset: Sequence[str] | None,
    *,
    keep: UniqueKeepStrategy,
    order_by: Sequence[str] | None,
) -> Self:
    """Return a frame with one row kept (or none) per group of duplicates.

    Rows are grouped by `subset`, or by every column of the frame when
    `subset` is `None` or empty. A temporary marker column is computed over
    a window partitioned by those columns, rows whose marker equals 1 are
    retained, and the marker column is dropped again.

    Arguments:
        subset: Column names that define what counts as a duplicate.
        keep: When `"none"`, the marker is the per-window row count, so
            only rows whose window count is 1 are retained. For any other
            value the marker is the window row number plus one.
        order_by: Column names used to order rows inside each window. When
            given, the ordering is descending with nulls last for
            `keep="last"`, and ascending with nulls first otherwise. When
            `None` or empty, a constant literal is used as the ordering.

    Raises:
        The error object returned by `_check_columns_exist`, if it is truthy.
    """
    key_columns = subset or self.columns
    if missing := self._check_columns_exist(key_columns):
        raise missing

    # Name guaranteed not to clash with the frame's existing columns.
    marker_name = generate_temporary_column_name(8, self.columns)

    if not order_by:
        # No requested ordering: order the window by a constant.
        ordering = lit(1)
    else:
        # `keep="last"` flips both the direction and the null placement.
        reverse = keep == "last"
        ordering = IbisExpr._sort(*order_by, descending=reverse, nulls_last=reverse)

    partition = ibis.window(group_by=key_columns, order_by=ordering)
    if keep != "none":
        marker = ibis.row_number().over(partition) + lit(1)
    else:
        marker = self.native.count().over(partition)

    flagged = self.native.mutate(**{marker_name: marker})
    retained = flagged.filter(ibis._[marker_name] == lit(1))
    return self._with_native(retained.drop(marker_name))
338 | 351 |
|
339 | 352 | def sort(self, *by: str, descending: bool | Sequence[bool], nulls_last: bool) -> Self: |
340 | 353 | from narwhals._ibis.expr import IbisExpr |
|
0 commit comments