Skip to content

Commit 4a05028

Browse files
authored
Merge branch 'main' into oh-nodes
2 parents b03c4d3 + c952d58 commit 4a05028

File tree

17 files changed

+93
-102
lines changed

17 files changed

+93
-102
lines changed

narwhals/_arrow/dataframe.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,6 @@
2525
from narwhals.utils import (
2626
Implementation,
2727
Version,
28-
check_column_exists,
2928
check_column_names_are_unique,
3029
convert_str_slice_to_int_slice,
3130
generate_temporary_column_name,
@@ -440,9 +439,7 @@ def join(
440439
join_asof = not_implemented()
441440

442441
def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
443-
to_drop = parse_columns_to_drop(
444-
compliant_frame=self, columns=columns, strict=strict
445-
)
442+
to_drop = parse_columns_to_drop(self, columns, strict=strict)
446443
return self._with_native(self.native.drop(to_drop), validate_column_names=False)
447444

448445
def drop_nulls(self: ArrowDataFrame, subset: Sequence[str] | None) -> ArrowDataFrame:
@@ -693,7 +690,8 @@ def unique(
693690
# and has no effect on the output.
694691
import numpy as np # ignore-banned-import
695692

696-
check_column_exists(self.columns, subset)
693+
if subset and (error := self._check_columns_exist(subset)):
694+
raise error
697695
subset = list(subset or self.columns)
698696

699697
if keep in {"any", "first", "last"}:

narwhals/_arrow/expr.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
from narwhals._arrow.series import ArrowSeries
88
from narwhals._compliant import EagerExpr
99
from narwhals._expression_parsing import evaluate_output_names_and_aliases
10-
from narwhals.exceptions import ColumnNotFoundError
1110
from narwhals.utils import Implementation, generate_temporary_column_name, not_implemented
1211

1312
if TYPE_CHECKING:
@@ -69,12 +68,9 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]:
6968
for column_name in evaluate_column_names(df)
7069
]
7170
except KeyError as e:
72-
missing_columns = [
73-
x for x in evaluate_column_names(df) if x not in df.columns
74-
]
75-
raise ColumnNotFoundError.from_missing_and_available_column_names(
76-
missing_columns=missing_columns, available_columns=df.columns
77-
) from e
71+
if error := df._check_columns_exist(evaluate_column_names(df)):
72+
raise error from e
73+
raise
7874

7975
return cls(
8076
func,

narwhals/_compliant/dataframe.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636
from narwhals.utils import (
3737
Version,
3838
_StoresNative,
39+
check_columns_exist,
3940
is_compliant_series,
4041
is_index_selector,
4142
is_range,
@@ -59,6 +60,7 @@
5960
from narwhals._translate import IntoArrowTable
6061
from narwhals.dataframe import DataFrame
6162
from narwhals.dtypes import DType
63+
from narwhals.exceptions import ColumnNotFoundError
6264
from narwhals.schema import Schema
6365
from narwhals.typing import (
6466
AsofJoinStrategy,
@@ -270,6 +272,9 @@ def _evaluate_aliases(self, *exprs: CompliantExprT_contra) -> list[str]:
270272
it = (expr._evaluate_aliases(self) for expr in exprs)
271273
return list(chain.from_iterable(it))
272274

275+
def _check_columns_exist(self, subset: Sequence[str]) -> ColumnNotFoundError | None:
276+
return check_columns_exist(subset, available=self.columns)
277+
273278

274279
class CompliantLazyFrame(
275280
_StoresNative[NativeFrameT],
@@ -377,6 +382,9 @@ def _evaluate_aliases(self, *exprs: CompliantExprT_contra) -> list[str]:
377382
it = (expr._evaluate_aliases(self) for expr in exprs)
378383
return list(chain.from_iterable(it))
379384

385+
def _check_columns_exist(self, subset: Sequence[str]) -> ColumnNotFoundError | None:
386+
return check_columns_exist(subset, available=self.columns)
387+
380388

381389
class EagerDataFrame(
382390
CompliantDataFrame[EagerSeriesT, EagerExprT, NativeFrameT, "DataFrame[NativeFrameT]"],

narwhals/_dask/dataframe.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111
from narwhals.utils import (
1212
Implementation,
1313
_remap_full_join_keys,
14-
check_column_exists,
1514
check_column_names_are_unique,
1615
generate_temporary_column_name,
1716
not_implemented,
@@ -200,9 +199,7 @@ def collect_schema(self) -> dict[str, DType]:
200199
return self.schema
201200

202201
def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
203-
to_drop = parse_columns_to_drop(
204-
compliant_frame=self, columns=columns, strict=strict
205-
)
202+
to_drop = parse_columns_to_drop(self, columns, strict=strict)
206203

207204
return self._with_native(self.native.drop(columns=to_drop))
208205

@@ -222,7 +219,8 @@ def head(self, n: int) -> Self:
222219
def unique(
223220
self, subset: Sequence[str] | None, *, keep: LazyUniqueKeepStrategy
224221
) -> Self:
225-
check_column_exists(self.columns, subset)
222+
if subset and (error := self._check_columns_exist(subset)):
223+
raise error
226224
if keep == "none":
227225
subset = subset or self.columns
228226
token = generate_temporary_column_name(n_bytes=8, columns=subset)

narwhals/_dask/expr.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -14,7 +14,7 @@
1414
)
1515
from narwhals._expression_parsing import ExprKind, evaluate_output_names_and_aliases
1616
from narwhals._pandas_like.utils import native_to_narwhals_dtype
17-
from narwhals.exceptions import ColumnNotFoundError, InvalidOperationError
17+
from narwhals.exceptions import InvalidOperationError
1818
from narwhals.utils import Implementation, generate_temporary_column_name, not_implemented
1919

2020
if TYPE_CHECKING:
@@ -106,12 +106,9 @@ def func(df: DaskLazyFrame) -> list[dx.Series]:
106106
for column_name in evaluate_column_names(df)
107107
]
108108
except KeyError as e:
109-
missing_columns = [
110-
x for x in evaluate_column_names(df) if x not in df.columns
111-
]
112-
raise ColumnNotFoundError.from_missing_and_available_column_names(
113-
missing_columns=missing_columns, available_columns=df.columns
114-
) from e
109+
if error := df._check_columns_exist(evaluate_column_names(df)):
110+
raise error from e
111+
raise
115112

116113
return cls(
117114
func,

narwhals/_duckdb/dataframe.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
native_to_narwhals_dtype,
1818
)
1919
from narwhals.dependencies import get_duckdb
20-
from narwhals.exceptions import ColumnNotFoundError, InvalidOperationError
20+
from narwhals.exceptions import InvalidOperationError
2121
from narwhals.typing import CompliantLazyFrame
2222
from narwhals.utils import (
2323
Implementation,
@@ -182,7 +182,7 @@ def select(self, *exprs: DuckDBExpr) -> Self:
182182
return self._with_native(self.native.select(*selection))
183183

184184
def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
185-
columns_to_drop = parse_columns_to_drop(self, columns=columns, strict=strict)
185+
columns_to_drop = parse_columns_to_drop(self, columns, strict=strict)
186186
selection = (name for name in self.columns if name not in columns_to_drop)
187187
return self._with_native(self.native.select(*selection))
188188

@@ -387,9 +387,8 @@ def unique(
387387
)
388388
raise NotImplementedError(msg)
389389
# Sanitise input
390-
if any(x not in self.columns for x in subset_):
391-
msg = f"Columns {set(subset_).difference(self.columns)} not found in {self.columns}."
392-
raise ColumnNotFoundError(msg)
390+
if error := self._check_columns_exist(subset_):
391+
raise error
393392
idx_name = generate_temporary_column_name(8, self.columns)
394393
count_name = generate_temporary_column_name(8, [*self.columns, idx_name])
395394
partition_by_sql = generate_partition_by_sql(*(subset_))

narwhals/_ibis/dataframe.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -176,7 +176,7 @@ def select(self, *exprs: IbisExpr) -> Self:
176176
return self._with_native(t)
177177

178178
def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
179-
columns_to_drop = parse_columns_to_drop(self, columns=columns, strict=strict)
179+
columns_to_drop = parse_columns_to_drop(self, columns, strict=strict)
180180
selection = (col for col in self.columns if col not in columns_to_drop)
181181
return self._with_native(self.native.select(*selection))
182182

narwhals/_pandas_like/dataframe.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,6 @@
3535
Implementation,
3636
_into_arrow_table,
3737
_remap_full_join_keys,
38-
check_column_exists,
3938
exclude_column_names,
4039
generate_temporary_column_name,
4140
parse_columns_to_drop,
@@ -485,9 +484,7 @@ def rename(self, mapping: Mapping[str, str]) -> Self:
485484
)
486485

487486
def drop(self, columns: Sequence[str], *, strict: bool) -> Self:
488-
to_drop = parse_columns_to_drop(
489-
compliant_frame=self, columns=columns, strict=strict
490-
)
487+
to_drop = parse_columns_to_drop(self, columns, strict=strict)
491488
return self._with_native(
492489
self.native.drop(columns=to_drop), validate_column_names=False
493490
)
@@ -753,7 +750,8 @@ def unique(
753750
# The param `maintain_order` is only here for compatibility with the Polars API
754751
# and has no effect on the output.
755752
mapped_keep = {"none": False, "any": "first"}.get(keep, keep)
756-
check_column_exists(self.columns, subset)
753+
if subset and (error := self._check_columns_exist(subset)):
754+
raise error
757755
return self._with_native(
758756
self.native.drop_duplicates(subset=subset, keep=mapped_keep),
759757
validate_column_names=False,

narwhals/_pandas_like/expr.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
from narwhals._expression_parsing import evaluate_output_names_and_aliases
77
from narwhals._pandas_like.group_by import PandasLikeGroupBy
88
from narwhals._pandas_like.series import PandasLikeSeries
9-
from narwhals.exceptions import ColumnNotFoundError
109
from narwhals.utils import generate_temporary_column_name
1110

1211
if TYPE_CHECKING:
@@ -138,12 +137,9 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]:
138137
for column_name in evaluate_column_names(df)
139138
]
140139
except KeyError as e:
141-
missing_columns = [
142-
x for x in evaluate_column_names(df) if x not in df.columns
143-
]
144-
raise ColumnNotFoundError.from_missing_and_available_column_names(
145-
missing_columns=missing_columns, available_columns=df.columns
146-
) from e
140+
if error := df._check_columns_exist(evaluate_column_names(df)):
141+
raise error from e
142+
raise
147143

148144
return cls(
149145
func,

narwhals/_pandas_like/utils.py

Lines changed: 13 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -8,11 +8,12 @@
88
import pandas as pd
99

1010
from narwhals._compliant.series import EagerSeriesNamespace
11-
from narwhals.exceptions import ColumnNotFoundError, DuplicateError, ShapeError
11+
from narwhals.exceptions import DuplicateError, ShapeError
1212
from narwhals.utils import (
1313
Implementation,
1414
Version,
1515
_DeferredIterable,
16+
check_columns_exist,
1617
isinstance_or_issubclass,
1718
)
1819

@@ -622,21 +623,21 @@ def select_columns_by_name(
622623
):
623624
# See https://github.com/narwhals-dev/narwhals/issues/1349#issuecomment-2470118122
624625
# for why we need this
625-
available_columns = df.columns.tolist() # type: ignore[attr-defined]
626-
missing_columns = [x for x in column_names if x not in available_columns]
627-
if missing_columns: # pragma: no cover
628-
raise ColumnNotFoundError.from_missing_and_available_column_names(
629-
missing_columns, available_columns
630-
)
626+
if error := check_columns_exist(
627+
column_names, # type: ignore[arg-type]
628+
available=df.columns.tolist(), # type: ignore[attr-defined]
629+
):
630+
raise error
631631
return df.loc[:, column_names] # type: ignore[attr-defined]
632632
try:
633633
return df[column_names] # type: ignore[index]
634634
except KeyError as e:
635-
available_columns = df.columns.tolist() # type: ignore[attr-defined]
636-
missing_columns = [x for x in column_names if x not in available_columns]
637-
raise ColumnNotFoundError.from_missing_and_available_column_names(
638-
missing_columns, available_columns
639-
) from e
635+
if error := check_columns_exist(
636+
column_names, # type: ignore[arg-type]
637+
available=df.columns.tolist(), # type: ignore[attr-defined]
638+
):
639+
raise error from e
640+
raise
640641

641642

642643
def check_column_names_are_unique(columns: pd.Index[str]) -> None:

0 commit comments

Comments
 (0)