Skip to content

Commit 84a64ab

Browse files
authored
feat!: require at least one expression be passed to `LazyFrame.select` and `LazyFrame.with_columns`, remove `LazyFrame.clone` (#2206)
1 parent df38225 commit 84a64ab

File tree

10 files changed

+31
-69
lines changed

10 files changed

+31
-69
lines changed

docs/api-reference/lazyframe.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
handler: python
55
options:
66
members:
7-
- clone
87
- collect
98
- collect_schema
109
- columns

narwhals/_dask/dataframe.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,6 @@ def collect(
9494
backend: Implementation | None,
9595
**kwargs: Any,
9696
) -> CompliantDataFrame[Any]:
97-
import pandas as pd
98-
9997
result = self._native_frame.compute(**kwargs)
10098

10199
if backend is None or backend is Implementation.PANDAS:
@@ -162,15 +160,6 @@ def aggregate(self: Self, *exprs: DaskExpr) -> Self:
162160

163161
def select(self: Self, *exprs: DaskExpr) -> Self:
164162
new_series = evaluate_exprs(self, *exprs)
165-
166-
if not new_series:
167-
# return empty dataframe, like Polars does
168-
return self._from_native_frame(
169-
dd.from_pandas(
170-
pd.DataFrame(), npartitions=self._native_frame.npartitions
171-
),
172-
)
173-
174163
df = select_columns_by_name(
175164
self._native_frame.assign(**dict(new_series)),
176165
[s[0] for s in new_series],

narwhals/_dask/namespace.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,10 @@ def func(df: DaskLazyFrame) -> list[dx.Series]:
107107

108108
def len(self: Self) -> DaskExpr:
109109
def func(df: DaskLazyFrame) -> list[dx.Series]:
110-
if not df.columns:
111-
return [
112-
dd.from_pandas(
113-
pd.Series([0], name="len"),
114-
npartitions=df._native_frame.npartitions,
115-
)
116-
]
110+
# We don't allow dataframes with 0 columns, so `[0]` is safe.
117111
return [df._native_frame[df.columns[0]].size.to_series()]
118112

119-
# coverage bug? this is definitely hit
120-
return DaskExpr( # pragma: no cover
113+
return DaskExpr(
121114
func,
122115
depth=0,
123116
function_name="len",

narwhals/_duckdb/dataframe.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,6 @@ def select(
151151
*exprs: DuckDBExpr,
152152
) -> Self:
153153
new_columns_map = evaluate_exprs(self, *exprs)
154-
if not new_columns_map:
155-
# TODO(marco): return empty relation with 0 columns?
156-
return self._from_native_frame(self._native_frame.limit(0))
157154
return self._from_native_frame(
158155
self._native_frame.select(*(val.alias(col) for col, val in new_columns_map)),
159156
)

narwhals/_spark_like/dataframe.py

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
import pyarrow as pa
3030
from sqlframe.base.column import Column
3131
from sqlframe.base.dataframe import BaseDataFrame
32-
from sqlframe.base.session import _BaseSession
3332
from sqlframe.base.window import Window
3433
from typing_extensions import Self
3534
from typing_extensions import TypeAlias
@@ -41,7 +40,6 @@
4140
from narwhals.utils import Version
4241

4342
SQLFrameDataFrame = BaseDataFrame[Any, Any, Any, Any, Any]
44-
SQLFrameSession = _BaseSession[Any, Any, Any, Any, Any, Any, Any]
4543

4644
Incomplete: TypeAlias = Any # pragma: no cover
4745
"""Marker for working code that fails type checking."""
@@ -90,14 +88,6 @@ def _Window(self: Self) -> type[Window]: # noqa: N802
9088
else:
9189
return import_window(self._implementation)
9290

93-
@property
94-
def _session(self: Self) -> SQLFrameSession:
95-
if TYPE_CHECKING:
96-
return self._native_frame.session
97-
if self._implementation is Implementation.PYSPARK:
98-
return self._native_frame.sparkSession
99-
return self._native_frame.session
100-
10191
def __native_namespace__(self: Self) -> ModuleType: # pragma: no cover
10292
return self._implementation.to_native_namespace()
10393

@@ -240,13 +230,6 @@ def select(
240230
*exprs: SparkLikeExpr,
241231
) -> Self:
242232
new_columns = evaluate_exprs(self, *exprs)
243-
244-
if not new_columns:
245-
# return empty dataframe, like Polars does
246-
schema = self._native_dtypes.StructType([])
247-
spark_df = self._session.createDataFrame([], schema)
248-
return self._from_native_frame(spark_df)
249-
250233
new_columns_list = [col.alias(col_name) for (col_name, col) in new_columns]
251234
return self._from_native_frame(self._native_frame.select(*new_columns_list))
252235

narwhals/dataframe.py

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -278,9 +278,6 @@ def join(
278278
)
279279
)
280280

281-
def clone(self: Self) -> Self:
282-
return self._from_compliant_dataframe(self._compliant_frame.clone())
283-
284281
def gather_every(self: Self, n: int, offset: int = 0) -> Self:
285282
return self._from_compliant_dataframe(
286283
self._compliant_frame.gather_every(n=n, offset=offset)
@@ -1875,7 +1872,7 @@ def clone(self: Self) -> Self:
18751872
Returns:
18761873
An identical copy of the original dataframe.
18771874
"""
1878-
return super().clone()
1875+
return self._from_compliant_dataframe(self._compliant_frame.clone())
18791876

18801877
def gather_every(self: Self, n: int, offset: int = 0) -> Self:
18811878
r"""Take every nth row in the DataFrame and return as a new DataFrame.
@@ -2520,6 +2517,9 @@ def with_columns(
25202517
|└───────┴──────────────┴───────┘|
25212518
└────────────────────────────────┘
25222519
"""
2520+
if not exprs and not named_exprs:
2521+
msg = "At least one expression must be passed to LazyFrame.with_columns"
2522+
raise ValueError(msg)
25232523
return super().with_columns(*exprs, **named_exprs)
25242524

25252525
def select(
@@ -2561,6 +2561,9 @@ def select(
25612561
|└───────┴──────────┘|
25622562
└────────────────────┘
25632563
"""
2564+
if not exprs and not named_exprs:
2565+
msg = "At least one expression must be passed to LazyFrame.select"
2566+
raise ValueError(msg)
25642567
return super().select(*exprs, **named_exprs)
25652568

25662569
def rename(self: Self, mapping: dict[str, str]) -> Self:
@@ -3055,14 +3058,6 @@ def join_asof(
30553058
suffix=suffix,
30563059
)
30573060

3058-
def clone(self: Self) -> Self:
3059-
r"""Create a copy of this DataFrame.
3060-
3061-
Returns:
3062-
An identical copy of the original LazyFrame.
3063-
"""
3064-
return super().clone()
3065-
30663061
def lazy(self: Self) -> Self:
30673062
"""Restrict available API methods to lazy-only ones.
30683063

tests/expr_and_series/len_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def test_namespace_len(constructor: Constructor) -> None:
3636
assert_equal_data(df, expected)
3737
df = (
3838
nw.from_native(constructor({"a": [1, 2, 3], "b": [4, 5, 6]}))
39-
.select()
39+
.filter(nw.col("a") < 0)
4040
.select(nw.len(), a=nw.len())
4141
)
4242
expected = {"len": [0], "a": [0]}

tests/frame/clone_test.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,20 +3,18 @@
33
import pytest
44

55
import narwhals.stable.v1 as nw
6-
from tests.utils import Constructor
6+
from tests.utils import ConstructorEager
77
from tests.utils import assert_equal_data
88

99

10-
def test_clone(request: pytest.FixtureRequest, constructor: Constructor) -> None:
11-
if "dask" in str(constructor):
12-
request.applymarker(pytest.mark.xfail)
13-
if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
14-
request.applymarker(pytest.mark.xfail)
15-
if "pyarrow_table" in str(constructor):
10+
def test_clone(
11+
request: pytest.FixtureRequest, constructor_eager: ConstructorEager
12+
) -> None:
13+
if "pyarrow_table" in str(constructor_eager):
1614
request.applymarker(pytest.mark.xfail)
1715

1816
expected = {"a": [1, 2], "b": [3, 4]}
19-
df = nw.from_native(constructor(expected))
17+
df = nw.from_native(constructor_eager(expected), eager_only=True)
2018
df_clone = df.clone()
2119
assert df is not df_clone
2220
assert df._compliant_frame is not df_clone._compliant_frame

tests/frame/select_test.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,9 @@ def test_select(constructor: Constructor) -> None:
2929
assert_equal_data(result, expected)
3030

3131

32-
def test_empty_select(constructor: Constructor, request: pytest.FixtureRequest) -> None:
33-
if "duckdb" in str(constructor) or "sqlframe" in str(constructor):
34-
request.applymarker(pytest.mark.xfail)
35-
result = nw.from_native(constructor({"a": [1, 2, 3]})).lazy().select()
36-
assert result.collect().shape == (0, 0)
32+
def test_empty_select(constructor_eager: ConstructorEager) -> None:
33+
result = nw.from_native(constructor_eager({"a": [1, 2, 3]}), eager_only=True).select()
34+
assert result.shape == (0, 0)
3735

3836

3937
def test_non_string_select() -> None:

tests/frame/with_columns_test.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import narwhals.stable.v1 as nw
88
from tests.utils import PYARROW_VERSION
99
from tests.utils import Constructor
10+
from tests.utils import ConstructorEager
1011
from tests.utils import assert_equal_data
1112

1213

@@ -30,13 +31,22 @@ def test_with_columns_order(constructor: Constructor) -> None:
3031
assert_equal_data(result, expected)
3132

3233

33-
def test_with_columns_empty(constructor: Constructor) -> None:
34+
def test_with_columns_empty(constructor_eager: ConstructorEager) -> None:
3435
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]}
35-
df = nw.from_native(constructor(data))
36+
df = nw.from_native(constructor_eager(data))
3637
result = df.select().with_columns()
3738
assert_equal_data(result, {})
3839

3940

41+
def test_select_with_columns_empty_lazy(constructor: Constructor) -> None:
42+
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0]}
43+
df = nw.from_native(constructor(data)).lazy()
44+
with pytest.raises(ValueError, match="At least one"):
45+
df.with_columns()
46+
with pytest.raises(ValueError, match="At least one"):
47+
df.select()
48+
49+
4050
def test_with_columns_order_single_row(constructor: Constructor) -> None:
4151
data = {"a": [1, 3, 2], "b": [4, 4, 6], "z": [7.0, 8.0, 9.0], "i": [0, 1, 2]}
4252
df = nw.from_native(constructor(data)).filter(nw.col("i") < 1).drop("i")

0 commit comments

Comments
 (0)