Skip to content

Commit 1b8d548

Browse files
authored
chore: Deprecate native_namespace in favour of backend in scan_parquet (#2217)
1 parent 436360b commit 1b8d548

File tree

3 files changed

+49
-14
lines changed

3 files changed

+49
-14
lines changed

narwhals/functions.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -986,8 +986,13 @@ def _read_parquet_impl(
986986
return from_native(native_frame, eager_only=True)
987987

988988

989+
@deprecate_native_namespace(warn_version="1.31.0", required=True)
989990
def scan_parquet(
990-
source: str, *, native_namespace: ModuleType, **kwargs: Any
991+
source: str,
992+
*,
993+
backend: ModuleType | Implementation | str | None = None,
994+
native_namespace: ModuleType | None = None, # noqa: ARG001
995+
**kwargs: Any,
991996
) -> LazyFrame[Any]:
992997
"""Lazily read from a parquet file.
993998
@@ -996,7 +1001,19 @@ def scan_parquet(
9961001
9971002
Arguments:
9981003
source: Path to a file.
1004+
backend: The backend to use for reading the file and creating the LazyFrame.
1005+
`backend` can be specified in various ways:
1006+
1007+
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
1008+
`POLARS`, `MODIN` or `CUDF`.
1009+
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
1010+
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
9991011
native_namespace: The native library to use for DataFrame creation.
1012+
1013+
**Deprecated** (v1.31.0):
1014+
Please use `backend` instead. Note that `native_namespace` is still available
1015+
(and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
1016+
see [perfect backwards compatibility policy](../backcompat.md/).
10001017
kwargs: Extra keyword arguments which are passed to the native parquet reader.
10011018
For example, you could use
10021019
`nw.scan_parquet('file.parquet', backend=pd, engine='pyarrow')`.
@@ -1008,9 +1025,7 @@ def scan_parquet(
10081025
>>> import dask.dataframe as dd
10091026
>>> import narwhals as nw
10101027
>>>
1011-
>>> nw.scan_parquet(
1012-
... "file.parquet", native_namespace=dd
1013-
... ).collect() # doctest:+SKIP
1028+
>>> nw.scan_parquet("file.parquet", backend="dask").collect() # doctest:+SKIP
10141029
┌──────────────────┐
10151030
|Narwhals DataFrame|
10161031
|------------------|
@@ -1019,13 +1034,15 @@ def scan_parquet(
10191034
| 1 2 5 |
10201035
└──────────────────┘
10211036
"""
1022-
return _scan_parquet_impl(source, native_namespace=native_namespace, **kwargs)
1037+
backend = cast("ModuleType | Implementation | str", backend)
1038+
return _scan_parquet_impl(source, backend=backend, **kwargs)
10231039

10241040

10251041
def _scan_parquet_impl(
1026-
source: str, *, native_namespace: ModuleType, **kwargs: Any
1042+
source: str, *, backend: ModuleType | Implementation | str, **kwargs: Any
10271043
) -> LazyFrame[Any]:
1028-
implementation = Implementation.from_native_namespace(native_namespace)
1044+
implementation = Implementation.from_backend(backend)
1045+
native_namespace = implementation.to_native_namespace()
10291046
native_frame: NativeFrame | NativeLazyFrame
10301047
if implementation is Implementation.POLARS:
10311048
native_frame = native_namespace.scan_parquet(source, **kwargs)

narwhals/stable/v1/__init__.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2420,8 +2420,13 @@ def read_parquet(
24202420
)
24212421

24222422

2423+
@deprecate_native_namespace(required=True)
24232424
def scan_parquet(
2424-
source: str, *, native_namespace: ModuleType, **kwargs: Any
2425+
source: str,
2426+
*,
2427+
backend: ModuleType | Implementation | str | None = None,
2428+
native_namespace: ModuleType | None = None, # noqa: ARG001
2429+
**kwargs: Any,
24252430
) -> LazyFrame[Any]:
24262431
"""Lazily read from a parquet file.
24272432
@@ -2430,16 +2435,29 @@ def scan_parquet(
24302435
24312436
Arguments:
24322437
source: Path to a file.
2438+
backend: The backend to use for reading the file and creating the LazyFrame.
2439+
`backend` can be specified in various ways:
2440+
2441+
- As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
2442+
`POLARS`, `MODIN` or `CUDF`.
2443+
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
2444+
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
24332445
native_namespace: The native library to use for DataFrame creation.
2446+
2447+
**Deprecated** (v1.31.0):
2448+
Please use `backend` instead. Note that `native_namespace` is still available
2449+
(and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
2450+
see [perfect backwards compatibility policy](../backcompat.md/).
24342451
kwargs: Extra keyword arguments which are passed to the native parquet reader.
24352452
For example, you could use
24362453
`nw.scan_parquet('file.parquet', native_namespace=pd, engine='pyarrow')`.
24372454
24382455
Returns:
24392456
LazyFrame.
24402457
"""
2458+
backend = cast("ModuleType | Implementation | str", backend)
24412459
return _stableify( # type: ignore[no-any-return]
2442-
_scan_parquet_impl(source, native_namespace=native_namespace, **kwargs)
2460+
_scan_parquet_impl(source, backend=backend, **kwargs)
24432461
)
24442462

24452463

tests/read_scan_test.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -159,8 +159,8 @@ def test_scan_parquet(
159159
filepath = str(tmpdir / "file.parquet") # type: ignore[operator]
160160
df_pl.write_parquet(filepath)
161161
df = nw.from_native(constructor(data))
162-
native_namespace = nw.get_native_namespace(df)
163-
result = nw.scan_parquet(filepath, native_namespace=native_namespace)
162+
backend = nw.get_native_namespace(df)
163+
result = nw.scan_parquet(filepath, backend=backend)
164164
assert_equal_data(result, data)
165165
assert isinstance(result, nw.LazyFrame)
166166

@@ -177,8 +177,8 @@ def test_scan_parquet_v1(
177177
filepath = str(tmpdir / "file.parquet") # type: ignore[operator]
178178
df_pl.write_parquet(filepath)
179179
df = nw_v1.from_native(constructor(data))
180-
native_namespace = nw_v1.get_native_namespace(df)
181-
result = nw_v1.scan_parquet(filepath, native_namespace=native_namespace)
180+
backend = nw_v1.get_native_namespace(df)
181+
result = nw_v1.scan_parquet(filepath, backend=backend)
182182
assert_equal_data(result, data)
183183
assert isinstance(result, nw_v1.LazyFrame)
184184

@@ -188,5 +188,5 @@ def test_scan_parquet_kwargs(tmpdir: pytest.TempdirFactory) -> None:
188188
df_pl = pl.DataFrame(data)
189189
filepath = str(tmpdir / "file.parquet") # type: ignore[operator]
190190
df_pl.write_parquet(filepath)
191-
result = nw.scan_parquet(filepath, native_namespace=pd, engine="pyarrow")
191+
result = nw.scan_parquet(filepath, backend=pd, engine="pyarrow")
192192
assert_equal_data(result, data)

0 commit comments

Comments
 (0)