chore: Deprecate native_namespace in favour of backend in scan_csv (#2218)

dangotbanned · web-flow · commit 636fe409ca5d · 2025-03-15T18:53:11.000Z
- Related issue #1888
diff --git a/narwhals/functions.py b/narwhals/functions.py
@@ -840,8 +840,13 @@ def _read_csv_impl(
     return from_native(native_frame, eager_only=True)
 
 
+@deprecate_native_namespace(warn_version="1.31.0", required=True)
 def scan_csv(
-    source: str, *, native_namespace: ModuleType, **kwargs: Any
+    source: str,
+    *,
+    backend: ModuleType | Implementation | str | None = None,
+    native_namespace: ModuleType | None = None,  # noqa: ARG001
+    **kwargs: Any,
 ) -> LazyFrame[Any]:
     """Lazily read from a CSV file.
 
@@ -850,10 +855,22 @@ def scan_csv(
 
     Arguments:
         source: Path to a file.
+        backend: The backend (eager or lazy) for LazyFrame creation.
+            `backend` can be specified in various ways:
+
+            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+                `POLARS`, `MODIN` or `CUDF`.
+            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
         native_namespace: The native library to use for DataFrame creation.
+
+            **Deprecated** (v1.31.0):
+                Please use `backend` instead. Note that `native_namespace` is still available
+                (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+                see [perfect backwards compatibility policy](../backcompat.md/).
         kwargs: Extra keyword arguments which are passed to the native CSV reader.
             For example, you could use
-            `nw.scan_csv('file.csv', native_namespace=pd, engine='pyarrow')`.
+            `nw.scan_csv('file.csv', backend=pd, engine='pyarrow')`.
 
     Returns:
         LazyFrame.
@@ -862,7 +879,7 @@ def scan_csv(
         >>> import duckdb
         >>> import narwhals as nw
         >>>
-        >>> nw.scan_csv("file.csv", native_namespace=duckdb).to_native()  # doctest:+SKIP
+        >>> nw.scan_csv("file.csv", backend="duckdb").to_native()  # doctest:+SKIP
         ┌─────────┬───────┐
         │    a    │   b   │
         │ varchar │ int32 │
@@ -872,13 +889,15 @@ def scan_csv(
         │ z       │     3 │
         └─────────┴───────┘
     """
-    return _scan_csv_impl(source, native_namespace=native_namespace, **kwargs)
+    backend = cast("ModuleType | Implementation | str", backend)
+    return _scan_csv_impl(source, backend=backend, **kwargs)
 
 
 def _scan_csv_impl(
-    source: str, *, native_namespace: ModuleType, **kwargs: Any
+    source: str, *, backend: ModuleType | Implementation | str, **kwargs: Any
 ) -> LazyFrame[Any]:
-    implementation = Implementation.from_native_namespace(native_namespace)
+    implementation = Implementation.from_backend(backend)
+    native_namespace = implementation.to_native_namespace()
     native_frame: NativeFrame | NativeLazyFrame
     if implementation is Implementation.POLARS:
         native_frame = native_namespace.scan_csv(source, **kwargs)
diff --git a/narwhals/stable/v1/__init__.py b/narwhals/stable/v1/__init__.py
@@ -2359,8 +2359,13 @@ def read_csv(
     )
 
 
+@deprecate_native_namespace(required=True)
 def scan_csv(
-    source: str, *, native_namespace: ModuleType, **kwargs: Any
+    source: str,
+    *,
+    backend: ModuleType | Implementation | str | None = None,
+    native_namespace: ModuleType | None = None,  # noqa: ARG001
+    **kwargs: Any,
 ) -> LazyFrame[Any]:
     """Lazily read from a CSV file.
 
@@ -2369,16 +2374,29 @@ def scan_csv(
 
     Arguments:
         source: Path to a file.
+        backend: The backend (eager or lazy) for LazyFrame creation.
+            `backend` can be specified in various ways:
+
+            - As `Implementation.<BACKEND>` with `BACKEND` being `PANDAS`, `PYARROW`,
+                `POLARS`, `MODIN` or `CUDF`.
+            - As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
+            - Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
         native_namespace: The native library to use for DataFrame creation.
+
+            **Deprecated** (v1.31.0):
+                Please use `backend` instead. Note that `native_namespace` is still available
+                (and won't emit a deprecation warning) if you use `narwhals.stable.v1`,
+                see [perfect backwards compatibility policy](../backcompat.md/).
         kwargs: Extra keyword arguments which are passed to the native CSV reader.
             For example, you could use
-            `nw.scan_csv('file.csv', native_namespace=pd, engine='pyarrow')`.
+            `nw.scan_csv('file.csv', backend=pd, engine='pyarrow')`.
 
     Returns:
         LazyFrame.
     """
+    backend = cast("ModuleType | Implementation | str", backend)
     return _stableify(  # type: ignore[no-any-return]
-        _scan_csv_impl(source, native_namespace=native_namespace, **kwargs)
+        _scan_csv_impl(source, backend=backend, **kwargs)
     )
 
 
diff --git a/tests/read_scan_test.py b/tests/read_scan_test.py
@@ -77,8 +77,8 @@ def test_scan_csv(
     filepath = str(tmpdir / "file.csv")  # type: ignore[operator]
     df_pl.write_csv(filepath)
     df = nw.from_native(constructor(data))
-    native_namespace = nw.get_native_namespace(df)
-    result = nw.scan_csv(filepath, native_namespace=native_namespace)
+    backend = nw.get_native_namespace(df)
+    result = nw.scan_csv(filepath, backend=backend)
     assert_equal_data(result, data)
     assert isinstance(result, nw.LazyFrame)
 
@@ -94,8 +94,8 @@ def test_scan_csv_v1(
     filepath = str(tmpdir / "file.csv")  # type: ignore[operator]
     df_pl.write_csv(filepath)
     df = nw_v1.from_native(constructor(data))
-    native_namespace = nw_v1.get_native_namespace(df)
-    result = nw_v1.scan_csv(filepath, native_namespace=native_namespace)
+    backend = nw_v1.get_native_namespace(df)
+    result = nw_v1.scan_csv(filepath, backend=backend)
     assert_equal_data(result, data)
     assert isinstance(result, nw_v1.LazyFrame)
 
@@ -105,7 +105,7 @@ def test_scan_csv_kwargs(tmpdir: pytest.TempdirFactory) -> None:
     df_pl = pl.DataFrame(data)
     filepath = str(tmpdir / "file.csv")  # type: ignore[operator]
     df_pl.write_csv(filepath)
-    result = nw.scan_csv(filepath, native_namespace=pd, engine="pyarrow")
+    result = nw.scan_csv(filepath, backend=pd, engine="pyarrow")
     assert_equal_data(result, data)