Commit bc26492

feat(expr-ir): Add DataFrame.write_{csv,parquet}
Child of #2572
1 parent d47a7fa commit bc26492

File tree: 4 files changed (+126 −2 lines)

narwhals/_plan/arrow/dataframe.py

Lines changed: 5 additions & 1 deletion

@@ -14,7 +14,7 @@
 from narwhals._plan.arrow.expr import ArrowExpr as Expr, ArrowScalar as Scalar
 from narwhals._plan.arrow.group_by import ArrowGroupBy as GroupBy, partition_by
 from narwhals._plan.arrow.series import ArrowSeries as Series
-from narwhals._plan.common import temp
+from narwhals._plan.common import temp, todo
 from narwhals._plan.compliant.dataframe import EagerDataFrame
 from narwhals._plan.compliant.typing import namespace
 from narwhals._plan.exceptions import shape_error
@@ -191,6 +191,10 @@ def with_row_index_by(
         column = fn.unsort_indices(indices)
         return self._with_native(self.native.add_column(0, name, column))
 
+    write_csv = todo()
+    write_parquet = todo()
+    sink_parquet = todo()
+
     def to_struct(self, name: str = "") -> Series:
         native = self.native
         if fn.TO_STRUCT_ARRAY_ACCEPTS_EMPTY:
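In this commit all three writers are stubbed with `todo()`; the new tests are marked `xfail` with `raises=NotImplementedError`, so the placeholders evidently raise that when invoked. For orientation only, a minimal sketch of what the eventual PyArrow-backed implementations might look like — hypothetical, not part of this commit — using the existing `pyarrow.csv.write_csv` and `pyarrow.parquet.write_table` APIs:

import pyarrow as pa
import pyarrow.csv as pacsv
import pyarrow.parquet as pq

def write_csv(self, file: FileSource | BytesIO | None = None) -> str | None:
    # No target given: write into an in-memory buffer and return the text.
    if file is None:
        buffer = pa.BufferOutputStream()
        pacsv.write_csv(self.native, buffer)
        return buffer.getvalue().to_pybytes().decode("utf-8")
    # Path-like or buffer target: write directly, return nothing.
    pacsv.write_csv(self.native, file)
    return None

def write_parquet(self, file: FileSource | BytesIO) -> None:
    pq.write_table(self.native, file)

Note that PyArrow's CSV writer quotes string column names by default, which is consistent with the `'"a"\n1\n2\n3\n'` expectation in the new pyarrow test branch.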

narwhals/_plan/compliant/dataframe.py

Lines changed: 11 additions & 1 deletion

@@ -17,6 +17,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Iterator, Mapping, Sequence
+    from io import BytesIO
 
     import polars as pl
     from typing_extensions import Self, TypeAlias
@@ -36,7 +37,7 @@
     from narwhals._typing import _EagerAllowedImpl
     from narwhals._utils import Implementation, Version
     from narwhals.dtypes import DType
-    from narwhals.typing import IntoSchema, UniqueKeepStrategy
+    from narwhals.typing import FileSource, IntoSchema, UniqueKeepStrategy
 
 Incomplete: TypeAlias = Any
 
@@ -208,6 +209,12 @@ def unique_by(
         maintain_order: bool = False,
     ) -> Self: ...
    def with_row_index(self, name: str) -> Self: ...
+    @overload
+    def write_csv(self, file: None) -> str: ...
+    @overload
+    def write_csv(self, file: FileSource | BytesIO) -> None: ...
+    def write_csv(self, file: FileSource | BytesIO | None) -> str | None: ...
+    def write_parquet(self, file: FileSource | BytesIO) -> None: ...
     def slice(self, offset: int, length: int | None = None) -> Self: ...
     def sample_frac(
         self, fraction: float, *, with_replacement: bool = False, seed: int | None = None
@@ -246,3 +253,6 @@ def with_columns(self, irs: Seq[NamedIR]) -> Self:
 
     def to_series(self, index: int = 0) -> SeriesT:
         return self.get_column(self.columns[index])
+
+    # TODO @dangotbanned: Move to `CompliantLazyFrame` once that's added
+    def sink_parquet(self, file: FileSource | BytesIO) -> None: ...
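The paired `@overload` stubs mirror the Polars convention: `write_csv(None)` is typed as returning the CSV text, while a path or buffer target is typed as returning `None`. A standalone, runnable demo of the same pattern (the names here are hypothetical, not narwhals API):

from __future__ import annotations

from io import BytesIO
from typing import overload

@overload
def write_csv(file: None) -> str: ...
@overload
def write_csv(file: str | BytesIO) -> None: ...
def write_csv(file: str | BytesIO | None) -> str | None:
    csv = "a\n1\n2\n3\n"
    if file is None:
        return csv  # no target: hand the CSV text back to the caller
    if isinstance(file, BytesIO):
        file.write(csv.encode())  # in-memory buffer target
    else:
        with open(file, "w") as f:  # path-like target
            f.write(csv)
    return None

text = write_csv(None)       # type checker infers `str`
none = write_csv("out.csv")  # type checker infers `None`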

narwhals/_plan/dataframe.py

Lines changed: 12 additions & 0 deletions

@@ -30,6 +30,7 @@
 from narwhals.schema import Schema
 from narwhals.typing import (
     EagerAllowed,
+    FileSource,
     IntoBackend,
     IntoDType,
     IntoSchema,
@@ -39,6 +40,7 @@
 
 if TYPE_CHECKING:
     from collections.abc import Iterable, Iterator, Mapping, Sequence
+    from io import BytesIO
 
     import polars as pl
     import pyarrow as pa
@@ -482,6 +484,16 @@ def with_row_index(
             return self._with_compliant(self._compliant.with_row_index(name))
         return super().with_row_index(name, order_by=order_by)
 
+    @overload
+    def write_csv(self, file: None = None) -> str: ...
+    @overload
+    def write_csv(self, file: FileSource | BytesIO) -> None: ...
+    def write_csv(self, file: FileSource | BytesIO | None = None) -> str | None:
+        return self._compliant.write_csv(file)
+
+    def write_parquet(self, file: FileSource | BytesIO) -> None:
+        return self._compliant.write_parquet(file)
+
     def slice(self, offset: int, length: int | None = None) -> Self:
         return type(self)(self._compliant.slice(offset=offset, length=length))
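The public methods are thin delegations to the compliant layer, so per-backend behaviour (including the `file=None` → `str` round trip) comes from whatever the backend implements. A hypothetical call pattern, mirroring the tests below — these calls raise `NotImplementedError` until the backend `todo()` stubs are replaced:

from io import BytesIO
from pathlib import Path

from tests.plan.utils import dataframe  # test helper used by the new suite

df = dataframe({"a": [1, 2, 3]})
text = df.write_csv()              # file=None -> CSV returned as `str`
df.write_csv(Path("out.csv"))      # path target -> writes to disk, returns None
df.write_parquet(BytesIO())        # buffer target -> writes Parquet bytes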

tests/plan/frame_export_test.py

Lines changed: 98 additions & 0 deletions

@@ -0,0 +1,98 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import TYPE_CHECKING, Literal
+
+import pytest
+
+from tests.plan.utils import dataframe
+from tests.utils import is_windows
+
+if TYPE_CHECKING:
+    from collections.abc import Mapping
+
+    from typing_extensions import TypeAlias
+
+    from narwhals.typing import FileSource
+    from tests.conftest import Data
+
+pytest.importorskip("pyarrow")
+
+IOTargetKind: TypeAlias = Literal["str", "Path", "PathLike"]
+"""Duplicated from `tests.read_scan_test.py`.
+
+Needs extending for `BytesIO`.
+"""
+
+
+class MockPathLike:
+    def __init__(self, path: Path) -> None:
+        self._super_secret: Path = path
+
+    def __fspath__(self) -> str:
+        return self._super_secret.__fspath__()
+
+
+def _into_file_source(source: Path, which: IOTargetKind, /) -> FileSource:
+    mapping: Mapping[IOTargetKind, FileSource] = {
+        "str": str(source),
+        "Path": source,
+        "PathLike": MockPathLike(source),
+    }
+    return mapping[which]
+
+
+@pytest.fixture(params=["str", "Path", "PathLike"])
+def csv_path(
+    tmp_path_factory: pytest.TempPathFactory, request: pytest.FixtureRequest
+) -> FileSource:
+    fp = tmp_path_factory.mktemp("data") / "file.csv"
+    return _into_file_source(fp, request.param)
+
+
+@pytest.fixture(params=["str", "Path", "PathLike"])
+def parquet_path(
+    tmp_path_factory: pytest.TempPathFactory, request: pytest.FixtureRequest
+) -> FileSource:
+    fp = tmp_path_factory.mktemp("data") / "file.parquet"
+    return _into_file_source(fp, request.param)
+
+
+@pytest.fixture(scope="module")
+def data() -> Data:
+    return {"a": [1, 2, 3]}
+
+
+XFAIL_DATAFRAME_EXPORT = pytest.mark.xfail(
+    reason="TODO: `DataFrame.write_{csv,parquet}`()", raises=NotImplementedError
+)
+
+
+@XFAIL_DATAFRAME_EXPORT
+def test_write_csv(data: Data, csv_path: FileSource) -> None:  # pragma: no cover
+    df = dataframe(data)
+    result_none = df.write_csv(csv_path)
+    assert Path(csv_path).exists()
+    assert result_none is None
+    result = dataframe(data).write_csv()
+    if is_windows():  # pragma: no cover
+        result = result.replace("\r\n", "\n")
+    if df.implementation.is_pyarrow():
+        assert result == '"a"\n1\n2\n3\n'
+    else:  # pragma: no cover
+        assert result == "a\n1\n2\n3\n"
+
+
+@XFAIL_DATAFRAME_EXPORT
+def test_write_parquet(data: Data, parquet_path: FileSource) -> None:  # pragma: no cover
+    dataframe(data).write_parquet(parquet_path)
+    assert Path(parquet_path).exists()
+
+
+@pytest.mark.xfail(
+    reason="TODO: `DataFrame.lazy()`, `LazyFrame.sink_parquet()`", raises=AttributeError
+)
+def test_sink_parquet(data: Data, parquet_path: FileSource) -> None:  # pragma: no cover
+    df = dataframe(data)
+    df.lazy().sink_parquet(parquet_path)  # type: ignore[attr-defined]
+    assert Path(parquet_path).exists()
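The module docstring flags that `IOTargetKind` still needs a `BytesIO` variant. A hypothetical follow-up test for that target — not in this commit — could round-trip through `pyarrow.parquet`, which the suite already depends on:

from io import BytesIO

import pyarrow.parquet as pq

def test_write_parquet_bytesio(data: Data) -> None:  # hypothetical follow-up
    buffer = BytesIO()
    dataframe(data).write_parquet(buffer)
    buffer.seek(0)  # rewind so the reader sees the written bytes
    assert pq.read_table(buffer).column_names == ["a"]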
