Skip to content
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
409dd4b
feat: add `separator` argument to `read_csv` / `scan_csv`
raisadz Aug 14, 2025
8143ae3
Merge remote-tracking branch 'upstream/main' into feat/add-separator-arg
raisadz Aug 14, 2025
9d6e850
add stable api
raisadz Aug 14, 2025
9000f88
Merge remote-tracking branch 'upstream/main' into feat/add-separator-arg
raisadz Aug 14, 2025
b99dfcd
add coverage
raisadz Aug 14, 2025
6b90890
Merge remote-tracking branch 'upstream/main' into feat/add-separator-arg
raisadz Aug 14, 2025
c4ff1c6
add session for sqlframe for coverage
raisadz Aug 14, 2025
00f0bc2
Update narwhals/functions.py
raisadz Aug 17, 2025
af21d2f
add separator validation
raisadz Aug 18, 2025
59a5b6b
Merge remote-tracking branch 'upstream/main' into feat/add-separator-arg
raisadz Aug 18, 2025
d0c7283
fix merge
raisadz Aug 18, 2025
ff68327
modify kwargs for pyarrow
raisadz Aug 18, 2025
b7cb02c
restore header that was there before
raisadz Aug 18, 2025
7cfae8f
Merge remote-tracking branch 'upstream/main' into feat/add-separator-arg
raisadz Aug 19, 2025
126c5c4
Update narwhals/functions.py
raisadz Aug 19, 2025
cf7c67d
Merge remote-tracking branch 'origin/feat/add-separator-arg' into fea…
raisadz Aug 19, 2025
8ace0f9
Merge remote-tracking branch 'upstream/main' into feat/add-separator-arg
raisadz Aug 22, 2025
512c529
make `validate` support functions private
raisadz Aug 23, 2025
ec12904
Merge remote-tracking branch 'upstream/main' into feat/add-separator-arg
raisadz Aug 23, 2025
bf4c269
Merge remote-tracking branch 'upstream/main' into feat/add-separator-arg
raisadz Oct 20, 2025
003d3e7
readd tests
raisadz Oct 20, 2025
4fd93fc
Merge remote-tracking branch 'upstream/main' into feat/add-separator-arg
raisadz Oct 20, 2025
7629ce7
add pyarrow parse_options for coverage
raisadz Oct 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 34 additions & 16 deletions narwhals/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,11 @@ def show_versions() -> None:


def read_csv(
source: str, *, backend: ModuleType | Implementation | str, **kwargs: Any
source: str,
*,
backend: ModuleType | Implementation | str,
separator: str = ",",
**kwargs: Any,
) -> DataFrame[Any]:
"""Read a CSV file into a DataFrame.

Expand All @@ -573,6 +577,7 @@ def read_csv(
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
separator: Single byte character to use as separator in the file.
kwargs: Extra keyword arguments which are passed to the native CSV reader.
For example, you could use
`nw.read_csv('file.csv', backend='pandas', engine='pyarrow')`.
Expand All @@ -594,17 +599,19 @@ def read_csv(
impl = Implementation.from_backend(backend)
native_namespace = impl.to_native_namespace()
native_frame: NativeFrame
if impl in {
Implementation.POLARS,
Implementation.PANDAS,
Implementation.MODIN,
Implementation.CUDF,
}:
native_frame = native_namespace.read_csv(source, **kwargs)
if impl in {Implementation.PANDAS, Implementation.MODIN, Implementation.CUDF}:
native_frame = native_namespace.read_csv(source, sep=separator, **kwargs)
elif impl is Implementation.POLARS:
native_frame = native_namespace.read_csv(source, separator=separator, **kwargs)
elif impl is Implementation.PYARROW:
if separator is not None and "parse_options" in kwargs:
msg = "Can't pass both `separator` and `parse_options`."
raise TypeError(msg)
from pyarrow import csv # ignore-banned-import

native_frame = csv.read_csv(source, **kwargs)
native_frame = csv.read_csv(
source, parse_options=csv.ParseOptions(delimiter=separator), **kwargs
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is a bit odd:

  1. separator is not typed to be None

  2. Even if that were the case, the following would not raise an error at line 607:

    nw.read_csv(..., separator=None, parse_options=csv.ParseOptions(...), backend=nw.Implementation.PYARROW)

    However, then in line 613, we would call

    csv.read_csv(
            source, parse_options=csv.ParseOptions(delimiter=None), parse_options=parse_options, ...
        )

    which would end up raising an exception at this point

  3. Should we handle this the same way for the other backends? i.e. for the pandas-like backends, check that `sep` is not also passed in `kwargs`, and likewise below for the lazy backends

elif impl in {
Implementation.PYSPARK,
Implementation.DASK,
Expand All @@ -630,7 +637,11 @@ def read_csv(


def scan_csv(
source: str, *, backend: ModuleType | Implementation | str, **kwargs: Any
source: str,
*,
backend: ModuleType | Implementation | str,
separator: str = ",",
**kwargs: Any,
) -> LazyFrame[Any]:
"""Lazily read from a CSV file.

Expand All @@ -646,6 +657,7 @@ def scan_csv(
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
separator: Single byte character to use as separator in the file.
kwargs: Extra keyword arguments which are passed to the native CSV reader.
For example, you could use
`nw.scan_csv('file.csv', backend=pd, engine='pyarrow')`.
Expand All @@ -671,33 +683,39 @@ def scan_csv(
native_namespace = implementation.to_native_namespace()
native_frame: NativeFrame | NativeLazyFrame
if implementation is Implementation.POLARS:
native_frame = native_namespace.scan_csv(source, **kwargs)
native_frame = native_namespace.scan_csv(source, separator=separator, **kwargs)
elif implementation in {
Implementation.PANDAS,
Implementation.MODIN,
Implementation.CUDF,
Implementation.DASK,
Implementation.DUCKDB,
Implementation.IBIS,
}:
native_frame = native_namespace.read_csv(source, **kwargs)
native_frame = native_namespace.read_csv(source, sep=separator, **kwargs)
elif implementation in {Implementation.DUCKDB}:
native_frame = native_namespace.read_csv(source, delimiter=separator, **kwargs)
elif implementation is Implementation.PYARROW:
if separator is not None and "parse_options" in kwargs:
msg = "Can't pass both `separator` and `parse_options`."
raise TypeError(msg)
from pyarrow import csv # ignore-banned-import

native_frame = csv.read_csv(source, **kwargs)
native_frame = csv.read_csv(
source, parse_options=csv.ParseOptions(delimiter=separator), **kwargs
)
elif implementation.is_spark_like():
if (session := kwargs.pop("session", None)) is None:
msg = "Spark like backends require a session object to be passed in `kwargs`."
raise ValueError(msg)

csv_reader = session.read.format("csv")
native_frame = (
csv_reader.load(source)
csv_reader.load(source, sep=separator)
if (
implementation is Implementation.SQLFRAME
and implementation._backend_version() < (3, 27, 0)
)
else csv_reader.options(**kwargs).load(source)
else csv_reader.options(sep=separator, **kwargs).load(source)
)
else: # pragma: no cover
try:
Expand Down
22 changes: 18 additions & 4 deletions narwhals/stable/v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,11 @@ def from_numpy(


def read_csv(
source: str, *, backend: ModuleType | Implementation | str, **kwargs: Any
source: str,
*,
backend: ModuleType | Implementation | str,
separator: str = ",",
**kwargs: Any,
) -> DataFrame[Any]:
"""Read a CSV file into a DataFrame.

Expand All @@ -1068,18 +1072,25 @@ def read_csv(
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
separator: Single byte character to use as separator in the file.
kwargs: Extra keyword arguments which are passed to the native CSV reader.
For example, you could use
`nw.read_csv('file.csv', backend='pandas', engine='pyarrow')`.

Returns:
DataFrame.
"""
return _stableify(nw_f.read_csv(source, backend=backend, **kwargs))
return _stableify(
nw_f.read_csv(source, backend=backend, separator=separator, **kwargs)
)


def scan_csv(
source: str, *, backend: ModuleType | Implementation | str, **kwargs: Any
source: str,
*,
backend: ModuleType | Implementation | str,
separator: str = ",",
**kwargs: Any,
) -> LazyFrame[Any]:
"""Lazily read from a CSV file.

Expand All @@ -1095,14 +1106,17 @@ def scan_csv(
`POLARS`, `MODIN` or `CUDF`.
- As a string: `"pandas"`, `"pyarrow"`, `"polars"`, `"modin"` or `"cudf"`.
- Directly as a module `pandas`, `pyarrow`, `polars`, `modin` or `cudf`.
separator: Single byte character to use as separator in the file.
kwargs: Extra keyword arguments which are passed to the native CSV reader.
For example, you could use
`nw.scan_csv('file.csv', backend=pd, engine='pyarrow')`.

Returns:
LazyFrame.
"""
return _stableify(nw_f.scan_csv(source, backend=backend, **kwargs))
return _stableify(
nw_f.scan_csv(source, backend=backend, separator=separator, **kwargs)
)


def read_parquet(
Expand Down
48 changes: 46 additions & 2 deletions tests/read_scan_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ def test_read_csv(tmpdir: pytest.TempdirFactory, backend: Implementation | str)
result = nw.read_csv(filepath, backend=backend)
assert_equal_data(result, data)
assert isinstance(result, nw.DataFrame)
df_pl.write_csv(filepath, separator=";")
result = nw.read_csv(filepath, backend=backend, separator=";")
assert_equal_data(result, data)
assert isinstance(result, nw.DataFrame)


@pytest.mark.skipif(PANDAS_VERSION < (1, 5), reason="too old for pyarrow")
Expand All @@ -57,12 +61,20 @@ def test_read_csv_raise_with_lazy(tmpdir: pytest.TempdirFactory, backend: str) -
nw.read_csv(filepath, backend=backend)


def test_scan_csv(tmpdir: pytest.TempdirFactory, constructor: Constructor) -> None:
def test_scan_csv(
tmpdir: pytest.TempdirFactory,
constructor: Constructor,
request: pytest.FixtureRequest,
) -> None:
kwargs: dict[str, Any]
if "sqlframe" in str(constructor):
from sqlframe.duckdb import DuckDBSession

kwargs = {"session": DuckDBSession(), "inferSchema": True, "header": True}
kwargs = {"session": DuckDBSession(), "inferSchema": True}

request.applymarker(
pytest.mark.xfail(reason="https://github.com/eakmanrq/sqlframe/issues/469")
)
elif "pyspark" in str(constructor):
if is_spark_connect := os.environ.get("SPARK_CONNECT", None):
from pyspark.sql.connect.session import SparkSession
Expand Down Expand Up @@ -96,6 +108,12 @@ def test_scan_csv(tmpdir: pytest.TempdirFactory, constructor: Constructor) -> No
result = nw.scan_csv(filepath, backend=backend, **kwargs)
assert_equal_data(result, data)
assert isinstance(result, nw.LazyFrame)
df_pl.write_csv(filepath, separator="|")
df = nw.from_native(constructor(data))
backend = nw.get_native_namespace(df)
result = nw.scan_csv(filepath, backend=backend, separator="|", **kwargs)
assert_equal_data(result, data)
assert isinstance(result, nw.LazyFrame)


@pytest.mark.skipif(PANDAS_VERSION < (1, 5), reason="too old for pyarrow")
Expand All @@ -107,6 +125,32 @@ def test_scan_csv_kwargs(tmpdir: pytest.TempdirFactory) -> None:
assert_equal_data(result, data)


def test_read_csv_raise_sep_multiple(tmpdir: pytest.TempdirFactory) -> None:
pytest.importorskip("pyarrow")
import pyarrow as pa
from pyarrow import csv

df_pl = pl.DataFrame(data)
filepath = str(tmpdir / "file.csv") # type: ignore[operator]
df_pl.write_csv(filepath)

msg = "Can't pass both `separator` and `parse_options`."
with pytest.raises(TypeError, match=msg):
nw.read_csv(
filepath,
backend=pa,
separator="|",
parse_options=csv.ParseOptions(delimiter=";"),
)
with pytest.raises(TypeError, match=msg):
nw.scan_csv(
filepath,
backend=pa,
separator="|",
parse_options=csv.ParseOptions(delimiter=";"),
)


@pytest.mark.skipif(PANDAS_VERSION < (1, 5), reason="too old for pyarrow")
def test_read_parquet(
tmpdir: pytest.TempdirFactory, constructor_eager: ConstructorEager
Expand Down
Loading