Skip to content

Commit 78d74fe

Browse files
author
abujabarmubarak
committed
ENH: Add support for engine='polars' in read_csv
1 parent fb4f24e commit 78d74fe

File tree

2 files changed

+43
-3
lines changed

2 files changed

+43
-3
lines changed

pandas/io/parsers/readers.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -672,8 +672,31 @@ def _read(
672672
filepath_or_buffer: FilePath | ReadCsvBuffer[bytes] | ReadCsvBuffer[str], kwds
673673
) -> DataFrame | TextFileReader:
674674
"""Generic reader of line files."""
675-
# if we pass a date_format and parse_dates=False, we should not parse the
676-
# dates GH#44366
675+
engine = kwds.get("engine", "c")
676+
677+
if engine not in ("c", "python", "pyarrow", "polars"):
678+
raise ValueError(f"Unknown engine: {engine}")
679+
680+
if engine == "polars":
681+
try:
682+
import polars as pl # type: ignore[import-untyped]
683+
except ImportError:
684+
raise ImportError("Polars is not installed. Please install it with 'pip install polars'.")
685+
686+
# Filter kwargs that are not supported by Polars
687+
allowed_polars_args = {
688+
"has_header", "columns", "new_columns", "skip_rows", "n_rows",
689+
"encoding", "separator", "quote_char", "comment_char", "null_values"
690+
}
691+
polars_kwargs = {k: v for k, v in kwds.items() if k in allowed_polars_args}
692+
693+
# Polars doesn't accept Path-like objects directly in all versions, convert to string
694+
path = str(filepath_or_buffer)
695+
696+
df = pl.read_csv(path, **polars_kwargs).to_pandas()
697+
return df
698+
699+
# Default pandas behavior
677700
if kwds.get("parse_dates", None) is None:
678701
if kwds.get("date_format", None) is None:
679702
kwds["parse_dates"] = False
@@ -1802,7 +1825,7 @@ def _refine_defaults_read(
18021825
kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.WARN
18031826
elif on_bad_lines == "skip":
18041827
kwds["on_bad_lines"] = ParserBase.BadLineHandleMethod.SKIP
1805-
elif callable(on_bad_lines):
1828+
elif callable(on_bad_lines):
18061829
if engine not in ["python", "pyarrow"]:
18071830
raise ValueError(
18081831
"on_bad_line can only be a callable function "
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import pytest
2+
3+
def test_read_csv_with_polars(tmp_path):
    """Check that ``read_csv(..., engine="polars")`` loads a small CSV correctly.

    A two-row, two-column CSV is written under ``tmp_path`` and read back
    through the polars engine; the resulting frame must contain exactly the
    values that were written. The test is skipped when polars or pandas
    cannot be imported.
    """
    # Only the skip-if-missing side effect is wanted here; the polars module
    # object itself is never used by the test, so it is not bound to a name.
    pytest.importorskip("polars")
    pd = pytest.importorskip("pandas")

    # Create a simple CSV file: one header row followed by two data rows.
    # The encoding is pinned so the fixture does not depend on the platform
    # default.
    csv_path = tmp_path / "sample.csv"
    csv_path.write_text("a,b\n1,2\n3,4", encoding="utf-8")

    # Read using engine='polars'
    result = pd.read_csv(csv_path, engine="polars")

    assert result.shape == (2, 2)
    assert list(result.columns) == ["a", "b"]
    # Compare every cell, not just two corners, so a wrong value anywhere in
    # the frame fails the test.
    assert result.to_dict(orient="list") == {"a": [1, 3], "b": [2, 4]}

0 commit comments

Comments
 (0)