Skip to content

Commit bc87999

Browse files
authored
fix: scanCSV eolChar bug (#335)
Fixing scanCSV eolChar bug to close #333
1 parent 51dc97f commit bc87999

File tree

4 files changed

+32
-2
lines changed

4 files changed

+32
-2
lines changed
__tests__/examples/datasets/pipe-eol.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
a,b|1,foo|2,boo

__tests__/io.test.ts

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ const tsvpath = path.resolve(__dirname, "./examples/datasets/data.tsv");
99
// eslint-disable-next-line no-undef
1010
const emptycsvpath = path.resolve(__dirname, "./examples/datasets/empty.csv");
1111
// eslint-disable-next-line no-undef
12+
const pipecsvpath = path.resolve(__dirname, "./examples/datasets/pipe-eol.csv");
13+
// eslint-disable-next-line no-undef
1214
const parquetpath = path.resolve(__dirname, "./examples/foods.parquet");
1315
// eslint-disable-next-line no-undef
1416
const avropath = path.resolve(__dirname, "./examples/foods.avro");
@@ -52,6 +54,19 @@ describe("read:csv", () => {
5254
csvString.slice(0, 22),
5355
);
5456
});
57+
it("can read from a csv file with eolChar", async () => {
58+
const actual = pl.readCSV(pipecsvpath, { eolChar: "|" });
59+
const expected = `shape: (2, 2)
60+
┌─────┬─────┐
61+
│ a   ┆ b   │
62+
│ --- ┆ --- │
63+
│ i64 ┆ str │
64+
╞═════╪═════╡
65+
│ 1   ┆ foo │
66+
│ 2   ┆ boo │
67+
└─────┴─────┘`;
68+
expect(actual.toString()).toEqual(expected);
69+
});
5570
it("can read from a csv buffer with newline in the header", () => {
5671
const csvBuffer = Buffer.from(
5772
'"name\na","height\nb"\n"John",172.23\n"Anna",1653.34',
@@ -264,6 +279,19 @@ describe("scan", () => {
264279

265280
expect(df.shape).toEqual({ height: 4, width: 4 });
266281
});
282+
it("can lazy load (scan) from a csv file with eolChar", async () => {
283+
const actual = pl.scanCSV(pipecsvpath, { eolChar: "|" }).collectSync();
284+
const expected = `shape: (2, 2)
285+
┌─────┬─────┐
286+
│ a   ┆ b   │
287+
│ --- ┆ --- │
288+
│ i64 ┆ str │
289+
╞═════╪═════╡
290+
│ 1   ┆ foo │
291+
│ 2   ┆ boo │
292+
└─────┴─────┘`;
293+
expect(actual.toString()).toEqual(expected);
294+
});
267295
});
268296

269297
describe("parquet", () => {

polars/io.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,7 @@ const scanCsvDefaultOptions: Partial<ScanCsvOptions> = {
232232
ignoreErrors: true,
233233
skipRows: 0,
234234
sep: ",",
235+
eolChar: "\n",
235236
rechunk: false,
236237
encoding: "utf8",
237238
lowMemory: false,

src/lazy/dataframe.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ pub struct ScanCsvOptions {
618618
pub encoding: String,
619619
pub low_memory: Option<bool>,
620620
pub comment_prefix: Option<String>,
621-
pub eol_char: Option<u8>,
621+
pub eol_char: String,
622622
pub quote_char: Option<String>,
623623
pub parse_dates: Option<bool>,
624624
pub skip_rows_after_header: u32,
@@ -675,7 +675,7 @@ pub fn scan_csv(path: String, options: ScanCsvOptions) -> napi::Result<JsLazyFra
675675
.map_or(None, |s| Some(PlSmallStr::from_string(s))),
676676
)
677677
.with_quote_char(quote_char)
678-
.with_eol_char(options.eol_char.unwrap_or(b'\n'))
678+
.with_eol_char(options.eol_char.as_bytes()[0])
679679
.with_rechunk(options.rechunk.unwrap_or(false))
680680
.with_skip_rows_after_header(options.skip_rows_after_header as usize)
681681
.with_encoding(encoding)

0 commit comments

Comments
 (0)