Skip to content

Commit 48fd483

Browse files
authored
fix: handle column names with null bytes (#365)
1 parent 75a0a99 commit 48fd483

File tree

3 files changed

+20
-1
lines changed

3 files changed

+20
-1
lines changed
1.9 MB
Binary file not shown.

python/tests/test_fastexcel.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -588,3 +588,10 @@ def test_header_row_and_skip_rows(
588588
.to_polars(),
589589
pl.DataFrame(expected),
590590
)
591+
592+
593+
def test_null_bytes_in_column_names() -> None:
594+
"""https://github.com/ToucanToco/fastexcel/issues/343"""
595+
reader = fastexcel.read_excel(path_for_fixture("null-bytes-in-columns-names.xls"))
596+
df = reader.load_sheet(0).to_polars()
597+
assert df.shape == (8_763, 11)

src/types/python/excelsheet/column_info.rs

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -380,7 +380,19 @@ fn column_info_from_header<D: CalamineDataProvider>(
380380
.map(|col_idx| {
381381
data.get_as_string((*row_idx, col_idx))
382382
.map(|col_name| {
383-
ColumnInfoNoDtype::new(col_name, col_idx, ColumnNameFrom::LookedUp)
383+
// Remove null bytes from column names to avoid CString panics in Arrow FFI.
384+
//
385+
// Excel strings (especially UTF-16 in .xls) may contain embedded nulls (`\0`) after
386+
// conversion to Rust `String`. Arrow’s C FFI uses `CString::new()`, which fails on
387+
// interior null bytes (it returns a `NulError`), which then surfaces as a panic.
388+
//
389+
// This strips nulls while keeping the readable content.
390+
let sanitized_col_name = col_name.replace('\0', "");
391+
ColumnInfoNoDtype::new(
392+
sanitized_col_name,
393+
col_idx,
394+
ColumnNameFrom::LookedUp,
395+
)
384396
})
385397
.unwrap_or_else(|| {
386398
ColumnInfoNoDtype::new(

0 commit comments

Comments
 (0)