Skip to content

Commit 9ae34f8

Browse files
handle empty arrays when inferring schema (#297)
Co-authored-by: Thomas McKenna <[email protected]>
1 parent d806f3c commit 9ae34f8

File tree

2 files changed

+29
-14
lines changed

2 files changed

+29
-14
lines changed

__tests__/dataframe.test.ts

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2008,6 +2008,16 @@ describe("create", () => {
20082008
string: pl.String,
20092009
});
20102010
});
2011+
test("from row objects, inferred schema, empty array", () => {
2012+
const df = pl.readRecords([
2013+
{ a: [], b: 0 },
2014+
{ a: [""], b: 0 },
2015+
]);
2016+
expect(df.schema).toStrictEqual({
2017+
a: pl.List(pl.String),
2018+
b: pl.Float64,
2019+
});
2020+
});
20112021
test("from row objects, with schema", () => {
20122022
const rows = [
20132023
{ num: 1, date: "foo", string: "foo1" },

src/dataframe.rs

Lines changed: 19 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1657,21 +1657,26 @@ fn obj_to_pairs(rows: &Array, len: usize) -> impl '_ + Iterator<Item = Vec<(Stri
16571657
if val.is_array().unwrap() {
16581658
let arr: napi::JsObject = unsafe { val.cast() };
16591659
let len = arr.get_array_length().unwrap();
1660-
// dont compare too many items, as it could be expensive
1661-
let max_take = std::cmp::min(len as usize, 10);
1662-
let mut dtypes: Vec<DataType> =
1663-
Vec::with_capacity(len as usize);
1664-
1665-
for idx in 0..max_take {
1666-
let item: napi::JsUnknown =
1667-
arr.get_element(idx as u32).unwrap();
1668-
let ty = item.get_type().unwrap();
1669-
let dt: Wrap<DataType> = ty.into();
1670-
dtypes.push(dt.0)
1671-
}
1672-
let dtype = coerce_data_type(&dtypes);
16731660

1674-
DataType::List(dtype.into())
1661+
if len == 0 {
1662+
DataType::List(DataType::Null.into())
1663+
} else {
1664+
// dont compare too many items, as it could be expensive
1665+
let max_take = std::cmp::min(len as usize, 10);
1666+
let mut dtypes: Vec<DataType> =
1667+
Vec::with_capacity(len as usize);
1668+
1669+
for idx in 0..max_take {
1670+
let item: napi::JsUnknown =
1671+
arr.get_element(idx as u32).unwrap();
1672+
let ty = item.get_type().unwrap();
1673+
let dt: Wrap<DataType> = ty.into();
1674+
dtypes.push(dt.0)
1675+
}
1676+
let dtype = coerce_data_type(&dtypes);
1677+
1678+
DataType::List(dtype.into())
1679+
}
16751680
} else if val.is_date().unwrap() {
16761681
DataType::Datetime(TimeUnit::Milliseconds, None)
16771682
} else {

0 commit comments

Comments
 (0)