Skip to content

Commit e38faee

Browse files
authored
Add List of nested types support for Arrow vtab: Struct, List, FixedSizeList (#432)
* Add List of nested types support for Arrow vtab: Struct, List, FixedSizeList
* Fix lint
1 parent 4b55ed5 commit e38faee

File tree

2 files changed

+130
-12
lines changed

2 files changed

+130
-12
lines changed

crates/duckdb/src/core/vector.rs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,21 @@ impl ListVector {
191191
FlatVector::with_capacity(unsafe { duckdb_list_vector_get_child(self.entries.ptr) }, capacity)
192192
}
193193

194+
/// Wraps the list's child vector (from `duckdb_list_vector_get_child`) as a [StructVector]; used when the list elements are structs.
195+
pub fn struct_child(&self) -> StructVector {
196+
StructVector::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
197+
}
198+
199+
/// Wraps the list's child vector (from `duckdb_list_vector_get_child`) as an [ArrayVector]; used when the list elements are fixed-size lists.
200+
pub fn array_child(&self) -> ArrayVector {
201+
ArrayVector::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
202+
}
203+
204+
/// Wraps the list's child vector (from `duckdb_list_vector_get_child`) as a [ListVector]; used when the list elements are themselves lists.
205+
pub fn list_child(&self) -> ListVector {
206+
ListVector::from(unsafe { duckdb_list_vector_get_child(self.entries.ptr) })
207+
}
208+
194209
/// Set primitive data to the child node.
195210
pub fn set_child<T: Copy>(&self, data: &[T]) {
196211
self.child(data.len()).copy(data);

crates/duckdb/src/vtab/arrow.rs

Lines changed: 115 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -895,13 +895,12 @@ fn list_array_to_vector<O: OffsetSizeTrait + AsPrimitive<usize>>(
895895
out: &mut ListVector,
896896
) -> Result<(), Box<dyn std::error::Error>> {
897897
let value_array = array.values();
898-
let mut child = out.child(value_array.len());
899898
match value_array.data_type() {
900899
dt if dt.is_primitive() || matches!(dt, DataType::Boolean) => {
901-
primitive_array_to_vector(value_array.as_ref(), &mut child)?;
900+
primitive_array_to_vector(value_array.as_ref(), &mut out.child(value_array.len()))?;
902901
}
903902
DataType::Utf8 => {
904-
string_array_to_vector(as_string_array(value_array.as_ref()), &mut child);
903+
string_array_to_vector(as_string_array(value_array.as_ref()), &mut out.child(value_array.len()));
905904
}
906905
DataType::Utf8View => {
907906
string_view_array_to_vector(
@@ -910,11 +909,14 @@ fn list_array_to_vector<O: OffsetSizeTrait + AsPrimitive<usize>>(
910909
.as_any()
911910
.downcast_ref::<StringViewArray>()
912911
.ok_or_else(|| Box::<dyn std::error::Error>::from("Unable to downcast to StringViewArray"))?,
913-
&mut child,
912+
&mut out.child(value_array.len()),
914913
);
915914
}
916915
DataType::Binary => {
917-
binary_array_to_vector(as_generic_binary_array(value_array.as_ref()), &mut child);
916+
binary_array_to_vector(
917+
as_generic_binary_array(value_array.as_ref()),
918+
&mut out.child(value_array.len()),
919+
);
918920
}
919921
DataType::BinaryView => {
920922
binary_view_array_to_vector(
@@ -923,11 +925,24 @@ fn list_array_to_vector<O: OffsetSizeTrait + AsPrimitive<usize>>(
923925
.as_any()
924926
.downcast_ref::<BinaryViewArray>()
925927
.ok_or_else(|| Box::<dyn std::error::Error>::from("Unable to downcast to BinaryViewArray"))?,
926-
&mut child,
928+
&mut out.child(value_array.len()),
927929
);
928930
}
931+
DataType::List(_) => {
932+
list_array_to_vector(as_list_array(value_array.as_ref()), &mut out.list_child())?;
933+
}
934+
DataType::FixedSizeList(_, _) => {
935+
fixed_size_list_array_to_vector(as_fixed_size_list_array(value_array.as_ref()), &mut out.array_child())?;
936+
}
937+
DataType::Struct(_) => {
938+
struct_array_to_vector(as_struct_array(value_array.as_ref()), &mut out.struct_child())?;
939+
}
929940
_ => {
930-
return Err("Nested list is not supported yet.".into());
941+
return Err(format!(
942+
"List with elements of type '{}' are not currently supported.",
943+
value_array.data_type()
944+
)
945+
.into());
931946
}
932947
}
933948

@@ -1095,11 +1110,12 @@ mod test {
10951110
use arrow::{
10961111
array::{
10971112
Array, ArrayRef, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array,
1098-
Decimal128Array, Decimal256Array, DurationSecondArray, FixedSizeListArray, GenericByteArray,
1099-
GenericListArray, Int32Array, IntervalDayTimeArray, IntervalMonthDayNanoArray, IntervalYearMonthArray,
1100-
LargeStringArray, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StringViewArray, StructArray,
1101-
Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
1102-
TimestampNanosecondArray, TimestampSecondArray,
1113+
Decimal128Array, Decimal256Array, DurationSecondArray, FixedSizeListArray, FixedSizeListBuilder,
1114+
GenericByteArray, GenericListArray, Int32Array, Int32Builder, IntervalDayTimeArray,
1115+
IntervalMonthDayNanoArray, IntervalYearMonthArray, LargeStringArray, ListArray, ListBuilder,
1116+
OffsetSizeTrait, PrimitiveArray, StringArray, StringViewArray, StructArray, Time32SecondArray,
1117+
Time64MicrosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
1118+
TimestampSecondArray,
11031119
},
11041120
buffer::{OffsetBuffer, ScalarBuffer},
11051121
datatypes::{
@@ -1791,4 +1807,91 @@ mod test {
17911807

17921808
Ok(())
17931809
}
1810+
1811+
#[test]
1812+
fn test_list_of_fixed_size_lists_roundtrip() -> Result<(), Box<dyn Error>> {
1813+
// The Int32 field name must be empty to match `query_arrow` behavior; otherwise the record batches will not match.
1814+
let field = Field::new("", DataType::Int32, true);
1815+
let mut list_builder = ListBuilder::new(FixedSizeListBuilder::new(Int32Builder::new(), 2).with_field(field));
1816+
1817+
// First outer list entry: three fixed-size lists [1, 2], NULL, [3, 4].
1818+
{
1819+
let fixed_size_list_builder = list_builder.values();
1820+
fixed_size_list_builder.values().append_value(1);
1821+
fixed_size_list_builder.values().append_value(2);
1822+
fixed_size_list_builder.append(true);
1823+
1824+
// Append a NULL fixed-size list item (two null values, then `append(false)`).
1825+
fixed_size_list_builder.values().append_null();
1826+
fixed_size_list_builder.values().append_null();
1827+
fixed_size_list_builder.append(false);
1828+
1829+
fixed_size_list_builder.values().append_value(3);
1830+
fixed_size_list_builder.values().append_value(4);
1831+
fixed_size_list_builder.append(true);
1832+
1833+
list_builder.append(true);
1834+
}
1835+
1836+
// Second outer list entry is NULL.
1837+
list_builder.append_null();
1838+
1839+
check_generic_array_roundtrip(list_builder.finish())?;
1840+
1841+
Ok(())
1842+
}
1843+
1844+
#[test]
1845+
fn test_list_of_lists_roundtrip() -> Result<(), Box<dyn Error>> {
1846+
// The field given to the inner list builder must be named 'l' to match `query_arrow` behavior; otherwise the record batches will not match.
1847+
let field = Field::new("l", DataType::Int32, true);
1848+
let mut list_builder = ListBuilder::new(ListBuilder::new(Int32Builder::new()).with_field(field.clone()));
1849+
1850+
// First outer list entry: inner lists [1, 2], NULL, [3, NULL, 5].
1851+
{
1852+
let list_item_builder = list_builder.values();
1853+
list_item_builder.append_value(vec![Some(1), Some(2)]);
1854+
1855+
// Append a NULL inner list.
1856+
list_item_builder.append_null();
1857+
1858+
list_item_builder.append_value(vec![Some(3), None, Some(5)]);
1859+
1860+
list_builder.append(true);
1861+
}
1862+
1863+
// Second outer list entry is NULL.
1864+
list_builder.append_null();
1865+
1866+
check_generic_array_roundtrip(list_builder.finish())?;
1867+
1868+
Ok(())
1869+
}
1870+
1871+
// Builds a list-of-structs array: five {i, s} rows split by offsets [0, 3, 4, 5] into three lists, with the second list masked as NULL, then passes it to `check_generic_array_roundtrip`.
#[test]
1872+
fn test_list_of_structs_roundtrip() -> Result<(), Box<dyn Error>> {
1873+
let field_i = Arc::new(Field::new("i", DataType::Int32, true));
1874+
let field_s = Arc::new(Field::new("s", DataType::Utf8, true));
1875+
1876+
let int32_array = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4), Some(5)]);
1877+
let string_array = StringArray::from(vec![Some("foo"), Some("baz"), Some("bar"), Some("foo"), Some("baz")]);
1878+
1879+
let struct_array = StructArray::from(vec![
1880+
(field_i.clone(), Arc::new(int32_array) as Arc<dyn Array>),
1881+
(field_s.clone(), Arc::new(string_array) as Arc<dyn Array>),
1882+
]);
1883+
1884+
check_generic_array_roundtrip(ListArray::new(
1885+
Arc::new(Field::new(
1886+
"item",
1887+
DataType::Struct(vec![field_i, field_s].into()),
1888+
true,
1889+
)),
1890+
OffsetBuffer::new(ScalarBuffer::from(vec![0, 3, 4, 5])),
1891+
Arc::new(struct_array),
1892+
Some(vec![true, false, true].into()),
1893+
))?;
1894+
1895+
Ok(())
1896+
}
17941897
}

0 commit comments

Comments
 (0)