Skip to content

Commit 9ab108c

Browse files
committed
Feature: Support importing nested vectors (array, list, struct) as well as time ns
Signed-off-by: Robert Kruszewski <[email protected]>
1 parent 925c956 commit 9ab108c

File tree

3 files changed

+89
-9
lines changed

3 files changed

+89
-9
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-duckdb/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ crate-type = ["staticlib", "cdylib", "rlib"]
2323
anyhow = { workspace = true }
2424
arrow-array = { workspace = true }
2525
arrow-buffer = { workspace = true }
26+
arrow-schema = { workspace = true }
2627
async-compat = { workspace = true }
2728
async-fs = { workspace = true }
2829
bitvec = { workspace = true }

vortex-duckdb/src/convert/vector.rs

Lines changed: 87 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,25 @@ use arrow_array::types::{
1010
UInt32Type, UInt64Type,
1111
};
1212
use arrow_array::{
13-
Array, BooleanArray, Date32Array, Decimal128Array, PrimitiveArray, StringArray,
14-
Time64MicrosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
15-
TimestampNanosecondArray, TimestampSecondArray,
13+
Array, BooleanArray, Date32Array, Decimal128Array, FixedSizeListArray, GenericListViewArray,
14+
PrimitiveArray, StringArray, StructArray, Time64MicrosecondArray, Time64NanosecondArray,
15+
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
16+
TimestampSecondArray,
1617
};
1718
use arrow_buffer::buffer::BooleanBuffer;
1819
use num_traits::AsPrimitive;
1920
use vortex::ArrayRef;
20-
use vortex::arrays::StructArray;
2121
use vortex::arrow::FromArrowArray;
22-
use vortex::dtype::{DecimalDType, FieldNames};
23-
use vortex::error::{VortexResult, vortex_err};
22+
use vortex::buffer::BufferMut;
23+
use vortex::dtype::{DType, DecimalDType, FieldNames, Nullability};
24+
use vortex::error::{VortexExpect, VortexResult, vortex_err};
2425
use vortex::scalar::DecimalType;
2526

27+
use crate::convert::dtype::FromLogicalType;
2628
use crate::cpp::{
27-
DUCKDB_TYPE, duckdb_date, duckdb_string_t, duckdb_string_t_data, duckdb_string_t_length,
28-
duckdb_time, duckdb_timestamp, duckdb_timestamp_ms, duckdb_timestamp_s,
29+
DUCKDB_TYPE, duckdb_date, duckdb_list_entry, duckdb_string_t, duckdb_string_t_data,
30+
duckdb_string_t_length, duckdb_time, duckdb_time_ns, duckdb_timestamp, duckdb_timestamp_ms,
31+
duckdb_timestamp_s,
2932
};
3033
use crate::duckdb::{DataChunk, Vector};
3134
use crate::exporter::precision_to_duckdb_storage_size;
@@ -187,6 +190,16 @@ pub fn flat_vector_to_arrow_array(
187190
),
188191
))
189192
}
193+
DUCKDB_TYPE::DUCKDB_TYPE_TIME_NS => {
194+
let data = vector.as_slice_with_len::<duckdb_time_ns>(len);
195+
196+
Ok(Arc::new(
197+
Time64NanosecondArray::from_iter_values_with_nulls(
198+
data.iter().map(|duckdb_time_ns { nanos }| *nanos),
199+
vector.validity_ref(data.len()).to_null_buffer(),
200+
),
201+
))
202+
}
190203
DUCKDB_TYPE::DUCKDB_TYPE_SMALLINT => {
191204
let data = vector.as_slice_with_len::<i16>(len);
192205

@@ -311,6 +324,71 @@ pub fn flat_vector_to_arrow_array(
311324

312325
Ok(Arc::new(decimal_array))
313326
}
327+
DUCKDB_TYPE::DUCKDB_TYPE_ARRAY => {
328+
let array_elem_size = vector.logical_type().array_type_array_size();
329+
let array_child_type = vector.logical_type().array_child_type();
330+
let data_arrow = flat_vector_to_arrow_array(
331+
&mut vector.array_vector_get_child(),
332+
len * array_elem_size as usize,
333+
)?;
334+
Ok(Arc::new(FixedSizeListArray::try_new(
335+
Arc::new(arrow_schema::Field::new(
336+
"element",
337+
DType::from_logical_type(array_child_type, Nullability::Nullable)?
338+
.to_arrow_dtype()?,
339+
true,
340+
)),
341+
array_elem_size as i32,
342+
data_arrow,
343+
vector.validity_ref(len).to_null_buffer(),
344+
)?))
345+
}
346+
DUCKDB_TYPE::DUCKDB_TYPE_LIST => {
347+
let arrow_child = flat_vector_to_arrow_array(&mut vector.list_vector_get_child(), len)?;
348+
let array_child_type = vector.logical_type().list_child_type();
349+
350+
let mut offsets = BufferMut::with_capacity(len);
351+
let mut lengths = BufferMut::with_capacity(len);
352+
for duckdb_list_entry { offset, length } in
353+
vector.as_slice_with_len::<duckdb_list_entry>(len)
354+
{
355+
unsafe {
356+
offsets.push_unchecked(
357+
i64::try_from(*offset).vortex_expect("offset must fit i64"),
358+
);
359+
lengths.push_unchecked(
360+
i64::try_from(*length).vortex_expect("length must fit i64"),
361+
);
362+
}
363+
}
364+
365+
Ok(Arc::new(GenericListViewArray::try_new(
366+
Arc::new(arrow_schema::Field::new(
367+
"element",
368+
DType::from_logical_type(array_child_type, Nullability::Nullable)?
369+
.to_arrow_dtype()?,
370+
true,
371+
)),
372+
offsets.freeze().into_arrow_scalar_buffer(),
373+
lengths.freeze().into_arrow_scalar_buffer(),
374+
arrow_child,
375+
vector.validity_ref(len).to_null_buffer(),
376+
)?))
377+
}
378+
DUCKDB_TYPE::DUCKDB_TYPE_STRUCT => {
379+
let children = (0..vector.logical_type().struct_type_child_count())
380+
.map(|idx| {
381+
flat_vector_to_arrow_array(&mut vector.struct_vector_get_child(idx), len)
382+
})
383+
.collect::<Result<Vec<_>, _>>()?;
384+
Ok(Arc::new(StructArray::try_new(
385+
DType::from_logical_type(vector.logical_type(), Nullability::Nullable)?
386+
.to_arrow_schema()?
387+
.fields,
388+
children,
389+
vector.validity_ref(len).to_null_buffer(),
390+
)?))
391+
}
314392
_ => todo!("missing impl for {type_id:?}"),
315393
}
316394
}
@@ -332,7 +410,7 @@ pub fn data_chunk_to_arrow(field_names: &FieldNames, chunk: &DataChunk) -> Vorte
332410
.map_err(|e| vortex_err!("duckdb to arrow conversion failure {e}"))
333411
})
334412
.collect::<VortexResult<Vec<_>>>()?;
335-
StructArray::try_from_iter(columns).map(|a| a.to_array())
413+
vortex::arrays::StructArray::try_from_iter(columns).map(|a| a.to_array())
336414
}
337415

338416
#[cfg(test)]

0 commit comments

Comments
 (0)