Skip to content

Commit 4f772b3

Browse files
Add support for BinaryArray in arrow-vtab (#324)
* Add support for BinaryArray in arrow-vtab
* Fix lint
1 parent f628e5a commit 4f772b3

File tree

2 files changed

+65
-9
lines changed

2 files changed

+65
-9
lines changed

crates/duckdb/src/vtab/arrow.rs

Lines changed: 47 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -6,9 +6,9 @@ use std::ptr::null_mut;
66

77
use crate::vtab::vector::Inserter;
88
use arrow::array::{
9-
as_boolean_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array, as_struct_array, Array,
10-
ArrayData, AsArray, BooleanArray, Decimal128Array, FixedSizeListArray, GenericListArray, OffsetSizeTrait,
11-
PrimitiveArray, StringArray, StructArray,
9+
as_boolean_array, as_generic_binary_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array,
10+
as_struct_array, Array, ArrayData, AsArray, BinaryArray, BooleanArray, Decimal128Array, FixedSizeListArray,
11+
GenericListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray,
1212
};
1313

1414
use arrow::{
@@ -230,6 +230,9 @@ pub fn record_batch_to_duckdb_data_chunk(
230230
DataType::Utf8 => {
231231
string_array_to_vector(as_string_array(col.as_ref()), &mut chunk.flat_vector(i));
232232
}
233+
DataType::Binary => {
234+
binary_array_to_vector(as_generic_binary_array(col.as_ref()), &mut chunk.flat_vector(i));
235+
}
233236
DataType::List(_) => {
234237
list_array_to_vector(as_list_array(col.as_ref()), &mut chunk.list_vector(i))?;
235238
}
@@ -430,6 +433,15 @@ fn string_array_to_vector(array: &StringArray, out: &mut FlatVector) {
430433
}
431434
}
432435

436+
fn binary_array_to_vector(array: &BinaryArray, out: &mut FlatVector) {
437+
assert!(array.len() <= out.capacity());
438+
439+
for i in 0..array.len() {
440+
let s = array.value(i);
441+
out.insert(i, s);
442+
}
443+
}
444+
433445
fn list_array_to_vector<O: OffsetSizeTrait + AsPrimitive<usize>>(
434446
array: &GenericListArray<O>,
435447
out: &mut ListVector,
@@ -443,6 +455,9 @@ fn list_array_to_vector<O: OffsetSizeTrait + AsPrimitive<usize>>(
443455
DataType::Utf8 => {
444456
string_array_to_vector(as_string_array(value_array.as_ref()), &mut child);
445457
}
458+
DataType::Binary => {
459+
binary_array_to_vector(as_generic_binary_array(value_array.as_ref()), &mut child);
460+
}
446461
_ => {
447462
return Err("Nested list is not supported yet.".into());
448463
}
@@ -469,6 +484,9 @@ fn fixed_size_list_array_to_vector(
469484
DataType::Utf8 => {
470485
string_array_to_vector(as_string_array(value_array.as_ref()), &mut child);
471486
}
487+
DataType::Binary => {
488+
binary_array_to_vector(as_generic_binary_array(value_array.as_ref()), &mut child);
489+
}
472490
_ => {
473491
return Err("Nested array is not supported yet.".into());
474492
}
@@ -493,6 +511,9 @@ fn struct_array_to_vector(array: &StructArray, out: &mut StructVector) -> Result
493511
DataType::Utf8 => {
494512
string_array_to_vector(as_string_array(column.as_ref()), &mut out.child(i));
495513
}
514+
DataType::Binary => {
515+
binary_array_to_vector(as_generic_binary_array(column.as_ref()), &mut out.child(i));
516+
}
496517
DataType::List(_) => {
497518
list_array_to_vector(as_list_array(column.as_ref()), &mut out.list_vector_child(i))?;
498519
}
@@ -560,10 +581,10 @@ mod test {
560581
use crate::{Connection, Result};
561582
use arrow::{
562583
array::{
563-
Array, ArrayRef, AsArray, Date32Array, Date64Array, Decimal256Array, FixedSizeListArray, Float64Array,
564-
GenericListArray, Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, StructArray,
565-
Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray,
566-
TimestampNanosecondArray, TimestampSecondArray,
584+
Array, ArrayRef, AsArray, BinaryArray, Date32Array, Date64Array, Decimal256Array, FixedSizeListArray,
585+
Float64Array, GenericListArray, Int32Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray,
586+
StructArray, Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray,
587+
TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray,
567588
},
568589
buffer::{OffsetBuffer, ScalarBuffer},
569590
datatypes::{i256, ArrowPrimitiveType, DataType, Field, Fields, Schema},
@@ -924,4 +945,23 @@ mod test {
924945
)
925946
);
926947
}
948+
949+
#[test]
fn test_arrow_binary() {
    // Build a record batch with a single column "x" holding one binary value.
    let values = BinaryArray::from_iter_values([b"test"].iter());
    let input: ArrayRef = Arc::new(values);
    let batch = RecordBatch::try_from_iter(vec![("x", input)]).unwrap();

    let db = Connection::open_in_memory().unwrap();
    db.register_table_function::<ArrowVTab>("arrow").unwrap();

    // Send the batch through the `arrow` table function and read it back.
    let mut stmt = db.prepare("SELECT * FROM arrow(?, ?)").unwrap();
    let result = stmt
        .query_arrow(arrow_recordbatch_to_query_params(batch))
        .unwrap()
        .next()
        .expect("no record batch");

    // The returned column must still be binary and carry the original bytes.
    let binary = result.column(0).as_any().downcast_ref::<BinaryArray>().unwrap();
    assert_eq!(binary.len(), 1);
    assert_eq!(binary.value(0), b"test");
}
927967
}

crates/duckdb/src/vtab/vector.rs

Lines changed: 18 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,8 +7,9 @@ use crate::ffi::{
77
duckdb_list_entry, duckdb_list_vector_get_child, duckdb_list_vector_get_size, duckdb_list_vector_reserve,
88
duckdb_list_vector_set_size, duckdb_struct_type_child_count, duckdb_struct_type_child_name,
99
duckdb_struct_vector_get_child, duckdb_validity_set_row_invalid, duckdb_vector,
10-
duckdb_vector_assign_string_element, duckdb_vector_ensure_validity_writable, duckdb_vector_get_column_type,
11-
duckdb_vector_get_data, duckdb_vector_get_validity, duckdb_vector_size,
10+
duckdb_vector_assign_string_element, duckdb_vector_assign_string_element_len,
11+
duckdb_vector_ensure_validity_writable, duckdb_vector_get_column_type, duckdb_vector_get_data,
12+
duckdb_vector_get_validity, duckdb_vector_size,
1213
};
1314

1415
/// Vector trait.
@@ -113,6 +114,21 @@ impl Inserter<&str> for FlatVector {
113114
}
114115
}
115116

117+
impl Inserter<&[u8]> for FlatVector {
118+
fn insert(&self, index: usize, value: &[u8]) {
119+
let value_size = value.len();
120+
unsafe {
121+
// This function also works for binary data. https://duckdb.org/docs/api/c/api#duckdb_vector_assign_string_element_len
122+
duckdb_vector_assign_string_element_len(
123+
self.ptr,
124+
index as u64,
125+
value.as_ptr() as *const ::std::os::raw::c_char,
126+
value_size as u64,
127+
);
128+
}
129+
}
130+
}
131+
116132
/// A list vector.
117133
pub struct ListVector {
118134
/// ListVector does not own the vector pointer.

0 commit comments

Comments
 (0)