|
1 | 1 | use super::{
|
2 | 2 | vector::{FlatVector, ListVector, Vector},
|
3 |
| - BindInfo, DataChunk, Free, FunctionInfo, InitInfo, LogicalType, LogicalTypeId, VTab, |
| 3 | + BindInfo, DataChunk, Free, FunctionInfo, InitInfo, LogicalType, LogicalTypeId, StructVector, VTab, |
4 | 4 | };
|
5 | 5 |
|
6 | 6 | use crate::vtab::vector::Inserter;
|
7 | 7 | use arrow::array::{
|
8 |
| - as_boolean_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array, Array, ArrayData, |
9 |
| - AsArray, BooleanArray, Decimal128Array, FixedSizeListArray, GenericListArray, OffsetSizeTrait, PrimitiveArray, |
10 |
| - StringArray, StructArray, |
| 8 | + as_boolean_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array, as_struct_array, Array, |
| 9 | + ArrayData, AsArray, BooleanArray, Decimal128Array, FixedSizeListArray, GenericListArray, OffsetSizeTrait, |
| 10 | + PrimitiveArray, StringArray, StructArray, |
11 | 11 | };
|
12 | 12 |
|
13 | 13 | use arrow::{
|
@@ -181,24 +181,22 @@ pub fn to_duckdb_logical_type(data_type: &DataType) -> Result<LogicalType, Box<d
|
181 | 181 | Ok(LogicalType::new(to_duckdb_type_id(data_type)?))
|
182 | 182 | } else if let DataType::Dictionary(_, value_type) = data_type {
|
183 | 183 | to_duckdb_logical_type(value_type)
|
184 |
| - // } else if let DataType::Struct(fields) = data_type { |
185 |
| - // let mut shape = vec![]; |
186 |
| - // for field in fields.iter() { |
187 |
| - // shape.push(( |
188 |
| - // field.name().as_str(), |
189 |
| - // to_duckdb_logical_type(field.data_type())?, |
190 |
| - // )); |
191 |
| - // } |
192 |
| - // Ok(LogicalType::struct_type(shape.as_slice())) |
| 184 | + } else if let DataType::Struct(fields) = data_type { |
| 185 | + let mut shape = vec![]; |
| 186 | + for field in fields.iter() { |
| 187 | + shape.push((field.name().as_str(), to_duckdb_logical_type(field.data_type())?)); |
| 188 | + } |
| 189 | + Ok(LogicalType::struct_type(shape.as_slice())) |
193 | 190 | } else if let DataType::List(child) = data_type {
|
194 | 191 | Ok(LogicalType::list(&to_duckdb_logical_type(child.data_type())?))
|
195 | 192 | } else if let DataType::LargeList(child) = data_type {
|
196 | 193 | Ok(LogicalType::list(&to_duckdb_logical_type(child.data_type())?))
|
197 | 194 | } else if let DataType::FixedSizeList(child, _) = data_type {
|
198 | 195 | Ok(LogicalType::list(&to_duckdb_logical_type(child.data_type())?))
|
199 | 196 | } else {
|
200 |
| - println!("Unsupported data type: {data_type}, please file an issue https://github.com/wangfenjin/duckdb-rs"); |
201 |
| - todo!() |
| 197 | + unimplemented!( |
| 198 | + "Unsupported data type: {data_type}, please file an issue https://github.com/wangfenjin/duckdb-rs" |
| 199 | + ) |
202 | 200 | }
|
203 | 201 | }
|
204 | 202 |
|
@@ -232,17 +230,16 @@ pub fn record_batch_to_duckdb_data_chunk(
|
232 | 230 | DataType::FixedSizeList(_, _) => {
|
233 | 231 | fixed_size_list_array_to_vector(as_fixed_size_list_array(col.as_ref()), &mut chunk.list_vector(i));
|
234 | 232 | }
|
235 |
| - // DataType::Struct(_) => { |
236 |
| - // let struct_array = as_struct_array(col.as_ref()); |
237 |
| - // let mut struct_vector = chunk.struct_vector(i); |
238 |
| - // struct_array_to_vector(struct_array, &mut struct_vector); |
239 |
| - // } |
| 233 | + DataType::Struct(_) => { |
| 234 | + let struct_array = as_struct_array(col.as_ref()); |
| 235 | + let mut struct_vector = chunk.struct_vector(i); |
| 236 | + struct_array_to_vector(struct_array, &mut struct_vector); |
| 237 | + } |
240 | 238 | _ => {
|
241 |
| - println!( |
| 239 | + unimplemented!( |
242 | 240 | "column {} is not supported yet, please file an issue https://github.com/wangfenjin/duckdb-rs",
|
243 | 241 | batch.schema().field(i)
|
244 | 242 | );
|
245 |
| - todo!() |
246 | 243 | }
|
247 | 244 | }
|
248 | 245 | }
|
@@ -458,46 +455,42 @@ fn as_fixed_size_list_array(arr: &dyn Array) -> &FixedSizeListArray {
|
458 | 455 | arr.as_any().downcast_ref::<FixedSizeListArray>().unwrap()
|
459 | 456 | }
|
460 | 457 |
|
461 |
| -// fn struct_array_to_vector(array: &StructArray, out: &mut StructVector) { |
462 |
| -// for i in 0..array.num_columns() { |
463 |
| -// let column = array.column(i); |
464 |
| -// match column.data_type() { |
465 |
| -// dt if dt.is_primitive() || matches!(dt, DataType::Boolean) => { |
466 |
| -// primitive_array_to_vector(column, &mut out.child(i)); |
467 |
| -// } |
468 |
| -// DataType::Utf8 => { |
469 |
| -// string_array_to_vector(as_string_array(column.as_ref()), &mut out.child(i)); |
470 |
| -// } |
471 |
| -// DataType::List(_) => { |
472 |
| -// list_array_to_vector( |
473 |
| -// as_list_array(column.as_ref()), |
474 |
| -// &mut out.list_vector_child(i), |
475 |
| -// ); |
476 |
| -// } |
477 |
| -// DataType::LargeList(_) => { |
478 |
| -// list_array_to_vector( |
479 |
| -// as_large_list_array(column.as_ref()), |
480 |
| -// &mut out.list_vector_child(i), |
481 |
| -// ); |
482 |
| -// } |
483 |
| -// DataType::FixedSizeList(_, _) => { |
484 |
| -// fixed_size_list_array_to_vector( |
485 |
| -// as_fixed_size_list_array(column.as_ref()), |
486 |
| -// &mut out.list_vector_child(i), |
487 |
| -// ); |
488 |
| -// } |
489 |
| -// DataType::Struct(_) => { |
490 |
| -// let struct_array = as_struct_array(column.as_ref()); |
491 |
| -// let mut struct_vector = out.struct_vector_child(i); |
492 |
| -// struct_array_to_vector(struct_array, &mut struct_vector); |
493 |
| -// } |
494 |
| -// _ => { |
495 |
| -// println!("Unsupported data type: {}, please file an issue https://github.com/wangfenjin/duckdb-rs", column.data_type()); |
496 |
| -// todo!() |
497 |
| -// } |
498 |
| -// } |
499 |
| -// } |
500 |
| -// } |
| 458 | +fn struct_array_to_vector(array: &StructArray, out: &mut StructVector) { |
| 459 | + for i in 0..array.num_columns() { |
| 460 | + let column = array.column(i); |
| 461 | + match column.data_type() { |
| 462 | + dt if dt.is_primitive() || matches!(dt, DataType::Boolean) => { |
| 463 | + primitive_array_to_vector(column, &mut out.child(i)); |
| 464 | + } |
| 465 | + DataType::Utf8 => { |
| 466 | + string_array_to_vector(as_string_array(column.as_ref()), &mut out.child(i)); |
| 467 | + } |
| 468 | + DataType::List(_) => { |
| 469 | + list_array_to_vector(as_list_array(column.as_ref()), &mut out.list_vector_child(i)); |
| 470 | + } |
| 471 | + DataType::LargeList(_) => { |
| 472 | + list_array_to_vector(as_large_list_array(column.as_ref()), &mut out.list_vector_child(i)); |
| 473 | + } |
| 474 | + DataType::FixedSizeList(_, _) => { |
| 475 | + fixed_size_list_array_to_vector( |
| 476 | + as_fixed_size_list_array(column.as_ref()), |
| 477 | + &mut out.list_vector_child(i), |
| 478 | + ); |
| 479 | + } |
| 480 | + DataType::Struct(_) => { |
| 481 | + let struct_array = as_struct_array(column.as_ref()); |
| 482 | + let mut struct_vector = out.struct_vector_child(i); |
| 483 | + struct_array_to_vector(struct_array, &mut struct_vector); |
| 484 | + } |
| 485 | + _ => { |
| 486 | + unimplemented!( |
| 487 | + "Unsupported data type: {}, please file an issue https://github.com/wangfenjin/duckdb-rs", |
| 488 | + column.data_type() |
| 489 | + ); |
| 490 | + } |
| 491 | + } |
| 492 | + } |
| 493 | +} |
501 | 494 |
|
502 | 495 | /// Pass RecordBatch to duckdb.
|
503 | 496 | ///
|
@@ -538,11 +531,11 @@ mod test {
|
538 | 531 | use crate::{Connection, Result};
|
539 | 532 | use arrow::{
|
540 | 533 | array::{
|
541 |
| - Array, AsArray, Date32Array, Date64Array, Float64Array, Int32Array, PrimitiveArray, StringArray, |
542 |
| - Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray, TimestampMillisecondArray, |
543 |
| - TimestampNanosecondArray, TimestampSecondArray, |
| 534 | + Array, ArrayRef, AsArray, Date32Array, Date64Array, Float64Array, Int32Array, PrimitiveArray, StringArray, |
| 535 | + StructArray, Time32SecondArray, Time64MicrosecondArray, TimestampMicrosecondArray, |
| 536 | + TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, |
544 | 537 | },
|
545 |
| - datatypes::{ArrowPrimitiveType, DataType, Field, Schema}, |
| 538 | + datatypes::{ArrowPrimitiveType, DataType, Field, Fields, Schema}, |
546 | 539 | record_batch::RecordBatch,
|
547 | 540 | };
|
548 | 541 | use std::{error::Error, sync::Arc};
|
@@ -588,6 +581,42 @@ mod test {
|
588 | 581 | Ok(())
|
589 | 582 | }
|
590 | 583 |
|
| 584 | + #[test] |
| 585 | + fn test_append_struct() -> Result<(), Box<dyn Error>> { |
| 586 | + let db = Connection::open_in_memory()?; |
| 587 | + db.execute_batch("CREATE TABLE t1 (s STRUCT(v VARCHAR, i INTEGER))")?; |
| 588 | + { |
| 589 | + let struct_array = StructArray::from(vec![ |
| 590 | + ( |
| 591 | + Arc::new(Field::new("v", DataType::Utf8, true)), |
| 592 | + Arc::new(StringArray::from(vec![Some("foo"), Some("bar")])) as ArrayRef, |
| 593 | + ), |
| 594 | + ( |
| 595 | + Arc::new(Field::new("i", DataType::Int32, true)), |
| 596 | + Arc::new(Int32Array::from(vec![Some(1), Some(2)])) as ArrayRef, |
| 597 | + ), |
| 598 | + ]); |
| 599 | + |
| 600 | + let schema = Schema::new(vec![Field::new( |
| 601 | + "s", |
| 602 | + DataType::Struct(Fields::from(vec![ |
| 603 | + Field::new("v", DataType::Utf8, true), |
| 604 | + Field::new("i", DataType::Int32, true), |
| 605 | + ])), |
| 606 | + true, |
| 607 | + )]); |
| 608 | + |
| 609 | + let record_batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(struct_array)])?; |
| 610 | + let mut app = db.appender("t1")?; |
| 611 | + app.append_record_batch(record_batch)?; |
| 612 | + } |
| 613 | + let mut stmt = db.prepare("SELECT s FROM t1")?; |
| 614 | + let rbs: Vec<RecordBatch> = stmt.query_arrow([])?.collect(); |
| 615 | + assert_eq!(rbs.iter().map(|op| op.num_rows()).sum::<usize>(), 2); |
| 616 | + |
| 617 | + Ok(()) |
| 618 | + } |
| 619 | + |
591 | 620 | fn check_rust_primitive_array_roundtrip<T1, T2>(
|
592 | 621 | input_array: PrimitiveArray<T1>,
|
593 | 622 | expected_array: PrimitiveArray<T2>,
|
|
0 commit comments