Skip to content

Commit 4b2ccfb

Browse files
committed
noarrows
Signed-off-by: Robert Kruszewski <[email protected]>
1 parent 99101b1 commit 4b2ccfb

File tree

2 files changed

+92
-125
lines changed

2 files changed

+92
-125
lines changed

vortex-duckdb/src/convert/vector.rs

Lines changed: 47 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -4,34 +4,22 @@
44
/// Copied from duckdb-rs (https://github.com/duckdb/duckdb-rs/blob/main/crates/duckdb/src/vtab/arrow.rs)
55
use std::sync::Arc;
66

7-
use arrow_array::builder::GenericBinaryBuilder;
8-
use arrow_array::types::{
9-
Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type, UInt16Type,
10-
UInt32Type, UInt64Type,
11-
};
12-
use arrow_array::{
13-
Array, BooleanArray, Date32Array, Decimal128Array, FixedSizeListArray, GenericListViewArray,
14-
PrimitiveArray, StringArray, Time64MicrosecondArray, Time64NanosecondArray,
15-
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
16-
TimestampSecondArray,
17-
};
18-
use arrow_buffer::buffer::BooleanBuffer;
19-
use arrow_schema::Field;
207
use num_traits::AsPrimitive;
21-
use vortex::ArrayRef;
22-
use vortex::arrays::StructArray;
8+
use vortex::arrays::{PrimitiveArray, StructArray, TemporalArray};
239
use vortex::arrow::FromArrowArray;
24-
use vortex::buffer::BufferMut;
10+
use vortex::buffer::{Buffer, BufferMut};
11+
use vortex::dtype::datetime::TimeUnit;
2512
use vortex::dtype::{DType, DecimalDType, FieldNames, Nullability};
2613
use vortex::error::{VortexExpect, VortexResult, vortex_err};
2714
use vortex::scalar::DecimalType;
2815
use vortex::validity::Validity;
16+
use vortex::{ArrayRef, IntoArray};
2917

3018
use crate::convert::dtype::FromLogicalType;
3119
use crate::cpp::{
3220
DUCKDB_TYPE, duckdb_date, duckdb_list_entry, duckdb_string_t, duckdb_string_t_data,
3321
duckdb_string_t_length, duckdb_time, duckdb_time_ns, duckdb_timestamp, duckdb_timestamp_ms,
34-
duckdb_timestamp_s,
22+
duckdb_timestamp_ns, duckdb_timestamp_s,
3523
};
3624
use crate::duckdb::{DataChunk, Vector};
3725
use crate::exporter::precision_to_duckdb_storage_size;
@@ -62,75 +50,71 @@ impl<'a> DuckString<'a> {
6250
}
6351
}
6452

65-
// FIXME: flat vectors don't have all of these types. I think they only
66-
/// Converts flat vector to an arrow array
67-
pub fn flat_vector_to_arrow_array(
68-
vector: &mut Vector,
69-
len: usize,
70-
) -> Result<Arc<dyn Array>, Box<dyn std::error::Error>> {
53+
/// Converts flat vector to a vortex array
54+
pub fn flat_vector_to_arrow_array(vector: &mut Vector, len: usize) -> VortexResult<ArrayRef> {
7155
let type_id = vector.logical_type().as_type_id();
7256
match type_id {
7357
DUCKDB_TYPE::DUCKDB_TYPE_INTEGER => {
7458
let data = vector.as_slice_with_len::<i32>(len);
7559

76-
Ok(Arc::new(
77-
PrimitiveArray::<Int32Type>::from_iter_values_with_nulls(
78-
data.iter().copied(),
79-
vector.validity_ref(data.len()).to_null_buffer(),
80-
),
81-
))
60+
Ok(PrimitiveArray::new(
61+
Buffer::<i32>::copy_from(data),
62+
vector.validity_ref(data.len()).to_validity(),
63+
)
64+
.into_array())
8265
}
8366
DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP => {
8467
let data = vector.as_slice_with_len::<duckdb_timestamp>(len);
8568
let micros = data.iter().map(|duckdb_timestamp { micros }| *micros);
86-
let structs = TimestampMicrosecondArray::from_iter_values_with_nulls(
87-
micros,
88-
vector.validity_ref(data.len()).to_null_buffer(),
89-
);
90-
91-
Ok(Arc::new(structs))
69+
let arr = PrimitiveArray::new(
70+
Buffer::from_trusted_len_iter(micros),
71+
vector.validity_ref(data.len()).to_validity(),
72+
)
73+
.into_array();
74+
Ok(TemporalArray::new_timestamp(arr, TimeUnit::Microseconds, None).into_array())
9275
}
9376
DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_S => {
9477
let data = vector.as_slice_with_len::<duckdb_timestamp_s>(len);
9578
let seconds = data.iter().map(|duckdb_timestamp_s { seconds }| *seconds);
96-
let structs = TimestampSecondArray::from_iter_values_with_nulls(
97-
seconds,
98-
vector.validity_ref(data.len()).to_null_buffer(),
99-
);
100-
101-
Ok(Arc::new(structs))
79+
let arr = PrimitiveArray::new(
80+
Buffer::from_trusted_len_iter(seconds),
81+
vector.validity_ref(data.len()).to_validity(),
82+
)
83+
.into_array();
84+
Ok(TemporalArray::new_timestamp(arr, TimeUnit::Seconds, None).into_array())
10285
}
10386
DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_MS => {
10487
let data = vector.as_slice_with_len::<duckdb_timestamp_ms>(len);
10588
let millis = data.iter().map(|duckdb_timestamp_ms { millis }| *millis);
106-
let structs = TimestampMillisecondArray::from_iter_values_with_nulls(
107-
millis,
108-
vector.validity_ref(data.len()).to_null_buffer(),
109-
);
110-
111-
Ok(Arc::new(structs))
89+
let arr = PrimitiveArray::new(
90+
Buffer::from_trusted_len_iter(millis),
91+
vector.validity_ref(data.len()).to_validity(),
92+
)
93+
.into_array();
94+
Ok(TemporalArray::new_timestamp(arr, TimeUnit::Milliseconds, None).into_array())
11295
}
11396
DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_NS => {
114-
let data = vector.as_slice_with_len::<duckdb_timestamp>(len);
115-
let nanos = data
116-
.iter()
117-
.map(|duckdb_timestamp { micros }| *micros * 1000);
118-
let structs = TimestampNanosecondArray::from_iter_values_with_nulls(
119-
nanos,
120-
vector.validity_ref(data.len()).to_null_buffer(),
121-
);
122-
123-
Ok(Arc::new(structs))
97+
let data = vector.as_slice_with_len::<duckdb_timestamp_ns>(len);
98+
let nanos = data.iter().map(|duckdb_timestamp_ns { nanos }| *nanos);
99+
let arr = PrimitiveArray::new(
100+
Buffer::from_trusted_len_iter(nanos),
101+
vector.validity_ref(data.len()).to_validity(),
102+
)
103+
.into_array();
104+
Ok(TemporalArray::new_timestamp(arr, TimeUnit::Nanoseconds, None).into_array())
124105
}
125106
DUCKDB_TYPE::DUCKDB_TYPE_TIMESTAMP_TZ => {
126107
let data = vector.as_slice_with_len::<duckdb_timestamp>(len);
127-
let structs = TimestampMicrosecondArray::from_iter_values_with_nulls(
128-
data.iter().map(|duckdb_timestamp { micros }| *micros),
129-
vector.validity_ref(data.len()).to_null_buffer(),
108+
let micros = data.iter().map(|duckdb_timestamp { micros }| *micros);
109+
let arr = PrimitiveArray::new(
110+
Buffer::from_trusted_len_iter(micros),
111+
vector.validity_ref(data.len()).to_validity(),
112+
)
113+
.into_array();
114+
Ok(
115+
TemporalArray::new_timestamp(arr, TimeUnit::Nanoseconds, Some("UTC".to_string()))
116+
.into_array(),
130117
)
131-
.with_timezone("UTC");
132-
133-
Ok(Arc::new(structs))
134118
}
135119
DUCKDB_TYPE::DUCKDB_TYPE_VARCHAR => {
136120
let data = vector.as_slice_with_len::<duckdb_string_t>(len);

vortex-duckdb/src/duckdb/vector.rs

Lines changed: 45 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
use std::ffi::{CStr, CString, c_void};
55
use std::ptr;
66

7-
use arrow_buffer;
8-
use arrow_buffer::Buffer;
97
use bitvec::macros::internal::funty::Fundamental;
108
use bitvec::slice::BitSlice;
119
use bitvec::view::BitView;
10+
use vortex::buffer::{BitBuffer, Buffer};
1211
use vortex::error::{VortexResult, VortexUnwrap, vortex_bail, vortex_err};
12+
use vortex::validity::Validity;
1313

1414
use crate::cpp::{duckdb_vx_error, idx_t};
1515
use crate::duckdb::vector_buffer::VectorBuffer;
@@ -302,20 +302,17 @@ impl ValidityRef<'_> {
302302
(validity_entry & (1u64 << idx_in_entry)) != 0
303303
}
304304

305-
/// Creates a NullBuffer directly from the DuckDB validity mask for optimal performance.
306-
///
307-
/// Returns None if all values are valid (no null buffer needed).
308-
pub fn to_null_buffer(&self) -> Option<arrow_buffer::NullBuffer> {
305+
/// Creates a Validity directly from the DuckDB validity mask for optimal performance.
306+
pub fn to_validity(&self) -> Validity {
309307
let Some(validity) = self.validity else {
310-
// All values are valid - no null buffer needed
311-
return None;
308+
// All values are valid
309+
return Validity::AllValid;
312310
};
313311

314-
// Create copy of the buffer from the DuckDB validity mask.
315-
let buffer = Buffer::from_iter(validity.iter().cloned());
316-
317-
let boolean_buffer = arrow_buffer::BooleanBuffer::new(buffer, 0, self.len);
318-
Some(arrow_buffer::NullBuffer::new(boolean_buffer))
312+
Validity::from(BitBuffer::new(
313+
Buffer::<u64>::copy_from(validity).into_byte_buffer(),
314+
self.len,
315+
))
319316
}
320317
}
321318

@@ -325,19 +322,23 @@ mod tests {
325322
use crate::cpp::DUCKDB_TYPE;
326323

327324
#[test]
328-
fn test_create_null_buffer_all_valid() {
325+
fn test_create_validity_all_valid() {
329326
// Test case where all values are valid - should return Validity::AllValid
330327
let len = 10;
331328
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER);
332329
let vector = Vector::with_capacity(logical_type, len);
333330

334331
let validity = vector.validity_ref(len);
335-
let null_buffer = validity.to_null_buffer();
336-
assert!(null_buffer.is_none(), "Expected None for all-valid vector");
332+
let validity = validity.to_validity();
333+
assert_eq!(
334+
validity,
335+
Validity::AllValid,
336+
"Expected None for all-valid vector"
337+
);
337338
}
338339

339340
#[test]
340-
fn test_create_null_buffer_with_nulls() {
341+
fn test_create_validity_with_nulls() {
341342
// Test case with some null values
342343
let len = 10;
343344
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER);
@@ -351,30 +352,24 @@ mod tests {
351352
validity_slice.set(7, false); // null at position 7
352353

353354
let validity = vector.validity_ref(len);
354-
let null_buffer = validity.to_null_buffer();
355-
assert!(
356-
null_buffer.is_some(),
357-
"Expected Some(NullBuffer) for vector with nulls"
358-
);
359-
360-
let null_buffer = null_buffer.unwrap();
361-
assert_eq!(null_buffer.len(), len);
355+
let validity = validity.to_validity();
356+
assert_eq!(validity.maybe_len(), Some(len));
362357

363358
// Check that the right positions are null
364-
assert!(null_buffer.is_valid(0));
365-
assert!(null_buffer.is_null(1));
366-
assert!(null_buffer.is_valid(2));
367-
assert!(null_buffer.is_null(3));
368-
assert!(null_buffer.is_valid(4));
369-
assert!(null_buffer.is_valid(5));
370-
assert!(null_buffer.is_valid(6));
371-
assert!(null_buffer.is_null(7));
372-
assert!(null_buffer.is_valid(8));
373-
assert!(null_buffer.is_valid(9));
359+
assert!(validity.is_valid(0));
360+
assert!(validity.is_null(1));
361+
assert!(validity.is_valid(2));
362+
assert!(validity.is_null(3));
363+
assert!(validity.is_valid(4));
364+
assert!(validity.is_valid(5));
365+
assert!(validity.is_valid(6));
366+
assert!(validity.is_null(7));
367+
assert!(validity.is_valid(8));
368+
assert!(validity.is_valid(9));
374369
}
375370

376371
#[test]
377-
fn test_create_null_buffer_single_element() {
372+
fn test_create_validity_single_element() {
378373
// Test with a single element that is null
379374
let len = 1;
380375
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER);
@@ -385,16 +380,13 @@ mod tests {
385380
validity_slice.set(0, false); // null at position 0
386381

387382
let validity = vector.validity_ref(len);
388-
let null_buffer = validity.to_null_buffer();
389-
assert!(null_buffer.is_some());
390-
391-
let null_buffer = null_buffer.unwrap();
392-
assert_eq!(null_buffer.len(), 1);
393-
assert!(null_buffer.is_null(0));
383+
let validity = validity.to_validity();
384+
assert_eq!(validity.maybe_len(), Some(1));
385+
assert!(validity.is_null(0));
394386
}
395387

396388
#[test]
397-
fn test_create_null_buffer_single_element_valid() {
389+
fn test_create_validity_single_element_valid() {
398390
// Test with a single valid element
399391
let len = 1;
400392
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER);
@@ -405,29 +397,26 @@ mod tests {
405397
let _validity_slice = unsafe { vector.ensure_validity_bitslice(len) };
406398

407399
let validity = vector.validity_ref(len);
408-
let null_buffer = validity.to_null_buffer();
409-
assert!(null_buffer.is_some());
410-
411-
let null_buffer = null_buffer.unwrap();
412-
assert_eq!(null_buffer.len(), 1);
413-
assert!(null_buffer.is_valid(0));
400+
let validity = validity.to_validity();
401+
assert_eq!(validity.maybe_len(), Some(1));
402+
assert!(validity.is_valid(0));
414403
}
415404

416405
#[test]
417-
fn test_create_null_buffer_empty() {
406+
fn test_create_validity_empty() {
418407
// Test with zero length
419408
let len = 0;
420409
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER);
421410
let vector = Vector::with_capacity(logical_type, len);
422411

423412
let validity = vector.validity_ref(len);
424-
let null_buffer = validity.to_null_buffer();
413+
let validity = validity.to_validity();
425414
// Even with zero length, if validity mask doesn't exist, should return Validity::AllValid
426-
assert!(null_buffer.is_none());
415+
assert_eq!(validity, Validity::AllValid);
427416
}
428417

429418
#[test]
430-
fn test_create_null_buffer_all_nulls() {
419+
fn test_create_validity_all_nulls() {
431420
// Test case where all values are null
432421
let len = 10;
433422
let logical_type = LogicalType::new(DUCKDB_TYPE::DUCKDB_TYPE_INTEGER);
@@ -441,16 +430,10 @@ mod tests {
441430
}
442431

443432
let validity = vector.validity_ref(len);
444-
let null_buffer = validity.to_null_buffer();
445-
assert!(null_buffer.is_some());
446-
447-
let null_buffer = null_buffer.unwrap();
448-
assert_eq!(null_buffer.len(), len);
449-
433+
let validity = validity.to_validity();
434+
assert_eq!(validity.maybe_len(), Some(len));
450435
// Check that all positions are null
451-
for i in 0..len {
452-
assert!(null_buffer.is_null(i), "Element {i} should be null");
453-
}
436+
assert_eq!(validity, Validity::AllInvalid);
454437
}
455438

456439
#[test]

0 commit comments

Comments
 (0)