Skip to content

Commit d991be9

Browse files
committed
Add more arrow execution
Signed-off-by: Nicholas Gates <[email protected]>
1 parent 80874f3 commit d991be9

File tree

14 files changed

+371
-55
lines changed

14 files changed

+371
-55
lines changed

vortex-array/src/array/mod.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
mod visitor;
55

66
use std::any::Any;
7+
use std::any::type_name;
78
use std::fmt::Debug;
89
use std::fmt::Formatter;
910
use std::hash::Hash;
1011
use std::hash::Hasher;
12+
use std::ops::Deref;
1113
use std::ops::Range;
1214
use std::sync::Arc;
1315

@@ -18,6 +20,7 @@ use vortex_dtype::Nullability;
1820
use vortex_error::VortexExpect;
1921
use vortex_error::VortexResult;
2022
use vortex_error::vortex_ensure;
23+
use vortex_error::vortex_err;
2124
use vortex_error::vortex_panic;
2225
use vortex_mask::Mask;
2326
use vortex_scalar::Scalar;
@@ -73,6 +76,9 @@ pub trait Array:
7376
/// Returns the array as a reference to a generic [`Any`] trait object.
7477
fn as_any(&self) -> &dyn Any;
7578

79+
/// Returns the array as an `Arc<dyn Any + Send + Sync>`.
80+
fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync>;
81+
7682
/// Returns the array as an [`ArrayRef`].
7783
fn to_array(&self) -> ArrayRef;
7884

@@ -211,6 +217,10 @@ impl Array for Arc<dyn Array> {
211217
self.as_ref().as_any()
212218
}
213219

220+
fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
221+
self
222+
}
223+
214224
#[inline]
215225
fn to_array(&self) -> ArrayRef {
216226
self.clone()
@@ -350,6 +360,24 @@ impl dyn Array + '_ {
350360
.map(|array_adapter| &array_adapter.0)
351361
}
352362

363+
/// Returns the array downcast to the given `A` as an owned object.
364+
pub fn try_into<V: VTable>(self: Arc<Self>) -> Result<V::Array, Arc<Self>> {
365+
match self.is::<V>() {
366+
true => {
367+
let arc = self
368+
.as_any_arc()
369+
.downcast::<ArrayAdapter<V>>()
370+
.map_err(|_| vortex_err!("failed to downcast"))
371+
.vortex_expect("Failed to downcast");
372+
Ok(match Arc::try_unwrap(arc) {
373+
Ok(array) => array.0,
374+
Err(arc) => arc.deref().0.clone(),
375+
})
376+
}
377+
false => Err(self),
378+
}
379+
}
380+
353381
/// Is self an array with encoding from vtable `V`.
354382
pub fn is<V: VTable>(&self) -> bool {
355383
self.as_opt::<V>().is_some()
@@ -443,6 +471,10 @@ impl<V: VTable> Array for ArrayAdapter<V> {
443471
self
444472
}
445473

474+
/// Returns this adapter as an `Arc<dyn Any + Send + Sync>`.
///
/// The `Arc` is returned as-is (only its static type is erased), so callers can downcast
/// it back to `Arc<ArrayAdapter<V>>`.
fn as_any_arc(self: Arc<Self>) -> Arc<dyn Any + Send + Sync> {
    self
}
477+
446478
fn to_array(&self) -> ArrayRef {
447479
Arc::new(ArrayAdapter::<V>(self.0.clone()))
448480
}

vortex-array/src/arrays/dict/array.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ impl DictArray {
114114
Ok(unsafe { Self::new_unchecked(codes, values) })
115115
}
116116

117+
pub fn into_parts(self) -> (ArrayRef, ArrayRef) {
118+
(self.codes, self.values)
119+
}
120+
117121
#[inline]
118122
pub fn codes(&self) -> &ArrayRef {
119123
&self.codes

vortex-array/src/arrays/listview/array.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,10 @@ impl ListViewArray {
319319
.is_ok()
320320
}
321321

322+
pub fn into_parts(self) -> (ArrayRef, ArrayRef, ArrayRef, Validity) {
323+
(self.elements, self.offsets, self.sizes, self.validity)
324+
}
325+
322326
/// Returns the offset at the given index.
323327
///
324328
/// Note that it is possible the corresponding list view is null (which is only defined by the

vortex-array/src/arrow/executor/bool.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use crate::VectorExecutor;
1414
use crate::arrow::null_buffer::to_null_buffer;
1515

1616
pub(super) fn to_arrow_bool(
17-
array: &ArrayRef,
17+
array: ArrayRef,
1818
session: &VortexSession,
1919
) -> VortexResult<ArrowArrayRef> {
2020
let bool_vector = array
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use arrow_array::ArrayRef as ArrowArrayRef;
7+
use arrow_array::GenericBinaryArray;
8+
use arrow_array::types::ByteArrayType;
9+
use vortex_compute::arrow::IntoArrow;
10+
use vortex_dtype::DType;
11+
use vortex_dtype::NativePType;
12+
use vortex_dtype::Nullability;
13+
use vortex_dtype::PTypeDowncastExt;
14+
use vortex_error::VortexError;
15+
use vortex_error::VortexResult;
16+
use vortex_session::VortexSession;
17+
18+
use crate::ArrayRef;
19+
use crate::VectorExecutor;
20+
use crate::arrays::VarBinArray;
21+
use crate::arrays::VarBinVTable;
22+
use crate::arrow::null_buffer::to_null_buffer;
23+
use crate::builtins::ArrayBuiltins;
24+
25+
/// Convert a Vortex array into an Arrow GenericBinaryArray.
26+
pub(super) fn to_arrow_byte_array<T: ByteArrayType>(
27+
array: ArrayRef,
28+
session: &VortexSession,
29+
) -> VortexResult<ArrowArrayRef>
30+
where
31+
T::Offset: NativePType,
32+
{
33+
// If the Vortex array is already in VarBin format, we can directly convert it.
34+
if let Some(array) = array.as_opt::<VarBinVTable>() {
35+
return varbin_to_byte_array::<T>(array, session);
36+
}
37+
38+
// Otherwise, we execute the array to a BinaryView vector and cast from there.
39+
let binary_view = array.execute_vector(session)?.into_arrow()?;
40+
arrow_cast::cast(&binary_view, &T::DATA_TYPE).map_err(VortexError::from)
41+
}
42+
43+
/// Convert a Vortex VarBinArray into an Arrow GenericBinaryArray.
44+
fn varbin_to_byte_array<T: ByteArrayType>(
45+
array: &VarBinArray,
46+
session: &VortexSession,
47+
) -> VortexResult<ArrowArrayRef>
48+
where
49+
T::Offset: NativePType,
50+
{
51+
// We must cast the offsets to the required offset type.
52+
let offsets = array
53+
.offsets()
54+
.cast(DType::Primitive(T::Offset::PTYPE, Nullability::NonNullable))?
55+
.execute_vector(session)?
56+
.into_primitive()
57+
.downcast::<T::Offset>()
58+
.into_buffer()
59+
.into_arrow_offset_buffer();
60+
61+
let data = array.bytes().clone().into_arrow_buffer();
62+
63+
let null_buffer = to_null_buffer(array.validity_mask());
64+
65+
Ok(Arc::new(unsafe {
66+
GenericBinaryArray::<T::Offset>::new_unchecked(offsets, data, null_buffer)
67+
}))
68+
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use arrow_array::ArrayRef as ArrowArrayRef;
5+
use arrow_array::types::ByteViewType;
6+
use vortex_compute::arrow::IntoArrow;
7+
use vortex_dtype::DType;
8+
use vortex_dtype::Nullability;
9+
use vortex_dtype::arrow::FromArrowType;
10+
use vortex_error::VortexResult;
11+
use vortex_session::VortexSession;
12+
13+
use crate::ArrayRef;
14+
use crate::VectorExecutor;
15+
use crate::builtins::ArrayBuiltins;
16+
17+
pub(super) fn to_arrow_byte_view<T: ByteViewType>(
18+
array: ArrayRef,
19+
session: &VortexSession,
20+
) -> VortexResult<ArrowArrayRef> {
21+
// First we cast the array into the desired ByteView type.
22+
// We do this in case the vortex array is Utf8, and we want Binary or vice versa. By casting
23+
// first, we may push this down through the Vortex array tree. We choose nullable to be most
24+
// flexible since there's no prescribed nullability in Arrow types.
25+
let array = array.cast(DType::from_arrow((&T::DATA_TYPE, Nullability::Nullable)))?;
26+
27+
// Perform a naive conversion via our VarBinView vector representation
28+
array.execute_vector(session)?.into_arrow()
29+
}

vortex-array/src/arrow/executor/dictionary.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,16 @@ use crate::arrays::DictVTable;
1919
use crate::arrow::ArrowArrayExecutor;
2020

2121
pub(super) fn to_arrow_dictionary(
22-
array: &ArrayRef,
22+
array: ArrayRef,
2323
codes_type: &DataType,
2424
values_type: &DataType,
2525
session: &VortexSession,
2626
) -> VortexResult<ArrowArrayRef> {
2727
// Check if we have a Vortex dictionary array
28-
if let Some(array) = array.as_opt::<DictVTable>() {
29-
return dict_to_dict(array, codes_type, values_type, session);
30-
}
28+
let array = match array.try_into::<DictVTable>() {
29+
Ok(array) => return dict_to_dict(array, codes_type, values_type, session),
30+
Err(a) => a,
31+
};
3132

3233
// Otherwise, we should try and build a dictionary.
3334
// Arrow hides this functionality inside the cast module!
@@ -41,13 +42,14 @@ pub(super) fn to_arrow_dictionary(
4142

4243
/// Convert a Vortex dictionary array to an Arrow dictionary array.
4344
fn dict_to_dict(
44-
array: &DictArray,
45+
array: DictArray,
4546
codes_type: &DataType,
4647
values_type: &DataType,
4748
session: &VortexSession,
4849
) -> VortexResult<ArrowArrayRef> {
49-
let codes = array.codes().execute_arrow(codes_type, session)?;
50-
let values = array.values().execute_arrow(values_type, session)?;
50+
let (codes, values) = array.into_parts();
51+
let codes = codes.execute_arrow(codes_type, session)?;
52+
let values = values.execute_arrow(values_type, session)?;
5153

5254
Ok(match codes_type {
5355
DataType::Int8 => Arc::new(unsafe {

0 commit comments

Comments
 (0)