Skip to content

Commit 5a77496

Browse files
committed
wip
Signed-off-by: Joe Isaacs <[email protected]>
1 parent 35d12a3 commit 5a77496

File tree

7 files changed

+142
-124
lines changed

7 files changed

+142
-124
lines changed

vortex-array/src/array/mod.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -735,14 +735,13 @@ impl<V: VTable> ArrayVisitor for ArrayAdapter<V> {
735735
}
736736

737737
fn metadata(&self) -> VortexResult<Option<Vec<u8>>> {
738-
Ok(<V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0)?.map(|m| m.serialize()))
738+
V::serialize(V::metadata(&self.0)?)
739739
}
740740

741741
fn metadata_fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
742-
match <V::SerdeVTable as SerdeVTable<V>>::metadata(&self.0) {
742+
match V::metadata(&self.0) {
743743
Err(e) => write!(f, "<serde error: {e}>"),
744-
Ok(None) => write!(f, "<serde not supported>"),
745-
Ok(Some(metadata)) => Debug::fmt(&metadata, f),
744+
Ok(metadata) => Debug::fmt(&metadata, f),
746745
}
747746
}
748747
}

vortex-array/src/arrays/primitive/vtable/mod.rs

Lines changed: 66 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,15 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use vortex_buffer::{Alignment, ByteBuffer};
5+
use vortex_dtype::{DType, PType, match_each_native_ptype};
6+
use vortex_error::{VortexResult, vortex_bail};
7+
48
use crate::arrays::PrimitiveArray;
9+
use crate::serde::ArrayChildren;
10+
use crate::validity::Validity;
511
use crate::vtable::{NotSupported, VTable, ValidityVTableFromValidityHelper};
6-
use crate::{EncodingId, EncodingRef, vtable};
12+
use crate::{EmptyMetadata, EncodingId, EncodingRef, vtable};
713

814
mod array;
915
mod canonical;
@@ -18,6 +24,7 @@ vtable!(Primitive);
1824
impl VTable for PrimitiveVTable {
1925
type Array = PrimitiveArray;
2026
type Encoding = PrimitiveEncoding;
27+
type Metadata = EmptyMetadata;
2128

2229
type ArrayVTable = Self;
2330
type CanonicalVTable = Self;
@@ -26,7 +33,6 @@ impl VTable for PrimitiveVTable {
2633
type VisitorVTable = Self;
2734
type ComputeVTable = NotSupported;
2835
type EncodeVTable = NotSupported;
29-
type SerdeVTable = Self;
3036
type OperatorVTable = Self;
3137

3238
fn id(_encoding: &Self::Encoding) -> EncodingId {
@@ -36,6 +42,64 @@ impl VTable for PrimitiveVTable {
3642
fn encoding(_array: &Self::Array) -> EncodingRef {
3743
EncodingRef::new_ref(PrimitiveEncoding.as_ref())
3844
}
45+
46+
fn metadata(_array: &PrimitiveArray) -> VortexResult<Self::Metadata> {
47+
Ok(EmptyMetadata)
48+
}
49+
50+
fn serialize(_metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
51+
Ok(Some(vec![]))
52+
}
53+
54+
fn deserialize(_buffer: &[u8]) -> VortexResult<Self::Metadata> {
55+
Ok(EmptyMetadata)
56+
}
57+
58+
fn build(
59+
_encoding: &PrimitiveEncoding,
60+
dtype: &DType,
61+
len: usize,
62+
_metadata: &Self::Metadata,
63+
buffers: &[ByteBuffer],
64+
children: &dyn ArrayChildren,
65+
) -> VortexResult<PrimitiveArray> {
66+
if buffers.len() != 1 {
67+
vortex_bail!("Expected 1 buffer, got {}", buffers.len());
68+
}
69+
let buffer = buffers[0].clone();
70+
71+
let validity = if children.is_empty() {
72+
Validity::from(dtype.nullability())
73+
} else if children.len() == 1 {
74+
let validity = children.get(0, &Validity::DTYPE, len)?;
75+
Validity::Array(validity)
76+
} else {
77+
vortex_bail!("Expected 0 or 1 child, got {}", children.len());
78+
};
79+
80+
let ptype = PType::try_from(dtype)?;
81+
82+
if !buffer.is_aligned(Alignment::new(ptype.byte_width())) {
83+
vortex_bail!(
84+
"Buffer is not aligned to {}-byte boundary",
85+
ptype.byte_width()
86+
);
87+
}
88+
if buffer.len() != ptype.byte_width() * len {
89+
vortex_bail!(
90+
"Buffer length {} does not match expected length {} for {}, {}",
91+
buffer.len(),
92+
ptype.byte_width() * len,
93+
ptype.byte_width(),
94+
len,
95+
);
96+
}
97+
98+
match_each_native_ptype!(ptype, |P| {
99+
let buffer = Buffer::<P>::from_byte_buffer(buffer);
100+
Ok(PrimitiveArray::new(buffer, validity))
101+
})
102+
}
39103
}
40104

41105
#[derive(Clone, Debug)]

vortex-array/src/arrays/primitive/vtable/serde.rs

Lines changed: 1 addition & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -12,56 +12,4 @@ use crate::serde::ArrayChildren;
1212
use crate::validity::Validity;
1313
use crate::vtable::SerdeVTable;
1414

15-
impl SerdeVTable<PrimitiveVTable> for PrimitiveVTable {
16-
type Metadata = EmptyMetadata;
17-
18-
fn metadata(_array: &PrimitiveArray) -> VortexResult<Option<Self::Metadata>> {
19-
Ok(Some(EmptyMetadata))
20-
}
21-
22-
fn build(
23-
_encoding: &PrimitiveEncoding,
24-
dtype: &DType,
25-
len: usize,
26-
_metadata: &Self::Metadata,
27-
buffers: &[ByteBuffer],
28-
children: &dyn ArrayChildren,
29-
) -> VortexResult<PrimitiveArray> {
30-
if buffers.len() != 1 {
31-
vortex_bail!("Expected 1 buffer, got {}", buffers.len());
32-
}
33-
let buffer = buffers[0].clone();
34-
35-
let validity = if children.is_empty() {
36-
Validity::from(dtype.nullability())
37-
} else if children.len() == 1 {
38-
let validity = children.get(0, &Validity::DTYPE, len)?;
39-
Validity::Array(validity)
40-
} else {
41-
vortex_bail!("Expected 0 or 1 child, got {}", children.len());
42-
};
43-
44-
let ptype = PType::try_from(dtype)?;
45-
46-
if !buffer.is_aligned(Alignment::new(ptype.byte_width())) {
47-
vortex_bail!(
48-
"Buffer is not aligned to {}-byte boundary",
49-
ptype.byte_width()
50-
);
51-
}
52-
if buffer.len() != ptype.byte_width() * len {
53-
vortex_bail!(
54-
"Buffer length {} does not match expected length {} for {}, {}",
55-
buffer.len(),
56-
ptype.byte_width() * len,
57-
ptype.byte_width(),
58-
len,
59-
);
60-
}
61-
62-
match_each_native_ptype!(ptype, |P| {
63-
let buffer = Buffer::<P>::from_byte_buffer(buffer);
64-
Ok(PrimitiveArray::new(buffer, validity))
65-
})
66-
}
67-
}
15+
impl SerdeVTable<PrimitiveVTable> for PrimitiveVTable {}

vortex-array/src/encoding.rs

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -76,17 +76,12 @@ impl<V: VTable> Encoding for EncodingAdapter<V> {
7676
&self,
7777
dtype: &DType,
7878
len: usize,
79-
metadata: &[u8],
79+
metadata_bytes: &[u8],
8080
buffers: &[ByteBuffer],
8181
children: &dyn ArrayChildren,
8282
) -> VortexResult<ArrayRef> {
83-
let metadata =
84-
<<V::SerdeVTable as SerdeVTable<V>>::Metadata as DeserializeMetadata>::deserialize(
85-
metadata,
86-
)?;
87-
let array = <V::SerdeVTable as SerdeVTable<V>>::build(
88-
&self.0, dtype, len, &metadata, buffers, children,
89-
)?;
83+
let metadata = V::deserialize(metadata_bytes);
84+
let array = V::build(&self.0, dtype, len, &metadata, buffers, children)?;
9085
assert_eq!(array.len(), len, "Array length mismatch after building");
9186
assert_eq!(array.dtype(), dtype, "Array dtype mismatch after building");
9287
Ok(array.to_array())

vortex-array/src/vtable/array.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
// SPDX-License-Identifier: Apache-2.0
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

4+
use std::fmt::Debug;
45
use std::hash::Hasher;
56

7+
use vortex_buffer::ByteBuffer;
68
use vortex_dtype::DType;
9+
use vortex_error::VortexResult;
710

8-
use crate::Precision;
11+
use crate::serde::ArrayChildren;
912
use crate::stats::StatsSetRef;
1013
use crate::vtable::VTable;
14+
use crate::{DeserializeMetadata, Precision, SerializeMetadata};
1115

1216
pub trait ArrayVTable<V: VTable> {
1317
fn len(array: &V::Array) -> usize;

vortex-array/src/vtable/mod.rs

Lines changed: 63 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,14 @@ pub use operator::*;
2525
pub use serde::*;
2626
pub use validity::*;
2727
pub use visitor::*;
28+
use vortex_buffer::ByteBuffer;
29+
use vortex_dtype::DType;
30+
use vortex_error::VortexResult;
2831

29-
use crate::{Array, Encoding, EncodingId, EncodingRef, IntoArray};
32+
use crate::serde::ArrayChildren;
33+
use crate::{
34+
Array, DeserializeMetadata, Encoding, EncodingId, EncodingRef, IntoArray, SerializeMetadata,
35+
};
3036

3137
/// The encoding [`VTable`] encapsulates logic for an Encoding type and associated Array type.
3238
/// The logic is split across several "VTable" traits to enable easier code organization than
@@ -46,6 +52,7 @@ use crate::{Array, Encoding, EncodingId, EncodingRef, IntoArray};
4652
pub trait VTable: 'static + Sized + Send + Sync + Debug {
4753
type Array: 'static + Send + Sync + Clone + Debug + Deref<Target = dyn Array> + IntoArray;
4854
type Encoding: 'static + Send + Sync + Clone + Deref<Target = dyn Encoding>;
55+
type Metadata: Debug + SerializeMetadata + DeserializeMetadata;
4956

5057
type ArrayVTable: ArrayVTable<Self>;
5158
type CanonicalVTable: CanonicalVTable<Self>;
@@ -60,9 +67,6 @@ pub trait VTable: 'static + Sized + Send + Sync + Debug {
6067
/// compression.
6168
/// Can be disabled by assigning to the [`NotSupported`] type.
6269
type EncodeVTable: EncodeVTable<Self>;
63-
/// Optionally enable serde for this encoding by implementing the [`SerdeVTable`] trait.
64-
/// Can be disabled by assigning to the [`NotSupported`] type.
65-
type SerdeVTable: SerdeVTable<Self>;
6670
/// Optionally enable the [`OperatorVTable`] for this encoding. This allows it to partake in
6771
/// operator operations.
6872
type OperatorVTable: OperatorVTable<Self>;
@@ -72,6 +76,61 @@ pub trait VTable: 'static + Sized + Send + Sync + Debug {
7276

7377
/// Returns the encoding for the array.
7478
fn encoding(array: &Self::Array) -> EncodingRef;
79+
80+
/// Exports metadata for an array.
81+
///
82+
/// All other parts of the array are exported using the [`crate::vtable::VisitorVTable`].
83+
///
84+
/// * If the array does not contain metadata, it should return
85+
/// [`crate::metadata::EmptyMetadata`].
86+
fn metadata(array: &Self::Array) -> VortexResult<Self::Metadata>;
87+
88+
/// Serialize metadata into a byte buffer for IPC or file storage.
89+
/// Return `None` if the array cannot be serialized.
90+
fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>>;
91+
92+
/// Deserialize metadata from a byte buffer.
93+
fn deserialize(bytes: &[u8]) -> VortexResult<Self::Metadata>;
94+
95+
/// Build an array from components.
96+
///
97+
/// This is called on the file and IPC deserialization pathways, to reconstruct the array from
98+
/// type-erased components.
99+
///
100+
/// Encoding implementers should take note that all validation necessary to ensure the encoding
101+
/// is safe to read should happen inside of this method.
102+
///
103+
/// # Safety and correctness
104+
///
105+
/// This method should *never* panic, it must always return an error or else it returns a
106+
/// valid `Array` that meets all the encoding's preconditions.
107+
///
108+
/// For example, the `build` implementation for a dictionary encoding should ensure that all
109+
/// codes lie in the valid range. For a UTF-8 array, it should check the bytes to ensure they
110+
/// are all valid string data bytes. Any corrupt files or malformed data buffers should be
111+
/// caught here, before returning the deserialized array.
112+
///
113+
/// # Validation
114+
///
115+
/// Validation is mainly meant to ensure that all internal pointers in the encoding reference
116+
/// valid ranges of data, and that all data conforms to its DType constraints. These ensure
117+
/// that no array operations will panic at runtime, or yield undefined behavior when unsafe
118+
/// operations like `get_unchecked` use indices in the array buffer.
119+
///
120+
/// Examples of the kinds of validation that should be part of the `build` step:
121+
///
122+
/// * Checking that any offsets buffers point to valid offsets in some other child array
123+
/// * Checking that any buffers for data or validity have the appropriate size for the
124+
/// encoding
125+
/// * Running UTF-8 validation for any buffers that are expected to hold flat UTF-8 data
126+
fn build(
127+
encoding: &Self::Encoding,
128+
dtype: &DType,
129+
len: usize,
130+
metadata: &Self::Metadata,
131+
buffers: &[ByteBuffer],
132+
children: &dyn ArrayChildren,
133+
) -> VortexResult<Self::Array>;
75134
}
76135

77136
/// Placeholder type used to indicate when a particular vtable is not supported by the encoding.

vortex-array/src/vtable/serde.rs

Lines changed: 1 addition & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -14,58 +14,7 @@ use crate::{DeserializeMetadata, EmptyMetadata, SerializeMetadata};
1414
/// VTable trait for building an array from its serialized components.
1515
///
1616
/// # Guarantees
17-
pub trait SerdeVTable<V: VTable> {
18-
type Metadata: Debug + SerializeMetadata + DeserializeMetadata;
19-
20-
/// Exports metadata for an array.
21-
///
22-
/// All other parts of the array are exported using the [`crate::vtable::VisitorVTable`].
23-
///
24-
/// * If the array does not require serialized metadata, it should return
25-
/// [`crate::metadata::EmptyMetadata`].
26-
/// * If the array does not support serialization, it should return `None`.
27-
fn metadata(array: &V::Array) -> VortexResult<Option<Self::Metadata>>;
28-
29-
/// Build an array from components.
30-
///
31-
/// This is called on the file and IPC deserialization pathways, to reconstruct the array from
32-
/// type-erased components.
33-
///
34-
/// Encoding implementers should take note that all validation necessary to ensure the encoding
35-
/// is safe to read should happen inside of this method.
36-
///
37-
/// # Safety and correctness
38-
///
39-
/// This method should *never* panic, it must always return an error or else it returns a
40-
/// valid `Array` that meets all the encoding's preconditions.
41-
///
42-
/// For example, the `build` implementation for a dictionary encoding should ensure that all
43-
/// codes lie in the valid range. For a UTF-8 array, it should check the bytes to ensure they
44-
/// are all valid string data bytes. Any corrupt files or malformed data buffers should be
45-
/// caught here, before returning the deserialized array.
46-
///
47-
/// # Validation
48-
///
49-
/// Validation is mainly meant to ensure that all internal pointers in the encoding reference
50-
/// valid ranges of data, and that all data conforms to its DType constraints. These ensure
51-
/// that no array operations will panic at runtime, or yield undefined behavior when unsafe
52-
/// operations like `get_unchecked` use indices in the array buffer.
53-
///
54-
/// Examples of the kinds of validation that should be part of the `build` step:
55-
///
56-
/// * Checking that any offsets buffers point to valid offsets in some other child array
57-
/// * Checking that any buffers for data or validity have the appropriate size for the
58-
/// encoding
59-
/// * Running UTF-8 validation for any buffers that are expected to hold flat UTF-8 data
60-
fn build(
61-
encoding: &V::Encoding,
62-
dtype: &DType,
63-
len: usize,
64-
metadata: &<Self::Metadata as DeserializeMetadata>::Output,
65-
buffers: &[ByteBuffer],
66-
children: &dyn ArrayChildren,
67-
) -> VortexResult<V::Array>;
68-
}
17+
pub trait SerdeVTable<V: VTable> {}
6918

7019
impl<V: VTable> SerdeVTable<V> for NotSupported {
7120
type Metadata = EmptyMetadata;

0 commit comments

Comments
 (0)