@@ -25,8 +25,14 @@ pub use operator::*;
2525pub use serde:: * ;
2626pub use validity:: * ;
2727pub use visitor:: * ;
28+ use vortex_buffer:: ByteBuffer ;
29+ use vortex_dtype:: DType ;
30+ use vortex_error:: VortexResult ;
2831
29- use crate :: { Array , Encoding , EncodingId , EncodingRef , IntoArray } ;
32+ use crate :: serde:: ArrayChildren ;
33+ use crate :: {
34+ Array , DeserializeMetadata , Encoding , EncodingId , EncodingRef , IntoArray , SerializeMetadata ,
35+ } ;
3036
3137/// The encoding [`VTable`] encapsulates logic for an Encoding type and associated Array type.
3238/// The logic is split across several "VTable" traits to enable easier code organization than
@@ -46,6 +52,7 @@ use crate::{Array, Encoding, EncodingId, EncodingRef, IntoArray};
4652pub trait VTable : ' static + Sized + Send + Sync + Debug {
4753 type Array : ' static + Send + Sync + Clone + Debug + Deref < Target = dyn Array > + IntoArray ;
4854 type Encoding : ' static + Send + Sync + Clone + Deref < Target = dyn Encoding > ;
55+ type Metadata : Debug + SerializeMetadata + DeserializeMetadata ;
4956
5057 type ArrayVTable : ArrayVTable < Self > ;
5158 type CanonicalVTable : CanonicalVTable < Self > ;
@@ -60,9 +67,6 @@ pub trait VTable: 'static + Sized + Send + Sync + Debug {
6067 /// compression.
6168 /// Can be disabled by assigning to the [`NotSupported`] type.
6269 type EncodeVTable : EncodeVTable < Self > ;
63- /// Optionally enable serde for this encoding by implementing the [`SerdeVTable`] trait.
64- /// Can be disabled by assigning to the [`NotSupported`] type.
65- type SerdeVTable : SerdeVTable < Self > ;
6670 /// Optionally enable the [`OperatorVTable`] for this encoding. This allows it to partake in
6771 /// operator operations.
6872 type OperatorVTable : OperatorVTable < Self > ;
@@ -72,6 +76,61 @@ pub trait VTable: 'static + Sized + Send + Sync + Debug {
7276
7377 /// Returns the [`EncodingRef`] for the given `array`.
7478 fn encoding ( array : & Self :: Array ) -> EncodingRef ;
79+
80+ /// Exports the encoding-specific metadata for `array`.
81+ ///
82+ /// Every other part of the array is exported through the [`crate::vtable::VisitorVTable`].
83+ ///
84+ /// * If the array carries no metadata, implementations should return
85+ /// [`crate::metadata::EmptyMetadata`].
86+ fn metadata ( array : & Self :: Array ) -> VortexResult < Self :: Metadata > ;
87+
88+ /// Serializes `metadata` into a byte buffer for IPC or file storage.
89+ /// Returns `Ok(None)` if the array cannot be serialized.
90+ fn serialize ( metadata : Self :: Metadata ) -> VortexResult < Option < Vec < u8 > > > ;
91+
92+ /// Deserializes metadata from the byte slice `bytes`.
93+ fn deserialize ( bytes : & [ u8 ] ) -> VortexResult < Self :: Metadata > ;
94+
95+ /// Builds an array from its type-erased components.
96+ ///
97+ /// Called on the file and IPC deserialization pathways to reconstruct the array from its
98+ /// type-erased parts: dtype, length, metadata, buffers, and children.
99+ ///
100+ /// Encoding implementers should note that all validation needed to ensure the encoding
101+ /// is safe to read must happen inside this method.
102+ ///
103+ /// # Safety and correctness
104+ ///
105+ /// This method should *never* panic: it must either return an error or return a
106+ /// valid `Array` that meets all of the encoding's preconditions.
107+ ///
108+ /// For example, the `build` implementation for a dictionary encoding should ensure that all
109+ /// codes lie in the valid range. For a UTF-8 array, it should check the bytes to ensure they
110+ /// are all valid string data bytes. Any corrupt files or malformed data buffers should be
111+ /// caught here, before the deserialized array is returned.
112+ ///
113+ /// # Validation
114+ ///
115+ /// Validation is mainly meant to ensure that all internal pointers in the encoding reference
116+ /// valid ranges of data, and that all data conforms to its DType constraints. Together these
117+ /// ensure that no array operation panics at runtime or yields undefined behavior when unsafe
118+ /// operations like `get_unchecked` use indices stored in the array buffer.
119+ ///
120+ /// Examples of the kinds of validation that should be part of the `build` step:
121+ ///
122+ /// * Checking that any offsets buffers point to valid offsets in some other child array
123+ /// * Checking that any buffers for data or validity have the appropriate size for the
124+ /// encoding
125+ /// * Running UTF-8 validation on any buffers that are expected to hold flat UTF-8 data
126+ fn build (
127+ encoding : & Self :: Encoding ,
128+ dtype : & DType ,
129+ len : usize ,
130+ metadata : & Self :: Metadata ,
131+ buffers : & [ ByteBuffer ] ,
132+ children : & dyn ArrayChildren ,
133+ ) -> VortexResult < Self :: Array > ;
75134}
76135
77136/// Placeholder type used to indicate when a particular vtable is not supported by the encoding.
0 commit comments