diff --git a/vortex-array/src/data/mod.rs b/vortex-array/src/data/mod.rs index 94ab6c06b43..c4da19e3418 100644 --- a/vortex-array/src/data/mod.rs +++ b/vortex-array/src/data/mod.rs @@ -21,7 +21,7 @@ use crate::stats::{ArrayStatistics, Stat, Statistics, StatsSet}; use crate::stream::{ArrayStream, ArrayStreamAdapter}; use crate::validity::{ArrayValidity, LogicalValidity, ValidityVTable}; use crate::{ - ArrayChildrenIterator, ArrayDType, ArrayLen, ChildrenCollector, ContextRef, + ArrayChildrenIterator, ArrayDType, ArrayLen, ChildrenCollector, ContextRef, MetadataBytes, NamedChildrenCollector, }; @@ -60,7 +60,7 @@ impl ArrayData { encoding: EncodingRef, dtype: DType, len: usize, - metadata: Option, + metadata: MetadataBytes, buffers: Option>, children: Option>, statistics: StatsSet, @@ -276,10 +276,11 @@ impl ArrayData { offsets } - pub fn metadata_bytes(&self) -> Option<&[u8]> { + /// Returns the Array metadata bytes with 8-byte aligned. + pub fn metadata_bytes(&self) -> MetadataBytes { match &self.0 { - InnerArrayData::Owned(d) => d.metadata.as_ref().map(|b| b.as_slice()), - InnerArrayData::Viewed(v) => v.flatbuffer().metadata().map(|m| m.bytes()), + InnerArrayData::Owned(d) => d.metadata, + InnerArrayData::Viewed(v) => v.flatbuffer().metadata().to_le_bytes(), } } diff --git a/vortex-array/src/data/owned.rs b/vortex-array/src/data/owned.rs index 478497321b9..5a1e4a44628 100644 --- a/vortex-array/src/data/owned.rs +++ b/vortex-array/src/data/owned.rs @@ -6,7 +6,7 @@ use vortex_error::{vortex_bail, VortexResult}; use crate::encoding::EncodingRef; use crate::stats::StatsSet; -use crate::{ArrayDType, ArrayData}; +use crate::{ArrayDType, ArrayData, MetadataBytes}; /// Owned [`ArrayData`] with serialized metadata, backed by heap-allocated memory. #[derive(Debug)] @@ -14,7 +14,7 @@ pub(super) struct OwnedArrayData { pub(super) encoding: EncodingRef, pub(super) dtype: DType, pub(super) len: usize, - pub(super) metadata: Option, + pub(super) metadata: MetadataBytes, pub(super) buffers: Option>, pub(super) children: Option>, pub(super) stats_set: RwLock, diff --git a/vortex-array/src/encoding/opaque.rs b/vortex-array/src/encoding/opaque.rs index 8a2e719b96c..916e78bd861 100644 --- a/vortex-array/src/encoding/opaque.rs +++ b/vortex-array/src/encoding/opaque.rs @@ -11,7 +11,9 @@ use crate::validate::ValidateVTable; use crate::validity::{LogicalValidity, ValidityVTable}; use crate::variants::VariantsVTable; use crate::visitor::{ArrayVisitor, VisitorVTable}; -use crate::{ArrayData, Canonical, EmptyMetadata, IntoCanonicalVTable, MetadataVTable}; +use crate::{ + ArrayData, Canonical, EmptyMetadata, IntoCanonicalVTable, MetadataBytes, MetadataVTable, +}; /// An encoding of an array that we cannot interpret. /// @@ -29,7 +31,7 @@ pub struct OpaqueEncoding(pub u16); impl VariantsVTable for OpaqueEncoding {} impl MetadataVTable for OpaqueEncoding { - fn validate_metadata(&self, _metadata: Option<&[u8]>) -> VortexResult<()> { + fn validate_metadata(&self, _metadata: MetadataBytes) -> VortexResult<()> { Ok(()) } diff --git a/vortex-array/src/metadata.rs b/vortex-array/src/metadata.rs index 1b05fc48dd2..5d039ef65ae 100644 --- a/vortex-array/src/metadata.rs +++ b/vortex-array/src/metadata.rs @@ -2,20 +2,23 @@ use std::fmt::{Debug, Display, Formatter}; use flexbuffers::FlexbufferSerializer; use vortex_buffer::ByteBuffer; +use vortex_dtype::{ToBytes, TryFromBytes}; use vortex_error::{vortex_bail, vortex_err, VortexError, VortexExpect, VortexResult}; use crate::encoding::Encoding; -use crate::ArrayData; +use crate::{metadata, ArrayData}; + +pub type MetadataBytes = [u8; 8]; pub trait ArrayMetadata: SerializeMetadata + DeserializeMetadata + Display {} pub trait SerializeMetadata { - fn serialize(&self) -> VortexResult>; + fn serialize(&self) -> VortexResult; } impl SerializeMetadata for () { - fn serialize(&self) -> VortexResult> { - Ok(None) + fn serialize(&self) -> VortexResult { + Ok([0; 8]) } } @@ -25,7 +28,7 @@ where { type Output; - fn deserialize(metadata: Option<&[u8]>) -> VortexResult; + fn deserialize(metadata: MetadataBytes) -> VortexResult; /// Deserialize metadata without validation. /// @@ -33,23 +36,23 @@ where /// /// Those who use this API must be sure to have invoked deserialize at least once before /// calling this method. - unsafe fn deserialize_unchecked(metadata: Option<&[u8]>) -> Self::Output { + unsafe fn deserialize_unchecked(metadata: MetadataBytes) -> Self::Output { Self::deserialize(metadata) .vortex_expect("Metadata should have been validated before calling this method") } /// Format metadata for display. - fn format(metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result; + fn format(metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result; } pub trait MetadataVTable { - fn validate_metadata(&self, metadata: Option<&[u8]>) -> VortexResult<()>; + fn validate_metadata(&self, metadata: MetadataBytes) -> VortexResult<()>; fn display_metadata(&self, array: &Array, f: &mut Formatter<'_>) -> std::fmt::Result; } impl MetadataVTable for E { - fn validate_metadata(&self, metadata: Option<&[u8]>) -> VortexResult<()> { + fn validate_metadata(&self, metadata: MetadataBytes) -> VortexResult<()> { E::Metadata::deserialize(metadata).map(|_| ()) } @@ -62,22 +65,19 @@ pub struct EmptyMetadata; impl ArrayMetadata for EmptyMetadata {} impl SerializeMetadata for EmptyMetadata { - fn serialize(&self) -> VortexResult> { - Ok(None) + fn serialize(&self) -> VortexResult { + Ok([0; 8]) } } impl DeserializeMetadata for EmptyMetadata { type Output = EmptyMetadata; - fn deserialize(metadata: Option<&[u8]>) -> VortexResult { - if metadata.is_some() { - vortex_bail!("EmptyMetadata should not have metadata bytes") - } + fn deserialize(_metadata: MetadataBytes) -> VortexResult { Ok(EmptyMetadata) } - fn format(_metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result { + fn format(_metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result { f.write_str("EmptyMetadata") } } @@ -101,13 +101,14 @@ where >, >, { - fn serialize(&self) -> VortexResult> { + fn serialize(&self) -> VortexResult<[u8; 8]> { let buf = rkyv::to_bytes::(&self.0)?; - if buf.is_empty() { - Ok(None) - } else { - Ok(Some(ByteBuffer::from(buf))) + if buf.len() > 8 { + vortex_bail!("Metadata exceeds 8 bytes") } + let mut metadata: [u8; 8] = [0; 8]; + metadata[..buf.len()].copy_from_slice(buf.as_slice()); + Ok(metadata) } } @@ -124,14 +125,12 @@ where { type Output = M; - fn deserialize(metadata: Option<&[u8]>) -> VortexResult { - rkyv::from_bytes::( - metadata.ok_or_else(|| vortex_err!("Missing expected metadata"))?, - ) + fn deserialize(metadata: MetadataBytes) -> VortexResult { + rkyv::from_bytes::(&metadata[..]) } #[allow(clippy::use_debug)] - fn format(metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result { + fn format(metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result { match Self::deserialize(metadata) { Ok(m) => write!(f, "{:?}", m), Err(_) => write!(f, "Failed to deserialize metadata"), @@ -145,10 +144,14 @@ impl SerializeMetadata for SerdeMetadata where M: serde::Serialize, { - fn serialize(&self) -> VortexResult> { + fn serialize(&self) -> VortexResult { let mut ser = FlexbufferSerializer::new(); serde::Serialize::serialize(&self.0, &mut ser)?; - Ok(Some(ser.take_buffer().into())) + let buf = ser.take_buffer(); + if buf.len() > 8 { + vortex_bail!("Metadata exceeds 8 bytes") + } + Ok(buf.as_slice().try_into()?) } } @@ -159,14 +162,14 @@ where { type Output = M; - fn deserialize(metadata: Option<&[u8]>) -> VortexResult { - let bytes = - metadata.ok_or_else(|| vortex_err!("Serde metadata requires metadata bytes"))?; - Ok(M::deserialize(flexbuffers::Reader::get_root(bytes)?)?) + fn deserialize(metadata: MetadataBytes) -> VortexResult { + Ok(M::deserialize(flexbuffers::Reader::get_root( + &metadata[..], + )?)?) } #[allow(clippy::use_debug)] - fn format(metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result { + fn format(metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result { match Self::deserialize(metadata) { Ok(m) => write!(f, "{:?}", m), Err(_) => write!(f, "Failed to deserialize metadata"), diff --git a/vortex-array/src/nbytes.rs b/vortex-array/src/nbytes.rs index a24d0dd055a..d8a36fc4358 100644 --- a/vortex-array/src/nbytes.rs +++ b/vortex-array/src/nbytes.rs @@ -11,7 +11,7 @@ impl ArrayData { self.encoding() .accept(self.as_ref(), &mut visitor) .vortex_expect("Failed to get nbytes from Array"); - visitor.0 + self.metadata_bytes().map_or(0, |b| b.len()) + visitor.0 } } diff --git a/vortex-array/src/parts.rs b/vortex-array/src/parts.rs index 09791279c07..527af749703 100644 --- a/vortex-array/src/parts.rs +++ b/vortex-array/src/parts.rs @@ -3,7 +3,7 @@ use std::fmt::{Debug, Formatter}; use flatbuffers::{FlatBufferBuilder, Follow, WIPOffset}; use itertools::Itertools; use vortex_buffer::ByteBuffer; -use vortex_dtype::DType; +use vortex_dtype::{DType, TryFromBytes}; use vortex_error::{vortex_panic, VortexExpect, VortexResult}; use vortex_flatbuffers::{ array as fba, FlatBuffer, FlatBufferRoot, WriteFlatBuffer, WriteFlatBufferExt, @@ -128,10 +128,7 @@ impl WriteFlatBuffer for ArrayPartsFlatBuffer<'_> { fbb: &mut FlatBufferBuilder<'fb>, ) -> WIPOffset> { let encoding = self.array.encoding().id().code(); - let metadata = self - .array - .metadata_bytes() - .map(|bytes| fbb.create_vector(bytes)); + let metadata = u64::from_le_bytes(self.array.metadata_bytes()); // Assign buffer indices for all child arrays. let nbuffers = u16::try_from(self.array.nbuffers()) diff --git a/vortex-array/src/test_harness.rs b/vortex-array/src/test_harness.rs index 847a1f3feab..6289b74fe47 100644 --- a/vortex-array/src/test_harness.rs +++ b/vortex-array/src/test_harness.rs @@ -2,6 +2,7 @@ use std::io::Write; use goldenfile::differs::binary_diff; use goldenfile::Mint; +use vortex_dtype::ToBytes; use vortex_error::VortexExpect; use crate::{DeserializeMetadata, SerializeMetadata}; @@ -16,13 +17,11 @@ where T: DeserializeMetadata, { let mut mint = Mint::new("goldenfiles/"); - if let Some(meta) = metadata + let meta = metadata .serialize() - .vortex_expect("Failed to serialize metadata") - { - let mut f = mint - .new_goldenfile_with_differ(name, Box::new(binary_diff)) - .unwrap(); - f.write_all(&meta).unwrap(); - } + .vortex_expect("Failed to serialize metadata"); + let mut f = mint + .new_goldenfile_with_differ(name, Box::new(binary_diff)) + .unwrap(); + f.write_all(&meta[..]).unwrap(); } diff --git a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs index d173877ef10..d233e9f7abd 100644 --- a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs +++ b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs @@ -2,7 +2,7 @@ include "vortex-scalar/scalar.fbs"; table Array { encoding: uint16; - metadata: [ubyte]; + metadata: uint64; // We store as a u64 to guarantee 8-byte alignment. children: [Array]; buffers: [uint16]; stats: ArrayStats; diff --git a/vortex-flatbuffers/src/generated/array.rs b/vortex-flatbuffers/src/generated/array.rs index 3a813b7ca39..8bddea0c865 100644 --- a/vortex-flatbuffers/src/generated/array.rs +++ b/vortex-flatbuffers/src/generated/array.rs @@ -43,10 +43,10 @@ impl<'a> Array<'a> { args: &'args ArrayArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = ArrayBuilder::new(_fbb); + builder.add_metadata(args.metadata); if let Some(x) = args.stats { builder.add_stats(x); } if let Some(x) = args.buffers { builder.add_buffers(x); } if let Some(x) = args.children { builder.add_children(x); } - if let Some(x) = args.metadata { builder.add_metadata(x); } builder.add_encoding(args.encoding); builder.finish() } @@ -60,11 +60,11 @@ impl<'a> Array<'a> { unsafe { self._tab.get::(Array::VT_ENCODING, Some(0)).unwrap()} } #[inline] - pub fn metadata(&self) -> Option> { + pub fn metadata(&self) -> u64 { // Safety: // Created from valid Table for this object // which contains a valid value in this slot - unsafe { self._tab.get::>>(Array::VT_METADATA, None)} + unsafe { self._tab.get::(Array::VT_METADATA, Some(0)).unwrap()} } #[inline] pub fn children(&self) -> Option>>> { @@ -97,7 +97,7 @@ impl flatbuffers::Verifiable for Array<'_> { use self::flatbuffers::Verifiable; v.visit_table(pos)? .visit_field::("encoding", Self::VT_ENCODING, false)? - .visit_field::>>("metadata", Self::VT_METADATA, false)? + .visit_field::("metadata", Self::VT_METADATA, false)? .visit_field::>>>("children", Self::VT_CHILDREN, false)? .visit_field::>>("buffers", Self::VT_BUFFERS, false)? .visit_field::>("stats", Self::VT_STATS, false)? @@ -107,7 +107,7 @@ impl flatbuffers::Verifiable for Array<'_> { } pub struct ArrayArgs<'a> { pub encoding: u16, - pub metadata: Option>>, + pub metadata: u64, pub children: Option>>>>, pub buffers: Option>>, pub stats: Option>>, @@ -117,7 +117,7 @@ impl<'a> Default for ArrayArgs<'a> { fn default() -> Self { ArrayArgs { encoding: 0, - metadata: None, + metadata: 0, children: None, buffers: None, stats: None, @@ -135,8 +135,8 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayBuilder<'a, 'b, A> { self.fbb_.push_slot::(Array::VT_ENCODING, encoding, 0); } #[inline] - pub fn add_metadata(&mut self, metadata: flatbuffers::WIPOffset>) { - self.fbb_.push_slot_always::>(Array::VT_METADATA, metadata); + pub fn add_metadata(&mut self, metadata: u64) { + self.fbb_.push_slot::(Array::VT_METADATA, metadata, 0); } #[inline] pub fn add_children(&mut self, children: flatbuffers::WIPOffset>>>) {