From 7899927f796968e0498e02ba07586bfb0570912a Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 22 Jan 2025 15:32:30 +0000 Subject: [PATCH 1/5] Store Array metadata in 8 bytes --- vortex-array/src/data/mod.rs | 9 +-- vortex-array/src/data/owned.rs | 2 +- vortex-array/src/encoding/opaque.rs | 2 +- vortex-array/src/metadata.rs | 57 ++++++++++++------- vortex-array/src/nbytes.rs | 2 +- vortex-array/src/parts.rs | 5 +- vortex-array/src/test_harness.rs | 3 +- .../flatbuffers/vortex-array/array.fbs | 2 +- vortex-flatbuffers/src/generated/array.rs | 14 ++--- 9 files changed, 55 insertions(+), 41 deletions(-) diff --git a/vortex-array/src/data/mod.rs b/vortex-array/src/data/mod.rs index 94ab6c06b43..a7a0f930a2b 100644 --- a/vortex-array/src/data/mod.rs +++ b/vortex-array/src/data/mod.rs @@ -60,7 +60,7 @@ impl ArrayData { encoding: EncodingRef, dtype: DType, len: usize, - metadata: Option, + metadata: Option, buffers: Option>, children: Option>, statistics: StatsSet, @@ -276,10 +276,11 @@ impl ArrayData { offsets } - pub fn metadata_bytes(&self) -> Option<&[u8]> { + /// Returns the u64 Array metadata. + pub fn metadata_bytes(&self) -> Option { match &self.0 { - InnerArrayData::Owned(d) => d.metadata.as_ref().map(|b| b.as_slice()), - InnerArrayData::Viewed(v) => v.flatbuffer().metadata().map(|m| m.bytes()), + InnerArrayData::Owned(d) => d.metadata.clone(), + InnerArrayData::Viewed(v) => v.flatbuffer().metadata(), } } diff --git a/vortex-array/src/data/owned.rs b/vortex-array/src/data/owned.rs index 478497321b9..677f7d68e71 100644 --- a/vortex-array/src/data/owned.rs +++ b/vortex-array/src/data/owned.rs @@ -14,7 +14,7 @@ pub(super) struct OwnedArrayData { pub(super) encoding: EncodingRef, pub(super) dtype: DType, pub(super) len: usize, - pub(super) metadata: Option, + pub(super) metadata: Option, pub(super) buffers: Option>, pub(super) children: Option>, pub(super) stats_set: RwLock, diff --git a/vortex-array/src/encoding/opaque.rs b/vortex-array/src/encoding/opaque.rs index 8a2e719b96c..2270d24d439 100644 --- a/vortex-array/src/encoding/opaque.rs +++ b/vortex-array/src/encoding/opaque.rs @@ -29,7 +29,7 @@ pub struct OpaqueEncoding(pub u16); impl VariantsVTable for OpaqueEncoding {} impl MetadataVTable for OpaqueEncoding { - fn validate_metadata(&self, _metadata: Option<&[u8]>) -> VortexResult<()> { + fn validate_metadata(&self, _metadata: Option) -> VortexResult<()> { Ok(()) } diff --git a/vortex-array/src/metadata.rs b/vortex-array/src/metadata.rs index 1b05fc48dd2..5870c47cbce 100644 --- a/vortex-array/src/metadata.rs +++ b/vortex-array/src/metadata.rs @@ -2,19 +2,20 @@ use std::fmt::{Debug, Display, Formatter}; use flexbuffers::FlexbufferSerializer; use vortex_buffer::ByteBuffer; +use vortex_dtype::{ToBytes, TryFromBytes}; use vortex_error::{vortex_bail, vortex_err, VortexError, VortexExpect, VortexResult}; use crate::encoding::Encoding; -use crate::ArrayData; +use crate::{metadata, ArrayData}; pub trait ArrayMetadata: SerializeMetadata + DeserializeMetadata + Display {} pub trait SerializeMetadata { - fn serialize(&self) -> VortexResult>; + fn serialize(&self) -> VortexResult>; } impl SerializeMetadata for () { - fn serialize(&self) -> VortexResult> { + fn serialize(&self) -> VortexResult> { Ok(None) } } @@ -25,7 +26,7 @@ where { type Output; - fn deserialize(metadata: Option<&[u8]>) -> VortexResult; + fn deserialize(metadata: Option) -> VortexResult; /// Deserialize metadata without validation. /// @@ -33,23 +34,23 @@ where /// /// Those who use this API must be sure to have invoked deserialize at least once before /// calling this method. - unsafe fn deserialize_unchecked(metadata: Option<&[u8]>) -> Self::Output { + unsafe fn deserialize_unchecked(metadata: Option) -> Self::Output { Self::deserialize(metadata) .vortex_expect("Metadata should have been validated before calling this method") } /// Format metadata for display. - fn format(metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result; + fn format(metadata: Option, f: &mut Formatter<'_>) -> std::fmt::Result; } pub trait MetadataVTable { - fn validate_metadata(&self, metadata: Option<&[u8]>) -> VortexResult<()>; + fn validate_metadata(&self, metadata: Option) -> VortexResult<()>; fn display_metadata(&self, array: &Array, f: &mut Formatter<'_>) -> std::fmt::Result; } impl MetadataVTable for E { - fn validate_metadata(&self, metadata: Option<&[u8]>) -> VortexResult<()> { + fn validate_metadata(&self, metadata: Option) -> VortexResult<()> { E::Metadata::deserialize(metadata).map(|_| ()) } @@ -62,7 +63,7 @@ pub struct EmptyMetadata; impl ArrayMetadata for EmptyMetadata {} impl SerializeMetadata for EmptyMetadata { - fn serialize(&self) -> VortexResult> { + fn serialize(&self) -> VortexResult> { Ok(None) } } @@ -70,14 +71,14 @@ impl SerializeMetadata for EmptyMetadata { impl DeserializeMetadata for EmptyMetadata { type Output = EmptyMetadata; - fn deserialize(metadata: Option<&[u8]>) -> VortexResult { + fn deserialize(metadata: Option) -> VortexResult { if metadata.is_some() { vortex_bail!("EmptyMetadata should not have metadata bytes") } Ok(EmptyMetadata) } - fn format(_metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result { + fn format(_metadata: Option, f: &mut Formatter<'_>) -> std::fmt::Result { f.write_str("EmptyMetadata") } } @@ -101,12 +102,18 @@ where >, >, { - fn serialize(&self) -> VortexResult> { + fn serialize(&self) -> VortexResult> { let buf = rkyv::to_bytes::(&self.0)?; + if buf.len() > 8 { + vortex_bail!("Metadata exceeds 8 bytes") + } + if buf.is_empty() { Ok(None) } else { - Ok(Some(ByteBuffer::from(buf))) + let mut metadata: [u8; 8] = [0; 8]; + metadata.copy_from_slice(&buf.as_slice()[..buf.len().min(8)]); + Ok(Some(u64::try_from_le_bytes(&metadata[..])?)) } } } @@ -124,14 +131,16 @@ where { type Output = M; - fn deserialize(metadata: Option<&[u8]>) -> VortexResult { + fn deserialize(metadata: Option) -> VortexResult { rkyv::from_bytes::( - metadata.ok_or_else(|| vortex_err!("Missing expected metadata"))?, + &metadata + .ok_or_else(|| vortex_err!("Missing expected metadata"))? + .to_le_bytes()[..], ) } #[allow(clippy::use_debug)] - fn format(metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result { + fn format(metadata: Option, f: &mut Formatter<'_>) -> std::fmt::Result { match Self::deserialize(metadata) { Ok(m) => write!(f, "{:?}", m), Err(_) => write!(f, "Failed to deserialize metadata"), @@ -145,10 +154,14 @@ impl SerializeMetadata for SerdeMetadata where M: serde::Serialize, { - fn serialize(&self) -> VortexResult> { + fn serialize(&self) -> VortexResult> { let mut ser = FlexbufferSerializer::new(); serde::Serialize::serialize(&self.0, &mut ser)?; - Ok(Some(ser.take_buffer().into())) + let buf = ser.take_buffer(); + if buf.len() > 8 { + vortex_bail!("Metadata exceeds 8 bytes") + } + Ok(Some(u64::try_from_le_bytes(&buf[..])?)) } } @@ -159,14 +172,16 @@ where { type Output = M; - fn deserialize(metadata: Option<&[u8]>) -> VortexResult { + fn deserialize(metadata: Option) -> VortexResult { let bytes = metadata.ok_or_else(|| vortex_err!("Serde metadata requires metadata bytes"))?; - Ok(M::deserialize(flexbuffers::Reader::get_root(bytes)?)?) + Ok(M::deserialize(flexbuffers::Reader::get_root( + &bytes.to_le_bytes()[..], + )?)?) } #[allow(clippy::use_debug)] - fn format(metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result { + fn format(metadata: Option, f: &mut Formatter<'_>) -> std::fmt::Result { match Self::deserialize(metadata) { Ok(m) => write!(f, "{:?}", m), Err(_) => write!(f, "Failed to deserialize metadata"), diff --git a/vortex-array/src/nbytes.rs b/vortex-array/src/nbytes.rs index a24d0dd055a..d8a36fc4358 100644 --- a/vortex-array/src/nbytes.rs +++ b/vortex-array/src/nbytes.rs @@ -11,7 +11,7 @@ impl ArrayData { self.encoding() .accept(self.as_ref(), &mut visitor) .vortex_expect("Failed to get nbytes from Array"); - visitor.0 + self.metadata_bytes().map_or(0, |b| b.len()) + visitor.0 } } diff --git a/vortex-array/src/parts.rs b/vortex-array/src/parts.rs index 09791279c07..019dfccc4cb 100644 --- a/vortex-array/src/parts.rs +++ b/vortex-array/src/parts.rs @@ -128,10 +128,7 @@ impl WriteFlatBuffer for ArrayPartsFlatBuffer<'_> { fbb: &mut FlatBufferBuilder<'fb>, ) -> WIPOffset> { let encoding = self.array.encoding().id().code(); - let metadata = self - .array - .metadata_bytes() - .map(|bytes| fbb.create_vector(bytes)); + let metadata = self.array.metadata_bytes(); // Assign buffer indices for all child arrays. let nbuffers = u16::try_from(self.array.nbuffers()) diff --git a/vortex-array/src/test_harness.rs b/vortex-array/src/test_harness.rs index 847a1f3feab..c7457a169fb 100644 --- a/vortex-array/src/test_harness.rs +++ b/vortex-array/src/test_harness.rs @@ -2,6 +2,7 @@ use std::io::Write; use goldenfile::differs::binary_diff; use goldenfile::Mint; +use vortex_dtype::ToBytes; use vortex_error::VortexExpect; use crate::{DeserializeMetadata, SerializeMetadata}; @@ -23,6 +24,6 @@ where let mut f = mint .new_goldenfile_with_differ(name, Box::new(binary_diff)) .unwrap(); - f.write_all(&meta).unwrap(); + f.write_all(&meta.to_le_bytes()[..]).unwrap(); } } diff --git a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs index d173877ef10..0c2cb4fac6e 100644 --- a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs +++ b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs @@ -2,7 +2,7 @@ include "vortex-scalar/scalar.fbs"; table Array { encoding: uint16; - metadata: [ubyte]; + metadata: uint64 = null; children: [Array]; buffers: [uint16]; stats: ArrayStats; diff --git a/vortex-flatbuffers/src/generated/array.rs b/vortex-flatbuffers/src/generated/array.rs index 3a813b7ca39..5100fc03ec0 100644 --- a/vortex-flatbuffers/src/generated/array.rs +++ b/vortex-flatbuffers/src/generated/array.rs @@ -43,10 +43,10 @@ impl<'a> Array<'a> { args: &'args ArrayArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = ArrayBuilder::new(_fbb); + if let Some(x) = args.metadata { builder.add_metadata(x); } if let Some(x) = args.stats { builder.add_stats(x); } if let Some(x) = args.buffers { builder.add_buffers(x); } if let Some(x) = args.children { builder.add_children(x); } - if let Some(x) = args.metadata { builder.add_metadata(x); } builder.add_encoding(args.encoding); builder.finish() } @@ -60,11 +60,11 @@ impl<'a> Array<'a> { unsafe { self._tab.get::(Array::VT_ENCODING, Some(0)).unwrap()} } #[inline] - pub fn metadata(&self) -> Option> { + pub fn metadata(&self) -> Option { // Safety: // Created from valid Table for this object // which contains a valid value in this slot - unsafe { self._tab.get::>>(Array::VT_METADATA, None)} + unsafe { self._tab.get::(Array::VT_METADATA, None)} } #[inline] pub fn children(&self) -> Option>>> { @@ -97,7 +97,7 @@ impl flatbuffers::Verifiable for Array<'_> { use self::flatbuffers::Verifiable; v.visit_table(pos)? .visit_field::("encoding", Self::VT_ENCODING, false)? - .visit_field::>>("metadata", Self::VT_METADATA, false)? + .visit_field::("metadata", Self::VT_METADATA, false)? .visit_field::>>>("children", Self::VT_CHILDREN, false)? .visit_field::>>("buffers", Self::VT_BUFFERS, false)? .visit_field::>("stats", Self::VT_STATS, false)? @@ -107,7 +107,7 @@ impl flatbuffers::Verifiable for Array<'_> { } pub struct ArrayArgs<'a> { pub encoding: u16, - pub metadata: Option>>, + pub metadata: Option, pub children: Option>>>>, pub buffers: Option>>, pub stats: Option>>, @@ -135,8 +135,8 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayBuilder<'a, 'b, A> { self.fbb_.push_slot::(Array::VT_ENCODING, encoding, 0); } #[inline] - pub fn add_metadata(&mut self, metadata: flatbuffers::WIPOffset>) { - self.fbb_.push_slot_always::>(Array::VT_METADATA, metadata); + pub fn add_metadata(&mut self, metadata: u64) { + self.fbb_.push_slot_always::(Array::VT_METADATA, metadata); } #[inline] pub fn add_children(&mut self, children: flatbuffers::WIPOffset>>>) { From eff322526d4211cdcbdb26c85223f48deb19518a Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 22 Jan 2025 15:33:17 +0000 Subject: [PATCH 2/5] Store Array metadata in 8 bytes --- vortex-array/src/data/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/data/mod.rs b/vortex-array/src/data/mod.rs index a7a0f930a2b..3d4fe8154d5 100644 --- a/vortex-array/src/data/mod.rs +++ b/vortex-array/src/data/mod.rs @@ -279,7 +279,7 @@ impl ArrayData { /// Returns the u64 Array metadata. pub fn metadata_bytes(&self) -> Option { match &self.0 { - InnerArrayData::Owned(d) => d.metadata.clone(), + InnerArrayData::Owned(d) => d.metadata, InnerArrayData::Viewed(v) => v.flatbuffer().metadata(), } } From 2d5ba61d7e9078849da0e95fcc430282804c303e Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 22 Jan 2025 16:06:09 +0000 Subject: [PATCH 3/5] Store Array metadata in 8 bytes --- vortex-array/src/array/chunked/mod.rs | 1 + vortex-array/src/data/mod.rs | 10 +-- vortex-array/src/data/owned.rs | 4 +- vortex-array/src/encoding/opaque.rs | 6 +- vortex-array/src/metadata.rs | 64 ++++++++----------- vortex-array/src/parts.rs | 4 +- .../flatbuffers/vortex-array/array.fbs | 2 +- vortex-flatbuffers/src/generated/array.rs | 12 ++-- 8 files changed, 47 insertions(+), 56 deletions(-) diff --git a/vortex-array/src/array/chunked/mod.rs b/vortex-array/src/array/chunked/mod.rs index b768a681f9b..49443d84f92 100644 --- a/vortex-array/src/array/chunked/mod.rs +++ b/vortex-array/src/array/chunked/mod.rs @@ -109,6 +109,7 @@ impl ChunkedArray { } pub fn nchunks(&self) -> usize { + println!("ChunkedArray: meta: {}", self.metadata()); self.metadata().nchunks } diff --git a/vortex-array/src/data/mod.rs b/vortex-array/src/data/mod.rs index 3d4fe8154d5..c4da19e3418 100644 --- a/vortex-array/src/data/mod.rs +++ b/vortex-array/src/data/mod.rs @@ -21,7 +21,7 @@ use crate::stats::{ArrayStatistics, Stat, Statistics, StatsSet}; use crate::stream::{ArrayStream, ArrayStreamAdapter}; use crate::validity::{ArrayValidity, LogicalValidity, ValidityVTable}; use crate::{ - ArrayChildrenIterator, ArrayDType, ArrayLen, ChildrenCollector, ContextRef, + ArrayChildrenIterator, ArrayDType, ArrayLen, ChildrenCollector, ContextRef, MetadataBytes, NamedChildrenCollector, }; @@ -60,7 +60,7 @@ impl ArrayData { encoding: EncodingRef, dtype: DType, len: usize, - metadata: Option, + metadata: MetadataBytes, buffers: Option>, children: Option>, statistics: StatsSet, @@ -276,11 +276,11 @@ impl ArrayData { offsets } - /// Returns the u64 Array metadata. - pub fn metadata_bytes(&self) -> Option { + /// Returns the Array metadata bytes with 8-byte aligned. + pub fn metadata_bytes(&self) -> MetadataBytes { match &self.0 { InnerArrayData::Owned(d) => d.metadata, - InnerArrayData::Viewed(v) => v.flatbuffer().metadata(), + InnerArrayData::Viewed(v) => v.flatbuffer().metadata().to_le_bytes(), } } diff --git a/vortex-array/src/data/owned.rs b/vortex-array/src/data/owned.rs index 677f7d68e71..5a1e4a44628 100644 --- a/vortex-array/src/data/owned.rs +++ b/vortex-array/src/data/owned.rs @@ -6,7 +6,7 @@ use vortex_error::{vortex_bail, VortexResult}; use crate::encoding::EncodingRef; use crate::stats::StatsSet; -use crate::{ArrayDType, ArrayData}; +use crate::{ArrayDType, ArrayData, MetadataBytes}; /// Owned [`ArrayData`] with serialized metadata, backed by heap-allocated memory. #[derive(Debug)] @@ -14,7 +14,7 @@ pub(super) struct OwnedArrayData { pub(super) encoding: EncodingRef, pub(super) dtype: DType, pub(super) len: usize, - pub(super) metadata: Option, + pub(super) metadata: MetadataBytes, pub(super) buffers: Option>, pub(super) children: Option>, pub(super) stats_set: RwLock, diff --git a/vortex-array/src/encoding/opaque.rs b/vortex-array/src/encoding/opaque.rs index 2270d24d439..916e78bd861 100644 --- a/vortex-array/src/encoding/opaque.rs +++ b/vortex-array/src/encoding/opaque.rs @@ -11,7 +11,9 @@ use crate::validate::ValidateVTable; use crate::validity::{LogicalValidity, ValidityVTable}; use crate::variants::VariantsVTable; use crate::visitor::{ArrayVisitor, VisitorVTable}; -use crate::{ArrayData, Canonical, EmptyMetadata, IntoCanonicalVTable, MetadataVTable}; +use crate::{ + ArrayData, Canonical, EmptyMetadata, IntoCanonicalVTable, MetadataBytes, MetadataVTable, +}; /// An encoding of an array that we cannot interpret. /// @@ -29,7 +31,7 @@ pub struct OpaqueEncoding(pub u16); impl VariantsVTable for OpaqueEncoding {} impl MetadataVTable for OpaqueEncoding { - fn validate_metadata(&self, _metadata: Option) -> VortexResult<()> { + fn validate_metadata(&self, _metadata: MetadataBytes) -> VortexResult<()> { Ok(()) } diff --git a/vortex-array/src/metadata.rs b/vortex-array/src/metadata.rs index 5870c47cbce..5d039ef65ae 100644 --- a/vortex-array/src/metadata.rs +++ b/vortex-array/src/metadata.rs @@ -8,15 +8,17 @@ use vortex_error::{vortex_bail, vortex_err, VortexError, VortexExpect, VortexRes use crate::encoding::Encoding; use crate::{metadata, ArrayData}; +pub type MetadataBytes = [u8; 8]; + pub trait ArrayMetadata: SerializeMetadata + DeserializeMetadata + Display {} pub trait SerializeMetadata { - fn serialize(&self) -> VortexResult>; + fn serialize(&self) -> VortexResult; } impl SerializeMetadata for () { - fn serialize(&self) -> VortexResult> { - Ok(None) + fn serialize(&self) -> VortexResult { + Ok([0; 8]) } } @@ -26,7 +28,7 @@ where { type Output; - fn deserialize(metadata: Option) -> VortexResult; + fn deserialize(metadata: MetadataBytes) -> VortexResult; /// Deserialize metadata without validation. /// @@ -34,23 +36,23 @@ where /// /// Those who use this API must be sure to have invoked deserialize at least once before /// calling this method. - unsafe fn deserialize_unchecked(metadata: Option) -> Self::Output { + unsafe fn deserialize_unchecked(metadata: MetadataBytes) -> Self::Output { Self::deserialize(metadata) .vortex_expect("Metadata should have been validated before calling this method") } /// Format metadata for display. - fn format(metadata: Option, f: &mut Formatter<'_>) -> std::fmt::Result; + fn format(metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result; } pub trait MetadataVTable { - fn validate_metadata(&self, metadata: Option) -> VortexResult<()>; + fn validate_metadata(&self, metadata: MetadataBytes) -> VortexResult<()>; fn display_metadata(&self, array: &Array, f: &mut Formatter<'_>) -> std::fmt::Result; } impl MetadataVTable for E { - fn validate_metadata(&self, metadata: Option) -> VortexResult<()> { + fn validate_metadata(&self, metadata: MetadataBytes) -> VortexResult<()> { E::Metadata::deserialize(metadata).map(|_| ()) } @@ -63,22 +65,19 @@ pub struct EmptyMetadata; impl ArrayMetadata for EmptyMetadata {} impl SerializeMetadata for EmptyMetadata { - fn serialize(&self) -> VortexResult> { - Ok(None) + fn serialize(&self) -> VortexResult { + Ok([0; 8]) } } impl DeserializeMetadata for EmptyMetadata { type Output = EmptyMetadata; - fn deserialize(metadata: Option) -> VortexResult { - if metadata.is_some() { - vortex_bail!("EmptyMetadata should not have metadata bytes") - } + fn deserialize(_metadata: MetadataBytes) -> VortexResult { Ok(EmptyMetadata) } - fn format(_metadata: Option, f: &mut Formatter<'_>) -> std::fmt::Result { + fn format(_metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result { f.write_str("EmptyMetadata") } } @@ -102,19 +101,14 @@ where >, >, { - fn serialize(&self) -> VortexResult> { + fn serialize(&self) -> VortexResult<[u8; 8]> { let buf = rkyv::to_bytes::(&self.0)?; if buf.len() > 8 { vortex_bail!("Metadata exceeds 8 bytes") } - - if buf.is_empty() { - Ok(None) - } else { - let mut metadata: [u8; 8] = [0; 8]; - metadata.copy_from_slice(&buf.as_slice()[..buf.len().min(8)]); - Ok(Some(u64::try_from_le_bytes(&metadata[..])?)) - } + let mut metadata: [u8; 8] = [0; 8]; + metadata[..buf.len()].copy_from_slice(buf.as_slice()); + Ok(metadata) } } @@ -131,16 +125,12 @@ where { type Output = M; - fn deserialize(metadata: Option) -> VortexResult { - rkyv::from_bytes::( - &metadata - .ok_or_else(|| vortex_err!("Missing expected metadata"))? - .to_le_bytes()[..], - ) + fn deserialize(metadata: MetadataBytes) -> VortexResult { + rkyv::from_bytes::(&metadata[..]) } #[allow(clippy::use_debug)] - fn format(metadata: Option, f: &mut Formatter<'_>) -> std::fmt::Result { + fn format(metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result { match Self::deserialize(metadata) { Ok(m) => write!(f, "{:?}", m), Err(_) => write!(f, "Failed to deserialize metadata"), @@ -154,14 +144,14 @@ impl SerializeMetadata for SerdeMetadata where M: serde::Serialize, { - fn serialize(&self) -> VortexResult> { + fn serialize(&self) -> VortexResult { let mut ser = FlexbufferSerializer::new(); serde::Serialize::serialize(&self.0, &mut ser)?; let buf = ser.take_buffer(); if buf.len() > 8 { vortex_bail!("Metadata exceeds 8 bytes") } - Ok(Some(u64::try_from_le_bytes(&buf[..])?)) + Ok(buf.as_slice().try_into()?) } } @@ -172,16 +162,14 @@ where { type Output = M; - fn deserialize(metadata: Option) -> VortexResult { - let bytes = - metadata.ok_or_else(|| vortex_err!("Serde metadata requires metadata bytes"))?; + fn deserialize(metadata: MetadataBytes) -> VortexResult { Ok(M::deserialize(flexbuffers::Reader::get_root( - &bytes.to_le_bytes()[..], + &metadata[..], )?)?) } #[allow(clippy::use_debug)] - fn format(metadata: Option, f: &mut Formatter<'_>) -> std::fmt::Result { + fn format(metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result { match Self::deserialize(metadata) { Ok(m) => write!(f, "{:?}", m), Err(_) => write!(f, "Failed to deserialize metadata"), diff --git a/vortex-array/src/parts.rs b/vortex-array/src/parts.rs index 019dfccc4cb..527af749703 100644 --- a/vortex-array/src/parts.rs +++ b/vortex-array/src/parts.rs @@ -3,7 +3,7 @@ use std::fmt::{Debug, Formatter}; use flatbuffers::{FlatBufferBuilder, Follow, WIPOffset}; use itertools::Itertools; use vortex_buffer::ByteBuffer; -use vortex_dtype::DType; +use vortex_dtype::{DType, TryFromBytes}; use vortex_error::{vortex_panic, VortexExpect, VortexResult}; use vortex_flatbuffers::{ array as fba, FlatBuffer, FlatBufferRoot, WriteFlatBuffer, WriteFlatBufferExt, @@ -128,7 +128,7 @@ impl WriteFlatBuffer for ArrayPartsFlatBuffer<'_> { fbb: &mut FlatBufferBuilder<'fb>, ) -> WIPOffset> { let encoding = self.array.encoding().id().code(); - let metadata = self.array.metadata_bytes(); + let metadata = u64::from_le_bytes(self.array.metadata_bytes()); // Assign buffer indices for all child arrays. let nbuffers = u16::try_from(self.array.nbuffers()) diff --git a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs index 0c2cb4fac6e..d233e9f7abd 100644 --- a/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs +++ b/vortex-flatbuffers/flatbuffers/vortex-array/array.fbs @@ -2,7 +2,7 @@ include "vortex-scalar/scalar.fbs"; table Array { encoding: uint16; - metadata: uint64 = null; + metadata: uint64; // We store as a u64 to guarantee 8-byte alignment. children: [Array]; buffers: [uint16]; stats: ArrayStats; diff --git a/vortex-flatbuffers/src/generated/array.rs b/vortex-flatbuffers/src/generated/array.rs index 5100fc03ec0..8bddea0c865 100644 --- a/vortex-flatbuffers/src/generated/array.rs +++ b/vortex-flatbuffers/src/generated/array.rs @@ -43,7 +43,7 @@ impl<'a> Array<'a> { args: &'args ArrayArgs<'args> ) -> flatbuffers::WIPOffset> { let mut builder = ArrayBuilder::new(_fbb); - if let Some(x) = args.metadata { builder.add_metadata(x); } + builder.add_metadata(args.metadata); if let Some(x) = args.stats { builder.add_stats(x); } if let Some(x) = args.buffers { builder.add_buffers(x); } if let Some(x) = args.children { builder.add_children(x); } @@ -60,11 +60,11 @@ impl<'a> Array<'a> { unsafe { self._tab.get::(Array::VT_ENCODING, Some(0)).unwrap()} } #[inline] - pub fn metadata(&self) -> Option { + pub fn metadata(&self) -> u64 { // Safety: // Created from valid Table for this object // which contains a valid value in this slot - unsafe { self._tab.get::(Array::VT_METADATA, None)} + unsafe { self._tab.get::(Array::VT_METADATA, Some(0)).unwrap()} } #[inline] pub fn children(&self) -> Option>>> { @@ -107,7 +107,7 @@ impl flatbuffers::Verifiable for Array<'_> { } pub struct ArrayArgs<'a> { pub encoding: u16, - pub metadata: Option, + pub metadata: u64, pub children: Option>>>>, pub buffers: Option>>, pub stats: Option>>, @@ -117,7 +117,7 @@ impl<'a> Default for ArrayArgs<'a> { fn default() -> Self { ArrayArgs { encoding: 0, - metadata: None, + metadata: 0, children: None, buffers: None, stats: None, @@ -136,7 +136,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayBuilder<'a, 'b, A> { } #[inline] pub fn add_metadata(&mut self, metadata: u64) { - self.fbb_.push_slot_always::(Array::VT_METADATA, metadata); + self.fbb_.push_slot::(Array::VT_METADATA, metadata, 0); } #[inline] pub fn add_children(&mut self, children: flatbuffers::WIPOffset>>>) { From beb316711ee0cbeb2fcee2bbccd11291f26e2f02 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 22 Jan 2025 17:17:57 +0000 Subject: [PATCH 4/5] Store Array metadata in 8 bytes --- vortex-array/src/test_harness.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/vortex-array/src/test_harness.rs b/vortex-array/src/test_harness.rs index c7457a169fb..6289b74fe47 100644 --- a/vortex-array/src/test_harness.rs +++ b/vortex-array/src/test_harness.rs @@ -17,13 +17,11 @@ where T: DeserializeMetadata, { let mut mint = Mint::new("goldenfiles/"); - if let Some(meta) = metadata + let meta = metadata .serialize() - .vortex_expect("Failed to serialize metadata") - { - let mut f = mint - .new_goldenfile_with_differ(name, Box::new(binary_diff)) - .unwrap(); - f.write_all(&meta.to_le_bytes()[..]).unwrap(); - } + .vortex_expect("Failed to serialize metadata"); + let mut f = mint + .new_goldenfile_with_differ(name, Box::new(binary_diff)) + .unwrap(); + f.write_all(&meta[..]).unwrap(); } From 10ea6483e8936e499df59a92922c7947a58d7a37 Mon Sep 17 00:00:00 2001 From: Nicholas Gates Date: Wed, 22 Jan 2025 17:20:22 +0000 Subject: [PATCH 5/5] Self-hosted --- vortex-array/src/array/chunked/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/vortex-array/src/array/chunked/mod.rs b/vortex-array/src/array/chunked/mod.rs index 49443d84f92..b768a681f9b 100644 --- a/vortex-array/src/array/chunked/mod.rs +++ b/vortex-array/src/array/chunked/mod.rs @@ -109,7 +109,6 @@ impl ChunkedArray { } pub fn nchunks(&self) -> usize { - println!("ChunkedArray: meta: {}", self.metadata()); self.metadata().nchunks }