Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions vortex-array/src/data/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::stats::{ArrayStatistics, Stat, Statistics, StatsSet};
use crate::stream::{ArrayStream, ArrayStreamAdapter};
use crate::validity::{ArrayValidity, LogicalValidity, ValidityVTable};
use crate::{
ArrayChildrenIterator, ArrayDType, ArrayLen, ChildrenCollector, ContextRef,
ArrayChildrenIterator, ArrayDType, ArrayLen, ChildrenCollector, ContextRef, MetadataBytes,
NamedChildrenCollector,
};

Expand Down Expand Up @@ -60,7 +60,7 @@ impl ArrayData {
encoding: EncodingRef,
dtype: DType,
len: usize,
metadata: Option<ByteBuffer>,
metadata: MetadataBytes,
buffers: Option<Box<[ByteBuffer]>>,
children: Option<Box<[ArrayData]>>,
statistics: StatsSet,
Expand Down Expand Up @@ -276,10 +276,11 @@ impl ArrayData {
offsets
}

pub fn metadata_bytes(&self) -> Option<&[u8]> {
/// Returns the array's metadata bytes, which are 8-byte aligned.
pub fn metadata_bytes(&self) -> MetadataBytes {
match &self.0 {
InnerArrayData::Owned(d) => d.metadata.as_ref().map(|b| b.as_slice()),
InnerArrayData::Viewed(v) => v.flatbuffer().metadata().map(|m| m.bytes()),
InnerArrayData::Owned(d) => d.metadata,
InnerArrayData::Viewed(v) => v.flatbuffer().metadata().to_le_bytes(),
}
}

Expand Down
4 changes: 2 additions & 2 deletions vortex-array/src/data/owned.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ use vortex_error::{vortex_bail, VortexResult};

use crate::encoding::EncodingRef;
use crate::stats::StatsSet;
use crate::{ArrayDType, ArrayData};
use crate::{ArrayDType, ArrayData, MetadataBytes};

/// Owned [`ArrayData`] with serialized metadata, backed by heap-allocated memory.
#[derive(Debug)]
pub(super) struct OwnedArrayData {
pub(super) encoding: EncodingRef,
pub(super) dtype: DType,
pub(super) len: usize,
pub(super) metadata: Option<ByteBuffer>,
pub(super) metadata: MetadataBytes,
pub(super) buffers: Option<Box<[ByteBuffer]>>,
pub(super) children: Option<Box<[ArrayData]>>,
pub(super) stats_set: RwLock<StatsSet>,
Expand Down
6 changes: 4 additions & 2 deletions vortex-array/src/encoding/opaque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ use crate::validate::ValidateVTable;
use crate::validity::{LogicalValidity, ValidityVTable};
use crate::variants::VariantsVTable;
use crate::visitor::{ArrayVisitor, VisitorVTable};
use crate::{ArrayData, Canonical, EmptyMetadata, IntoCanonicalVTable, MetadataVTable};
use crate::{
ArrayData, Canonical, EmptyMetadata, IntoCanonicalVTable, MetadataBytes, MetadataVTable,
};

/// An encoding of an array that we cannot interpret.
///
Expand All @@ -29,7 +31,7 @@ pub struct OpaqueEncoding(pub u16);
impl VariantsVTable<ArrayData> for OpaqueEncoding {}

impl MetadataVTable<ArrayData> for OpaqueEncoding {
fn validate_metadata(&self, _metadata: Option<&[u8]>) -> VortexResult<()> {
fn validate_metadata(&self, _metadata: MetadataBytes) -> VortexResult<()> {
Ok(())
}

Expand Down
69 changes: 36 additions & 33 deletions vortex-array/src/metadata.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,23 @@ use std::fmt::{Debug, Display, Formatter};

use flexbuffers::FlexbufferSerializer;
use vortex_buffer::ByteBuffer;
use vortex_dtype::{ToBytes, TryFromBytes};
use vortex_error::{vortex_bail, vortex_err, VortexError, VortexExpect, VortexResult};

use crate::encoding::Encoding;
use crate::ArrayData;
use crate::{metadata, ArrayData};

pub type MetadataBytes = [u8; 8];

pub trait ArrayMetadata: SerializeMetadata + DeserializeMetadata + Display {}

pub trait SerializeMetadata {
fn serialize(&self) -> VortexResult<Option<ByteBuffer>>;
fn serialize(&self) -> VortexResult<MetadataBytes>;
}

impl SerializeMetadata for () {
fn serialize(&self) -> VortexResult<Option<ByteBuffer>> {
Ok(None)
fn serialize(&self) -> VortexResult<MetadataBytes> {
Ok([0; 8])
}
}

Expand All @@ -25,31 +28,31 @@ where
{
type Output;

fn deserialize(metadata: Option<&[u8]>) -> VortexResult<Self::Output>;
fn deserialize(metadata: MetadataBytes) -> VortexResult<Self::Output>;

/// Deserialize metadata without validation.
///
/// ## Safety
///
/// Those who use this API must be sure to have invoked deserialize at least once before
/// calling this method.
unsafe fn deserialize_unchecked(metadata: Option<&[u8]>) -> Self::Output {
unsafe fn deserialize_unchecked(metadata: MetadataBytes) -> Self::Output {
Self::deserialize(metadata)
.vortex_expect("Metadata should have been validated before calling this method")
}

/// Format metadata for display.
fn format(metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result;
fn format(metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result;
}

pub trait MetadataVTable<Array> {
fn validate_metadata(&self, metadata: Option<&[u8]>) -> VortexResult<()>;
fn validate_metadata(&self, metadata: MetadataBytes) -> VortexResult<()>;

fn display_metadata(&self, array: &Array, f: &mut Formatter<'_>) -> std::fmt::Result;
}

impl<E: Encoding> MetadataVTable<ArrayData> for E {
fn validate_metadata(&self, metadata: Option<&[u8]>) -> VortexResult<()> {
fn validate_metadata(&self, metadata: MetadataBytes) -> VortexResult<()> {
E::Metadata::deserialize(metadata).map(|_| ())
}

Expand All @@ -62,22 +65,19 @@ pub struct EmptyMetadata;
impl ArrayMetadata for EmptyMetadata {}

impl SerializeMetadata for EmptyMetadata {
fn serialize(&self) -> VortexResult<Option<ByteBuffer>> {
Ok(None)
fn serialize(&self) -> VortexResult<MetadataBytes> {
Ok([0; 8])
}
}

impl DeserializeMetadata for EmptyMetadata {
type Output = EmptyMetadata;

fn deserialize(metadata: Option<&[u8]>) -> VortexResult<Self::Output> {
if metadata.is_some() {
vortex_bail!("EmptyMetadata should not have metadata bytes")
}
fn deserialize(_metadata: MetadataBytes) -> VortexResult<Self::Output> {
Ok(EmptyMetadata)
}

fn format(_metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result {
fn format(_metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str("EmptyMetadata")
}
}
Expand All @@ -101,13 +101,14 @@ where
>,
>,
{
fn serialize(&self) -> VortexResult<Option<ByteBuffer>> {
fn serialize(&self) -> VortexResult<[u8; 8]> {
let buf = rkyv::to_bytes::<VortexError>(&self.0)?;
if buf.is_empty() {
Ok(None)
} else {
Ok(Some(ByteBuffer::from(buf)))
if buf.len() > 8 {
vortex_bail!("Metadata exceeds 8 bytes")
}
let mut metadata: [u8; 8] = [0; 8];
metadata[..buf.len()].copy_from_slice(buf.as_slice());
Ok(metadata)
}
}

Expand All @@ -124,14 +125,12 @@ where
{
type Output = M;

fn deserialize(metadata: Option<&[u8]>) -> VortexResult<Self::Output> {
rkyv::from_bytes::<M, VortexError>(
metadata.ok_or_else(|| vortex_err!("Missing expected metadata"))?,
)
fn deserialize(metadata: MetadataBytes) -> VortexResult<Self::Output> {
rkyv::from_bytes::<M, VortexError>(&metadata[..])
}

#[allow(clippy::use_debug)]
fn format(metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result {
fn format(metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result {
match Self::deserialize(metadata) {
Ok(m) => write!(f, "{:?}", m),
Err(_) => write!(f, "Failed to deserialize metadata"),
Expand All @@ -145,10 +144,14 @@ impl<M> SerializeMetadata for SerdeMetadata<M>
where
M: serde::Serialize,
{
fn serialize(&self) -> VortexResult<Option<ByteBuffer>> {
fn serialize(&self) -> VortexResult<MetadataBytes> {
let mut ser = FlexbufferSerializer::new();
serde::Serialize::serialize(&self.0, &mut ser)?;
Ok(Some(ser.take_buffer().into()))
let buf = ser.take_buffer();
if buf.len() > 8 {
vortex_bail!("Metadata exceeds 8 bytes")
}
Ok(buf.as_slice().try_into()?)
}
}

Expand All @@ -159,14 +162,14 @@ where
{
type Output = M;

fn deserialize(metadata: Option<&[u8]>) -> VortexResult<Self::Output> {
let bytes =
metadata.ok_or_else(|| vortex_err!("Serde metadata requires metadata bytes"))?;
Ok(M::deserialize(flexbuffers::Reader::get_root(bytes)?)?)
fn deserialize(metadata: MetadataBytes) -> VortexResult<Self::Output> {
Ok(M::deserialize(flexbuffers::Reader::get_root(
&metadata[..],
)?)?)
}

#[allow(clippy::use_debug)]
fn format(metadata: Option<&[u8]>, f: &mut Formatter<'_>) -> std::fmt::Result {
fn format(metadata: MetadataBytes, f: &mut Formatter<'_>) -> std::fmt::Result {
match Self::deserialize(metadata) {
Ok(m) => write!(f, "{:?}", m),
Err(_) => write!(f, "Failed to deserialize metadata"),
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/nbytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ impl ArrayData {
self.encoding()
.accept(self.as_ref(), &mut visitor)
.vortex_expect("Failed to get nbytes from Array");
visitor.0 + self.metadata_bytes().map_or(0, |b| b.len())
visitor.0
}
}

Expand Down
7 changes: 2 additions & 5 deletions vortex-array/src/parts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::fmt::{Debug, Formatter};
use flatbuffers::{FlatBufferBuilder, Follow, WIPOffset};
use itertools::Itertools;
use vortex_buffer::ByteBuffer;
use vortex_dtype::DType;
use vortex_dtype::{DType, TryFromBytes};
use vortex_error::{vortex_panic, VortexExpect, VortexResult};
use vortex_flatbuffers::{
array as fba, FlatBuffer, FlatBufferRoot, WriteFlatBuffer, WriteFlatBufferExt,
Expand Down Expand Up @@ -128,10 +128,7 @@ impl WriteFlatBuffer for ArrayPartsFlatBuffer<'_> {
fbb: &mut FlatBufferBuilder<'fb>,
) -> WIPOffset<Self::Target<'fb>> {
let encoding = self.array.encoding().id().code();
let metadata = self
.array
.metadata_bytes()
.map(|bytes| fbb.create_vector(bytes));
let metadata = u64::from_le_bytes(self.array.metadata_bytes());

// Assign buffer indices for all child arrays.
let nbuffers = u16::try_from(self.array.nbuffers())
Expand Down
15 changes: 7 additions & 8 deletions vortex-array/src/test_harness.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::io::Write;

use goldenfile::differs::binary_diff;
use goldenfile::Mint;
use vortex_dtype::ToBytes;
use vortex_error::VortexExpect;

use crate::{DeserializeMetadata, SerializeMetadata};
Expand All @@ -16,13 +17,11 @@ where
T: DeserializeMetadata,
{
let mut mint = Mint::new("goldenfiles/");
if let Some(meta) = metadata
let meta = metadata
.serialize()
.vortex_expect("Failed to serialize metadata")
{
let mut f = mint
.new_goldenfile_with_differ(name, Box::new(binary_diff))
.unwrap();
f.write_all(&meta).unwrap();
}
.vortex_expect("Failed to serialize metadata");
let mut f = mint
.new_goldenfile_with_differ(name, Box::new(binary_diff))
.unwrap();
f.write_all(&meta[..]).unwrap();
}
2 changes: 1 addition & 1 deletion vortex-flatbuffers/flatbuffers/vortex-array/array.fbs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ include "vortex-scalar/scalar.fbs";

table Array {
encoding: uint16;
metadata: [ubyte];
metadata: uint64; // We store as a u64 to guarantee 8-byte alignment.
children: [Array];
buffers: [uint16];
stats: ArrayStats;
Expand Down
16 changes: 8 additions & 8 deletions vortex-flatbuffers/src/generated/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ impl<'a> Array<'a> {
args: &'args ArrayArgs<'args>
) -> flatbuffers::WIPOffset<Array<'bldr>> {
let mut builder = ArrayBuilder::new(_fbb);
builder.add_metadata(args.metadata);
if let Some(x) = args.stats { builder.add_stats(x); }
if let Some(x) = args.buffers { builder.add_buffers(x); }
if let Some(x) = args.children { builder.add_children(x); }
if let Some(x) = args.metadata { builder.add_metadata(x); }
builder.add_encoding(args.encoding);
builder.finish()
}
Expand All @@ -60,11 +60,11 @@ impl<'a> Array<'a> {
unsafe { self._tab.get::<u16>(Array::VT_ENCODING, Some(0)).unwrap()}
}
#[inline]
pub fn metadata(&self) -> Option<flatbuffers::Vector<'a, u8>> {
pub fn metadata(&self) -> u64 {
// Safety:
// Created from valid Table for this object
// which contains a valid value in this slot
unsafe { self._tab.get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, u8>>>(Array::VT_METADATA, None)}
unsafe { self._tab.get::<u64>(Array::VT_METADATA, Some(0)).unwrap()}
}
#[inline]
pub fn children(&self) -> Option<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Array<'a>>>> {
Expand Down Expand Up @@ -97,7 +97,7 @@ impl flatbuffers::Verifiable for Array<'_> {
use self::flatbuffers::Verifiable;
v.visit_table(pos)?
.visit_field::<u16>("encoding", Self::VT_ENCODING, false)?
.visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, u8>>>("metadata", Self::VT_METADATA, false)?
.visit_field::<u64>("metadata", Self::VT_METADATA, false)?
.visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<Array>>>>("children", Self::VT_CHILDREN, false)?
.visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, u16>>>("buffers", Self::VT_BUFFERS, false)?
.visit_field::<flatbuffers::ForwardsUOffset<ArrayStats>>("stats", Self::VT_STATS, false)?
Expand All @@ -107,7 +107,7 @@ impl flatbuffers::Verifiable for Array<'_> {
}
pub struct ArrayArgs<'a> {
pub encoding: u16,
pub metadata: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, u8>>>,
pub metadata: u64,
pub children: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Array<'a>>>>>,
pub buffers: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, u16>>>,
pub stats: Option<flatbuffers::WIPOffset<ArrayStats<'a>>>,
Expand All @@ -117,7 +117,7 @@ impl<'a> Default for ArrayArgs<'a> {
fn default() -> Self {
ArrayArgs {
encoding: 0,
metadata: None,
metadata: 0,
children: None,
buffers: None,
stats: None,
Expand All @@ -135,8 +135,8 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ArrayBuilder<'a, 'b, A> {
self.fbb_.push_slot::<u16>(Array::VT_ENCODING, encoding, 0);
}
#[inline]
pub fn add_metadata(&mut self, metadata: flatbuffers::WIPOffset<flatbuffers::Vector<'b , u8>>) {
self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(Array::VT_METADATA, metadata);
pub fn add_metadata(&mut self, metadata: u64) {
self.fbb_.push_slot::<u64>(Array::VT_METADATA, metadata, 0);
}
#[inline]
pub fn add_children(&mut self, children: flatbuffers::WIPOffset<flatbuffers::Vector<'b , flatbuffers::ForwardsUOffset<Array<'b >>>>) {
Expand Down
Loading