Skip to content

Commit 60af509

Browse files
authored
Initial Vortex Layouts (#1805)
Initial implementation of the new structure of Vortex layouts per #1676.

* Only the flat layout works.
* I'm not 100% sure on the trait APIs; these will evolve as we pad out the implementation.
* StructLayout will be worked on as part of #1782, so it will probably come last.

Up next:

* Implementation of ChunkedLayout

Open questions:

* What is the API that e.g. Python users have to precisely configure layout strategies? Can I override a layout writer for a specific field?
* Similarly, how can we configure the layout scanners? Can I configure a level 0 chunked layout differently from level 2 in a chunk-of-struct-of-chunk world?
1 parent 3617e6c commit 60af509

File tree

29 files changed

+2303
-0
lines changed

29 files changed

+2303
-0
lines changed

Cargo.lock

Lines changed: 18 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ members = [
1616
"vortex-flatbuffers",
1717
"vortex-io",
1818
"vortex-ipc",
19+
"vortex-layout",
1920
"vortex-proto",
2021
"vortex-sampling-compressor",
2122
"vortex-scalar",

vortex-flatbuffers/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ dtype = []
1818
scalar = ["dtype"]
1919
array = ["dtype", "scalar"]
2020
ipc = ["array"]
21+
layout = ["array"]
2122
file = ["ipc"]
2223

2324
[dependencies]
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/// A `Layout` is a recursive data structure describing the physical layout of Vortex arrays in random access storage.
2+
/// As a starting, concrete example, the first three Layout encodings are defined as:
3+
///
4+
/// 1. encoding == 1, `Flat` -> one buffer, zero child Layouts
5+
/// 2. encoding == 2, `Chunked` -> zero buffers, one or more child Layouts (used for chunks of rows)
6+
/// 3. encoding == 3, `Columnar` -> zero buffers, one or more child Layouts (used for columns of structs)
7+
///
8+
/// The `row_count` represents the number of rows represented by this Layout. This is very useful for
9+
/// pruning the Layout tree based on row filters.
10+
///
11+
/// The `metadata` field is fully opaque at this layer, and allows the Layout implementation corresponding to
12+
/// `encoding` to embed additional information that may be useful for the reader. For example, the `ChunkedLayout`
13+
/// uses the first byte of the `metadata` array as a boolean to indicate whether the first child Layout represents
14+
/// the statistics table for the other chunks.
15+
table Layout {
16+
/// The ID of the encoding used for this Layout.
17+
encoding: uint16;
18+
/// The number of rows of data represented by this Layout.
19+
row_count: uint64;
20+
/// Any additional metadata this layout needs to interpret its children.
21+
/// This does not include data-specific metadata, which the layout should store in a segment.
22+
metadata: [ubyte];
23+
/// The children of this Layout.
24+
children: [Layout];
25+
/// Identifiers for each `Segment` of data required by this layout.
26+
segments: [uint32];
27+
}
28+
29+
root_type Layout;
Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
// automatically generated by the FlatBuffers compiler, do not modify
2+
3+
4+
// @generated
5+
6+
use core::mem;
7+
use core::cmp::Ordering;
8+
9+
extern crate flatbuffers;
10+
use self::flatbuffers::{EndianScalar, Follow};
11+
12+
pub enum LayoutOffset {}
13+
#[derive(Copy, Clone, PartialEq)]
14+
15+
/// A `Layout` is a recursive data structure describing the physical layout of Vortex arrays in random access storage.
16+
/// As a starting, concrete example, the first three Layout encodings are defined as:
17+
///
18+
/// 1. encoding == 1, `Flat` -> one buffer, zero child Layouts
19+
/// 2. encoding == 2, `Chunked` -> zero buffers, one or more child Layouts (used for chunks of rows)
20+
/// 3. encoding == 3, `Columnar` -> zero buffers, one or more child Layouts (used for columns of structs)
21+
///
22+
/// The `row_count` represents the number of rows represented by this Layout. This is very useful for
23+
/// pruning the Layout tree based on row filters.
24+
///
25+
/// The `metadata` field is fully opaque at this layer, and allows the Layout implementation corresponding to
26+
/// `encoding` to embed additional information that may be useful for the reader. For example, the `ChunkedLayout`
27+
/// uses the first byte of the `metadata` array as a boolean to indicate whether the first child Layout represents
28+
/// the statistics table for the other chunks.
29+
pub struct Layout<'a> {
30+
pub _tab: flatbuffers::Table<'a>,
31+
}
32+
33+
impl<'a> flatbuffers::Follow<'a> for Layout<'a> {
34+
type Inner = Layout<'a>;
35+
#[inline]
36+
unsafe fn follow(buf: &'a [u8], loc: usize) -> Self::Inner {
37+
Self { _tab: flatbuffers::Table::new(buf, loc) }
38+
}
39+
}
40+
41+
impl<'a> Layout<'a> {
42+
pub const VT_ENCODING: flatbuffers::VOffsetT = 4;
43+
pub const VT_ROW_COUNT: flatbuffers::VOffsetT = 6;
44+
pub const VT_METADATA: flatbuffers::VOffsetT = 8;
45+
pub const VT_CHILDREN: flatbuffers::VOffsetT = 10;
46+
pub const VT_SEGMENTS: flatbuffers::VOffsetT = 12;
47+
48+
#[inline]
49+
pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self {
50+
Layout { _tab: table }
51+
}
52+
#[allow(unused_mut)]
53+
pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>(
54+
_fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>,
55+
args: &'args LayoutArgs<'args>
56+
) -> flatbuffers::WIPOffset<Layout<'bldr>> {
57+
let mut builder = LayoutBuilder::new(_fbb);
58+
builder.add_row_count(args.row_count);
59+
if let Some(x) = args.segments { builder.add_segments(x); }
60+
if let Some(x) = args.children { builder.add_children(x); }
61+
if let Some(x) = args.metadata { builder.add_metadata(x); }
62+
builder.add_encoding(args.encoding);
63+
builder.finish()
64+
}
65+
66+
67+
/// The ID of the encoding used for this Layout.
68+
#[inline]
69+
pub fn encoding(&self) -> u16 {
70+
// Safety:
71+
// Created from valid Table for this object
72+
// which contains a valid value in this slot
73+
unsafe { self._tab.get::<u16>(Layout::VT_ENCODING, Some(0)).unwrap()}
74+
}
75+
/// The number of rows of data represented by this Layout.
76+
#[inline]
77+
pub fn row_count(&self) -> u64 {
78+
// Safety:
79+
// Created from valid Table for this object
80+
// which contains a valid value in this slot
81+
unsafe { self._tab.get::<u64>(Layout::VT_ROW_COUNT, Some(0)).unwrap()}
82+
}
83+
/// Any additional metadata this layout needs to interpret its children.
84+
/// This does not include data-specific metadata, which the layout should store in a segment.
85+
#[inline]
86+
pub fn metadata(&self) -> Option<flatbuffers::Vector<'a, u8>> {
87+
// Safety:
88+
// Created from valid Table for this object
89+
// which contains a valid value in this slot
90+
unsafe { self._tab.get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, u8>>>(Layout::VT_METADATA, None)}
91+
}
92+
/// The children of this Layout.
93+
#[inline]
94+
pub fn children(&self) -> Option<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Layout<'a>>>> {
95+
// Safety:
96+
// Created from valid Table for this object
97+
// which contains a valid value in this slot
98+
unsafe { self._tab.get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Layout>>>>(Layout::VT_CHILDREN, None)}
99+
}
100+
/// Identifiers for each `Segment` of data required by this layout.
101+
#[inline]
102+
pub fn segments(&self) -> Option<flatbuffers::Vector<'a, u32>> {
103+
// Safety:
104+
// Created from valid Table for this object
105+
// which contains a valid value in this slot
106+
unsafe { self._tab.get::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'a, u32>>>(Layout::VT_SEGMENTS, None)}
107+
}
108+
}
109+
110+
impl flatbuffers::Verifiable for Layout<'_> {
111+
#[inline]
112+
fn run_verifier(
113+
v: &mut flatbuffers::Verifier, pos: usize
114+
) -> Result<(), flatbuffers::InvalidFlatbuffer> {
115+
use self::flatbuffers::Verifiable;
116+
v.visit_table(pos)?
117+
.visit_field::<u16>("encoding", Self::VT_ENCODING, false)?
118+
.visit_field::<u64>("row_count", Self::VT_ROW_COUNT, false)?
119+
.visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, u8>>>("metadata", Self::VT_METADATA, false)?
120+
.visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset<Layout>>>>("children", Self::VT_CHILDREN, false)?
121+
.visit_field::<flatbuffers::ForwardsUOffset<flatbuffers::Vector<'_, u32>>>("segments", Self::VT_SEGMENTS, false)?
122+
.finish();
123+
Ok(())
124+
}
125+
}
126+
pub struct LayoutArgs<'a> {
127+
pub encoding: u16,
128+
pub row_count: u64,
129+
pub metadata: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, u8>>>,
130+
pub children: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<Layout<'a>>>>>,
131+
pub segments: Option<flatbuffers::WIPOffset<flatbuffers::Vector<'a, u32>>>,
132+
}
133+
impl<'a> Default for LayoutArgs<'a> {
134+
#[inline]
135+
fn default() -> Self {
136+
LayoutArgs {
137+
encoding: 0,
138+
row_count: 0,
139+
metadata: None,
140+
children: None,
141+
segments: None,
142+
}
143+
}
144+
}
145+
146+
pub struct LayoutBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> {
147+
fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>,
148+
start_: flatbuffers::WIPOffset<flatbuffers::TableUnfinishedWIPOffset>,
149+
}
150+
impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> LayoutBuilder<'a, 'b, A> {
151+
#[inline]
152+
pub fn add_encoding(&mut self, encoding: u16) {
153+
self.fbb_.push_slot::<u16>(Layout::VT_ENCODING, encoding, 0);
154+
}
155+
#[inline]
156+
pub fn add_row_count(&mut self, row_count: u64) {
157+
self.fbb_.push_slot::<u64>(Layout::VT_ROW_COUNT, row_count, 0);
158+
}
159+
#[inline]
160+
pub fn add_metadata(&mut self, metadata: flatbuffers::WIPOffset<flatbuffers::Vector<'b , u8>>) {
161+
self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(Layout::VT_METADATA, metadata);
162+
}
163+
#[inline]
164+
pub fn add_children(&mut self, children: flatbuffers::WIPOffset<flatbuffers::Vector<'b , flatbuffers::ForwardsUOffset<Layout<'b >>>>) {
165+
self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(Layout::VT_CHILDREN, children);
166+
}
167+
#[inline]
168+
pub fn add_segments(&mut self, segments: flatbuffers::WIPOffset<flatbuffers::Vector<'b , u32>>) {
169+
self.fbb_.push_slot_always::<flatbuffers::WIPOffset<_>>(Layout::VT_SEGMENTS, segments);
170+
}
171+
#[inline]
172+
pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> LayoutBuilder<'a, 'b, A> {
173+
let start = _fbb.start_table();
174+
LayoutBuilder {
175+
fbb_: _fbb,
176+
start_: start,
177+
}
178+
}
179+
#[inline]
180+
pub fn finish(self) -> flatbuffers::WIPOffset<Layout<'a>> {
181+
let o = self.fbb_.end_table(self.start_);
182+
flatbuffers::WIPOffset::new(o.value())
183+
}
184+
}
185+
186+
impl core::fmt::Debug for Layout<'_> {
187+
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
188+
let mut ds = f.debug_struct("Layout");
189+
ds.field("encoding", &self.encoding());
190+
ds.field("row_count", &self.row_count());
191+
ds.field("metadata", &self.metadata());
192+
ds.field("children", &self.children());
193+
ds.field("segments", &self.segments());
194+
ds.finish()
195+
}
196+
}
197+
#[inline]
198+
/// Verifies that a buffer of bytes contains a `Layout`
199+
/// and returns it.
200+
/// Note that verification is still experimental and may not
201+
/// catch every error, or be maximally performant. For the
202+
/// previous, unchecked, behavior use
203+
/// `root_as_layout_unchecked`.
204+
pub fn root_as_layout(buf: &[u8]) -> Result<Layout, flatbuffers::InvalidFlatbuffer> {
205+
flatbuffers::root::<Layout>(buf)
206+
}
207+
#[inline]
208+
/// Verifies that a buffer of bytes contains a size prefixed
209+
/// `Layout` and returns it.
210+
/// Note that verification is still experimental and may not
211+
/// catch every error, or be maximally performant. For the
212+
/// previous, unchecked, behavior use
213+
/// `size_prefixed_root_as_layout_unchecked`.
214+
pub fn size_prefixed_root_as_layout(buf: &[u8]) -> Result<Layout, flatbuffers::InvalidFlatbuffer> {
215+
flatbuffers::size_prefixed_root::<Layout>(buf)
216+
}
217+
#[inline]
218+
/// Verifies, with the given options, that a buffer of bytes
219+
/// contains a `Layout` and returns it.
220+
/// Note that verification is still experimental and may not
221+
/// catch every error, or be maximally performant. For the
222+
/// previous, unchecked, behavior use
223+
/// `root_as_layout_unchecked`.
224+
pub fn root_as_layout_with_opts<'b, 'o>(
225+
opts: &'o flatbuffers::VerifierOptions,
226+
buf: &'b [u8],
227+
) -> Result<Layout<'b>, flatbuffers::InvalidFlatbuffer> {
228+
flatbuffers::root_with_opts::<Layout<'b>>(opts, buf)
229+
}
230+
#[inline]
231+
/// Verifies, with the given verifier options, that a buffer of
232+
/// bytes contains a size prefixed `Layout` and returns
233+
/// it. Note that verification is still experimental and may not
234+
/// catch every error, or be maximally performant. For the
235+
/// previous, unchecked, behavior use
236+
/// `size_prefixed_root_as_layout_unchecked`.
237+
pub fn size_prefixed_root_as_layout_with_opts<'b, 'o>(
238+
opts: &'o flatbuffers::VerifierOptions,
239+
buf: &'b [u8],
240+
) -> Result<Layout<'b>, flatbuffers::InvalidFlatbuffer> {
241+
flatbuffers::size_prefixed_root_with_opts::<Layout<'b>>(opts, buf)
242+
}
243+
#[inline]
244+
/// Assumes, without verification, that a buffer of bytes contains a Layout and returns it.
245+
/// # Safety
246+
/// Callers must trust the given bytes do indeed contain a valid `Layout`.
247+
pub unsafe fn root_as_layout_unchecked(buf: &[u8]) -> Layout {
248+
flatbuffers::root_unchecked::<Layout>(buf)
249+
}
250+
#[inline]
251+
/// Assumes, without verification, that a buffer of bytes contains a size prefixed Layout and returns it.
252+
/// # Safety
253+
/// Callers must trust the given bytes do indeed contain a valid size prefixed `Layout`.
254+
pub unsafe fn size_prefixed_root_as_layout_unchecked(buf: &[u8]) -> Layout {
255+
flatbuffers::size_prefixed_root_unchecked::<Layout>(buf)
256+
}
257+
#[inline]
258+
pub fn finish_layout_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(
259+
fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>,
260+
root: flatbuffers::WIPOffset<Layout<'a>>) {
261+
fbb.finish(root, None);
262+
}
263+
264+
#[inline]
265+
pub fn finish_size_prefixed_layout_buffer<'a, 'b, A: flatbuffers::Allocator + 'a>(fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, root: flatbuffers::WIPOffset<Layout<'a>>) {
266+
fbb.finish_size_prefixed(root, None);
267+
}

vortex-flatbuffers/src/lib.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,28 @@ pub mod scalar;
9292
/// ```
9393
pub mod footer;
9494

95+
#[cfg(feature = "layout")]
96+
#[allow(clippy::all)]
97+
#[allow(clippy::derive_partial_eq_without_eq)]
98+
#[allow(clippy::many_single_char_names)]
99+
#[allow(clippy::unwrap_used)]
100+
#[allow(dead_code)]
101+
#[allow(non_snake_case)]
102+
#[allow(non_camel_case_types)]
103+
#[allow(unsafe_op_in_unsafe_fn)]
104+
#[allow(unused_imports)]
105+
#[allow(unused_lifetimes)]
106+
#[allow(unused_qualifications)]
107+
#[rustfmt::skip]
108+
#[path = "./generated/layout.rs"]
109+
/// A recursive description of the physical layout of Vortex arrays in random access storage.
110+
///
111+
/// `layout.fbs`:
112+
/// ```flatbuffers
113+
#[doc = include_str!("../flatbuffers/vortex-layout/layout.fbs")]
114+
/// ```
115+
pub mod layout;
116+
95117
#[cfg(feature = "ipc")]
96118
#[allow(clippy::all)]
97119
#[allow(clippy::derive_partial_eq_without_eq)]

0 commit comments

Comments
 (0)