Skip to content

Commit 83a675d

Browse files
authored
feat: Layout VTable (#3331)
A similar refactor to #3273, but for layouts. Again, this introduces the idea that a `LayoutEncoding` can be independently heap-allocated and is instantiated as a `Layout`. One slight difference from arrays is that the `Layout` tree parses its children lazily, so child accessors are fallible. --------- Signed-off-by: Nicholas Gates <[email protected]>
1 parent 2781caa commit 83a675d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+3130
-2683
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-array/src/encoding.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ impl<V: VTable> Encoding for EncodingAdapter<V> {
9696
&self.0, dtype, len, &metadata, buffers, children,
9797
)?;
9898
assert_eq!(array.len(), len, "Array length mismatch after building");
99+
assert_eq!(array.dtype(), dtype, "Array dtype mismatch after building");
99100
Ok(array.to_array())
100101
}
101102

vortex-array/src/vtable/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ use crate::{Array, Encoding, EncodingId, EncodingRef, IntoArray};
3939
/// out of bounds). Post-conditions are validated after invocation of the vtable function and will
4040
/// panic if violated.
4141
pub trait VTable: 'static + Sized + Send + Sync + Debug {
42-
type Array: 'static + Send + Sync + Clone + Deref<Target = dyn Array> + IntoArray;
42+
type Array: 'static + Send + Sync + Clone + Debug + Deref<Target = dyn Array> + IntoArray;
4343
type Encoding: 'static + Send + Sync + Clone + Deref<Target = dyn Encoding>;
4444

4545
type ArrayVTable: ArrayVTable<Self>;

vortex-buffer/src/const.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ impl<T, const A: usize> ConstBuffer<T, A> {
3030
self.0.as_slice()
3131
}
3232

33+
/// Returns a reference to the inner buffer.
34+
pub fn inner(&self) -> &Buffer<T> {
35+
&self.0
36+
}
37+
3338
/// Unwrap the inner buffer.
3439
pub fn into_inner(self) -> Buffer<T> {
3540
self.0

vortex-datafusion/src/persistent/cache.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,8 @@ fn estimate_layout_size(footer: &Footer) -> usize {
162162

163163
let root_layout = footer.layout();
164164
let layout_size = size_of::<DType>()
165-
+ root_layout.metadata().map(|b| b.len()).unwrap_or_default()
166-
+ root_layout.nsegments() * size_of::<SegmentId>();
165+
+ root_layout.metadata().len()
166+
+ root_layout.segment_ids().len() * size_of::<SegmentId>();
167167

168168
segments_size + stats_size + layout_size
169169
}

vortex-dtype/src/field_mask.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
//! Field mask represents a field projection, which leads to a set of field paths under a given layout.
2+
// TODO(ngates): this API needs work. It could be made a lot easier to use.
23

34
use vortex_error::{VortexResult, vortex_bail};
45

vortex-file/src/file.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,8 @@ impl VortexFile {
7272
let segment_source = self.segment_source();
7373
self.footer
7474
.layout()
75-
.reader(&segment_source, self.footer().ctx())
75+
// TODO(ngates): we may want to allow the user to pass in a name here?
76+
.new_reader(&"".into(), &segment_source, self.footer().ctx())
7677
}
7778

7879
/// Initiate a scan of the file, returning a builder for configuring the scan.

vortex-file/src/footer/mod.rs

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -23,15 +23,15 @@ use vortex_array::stats::StatsSet;
2323
use vortex_array::{ArrayContext, ArrayRegistry};
2424
use vortex_dtype::DType;
2525
use vortex_error::{VortexResult, vortex_bail, vortex_err};
26-
use vortex_flatbuffers::{FlatBuffer, footer as fb, layout as fbl};
27-
use vortex_layout::{Layout, LayoutContext, LayoutRegistry};
26+
use vortex_flatbuffers::{FlatBuffer, footer as fb};
27+
use vortex_layout::{LayoutContext, LayoutRef, LayoutRegistry, layout_from_flatbuffer};
2828

2929
/// Captures the layout information of a Vortex file.
3030
#[derive(Debug, Clone)]
3131
pub struct Footer {
3232
array_ctx: ArrayContext,
3333
layout_ctx: LayoutContext,
34-
root_layout: Layout,
34+
root_layout: LayoutRef,
3535
segments: Arc<[SegmentSpec]>,
3636
statistics: Option<FileStatistics>,
3737
}
@@ -47,7 +47,6 @@ impl Footer {
4747
layout_registry: &LayoutRegistry,
4848
) -> VortexResult<Self> {
4949
let fb_footer = root::<fb::Footer>(&footer_bytes)?;
50-
let fb_layout = root::<fbl::Layout>(&layout_bytes)?;
5150

5251
// Create a LayoutContext from the registry.
5352
let layout_specs = fb_footer.layout_specs();
@@ -65,27 +64,7 @@ impl Footer {
6564
.map(|encoding| encoding.id());
6665
let array_ctx = array_registry.new_context(array_ids)?;
6766

68-
let root_encoding = layout_ctx
69-
.lookup_encoding(fb_layout.encoding())
70-
.ok_or_else(|| {
71-
vortex_err!(
72-
"Footer root layout encoding {} not found",
73-
fb_layout.encoding()
74-
)
75-
})?
76-
.clone();
77-
78-
// SAFETY: We have validated the fb_root_layout at the beginning of this function
79-
let root_layout = unsafe {
80-
Layout::new_viewed_unchecked(
81-
"".into(),
82-
root_encoding,
83-
dtype,
84-
layout_bytes.clone(),
85-
fb_layout._tab.loc(),
86-
layout_ctx.clone(),
87-
)
88-
};
67+
let root_layout = layout_from_flatbuffer(layout_bytes, &dtype, &layout_ctx)?;
8968

9069
let segments: Arc<[SegmentSpec]> = fb_footer
9170
.segment_specs()
@@ -118,8 +97,8 @@ impl Footer {
11897
&self.layout_ctx
11998
}
12099

121-
/// Returns the root [`Layout`] of the file.
122-
pub fn layout(&self) -> &Layout {
100+
/// Returns the root [`LayoutRef`] of the file.
101+
pub fn layout(&self) -> &LayoutRef {
123102
&self.root_layout
124103
}
125104

vortex-file/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
//! offsets. Finding the chunks containing a row range is an `Nlog(N)` operation of searching the
1818
//! offsets.
1919
//!
20-
//! 4. The [`StatsLayout`](vortex_layout::layouts::stats::StatsLayout).
20+
//! 4. The [`ZonedLayout`](vortex_layout::layouts::zoned::ZonedLayout).
2121
//!
2222
//! A layout, alone, is _not_ a standalone Vortex file because layouts are not self-describing. They
2323
//! neither contain a description of the kind of layout (e.g. flat, column of flat, chunked of

vortex-file/src/strategy.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ use vortex_layout::layouts::flat::writer::FlatLayoutStrategy;
1717
use vortex_layout::layouts::repartition::{
1818
RepartitionStrategy, RepartitionWriter, RepartitionWriterOptions,
1919
};
20-
use vortex_layout::layouts::stats::writer::{StatsLayoutOptions, StatsLayoutWriter};
2120
use vortex_layout::layouts::struct_::writer::StructLayoutWriter;
21+
use vortex_layout::layouts::zoned::writer::{ZonedLayoutOptions, ZonedLayoutWriter};
2222
use vortex_layout::segments::SegmentWriter;
23-
use vortex_layout::{Layout, LayoutStrategy, LayoutWriter, LayoutWriterExt};
23+
use vortex_layout::{LayoutRef, LayoutStrategy, LayoutWriter, LayoutWriterExt};
2424
use vortex_scalar::Scalar;
2525

2626
const ROW_BLOCK_SIZE: usize = 8192;
@@ -70,15 +70,15 @@ impl LayoutStrategy for VortexLayoutStrategy {
7070

7171
let writer = dict_strategy.new_writer(ctx, dtype)?;
7272

73-
// Prior to repartitioning, we record statistics
74-
let stats_writer = StatsLayoutWriter::new(
73+
// Prior to repartitioning, we create a zone map
74+
let zoned_writer = ZonedLayoutWriter::new(
7575
ctx.clone(),
7676
dtype,
7777
writer,
7878
ArcRef::new_arc(Arc::new(BtrBlocksCompressedStrategy {
7979
child: ArcRef::new_arc(Arc::new(FlatLayoutStrategy::default())),
8080
})),
81-
StatsLayoutOptions {
81+
ZonedLayoutOptions {
8282
block_size: ROW_BLOCK_SIZE,
8383
stats: PRUNING_STATS.into(),
8484
},
@@ -87,7 +87,7 @@ impl LayoutStrategy for VortexLayoutStrategy {
8787

8888
let writer = RepartitionWriter::new(
8989
dtype.clone(),
90-
stats_writer,
90+
zoned_writer,
9191
RepartitionWriterOptions {
9292
// No minimum block size in bytes
9393
block_size_minimum: 0,
@@ -211,7 +211,7 @@ impl LayoutWriter for BtrBlocksCompressedWriter {
211211
self.child.flush(segment_writer)
212212
}
213213

214-
fn finish(&mut self, segment_writer: &mut dyn SegmentWriter) -> VortexResult<Layout> {
214+
fn finish(&mut self, segment_writer: &mut dyn SegmentWriter) -> VortexResult<LayoutRef> {
215215
self.child.finish(segment_writer)
216216
}
217217
}
@@ -271,7 +271,7 @@ impl LayoutWriter for BufferedWriter {
271271
self.child.flush(segment_writer)
272272
}
273273

274-
fn finish(&mut self, segment_writer: &mut dyn SegmentWriter) -> VortexResult<Layout> {
274+
fn finish(&mut self, segment_writer: &mut dyn SegmentWriter) -> VortexResult<LayoutRef> {
275275
self.child.finish(segment_writer)
276276
}
277277
}

0 commit comments

Comments
 (0)