Skip to content

Commit bc28f59

Browse files
authored
Pull out layout flatbuffer (#3127)
For queries that exclusively use column-level stats, in theory we could defer loading the layout flatbuffer entirely... In any case, this makes it easier to build custom logic for deferred flatbuffer verification over layouts (when it's not included as part of footer validation). We could build similar logic into DType flatbuffer if that also gets expensive.
1 parent 6b262b6 commit bc28f59

File tree

10 files changed

+147
-122
lines changed

10 files changed

+147
-122
lines changed

vortex-file/src/footer/file_layout.rs

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,36 +9,32 @@ use std::sync::Arc;
99
use flatbuffers::{FlatBufferBuilder, WIPOffset};
1010
use vortex_array::ArrayContext;
1111
use vortex_flatbuffers::{FlatBufferRoot, WriteFlatBuffer, footer as fb};
12-
use vortex_layout::{Layout, LayoutContext};
12+
use vortex_layout::LayoutContext;
1313

1414
use crate::footer::segment::SegmentSpec;
1515

1616
/// A writer for serializing a file footer to a FlatBuffer.
1717
///
1818
/// This struct is used to write the footer component of a Vortex file, which
1919
/// records the array and layout encodings and the segments used in the file.
20-
pub(crate) struct FileLayoutFlatBufferWriter {
20+
pub(crate) struct FooterFlatBufferWriter {
2121
/// The array context containing encodings used in the file.
2222
pub(crate) ctx: ArrayContext,
23-
/// The root layout of the file.
24-
pub(crate) layout: Layout,
23+
/// The layout context containing the layouts used in the file.
24+
pub(crate) layout_ctx: LayoutContext,
2525
/// Specifications for all segments in the file.
2626
pub(crate) segment_specs: Arc<[SegmentSpec]>,
2727
}
2828

29-
impl FlatBufferRoot for FileLayoutFlatBufferWriter {}
29+
impl FlatBufferRoot for FooterFlatBufferWriter {}
3030

31-
impl WriteFlatBuffer for FileLayoutFlatBufferWriter {
32-
type Target<'a> = fb::FileLayout<'a>;
31+
impl WriteFlatBuffer for FooterFlatBufferWriter {
32+
type Target<'a> = fb::Footer<'a>;
3333

3434
fn write_flatbuffer<'fb>(
3535
&self,
3636
fbb: &mut FlatBufferBuilder<'fb>,
3737
) -> WIPOffset<Self::Target<'fb>> {
38-
// Set up a layout context to capture the layouts used in the file.
39-
let layout_ctx = LayoutContext::empty();
40-
let layout = self.layout.write_flatbuffer(fbb, &layout_ctx);
41-
4238
let segment_specs =
4339
fbb.create_vector_from_iter(self.segment_specs.iter().map(fb::SegmentSpec::from));
4440

@@ -53,7 +49,8 @@ impl WriteFlatBuffer for FileLayoutFlatBufferWriter {
5349
.collect::<Vec<_>>();
5450
let array_specs = fbb.create_vector(array_specs.as_slice());
5551

56-
let layout_specs = layout_ctx
52+
let layout_specs = self
53+
.layout_ctx
5754
.encodings()
5855
.iter()
5956
.map(|e| {
@@ -63,10 +60,9 @@ impl WriteFlatBuffer for FileLayoutFlatBufferWriter {
6360
.collect::<Vec<_>>();
6461
let layout_specs = fbb.create_vector(layout_specs.as_slice());
6562

66-
fb::FileLayout::create(
63+
fb::Footer::create(
6764
fbb,
68-
&fb::FileLayoutArgs {
69-
layout: Some(layout),
65+
&fb::FooterArgs {
7066
segment_specs: Some(segment_specs),
7167
array_specs: Some(array_specs),
7268
layout_specs: Some(layout_specs),

vortex-file/src/footer/mod.rs

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ use vortex_array::stats::StatsSet;
2323
use vortex_array::{ArrayContext, ArrayRegistry};
2424
use vortex_dtype::DType;
2525
use vortex_error::{VortexResult, vortex_bail, vortex_err};
26-
use vortex_flatbuffers::{FlatBuffer, footer as fb};
26+
use vortex_flatbuffers::{FlatBuffer, footer as fb, layout as fbl};
2727
use vortex_layout::{Layout, LayoutContext, LayoutRegistry};
2828

2929
/// Captures the layout information of a Vortex file.
@@ -39,39 +39,38 @@ pub struct Footer {
3939
impl Footer {
4040
/// Read the [`Footer`] from a flatbuffer.
4141
pub(crate) fn from_flatbuffer(
42-
flatbuffer: FlatBuffer,
42+
footer_bytes: FlatBuffer,
43+
layout_bytes: FlatBuffer,
4344
dtype: DType,
4445
statistics: Option<FileStatistics>,
4546
array_registry: &ArrayRegistry,
4647
layout_registry: &LayoutRegistry,
4748
) -> VortexResult<Self> {
48-
let fb = root::<fb::FileLayout>(&flatbuffer)?;
49-
let fb_root_layout = fb
50-
.layout()
51-
.ok_or_else(|| vortex_err!("Footer missing root layout"))?;
49+
let fb_footer = root::<fb::Footer>(&footer_bytes)?;
50+
let fb_layout = root::<fbl::Layout>(&layout_bytes)?;
5251

5352
// Create a LayoutContext from the registry.
54-
let layout_specs = fb.layout_specs();
53+
let layout_specs = fb_footer.layout_specs();
5554
let layout_ids = layout_specs
5655
.iter()
5756
.flat_map(|e| e.iter())
5857
.map(|encoding| encoding.id());
5958
let layout_ctx = layout_registry.new_context(layout_ids)?;
6059

6160
// Create an ArrayContext from the registry.
62-
let array_specs = fb.array_specs();
61+
let array_specs = fb_footer.array_specs();
6362
let array_ids = array_specs
6463
.iter()
6564
.flat_map(|e| e.iter())
6665
.map(|encoding| encoding.id());
6766
let array_ctx = array_registry.new_context(array_ids)?;
6867

6968
let root_encoding = layout_ctx
70-
.lookup_encoding(fb_root_layout.encoding())
69+
.lookup_encoding(fb_layout.encoding())
7170
.ok_or_else(|| {
7271
vortex_err!(
7372
"Footer root layout encoding {} not found",
74-
fb_root_layout.encoding()
73+
fb_layout.encoding()
7574
)
7675
})?
7776
.clone();
@@ -82,13 +81,13 @@ impl Footer {
8281
"".into(),
8382
root_encoding,
8483
dtype,
85-
flatbuffer.clone(),
86-
fb_root_layout._tab.loc(),
84+
layout_bytes.clone(),
85+
fb_layout._tab.loc(),
8786
layout_ctx.clone(),
8887
)
8988
};
9089

91-
let segments: Arc<[SegmentSpec]> = fb
90+
let segments: Arc<[SegmentSpec]> = fb_footer
9291
.segment_specs()
9392
.ok_or_else(|| vortex_err!("FileLayout missing segment specs"))?
9493
.iter()

vortex-file/src/footer/postscript.rs

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ use vortex_flatbuffers::{FlatBufferRoot, ReadFlatBuffer, WriteFlatBuffer, footer
77
/// reading a Vortex file.
88
pub(crate) struct Postscript {
99
pub(crate) dtype: Option<PostscriptSegment>,
10-
pub(crate) statistics: Option<PostscriptSegment>,
1110
pub(crate) layout: PostscriptSegment,
11+
pub(crate) statistics: Option<PostscriptSegment>,
12+
pub(crate) footer: PostscriptSegment,
1213
}
1314

1415
impl FlatBufferRoot for Postscript {}
@@ -21,14 +22,16 @@ impl WriteFlatBuffer for Postscript {
2122
fbb: &mut FlatBufferBuilder<'fb>,
2223
) -> WIPOffset<Self::Target<'fb>> {
2324
let dtype = self.dtype.as_ref().map(|ps| ps.write_flatbuffer(fbb));
24-
let statistics = self.statistics.as_ref().map(|ps| ps.write_flatbuffer(fbb));
2525
let layout = self.layout.write_flatbuffer(fbb);
26+
let statistics = self.statistics.as_ref().map(|ps| ps.write_flatbuffer(fbb));
27+
let footer = self.footer.write_flatbuffer(fbb);
2628
fb::Postscript::create(
2729
fbb,
2830
&fb::PostscriptArgs {
2931
dtype,
30-
statistics,
3132
layout: Some(layout),
33+
statistics,
34+
footer: Some(footer),
3235
},
3336
)
3437
}
@@ -46,13 +49,17 @@ impl ReadFlatBuffer for Postscript {
4649
.dtype()
4750
.map(|ps| PostscriptSegment::read_flatbuffer(&ps))
4851
.transpose()?,
52+
layout: PostscriptSegment::read_flatbuffer(
53+
&fb.layout()
54+
.ok_or_else(|| vortex_err!("Postscript missing layout segment"))?,
55+
)?,
4956
statistics: fb
5057
.statistics()
5158
.map(|ps| PostscriptSegment::read_flatbuffer(&ps))
5259
.transpose()?,
53-
layout: PostscriptSegment::read_flatbuffer(
54-
&fb.layout()
55-
.ok_or_else(|| vortex_err!("Postscript missing layout segment"))?,
60+
footer: PostscriptSegment::read_flatbuffer(
61+
&fb.footer()
62+
.ok_or_else(|| vortex_err!("Postscript missing footer segment"))?,
5663
)?,
5764
})
5865
}

vortex-file/src/generic.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ impl VortexOpenOptions<GenericVortexFile> {
154154
read_more_offset = read_more_offset.min(stats_segment.offset);
155155
}
156156
read_more_offset = read_more_offset.min(postscript.layout.offset);
157+
read_more_offset = read_more_offset.min(postscript.footer.offset);
157158

158159
// Read more bytes if necessary.
159160
if read_more_offset < initial_offset {
@@ -188,6 +189,7 @@ impl VortexOpenOptions<GenericVortexFile> {
188189
let footer = self.parse_footer(
189190
initial_offset,
190191
&initial_read,
192+
&postscript.footer,
191193
&postscript.layout,
192194
dtype,
193195
file_stats,

vortex-file/src/memory.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,14 @@ impl VortexOpenOptions<InMemoryFileType> {
4747
.map(|segment| self.parse_file_statistics(0, &buffer, &segment))
4848
.transpose()?;
4949

50-
let footer = self.parse_footer(0, &buffer, &postscript.layout, dtype, file_stats)?;
50+
let footer = self.parse_footer(
51+
0,
52+
&buffer,
53+
&postscript.footer,
54+
&postscript.layout,
55+
dtype,
56+
file_stats,
57+
)?;
5158

5259
let segment_source_factory = Arc::new(InMemorySegmentReader {
5360
buffer,

vortex-file/src/open.rs

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -170,15 +170,24 @@ impl<F: FileType> VortexOpenOptions<F> {
170170
&self,
171171
initial_offset: u64,
172172
initial_read: &[u8],
173+
footer_segment: &PostscriptSegment,
173174
layout_segment: &PostscriptSegment,
174175
dtype: DType,
175176
file_stats: Option<FileStatistics>,
176177
) -> VortexResult<Footer> {
177-
let offset = usize::try_from(layout_segment.offset - initial_offset)?;
178-
let bytes =
179-
FlatBuffer::copy_from(&initial_read[offset..offset + (layout_segment.length as usize)]);
178+
let footer_offset = usize::try_from(footer_segment.offset - initial_offset)?;
179+
let footer_bytes = FlatBuffer::copy_from(
180+
&initial_read[footer_offset..footer_offset + (footer_segment.length as usize)],
181+
);
182+
183+
let layout_offset = usize::try_from(layout_segment.offset - initial_offset)?;
184+
let layout_bytes = FlatBuffer::copy_from(
185+
&initial_read[layout_offset..layout_offset + (layout_segment.length as usize)],
186+
);
187+
180188
Footer::from_flatbuffer(
181-
bytes,
189+
footer_bytes,
190+
layout_bytes,
182191
dtype,
183192
file_stats,
184193
&self.registry,

vortex-file/src/writer.rs

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@ use vortex_error::{VortexExpect, VortexResult, vortex_bail, vortex_err};
66
use vortex_flatbuffers::{FlatBuffer, FlatBufferRoot, WriteFlatBuffer, WriteFlatBufferExt};
77
use vortex_io::VortexWrite;
88
use vortex_layout::layouts::file_stats::FileStatsLayoutWriter;
9-
use vortex_layout::{LayoutStrategy, LayoutWriter};
9+
use vortex_layout::{LayoutContext, LayoutStrategy, LayoutWriter};
1010

11-
use crate::footer::{FileLayoutFlatBufferWriter, FileStatistics, Postscript, PostscriptSegment};
11+
use crate::footer::{FileStatistics, FooterFlatBufferWriter, Postscript, PostscriptSegment};
1212
use crate::segments::writer::BufferedSegmentWriter;
1313
use crate::strategy::VortexLayoutStrategy;
1414
use crate::{EOF_SIZE, MAGIC_BYTES, MAX_FOOTER_SIZE, VERSION};
@@ -112,19 +112,24 @@ impl VortexWriteOptions {
112112
Some(self.write_flatbuffer(&mut write, stream.dtype()).await?)
113113
};
114114

115+
let layout_ctx = LayoutContext::empty();
116+
let layout_segment = self
117+
.write_flatbuffer(&mut write, &layout.flatbuffer_writer(&layout_ctx))
118+
.await?;
119+
115120
let statistics_segment = if self.file_statistics.is_empty() {
116121
None
117122
} else {
118123
let file_statistics = FileStatistics(layout_writer.into_stats_sets().into());
119124
Some(self.write_flatbuffer(&mut write, &file_statistics).await?)
120125
};
121126

122-
let layout_segment = self
127+
let footer_segment = self
123128
.write_flatbuffer(
124129
&mut write,
125-
&FileLayoutFlatBufferWriter {
130+
&FooterFlatBufferWriter {
126131
ctx,
127-
layout,
132+
layout_ctx,
128133
segment_specs: segment_specs.into(),
129134
},
130135
)
@@ -133,8 +138,9 @@ impl VortexWriteOptions {
133138
// Assemble the postscript, and write it manually to avoid any framing.
134139
let postscript = Postscript {
135140
dtype: dtype_segment,
136-
statistics: statistics_segment,
137141
layout: layout_segment,
142+
statistics: statistics_segment,
143+
footer: footer_segment,
138144
};
139145
let postscript_buffer = postscript.write_flatbuffer_bytes();
140146
if postscript_buffer.len() > MAX_FOOTER_SIZE as usize {

vortex-flatbuffers/flatbuffers/vortex-file/footer.fbs

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@ include "vortex-layout/layout.fbs";
2020
table Postscript {
2121
/// Segment containing the root `DType` flatbuffer.
2222
dtype: PostscriptSegment;
23+
/// Segment containing the root `Layout` flatbuffer (required).
24+
layout: PostscriptSegment;
2325
/// Segment containing the file-level `Statistics` flatbuffer.
2426
statistics: PostscriptSegment;
25-
/// Segment containing the `FileLayout` flatbuffer (required).
26-
layout: PostscriptSegment;
27+
/// Segment containing the `Footer` flatbuffer (required).
28+
footer: PostscriptSegment;
2729
}
2830

2931
/// A `PostscriptSegment` describes the location of a segment in the file without referencing any
@@ -47,10 +49,7 @@ table FileStatistics {
4749

4850
/// The `Registry` object stores dictionary-encoded configuration for segments,
4951
/// compression schemes, encryption schemes, etc.
50-
table FileLayout {
51-
// The root [`Layout`] of the file.
52-
layout: Layout;
53-
52+
table Footer {
5453
// Dictionary-encoded array specs, up to u16::MAX.
5554
array_specs: [ArraySpec];
5655
// Dictionary-encoded layout specs, up to u16::MAX.
@@ -111,5 +110,5 @@ table EncryptionSpec {
111110
// [footer]
112111

113112
root_type FileStatistics;
114-
root_type FileLayout;
113+
root_type Footer;
115114
root_type Postscript;

0 commit comments

Comments
 (0)