Skip to content

Commit 08f57dc

Browse files
feat[layout]: support array encoding outlined in flat layout (#5086)
Enabled by setting the `FLAT_LAYOUT_INLINE_ARRAY_NODE` environment variable. This can be used to plan a scan without loading the layout segments. --------- Signed-off-by: Joe Isaacs <[email protected]>
1 parent 66844f2 commit 08f57dc

File tree

8 files changed

+140
-25
lines changed

8 files changed

+140
-25
lines changed

vortex-layout/src/display.rs

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,20 @@ use vortex_error::VortexResult;
77
use crate::LayoutRef;
88

99
/// Display wrapper for layout tree visualization
10-
pub struct DisplayLayoutTree(pub LayoutRef);
10+
pub struct DisplayLayoutTree {
11+
layout: LayoutRef,
12+
verbose: bool,
13+
}
14+
15+
impl DisplayLayoutTree {
16+
pub fn new(layout: LayoutRef, verbose: bool) -> Self {
17+
Self { layout, verbose }
18+
}
19+
}
1120

1221
impl std::fmt::Display for DisplayLayoutTree {
1322
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
14-
fn make_tree(layout: LayoutRef) -> VortexResult<Tree<String>> {
23+
fn make_tree(layout: LayoutRef, verbose: bool) -> VortexResult<Tree<String>> {
1524
// Build the node label with encoding, dtype, and metadata
1625
let mut node_parts = vec![
1726
format!("{}", layout.encoding()),
@@ -24,6 +33,30 @@ impl std::fmt::Display for DisplayLayoutTree {
2433
node_parts.push(format!("children: {}", nchildren));
2534
}
2635

36+
// Add metadata information if verbose mode is enabled
37+
if verbose {
38+
let metadata = layout.metadata();
39+
if !metadata.is_empty() {
40+
node_parts.push(format!("metadata: {} bytes", metadata.len()));
41+
}
42+
43+
// Add segment IDs
44+
let segment_ids = layout.segment_ids();
45+
if !segment_ids.is_empty() {
46+
node_parts.push(format!(
47+
"segments: [{}]",
48+
segment_ids
49+
.iter()
50+
.map(|s| s.to_string())
51+
.collect::<Vec<_>>()
52+
.join(", ")
53+
));
54+
}
55+
56+
// Add row count
57+
node_parts.push(format!("rows: {}", layout.row_count()));
58+
}
59+
2760
let node_name = node_parts.join(", ");
2861

2962
// Get children and child names directly from the layout (not loading arrays)
@@ -38,14 +71,17 @@ impl std::fmt::Display for DisplayLayoutTree {
3871
.into_iter()
3972
.zip(child_names.iter())
4073
.map(|(child, name)| {
41-
let child_tree = make_tree(child)?;
74+
let child_tree = make_tree(child, verbose)?;
4275
Ok(Tree::new(format!("{}: {}", name, child_tree.root))
4376
.with_leaves(child_tree.leaves))
4477
})
4578
.collect()
4679
} else if !children.is_empty() {
4780
// No names available, just show children
48-
children.into_iter().map(make_tree).collect()
81+
children
82+
.into_iter()
83+
.map(|c| make_tree(c, verbose))
84+
.collect()
4985
} else {
5086
// Leaf node - no children
5187
Ok(Vec::new())
@@ -54,7 +90,7 @@ impl std::fmt::Display for DisplayLayoutTree {
5490
Ok(Tree::new(node_name).with_leaves(child_trees?))
5591
}
5692

57-
match make_tree(self.0.clone()) {
93+
match make_tree(self.layout.clone(), self.verbose) {
5894
Ok(tree) => write!(f, "{}", tree),
5995
Err(e) => write!(f, "Error building layout tree: {}", e),
6096
}

vortex-layout/src/layout.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,12 @@ impl dyn Layout + '_ {
188188

189189
/// Display the layout as a tree structure.
190190
pub fn display_tree(&self) -> DisplayLayoutTree {
191-
DisplayLayoutTree(self.to_layout())
191+
DisplayLayoutTree::new(self.to_layout(), false)
192+
}
193+
194+
/// Display the layout as a tree structure with optional verbose metadata.
195+
pub fn display_tree_verbose(&self, verbose: bool) -> DisplayLayoutTree {
196+
DisplayLayoutTree::new(self.to_layout(), verbose)
192197
}
193198
}
194199

vortex-layout/src/layouts/flat/mod.rs

Lines changed: 48 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
mod reader;
55
pub mod writer;
66

7-
use std::sync::Arc;
7+
use std::env;
8+
use std::sync::{Arc, LazyLock};
89

9-
use vortex_array::{ArrayContext, DeserializeMetadata, EmptyMetadata};
10+
use vortex_array::{ArrayContext, DeserializeMetadata, ProstMetadata};
11+
use vortex_buffer::ByteBuffer;
1012
use vortex_dtype::DType;
1113
use vortex_error::{VortexResult, vortex_bail, vortex_panic};
1214

@@ -17,12 +19,15 @@ use crate::{
1719
LayoutChildType, LayoutEncodingRef, LayoutId, LayoutReaderRef, LayoutRef, VTable, vtable,
1820
};
1921

22+
static FLAT_LAYOUT_INLINE_ARRAY_NODE: LazyLock<bool> =
23+
LazyLock::new(|| env::var("FLAT_LAYOUT_INLINE_ARRAY_NODE").is_ok());
24+
2025
vtable!(Flat);
2126

2227
impl VTable for FlatVTable {
2328
type Layout = FlatLayout;
2429
type Encoding = FlatLayoutEncoding;
25-
type Metadata = EmptyMetadata;
30+
type Metadata = ProstMetadata<FlatLayoutMetadata>;
2631

2732
fn id(_encoding: &Self::Encoding) -> LayoutId {
2833
LayoutId::new_ref("vortex.flat")
@@ -40,8 +45,10 @@ impl VTable for FlatVTable {
4045
&layout.dtype
4146
}
4247

43-
fn metadata(_layout: &Self::Layout) -> Self::Metadata {
44-
EmptyMetadata
48+
fn metadata(layout: &Self::Layout) -> Self::Metadata {
49+
ProstMetadata(FlatLayoutMetadata {
50+
array_encoding_tree: layout.array_tree.as_ref().map(|bytes| bytes.to_vec()),
51+
})
4552
}
4653

4754
fn segment_ids(layout: &Self::Layout) -> Vec<SegmentId> {
@@ -76,20 +83,24 @@ impl VTable for FlatVTable {
7683
_encoding: &Self::Encoding,
7784
dtype: &DType,
7885
row_count: u64,
79-
_metadata: &<Self::Metadata as DeserializeMetadata>::Output,
86+
metadata: &<Self::Metadata as DeserializeMetadata>::Output,
8087
segment_ids: Vec<SegmentId>,
8188
_children: &dyn LayoutChildren,
8289
ctx: ArrayContext,
8390
) -> VortexResult<Self::Layout> {
8491
if segment_ids.len() != 1 {
8592
vortex_bail!("Flat layout must have exactly one segment ID");
8693
}
87-
Ok(FlatLayout {
94+
Ok(FlatLayout::new_with_metadata(
8895
row_count,
89-
dtype: dtype.clone(),
90-
segment_id: segment_ids[0],
96+
dtype.clone(),
97+
segment_ids[0],
9198
ctx,
92-
})
99+
metadata
100+
.array_encoding_tree
101+
.as_ref()
102+
.map(|v| ByteBuffer::from(v.clone())),
103+
))
93104
}
94105
}
95106

@@ -102,6 +113,7 @@ pub struct FlatLayout {
102113
dtype: DType,
103114
segment_id: SegmentId,
104115
ctx: ArrayContext,
116+
array_tree: Option<ByteBuffer>,
105117
}
106118

107119
impl FlatLayout {
@@ -111,10 +123,36 @@ impl FlatLayout {
111123
dtype,
112124
segment_id,
113125
ctx,
126+
array_tree: None,
127+
}
128+
}
129+
130+
pub fn new_with_metadata(
131+
row_count: u64,
132+
dtype: DType,
133+
segment_id: SegmentId,
134+
ctx: ArrayContext,
135+
metadata: Option<ByteBuffer>,
136+
) -> Self {
137+
Self {
138+
row_count,
139+
dtype,
140+
segment_id,
141+
ctx,
142+
array_tree: metadata,
114143
}
115144
}
116145

117146
pub fn segment_id(&self) -> SegmentId {
118147
self.segment_id
119148
}
120149
}
150+
151+
#[derive(prost::Message)]
152+
pub struct FlatLayoutMetadata {
153+
// We can optionally store the array encoding tree here to avoid needing to fetch the segment
154+
// to plan array deserialization.
155+
// This will be an `ArrayNode`.
156+
#[prost(optional, bytes, tag = "1")]
157+
pub array_encoding_tree: Option<Vec<u8>>,
158+
}

vortex-layout/src/layouts/flat/writer.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use vortex_dtype::DType;
1010
use vortex_error::{VortexResult, vortex_bail};
1111
use vortex_io::runtime::Handle;
1212

13-
use crate::layouts::flat::FlatLayout;
13+
use crate::layouts::flat::{FLAT_LAYOUT_INLINE_ARRAY_NODE, FlatLayout};
1414
use crate::layouts::zoned::{lower_bound, upper_bound};
1515
use crate::segments::SegmentSinkRef;
1616
use crate::sequence::{SendableSequentialStream, SequencePointer};
@@ -122,15 +122,23 @@ impl LayoutStrategy for FlatLayoutStrategy {
122122
include_padding: options.include_padding,
123123
},
124124
)?;
125+
// There are always at least two buffers: the flatbuffer and the length.
126+
assert!(buffers.len() >= 2);
127+
let array_node =
128+
(*FLAT_LAYOUT_INLINE_ARRAY_NODE).then(|| buffers[buffers.len() - 2].clone());
125129
let segment_id = segment_sink.write(sequence_id, buffers).await?;
126130

127131
let None = stream.next().await else {
128132
vortex_bail!("flat layout received stream with more than a single chunk");
129133
};
130-
Ok(
131-
FlatLayout::new(row_count, stream.dtype().clone(), segment_id, ctx.clone())
132-
.into_layout(),
134+
Ok(FlatLayout::new_with_metadata(
135+
row_count,
136+
stream.dtype().clone(),
137+
segment_id,
138+
ctx.clone(),
139+
array_node,
133140
)
141+
.into_layout())
134142
}
135143

136144
fn buffered_bytes(&self) -> u64 {

vortex-tui/src/browse/app.rs

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@ use ratatui::widgets::ListState;
1010
use vortex::dtype::DType;
1111
use vortex::error::{VortexExpect, VortexResult, VortexUnwrap};
1212
use vortex::file::{Footer, SegmentSpec, VortexFile, VortexOpenOptions};
13-
use vortex::layout::LayoutRef;
1413
use vortex::layout::layouts::flat::FlatVTable;
1514
use vortex::layout::layouts::zoned::ZonedVTable;
1615
use vortex::layout::segments::{SegmentId, SegmentSource};
16+
use vortex::layout::{LayoutRef, VTable};
1717
use vortex::serde::ArrayParts;
1818

1919
use crate::browse::ui::SegmentGridState;
@@ -103,6 +103,27 @@ impl LayoutCursor {
103103
.len()
104104
}
105105

106+
/// Get information about the flat layout metadata.
107+
///
108+
/// NOTE: this is only safe to run against a FLAT layout.
109+
pub fn flat_layout_metadata_info(&self) -> String {
110+
let flat_layout = self.layout.as_::<FlatVTable>();
111+
let metadata = FlatVTable::metadata(flat_layout);
112+
113+
// Check if array_encoding_tree is present and get its size
114+
match metadata.0.array_encoding_tree.as_ref() {
115+
Some(tree) => {
116+
let size = tree.len();
117+
// Truncate to a single line - show the size and presence
118+
format!(
119+
"Flat Metadata: array_encoding_tree present ({} bytes)",
120+
size
121+
)
122+
}
123+
None => "Flat Metadata: array_encoding_tree not present".to_string(),
124+
}
125+
}
126+
106127
pub fn total_size(&self) -> usize {
107128
self.layout_segments()
108129
.iter()

vortex-tui/src/browse/ui/layouts.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,10 @@ fn render_layout_header(cursor: &LayoutCursor, area: Rect, buf: &mut Buffer) {
6464
"FlatBuffer Size: {}",
6565
size_formatter(cursor.flatbuffer_size())
6666
)));
67+
68+
// Display metadata info about the flat layout
69+
let metadata_info = cursor.flat_layout_metadata_info();
70+
rows.push(Text::from(metadata_info));
6771
}
6872

6973
if let Some(layout) = cursor.layout().as_opt::<ZonedVTable>() {

vortex-tui/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ impl Commands {
3939
match self {
4040
Commands::Tree(args) => match &args.mode {
4141
tree::TreeMode::Array { file } => file,
42-
tree::TreeMode::Layout { file } => file,
42+
tree::TreeMode::Layout { file, .. } => file,
4343
},
4444
Commands::Browse { file } => file,
4545
Commands::Convert(flags) => &flags.file,

vortex-tui/src/tree.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,16 @@ pub enum TreeMode {
2525
Layout {
2626
/// Path to the Vortex file
2727
file: PathBuf,
28+
/// Show additional metadata information
29+
#[arg(short, long)]
30+
verbose: bool,
2831
},
2932
}
3033

3134
pub async fn exec_tree(args: TreeArgs) -> VortexResult<()> {
3235
match args.mode {
3336
TreeMode::Array { file } => exec_array_tree(&file).await?,
34-
TreeMode::Layout { file } => exec_layout_tree(&file).await?,
37+
TreeMode::Layout { file, verbose } => exec_layout_tree(&file, verbose).await?,
3538
}
3639

3740
Ok(())
@@ -51,11 +54,11 @@ async fn exec_array_tree(file: &Path) -> VortexResult<()> {
5154
Ok(())
5255
}
5356

54-
async fn exec_layout_tree(file: &Path) -> VortexResult<()> {
57+
async fn exec_layout_tree(file: &Path, verbose: bool) -> VortexResult<()> {
5558
let vxf = VortexOpenOptions::new().open(file).await?;
5659
let footer = vxf.footer();
5760

58-
println!("{}", footer.layout().display_tree());
61+
println!("{}", footer.layout().display_tree_verbose(verbose));
5962

6063
Ok(())
6164
}

0 commit comments

Comments
 (0)