Skip to content

Commit 6c1801f

Browse files
authored
Layout segments size info in TUI (#2833)
I'm looking into compression/file size, and this is just something I'm adding as I go. The main change here is restoring size info by traversing the layout tree to collect all segments contained within a layout. Top-level layout info: <img width="357" alt="Screenshot 2025-03-28 at 16 50 08" src="https://github.com/user-attachments/assets/a2566e74-9935-471f-a6d5-70526194a3b9" /> Child-level info for a chunked layout: <img width="295" alt="Screenshot 2025-03-28 at 16 50 49" src="https://github.com/user-attachments/assets/c54bb540-6414-489e-a054-cb8b14ec2e08" /> and for a struct layout: <img width="463" alt="Screenshot 2025-03-28 at 16 51 01" src="https://github.com/user-attachments/assets/6d04ac22-ecf3-4a1f-92c2-0c6ea2e1828c" />
1 parent b46ed15 commit 6c1801f

File tree

5 files changed

+111
-43
lines changed

5 files changed

+111
-43
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-dtype/src/field.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,18 @@ impl From<&str> for Field {
2525
}
2626
}
2727

28+
impl From<Arc<str>> for Field {
29+
fn from(value: Arc<str>) -> Self {
30+
Self::Name(value)
31+
}
32+
}
33+
34+
impl From<&Arc<str>> for Field {
35+
fn from(value: &Arc<str>) -> Self {
36+
Self::Name(value.clone())
37+
}
38+
}
39+
2840
impl From<String> for Field {
2941
fn from(value: String) -> Self {
3042
Field::Name(value.into())

vortex-tui/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ async-trait = { workspace = true }
1919
clap = { workspace = true, features = ["derive"] }
2020
crossterm = { workspace = true }
2121
futures-util = { workspace = true }
22+
humansize = { workspace = true }
2223
indicatif = { workspace = true, features = ["futures"] }
2324
parquet = { workspace = true, features = ["arrow", "async"] }
2425
ratatui = { workspace = true }

vortex-tui/src/browse/app.rs

Lines changed: 63 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,19 @@ use async_trait::async_trait;
77
use ratatui::widgets::ListState;
88
use vortex::buffer::{Alignment, ByteBuffer, ByteBufferMut};
99
use vortex::dtype::DType;
10-
use vortex::error::{VortexExpect, VortexResult};
10+
use vortex::error::{VortexExpect, VortexResult, VortexUnwrap};
1111
use vortex::file::{Footer, SegmentSpec, VortexOpenOptions};
1212
use vortex::io::TokioFile;
1313
use vortex::stats::stats_from_bitset_bytes;
14+
use vortex_layout::layouts::chunked::ChunkedLayout;
15+
use vortex_layout::layouts::flat::FlatLayout;
16+
use vortex_layout::layouts::stats::StatsLayout;
1417
use vortex_layout::layouts::stats::stats_table::StatsTable;
18+
use vortex_layout::layouts::struct_::StructLayout;
1519
use vortex_layout::segments::{AsyncSegmentReader, SegmentId};
1620
use vortex_layout::{
17-
CHUNKED_LAYOUT_ID, FLAT_LAYOUT_ID, Layout, LayoutVTableRef, STATS_LAYOUT_ID, STRUCT_LAYOUT_ID,
21+
CHUNKED_LAYOUT_ID, FLAT_LAYOUT_ID, Layout, LayoutVTable, LayoutVTableRef, STATS_LAYOUT_ID,
22+
STRUCT_LAYOUT_ID,
1823
};
1924

2025
#[derive(Default, Copy, Clone, Eq, PartialEq)]
@@ -35,7 +40,6 @@ pub struct LayoutCursor {
3540
path: Vec<usize>,
3641
footer: Footer,
3742
layout: Layout,
38-
#[allow(unused)]
3943
segment_map: Arc<[SegmentSpec]>,
4044
}
4145

@@ -137,13 +141,18 @@ impl LayoutCursor {
137141
.unwrap_or_default()
138142
}
139143

140-
pub fn segment_size(&self) -> usize {
141-
self.layout()
142-
.segments()
143-
.map(|id| self.segment_spec(id).length as usize)
144+
/// Total size of all segments reachable from the cursor's current layout.
///
/// Takes every segment id returned by `layout_segments`, looks up its
/// `SegmentSpec` via `segment_spec`, and sums the `length` fields.
/// NOTE(review): the unit is presumably bytes (the previous UI label printed
/// this value followed by "bytes") — confirm against `SegmentSpec`.
pub fn total_size(&self) -> usize {
145+
self.layout_segments()
146+
.iter()
147+
.map(|id| self.segment_spec(*id).length as usize)
144148
.sum()
145149
}
146150

151+
/// Returns every segment id found beneath the cursor's layout as one flat list.
///
/// Data segment ids come first, followed by stats segment ids, in the order
/// produced by `collect_segment_ids`.
fn layout_segments(&self) -> Vec<SegmentId> {
    let (mut all_ids, stats_ids) = collect_segment_ids(&self.layout);
    all_ids.extend(stats_ids);
    all_ids
}
155+
147156
/// Predicate true when the cursor is currently activated over a stats table
148157
pub fn is_stats_table(&self) -> bool {
149158
let parent = self.parent();
@@ -262,3 +271,50 @@ impl AsyncSegmentReader for SegmentReader {
262271
Ok(self.read_bytes_sync(range, segment.alignment))
263272
}
264273
}
274+
275+
pub fn collect_segment_ids(root_layout: &Layout) -> (Vec<SegmentId>, Vec<SegmentId>) {
276+
let mut data_segment_ids = Vec::default();
277+
let mut stats_segment_ids = Vec::default();
278+
279+
collect_segment_ids_impl(root_layout, &mut data_segment_ids, &mut stats_segment_ids)
280+
.vortex_unwrap();
281+
282+
(data_segment_ids, stats_segment_ids)
283+
}
284+
285+
fn collect_segment_ids_impl(
286+
root: &Layout,
287+
data_segments: &mut Vec<SegmentId>,
288+
stats_segments: &mut Vec<SegmentId>,
289+
) -> VortexResult<()> {
290+
let layout_id = root.id();
291+
292+
if layout_id == StructLayout.id() {
293+
let dtype = root.dtype().as_struct().vortex_expect("");
294+
for child_idx in 0..dtype.fields().len() {
295+
let name = dtype.field_name(child_idx)?;
296+
let child_dtype = dtype.field_by_index(child_idx)?;
297+
let child_layout = root.child(child_idx, child_dtype, name)?;
298+
collect_segment_ids_impl(&child_layout, data_segments, stats_segments)?;
299+
}
300+
} else if layout_id == ChunkedLayout.id() {
301+
for child_idx in 0..root.nchildren() {
302+
let child_layout =
303+
root.child(child_idx, root.dtype().clone(), format!("[{child_idx}]"))?;
304+
collect_segment_ids_impl(&child_layout, data_segments, stats_segments)?;
305+
}
306+
} else if layout_id == StatsLayout.id() {
307+
let data_layout = root.child(0, root.dtype().clone(), "data")?;
308+
collect_segment_ids_impl(&data_layout, data_segments, stats_segments)?;
309+
310+
// For the stats layout, we use the stats segment accumulator
311+
let stats_layout = root.child(1, root.dtype().clone(), "stats")?;
312+
collect_segment_ids_impl(&stats_layout, stats_segments, &mut vec![])?;
313+
} else if layout_id == FlatLayout.id() {
314+
data_segments.extend(root.segments());
315+
} else {
316+
unreachable!()
317+
};
318+
319+
Ok(())
320+
}

vortex-tui/src/browse/ui/layouts.rs

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use humansize::{DECIMAL, format_size, make_format};
12
use ratatui::buffer::Buffer;
23
use ratatui::layout::{Constraint, Layout, Rect};
34
use ratatui::style::{Color, Style, Stylize};
@@ -11,7 +12,7 @@ use vortex::error::VortexExpect;
1112
use vortex::expr::Identity;
1213
use vortex::layout::{CHUNKED_LAYOUT_ID, FLAT_LAYOUT_ID, STATS_LAYOUT_ID, STRUCT_LAYOUT_ID};
1314
use vortex::stats::stats_from_bitset_bytes;
14-
use vortex_layout::layouts::flat::FlatLayout;
15+
use vortex_layout::layouts::stats::StatsLayout;
1516
use vortex_layout::{ExprEvaluator, LayoutReaderExt, LayoutVTable, RowMask};
1617

1718
use crate::TOKIO_RUNTIME;
@@ -41,18 +42,16 @@ pub fn render_layouts(app_state: &mut AppState, area: Rect, buf: &mut Buffer) {
4142
fn render_layout_header(cursor: &LayoutCursor, area: Rect, buf: &mut Buffer) {
4243
let layout_kind = cursor.layout().id().to_string();
4344
let row_count = cursor.layout().row_count();
45+
let size = format_size(cursor.total_size(), DECIMAL);
4446

4547
let mut rows = vec![
4648
Text::from(format!("Kind: {layout_kind}")).bold(),
4749
Text::from(format!("Row Count: {row_count}")).bold(),
4850
Text::from(format!("Schema: {}", cursor.dtype()))
4951
.bold()
5052
.green(),
51-
Text::from(format!("Segments: {}", cursor.layout().nsegments())),
52-
Text::from(format!(
53-
"Segment data size: {} bytes",
54-
cursor.segment_size()
55-
)),
53+
Text::from(format!("Children: {}", cursor.layout().nchildren())).bold(),
54+
Text::from(format!("Segment data size: {}", size)).bold(),
5655
];
5756

5857
if cursor.encoding().id() == FLAT_LAYOUT_ID {
@@ -62,7 +61,7 @@ fn render_layout_header(cursor: &LayoutCursor, area: Rect, buf: &mut Buffer) {
6261
)));
6362
}
6463

65-
if cursor.encoding().id() == STATS_LAYOUT_ID {
64+
if cursor.encoding().id() == StatsLayout.id() {
6665
// Push any columnar stats.
6766
if let Some(available_stats) = cursor
6867
.layout()
@@ -97,13 +96,11 @@ fn render_layout_header(cursor: &LayoutCursor, area: Rect, buf: &mut Buffer) {
9796

9897
// Render the inner Array for a FlatLayout
9998
fn render_array(app: &AppState, area: Rect, buf: &mut Buffer, is_stats_table: bool) {
100-
let reader = FlatLayout
101-
.reader(
102-
app.cursor.layout().clone(),
103-
app.footer.ctx().clone(),
104-
app.reader.clone(),
105-
)
106-
.vortex_expect("Failed to create FlatLayout reader");
99+
let reader = app
100+
.cursor
101+
.layout()
102+
.reader(app.reader.clone(), app.footer.ctx().clone())
103+
.vortex_expect("Failed to create reader");
107104

108105
let array = TOKIO_RUNTIME
109106
.block_on(reader.evaluate_expr(
@@ -172,27 +169,25 @@ fn render_array(app: &AppState, area: Rect, buf: &mut Buffer, is_stats_table: bo
172169
}
173170

174171
fn render_children_list(app: &mut AppState, area: Rect, buf: &mut Buffer) {
175-
let cursor = &app.cursor;
176172
// TODO: add selection state.
177-
let layout = cursor.layout();
178-
let state = &mut app.layouts_list_state;
173+
let search_filter = app.search_filter.clone();
179174

180-
if layout.nchildren() > 0 {
181-
let filter: Vec<bool> = (0..layout.nchildren())
182-
.map(|idx| child_name(cursor, idx))
175+
if app.cursor.layout().nchildren() > 0 {
176+
let filter: Vec<bool> = (0..app.cursor.layout().nchildren())
177+
.map(|idx| child_name(app, idx))
183178
.map(|name| {
184-
if app.search_filter.is_empty() {
179+
if search_filter.is_empty() {
185180
true
186181
} else {
187-
name.contains(&app.search_filter)
182+
name.contains(&search_filter)
188183
}
189184
})
190185
.collect();
191186

192-
let list_items: Vec<String> = (0..layout.nchildren())
187+
let list_items: Vec<String> = (0..app.cursor.layout().nchildren())
193188
.zip(filter.iter())
194189
.filter(|&(_, keep)| *keep)
195-
.map(|(idx, _)| child_name(cursor, idx))
190+
.map(|(idx, _)| child_name(app, idx))
196191
.collect();
197192

198193
if !app.search_filter.is_empty() {
@@ -215,28 +210,31 @@ fn render_children_list(app: &mut AppState, area: Rect, buf: &mut Buffer) {
215210
List::new(list_items).highlight_style(Style::default().black().on_white().bold()),
216211
inner_area,
217212
buf,
218-
state,
213+
&mut app.layouts_list_state,
219214
);
220215
}
221216
}
222217

223-
fn child_name(cursor: &LayoutCursor, nth: usize) -> String {
218+
fn child_name(app: &mut AppState, nth: usize) -> String {
219+
let cursor = &app.cursor;
220+
let formatter = make_format(DECIMAL);
224221
// TODO(ngates): layout visitors
225222
if cursor.layout().id() == STRUCT_LAYOUT_ID {
226223
let struct_dtype = cursor.dtype().as_struct().expect("struct dtype");
227224
let field_name = struct_dtype.field_name(nth).expect("field name");
228225
let field_dtype = struct_dtype.field_by_index(nth).expect("dtype value");
229-
format!("Column {nth} - {field_name} ({field_dtype})")
226+
227+
let total_size = formatter(app.cursor.child(nth).total_size());
228+
229+
format!("Column {nth} - {field_name} ({field_dtype}) - {total_size}")
230230
} else if cursor.layout().id() == CHUNKED_LAYOUT_ID {
231-
// 0th child of a ChunkedLayout is the chunk stats array.
232-
// The rest of the chunks are child arrays
233-
if cursor.layout().metadata().is_none() {
234-
format!("Chunk {nth}")
235-
} else if nth == (cursor.layout().nchildren() - 1) {
236-
"Chunk Statistics".to_string()
237-
} else {
238-
format!("Chunk {}", nth)
239-
}
231+
let name = format!("Chunk {nth}");
232+
let child_cursor = app.cursor.child(nth);
233+
234+
let total_size = formatter(child_cursor.total_size());
235+
let row_count = child_cursor.layout().row_count();
236+
237+
format!("{name} - {row_count} - {total_size}")
240238
} else if cursor.layout().id() == FLAT_LAYOUT_ID {
241239
format!("Page {nth}")
242240
} else if cursor.layout().id() == STATS_LAYOUT_ID {

0 commit comments

Comments
 (0)