Skip to content

Commit bcc85a6

Browse files
committed
feat(vx): add segments command for displaying segment information
Adds a new 'vx segments' command that outputs structured JSON information about all segments in a Vortex file, including byte offsets, row ranges, and alignment information. The segment tree logic is shared with the TUI. Signed-off-by: Baris Palaska <[email protected]>
1 parent e9a912f commit bcc85a6

File tree

3 files changed

+255
-1
lines changed

3 files changed

+255
-1
lines changed

vortex-tui/src/main.rs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
mod browse;
66
mod convert;
77
mod inspect;
8+
mod segment_tree;
9+
mod segments;
810
mod tree;
911

1012
use std::path::PathBuf;
@@ -21,8 +23,10 @@ use vortex::io::session::RuntimeSessionExt;
2123
use vortex::session::VortexSession;
2224

2325
use crate::inspect::InspectArgs;
26+
use crate::segments::SegmentsArgs;
2427

2528
#[derive(clap::Parser)]
29+
#[command(version)]
2630
struct Cli {
2731
#[clap(subcommand)]
2832
command: Commands,
@@ -38,18 +42,21 @@ enum Commands {
3842
Browse { file: PathBuf },
3943
/// Inspect Vortex file footer and metadata
4044
Inspect(InspectArgs),
45+
/// Display segment information for a Vortex file
46+
Segments(SegmentsArgs),
4147
}
4248

4349
impl Commands {
4450
fn file_path(&self) -> &PathBuf {
4551
match self {
4652
Commands::Tree(args) => match &args.mode {
47-
tree::TreeMode::Array { file } => file,
53+
tree::TreeMode::Array { file, .. } => file,
4854
tree::TreeMode::Layout { file, .. } => file,
4955
},
5056
Commands::Browse { file } => file,
5157
Commands::Convert(flags) => &flags.file,
5258
Commands::Inspect(args) => &args.file,
59+
Commands::Segments(args) => &args.file,
5360
}
5461
}
5562
}
@@ -81,6 +88,7 @@ async fn main() -> anyhow::Result<()> {
8188
Commands::Convert(flags) => convert::exec_convert(flags).await?,
8289
Commands::Browse { file } => exec_tui(file).await?,
8390
Commands::Inspect(args) => inspect::exec_inspect(args).await?,
91+
Commands::Segments(args) => segments::exec_segments(args).await?,
8492
};
8593

8694
Ok(())

vortex-tui/src/segment_tree.rs

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Shared segment tree collection logic used by both the TUI browse view and the CLI segments command.
5+
6+
use std::sync::Arc;
7+
8+
use vortex::dtype::FieldName;
9+
use vortex::error::VortexResult;
10+
use vortex::file::SegmentSpec;
11+
use vortex::layout::Layout;
12+
use vortex::layout::LayoutChildType;
13+
use vortex::utils::aliases::hash_map::HashMap;
14+
15+
/// Information about a single segment for display purposes.
16+
pub struct SegmentDisplay {
17+
/// Name of the segment (e.g., "data", "[0]", "zones")
18+
pub name: FieldName,
19+
/// The underlying segment specification
20+
pub spec: SegmentSpec,
21+
/// Row offset within the file
22+
pub row_offset: u64,
23+
/// Number of rows in this segment
24+
pub row_count: u64,
25+
}
26+
27+
/// A tree of segments organized by field name.
28+
pub struct SegmentTree {
29+
/// Map from field name to list of segments for that field
30+
pub segments: HashMap<FieldName, Vec<SegmentDisplay>>,
31+
/// Ordered list of field names (columns) in display order
32+
pub segment_ordering: Vec<FieldName>,
33+
}
34+
35+
/// Collect segment tree from a layout and segment map.
36+
pub fn collect_segment_tree(
37+
root_layout: &dyn Layout,
38+
segments: &Arc<[SegmentSpec]>,
39+
) -> SegmentTree {
40+
let mut tree = SegmentTree {
41+
segments: HashMap::new(),
42+
segment_ordering: Vec::new(),
43+
};
44+
// Ignore errors during traversal - we want to collect as much as possible
45+
drop(segments_by_name_impl(
46+
root_layout,
47+
None,
48+
None,
49+
Some(0),
50+
segments,
51+
&mut tree,
52+
));
53+
tree
54+
}
55+
56+
fn segments_by_name_impl(
57+
root: &dyn Layout,
58+
group_name: Option<FieldName>,
59+
name: Option<FieldName>,
60+
row_offset: Option<u64>,
61+
segments: &Arc<[SegmentSpec]>,
62+
segment_tree: &mut SegmentTree,
63+
) -> VortexResult<()> {
64+
// Recurse into children
65+
for (child, child_type) in root.children()?.into_iter().zip(root.child_types()) {
66+
match child_type {
67+
LayoutChildType::Transparent(sub_name) => segments_by_name_impl(
68+
child.as_ref(),
69+
group_name.clone(),
70+
Some(
71+
name.as_ref()
72+
.map(|n| format!("{n}.{sub_name}").into())
73+
.unwrap_or_else(|| sub_name.into()),
74+
),
75+
row_offset,
76+
segments,
77+
segment_tree,
78+
)?,
79+
LayoutChildType::Auxiliary(aux_name) => segments_by_name_impl(
80+
child.as_ref(),
81+
group_name.clone(),
82+
Some(
83+
name.as_ref()
84+
.map(|n| format!("{n}.{aux_name}").into())
85+
.unwrap_or_else(|| aux_name.into()),
86+
),
87+
Some(0),
88+
segments,
89+
segment_tree,
90+
)?,
91+
LayoutChildType::Chunk((idx, chunk_row_offset)) => segments_by_name_impl(
92+
child.as_ref(),
93+
group_name.clone(),
94+
Some(
95+
name.as_ref()
96+
.map(|n| format!("{n}.[{idx}]"))
97+
.unwrap_or_else(|| format!("[{idx}]"))
98+
.into(),
99+
),
100+
// Compute absolute row offset.
101+
Some(chunk_row_offset + row_offset.unwrap_or(0)),
102+
segments,
103+
segment_tree,
104+
)?,
105+
LayoutChildType::Field(field_name) => {
106+
// Step into a new group name
107+
let new_group_name = group_name
108+
.as_ref()
109+
.map(|n| format!("{n}.{field_name}").into())
110+
.unwrap_or_else(|| field_name);
111+
segment_tree.segment_ordering.push(new_group_name.clone());
112+
113+
segments_by_name_impl(
114+
child.as_ref(),
115+
Some(new_group_name),
116+
None,
117+
row_offset,
118+
segments,
119+
segment_tree,
120+
)?
121+
}
122+
}
123+
}
124+
125+
let current_segments = segment_tree
126+
.segments
127+
.entry(group_name.unwrap_or_else(|| FieldName::from("root")))
128+
.or_default();
129+
130+
for segment_id in root.segment_ids() {
131+
let segment_spec = segments[*segment_id as usize].clone();
132+
current_segments.push(SegmentDisplay {
133+
name: name.clone().unwrap_or_else(|| "<unnamed>".into()),
134+
spec: segment_spec,
135+
row_count: root.row_count(),
136+
row_offset: row_offset.unwrap_or(0),
137+
})
138+
}
139+
140+
Ok(())
141+
}

vortex-tui/src/segments.rs

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::path::PathBuf;
5+
6+
use serde::Serialize;
7+
use vortex::error::VortexResult;
8+
use vortex::file::OpenOptionsSessionExt;
9+
10+
use crate::SESSION;
11+
use crate::segment_tree::collect_segment_tree;
12+
13+
#[derive(Debug, clap::Parser)]
14+
pub struct SegmentsArgs {
15+
/// Path to the Vortex file
16+
pub file: PathBuf,
17+
}
18+
19+
#[derive(Serialize)]
20+
struct SegmentsOutput {
21+
/// Columns in display order
22+
columns: Vec<ColumnInfo>,
23+
}
24+
25+
#[derive(Serialize)]
26+
struct ColumnInfo {
27+
/// Field name (column header)
28+
name: String,
29+
/// Segments within this column
30+
segments: Vec<SegmentInfo>,
31+
}
32+
33+
#[derive(Serialize)]
34+
struct SegmentInfo {
35+
/// Segment name (e.g., "[0]", "data", etc.)
36+
name: String,
37+
/// Row range start
38+
row_offset: u64,
39+
/// Number of rows
40+
row_count: u64,
41+
/// Byte offset in file
42+
byte_offset: u64,
43+
/// Length in bytes
44+
byte_length: u32,
45+
/// Alignment requirement
46+
alignment: usize,
47+
/// Gap from previous segment end
48+
byte_gap: u64,
49+
}
50+
51+
pub async fn exec_segments(args: SegmentsArgs) -> VortexResult<()> {
52+
let vxf = SESSION.open_options().open(args.file).await?;
53+
54+
let footer = vxf.footer();
55+
let mut segment_tree = collect_segment_tree(footer.layout().as_ref(), footer.segment_map());
56+
57+
// Convert to output format
58+
let columns: Vec<ColumnInfo> = segment_tree
59+
.segment_ordering
60+
.iter()
61+
.filter_map(|name| {
62+
let mut segments = segment_tree.segments.remove(name)?;
63+
64+
// Sort by byte offset
65+
segments.sort_by(|a, b| a.spec.offset.cmp(&b.spec.offset));
66+
67+
// Convert to output format, computing byte gaps
68+
let mut current_offset = 0u64;
69+
let segment_infos: Vec<SegmentInfo> = segments
70+
.into_iter()
71+
.map(|seg| {
72+
let byte_gap = if current_offset == 0 {
73+
0
74+
} else {
75+
seg.spec.offset.saturating_sub(current_offset)
76+
};
77+
current_offset = seg.spec.offset + seg.spec.length as u64;
78+
79+
SegmentInfo {
80+
name: seg.name.to_string(),
81+
row_offset: seg.row_offset,
82+
row_count: seg.row_count,
83+
byte_offset: seg.spec.offset,
84+
byte_length: seg.spec.length,
85+
alignment: *seg.spec.alignment,
86+
byte_gap,
87+
}
88+
})
89+
.collect();
90+
91+
Some(ColumnInfo {
92+
name: name.to_string(),
93+
segments: segment_infos,
94+
})
95+
})
96+
.collect();
97+
98+
let output = SegmentsOutput { columns };
99+
100+
let json_output = serde_json::to_string_pretty(&output)
101+
.map_err(|e| vortex::error::vortex_err!("Failed to serialize JSON: {e}"))?;
102+
println!("{json_output}");
103+
104+
Ok(())
105+
}

0 commit comments

Comments
 (0)