Skip to content

Commit e9a912f

Browse files
committed
feat(vx): add --json output flag to inspect and tree commands
Adds structured JSON output option to the inspect and tree commands, enabling programmatic access to file metadata for tooling integration. Signed-off-by: Baris Palaska <[email protected]>
1 parent 374882d commit e9a912f

File tree

2 files changed

+251
-8
lines changed

2 files changed

+251
-8
lines changed

vortex-tui/src/inspect.rs

Lines changed: 183 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,13 @@ use std::fs::File;
66
use std::io::Read;
77
use std::io::Seek;
88
use std::io::SeekFrom;
9+
use std::path::Path;
910
use std::path::PathBuf;
1011
use std::sync::Arc;
1112

1213
use flatbuffers::root;
1314
use itertools::Itertools;
15+
use serde::Serialize;
1416
use vortex::buffer::Alignment;
1517
use vortex::buffer::ByteBuffer;
1618
use vortex::error::VortexExpect;
@@ -36,6 +38,10 @@ pub struct InspectArgs {
3638

3739
/// Path to the Vortex file to inspect
3840
pub file: PathBuf,
41+
42+
/// Output as JSON
43+
#[arg(long, global = true)]
44+
pub json: bool,
3945
}
4046

4147
#[derive(Debug, clap::Subcommand)]
@@ -50,15 +56,189 @@ pub enum InspectMode {
5056
Footer,
5157
}
5258

59+
#[derive(Serialize)]
60+
pub struct InspectOutput {
61+
pub file_path: String,
62+
pub file_size: u64,
63+
pub eof: EofInfoJson,
64+
#[serde(skip_serializing_if = "Option::is_none")]
65+
pub postscript: Option<PostscriptInfoJson>,
66+
#[serde(skip_serializing_if = "Option::is_none")]
67+
pub footer: Option<FooterInfoJson>,
68+
}
69+
70+
#[derive(Serialize)]
71+
pub struct EofInfoJson {
72+
pub version: u16,
73+
pub current_version: u16,
74+
pub postscript_size: u16,
75+
pub magic_bytes: String,
76+
pub valid_magic: bool,
77+
}
78+
79+
#[derive(Serialize)]
80+
pub struct SegmentInfoJson {
81+
pub offset: u64,
82+
pub length: u32,
83+
pub alignment: usize,
84+
}
85+
86+
#[derive(Serialize)]
87+
pub struct PostscriptInfoJson {
88+
pub dtype: Option<SegmentInfoJson>,
89+
pub layout: SegmentInfoJson,
90+
pub statistics: Option<SegmentInfoJson>,
91+
pub footer: SegmentInfoJson,
92+
}
93+
94+
#[derive(Serialize)]
95+
pub struct FooterInfoJson {
96+
pub total_segments: usize,
97+
pub total_data_size: u64,
98+
pub segments: Vec<FooterSegmentJson>,
99+
}
100+
101+
#[derive(Serialize)]
102+
pub struct FooterSegmentJson {
103+
pub index: usize,
104+
pub offset: u64,
105+
pub end_offset: u64,
106+
pub length: u32,
107+
pub alignment: usize,
108+
pub path: Option<String>,
109+
}
110+
53111
pub async fn exec_inspect(args: InspectArgs) -> anyhow::Result<()> {
54112
let mut inspector = VortexInspector::new(args.file.clone())?;
55113

56-
println!("File: {}", args.file.display());
114+
let mode = args.mode.unwrap_or(InspectMode::Footer);
115+
116+
if args.json {
117+
exec_inspect_json(&mut inspector, &args.file, mode).await
118+
} else {
119+
exec_inspect_text(&mut inspector, &args.file, mode).await
120+
}
121+
}
122+
123+
async fn exec_inspect_json(
124+
inspector: &mut VortexInspector,
125+
file_path: &Path,
126+
mode: InspectMode,
127+
) -> anyhow::Result<()> {
128+
let eof = inspector.read_eof()?;
129+
let eof_json = EofInfoJson {
130+
version: eof.version,
131+
current_version: VERSION,
132+
postscript_size: eof.postscript_size,
133+
magic_bytes: std::str::from_utf8(&eof.magic_bytes)
134+
.unwrap_or("<invalid utf8>")
135+
.to_string(),
136+
valid_magic: eof.valid_magic,
137+
};
138+
139+
let postscript_json =
140+
if matches!(mode, InspectMode::Postscript | InspectMode::Footer) && eof.valid_magic {
141+
inspector
142+
.read_postscript(eof.postscript_size)
143+
.ok()
144+
.map(|ps| PostscriptInfoJson {
145+
dtype: ps.dtype.map(|s| SegmentInfoJson {
146+
offset: s.offset,
147+
length: s.length,
148+
alignment: *s.alignment,
149+
}),
150+
layout: SegmentInfoJson {
151+
offset: ps.layout.offset,
152+
length: ps.layout.length,
153+
alignment: *ps.layout.alignment,
154+
},
155+
statistics: ps.statistics.map(|s| SegmentInfoJson {
156+
offset: s.offset,
157+
length: s.length,
158+
alignment: *s.alignment,
159+
}),
160+
footer: SegmentInfoJson {
161+
offset: ps.footer.offset,
162+
length: ps.footer.length,
163+
alignment: *ps.footer.alignment,
164+
},
165+
})
166+
} else {
167+
None
168+
};
169+
170+
let footer_json =
171+
if matches!(mode, InspectMode::Footer) && eof.valid_magic && postscript_json.is_some() {
172+
inspector.read_footer().await.ok().map(|footer| {
173+
let segment_map = footer.segment_map().clone();
174+
let root_layout = footer.layout().clone();
175+
176+
let mut segment_paths: Vec<Option<Vec<Arc<str>>>> = vec![None; segment_map.len()];
177+
let mut queue =
178+
VecDeque::<(Vec<Arc<str>>, LayoutRef)>::from_iter([(Vec::new(), root_layout)]);
179+
while !queue.is_empty() {
180+
let (path, layout) = queue.pop_front().vortex_expect("queue is not empty");
181+
for segment in layout.segment_ids() {
182+
segment_paths[*segment as usize] = Some(path.clone());
183+
}
184+
if let Ok(children) = layout.children() {
185+
for (child_layout, child_name) in
186+
children.into_iter().zip(layout.child_names())
187+
{
188+
let child_path = path.iter().cloned().chain([child_name]).collect();
189+
queue.push_back((child_path, child_layout));
190+
}
191+
}
192+
}
193+
194+
let segments: Vec<FooterSegmentJson> = segment_map
195+
.iter()
196+
.enumerate()
197+
.map(|(i, segment)| FooterSegmentJson {
198+
index: i,
199+
offset: segment.offset,
200+
end_offset: segment.offset + segment.length as u64,
201+
length: segment.length,
202+
alignment: *segment.alignment,
203+
path: segment_paths[i]
204+
.as_ref()
205+
.map(|p| p.iter().map(|s| s.as_ref()).collect::<Vec<_>>().join(".")),
206+
})
207+
.collect();
208+
209+
FooterInfoJson {
210+
total_segments: segment_map.len(),
211+
total_data_size: segment_map.iter().map(|s| s.length as u64).sum(),
212+
segments,
213+
}
214+
})
215+
} else {
216+
None
217+
};
218+
219+
let output = InspectOutput {
220+
file_path: file_path.display().to_string(),
221+
file_size: inspector.file_size,
222+
eof: eof_json,
223+
postscript: postscript_json,
224+
footer: footer_json,
225+
};
226+
227+
let json_output = serde_json::to_string_pretty(&output)?;
228+
println!("{json_output}");
229+
230+
Ok(())
231+
}
232+
233+
async fn exec_inspect_text(
234+
inspector: &mut VortexInspector,
235+
file_path: &Path,
236+
mode: InspectMode,
237+
) -> anyhow::Result<()> {
238+
println!("File: {}", file_path.display());
57239
println!("Size: {} bytes", inspector.file_size);
58240
println!();
59241

60-
let mode = args.mode.unwrap_or(InspectMode::Footer);
61-
62242
match mode {
63243
InspectMode::Eof => {
64244
let eof = inspector.read_eof()?;

vortex-tui/src/tree.rs

Lines changed: 68 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,11 @@
44
use std::path::Path;
55
use std::path::PathBuf;
66

7+
use serde::Serialize;
78
use vortex::array::stream::ArrayStreamExt;
89
use vortex::error::VortexResult;
910
use vortex::file::OpenOptionsSessionExt;
11+
use vortex::layout::LayoutRef;
1012

1113
use crate::SESSION;
1214

@@ -23,6 +25,9 @@ pub enum TreeMode {
2325
Array {
2426
/// Path to the Vortex file
2527
file: PathBuf,
28+
/// Output as JSON
29+
#[arg(long)]
30+
json: bool,
2631
},
2732
/// Display the layout tree structure (metadata only, no array loading)
2833
Layout {
@@ -31,19 +36,43 @@ pub enum TreeMode {
3136
/// Show additional metadata information
3237
#[arg(short, long)]
3338
verbose: bool,
39+
/// Output as JSON
40+
#[arg(long)]
41+
json: bool,
3442
},
3543
}
3644

45+
#[derive(Serialize)]
46+
pub struct LayoutTreeNode {
47+
pub encoding: String,
48+
pub dtype: String,
49+
pub row_count: u64,
50+
pub metadata_bytes: usize,
51+
pub segment_ids: Vec<u32>,
52+
pub children: Vec<LayoutTreeNodeWithName>,
53+
}
54+
55+
#[derive(Serialize)]
56+
pub struct LayoutTreeNodeWithName {
57+
pub name: String,
58+
#[serde(flatten)]
59+
pub node: LayoutTreeNode,
60+
}
61+
3762
pub async fn exec_tree(args: TreeArgs) -> VortexResult<()> {
3863
match args.mode {
39-
TreeMode::Array { file } => exec_array_tree(&file).await?,
40-
TreeMode::Layout { file, verbose } => exec_layout_tree(&file, verbose).await?,
64+
TreeMode::Array { file, json } => exec_array_tree(&file, json).await?,
65+
TreeMode::Layout {
66+
file,
67+
verbose,
68+
json,
69+
} => exec_layout_tree(&file, verbose, json).await?,
4170
}
4271

4372
Ok(())
4473
}
4574

46-
async fn exec_array_tree(file: &Path) -> VortexResult<()> {
75+
async fn exec_array_tree(file: &Path, _json: bool) -> VortexResult<()> {
4776
let full = SESSION
4877
.open_options()
4978
.open(file)
@@ -53,16 +82,50 @@ async fn exec_array_tree(file: &Path) -> VortexResult<()> {
5382
.read_all()
5483
.await?;
5584

85+
// TODO: Add JSON output support for array tree
5686
println!("{}", full.display_tree());
5787

5888
Ok(())
5989
}
6090

61-
async fn exec_layout_tree(file: &Path, verbose: bool) -> VortexResult<()> {
91+
async fn exec_layout_tree(file: &Path, verbose: bool, json: bool) -> VortexResult<()> {
6292
let vxf = SESSION.open_options().open(file).await?;
6393
let footer = vxf.footer();
6494

65-
println!("{}", footer.layout().display_tree_verbose(verbose));
95+
if json {
96+
let tree = layout_to_json(footer.layout().clone())?;
97+
let json_output = serde_json::to_string_pretty(&tree)
98+
.map_err(|e| vortex::error::vortex_err!("Failed to serialize JSON: {e}"))?;
99+
println!("{json_output}");
100+
} else {
101+
println!("{}", footer.layout().display_tree_verbose(verbose));
102+
}
66103

67104
Ok(())
68105
}
106+
107+
fn layout_to_json(layout: LayoutRef) -> VortexResult<LayoutTreeNode> {
108+
let children = layout.children()?;
109+
let child_names: Vec<_> = layout.child_names().collect();
110+
111+
let children_json: Vec<LayoutTreeNodeWithName> = children
112+
.into_iter()
113+
.zip(child_names.into_iter())
114+
.map(|(child, name)| {
115+
let node = layout_to_json(child)?;
116+
Ok(LayoutTreeNodeWithName {
117+
name: name.to_string(),
118+
node,
119+
})
120+
})
121+
.collect::<VortexResult<Vec<_>>>()?;
122+
123+
Ok(LayoutTreeNode {
124+
encoding: layout.encoding().to_string(),
125+
dtype: layout.dtype().to_string(),
126+
row_count: layout.row_count(),
127+
metadata_bytes: layout.metadata().len(),
128+
segment_ids: layout.segment_ids().iter().map(|s| **s).collect(),
129+
children: children_json,
130+
})
131+
}

0 commit comments

Comments
 (0)