Skip to content

Commit 3bb5fe9

Browse files
committed
Add hex-dump and MDIO output reproducer
1 parent 6e9fcb2 commit 3bb5fe9

File tree

3 files changed

+319
-0
lines changed

3 files changed

+319
-0
lines changed
Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
#!/bin/bash
#
# Bootstrap script for the Zarr hexdump tool.
#
# Makes sure a Rust toolchain and the Blosc development library are available,
# builds the tool in release mode, and optionally runs it on one Zarr array.
#
# Usage: <this script> [zarr_array_path]

set -e # Exit on any error

# Validate the invocation before any slow work (toolchain install, release
# build) so that a typo fails immediately instead of after a full build.
if [ $# -gt 1 ]; then
    echo "Error: Too many arguments provided." >&2
    echo "Usage: $0 [zarr_array_path]" >&2
    exit 1
fi

if [ $# -eq 1 ] && [ ! -e "$1" ]; then
    echo "Error: Path '$1' does not exist!" >&2
    exit 1
fi

echo "=== Zarr Hexdump Bootstrap Script ==="
echo

# Check if Rust is installed, if not install it
if ! command -v rustc &> /dev/null; then
    echo "Rust is not installed. Installing Rust..."
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
    # Quoted so a home directory containing spaces does not split the path.
    source "$HOME/.cargo/env"
    echo "✓ Rust installed successfully"
fi

echo "✓ Rust is installed ($(rustc --version))"

# Check if Cargo is available
if ! command -v cargo &> /dev/null; then
    echo "Sourcing Rust environment..."
    source "$HOME/.cargo/env"
fi

echo "✓ Cargo is available ($(cargo --version))"
echo

# Install blosc library if not present.
# A missing pkg-config is treated the same as a missing blosc: the install
# step below provides both.
echo "Checking for blosc library..."
if ! { command -v pkg-config &> /dev/null && pkg-config --exists blosc; }; then
    if ! command -v apt-get &> /dev/null; then
        echo "Error: apt-get not found; install libblosc-dev and pkg-config manually." >&2
        exit 1
    fi
    echo "Installing blosc library..."
    sudo apt-get update
    sudo apt-get install -y libblosc-dev pkg-config
    echo "✓ Blosc library installed"
else
    echo "✓ Blosc library already available"
fi
echo

# Build the project
echo "Building the Zarr hexdump tool..."
if cargo build --release; then
    echo "✓ Build successful!"
else
    echo "✗ Build failed!" >&2
    exit 1
fi

echo
echo "=== Usage ==="
echo "To hexdump a Zarr array, run:"
echo " ./target/release/zarr-hexdump /path/to/your/zarr/array"
echo
echo "Or use cargo run:"
echo " cargo run --release -- /path/to/your/zarr/array"
echo

# Run the tool if a path was provided (its existence was checked up front).
if [ $# -eq 1 ]; then
    ZARR_PATH="$1"
    echo "=== Running hexdump on provided path: $ZARR_PATH ==="
    echo

    ./target/release/zarr-hexdump "$ZARR_PATH"
else
    echo "No Zarr array path provided. Build completed successfully."
    echo "Use the commands above to run the hexdump tool."
fi
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
if __name__ == "__main__":
    # Reproducer: ingest the same post-stack volume from a big-endian and a
    # little-endian SEG-Y file into MDIO, with raw-header output enabled.
    import logging
    import os

    import mdio
    from segy.standards import get_segy_standard
    from segy.schema import HeaderField, Endianness
    from mdio.builder.template_registry import TemplateRegistry

    logging.getLogger("segy").setLevel(logging.DEBUG)

    # NOTE(review): these are set after `import mdio`; this assumes MDIO reads
    # them at call time rather than at import time — confirm.
    os.environ.update(
        {
            "MDIO__IMPORT__CLOUD_NATIVE": "true",
            "MDIO__IMPORT__CPU_COUNT": "16",
            "MDIO__DO_RAW_HEADERS": "1",
        }
    )

    # (name, starting byte) of every trace-header field to decode; all int32.
    header_layout = (
        ("inline", 181),
        ("crossline", 185),
        ("cdp_x", 81),
        ("cdp_y", 85),
    )
    custom_headers = [
        HeaderField(name=field_name, byte=start_byte, format="int32")
        for field_name, start_byte in header_layout
    ]

    # One conversion job per byte order: (endianness, input SEG-Y, output MDIO).
    jobs = (
        (
            Endianness.BIG,
            "filt_mig_IEEE_BigEndian_Rev1.sgy",
            "filt_mig_IEEE_BigEndian_Rev1.mdio",
        ),
        (
            Endianness.LITTLE,
            "filt_mig_IEEE_LittleEndian_Rev1.sgy",
            "filt_mig_IEEE_LittleEndian_Rev1.mdio",
        ),
    )

    # Build every base spec first, then customize, then convert — the same
    # order of operations as a straight-line script would perform.
    base_specs = []
    for endianness, _, _ in jobs:
        base = get_segy_standard(0)
        base.endianness = endianness
        base_specs.append(base)

    specs = [base.customize(trace_header_fields=custom_headers) for base in base_specs]

    for spec, (_, segy_file, mdio_store) in zip(specs, jobs):
        mdio.segy_to_mdio(
            segy_spec=spec,
            mdio_template=TemplateRegistry().get("PostStack3DTime"),
            input_path=segy_file,
            output_path=mdio_store,
            overwrite=True,
        )
Lines changed: 200 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,200 @@
1+
use std::env;
2+
use std::fs;
3+
use std::path::Path;
4+
use walkdir::WalkDir;
5+
fn decompress_blosc(compressed_data: &[u8]) -> Result<Vec<u8>, String> {
6+
unsafe {
7+
// Get decompressed size first
8+
let mut nbytes = 0usize;
9+
let mut cbytes = 0usize;
10+
let mut blocksize = 0usize;
11+
12+
blosc_sys::blosc_cbuffer_sizes(
13+
compressed_data.as_ptr() as *const std::ffi::c_void,
14+
&mut nbytes as *mut usize,
15+
&mut cbytes as *mut usize,
16+
&mut blocksize as *mut usize,
17+
);
18+
19+
if nbytes == 0 {
20+
return Err("Invalid compressed data".to_string());
21+
}
22+
23+
// Allocate output buffer
24+
let mut decompressed = vec![0u8; nbytes];
25+
26+
// Decompress
27+
let result = blosc_sys::blosc_decompress(
28+
compressed_data.as_ptr() as *const std::ffi::c_void,
29+
decompressed.as_mut_ptr() as *mut std::ffi::c_void,
30+
nbytes,
31+
);
32+
33+
if result < 0 {
34+
return Err(format!("Blosc decompression failed with code: {}", result));
35+
}
36+
37+
decompressed.truncate(result as usize);
38+
Ok(decompressed)
39+
}
40+
}
41+
42+
/// Prints `data` to stdout as a hex dump, 16 bytes per row.
///
/// Output starts with a `=== chunk_name ===` header and ends with a blank
/// line. Each row shows the address (`offset` plus the row's position in
/// `data`, as 8 hex digits), the bytes in hex with an extra gap after the
/// eighth byte, and the same bytes as ASCII between `|` bars, with every
/// byte outside the printable range 0x20..=0x7e rendered as `.`.
///
/// A final row shorter than 16 bytes is padded so that its ASCII column
/// lines up with the rows above it.
fn print_hexdump(data: &[u8], offset: usize, chunk_name: &str) {
    const BYTES_PER_ROW: usize = 16;
    const GAP_INDEX: usize = 8;

    println!("=== {} ===", chunk_name);

    for (row_index, row) in data.chunks(BYTES_PER_ROW).enumerate() {
        // Address column
        let mut line = format!("{:08x} ", offset + row_index * BYTES_PER_ROW);

        // Hex column: each byte occupies three characters ("xx ").
        for (col, byte) in row.iter().enumerate() {
            if col == GAP_INDEX {
                line.push(' '); // Extra space in the middle
            }
            line.push_str(&format!("{:02x} ", byte));
        }

        // Pad a short final row with three spaces per missing byte — the same
        // width a real byte takes — so the ASCII column stays aligned.
        for col in row.len()..BYTES_PER_ROW {
            if col == GAP_INDEX {
                line.push(' ');
            }
            line.push_str("   ");
        }

        // ASCII column
        line.push_str(" |");
        line.extend(row.iter().map(|&byte| {
            if matches!(byte, 0x20..=0x7e) {
                byte as char
            } else {
                '.'
            }
        }));
        line.push('|');

        println!("{}", line);
    }

    println!();
}
81+
82+
fn main() -> Result<(), Box<dyn std::error::Error>> {
83+
let args: Vec<String> = env::args().collect();
84+
85+
if args.len() != 2 {
86+
eprintln!("Usage: {} <zarr_array_path>", args[0]);
87+
eprintln!("Example: {} /path/to/zarr/array", args[0]);
88+
std::process::exit(1);
89+
}
90+
91+
let zarr_path = Path::new(&args[1]);
92+
93+
// Verify the path exists
94+
if !zarr_path.exists() {
95+
eprintln!("Error: Path '{}' does not exist", zarr_path.display());
96+
std::process::exit(1);
97+
}
98+
99+
println!("Reading Zarr array from: {}", zarr_path.display());
100+
println!("========================================");
101+
102+
// Read zarr.json metadata
103+
let zarr_json_path = zarr_path.join("zarr.json");
104+
if !zarr_json_path.exists() {
105+
eprintln!("Error: zarr.json not found in {}", zarr_path.display());
106+
std::process::exit(1);
107+
}
108+
109+
let metadata_content = fs::read_to_string(&zarr_json_path)?;
110+
let metadata: serde_json::Value = serde_json::from_str(&metadata_content)?;
111+
112+
// Extract information from metadata
113+
let shape = metadata["shape"].as_array().unwrap();
114+
let chunk_shape = metadata["chunk_grid"]["configuration"]["chunk_shape"].as_array().unwrap();
115+
116+
println!("Array shape: {:?}", shape);
117+
println!("Chunk shape: {:?}", chunk_shape);
118+
println!("Data type: {}", metadata["data_type"]["name"]);
119+
if let Some(config) = metadata["data_type"]["configuration"].as_object() {
120+
if let Some(length_bytes) = config.get("length_bytes") {
121+
println!("Length bytes: {}", length_bytes);
122+
}
123+
}
124+
println!();
125+
126+
// Calculate expected chunks based on the metadata we know:
127+
// Shape: [345, 188], Chunk shape: [128, 128]
128+
// This means we have ceil(345/128) = 3 chunks in dimension 0
129+
// and ceil(188/128) = 2 chunks in dimension 1
130+
// So we expect chunks: c/0/0, c/0/1, c/1/0, c/1/1, c/2/0, c/2/1
131+
132+
let mut chunk_files = Vec::new();
133+
134+
// Find all chunk files by walking the directory
135+
for entry in WalkDir::new(zarr_path) {
136+
let entry = entry?;
137+
let path = entry.path();
138+
139+
// Look for chunk files (they start with 'c/' in Zarr v3)
140+
if path.is_file() {
141+
let relative_path = path.strip_prefix(zarr_path)?;
142+
let path_str = relative_path.to_string_lossy();
143+
144+
if path_str.starts_with("c/") {
145+
chunk_files.push((path.to_path_buf(), path_str.to_string()));
146+
}
147+
}
148+
}
149+
150+
// Sort chunk files for consistent ordering
151+
chunk_files.sort_by(|a, b| a.1.cmp(&b.1));
152+
153+
println!("Found {} chunk files:", chunk_files.len());
154+
for (_, chunk_name) in &chunk_files {
155+
println!(" {}", chunk_name);
156+
}
157+
println!();
158+
159+
let mut total_offset = 0;
160+
161+
// Read, decompress, and hexdump each chunk file
162+
for (chunk_path, chunk_name) in chunk_files {
163+
match fs::read(&chunk_path) {
164+
Ok(compressed_data) => {
165+
if compressed_data.is_empty() {
166+
println!("=== {} ===", chunk_name);
167+
println!("(empty chunk)");
168+
println!();
169+
} else {
170+
println!("Compressed size: {} bytes", compressed_data.len());
171+
172+
// Decompress the Blosc-compressed data using blosc-sys directly
173+
match decompress_blosc(&compressed_data) {
174+
Ok(decompressed_data) => {
175+
println!("Decompressed size: {} bytes", decompressed_data.len());
176+
print_hexdump(&decompressed_data, total_offset, &chunk_name);
177+
total_offset += decompressed_data.len();
178+
}
179+
Err(e) => {
180+
eprintln!("Error decompressing chunk {}: {}", chunk_name, e);
181+
println!("Showing raw compressed data instead:");
182+
print_hexdump(&compressed_data, total_offset, &format!("{} (compressed)", chunk_name));
183+
total_offset += compressed_data.len();
184+
}
185+
}
186+
}
187+
}
188+
Err(e) => {
189+
eprintln!("Error reading chunk {}: {}", chunk_name, e);
190+
}
191+
}
192+
}
193+
194+
println!("Total decompressed bytes processed: {}", total_offset);
195+
println!();
196+
println!("Note: This shows the decompressed array data as it would appear in memory.");
197+
println!("Each element is 240 bytes (raw_bytes with length_bytes: 240).");
198+
199+
Ok(())
200+
}

0 commit comments

Comments
 (0)