| 
 | 1 | +use std::env;  | 
 | 2 | +use std::fs;  | 
 | 3 | +use std::path::Path;  | 
 | 4 | +use walkdir::WalkDir;  | 
 | 5 | +fn decompress_blosc(compressed_data: &[u8]) -> Result<Vec<u8>, String> {  | 
 | 6 | +    unsafe {  | 
 | 7 | +        // Get decompressed size first  | 
 | 8 | +        let mut nbytes = 0usize;  | 
 | 9 | +        let mut cbytes = 0usize;  | 
 | 10 | +        let mut blocksize = 0usize;  | 
 | 11 | +          | 
 | 12 | +        blosc_sys::blosc_cbuffer_sizes(  | 
 | 13 | +            compressed_data.as_ptr() as *const std::ffi::c_void,  | 
 | 14 | +            &mut nbytes as *mut usize,  | 
 | 15 | +            &mut cbytes as *mut usize,  | 
 | 16 | +            &mut blocksize as *mut usize,  | 
 | 17 | +        );  | 
 | 18 | +          | 
 | 19 | +        if nbytes == 0 {  | 
 | 20 | +            return Err("Invalid compressed data".to_string());  | 
 | 21 | +        }  | 
 | 22 | +          | 
 | 23 | +        // Allocate output buffer  | 
 | 24 | +        let mut decompressed = vec![0u8; nbytes];  | 
 | 25 | +          | 
 | 26 | +        // Decompress  | 
 | 27 | +        let result = blosc_sys::blosc_decompress(  | 
 | 28 | +            compressed_data.as_ptr() as *const std::ffi::c_void,  | 
 | 29 | +            decompressed.as_mut_ptr() as *mut std::ffi::c_void,  | 
 | 30 | +            nbytes,  | 
 | 31 | +        );  | 
 | 32 | +          | 
 | 33 | +        if result < 0 {  | 
 | 34 | +            return Err(format!("Blosc decompression failed with code: {}", result));  | 
 | 35 | +        }  | 
 | 36 | +          | 
 | 37 | +        decompressed.truncate(result as usize);  | 
 | 38 | +        Ok(decompressed)  | 
 | 39 | +    }  | 
 | 40 | +}  | 
 | 41 | + | 
 /// Writes a titled hex dump of `data` to stdout, 16 bytes per row.
 ///
 /// Each row shows the address (`offset` plus the row's position in `data`) as
 /// eight hex digits, the bytes in hex split into two groups of eight, and the
 /// same bytes as printable ASCII between `|` bars, with `.` standing in for
 /// anything outside the 32..=126 range. A blank line follows the dump.
 fn print_hexdump(data: &[u8], offset: usize, chunk_name: &str) {
     println!("=== {} ===", chunk_name);

     for (row_index, row) in data.chunks(16).enumerate() {
         // Address column.
         let mut line = format!("{:08x}  ", offset + row_index * 16);

         // Hex column: always 16 slots wide so the ASCII column lines up even
         // on a short final row; missing bytes become three spaces.
         for slot in 0..16 {
             if slot == 8 {
                 line.push(' '); // Gap between the two groups of eight.
             }
             match row.get(slot) {
                 Some(byte) => line.push_str(&format!("{:02x} ", byte)),
                 None => line.push_str("   "),
             }
         }

         // ASCII column.
         line.push_str(" |");
         for &byte in row {
             let shown = if (32..=126).contains(&byte) {
                 byte as char
             } else {
                 '.'
             };
             line.push(shown);
         }
         line.push('|');

         println!("{}", line);
     }

     println!();
 }
 | 81 | + | 
 | 82 | +fn main() -> Result<(), Box<dyn std::error::Error>> {  | 
 | 83 | +    let args: Vec<String> = env::args().collect();  | 
 | 84 | +      | 
 | 85 | +    if args.len() != 2 {  | 
 | 86 | +        eprintln!("Usage: {} <zarr_array_path>", args[0]);  | 
 | 87 | +        eprintln!("Example: {} /path/to/zarr/array", args[0]);  | 
 | 88 | +        std::process::exit(1);  | 
 | 89 | +    }  | 
 | 90 | +      | 
 | 91 | +    let zarr_path = Path::new(&args[1]);  | 
 | 92 | +      | 
 | 93 | +    // Verify the path exists  | 
 | 94 | +    if !zarr_path.exists() {  | 
 | 95 | +        eprintln!("Error: Path '{}' does not exist", zarr_path.display());  | 
 | 96 | +        std::process::exit(1);  | 
 | 97 | +    }  | 
 | 98 | +      | 
 | 99 | +    println!("Reading Zarr array from: {}", zarr_path.display());  | 
 | 100 | +    println!("========================================");  | 
 | 101 | +      | 
 | 102 | +    // Read zarr.json metadata  | 
 | 103 | +    let zarr_json_path = zarr_path.join("zarr.json");  | 
 | 104 | +    if !zarr_json_path.exists() {  | 
 | 105 | +        eprintln!("Error: zarr.json not found in {}", zarr_path.display());  | 
 | 106 | +        std::process::exit(1);  | 
 | 107 | +    }  | 
 | 108 | +      | 
 | 109 | +    let metadata_content = fs::read_to_string(&zarr_json_path)?;  | 
 | 110 | +    let metadata: serde_json::Value = serde_json::from_str(&metadata_content)?;  | 
 | 111 | +      | 
 | 112 | +    // Extract information from metadata  | 
 | 113 | +    let shape = metadata["shape"].as_array().unwrap();  | 
 | 114 | +    let chunk_shape = metadata["chunk_grid"]["configuration"]["chunk_shape"].as_array().unwrap();  | 
 | 115 | +      | 
 | 116 | +    println!("Array shape: {:?}", shape);  | 
 | 117 | +    println!("Chunk shape: {:?}", chunk_shape);  | 
 | 118 | +    println!("Data type: {}", metadata["data_type"]["name"]);  | 
 | 119 | +    if let Some(config) = metadata["data_type"]["configuration"].as_object() {  | 
 | 120 | +        if let Some(length_bytes) = config.get("length_bytes") {  | 
 | 121 | +            println!("Length bytes: {}", length_bytes);  | 
 | 122 | +        }  | 
 | 123 | +    }  | 
 | 124 | +    println!();  | 
 | 125 | +      | 
 | 126 | +    // Calculate expected chunks based on the metadata we know:  | 
 | 127 | +    // Shape: [345, 188], Chunk shape: [128, 128]  | 
 | 128 | +    // This means we have ceil(345/128) = 3 chunks in dimension 0  | 
 | 129 | +    // and ceil(188/128) = 2 chunks in dimension 1  | 
 | 130 | +    // So we expect chunks: c/0/0, c/0/1, c/1/0, c/1/1, c/2/0, c/2/1  | 
 | 131 | +      | 
 | 132 | +    let mut chunk_files = Vec::new();  | 
 | 133 | +      | 
 | 134 | +    // Find all chunk files by walking the directory  | 
 | 135 | +    for entry in WalkDir::new(zarr_path) {  | 
 | 136 | +        let entry = entry?;  | 
 | 137 | +        let path = entry.path();  | 
 | 138 | +          | 
 | 139 | +        // Look for chunk files (they start with 'c/' in Zarr v3)  | 
 | 140 | +        if path.is_file() {  | 
 | 141 | +            let relative_path = path.strip_prefix(zarr_path)?;  | 
 | 142 | +            let path_str = relative_path.to_string_lossy();  | 
 | 143 | +              | 
 | 144 | +            if path_str.starts_with("c/") {  | 
 | 145 | +                chunk_files.push((path.to_path_buf(), path_str.to_string()));  | 
 | 146 | +            }  | 
 | 147 | +        }  | 
 | 148 | +    }  | 
 | 149 | +      | 
 | 150 | +    // Sort chunk files for consistent ordering  | 
 | 151 | +    chunk_files.sort_by(|a, b| a.1.cmp(&b.1));  | 
 | 152 | +      | 
 | 153 | +    println!("Found {} chunk files:", chunk_files.len());  | 
 | 154 | +    for (_, chunk_name) in &chunk_files {  | 
 | 155 | +        println!("  {}", chunk_name);  | 
 | 156 | +    }  | 
 | 157 | +    println!();  | 
 | 158 | +      | 
 | 159 | +    let mut total_offset = 0;  | 
 | 160 | +      | 
 | 161 | +    // Read, decompress, and hexdump each chunk file  | 
 | 162 | +    for (chunk_path, chunk_name) in chunk_files {  | 
 | 163 | +        match fs::read(&chunk_path) {  | 
 | 164 | +            Ok(compressed_data) => {  | 
 | 165 | +                if compressed_data.is_empty() {  | 
 | 166 | +                    println!("=== {} ===", chunk_name);  | 
 | 167 | +                    println!("(empty chunk)");  | 
 | 168 | +                    println!();  | 
 | 169 | +                } else {  | 
 | 170 | +                    println!("Compressed size: {} bytes", compressed_data.len());  | 
 | 171 | +                      | 
 | 172 | +                    // Decompress the Blosc-compressed data using blosc-sys directly  | 
 | 173 | +                    match decompress_blosc(&compressed_data) {  | 
 | 174 | +                        Ok(decompressed_data) => {  | 
 | 175 | +                            println!("Decompressed size: {} bytes", decompressed_data.len());  | 
 | 176 | +                            print_hexdump(&decompressed_data, total_offset, &chunk_name);  | 
 | 177 | +                            total_offset += decompressed_data.len();  | 
 | 178 | +                        }  | 
 | 179 | +                        Err(e) => {  | 
 | 180 | +                            eprintln!("Error decompressing chunk {}: {}", chunk_name, e);  | 
 | 181 | +                            println!("Showing raw compressed data instead:");  | 
 | 182 | +                            print_hexdump(&compressed_data, total_offset, &format!("{} (compressed)", chunk_name));  | 
 | 183 | +                            total_offset += compressed_data.len();  | 
 | 184 | +                        }  | 
 | 185 | +                    }  | 
 | 186 | +                }  | 
 | 187 | +            }  | 
 | 188 | +            Err(e) => {  | 
 | 189 | +                eprintln!("Error reading chunk {}: {}", chunk_name, e);  | 
 | 190 | +            }  | 
 | 191 | +        }  | 
 | 192 | +    }  | 
 | 193 | +      | 
 | 194 | +    println!("Total decompressed bytes processed: {}", total_offset);  | 
 | 195 | +    println!();  | 
 | 196 | +    println!("Note: This shows the decompressed array data as it would appear in memory.");  | 
 | 197 | +    println!("Each element is 240 bytes (raw_bytes with length_bytes: 240).");  | 
 | 198 | +      | 
 | 199 | +    Ok(())  | 
 | 200 | +}  | 
0 commit comments