Commit 7d1508e

linting pass 1
1 parent 5a041a4 commit 7d1508e

9 files changed: +114 -135 lines changed

disaster_recovery_analysis/bootstrap.sh

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ if [ $# -eq 1 ]; then
     ZARR_PATH="$1"
     echo "=== Running hexdump on provided path: $ZARR_PATH ==="
     echo
-
+
     if [ -e "$ZARR_PATH" ]; then
         ./target/release/zarr-hexdump "$ZARR_PATH"
     else

disaster_recovery_analysis/ingest_both_teapots.py

Lines changed: 8 additions & 6 deletions
@@ -1,11 +1,13 @@
 if __name__ == "__main__":
-    import mdio
-    from segy.standards import get_segy_standard
-    from segy.schema import HeaderField, Endianness
+    import logging
     import os
-    from mdio.builder.template_registry import TemplateRegistry
 
-    import logging
+    from segy.schema import Endianness
+    from segy.schema import HeaderField
+    from segy.standards import get_segy_standard
+
+    import mdio
+    from mdio.builder.template_registry import TemplateRegistry
 
     logging.getLogger("segy").setLevel(logging.DEBUG)
 
@@ -40,4 +42,4 @@
         input_path="filt_mig_IEEE_LittleEndian_Rev1.sgy",
         output_path="filt_mig_IEEE_LittleEndian_Rev1.mdio",
         overwrite=True,
-    )
+    )

disaster_recovery_analysis/src/main.rs

Lines changed: 30 additions & 30 deletions
@@ -8,32 +8,32 @@ fn decompress_blosc(compressed_data: &[u8]) -> Result<Vec<u8>, String> {
     let mut nbytes = 0usize;
     let mut cbytes = 0usize;
     let mut blocksize = 0usize;
-
+
     blosc_sys::blosc_cbuffer_sizes(
         compressed_data.as_ptr() as *const std::ffi::c_void,
         &mut nbytes as *mut usize,
         &mut cbytes as *mut usize,
         &mut blocksize as *mut usize,
     );
-
+
     if nbytes == 0 {
         return Err("Invalid compressed data".to_string());
     }
-
+
     // Allocate output buffer
     let mut decompressed = vec![0u8; nbytes];
-
+
     // Decompress
     let result = blosc_sys::blosc_decompress(
         compressed_data.as_ptr() as *const std::ffi::c_void,
         decompressed.as_mut_ptr() as *mut std::ffi::c_void,
         nbytes,
     );
-
+
     if result < 0 {
         return Err(format!("Blosc decompression failed with code: {}", result));
     }
-
+
     decompressed.truncate(result as usize);
     Ok(decompressed)
 }
@@ -43,18 +43,18 @@ fn print_hexdump(data: &[u8], offset: usize, chunk_name: &str) {
     println!("=== {} ===", chunk_name);
     for (i, chunk) in data.chunks(16).enumerate() {
         let addr = offset + i * 16;
-
+
         // Print address
         print!("{:08x} ", addr);
-
+
         // Print hex bytes
         for (j, &byte) in chunk.iter().enumerate() {
             if j == 8 {
                 print!(" "); // Extra space in the middle
             }
             print!("{:02x} ", byte);
         }
-
+
         // Pad if chunk is less than 16 bytes
         if chunk.len() < 16 {
             for j in chunk.len()..16 {
@@ -64,7 +64,7 @@ fn print_hexdump(data: &[u8], offset: usize, chunk_name: &str) {
                 print!(" ");
             }
         }
-
+
         // Print ASCII representation
         print!(" |");
         for &byte in chunk {
@@ -81,38 +81,38 @@ fn print_hexdump(data: &[u8], offset: usize, chunk_name: &str) {
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     let args: Vec<String> = env::args().collect();
-
+
     if args.len() != 2 {
         eprintln!("Usage: {} <zarr_array_path>", args[0]);
         eprintln!("Example: {} /path/to/zarr/array", args[0]);
         std::process::exit(1);
     }
-
+
     let zarr_path = Path::new(&args[1]);
-
+
     // Verify the path exists
     if !zarr_path.exists() {
         eprintln!("Error: Path '{}' does not exist", zarr_path.display());
         std::process::exit(1);
     }
-
+
     println!("Reading Zarr array from: {}", zarr_path.display());
     println!("========================================");
-
+
     // Read zarr.json metadata
     let zarr_json_path = zarr_path.join("zarr.json");
     if !zarr_json_path.exists() {
         eprintln!("Error: zarr.json not found in {}", zarr_path.display());
         std::process::exit(1);
    }
-
+
     let metadata_content = fs::read_to_string(&zarr_json_path)?;
     let metadata: serde_json::Value = serde_json::from_str(&metadata_content)?;
-
+
     // Extract information from metadata
     let shape = metadata["shape"].as_array().unwrap();
     let chunk_shape = metadata["chunk_grid"]["configuration"]["chunk_shape"].as_array().unwrap();
-
+
     println!("Array shape: {:?}", shape);
     println!("Chunk shape: {:?}", chunk_shape);
     println!("Data type: {}", metadata["data_type"]["name"]);
@@ -122,42 +122,42 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         }
     }
     println!();
-
+
     // Calculate expected chunks based on the metadata we know:
     // Shape: [345, 188], Chunk shape: [128, 128]
     // This means we have ceil(345/128) = 3 chunks in dimension 0
     // and ceil(188/128) = 2 chunks in dimension 1
     // So we expect chunks: c/0/0, c/0/1, c/1/0, c/1/1, c/2/0, c/2/1
-
+
     let mut chunk_files = Vec::new();
-
+
     // Find all chunk files by walking the directory
     for entry in WalkDir::new(zarr_path) {
         let entry = entry?;
         let path = entry.path();
-
+
         // Look for chunk files (they start with 'c/' in Zarr v3)
         if path.is_file() {
             let relative_path = path.strip_prefix(zarr_path)?;
             let path_str = relative_path.to_string_lossy();
-
+
             if path_str.starts_with("c/") {
                 chunk_files.push((path.to_path_buf(), path_str.to_string()));
             }
         }
     }
-
+
     // Sort chunk files for consistent ordering
     chunk_files.sort_by(|a, b| a.1.cmp(&b.1));
-
+
     println!("Found {} chunk files:", chunk_files.len());
     for (_, chunk_name) in &chunk_files {
         println!(" {}", chunk_name);
     }
     println!();
-
+
     let mut total_offset = 0;
-
+
     // Read, decompress, and hexdump each chunk file
     for (chunk_path, chunk_name) in chunk_files {
         match fs::read(&chunk_path) {
@@ -168,7 +168,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
                     println!();
                 } else {
                     println!("Compressed size: {} bytes", compressed_data.len());
-
+
                     // Decompress the Blosc-compressed data using blosc-sys directly
                     match decompress_blosc(&compressed_data) {
                         Ok(decompressed_data) => {
@@ -190,11 +190,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
             }
         }
     }
-
+
     println!("Total decompressed bytes processed: {}", total_offset);
     println!();
     println!("Note: This shows the decompressed array data as it would appear in memory.");
     println!("Each element is 240 bytes (raw_bytes with length_bytes: 240).");
-
+
     Ok(())
 }
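
The Rust tool above reads zarr.json, derives the expected chunk keys from shape and chunk_shape (ceil(345/128) = 3 by ceil(188/128) = 2), and Blosc-decompresses every c/... chunk file it finds. For spot-checking a store without building the Rust binary, the following Python sketch does the same walk; it is not part of the commit and assumes the numcodecs package, a plain Blosc codec, and a Zarr v3 layout with a top-level zarr.json and c/<i>/<j> chunk files. The store path is a placeholder, matching the usage example printed by the Rust tool.

# Sketch only (not part of the commit): inspect a Zarr v3 array the way the
# Rust zarr-hexdump tool does. Assumes numcodecs and a plain Blosc codec.
import json
import math
from pathlib import Path

import numcodecs

store = Path("/path/to/zarr/array")  # hypothetical placeholder path

meta = json.loads((store / "zarr.json").read_text())
shape = meta["shape"]
chunk_shape = meta["chunk_grid"]["configuration"]["chunk_shape"]

# Same arithmetic as the Rust comments: ceil(345/128) = 3, ceil(188/128) = 2.
grid = [math.ceil(s / c) for s, c in zip(shape, chunk_shape)]
expected = [f"c/{i}/{j}" for i in range(grid[0]) for j in range(grid[1])]
print("Expected chunks:", expected)

blosc = numcodecs.Blosc()
total = 0
for key in expected:
    chunk_path = store / key
    if not chunk_path.exists():
        print(f"{key}: missing")
        continue
    compressed = chunk_path.read_bytes()
    decompressed = blosc.decode(compressed)
    total += len(decompressed)
    print(f"{key}: {len(compressed)} compressed -> {len(decompressed)} decompressed bytes")

print("Total decompressed bytes:", total)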

src/mdio/converters/segy.py

Lines changed: 15 additions & 33 deletions
@@ -4,16 +4,17 @@
 
 import logging
 import os
+from copy import deepcopy
 from typing import TYPE_CHECKING
 
 import numpy as np
 import zarr
 from segy import SegyFile
 from segy.config import SegySettings
-from segy.standards.codes import MeasurementSystem as segy_MeasurementSystem
-from segy.standards.fields.trace import Rev0 as TraceHeaderFieldsRev0
 from segy.schema import HeaderField
 from segy.schema import ScalarType as ScalarType2
+from segy.standards.codes import MeasurementSystem as segy_MeasurementSystem
+from segy.standards.fields.trace import Rev0 as TraceHeaderFieldsRev0
 
 from mdio.api.io import _normalize_path
 from mdio.api.io import to_mdio
@@ -342,48 +343,29 @@ def _add_grid_override_to_metadata(dataset: Dataset, grid_overrides: dict[str, A
     if grid_overrides is not None:
         dataset.metadata.attributes["gridOverrides"] = grid_overrides
 
+
 def _scalar_to_size(scalar: ScalarType2) -> int:
-    if scalar == ScalarType2.UINT8:
-        return 1
-    elif scalar == ScalarType2.UINT16:
-        return 2
-    elif scalar == ScalarType2.UINT32:
-        return 4
-    elif scalar == ScalarType2.UINT64:
+    if scalar == ScalarType2.STRING8:
         return 8
-    elif scalar == ScalarType2.INT8:
-        return 1
-    elif scalar == ScalarType2.INT16:
-        return 2
-    elif scalar == ScalarType2.INT32:
-        return 4
-    elif scalar == ScalarType2.INT64:
-        return 8
-    elif scalar == ScalarType2.FLOAT32:
-        return 4
-    elif scalar == ScalarType2.FLOAT64:
-        return 8
-    elif scalar == ScalarType2.FLOAT16:
-        return 2
-    elif scalar == ScalarType2.STRING8:
-        return 8
-    else:
-        raise ValueError(f"Invalid scalar type: {scalar}")
+
+    return str(scalar).split(".")[1] % 8
+
 
 def _customize_segy_spec(segy_spec: SegySpec) -> SegySpec:
-    from copy import deepcopy
     assigned_bytes = []
 
     ret = deepcopy(segy_spec)
 
     for field in segy_spec.trace.header.fields:
-        byte = field.byte-1
+        byte = field.byte - 1
         for i in range(byte, byte + _scalar_to_size(field.format)):
-            assigned_bytes.append(i)
+            assigned_bytes.append(i)  # noqa: PERF402
     unassigned_bytes = [i for i in range(240) if i not in assigned_bytes]
-    field_to_customize = [HeaderField(name=f"__MDIO_RAW_UNSPECIFIED_Field_{i}", format=ScalarType.UINT8, byte=i+1) for i in unassigned_bytes]
-    ret = ret.customize(trace_header_fields=field_to_customize)
-    return ret
+    field_to_customize = [
+        HeaderField(name=f"__MDIO_RAW_UNSPECIFIED_Field_{i}", format=ScalarType.UINT8, byte=i + 1)
+        for i in unassigned_bytes
+    ]
+    return ret.customize(trace_header_fields=field_to_customize)
 
 
 def _add_raw_headers_to_template(mdio_template: AbstractDatasetTemplate) -> AbstractDatasetTemplate:
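
The rewritten _scalar_to_size collapses the if/elif chain by reading the scalar's byte size off the trailing bit width in its name (UINT32 is 4 bytes, FLOAT64 is 8), with STRING8 special-cased to 8 bytes, and _customize_segy_spec uses those sizes to find which of the 240 trace-header bytes no named field covers, then backfills them with one-byte __MDIO_RAW_UNSPECIFIED_Field_{i} entries. The sketch below re-derives that byte-coverage arithmetic in plain Python with a hypothetical field layout; it does not import the segy package and is illustrative only.

# Sketch of the byte-coverage logic in _customize_segy_spec, using a made-up
# header layout instead of a real SegySpec. Sizes come from the trailing bit
# width in the scalar name, mirroring the intent of _scalar_to_size.
import re


def scalar_to_size(scalar_name: str) -> int:
    """Byte size of a scalar type, e.g. 'UINT32' -> 4, 'STRING8' -> 8."""
    if scalar_name == "STRING8":
        return 8
    bits = int(re.search(r"(\d+)$", scalar_name).group(1))
    return bits // 8


# Hypothetical named fields: (name, 1-based start byte, scalar type).
fields = [
    ("inline", 189, "INT32"),
    ("crossline", 193, "INT32"),
    ("cdp_x", 181, "INT32"),
    ("cdp_y", 185, "INT32"),
]

assigned = set()
for _, byte, fmt in fields:
    start = byte - 1  # convert to 0-based
    assigned.update(range(start, start + scalar_to_size(fmt)))

# Every uncovered byte of the 240-byte header becomes a UINT8 filler field,
# so the full raw header survives the conversion byte-for-byte.
unassigned = [i for i in range(240) if i not in assigned]
fillers = [f"__MDIO_RAW_UNSPECIFIED_Field_{i}" for i in unassigned]
print(f"{len(assigned)} bytes covered, {len(fillers)} filler fields added")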

src/mdio/segy/_disaster_recovery_wrapper.py

Lines changed: 5 additions & 6 deletions
@@ -2,17 +2,17 @@
 
 from __future__ import annotations
 
+from copy import deepcopy
 from typing import TYPE_CHECKING
 
-from copy import deepcopy
 import numpy as np
 
 if TYPE_CHECKING:
     from numpy.typing import NDArray
     from segy import SegyFile
 
-class SegyFileTraceDataWrapper:
 
+class SegyFileTraceDataWrapper:
     def __init__(self, segy_file: SegyFile, indices: int | list[int] | NDArray | slice):
         self.segy_file = segy_file
         self.indices = indices
@@ -21,15 +21,14 @@ def __init__(self, segy_file: SegyFile, indices: int | list[int] | NDArray | sli
         self.traces = segy_file.trace[indices]
 
     @property
-    def header(self):
+    def header(self) -> NDArray:
         # The copy is necessary to avoid applying the pipeline to the original header.
         return self._header_pipeline.apply(self.traces.header.copy())
 
-
     @property
-    def raw_header(self):
+    def raw_header(self) -> NDArray:
         return np.ascontiguousarray(self.traces.header.copy()).view("|V240")
 
     @property
-    def sample(self):
+    def sample(self) -> NDArray:
         return self.traces.sample
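
The raw_header property relies on a numpy reinterpretation trick: a structured header array whose records are exactly 240 bytes wide can be viewed as opaque |V240 void records, giving byte-exact copies of each trace header without decoding, while header applies the saved decode pipeline to a separate copy so the raw bytes are never mutated. A minimal illustration follows; the dtype below is hypothetical, not the segy library's actual trace-header dtype.

# Minimal illustration of the |V240 view used by raw_header. The dtype is a
# made-up stand-in for the real 240-byte SEG-Y trace-header dtype.
import numpy as np

header_dtype = np.dtype(
    {
        "names": ["inline", "crossline"],
        "formats": ["<i4", "<i4"],
        "offsets": [188, 192],
        "itemsize": 240,  # pad each record out to the full 240-byte header
    }
)

headers = np.zeros(3, dtype=header_dtype)
headers["inline"] = [10, 11, 12]

# Same pattern as raw_header: copy, force contiguity, then reinterpret each
# 240-byte record as a raw void blob.
raw = np.ascontiguousarray(headers.copy()).view("|V240")

print(raw.dtype, raw.shape)    # |V240 (3,)
print(bytes(raw[1])[188:192])  # little-endian bytes of inline == 11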

src/mdio/segy/_workers.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,6 @@
 
 from mdio.api.io import to_mdio
 from mdio.builder.schemas.dtype import ScalarType
-from mdio.segy._disaster_recovery_wrapper import SegyFileTraceDataWrapper
 
 if TYPE_CHECKING:
     from segy.arrays import HeaderArray
@@ -141,6 +140,7 @@ def trace_worker(  # noqa: PLR0913
     # NOTE: The `raw_header_key` code block should be removed in full as it will become dead code.
     # traces = SegyFileTraceDataWrapper(segy_file, live_trace_indexes)
     from copy import deepcopy
+
     header_pipeline = deepcopy(segy_file.accessors.header_decode_pipeline)
     segy_file.accessors.header_decode_pipeline.transforms = []
     traces = segy_file.trace[live_trace_indexes]
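
The hunk in trace_worker reads traces with header decoding temporarily bypassed: it deep-copies the header decode pipeline, empties the live pipeline's transform list so segy_file.trace[...] returns undecoded headers, and keeps the copy around to apply decoding later. A hedged sketch of that save/clear/apply pattern follows; the function name and the try/finally restore step are illustrative assumptions, not part of the commit.

# Illustrative sketch of the bypass pattern used in trace_worker. Only the
# deepcopy / clear-transforms / index steps appear in the commit itself; the
# helper name and the restore are assumptions.
from copy import deepcopy


def read_traces_with_raw_headers(segy_file, live_trace_indexes):
    """Read traces while header decoding is switched off, returning the traces
    plus a decode pipeline that can be applied to the raw headers on demand."""
    accessors = segy_file.accessors

    # Keep a private copy of the decode pipeline before disabling it.
    header_pipeline = deepcopy(accessors.header_decode_pipeline)
    saved_transforms = accessors.header_decode_pipeline.transforms

    accessors.header_decode_pipeline.transforms = []  # bypass decoding
    try:
        traces = segy_file.trace[live_trace_indexes]  # headers stay raw bytes
    finally:
        # Put the shared pipeline back so later reads decode normally.
        accessors.header_decode_pipeline.transforms = saved_transforms

    return traces, header_pipeline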
