Commit 7d1508e

linting pass 1
1 parent 5a041a4 commit 7d1508e

9 files changed: +114 -135 lines changed

disaster_recovery_analysis/bootstrap.sh

Lines changed: 1 addition & 1 deletion
@@ -59,7 +59,7 @@ if [ $# -eq 1 ]; then
     ZARR_PATH="$1"
     echo "=== Running hexdump on provided path: $ZARR_PATH ==="
     echo
-
+
     if [ -e "$ZARR_PATH" ]; then
         ./target/release/zarr-hexdump "$ZARR_PATH"
     else

disaster_recovery_analysis/ingest_both_teapots.py

Lines changed: 8 additions & 6 deletions
@@ -1,11 +1,13 @@
 if __name__ == "__main__":
-    import mdio
-    from segy.standards import get_segy_standard
-    from segy.schema import HeaderField, Endianness
+    import logging
     import os
-    from mdio.builder.template_registry import TemplateRegistry
 
-    import logging
+    from segy.schema import Endianness
+    from segy.schema import HeaderField
+    from segy.standards import get_segy_standard
+
+    import mdio
+    from mdio.builder.template_registry import TemplateRegistry
 
     logging.getLogger("segy").setLevel(logging.DEBUG)
 
@@ -40,4 +42,4 @@
         input_path="filt_mig_IEEE_LittleEndian_Rev1.sgy",
         output_path="filt_mig_IEEE_LittleEndian_Rev1.mdio",
         overwrite=True,
-    )
+    )

disaster_recovery_analysis/src/main.rs

Lines changed: 30 additions & 30 deletions
@@ -8,32 +8,32 @@ fn decompress_blosc(compressed_data: &[u8]) -> Result<Vec<u8>, String> {
     let mut nbytes = 0usize;
     let mut cbytes = 0usize;
     let mut blocksize = 0usize;
-
+
     blosc_sys::blosc_cbuffer_sizes(
         compressed_data.as_ptr() as *const std::ffi::c_void,
         &mut nbytes as *mut usize,
         &mut cbytes as *mut usize,
         &mut blocksize as *mut usize,
     );
-
+
     if nbytes == 0 {
         return Err("Invalid compressed data".to_string());
     }
-
+
     // Allocate output buffer
     let mut decompressed = vec![0u8; nbytes];
-
+
     // Decompress
     let result = blosc_sys::blosc_decompress(
         compressed_data.as_ptr() as *const std::ffi::c_void,
         decompressed.as_mut_ptr() as *mut std::ffi::c_void,
         nbytes,
     );
-
+
     if result < 0 {
         return Err(format!("Blosc decompression failed with code: {}", result));
     }
-
+
     decompressed.truncate(result as usize);
     Ok(decompressed)
 }
@@ -43,18 +43,18 @@ fn print_hexdump(data: &[u8], offset: usize, chunk_name: &str) {
     println!("=== {} ===", chunk_name);
     for (i, chunk) in data.chunks(16).enumerate() {
         let addr = offset + i * 16;
-
+
         // Print address
         print!("{:08x} ", addr);
-
+
         // Print hex bytes
         for (j, &byte) in chunk.iter().enumerate() {
             if j == 8 {
                 print!(" "); // Extra space in the middle
             }
             print!("{:02x} ", byte);
         }
-
+
         // Pad if chunk is less than 16 bytes
         if chunk.len() < 16 {
             for j in chunk.len()..16 {
@@ -64,7 +64,7 @@ fn print_hexdump(data: &[u8], offset: usize, chunk_name: &str) {
                 print!(" ");
             }
         }
-
+
         // Print ASCII representation
         print!(" |");
         for &byte in chunk {
@@ -81,38 +81,38 @@ fn print_hexdump(data: &[u8], offset: usize, chunk_name: &str) {
 
 fn main() -> Result<(), Box<dyn std::error::Error>> {
     let args: Vec<String> = env::args().collect();
-
+
     if args.len() != 2 {
         eprintln!("Usage: {} <zarr_array_path>", args[0]);
         eprintln!("Example: {} /path/to/zarr/array", args[0]);
         std::process::exit(1);
     }
-
+
     let zarr_path = Path::new(&args[1]);
-
+
     // Verify the path exists
     if !zarr_path.exists() {
         eprintln!("Error: Path '{}' does not exist", zarr_path.display());
         std::process::exit(1);
     }
-
+
     println!("Reading Zarr array from: {}", zarr_path.display());
     println!("========================================");
-
+
     // Read zarr.json metadata
     let zarr_json_path = zarr_path.join("zarr.json");
     if !zarr_json_path.exists() {
         eprintln!("Error: zarr.json not found in {}", zarr_path.display());
         std::process::exit(1);
    }
-
+
     let metadata_content = fs::read_to_string(&zarr_json_path)?;
     let metadata: serde_json::Value = serde_json::from_str(&metadata_content)?;
-
+
     // Extract information from metadata
     let shape = metadata["shape"].as_array().unwrap();
     let chunk_shape = metadata["chunk_grid"]["configuration"]["chunk_shape"].as_array().unwrap();
-
+
     println!("Array shape: {:?}", shape);
     println!("Chunk shape: {:?}", chunk_shape);
     println!("Data type: {}", metadata["data_type"]["name"]);
@@ -122,42 +122,42 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         }
     }
     println!();
-
+
     // Calculate expected chunks based on the metadata we know:
     // Shape: [345, 188], Chunk shape: [128, 128]
     // This means we have ceil(345/128) = 3 chunks in dimension 0
     // and ceil(188/128) = 2 chunks in dimension 1
     // So we expect chunks: c/0/0, c/0/1, c/1/0, c/1/1, c/2/0, c/2/1
-
+
     let mut chunk_files = Vec::new();
-
+
     // Find all chunk files by walking the directory
     for entry in WalkDir::new(zarr_path) {
         let entry = entry?;
         let path = entry.path();
-
+
         // Look for chunk files (they start with 'c/' in Zarr v3)
         if path.is_file() {
             let relative_path = path.strip_prefix(zarr_path)?;
             let path_str = relative_path.to_string_lossy();
-
+
             if path_str.starts_with("c/") {
                 chunk_files.push((path.to_path_buf(), path_str.to_string()));
             }
         }
     }
-
+
     // Sort chunk files for consistent ordering
     chunk_files.sort_by(|a, b| a.1.cmp(&b.1));
-
+
     println!("Found {} chunk files:", chunk_files.len());
     for (_, chunk_name) in &chunk_files {
         println!(" {}", chunk_name);
     }
     println!();
-
+
     let mut total_offset = 0;
-
+
     // Read, decompress, and hexdump each chunk file
     for (chunk_path, chunk_name) in chunk_files {
         match fs::read(&chunk_path) {
@@ -168,7 +168,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
                     println!();
                 } else {
                     println!("Compressed size: {} bytes", compressed_data.len());
-
+
                     // Decompress the Blosc-compressed data using blosc-sys directly
                     match decompress_blosc(&compressed_data) {
                         Ok(decompressed_data) => {
@@ -190,11 +190,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
             }
         }
     }
-
+
     println!("Total decompressed bytes processed: {}", total_offset);
     println!();
     println!("Note: This shows the decompressed array data as it would appear in memory.");
     println!("Each element is 240 bytes (raw_bytes with length_bytes: 240).");
-
+
     Ok(())
 }
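
The Rust tool above reads zarr.json, derives the expected chunk keys from shape and chunk_shape (ceil(345/128) = 3 by ceil(188/128) = 2), and Blosc-decompresses every c/... chunk file it finds. For spot-checking a store without building the Rust binary, the following Python sketch does the same walk; it is not part of the commit and assumes the numcodecs package, a plain Blosc codec, and a Zarr v3 layout with a top-level zarr.json and c/<i>/<j> chunk files. The store path is a placeholder, matching the usage example printed by the Rust tool.

# Sketch only (not part of the commit): inspect a Zarr v3 array the way the
# Rust zarr-hexdump tool does. Assumes numcodecs and a plain Blosc codec.
import json
import math
from pathlib import Path

import numcodecs

store = Path("/path/to/zarr/array")  # hypothetical placeholder path

meta = json.loads((store / "zarr.json").read_text())
shape = meta["shape"]
chunk_shape = meta["chunk_grid"]["configuration"]["chunk_shape"]

# Same arithmetic as the Rust comments: ceil(345/128) = 3, ceil(188/128) = 2.
grid = [math.ceil(s / c) for s, c in zip(shape, chunk_shape)]
expected = [f"c/{i}/{j}" for i in range(grid[0]) for j in range(grid[1])]
print("Expected chunks:", expected)

blosc = numcodecs.Blosc()
total = 0
for key in expected:
    chunk_path = store / key
    if not chunk_path.exists():
        print(f"{key}: missing")
        continue
    compressed = chunk_path.read_bytes()
    decompressed = blosc.decode(compressed)
    total += len(decompressed)
    print(f"{key}: {len(compressed)} compressed -> {len(decompressed)} decompressed bytes")

print("Total decompressed bytes:", total)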

src/mdio/converters/segy.py

Lines changed: 15 additions & 33 deletions
@@ -4,16 +4,17 @@
 
 import logging
 import os
+from copy import deepcopy
 from typing import TYPE_CHECKING
 
 import numpy as np
 import zarr
 from segy import SegyFile
 from segy.config import SegySettings
-from segy.standards.codes import MeasurementSystem as segy_MeasurementSystem
-from segy.standards.fields.trace import Rev0 as TraceHeaderFieldsRev0
 from segy.schema import HeaderField
 from segy.schema import ScalarType as ScalarType2
+from segy.standards.codes import MeasurementSystem as segy_MeasurementSystem
+from segy.standards.fields.trace import Rev0 as TraceHeaderFieldsRev0
 
 from mdio.api.io import _normalize_path
 from mdio.api.io import to_mdio
@@ -342,48 +343,29 @@ def _add_grid_override_to_metadata(dataset: Dataset, grid_overrides: dict[str, A
     if grid_overrides is not None:
         dataset.metadata.attributes["gridOverrides"] = grid_overrides
 
+
 def _scalar_to_size(scalar: ScalarType2) -> int:
-    if scalar == ScalarType2.UINT8:
-        return 1
-    elif scalar == ScalarType2.UINT16:
-        return 2
-    elif scalar == ScalarType2.UINT32:
-        return 4
-    elif scalar == ScalarType2.UINT64:
+    if scalar == ScalarType2.STRING8:
         return 8
-    elif scalar == ScalarType2.INT8:
-        return 1
-    elif scalar == ScalarType2.INT16:
-        return 2
-    elif scalar == ScalarType2.INT32:
-        return 4
-    elif scalar == ScalarType2.INT64:
-        return 8
-    elif scalar == ScalarType2.FLOAT32:
-        return 4
-    elif scalar == ScalarType2.FLOAT64:
-        return 8
-    elif scalar == ScalarType2.FLOAT16:
-        return 2
-    elif scalar == ScalarType2.STRING8:
-        return 8
-    else:
-        raise ValueError(f"Invalid scalar type: {scalar}")
+
+    return str(scalar).split(".")[1] % 8
+
 
 def _customize_segy_spec(segy_spec: SegySpec) -> SegySpec:
-    from copy import deepcopy
     assigned_bytes = []
 
     ret = deepcopy(segy_spec)
 
     for field in segy_spec.trace.header.fields:
-        byte = field.byte-1
+        byte = field.byte - 1
         for i in range(byte, byte + _scalar_to_size(field.format)):
-            assigned_bytes.append(i)
+            assigned_bytes.append(i)  # noqa: PERF402
     unassigned_bytes = [i for i in range(240) if i not in assigned_bytes]
-    field_to_customize = [HeaderField(name=f"__MDIO_RAW_UNSPECIFIED_Field_{i}", format=ScalarType.UINT8, byte=i+1) for i in unassigned_bytes]
-    ret = ret.customize(trace_header_fields=field_to_customize)
-    return ret
+    field_to_customize = [
+        HeaderField(name=f"__MDIO_RAW_UNSPECIFIED_Field_{i}", format=ScalarType.UINT8, byte=i + 1)
+        for i in unassigned_bytes
+    ]
+    return ret.customize(trace_header_fields=field_to_customize)
 
 
 def _add_raw_headers_to_template(mdio_template: AbstractDatasetTemplate) -> AbstractDatasetTemplate:
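
The rewritten _scalar_to_size collapses the if/elif chain by reading the scalar's byte size off the trailing bit width in its name (UINT32 is 4 bytes, FLOAT64 is 8), with STRING8 special-cased to 8 bytes, and _customize_segy_spec uses those sizes to find which of the 240 trace-header bytes no named field covers, then backfills them with one-byte __MDIO_RAW_UNSPECIFIED_Field_{i} entries. The sketch below re-derives that byte-coverage arithmetic in plain Python with a hypothetical field layout; it does not import the segy package and is illustrative only.

# Sketch of the byte-coverage logic in _customize_segy_spec, using a made-up
# header layout instead of a real SegySpec. Sizes come from the trailing bit
# width in the scalar name, mirroring the intent of _scalar_to_size.
import re


def scalar_to_size(scalar_name: str) -> int:
    """Byte size of a scalar type, e.g. 'UINT32' -> 4, 'STRING8' -> 8."""
    if scalar_name == "STRING8":
        return 8
    bits = int(re.search(r"(\d+)$", scalar_name).group(1))
    return bits // 8


# Hypothetical named fields: (name, 1-based start byte, scalar type).
fields = [
    ("inline", 189, "INT32"),
    ("crossline", 193, "INT32"),
    ("cdp_x", 181, "INT32"),
    ("cdp_y", 185, "INT32"),
]

assigned = set()
for _, byte, fmt in fields:
    start = byte - 1  # convert to 0-based
    assigned.update(range(start, start + scalar_to_size(fmt)))

# Every uncovered byte of the 240-byte header becomes a UINT8 filler field,
# so the full raw header survives the conversion byte-for-byte.
unassigned = [i for i in range(240) if i not in assigned]
fillers = [f"__MDIO_RAW_UNSPECIFIED_Field_{i}" for i in unassigned]
print(f"{len(assigned)} bytes covered, {len(fillers)} filler fields added")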

src/mdio/segy/_disaster_recovery_wrapper.py

Lines changed: 5 additions & 6 deletions
@@ -2,17 +2,17 @@
 
 from __future__ import annotations
 
+from copy import deepcopy
 from typing import TYPE_CHECKING
 
-from copy import deepcopy
 import numpy as np
 
 if TYPE_CHECKING:
     from numpy.typing import NDArray
     from segy import SegyFile
 
-class SegyFileTraceDataWrapper:
 
+class SegyFileTraceDataWrapper:
     def __init__(self, segy_file: SegyFile, indices: int | list[int] | NDArray | slice):
         self.segy_file = segy_file
         self.indices = indices
@@ -21,15 +21,14 @@ def __init__(self, segy_file: SegyFile, indices: int | list[int] | NDArray | sli
         self.traces = segy_file.trace[indices]
 
     @property
-    def header(self):
+    def header(self) -> NDArray:
         # The copy is necessary to avoid applying the pipeline to the original header.
         return self._header_pipeline.apply(self.traces.header.copy())
 
-
     @property
-    def raw_header(self):
+    def raw_header(self) -> NDArray:
         return np.ascontiguousarray(self.traces.header.copy()).view("|V240")
 
     @property
-    def sample(self):
+    def sample(self) -> NDArray:
         return self.traces.sample
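
The raw_header property relies on a numpy reinterpretation trick: a structured header array whose records are exactly 240 bytes wide can be viewed as opaque |V240 void records, giving byte-exact copies of each trace header without decoding, while header applies the saved decode pipeline to a separate copy so the raw bytes are never mutated. A minimal illustration follows; the dtype below is hypothetical, not the segy library's actual trace-header dtype.

# Minimal illustration of the |V240 view used by raw_header. The dtype is a
# made-up stand-in for the real 240-byte SEG-Y trace-header dtype.
import numpy as np

header_dtype = np.dtype(
    {
        "names": ["inline", "crossline"],
        "formats": ["<i4", "<i4"],
        "offsets": [188, 192],
        "itemsize": 240,  # pad each record out to the full 240-byte header
    }
)

headers = np.zeros(3, dtype=header_dtype)
headers["inline"] = [10, 11, 12]

# Same pattern as raw_header: copy, force contiguity, then reinterpret each
# 240-byte record as a raw void blob.
raw = np.ascontiguousarray(headers.copy()).view("|V240")

print(raw.dtype, raw.shape)    # |V240 (3,)
print(bytes(raw[1])[188:192])  # little-endian bytes of inline == 11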

src/mdio/segy/_workers.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,6 @@
 
 from mdio.api.io import to_mdio
 from mdio.builder.schemas.dtype import ScalarType
-from mdio.segy._disaster_recovery_wrapper import SegyFileTraceDataWrapper
 
 if TYPE_CHECKING:
     from segy.arrays import HeaderArray
@@ -141,6 +140,7 @@ def trace_worker(  # noqa: PLR0913
     # NOTE: The `raw_header_key` code block should be removed in full as it will become dead code.
     # traces = SegyFileTraceDataWrapper(segy_file, live_trace_indexes)
     from copy import deepcopy
+
     header_pipeline = deepcopy(segy_file.accessors.header_decode_pipeline)
     segy_file.accessors.header_decode_pipeline.transforms = []
     traces = segy_file.trace[live_trace_indexes]
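
The hunk in trace_worker reads traces with header decoding temporarily bypassed: it deep-copies the header decode pipeline, empties the live pipeline's transform list so segy_file.trace[...] returns undecoded headers, and keeps the copy around to apply decoding later. A hedged sketch of that save/clear/apply pattern follows; the function name and the try/finally restore step are illustrative assumptions, not part of the commit.

# Illustrative sketch of the bypass pattern used in trace_worker. Only the
# deepcopy / clear-transforms / index steps appear in the commit itself; the
# helper name and the restore are assumptions.
from copy import deepcopy


def read_traces_with_raw_headers(segy_file, live_trace_indexes):
    """Read traces while header decoding is switched off, returning the traces
    plus a decode pipeline that can be applied to the raw headers on demand."""
    accessors = segy_file.accessors

    # Keep a private copy of the decode pipeline before disabling it.
    header_pipeline = deepcopy(accessors.header_decode_pipeline)
    saved_transforms = accessors.header_decode_pipeline.transforms

    accessors.header_decode_pipeline.transforms = []  # bypass decoding
    try:
        traces = segy_file.trace[live_trace_indexes]  # headers stay raw bytes
    finally:
        # Put the shared pipeline back so later reads decode normally.
        accessors.header_decode_pipeline.transforms = saved_transforms

    return traces, header_pipeline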
