Skip to content

Commit 92184a9

Browse files
BrianMichell authored and tasansal committed
Checkpoint
1 parent dfebfc8 commit 92184a9

File tree

4 files changed

+66
-10
lines changed

4 files changed

+66
-10
lines changed

src/mdio/converters/segy.py

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,17 @@ def _scan_for_headers(
178178
)
179179
template._dim_names = actual_spatial_dims + (template.trace_domain,)
180180

181+
# Handle NonBinned: move non-binned dimensions to coordinates
182+
if grid_overrides and "NonBinned" in grid_overrides and "non_binned_dims" in grid_overrides:
183+
non_binned_dims = tuple(grid_overrides["non_binned_dims"])
184+
if non_binned_dims:
185+
logger.debug(
186+
"NonBinned grid override: moving dimensions %s to coordinates",
187+
non_binned_dims,
188+
)
189+
# Add non-binned dimensions as logical coordinates
190+
template._logical_coord_names = template._logical_coord_names + non_binned_dims
191+
181192
return segy_dimensions, segy_headers
182193

183194

src/mdio/segy/geometry.py

Lines changed: 39 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -421,19 +421,51 @@ def transform_chunksize(
421421

422422

423423
class NonBinned(DuplicateIndex):
424-
"""Automatically index traces in a single specified axis - trace."""
424+
"""Handle non-binned dimensions by converting them to a trace dimension with coordinates.
425+
426+
This override takes dimensions that are not regularly sampled (non-binned) and converts
427+
them into a single 'trace' dimension. The original non-binned dimensions become coordinates
428+
indexed by the trace dimension.
429+
430+
Example:
431+
Template with dimensions [shot_point, cable, channel, azimuth, offset, sample]
432+
and non_binned_dims=['azimuth', 'offset'] becomes:
433+
- dimensions: [shot_point, cable, channel, trace, sample]
434+
- coordinates: azimuth and offset with dimensions [shot_point, cable, channel, trace]
435+
436+
Attributes:
437+
required_keys: No required keys for this override.
438+
required_parameters: Set containing 'chunksize' and 'non_binned_dims'.
439+
"""
425440

426441
required_keys = None
427-
required_parameters = {"chunksize"}
442+
required_parameters = {"chunksize", "non_binned_dims"}
443+
444+
def validate(self, index_headers: HeaderArray, grid_overrides: dict[str, bool | int]) -> None:
445+
"""Validate if this transform should run on the type of data."""
446+
self.check_required_params(grid_overrides)
447+
448+
# Validate that non_binned_dims is a list
449+
non_binned_dims = grid_overrides.get("non_binned_dims", [])
450+
if not isinstance(non_binned_dims, list):
451+
msg = f"non_binned_dims must be a list, got {type(non_binned_dims)}"
452+
raise ValueError(msg)
453+
454+
# Validate that all non-binned dimensions exist in headers
455+
missing_dims = set(non_binned_dims) - set(index_headers.dtype.names)
456+
if missing_dims:
457+
msg = f"Non-binned dimensions {missing_dims} not found in index headers"
458+
raise ValueError(msg)
428459

429460
def transform_chunksize(
430461
self,
431462
chunksize: Sequence[int],
432463
grid_overrides: dict[str, bool | int],
433464
) -> Sequence[int]:
434-
"""Perform the transform of chunksize."""
465+
"""Insert chunksize for trace dimension at N-1 position."""
435466
new_chunks = list(chunksize)
436-
new_chunks.insert(-1, grid_overrides["chunksize"])
467+
trace_chunksize = grid_overrides["chunksize"]
468+
new_chunks.insert(-1, trace_chunksize)
437469
return tuple(new_chunks)
438470

439471

@@ -544,6 +576,9 @@ def get_allowed_parameters(self) -> set:
544576

545577
parameters.update(command.required_parameters)
546578

579+
# Always allow "non_binned_dims", whether or not a registered command lists it as a required parameter
580+
parameters.add("non_binned_dims")
581+
547582
return parameters
548583

549584
def run(

src/mdio/segy/utilities.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -73,11 +73,19 @@ def get_grid_plan( # noqa: C901, PLR0913
7373
grid_overrides=grid_overrides,
7474
template=template,
7575
)
76+
77+
# Determine which dimensions are non-binned (converted to coordinates)
78+
non_binned_dims = set()
79+
if "NonBinned" in grid_overrides and "non_binned_dims" in grid_overrides:
80+
non_binned_dims = set(grid_overrides["non_binned_dims"])
81+
7682
# Use the spatial dimension names from horizontal_coordinates (which may have been modified by grid overrides)
77-
# Extract only the dimension names (not including non-dimension coordinates)
83+
# Extract only the dimension names (not including non-dimension coordinates or non-binned dimensions)
7884
# After grid overrides, trace might have been added to horizontal_coordinates
7985
transformed_spatial_dims = [
80-
name for name in horizontal_coordinates if name in horizontal_dimensions or name == "trace"
86+
name
87+
for name in horizontal_coordinates
88+
if (name in horizontal_dimensions or name == "trace") and name not in non_binned_dims
8189
]
8290

8391
dimensions = []

tests/integration/test_import_streamer_grid_overrides.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,6 @@
2727
os.environ["MDIO__IMPORT__SAVE_SEGY_FILE_HEADER"] = "true"
2828

2929

30-
# TODO(BrianMichell): Add non-binned back
31-
# https://github.com/TGSAI/mdio-python/issues/612
32-
# @pytest.mark.parametrize("grid_override", [{"NonBinned": True, "chunksize": 4}, {"HasDuplicates": True}])
3330
@pytest.mark.parametrize("grid_override", [{"HasDuplicates": True}])
3431
@pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.C])
3532
class TestImport4DNonReg:
@@ -69,13 +66,18 @@ def test_import_4d_segy( # noqa: PLR0913
6966
xrt.assert_duckarray_equal(ds["shot_point"], shots)
7067
xrt.assert_duckarray_equal(ds["cable"], cables)
7168

72-
# assert grid.select_dim("trace") == Dimension(range(1, np.amax(receivers_per_cable) + 1), "trace")
69+
# HasDuplicates should create a trace dimension
7370
expected = list(range(1, np.amax(receivers_per_cable) + 1))
7471
xrt.assert_duckarray_equal(ds["trace"], expected)
7572

7673
times_expected = list(range(0, num_samples, 1))
7774
xrt.assert_duckarray_equal(ds["time"], times_expected)
7875

76+
# HasDuplicates uses chunksize of 1 for trace dimension
77+
trace_chunks = ds["amplitude"].chunksizes.get("trace", None)
78+
if trace_chunks is not None:
79+
assert all(chunk == 1 for chunk in trace_chunks)
80+
7981

8082
@pytest.mark.parametrize("grid_override", [{"AutoChannelWrap": True}, None])
8183
@pytest.mark.parametrize("chan_header_type", [StreamerShotGeometryType.A, StreamerShotGeometryType.B])

0 commit comments

Comments
 (0)