Skip to content

Commit 73ee1dc

Browse files
markspec and tasansal authored
Configurable Grid Sparsity Ratio Limit (#361)
* Add option to change grid sparsity ratio limit by setting env MDIO_GRID_SPARSITY_RATIO_LIMIT.
* Add option to change grid sparsity ratio limit by setting env MDIO_GRID_SPARSITY_RATIO_LIMIT.
* Provide more context in GridTraceSparsityError rather than as a warning. Rename MDIO__GRID__SPARSITY_RATIO_LIMIT.
* Add option to change grid sparsity ratio limit by setting env MDIO_GRID_SPARSITY_RATIO_LIMIT.
* Provide more context in GridTraceSparsityError rather than as a warning. Rename MDIO__GRID__SPARSITY_RATIO_LIMIT.
* Fix TestImport4DSparse.
* Make env vars consistent to upcoming standard.

---------

Co-authored-by: Altay Sansal <[email protected]>
1 parent 28a8e9f commit 73ee1dc

File tree

3 files changed

+100
-16
lines changed

3 files changed

+100
-16
lines changed

src/mdio/converters/exceptions.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,18 @@
11
"""Custom exceptions for MDIO converters."""
22

33

4+
class EnvironmentFormatError(Exception):
    """Raised when an environment variable does not have the expected format."""

    def __init__(self, name, format, msg: str = ""):
        """Build the error message and initialize the exception.

        Args:
            name: Name of the offending environment variable.
            format: Description of the expected format (e.g. ``"float"``).
                The parameter name shadows the builtin; it is kept so that
                existing keyword callers continue to work.
            msg: Optional extra context, appended on a new line when given.
        """
        text = f"Environment variable: {name} not of expected format: {format}. "
        if msg:
            text = f"{text}\n{msg}"
        self.message = text
        super().__init__(text)
14+
15+
416
class GridTraceCountError(Exception):
517
"""Raised when grid trace counts don't match the SEG-Y trace count."""
618

@@ -20,11 +32,11 @@ def __init__(self, grid_traces, segy_traces):
2032
class GridTraceSparsityError(Exception):
    """Raised when the MDIO grid would be sparsely populated by SEG-Y traces."""

    def __init__(self, shape, num_traces, msg: str = ""):
        """Build the error message and initialize the exception.

        Args:
            shape: Grid shape reported in the message.
            num_traces: SEG-Y trace count reported in the message.
            msg: Optional extra context, appended on a new line when given.
        """
        parts = [
            f"Grid shape: {shape} but SEG-Y tracecount: {num_traces}. ",
            "This grid is very sparse and most likely user error with indexing.",
        ]
        if msg:
            parts.append(f"\n{msg}")
        self.message = "".join(parts)
        super().__init__(self.message)

src/mdio/converters/segy.py

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
import zarr
1717

1818
from mdio.api.io_utils import process_url
19+
from mdio.converters.exceptions import EnvironmentFormatError
1920
from mdio.converters.exceptions import GridTraceCountError
2021
from mdio.converters.exceptions import GridTraceSparsityError
2122
from mdio.core import Grid
@@ -66,42 +67,61 @@ def grid_density_qc(grid: Grid, num_traces: int) -> None:
6667
logged. If density is less than 10% an exception is raised. To ignore
6768
trace sparsity check set environment variable:
6869
MDIO__IGNORE_CHECKS = True
70+
To change the ratio set the environment variable:
71+
MDIO__GRID__SPARSITY_RATIO_LIMIT = 10
6972
7073
Args:
7174
grid: The grid instance to check.
7275
num_traces: Expected number of traces.
7376
7477
Raises:
75-
GridTraceSparsityError: When the grid is too sparse.
78+
GridTraceSparsityError: Raised if the grid is significantly larger
79+
than the number of traces in the SEG-Y file. By default the error
80+
is raised if the grid is more than 10 times larger than the number
81+
of traces in the SEG-Y file. This can be disabled by setting the
82+
environment variable `MDIO__IGNORE_CHECKS` to `True`. The limit can
83+
be changed by setting the environment variable
84+
`MDIO__GRID__SPARSITY_RATIO_LIMIT`.
85+
EnvironmentFormatError: Raised if the environment variable
86+
MDIO__GRID__SPARSITY_RATIO_LIMIT is not a float.
7687
"""
7788
grid_traces = np.prod(grid.shape[:-1], dtype=np.uint64) # Exclude sample
7889
dims = {k: v for k, v in zip(grid.dim_names, grid.shape)} # noqa: B905
7990

8091
logger.debug(f"Dimensions: {dims}")
8192
logger.debug(f"num_traces = {num_traces}")
8293

83-
# Extreme case where the grid is very sparse (usually user error)
84-
if grid_traces > 10 * num_traces:
85-
logger.warning("WARNING: Sparse mdio grid detected!")
86-
for dim_name in grid.dim_names:
87-
dim_min = grid.get_min(dim_name)
88-
dim_max = grid.get_max(dim_name)
89-
logger.warning(f"{dim_name} min: {dim_min} max: {dim_max}")
90-
if os.getenv("MDIO_IGNORE_CHECKS", False):
91-
# Do not raise an exception if MDIO_IGNORE_CHECK is False
92-
pass
93-
else:
94-
raise GridTraceSparsityError(grid.shape, num_traces)
94+
grid_sparsity_ratio_limit = os.getenv("MDIO__GRID__SPARSITY_RATIO_LIMIT", 10)
95+
try:
96+
grid_sparsity_ratio_limit_ = float(grid_sparsity_ratio_limit)
97+
except ValueError:
98+
raise EnvironmentFormatError(
99+
"MDIO__GRID__SPARSITY_RATIO_LIMIT", "float"
100+
) from None
95101

96102
# Warning if we have above 50% sparsity.
97-
if grid_traces > 2 * num_traces:
103+
msg = ""
104+
if grid_traces > min(2, grid_sparsity_ratio_limit_) * num_traces:
98105
msg = (
99106
f"Proposed ingestion grid is sparse. Ingestion grid: {dims}. "
100107
f"SEG-Y trace count:{num_traces}, grid trace count: {grid_traces}."
101108
)
109+
for dim_name in grid.dim_names:
110+
dim_min = grid.get_min(dim_name)
111+
dim_max = grid.get_max(dim_name)
112+
msg += f"\n{dim_name} min: {dim_min} max: {dim_max}"
102113

103114
logger.warning(msg)
104115

116+
# Extreme case where the grid is very sparse (usually user error)
117+
if grid_traces > grid_sparsity_ratio_limit_ * num_traces:
118+
logger.warning("WARNING: Sparse mdio grid detected!")
119+
if os.getenv("MDIO__IGNORE_CHECKS", False):
120+
# Do not raise an exception when MDIO__IGNORE_CHECKS is set
121+
pass
122+
else:
123+
raise GridTraceSparsityError(grid.shape, num_traces, msg)
124+
105125

106126
def segy_to_mdio(
107127
segy_path: str,

tests/integration/test_segy_import_export.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,58 @@ def test_import_4d_segy(
139139
assert grid.select_dim("sample") == samples_exp
140140

141141

142+
@pytest.mark.parametrize("header_locations", [(17, 137, 13)])
@pytest.mark.parametrize("header_names", [("shot_point", "cable", "channel")])
@pytest.mark.parametrize("header_types", [("int32", "int16", "int32")])
@pytest.mark.parametrize("endian", ["big"])
@pytest.mark.parametrize(
    "grid_overrides",
    [
        None,
    ],
)
@pytest.mark.parametrize(
    "chan_header_type",
    [
        StreamerShotGeometryType.A,
    ],
)
class TestImport4DSparse:
    """Test that 4D SEG-Y import fails when the grid exceeds the sparsity limit."""

    def test_import_4d_segy(
        self,
        segy_mock_4d_shots,
        zarr_tmp,
        header_locations,
        header_names,
        header_types,
        endian,
        grid_overrides,
        chan_header_type,
    ):
        """Expect GridTraceSparsityError with the sparsity ratio limit set to 1.1."""
        import os
        from unittest import mock

        from mdio.converters.exceptions import GridTraceSparsityError

        segy_path = segy_mock_4d_shots[chan_header_type]

        # Scope the override to this test: writing to os.environ directly
        # would leave the 1.1 limit in place for every test that runs later
        # in the same process.
        sparse_limit = {"MDIO__GRID__SPARSITY_RATIO_LIMIT": "1.1"}
        with mock.patch.dict(os.environ, sparse_limit):
            with pytest.raises(GridTraceSparsityError):
                segy_to_mdio(
                    segy_path=segy_path,
                    mdio_path_or_buffer=str(zarr_tmp),
                    index_bytes=header_locations,
                    index_names=header_names,
                    index_types=header_types,
                    chunksize=(8, 2, 128, 1024),
                    overwrite=True,
                    endian=endian,
                    grid_overrides=grid_overrides,
                )
192+
193+
142194
@pytest.mark.parametrize("header_locations", [(17, 13)])
143195
@pytest.mark.parametrize("header_names", [("inline", "crossline")])
144196
@pytest.mark.parametrize("endian", ["big"])

0 commit comments

Comments
 (0)