Skip to content

Commit 40ed5e0

Browse files
markspectasansal
andauthored
AutoShotWrap grid override (#338)
* Added AutoShotWrap grid override. This allows ingestion with gun indexed by unwrapping shot_point for shot_lines. * Add documentation to cover segy_to_mdio AutoShotWrap grid override. * Resolve merge conflicts. * Update Dockerfile to align with nox pipeline checks. * Merge poetry.lock. * Fix ingestion tests. * Fix issues with tests. * Switch to poetry.lock from main to resolve conflict. * Add some more explanation on ShotGunGeometryType. * Linting updates. * Remove unused code. * Update .devcontainer/Dockerfile * Update tests/integration/conftest.py * update long text syntax * Fix linting. --------- Co-authored-by: Altay Sansal <[email protected]>
1 parent f19762c commit 40ed5e0

File tree

5 files changed

+261
-11
lines changed

5 files changed

+261
-11
lines changed

.devcontainer/Dockerfile

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
1-
ARG PYTHON_VERSION=3.11
1+
ARG PYTHON_VERSION=3.12
22
ARG LINUX_DISTRO=bookworm
33

44
FROM mcr.microsoft.com/devcontainers/python:1-${PYTHON_VERSION}-${LINUX_DISTRO}
55

66
# Install git for nox pre-commit
77
RUN apt-get update \
8-
&& apt-get install -y --no-install-recommends \
9-
git \
10-
&& rm -rf /var/lib/apt/lists/*
8+
&& apt-get install -y --no-install-recommends \
9+
git \
10+
&& rm -rf /var/lib/apt/lists/*
1111

1212
# Poetry
13-
ARG POETRY_VERSION="1.6.1"
13+
ARG POETRY_VERSION="1.8.2"
1414
RUN if [ "${POETRY_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install poetry==${POETRY_VERSION}"; fi
1515

1616
# Nox

src/mdio/converters/segy.py

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ def grid_density_qc(grid: Grid, num_traces: int) -> None:
9090

9191
logger.debug(f"Dimensions: {dims}")
9292
logger.debug(f"num_traces = {num_traces}")
93+
logger.debug(f"grid_traces = {grid_traces}")
94+
logger.debug(f"sparsity = {grid_traces / num_traces}")
9395

9496
grid_sparsity_ratio_limit = os.getenv("MDIO__GRID__SPARSITY_RATIO_LIMIT", 10)
9597
try:
@@ -270,7 +272,7 @@ def segy_to_mdio(
270272
... mdio_path_or_buffer="s3://bucket/shot_file.mdio",
271273
... index_bytes=(17, 137, 13),
272274
... index_lengths=(4, 2, 4),
273-
... index_names=("shot", "cable", "channel"),
275+
... index_names=("shot_point", "cable", "channel"),
274276
... chunksize=(8, 2, 128, 1024),
275277
... grid_overrides={"ChannelWrap": True, "ChannelsPerCable": 800},
276278
... )
@@ -283,6 +285,31 @@ def segy_to_mdio(
283285
>>> grid_overrides={"AutoChannelWrap": True,
284286
"AutoChannelTraceQC": 1000000}
285287
288+
For ingestion of pre-stack streamer data where the user needs to
289+
access/index *common-channel gathers* (single gun) then the following
290+
strategy can be used to densely ingest while indexing on gun number:
291+
292+
>>> segy_to_mdio(
293+
... segy_path="prefix/shot_file.segy",
294+
... mdio_path_or_buffer="s3://bucket/shot_file.mdio",
295+
... index_bytes=(133, 171, 17, 137, 13),
296+
... index_lengths=(2, 2, 4, 2, 4),
297+
... index_names=("shot_line", "gun", "shot_point", "cable", "channel"),
298+
... chunksize=(1, 1, 8, 1, 128, 1024),
299+
... grid_overrides={
300+
... "AutoShotWrap": True,
301+
... "AutoChannelWrap": True,
302+
... "AutoChannelTraceQC": 1000000
303+
... },
304+
... )
305+
306+
For AutoShotWrap and AutoChannelWrap to work, the user must provide
307+
"shot_line", "gun", "shot_point", "cable", "channel". For improved
308+
common-channel performance consider modifying the chunksize to be
309+
(1, 1, 32, 1, 32, 2048) for good common-shot and common-channel
310+
performance or (1, 1, 128, 1, 1, 2048) for common-channel
311+
performance.
312+
286313
For cases with no well-defined trace header for indexing a NonBinned
287314
grid override is provided. This creates the index and attributes an
288315
incrementing integer to the trace for the index based on first in first

src/mdio/segy/geometry.py

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,28 @@ class StreamerShotGeometryType(Enum):
6060
C = auto()
6161

6262

63+
class ShotGunGeometryType(Enum):
    """Shot point numbering templates for shot lines acquired with several guns.

    Within one shot line the ``shot_point`` header can follow one of two
    numbering schemes.

    Configuration A: every gun carries its own dense shot point sequence.
        Gun 1 -> 1------------------20
        Gun 2 -> 1------------------20

    Configuration B: shot points are unique across the whole line, so the
    sequence seen by any single gun is not dense.
        Gun 1 -> 1------------------39
        Gun 2 -> 2------------------40

    Configuration A is the layout wanted for indexing (dense and unique per
    gun); configuration B is the numbering typically found in the headers.
    """

    # Shot points are dense for each individual gun.
    A = auto()
    # Shot points are unique per line and therefore sparse for each gun.
    B = auto()
83+
84+
6385
def analyze_streamer_headers(
6486
index_headers: dict[str, npt.NDArray],
6587
) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray, StreamerShotGeometryType]:
@@ -91,6 +113,7 @@ def analyze_streamer_headers(
91113

92114
# Check channel numbers do not overlap for case B
93115
geom_type = StreamerShotGeometryType.B
116+
94117
for idx1, cable1 in enumerate(unique_cables):
95118
min_val1 = cable_chan_min[idx1]
96119
max_val1 = cable_chan_max[idx1]
@@ -124,13 +147,65 @@ def analyze_streamer_headers(
124147
return unique_cables, cable_chan_min, cable_chan_max, geom_type
125148

126149

150+
def analyze_shotlines_for_guns(
    index_headers: dict[str, npt.NDArray],
) -> tuple[npt.NDArray, dict[str, list], ShotGunGeometryType]:
    """Inspect shot line, gun and shot point headers to classify the gun geometry.

    For every shot line the guns firing on that line are collected. For each
    gun the shot points are then divided by the number of guns on the line and
    floored; if two distinct shot points of one gun collapse onto the same
    value, the geometry is reported as type A. If no gun on any line shows
    such a collision, the geometry is type B.

    Args:
        index_headers: Mapping of header name to per-trace values. The keys
            "shot_line", "gun" and "shot_point" are read.

    Returns:
        Tuple of ``(unique_shot_lines, unique_guns_in_shot_line, geom_type)``.
        ``unique_shot_lines`` is the sorted array of shot line values.
        ``unique_guns_in_shot_line`` maps ``str(shot_line)`` to the sorted list
        of guns on that line and has an entry for every shot line, whichever
        geometry is detected. ``geom_type`` is the detected
        ``ShotGunGeometryType``.
    """
    shot_line_header = index_headers["shot_line"]
    shot_point_header = index_headers["shot_point"]
    gun_header = index_headers["gun"]

    # np.unique already returns its result sorted.
    unique_shot_lines = np.unique(shot_line_header)
    unique_guns = np.unique(gun_header)
    logger.info(f"unique_shot_lines: {unique_shot_lines}")
    logger.info(f"unique_guns: {unique_guns}")

    unique_guns_in_shot_line = {}
    geom_type = ShotGunGeometryType.B

    for shot_line in unique_shot_lines:
        shot_line_mask = shot_line_header == shot_line
        shot_current_sl = shot_point_header[shot_line_mask]
        gun_current_sl = gun_header[shot_line_mask]

        unique_guns_sl = np.unique(gun_current_sl)
        num_guns_sl = unique_guns_sl.shape[0]
        unique_guns_in_shot_line[str(shot_line)] = list(unique_guns_sl)

        # Once type A is established only the gun listing is still needed.
        # Keep looping (instead of returning early) so callers that look up
        # every shot line in the mapping never hit a missing key.
        if geom_type == ShotGunGeometryType.A:
            continue

        # Check shot numbers are still unique after dividing by the gun count.
        for gun in unique_guns_sl:
            shots_current_sl_gun = shot_current_sl[gun_current_sl == gun]
            num_shots_sl = np.unique(shots_current_sl_gun).shape[0]
            mod_shots = np.unique(np.floor(shots_current_sl_gun / num_guns_sl))
            if len(mod_shots) != num_shots_sl:
                msg = (
                    f"Shot line {shot_line} has {num_shots_sl} when using div by "
                    f"{num_guns_sl} (num_guns) has {mod_shots} unique mod shots."
                )
                logger.info(msg)
                geom_type = ShotGunGeometryType.A
                break

    return unique_shot_lines, unique_guns_in_shot_line, geom_type
200+
201+
127202
def create_counter(
128203
depth: int,
129204
total_depth: int,
130205
unique_headers: dict[str, npt.NDArray],
131206
header_names: list[str],
132207
):
133-
"""Helper funtion to create dictionary tree for counting trace key for auto index."""
208+
"""Helper function to create dictionary tree for counting trace key for auto index."""
134209
if depth == total_depth:
135210
return 0
136211

@@ -490,6 +565,54 @@ def transform(
490565
return index_headers
491566

492567

568+
class AutoShotWrap(GridOverrideCommand):
    """Grid override that unwraps shot points for multi-gun shot lines.

    The gun geometry is detected with ``analyze_shotlines_for_guns``. When the
    shot points are numbered uniquely per line (type B), they are divided by
    the number of guns and floored so each gun gets a compact shot point
    index. Type A data is returned unchanged.
    """

    required_keys = {"shot_line", "gun", "shot_point", "cable", "channel"}
    required_parameters = None

    def validate(
        self,
        index_headers: dict[str, npt.NDArray],
        grid_overrides: dict[str, bool | int],
    ) -> None:
        """Validate if this transform should run on the type of data.

        Delegates to the key and parameter checks provided by the base class.

        Args:
            index_headers: Mapping of header name to per-trace values.
            grid_overrides: Grid override options supplied by the user.
        """
        self.check_required_keys(index_headers)
        self.check_required_params(grid_overrides)

    def transform(
        self,
        index_headers: dict[str, npt.NDArray],
        grid_overrides: dict[str, bool | int],
    ) -> dict[str, npt.NDArray]:
        """Perform the grid transform.

        Args:
            index_headers: Mapping of header name to per-trace values. The
                "shot_point" values are modified in place for type B data.
            grid_overrides: Grid override options supplied by the user.

        Returns:
            The same ``index_headers`` object that was passed in.
        """
        self.validate(index_headers, grid_overrides)

        _, unique_guns_in_shot_line, geom_type = analyze_shotlines_for_guns(
            index_headers
        )
        logger.info(f"Ingesting dataset as shot type: {geom_type.name}")

        # Iterate over the entries that were actually returned instead of
        # indexing the mapping by every shot line, so a partially filled
        # mapping cannot raise KeyError here.
        for shot_line, guns in unique_guns_in_shot_line.items():
            logger.info(f"shot_line: {shot_line} has guns: {guns}")

        max_num_guns = max(
            (len(guns) for guns in unique_guns_in_shot_line.values()),
            default=1,
        )

        if geom_type == ShotGunGeometryType.B:
            # Every trace belongs to exactly one shot line and all lines use
            # the same divisor, so one vectorised in-place update replaces a
            # per-shot-line loop.
            # NOTE(review): detection divides by the gun count of each line,
            # while this uses the maximum over all lines; the two differ when
            # lines have different gun counts - confirm this is intended.
            index_headers["shot_point"][:] = np.floor(
                index_headers["shot_point"] / max_num_guns
            )
        return index_headers
614+
615+
493616
class GridOverrider:
494617
"""Executor for grid overrides.
495618
@@ -503,6 +626,7 @@ def __init__(self):
503626
"""Define allowed overrides and parameters here."""
504627
self.commands = {
505628
"AutoChannelWrap": AutoChannelWrap(),
629+
"AutoShotWrap": AutoShotWrap(),
506630
"CalculateCable": CalculateCable(),
507631
"ChannelWrap": ChannelWrap(),
508632
"NonBinned": NonBinned(),

tests/integration/conftest.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def create_segy_mock_4d(
1818
shots: list,
1919
cables: list,
2020
receivers_per_cable: list,
21+
guns: list | None = None,
2122
chan_header_type: StreamerShotGeometryType = StreamerShotGeometryType.A,
2223
index_receivers: bool = True,
2324
) -> str:
@@ -55,20 +56,30 @@ def create_segy_mock_4d(
5556
index_receivers = False
5657

5758
shot_headers = np.hstack([np.repeat(shot, total_chan) for shot in shots])
59+
60+
gun_per_shot = []
61+
for shot in shots:
62+
gun_per_shot.append(guns[(shot % len(guns))])
63+
gun_headers = np.hstack([np.repeat(gun, total_chan) for gun in gun_per_shot])
64+
5865
cable_headers = np.tile(cable_headers, shot_count)
5966
channel_headers = np.tile(channel_headers, shot_count)
6067

6168
with segyio.create(segy_file, spec) as f:
6269
for trc_idx in range(trace_count):
6370
shot = shot_headers[trc_idx]
71+
gun = gun_headers[trc_idx]
6472
cable = cable_headers[trc_idx]
6573
channel = channel_headers[trc_idx]
74+
source_line = 1
6675

6776
# offset is byte location 37 - offset 4 bytes
6877
# fldr is byte location 9 - shot 4 byte
6978
# ep is byte location 17 - shot 4 byte
7079
# stae is byte location 137 - cable 2 byte
7180
# tracf is byte location 13 - channel 4 byte
81+
# grnors is byte location 171 - gun 2 bytes
82+
# styp is byte location 133 - source_line 2 bytes
7283

7384
if index_receivers:
7485
f.header[trc_idx].update(
@@ -77,6 +88,8 @@ def create_segy_mock_4d(
7788
ep=shot,
7889
stae=cable,
7990
tracf=channel,
91+
grnors=gun,
92+
styp=source_line,
8093
)
8194
else:
8295
f.header[trc_idx].update(
@@ -98,7 +111,8 @@ def create_segy_mock_4d(
98111
def segy_mock_4d_shots(fake_segy_tmp: str) -> dict[StreamerShotGeometryType, str]:
99112
"""Generate mock 4D shot SEG-Y files."""
100113
num_samples = 25
101-
shots = [2, 3, 5]
114+
shots = [2, 3, 5, 6, 7, 8, 9]
115+
guns = [1, 2]
102116
cables = [0, 101, 201, 301]
103117
receivers_per_cable = [1, 5, 7, 5]
104118

@@ -112,6 +126,7 @@ def segy_mock_4d_shots(fake_segy_tmp: str) -> dict[StreamerShotGeometryType, str
112126
cables=cables,
113127
receivers_per_cable=receivers_per_cable,
114128
chan_header_type=chan_header_type,
129+
guns=guns,
115130
)
116131

117132
return segy_paths

0 commit comments

Comments
 (0)