Skip to content

Commit 65a9a74

Browse files
committed
Resolve data serialization issues
1 parent 90fc97e commit 65a9a74

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

src/mdio/converters/segy.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,15 @@
1111
from segy.config import SegySettings
1212
from segy.standards.codes import MeasurementSystem as segy_MeasurementSystem
1313
from segy.standards.fields.trace import Rev0 as TraceHeaderFieldsRev0
14+
from xarray import Variable
1415

1516
from mdio.constants import UINT32_MAX
1617
from mdio.converters.exceptions import EnvironmentFormatError
1718
from mdio.converters.exceptions import GridTraceCountError
1819
from mdio.converters.exceptions import GridTraceSparsityError
1920
from mdio.converters.type_converter import to_structured_type
2021
from mdio.core.grid import Grid
22+
from mdio.schemas.v1.dataset_serializer import _get_fill_value
2123
from mdio.schemas.v1.dataset_serializer import to_xarray_dataset
2224
from mdio.schemas.v1.units import AllUnits
2325
from mdio.schemas.v1.units import LengthUnitEnum
@@ -227,7 +229,18 @@ def populate_non_dim_coordinates(
227229
"""Populate the xarray dataset with coordinate variables."""
228230
not_null = grid.map[:] != UINT32_MAX
229231
for c_name, c_values in coordinates.items():
230-
dataset[c_name].values[not_null] = c_values
232+
encodings = dataset[c_name].encoding
233+
tmp_coords = np.full(
234+
not_null.shape, dtype=dataset[c_name].dtype, fill_value=_get_fill_value(dataset[c_name].dtype)
235+
)
236+
tmp_coords[not_null] = c_values
237+
dataset[c_name].values = tmp_coords
238+
dataset[c_name] = Variable(
239+
dataset[c_name].dims,
240+
tmp_coords,
241+
attrs=dataset[c_name].attrs,
242+
encoding=encodings, # Ensure we preserve all of the encodings.
243+
)
231244
drop_vars_delayed.append(c_name)
232245
return dataset, drop_vars_delayed
233246

src/mdio/segy/_workers.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from xarray import Variable
2323

2424
from mdio.constants import UINT32_MAX
25+
from mdio.schemas.v1.dataset_serializer import _get_fill_value
2526
from mdio.schemas.v1.stats import CenteredBinHistogram
2627
from mdio.schemas.v1.stats import SummaryStatistics
2728

@@ -125,6 +126,7 @@ def trace_worker( # noqa: PLR0913
125126

126127
if hdr_key in worker_variables:
127128
# Create temporary array for headers with the correct shape
129+
# TODO(BrianMichell): Headers are still zero-filled here while samples use _get_fill_value; make the header fill configurable so fill values can be enabled without changing the code. #noqa: TD003
128130
tmp_headers = np.zeros(not_null.shape, dtype=ds_to_write[hdr_key].dtype)
129131
tmp_headers[not_null] = traces.header
130132
# Create a new Variable object to avoid copying the temporary array
@@ -136,7 +138,11 @@ def trace_worker( # noqa: PLR0913
136138

137139
# Get the sample dimension size from the data variable itself
138140
sample_dim_size = ds_to_write[data_variable_name].shape[-1]
139-
tmp_samples = np.zeros(not_null.shape + (sample_dim_size,), dtype=ds_to_write[data_variable_name].dtype)
141+
tmp_samples = np.full(
142+
not_null.shape + (sample_dim_size,),
143+
dtype=ds_to_write[data_variable_name].dtype,
144+
fill_value=_get_fill_value(ds_to_write[data_variable_name].dtype),
145+
)
140146

141147
# Assign trace samples to the correct positions
142148
# We need to handle the fact that traces.sample is (num_traces, num_samples)

0 commit comments

Comments
 (0)