Skip to content

Commit 11dc9e5

Browse files
committed
feat: support larger integers for station identifiers via optional 64-bit ('i8') storage, i.e. values up to 9223372036854775807 (19 digits).
Fixes #17
1 parent 7883145 commit 11dc9e5

File tree

3 files changed

+44
-2
lines changed

3 files changed

+44
-2
lines changed

src/efts_io/_ncdf_stf2.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ def write_nc_stf2(
106106
data_qual: Optional[xr.DataArray] = None,
107107
overwrite: bool = True, # noqa: FBT001, FBT002
108108
# loc_info: Optional[Dict[str, Any]] = None,
109+
intdata_type: str = "i4",
109110
) -> None:
110111
from efts_io.conventions import ( # noqa: I001
111112
X_VARNAME,
@@ -163,8 +164,6 @@ def _check_optional_var_attr(dataset: xr.Dataset, var_id: str) -> None:
163164
for var_id in (AREA_VARNAME, X_VARNAME, Y_VARNAME, ELEVATION_VARNAME):
164165
_check_optional_var_attr(dataset, var_id)
165166

166-
intdata_type = "i4"
167-
168167
var_type = var_type.value
169168
data_type = data_type.value
170169

@@ -226,6 +225,20 @@ def _check_optional_var_attr(dataset: xr.Dataset, var_id: str) -> None:
226225
station_var[:] = station
227226

228227
# station_id
228+
229+
# we check that station_id can be safely stored as int32
230+
# This check is deliberate: it guards against the silent data corruption observed in
231+
# https://github.com/csiro-hydroinformatics/efts-io/issues/17
232+
if intdata_type == "i4":
233+
max_station_id = np.max(station_id)
234+
min_station_id = np.min(station_id)
235+
if not np.issubdtype(type(max_station_id), np.integer) or not np.issubdtype(type(min_station_id), np.integer):
236+
raise TypeError("station_id values must be integers to be stored in STF2.0 format.")
237+
if max_station_id > np.iinfo(np.int32).max or min_station_id < np.iinfo(np.int32).min:
238+
raise OverflowError(
239+
f"station_id values must be in the int32 range [{np.iinfo(np.int32).min}, {np.iinfo(np.int32).max}] to be stored in STF2.0 format.",
240+
)
241+
229242
station_id_var = ncfile.createVariable(STATION_ID_VARNAME, intdata_type, (STATION_DIMNAME,), fill_value=-9999)
230243
station_id_var.setncattr(LONG_NAME_ATTR_KEY, "station or node identification code")
231244
station_id_var[:] = station_id

src/efts_io/wrapper.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ def __init__(self, data: Union[str, xr.Dataset]) -> None:
187187
else:
188188
self.data = data
189189

190+
self.stf2_int_datatype = "i4" # default integer type for STF2 saving
191+
190192
@property
191193
def title(self) -> str:
192194
"""Get or set the title attribute of the dataset."""
@@ -326,6 +328,18 @@ def writeable_to_stf2(self) -> bool:
326328

327329
return exportable_to_stf2(self.data)
328330

331+
@property
332+
def stf2_int_datatype(self) -> str:
333+
"""The type of integer to save to in the STF 2.x netcdf convention: 'i4' or 'i8'."""
334+
return self._stf2_int_datatype
335+
336+
@stf2_int_datatype.setter
337+
def stf2_int_datatype(self, value: str) -> None:
338+
"""The type of integer to save to in the STF 2.x netcdf convention: 'i4' or 'i8'."""
339+
if value not in ("i4", "i8"):
340+
raise ValueError("stf2_int_datatype must be either 'i4' or 'i8'")
341+
self._stf2_int_datatype = value
342+
329343
def save_to_stf2(
330344
self,
331345
path: str,
@@ -359,6 +373,7 @@ def save_to_stf2(
359373
data_qual=data_qual, # : Optional[xr.DataArray] = None,
360374
overwrite=True, # :bool=True,
361375
# loc_info=loc_info, # : Optional[Dict[str, Any]] = None,
376+
intdata_type=self.stf2_int_datatype,
362377
)
363378

364379
def create_data_variables(self, data_var_def: Dict[str, Dict[str, Any]]) -> None:

tests/test_create.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# import netCDF4
22
import numpy as np
33
import pandas as pd
4+
import pytest
45
from efts_io._ncdf_stf2 import StfDataType, StfVariable
56
from efts_io.wrapper import EftsDataSet, xr_efts
67

@@ -132,6 +133,18 @@ def _create_test_ds():
132133
def test_repro_issue_16():
133134
"""Try to repro as closely as possible the issue reported in #16."""
134135
station_ids = [1, 2, 3]
136+
_saving_to_stf2(station_ids)
137+
138+
139+
def test_large_station_integers():
140+
"""Try to repro as closely as possible the issue reported in #17."""
141+
station_ids = [1, 2, 123456789123]
142+
with pytest.raises(OverflowError):
143+
_saving_to_stf2(station_ids, intdata_type="i4")
144+
_saving_to_stf2(station_ids, intdata_type="i8")
145+
146+
147+
def _saving_to_stf2(station_ids, intdata_type="i4"):
135148
xr_ds = xr_efts(
136149
issue_times=pd.date_range("2023-10-01", periods=31, freq="D"),
137150
station_ids=station_ids,
@@ -155,6 +168,7 @@ def test_repro_issue_16():
155168
longitudes=[10.0, 11.0, 12.0],
156169
)
157170
eds = EftsDataSet(xr_ds)
171+
eds.stf2_int_datatype = intdata_type
158172
eds.create_data_variables(
159173
{
160174
"rain_obs": {

0 commit comments

Comments
 (0)