@@ -106,6 +106,7 @@ def write_nc_stf2(
106106 data_qual : Optional [xr .DataArray ] = None ,
107107 overwrite : bool = True , # noqa: FBT001, FBT002
108108 # loc_info: Optional[Dict[str, Any]] = None,
109+ intdata_type : str = "i4" ,
109110) -> None :
110111 from efts_io .conventions import ( # noqa: I001
111112 X_VARNAME ,
@@ -163,8 +164,6 @@ def _check_optional_var_attr(dataset: xr.Dataset, var_id: str) -> None:
163164 for var_id in (AREA_VARNAME , X_VARNAME , Y_VARNAME , ELEVATION_VARNAME ):
164165 _check_optional_var_attr (dataset , var_id )
165166
166- intdata_type = "i4"
167-
168167 var_type = var_type .value
169168 data_type = data_type .value
170169
@@ -226,6 +225,20 @@ def _check_optional_var_attr(dataset: xr.Dataset, var_id: str) -> None:
226225 station_var [:] = station
227226
228227 # station_id
228+
229+ # Check that the station_id values can be stored safely as int32 before writing them.
230+ # This check is deliberate: it guards against the silent data corruption reported in
231+ # https://github.com/csiro-hydroinformatics/efts-io/issues/17
232+ if intdata_type == "i4" :
233+ max_station_id = np .max (station_id )
234+ min_station_id = np .min (station_id )
235+ if not np .issubdtype (type (max_station_id ), np .integer ) or not np .issubdtype (type (min_station_id ), np .integer ):
236+ raise TypeError ("station_id values must be integers to be stored in STF2.0 format." )
237+ if max_station_id > np .iinfo (np .int32 ).max or min_station_id < np .iinfo (np .int32 ).min :
238+ raise OverflowError (
239+ f"station_id values must be in the int32 range [{ np .iinfo (np .int32 ).min } , { np .iinfo (np .int32 ).max } ] to be stored in STF2.0 format." ,
240+ )
241+
229242 station_id_var = ncfile .createVariable (STATION_ID_VARNAME , intdata_type , (STATION_DIMNAME ,), fill_value = - 9999 )
230243 station_id_var .setncattr (LONG_NAME_ATTR_KEY , "station or node identification code" )
231244 station_id_var [:] = station_id
0 commit comments