Skip to content

Commit c6851cd

Browse files
authored
TCTracks: Option to read additional variables from IBTrACS (#728)
Add option to read additional variables from IBTrACS datasets (e.g. "nature" or "stormtype")
1 parent b67314a commit c6851cd

File tree

3 files changed

+83
-20
lines changed

3 files changed

+83
-20
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ Removed:
4141
- `Exposures.affected_total_value` now takes a hazard intensity threshold as argument. Affected values are only those for which at least one event exceeds the threshold. (previously, all exposures points with an assigned centroid were considered affected). By default the centroids are reassigned. [#702](https://github.com/CLIMADA-project/climada_python/pull/702) [#730](https://github.com/CLIMADA-project/climada_python/pull/730)
4242
- Add option to pass region ID to `LitPop.from_shape` [#720](https://github.com/CLIMADA-project/climada_python/pull/720)
4343
- Slightly improved performance on `LitPop`-internal computations [#720](https://github.com/CLIMADA-project/climada_python/pull/720)
44+
- Add option to read additional variables from IBTrACS when using `TCTracks.from_ibtracs_netcdf` [#728](https://github.com/CLIMADA-project/climada_python/pull/728)
4445

4546
### Fixed
4647

climada/hazard/tc_tracks.py

Lines changed: 39 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,9 @@ class TCTracks():
189189
Computed during processing:
190190
- on_land (bool for each track position)
191191
- dist_since_lf (in km)
192+
Additional data variables such as "nature" (specifying, for each track position, whether a
193+
system is a disturbance, tropical storm, post-transition extratropical storm etc.) might be
194+
included, depending on the data source and on use cases.
192195
"""
193196
def __init__(self,
194197
data: Optional[List[xr.Dataset]] = None,
@@ -328,7 +331,7 @@ def read_ibtracs_netcdf(self, *args, **kwargs):
328331
def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=None,
329332
year_range=None, basin=None, genesis_basin=None,
330333
interpolate_missing=True, estimate_missing=False, correct_pres=False,
331-
discard_single_points=True,
334+
discard_single_points=True, additional_variables=None,
332335
file_name='IBTrACS.ALL.v04r00.nc'):
333336
"""Create new TCTracks object from IBTrACS databse.
334337
@@ -438,6 +441,10 @@ def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=No
438441
file_name : str, optional
439442
Name of NetCDF file to be downloaded or located at climada/data/system.
440443
Default: 'IBTrACS.ALL.v04r00.nc'
444+
additional_variables : list of str, optional
445+
If specified, additional IBTrACS data variables are extracted, such as "nature" or
446+
"storm_speed". Only variables that are not agency-specific are supported.
447+
Default: None.
441448
442449
Returns
443450
-------
@@ -462,6 +469,9 @@ def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=No
462469
f'Error while downloading {IBTRACS_URL}. Try to download it manually and '
463470
f'put the file in {ibtracs_path}') from err
464471

472+
if additional_variables is None:
473+
additional_variables = []
474+
465475
ibtracs_ds = xr.open_dataset(ibtracs_path)
466476
ibtracs_date = ibtracs_ds.attrs["date_created"]
467477
if (np.datetime64('today') - np.datetime64(ibtracs_date)).item().days > 180:
@@ -576,7 +586,8 @@ def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=No
576586
ibtracs_ds[tc_var].sel(storm=nonsingular_mask).interpolate_na(
577587
dim="date_time", method="linear"))
578588
ibtracs_ds = ibtracs_ds[['sid', 'name', 'basin', 'time', 'valid_t']
579-
+ phys_vars + [f'{v}_agency' for v in phys_vars]]
589+
+ additional_variables + phys_vars
590+
+ [f'{v}_agency' for v in phys_vars]]
580591

581592
if estimate_missing:
582593
ibtracs_ds['pres'][:] = _estimate_pressure(
@@ -685,28 +696,37 @@ def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=No
685696
"{}({})".format(v, track_ds[f'{v}_agency'].astype(str).item())
686697
for v in phys_vars)
687698

688-
all_tracks.append(xr.Dataset({
689-
'time_step': ('time', track_ds.time_step.data),
699+
data_vars = {
690700
'radius_max_wind': ('time', track_ds.rmw.data),
691701
'radius_oci': ('time', track_ds.roci.data),
692702
'max_sustained_wind': ('time', track_ds.wind.data),
693703
'central_pressure': ('time', track_ds.pres.data),
694704
'environmental_pressure': ('time', track_ds.poci.data),
695-
'basin': ('time', track_ds.basin.data.astype("<U2")),
696-
}, coords={
697-
'time': track_ds.time.dt.round('s').data,
705+
}
706+
coords = {
707+
'time': ('time', track_ds.time.dt.round('s').data),
698708
'lat': ('time', track_ds.lat.data),
699709
'lon': ('time', track_ds.lon.data),
700-
}, attrs={
710+
}
711+
attrs = {
701712
'max_sustained_wind_unit': 'kn',
702713
'central_pressure_unit': 'mb',
703-
'name': track_ds.name.astype(str).item(),
704-
'sid': track_ds.sid.astype(str).item(),
705714
'orig_event_flag': True,
706715
'data_provider': provider_str,
707-
'id_no': track_ds.id_no.item(),
708716
'category': category[i_track],
709-
}))
717+
}
718+
# automatically assign the remaining variables as attributes or data variables
719+
for varname in ["time_step", "basin", "name", "sid", "id_no"] + additional_variables:
720+
values = track_ds[varname].data
721+
if track_ds[varname].dtype.kind == "S":
722+
# This converts the `bytes` (dtype "|S*") in IBTrACS to the more common `str`
723+
# objects (dtype "<U*") that we use in CLIMADA.
724+
values = values.astype(str)
725+
if values.ndim == 0:
726+
attrs[varname] = values.item()
727+
else:
728+
data_vars[varname] = ('time', values)
729+
all_tracks.append(xr.Dataset(data_vars, coords=coords, attrs=attrs))
710730
if last_perc != 100:
711731
LOGGER.info("Progress: 100%")
712732
if len(all_tracks) == 0:
@@ -1382,8 +1402,10 @@ def from_hdf5(cls, file_name):
13821402
for i in track_no:
13831403
track = ds_dict[f'track{i}']
13841404
track.attrs['orig_event_flag'] = bool(track.attrs['orig_event_flag'])
1385-
# when writing '<U2' and reading in again, xarray reads as dtype 'object'. undo this:
1386-
track['basin'] = track['basin'].astype('<U2')
1405+
# when writing '<U*' and reading in again, xarray reads as dtype 'object'. undo this:
1406+
for varname in track.data_vars:
1407+
if track[varname].dtype == "object":
1408+
track[varname] = track[varname].astype(str)
13871409
data.append(track)
13881410
return cls(data)
13891411

@@ -1499,7 +1521,9 @@ def _one_interp_data(track, time_step_h, land_geom=None):
14991521
time_step = pd.tseries.frequencies.to_offset(pd.Timedelta(hours=time_step_h)).freqstr
15001522
track_int = track.resample(time=time_step, skipna=True)\
15011523
.interpolate('linear')
1502-
track_int['basin'] = track.basin.resample(time=time_step).nearest()
1524+
for var in track.data_vars:
1525+
if "time" in track[var].dims and track[var].dtype.kind != "f":
1526+
track_int[var] = track[var].resample(time=time_step).nearest()
15031527
track_int['time_step'][:] = time_step_h
15041528
lon_int = lon.resample(time=time_step).interpolate(method)
15051529
lon_int[lon_int > 180] -= 360

climada/hazard/test/test_tc_tracks.py

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -256,18 +256,56 @@ def test_ibtracs_discard_single_points(self):
256256
break
257257
self.assertTrue(passed)
258258

259+
def test_ibtracs_additional_variables(self):
260+
"""Check additional_variables option"""
261+
# this is the complete list (as of May 2023) of variables in IBTrACS that are not
262+
# agency-specific and that are not already considered by other parts of
263+
# `from_ibtracs_netcdf`:
264+
addtl_vars = [
265+
'numobs', 'season', 'number', 'subbasin', 'name', 'source_usa', 'source_jma',
266+
'source_cma', 'source_hko', 'source_new', 'source_reu', 'source_bom', 'source_nad',
267+
'source_wel', 'source_td5', 'source_td6', 'source_ds8', 'source_neu', 'source_mlc',
268+
'iso_time', 'nature', 'wmo_wind', 'wmo_pres', 'wmo_agency', 'track_type',
269+
'main_track_sid', 'dist2land', 'landfall', 'iflag', 'storm_speed', 'storm_dir',
270+
]
271+
tc_track = tc.TCTracks.from_ibtracs_netcdf(
272+
storm_id='2017242N16333',
273+
additional_variables=addtl_vars,
274+
)
275+
track_ds = tc_track.get_track()
276+
for v in addtl_vars:
277+
self.assertIn(v, list(track_ds.data_vars) + list(track_ds.attrs))
278+
self.assertEqual(track_ds.attrs["numobs"], 123)
279+
self.assertEqual(track_ds.attrs["season"], 2017)
280+
self.assertEqual(track_ds["nature"].values[0], "TS")
281+
self.assertEqual(track_ds["nature"].values[-1], "DS")
282+
self.assertEqual(track_ds["subbasin"].values[0], "NA")
283+
self.assertEqual(track_ds["subbasin"].values[58], "CS")
284+
self.assertEqual(track_ds["dist2land"].values[0], 1020)
285+
self.assertEqual(track_ds["dist2land"].values[-1], 0)
286+
self.assertEqual(track_ds["storm_speed"].values[0], 13.0)
287+
self.assertEqual(track_ds["storm_speed"].values[5], 11.0)
288+
self.assertEqual(track_ds["storm_speed"].values[-1], 8.0)
289+
259290
class TestIO(unittest.TestCase):
260291
"""Test reading of tracks from files of different formats"""
261292
def test_netcdf_io(self):
262293
"""Test writting and reading netcdf4 TCTracks instances"""
263294
path = DATA_DIR.joinpath("tc_tracks_nc")
264295
path.mkdir(exist_ok=True)
265296
tc_track = tc.TCTracks.from_ibtracs_netcdf(
266-
provider='usa', storm_id='1988234N13299', estimate_missing=True)
267-
tc_track.write_netcdf(str(path))
297+
provider='usa', storm_id='1988234N13299', estimate_missing=True,
298+
additional_variables=["numobs", "storm_speed", "nature"],
299+
)
300+
tc_track.write_netcdf(path)
301+
tc_read = tc.TCTracks.from_netcdf(path)
268302

269-
tc_read = tc.TCTracks.from_netcdf(str(path))
270-
self.assertEqual(tc_track.get_track().sid, tc_read.get_track().sid)
303+
ds_read = tc_read.get_track()
304+
ds_write = tc_track.get_track()
305+
self.assertEqual(ds_write.attrs, ds_read.attrs)
306+
self.assertEqual(sorted(ds_write.data_vars), sorted(ds_read.data_vars))
307+
for v in ds_write.data_vars:
308+
np.testing.assert_array_equal(ds_write[v], ds_read[v])
271309

272310
def test_read_legacy_netcdf(self):
273311
"""Test reading from NetCDF files with legacy basin attributes"""
@@ -281,7 +319,7 @@ def test_read_legacy_netcdf(self):
281319
np.testing.assert_array_equal(tr.basin, "SP")
282320

283321
def test_hdf5_io(self):
284-
"""Test writting and reading hdf5 TCTracks instances"""
322+
"""Test writing and reading hdf5 TCTracks instances"""
285323
path = DATA_DIR.joinpath("tc_tracks.h5")
286324
tc_track = tc.TCTracks.from_ibtracs_netcdf(
287325
provider='usa', year_range=(1993, 1994), basin='EP', estimate_missing=True)

0 commit comments

Comments
 (0)