Skip to content

Commit 39b6699

Browse files
add function to subset tc by years
1 parent f90267a commit 39b6699

File tree

3 files changed

+119
-0
lines changed

3 files changed

+119
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ Removed:
6060

6161
### Added
6262

63+
- `climada.hazard.tc_tracks.TCTracks.subset_year` function
6364
- `climada.hazard.tc_tracks.TCTracks.from_FAST` function, add Australia basin (AU) [#993](https://github.com/CLIMADA-project/climada_python/pull/993)
6465
- Add `osm-flex` package to CLIMADA core [#981](https://github.com/CLIMADA-project/climada_python/pull/981)
6566
- `doc.tutorial.climada_entity_Exposures_osm.ipynb` tutorial explaining how to use `osm-flex` with CLIMADA

climada/hazard/tc_tracks.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import re
2929
import shutil
3030
import warnings
31+
from operator import itemgetter
3132
from pathlib import Path
3233
from typing import List, Optional
3334

@@ -321,6 +322,74 @@ def subset(self, filterdict):
321322

322323
return out
323324

325+
def subset_year(
    self, start_year: Optional[int] = None, end_year: Optional[int] = None
):
    """Subset TCTracks between start and end years, both included.

    A track is selected when the year of its first time step lies within
    ``[start_year, end_year]``.

    Parameters
    ----------
    start_year : int
        First year to include in the selection. Must be given; the ``None``
        default is rejected with a ``TypeError``.
    end_year : int
        Last year to include in the selection. Must be given; the ``None``
        default is rejected with a ``TypeError``.

    Returns
    -------
    subset : TCTracks
        New object of the same class as ``self`` whose ``data`` attribute is
        a list holding the selected tracks in their original order. The
        tracks themselves are not copied.

    Raises
    ------
    TypeError
        - If either `start_year` or `end_year` is not an integer.
        - If `self.data` is empty (i.e., no tracks are available).
    ValueError
        - If `start_year` is greater than `end_year`.
        - If the year cannot be extracted from the first time step of a track.
        - If no tracks are found within the specified year range.
    """
    if not isinstance(start_year, int) or not isinstance(end_year, int):
        raise TypeError("Both start_year and end_year must be integers.")

    if start_year > end_year:
        raise ValueError(
            f"start_year ({start_year}) cannot be greater than end_year ({end_year})."
        )

    if not self.data:
        raise TypeError("self.data should be a non-empty list of tracks.")

    # Collect the tracks directly instead of indices: operator.itemgetter
    # returns a bare item for a single index and a tuple otherwise, which
    # would leave `data` as something other than a list.
    selected = []
    for i, track in enumerate(self.data):
        try:
            first_step = track.time[0].to_numpy()
            year = first_step.astype("datetime64[Y]").item().year
        except AttributeError as err:
            raise ValueError(
                f"Invalid date format in track {i}, could not extract year."
            ) from err

        if start_year <= year <= end_year:
            selected.append(track)

    if not selected:
        raise ValueError(
            f"No tracks found for the years between {start_year} and {end_year}."
        )

    # Build the result only after all validation has passed.
    subset = self.__class__()
    subset.data = selected
    return subset
392+
324393
def tracks_in_exp(self, exposure, buffer=1.0):
325394
"""Select only the tracks that are in the vicinity (buffer) of an exposure.
326395

climada/hazard/test/test_tc_tracks.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import unittest
2323
from datetime import datetime as dt
2424

25+
import cftime
2526
import geopandas as gpd
2627
import numpy as np
2728
import pandas as pd
@@ -763,6 +764,54 @@ def test_subset(self):
763764
tc_track = tc.TCTracks.from_ibtracs_netcdf(storm_id=storms)
764765
self.assertEqual(tc_track.subset({"basin": "SP"}).size, 2)
765766

767+
def test_subset_years(self):
    """Check that subset_year keeps the tracks inside the requested year
    range and rejects invalid requests."""
    tracks = tc.TCTracks.from_simulations_emanuel(TEST_TRACK_EMANUEL)

    # Re-date the first five tracks to 20 February of 2000, 2001, ..., 2004.
    for offset in range(5):
        stamp = cftime.DatetimeProlepticGregorian(
            2000 + offset, 2, 20, 0, 0, 0, 0, has_year_zero=True
        )
        n_steps = tracks.data[offset].time.shape[0]
        tracks.data[offset]["time"] = np.full(n_steps, stamp)

    selected = tracks.subset_year(start_year=2001, end_year=2003)

    self.assertEqual(len(selected.data), 3)
    for position, expected_year in enumerate((2001, 2002, 2003)):
        self.assertEqual(
            selected.data[position].time[0].item().year, expected_year
        )

    # Each entry: expected exception, expected message pattern, call arguments.
    rejected_calls = (
        # Invalid input: non-integer start_year
        (
            TypeError,
            "Both start_year and end_year must be integers.",
            {"start_year": "2000", "end_year": 2003},
        ),
        # Invalid input: non-integer end_year
        (
            TypeError,
            "Both start_year and end_year must be integers.",
            {"start_year": 2000, "end_year": None},
        ),
        # Invalid range: start_year greater than end_year
        (
            ValueError,
            r"start_year \(2005\) cannot be greater than end_year \(2000\).",
            {"start_year": 2005, "end_year": 2000},
        ),
        # No tracks match the year range
        (
            ValueError,
            "No tracks found for the years between 2050 and 2060.",
            {"start_year": 2050, "end_year": 2060},
        ),
    )
    for expected_error, pattern, arguments in rejected_calls:
        with self.assertRaisesRegex(expected_error, pattern):
            tracks.subset_year(**arguments)

    # Empty data case
    no_tracks = tc.TCTracks()
    with self.assertRaisesRegex(
        TypeError, "self.data should be a non-empty list of tracks."
    ):
        no_tracks.subset_year(start_year=2000, end_year=2010)
814+
766815
def test_get_extent(self):
767816
"""Test extent/bounds attributes."""
768817
storms = ["1988169N14259", "2002073S16161", "2002143S07157"]

0 commit comments

Comments
 (0)