Skip to content

Commit 206e57b

Browse files
Merge pull request #1023 from CLIMADA-project/feature/subset_years
Feature/subset_years
2 parents bd03769 + b96af1b commit 206e57b

File tree

3 files changed

+218
-0
lines changed

3 files changed

+218
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ Removed:
6969

7070
### Added
7171

72+
- `climada.hazard.tc_tracks.TCTracks.subset_year` function [#1023](https://github.com/CLIMADA-project/climada_python/pull/1023)
7273
- `climada.hazard.tc_tracks.TCTracks.from_FAST` function, add Australia basin (AU) [#993](https://github.com/CLIMADA-project/climada_python/pull/993)
7374
- Add `osm-flex` package to CLIMADA core [#981](https://github.com/CLIMADA-project/climada_python/pull/981)
7475
- `doc.tutorial.climada_entity_Exposures_osm.ipynb` tutorial explaining how to use `osm-flex` with CLIMADA

climada/hazard/tc_tracks.py

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import re
2929
import shutil
3030
import warnings
31+
from operator import itemgetter
3132
from pathlib import Path
3233
from typing import List, Optional
3334

@@ -321,6 +322,111 @@ def subset(self, filterdict):
321322

322323
return out
323324

325+
def subset_year(
    self,
    start_date: tuple = (False, False, False),
    end_date: tuple = (False, False, False),
) -> "TCTracks":
    """Select the tracks whose first time step matches year, month and day bounds.

    The three date components are filtered independently of each other: a
    track is kept if the year of its first time step lies within
    ``[start year, end year]`` *and* its month lies within
    ``[start month, end month]`` *and* its day of month lies within
    ``[start day, end day]``, all bounds included. This is therefore not a
    chronological date range: with ``start_date=(2000, 1, 10)`` and
    ``end_date=(2004, 9, 13)``, a track starting on 2004-05-14 is dropped
    because its day (14) is outside 10..13.

    Parameters
    ----------
    start_date : tuple
        Lower bounds as ``(YYYY, MM, DD)``. Each element is either an integer
        or ``False`` (``None`` is accepted as well); a falsy element disables
        the filter on that component.
    end_date : tuple
        Upper bounds as ``(YYYY, MM, DD)``, same conventions as `start_date`.
        A component must be set in both tuples or in neither.

    Returns
    -------
    subset : TCTracks
        New object of the same class as `self`, whose ``data`` list holds the
        selected tracks (the same track objects, not copies), in their
        original order.

    Raises
    ------
    ValueError
        - If a component (year, month or day) is set in only one of
          `start_date` and `end_date`.
        - If, for a component, the start value is greater than the end value.
        - If no track matches the requested bounds.

    Examples
    --------
    All tracks starting in 2022, regardless of month and day:

    >>> tracks.subset_year((2022, False, False), (2022, False, False))  # doctest: +SKIP

    All tracks starting in May 2022, regardless of the day:

    >>> tracks.subset_year((2022, 5, False), (2022, 5, False))  # doctest: +SKIP

    All tracks starting between May 10th and May 20th, 2022:

    >>> tracks.subset_year((2022, 5, 10), (2022, 5, 20))  # doctest: +SKIP

    Invalid, the month is set in `end_date` only, so a ValueError is raised:

    >>> tracks.subset_year((2022, False, 10), (2022, 5, 20))  # doctest: +SKIP
    """
    components = ("year", "month", "day")
    bounds = {
        name: (start_date[pos], end_date[pos])
        for pos, name in enumerate(components)
    }

    # A component has to be set on both sides or on neither. The check runs
    # day -> month -> year so that, when several components are inconsistent,
    # the same message as before is reported first.
    for name in reversed(components):
        lower, upper = bounds[name]
        if bool(lower) != bool(upper):
            raise ValueError(
                f"Mismatch between start_{name} and end_{name}: "
                "Both must be either True or False."
            )

    # An inverted range can never match anything; report it explicitly rather
    # than through the generic "No tracks found" error below.
    for name in components:
        lower, upper = bounds[name]
        if lower and upper and lower > upper:
            raise ValueError(f"Start {name} is after end {name}.")

    # Only compare against components that are actually set, so placeholders
    # (False or None) never take part in a numeric comparison.
    active = {name: limits for name, limits in bounds.items() if limits[0]}

    selected = []
    for track in self.data:
        # A single day-resolution conversion yields an object exposing
        # .year, .month and .day for the first time step of the track.
        first_day = track.time[0].to_numpy().astype("datetime64[D]").item()
        if all(
            lower <= getattr(first_day, name) <= upper
            for name, (lower, upper) in active.items()
        ):
            selected.append(track)

    if not selected:
        raise ValueError("No tracks found for the specified date range")

    subset = self.__class__()
    subset.data = selected
    return subset
429+
324430
def tracks_in_exp(self, exposure, buffer=1.0):
325431
"""Select only the tracks that are in the vicinity (buffer) of an exposure.
326432

climada/hazard/test/test_tc_tracks.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import unittest
2323
from datetime import datetime as dt
2424

25+
import cftime
2526
import geopandas as gpd
2627
import numpy as np
2728
import pandas as pd
@@ -763,6 +764,116 @@ def test_subset(self):
763764
tc_track = tc.TCTracks.from_ibtracs_netcdf(storm_id=storms)
764765
self.assertEqual(tc_track.subset({"basin": "SP"}).size, 2)
765766

767+
def test_subset_years(self):
    """Check that subset_year filters tracks on year, month and day bounds."""
    tc_test = tc.TCTracks.from_simulations_emanuel(TEST_TRACK_EMANUEL)

    # Give the first five tracks distinct, known dates:
    # 2000-01-10, 2001-02-11, 2002-03-12, 2003-04-13 and 2004-05-14.
    for offset in range(5):
        date = cftime.DatetimeProlepticGregorian(
            2000 + offset, 1 + offset, 10 + offset, 0, 0, 0, 0, has_year_zero=True
        )
        track = tc_test.data[offset]
        track["time"] = np.full(track.time.shape[0], date)

    # Valid calls: (start_date, end_date, expected (year, month) of each
    # selected track, in order).
    first_four = [(2000, 1), (2001, 2), (2002, 3), (2003, 4)]
    valid_cases = [
        ((2000, False, False), (2003, False, False), first_four),
        ((2000, False, False), (2000, False, False), [(2000, 1)]),
        ((False, 1, False), (False, 4, False), first_four),
        ((False, 3, False), (False, 3, False), [(2002, 3)]),
        (
            (False, False, 11),
            (False, False, 14),
            [(2001, 2), (2002, 3), (2003, 4), (2004, 5)],
        ),
        ((False, False, 10), (False, False, 10), [(2000, 1)]),
        ((2000, 1, 10), (2000, 1, 13), [(2000, 1)]),
        ((2000, 1, 10), (2004, 9, 13), first_four),
    ]
    for start, end, expected in valid_cases:
        tc_subset = tc_test.subset_year(start_date=start, end_date=end)
        self.assertEqual(len(tc_subset.data), len(expected))
        for selected, (year, month) in zip(tc_subset.data, expected):
            self.assertEqual(selected.time[0].item().year, year)
            self.assertEqual(selected.time[0].item().month, month)

    # Invalid calls: (start_date, end_date, expected error message pattern).
    invalid_cases = [
        (
            (2000, False, False),
            (False, False, False),
            "Mismatch between start_year and end_year: "
            "Both must be either True or False.",
        ),
        (
            (2000, False, False),
            (2000, 5, False),
            "Mismatch between start_month and end_month: "
            "Both must be either True or False.",
        ),
        (
            (False, False, False),
            (False, False, 3),
            "Mismatch between start_day and end_day: "
            "Both must be either True or False.",
        ),
        (
            (2007, False, False),
            (2000, False, False),
            "Start year is after end year.",
        ),
        (
            (2100, False, False),
            (2150, False, False),
            "No tracks found for the specified date range",
        ),
    ]
    for start, end, message in invalid_cases:
        with self.assertRaisesRegex(ValueError, message):
            tc_test.subset_year(start, end)
876+
766877
def test_get_extent(self):
767878
"""Test extent/bounds attributes."""
768879
storms = ["1988169N14259", "2002073S16161", "2002143S07157"]

0 commit comments

Comments
 (0)