diff --git a/src/nwp_consumer/internal/entities/modelmetadata.py b/src/nwp_consumer/internal/entities/modelmetadata.py index da1bc8f5..03d5482f 100644 --- a/src/nwp_consumer/internal/entities/modelmetadata.py +++ b/src/nwp_consumer/internal/entities/modelmetadata.py @@ -65,6 +65,12 @@ class ModelMetadata: to the repository. """ + delay_minutes: int + """The model delay in minutes. + + Note that this can be overwritten by using `with_delay_minutes`. + """ + chunk_count_overrides: dict[str, int] = dataclasses.field(default_factory=dict) """Mapping of dimension names to the desired number of chunks in that dimension. @@ -201,6 +207,10 @@ def with_running_hours(self, hours: list[int]) -> "ModelMetadata": """Returns metadata for the given model with the given running hours.""" return dataclasses.replace(self, running_hours=hours) + def with_delay_minutes(self, delay_minutes: int) -> "ModelMetadata": + """Returns metadata for the given model with the given delay minutes.""" + return dataclasses.replace(self, delay_minutes=delay_minutes) + def with_max_step(self, max_step: int) -> "ModelMetadata": """Returns metadata for the given model with the given max step.""" return dataclasses.replace( @@ -254,6 +264,7 @@ class Models: longitude=[float(f"{lon / 10:.2f}") for lon in range(-1800, 1800 + 1, 1)], ), running_hours=[0, 6, 12, 18], + delay_minutes=(60 * 26), # 1 day, plus leeway ) """ECMWF's High Resolution Integrated Forecast System.""" @@ -274,6 +285,7 @@ class Models: longitude=[v / 10 for v in range(-1800, 1800, 1)], ), running_hours=[0, 12], + delay_minutes=(60 * 26), # 1 day, plus leeway ) """Summary statistics from ECMWF's Ensemble Forecast System.""" @@ -303,6 +315,7 @@ class Models: longitude=[v / 10 for v in range(-1800, 1800, 1)], ), running_hours=[0, 6, 12, 18], + delay_minutes=(60 * 26), # 1 day, plus leeway ) """Full ensemble data from ECMWF's Ensemble Forecast System.""" @@ -335,6 +348,7 @@ class Models: longitude=[float(lon) for lon in range(-180, 180 + 1, 1)], ), running_hours=[0, 6, 12, 18], + delay_minutes=(60 * 5), # 5 hours ) """NCEP's Global Forecast System.""" @@ -372,6 +386,7 @@ class Models: # TODO: Change to -180 -> 180 ), running_hours=[0, 6, 12, 18], + delay_minutes=300, ) """MetOffice's Unified Model, in the Global configuration, at a resolution of 17km.""" @@ -404,7 +419,8 @@ class Models: for lon in np.arange(-179.929687, 179.929688 + 0.140625, 0.140625) ], ), - running_hours=[0, 6, 12, 18], + running_hours=[0, 12], + delay_minutes=300, ) """MetOffice's Unified Model, in the Global configuration, at a resolution of 10km.""" @@ -438,6 +454,7 @@ class Models: x_osgb=[int(x) for x in np.arange(start=-239000, stop=857000, step=2000)], ), running_hours=list(range(0, 24, 6)), + delay_minutes=120, ) """MetOffice's Unified Model in the UKV configuration, at a resolution of 2km""" @@ -468,5 +485,6 @@ class Models: x_laea=[int(x) for x in np.arange(start=-576000, stop=332000 + 2000, step=2000)], ), running_hours=list(range(0, 24, 3)), # Only first 12 steps available for hourly runs + delay_minutes=120, ) """MetOffice's Unified Model in the UKV configuration, at a resolution of 2km""" diff --git a/src/nwp_consumer/internal/entities/repometadata.py b/src/nwp_consumer/internal/entities/repometadata.py index 70158987..563fbf2f 100644 --- a/src/nwp_consumer/internal/entities/repometadata.py +++ b/src/nwp_consumer/internal/entities/repometadata.py @@ -40,12 +40,6 @@ class RawRepositoryMetadata: but rather are defined by pre-selected agreements with the provider. """ - delay_minutes: int - """The approximate model delay in minutes. - - This delay is the time between the running of the model and the time - at which the data is actually available.""" - required_env: list[str] """Environment variables required for usage.""" @@ -65,18 +59,23 @@ class RawRepositoryMetadata: available_models: dict[str, ModelMetadata] """A dictionary of available models and their metadata.""" - def determine_latest_it_from(self, t: dt.datetime, running_hours: list[int]) -> dt.datetime: + def determine_latest_it_from( + self, t: dt.datetime, + running_hours: list[int], + delay_minutes:int) -> dt.datetime: """Determine the latest available initialization time from a given time. Args: t: The time from which to determine the latest initialization time. running_hours: A list of hours at which the model runs each day. + delay_minutes: The delay in minutes after the initialization time + before data is available. Returns: The latest available initialization time prior to the given time. """ it = ( - t.replace(minute=0, second=0, microsecond=0) - dt.timedelta(minutes=self.delay_minutes) + t.replace(minute=0, second=0, microsecond=0) - dt.timedelta(minutes=delay_minutes) ).replace(minute=0) while it.hour not in running_hours: it -= dt.timedelta(hours=1) @@ -97,7 +96,6 @@ def __str__(self) -> str: ( "Model Repository: ", f"\n\t{self.name} ({'archive' if self.is_archive else 'live/rolling'} dataset.)", - f"\n\t\t(available after {self.delay_minutes} minute delay)", "\nEnvironment variables:", "\n\tRequired:", "\n".join(f"\t\t{var}" for var in self.required_env), diff --git a/src/nwp_consumer/internal/entities/test_modelmetadata.py b/src/nwp_consumer/internal/entities/test_modelmetadata.py index 8f46d7a8..07fac6a0 100644 --- a/src/nwp_consumer/internal/entities/test_modelmetadata.py +++ b/src/nwp_consumer/internal/entities/test_modelmetadata.py @@ -27,6 +27,7 @@ def test_with_region(self) -> None: longitude=[float(f"{lon / 10:.2f}") for lon in range(-1800, 1800 + 1, 1)], ), running_hours=[0, 6, 12, 18], + delay_minutes=60, ) @dataclasses.dataclass diff --git a/src/nwp_consumer/internal/entities/test_repometadata.py b/src/nwp_consumer/internal/entities/test_repometadata.py index 25417ba7..ecb61669 100644 --- a/src/nwp_consumer/internal/entities/test_repometadata.py +++ b/src/nwp_consumer/internal/entities/test_repometadata.py @@ -13,7 +13,6 @@ class TestRawRepositoryMetadata(unittest.TestCase): name="test", is_archive=False, is_order_based=False, - delay_minutes=55, required_env=["TEST"], optional_env={"TEST": "test"}, max_connections=1, @@ -50,7 +49,9 @@ class TestCase: for test in tests: with self.subTest(name=test.name): - result = self.metadata.determine_latest_it_from(test.t, [0, 6, 12, 18]) + result = self.metadata.determine_latest_it_from(test.t, + [0, 6, 12, 18], + delay_minutes=60) self.assertEqual(result, test.expected) diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/ceda.py b/src/nwp_consumer/internal/repositories/raw_repositories/ceda.py index ab519814..d7639698 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/ceda.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/ceda.py @@ -117,7 +117,6 @@ def repository() -> entities.RawRepositoryMetadata: name="CEDA", is_archive=True, is_order_based=False, - delay_minutes=(60 * 24 * 7) + (60 * 12), # 7.5 days max_connections=20, required_env=["CEDA_USER", "CEDA_PASS"], optional_env={}, diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_mars.py b/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_mars.py index e0a7e505..56b807fd 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_mars.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_mars.py @@ -211,7 +211,6 @@ def repository() -> entities.RawRepositoryMetadata: name="ECMWF-MARS", is_archive=True, is_order_based=False, - delay_minutes=(60 * 26), # 1 day, plus leeway max_connections=20, required_env=[ "ECMWF_API_KEY", diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_realtime.py b/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_realtime.py index 8161ef37..11a7334c 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_realtime.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/ecmwf_realtime.py @@ -67,7 +67,6 @@ def repository() -> entities.RawRepositoryMetadata: name="ECMWF-Realtime-S3", is_archive=False, is_order_based=True, - delay_minutes=(60 * 7), # 7 hours max_connections=100, required_env=[ "ECMWF_REALTIME_S3_ACCESS_KEY", @@ -81,14 +80,16 @@ def repository() -> entities.RawRepositoryMetadata: }, postprocess_options=entities.PostProcessOptions(), available_models={ - "default": entities.Models.ECMWF_HRES_IFS_0P1DEGREE.with_region("uk-north60"), - "hres-ifs-uk": entities.Models.ECMWF_HRES_IFS_0P1DEGREE.with_region("uk-north60"), + "default": entities.Models.ECMWF_HRES_IFS_0P1DEGREE.with_region("uk-north60"). + with_delay_minutes(60 * 7), + "hres-ifs-uk": entities.Models.ECMWF_HRES_IFS_0P1DEGREE.with_region("uk-north60"). + with_delay_minutes(60 * 7), "hres-ifs-india": entities.Models.ECMWF_HRES_IFS_0P1DEGREE.with_region( "india", - ).with_chunk_count_overrides({"variable": 1}), + ).with_chunk_count_overrides({"variable": 1}).with_delay_minutes(60 * 7), "hres-ifs-nl": entities.Models.ECMWF_HRES_IFS_0P1DEGREE.with_region( "nl", - ).with_max_step(84), + ).with_max_step(84).with_delay_minutes(60 * 7), }, ) diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/mo_datahub.py b/src/nwp_consumer/internal/repositories/raw_repositories/mo_datahub.py index 16f0e7c8..481a0d53 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/mo_datahub.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/mo_datahub.py @@ -152,11 +152,11 @@ def __init__(self, order_id: str, api_key: str) -> None: @staticmethod @override def repository() -> entities.RawRepositoryMetadata: + return entities.RawRepositoryMetadata( name="MetOffice-Weather-Datahub", is_archive=False, is_order_based=True, - delay_minutes=120, max_connections=10, required_env=["METOFFICE_API_KEY", "METOFFICE_ORDER_ID"], optional_env={"METOFFICE_DATASPEC": "1.1.0"}, diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/noaa_s3.py b/src/nwp_consumer/internal/repositories/raw_repositories/noaa_s3.py index e6556287..0934f5e9 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/noaa_s3.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/noaa_s3.py @@ -55,7 +55,6 @@ def repository() -> entities.RawRepositoryMetadata: name="NOAA-GFS-S3", is_archive=False, is_order_based=False, - delay_minutes=(60 * 5), # 5 hours max_connections=100, required_env=[], optional_env={}, diff --git a/src/nwp_consumer/internal/repositories/raw_repositories/test_mo_datahub.py b/src/nwp_consumer/internal/repositories/raw_repositories/test_mo_datahub.py index 4d4a3231..fde499dd 100644 --- a/src/nwp_consumer/internal/repositories/raw_repositories/test_mo_datahub.py +++ b/src/nwp_consumer/internal/repositories/raw_repositories/test_mo_datahub.py @@ -28,6 +28,7 @@ def test__download(self) -> None: test_it = c.repository().determine_latest_it_from( dt.datetime.now(tz=dt.UTC), c.model().running_hours, + delay_minutes=c.model().delay_minutes, ) dl_result = c._download( @@ -92,6 +93,8 @@ class TestCase: else: self.assertIsInstance(region_result, Success, msg=f"{region_result}") + + @patch.dict(os.environ, {"MODEL": "um-ukv-2km"}, clear=True) def test_convert_ukv(self) -> None: @dataclasses.dataclass diff --git a/src/nwp_consumer/internal/services/_dummy_adaptors.py b/src/nwp_consumer/internal/services/_dummy_adaptors.py index 13ca9745..85312d99 100644 --- a/src/nwp_consumer/internal/services/_dummy_adaptors.py +++ b/src/nwp_consumer/internal/services/_dummy_adaptors.py @@ -23,7 +23,6 @@ def repository() -> entities.RawRepositoryMetadata: name="ACME-Test-Models", is_archive=False, is_order_based=False, - delay_minutes=60, max_connections=4, required_env=[], optional_env={}, @@ -49,6 +48,7 @@ def model() -> entities.ModelMetadata: longitude=np.linspace(-180, 179.8, 1440).tolist(), ), running_hours=[0, 6, 12, 18], + delay_minutes=60, ) @override diff --git a/src/nwp_consumer/internal/services/consumer_service.py b/src/nwp_consumer/internal/services/consumer_service.py index 591b4929..5fc6ad99 100644 --- a/src/nwp_consumer/internal/services/consumer_service.py +++ b/src/nwp_consumer/internal/services/consumer_service.py @@ -148,6 +148,7 @@ def _create_suitable_store( repository_metadata.determine_latest_it_from( t=dt.datetime.now(tz=dt.UTC), running_hours=model_metadata.running_hours, + delay_minutes=model_metadata.delay_minutes, ), ] case single_it if isinstance(period, dt.datetime):