Skip to content

Commit 705e0c7

Browse files
authored
feat: allow explicitly setting model and theme (#27)
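Needed for hrefs whose path does not follow the default collection/reference_datetime/file layout (e.g. for the PC), where the model and theme cannot be inferred from the href itself.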
1 parent d3dffa7 commit 705e0c7

File tree

3 files changed

+77
-23
lines changed

3 files changed

+77
-23
lines changed

src/stactools/met_office_deterministic/href.py

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66

77
from .constants import Model, Theme
88

9-
HREF_REGEX = re.compile(
10-
r"^(?:(?P<scheme>[^:]+)://(?P<bucket>[^/]+)/)?(?P<collection>[^/]+)/(?P<reference_datetime>[^/]+)/(?P<valid_time>[^-]+)-(?P<forecast_horizon>[^-]+)-(?P<parameter>.+)\.nc$"
9+
FILE_NAME_REGEX = re.compile(
10+
r"(?P<valid_time>[^-]+)-(?P<forecast_horizon>[^-]+)-(?P<parameter>.+)\.nc"
1111
)
1212

1313

@@ -22,37 +22,54 @@ class Href:
2222
forecast_horizon: str
2323

2424
@classmethod
25-
def parse(cls, href: str) -> Href:
26-
"""Parse a UK Met Office href into an Href object.
25+
def parse(
26+
cls, href: str, model: Model | None = None, theme: Theme | None = None
27+
) -> Href:
28+
"""Parse a Met Office deterministic forecast href into an Href object.
29+
30+
Parses hrefs in the format:
31+
[scheme://bucket/]collection/reference_datetime/valid_time-forecast_horizon-parameter.nc
32+
33+
The model and theme are automatically extracted from the collection name and
34+
parameter, but can be overridden with the optional parameters.
2735
2836
Args:
29-
href: The href string to parse.
37+
href: The href string to parse. Can be a full URL (e.g. s3:// or https://) or a relative path.
38+
model: Optional model to override automatic detection. If None, inferred
39+
from the collection name (global-deterministic-10km or
40+
uk-deterministic-2km).
41+
theme: Optional theme to override automatic detection. If None, inferred
42+
from the parameter name.
3043
3144
Returns:
32-
An Href object containing parsed components.
45+
An Href object containing parsed components including model, theme,
46+
parameter, reference_datetime, valid_time, and forecast_horizon.
3347
3448
Raises:
35-
ValueError: If the href format is invalid or contains an unknown collection.
49+
ValueError: If the href format is invalid, contains an unknown collection (when model inference is required),
50+
or contains an unknown parameter (when theme inference is required).
3651
"""
37-
matched = HREF_REGEX.match(href)
38-
if not matched:
39-
raise ValueError(f"Invalid UK Met Office href: {href}")
40-
matched_dict = matched.groupdict()
41-
match collection := matched_dict["collection"]:
42-
case "global-deterministic-10km":
43-
model = Model.global_
44-
case "uk-deterministic-2km":
45-
model = Model.uk
46-
case _:
47-
raise ValueError(f"Invalid collection: {collection}")
52+
parts = href.split("/")
53+
if model is None:
54+
match collection := parts[-3]:
55+
case "global-deterministic-10km":
56+
model = Model.global_
57+
case "uk-deterministic-2km":
58+
model = Model.uk
59+
case _:
60+
raise ValueError(f"Invalid collection: {collection}")
61+
matched_dict = FILE_NAME_REGEX.match(parts[-1])
62+
if matched_dict is None:
63+
raise ValueError(f"Invalid file name: {href}")
4864
parameter = matched_dict["parameter"]
49-
theme = Theme.from_parameter(parameter)
65+
if theme is None:
66+
theme = Theme.from_parameter(parameter)
5067
return Href(
5168
href=href,
5269
model=model,
5370
theme=theme,
5471
parameter=parameter,
55-
reference_datetime=matched_dict["reference_datetime"],
72+
reference_datetime=parts[-2],
5673
valid_time=matched_dict["valid_time"],
5774
forecast_horizon=matched_dict["forecast_horizon"],
5875
)

src/stactools/met_office_deterministic/stac.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,11 @@ def create_collection(model: Model, theme: Theme) -> Collection:
7979
return collection
8080

8181

82-
def create_items(source_hrefs: Sequence[str | Href]) -> list[Item]:
82+
def create_items(
83+
source_hrefs: Sequence[str | Href],
84+
model: Model | None = None,
85+
theme: Theme | None = None,
86+
) -> list[Item]:
8387
"""Creates one or more STAC items for the given hrefs."""
8488
hrefs: defaultdict[str, defaultdict[str, list[Href]]] = defaultdict(
8589
lambda: defaultdict(list)
@@ -88,7 +92,7 @@ def create_items(source_hrefs: Sequence[str | Href]) -> list[Item]:
8892
if isinstance(source_href, Href):
8993
href = source_href
9094
else:
91-
href = Href.parse(source_href)
95+
href = Href.parse(source_href, model=model, theme=theme)
9296
hrefs[href.collection_id][href.item_id].append(href)
9397
items = list()
9498
for items_hrefs in hrefs.values():
@@ -97,7 +101,10 @@ def create_items(source_hrefs: Sequence[str | Href]) -> list[Item]:
97101
return items
98102

99103

100-
def _create_item(item_id: str, hrefs: list[Href]) -> Item:
104+
def _create_item(
105+
item_id: str,
106+
hrefs: list[Href],
107+
) -> Item:
101108
"""Create a STAC item from a list of hrefs.
102109
103110
Args:

tests/test_stac.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,36 @@ def test_items(items: list[Item]) -> None:
4949
item.validate()
5050

5151

52+
def test_forcing_the_items(items: list[Item]) -> None:
53+
for item in items:
54+
datetime.datetime.strptime(
55+
item.properties["forecast:reference_datetime"], "%Y-%m-%dT%H:%M:%SZ"
56+
)
57+
58+
for key, asset in item.assets.items():
59+
assert asset.roles == ["data"]
60+
assert asset.media_type == "application/netcdf", "No media type: " + key
61+
assert asset.title
62+
63+
if should_have_descriptions(item):
64+
assert asset.description
65+
66+
item.validate()
67+
68+
69+
def test_forcing_the_collection() -> None:
70+
items = stac.create_items(
71+
[
72+
"https://ukmoeuwest.blob.core.windows.net/staging/global/height/20251202T0600Z/20251203T2100Z-PT0039H00M-cloud_amount_on_height_levels.nc"
73+
],
74+
model=Model.global_,
75+
theme=Theme.height,
76+
)
77+
for item in items:
78+
assert item.properties["met_office_deterministic:model"] == Model.global_
79+
assert item.properties["met_office_deterministic:theme"] == Theme.height
80+
81+
5282
def should_have_descriptions(item: Item) -> bool:
5383
# Temporary helper function while we wait for more descriptions from the met office
5484
model = item.properties["met_office_deterministic:model"]

0 commit comments

Comments
 (0)