66
77from .constants import Model , Theme
88
# Matches the final path component of a forecast href:
#     <valid_time>-<forecast_horizon>-<parameter>.nc
# The first two fields cannot contain "-"; the parameter field takes everything
# between the second "-" and the ".nc" suffix, so it may itself contain "-".
# The pattern carries no anchors: use ``fullmatch`` when trailing characters
# after ".nc" must be rejected.
FILE_NAME_REGEX = re.compile(
    r"(?P<valid_time>[^-]+)-(?P<forecast_horizon>[^-]+)-(?P<parameter>.+)\.nc"
)
1212
1313
@@ -22,37 +22,54 @@ class Href:
2222 forecast_horizon : str
2323
2424 @classmethod
25- def parse (cls , href : str ) -> Href :
26- """Parse a UK Met Office href into an Href object.
25+ def parse (
26+ cls , href : str , model : Model | None = None , theme : Theme | None = None
27+ ) -> Href :
28+ """Parse a Met Office deterministic forecast href into an Href object.
29+
30+ Parses hrefs in the format:
31+ [scheme://bucket/]collection/reference_datetime/valid_time-forecast_horizon-parameter.nc
32+
33+ The model and theme are automatically extracted from the collection name and
34+ parameter, but can be overridden with the optional parameters.
2735
2836 Args:
29- href: The href string to parse.
37+ href: The href string to parse. Can be a full S3 URL or a relative path.
38+ model: Optional model to override automatic detection. If None, inferred
39+ from the collection name (global-deterministic-10km or
40+ uk-deterministic-2km).
41+ theme: Optional theme to override automatic detection. If None, inferred
42+ from the parameter name.
3043
3144 Returns:
32- An Href object containing parsed components.
45+ An Href object containing parsed components including model, theme,
46+ parameter, reference_datetime, valid_time, and forecast_horizon.
3347
3448 Raises:
35- ValueError: If the href format is invalid or contains an unknown collection.
49+ ValueError: If the href format is invalid, contains an unknown collection,
50+ or contains an unknown parameter (when theme inference is required).
3651 """
37- matched = HREF_REGEX .match (href )
38- if not matched :
39- raise ValueError (f"Invalid UK Met Office href: { href } " )
40- matched_dict = matched .groupdict ()
41- match collection := matched_dict ["collection" ]:
42- case "global-deterministic-10km" :
43- model = Model .global_
44- case "uk-deterministic-2km" :
45- model = Model .uk
46- case _:
47- raise ValueError (f"Invalid collection: { collection } " )
52+ parts = href .split ("/" )
53+ if model is None :
54+ match collection := parts [- 3 ]:
55+ case "global-deterministic-10km" :
56+ model = Model .global_
57+ case "uk-deterministic-2km" :
58+ model = Model .uk
59+ case _:
60+ raise ValueError (f"Invalid collection: { collection } " )
61+ matched_dict = FILE_NAME_REGEX .match (parts [- 1 ])
62+ if matched_dict is None :
63+ raise ValueError (f"Invalid file name: { href } " )
4864 parameter = matched_dict ["parameter" ]
49- theme = Theme .from_parameter (parameter )
65+ if theme is None :
66+ theme = Theme .from_parameter (parameter )
5067 return Href (
5168 href = href ,
5269 model = model ,
5370 theme = theme ,
5471 parameter = parameter ,
55- reference_datetime = matched_dict [ "reference_datetime" ],
72+ reference_datetime = parts [ - 2 ],
5673 valid_time = matched_dict ["valid_time" ],
5774 forecast_horizon = matched_dict ["forecast_horizon" ],
5875 )
0 commit comments