Skip to content

Commit 940db4e

Browse files
authored
Merge pull request #183 from Climate-REF/cv
feat: Add basic support for a controlled vocabulary
2 parents d4a6c88 + 6f1d0d8 commit 940db4e

8 files changed

Lines changed: 328 additions & 9 deletions

File tree

changelog/183.feature.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Add the basic framework for enforcing a controlled vocabulary
2+
for the results in a CMEC metrics bundle.
3+
This is still in the prototype stage
4+
and is not yet integrated into post-metric execution processing.

packages/ref-core/src/cmip_ref_core/exceptions.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,7 @@ class ConstraintNotSatisfied(RefException):
3838
"""Exception raised when a constraint is violated"""
3939

4040
# TODO: implement when we have agreed on using constraints
41+
42+
43+
class ResultValidationError(RefException):
    """
    Exception raised when the results from a metric execution are invalid

    For example, when a result uses a dimension or a dimension value
    that is not permitted by the controlled vocabulary.
    """
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
import pathlib
2+
3+
from attrs import field, frozen
4+
from cattrs import Converter, transform_error
5+
from loguru import logger
6+
from ruamel.yaml import YAML
7+
8+
from cmip_ref_core.exceptions import ResultValidationError
9+
from cmip_ref_core.pycmec.metric import CMECMetric
10+
11+
yaml = YAML()
12+
13+
14+
@frozen
class DimensionValue:
    """
    A single controlled value that a dimension may take
    """

    name: str
    """
    Identifier of the value.

    Values found in a metric bundle are compared against this name during validation.
    """
    long_name: str
    """
    A longer name for the value (presumably used for presentation)
    """
    description: str | None
    """
    Free-text description of the value, or None if there is none
    """
    units: str
    """
    Units associated with the value, e.g. ``dimensionless``
    """
24+
25+
26+
@frozen
class Dimension:
    """
    Description of a dimension in a metric bundle

    This information is also used by the frontend for presentation purposes.
    """

    name: str
    """
    A short identifier of the dimension.

    This is used as a key in the metric bundle.
    """
    long_name: str
    """
    A longer name used for presentation
    """
    description: str
    """
    A short description of the dimension.

    This is used for presentation
    """
    allow_extra_values: bool
    """
    If True, additional non-controlled values are allowed.

    This is used for dimensions where not all the values are known at run time,
    for example, the model dimension.
    """
    required: bool
    """
    If True, this dimension is required to be specified in the results.

    NOTE: `CV.validate_metrics` does not currently check this flag.
    """
    values: list[DimensionValue] = field(factory=list)
    """
    The list of controlled values for a given dimension.

    If `allow_extra_values` is False,
    then only these values are valid for the dimension.
    """
67+
68+
69+
@frozen
class CV:
    """
    A collection of controlled dimensions and values used to validate results.

    A metric bundle does not have to specify all dimensions,
    but any dimensions not in the CV are not permitted.
    """

    # TODO: There might be some additional fields in future if this CV is project-specific

    dimensions: list[Dimension]

    def get_dimension_by_name(self, name: str) -> Dimension:
        """
        Get a dimension by name

        If several dimensions share the same name, the first one is returned.

        Parameters
        ----------
        name
            The name of the dimension

        Returns
        -------
        Dimension
            The dimension with the given name

        Raises
        ------
        KeyError
            If the dimension is not found
        """
        for dim in self.dimensions:
            if dim.name == name:
                return dim
        raise KeyError(f"Dimension {name} not found")

    def validate_metrics(self, metric_bundle: CMECMetric) -> None:
        """
        Validate a metric bundle against a CV

        The CV describes the accepted dimensions and values within a bundle.
        Every result yielded by the bundle is checked in turn
        and the first violation found is raised.

        Parameters
        ----------
        metric_bundle
            The metric bundle whose flattened results are checked

        Raises
        ------
        ResultValidationError
            If the validation of the dimensions or values fails
        """
        # Dimension name -> names of the permitted values,
        # or None if the dimension accepts any value.
        # Filled lazily so each dimension is resolved once per call
        # instead of once per result.
        permitted_values: dict[str, frozenset[str] | None] = {}

        for result in metric_bundle.iter_results():
            for k, v in result.dimensions.items():
                if k not in permitted_values:
                    try:
                        dimension = self.get_dimension_by_name(k)
                    except KeyError as exc:
                        # Chain the lookup failure so the traceback shows its origin
                        raise ResultValidationError(f"Unknown dimension: {k!r}") from exc
                    permitted_values[k] = (
                        None
                        if dimension.allow_extra_values
                        else frozenset(dv.name for dv in dimension.values)
                    )

                permitted = permitted_values[k]
                if permitted is not None and v not in permitted:
                    raise ResultValidationError(f"Unknown value {v!r} for dimension {k!r}")
            if not isinstance(result.value, float):  # pragma: no cover
                # This may not be possible with the current CMECMetric implementation
                raise ResultValidationError(f"Unexpected value: {result.value!r}")

    @staticmethod
    def load_from_file(filename: pathlib.Path | str) -> "CV":
        """
        Load a CV from disk

        Parameters
        ----------
        filename
            Path of the YAML file that describes the CV

        Returns
        -------
            A new CV instance

        Raises
        ------
        Exception
            Any error raised while structuring the file contents is logged and re-raised unchanged
        """
        # Unknown keys in the file are rejected rather than silently ignored
        convertor = Converter(forbid_extra_keys=True)
        contents = yaml.load(pathlib.Path(filename))

        try:
            return convertor.structure(contents, CV)
        except Exception as exc:
            logger.error(f"Error loading CV from {filename}")
            # Log one line per problem found in the file before propagating the error
            for error in transform_error(exc):
                logger.error(error)
            raise

packages/ref-core/src/cmip_ref_core/pycmec/metric.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,9 @@
1313

1414
import pathlib
1515
from collections import Counter
16+
from collections.abc import Generator
1617
from enum import Enum
17-
from typing import Any
18+
from typing import Any, cast
1819

1920
from pydantic import (
2021
BaseModel,
@@ -136,6 +137,9 @@ def merge_dimension(cls, metric_dim1: Any, metric_dim2: Any) -> Self:
136137
merged_dim[dim][key] = mdim2.root[dim][key]
137138
return cls(merged_dim)
138139

140+
def __getitem__(self, item: str) -> Any:
    """
    Look up an entry of the wrapped ``root`` value by key

    Parameters
    ----------
    item
        Key to look up

    Returns
    -------
        Whatever ``self.root`` holds under ``item``
    """
    root = self.root
    return root[item]
142+
139143

140144
class MetricResults(RootModel[Any]):
141145
"""
@@ -192,6 +196,18 @@ class StrNumDict(RootModel[Any]):
192196
root: dict[str, float | str]
193197

194198

199+
class MetricValue(BaseModel):
    """
    A single result taken out of the nested results of a metric bundle

    The nesting is flattened:
    the keys that lead to the result are kept as a flat mapping of dimensions.
    """

    # Dimension name -> the key selected for that dimension
    dimensions: dict[str, str]
    # The result itself
    value: float | str
    # Attributes that accompany the value, if any
    attributes: dict[str, str | float | int] | None = None
209+
210+
195211
class CMECMetric(BaseModel):
196212
"""
197213
CMEC metric bundle object
@@ -356,6 +372,42 @@ def create_template() -> dict[str, Any]:
356372
MetricCV.NOTES.value: None,
357373
}
358374

375+
def iter_results(self) -> Generator[MetricValue]:
    """
    Iterate over the results in the metric bundle

    The nested results are flattened:
    each leaf value is yielded together with the dimension values that lead to it.

    Yields
    ------
    MetricValue
        One flattened value per result in the bundle
    """
    structure = cast(list[str], self.DIMENSIONS[MetricCV.JSON_STRUCTURE.value])
    # TODO: This is pretty hacky
    # A missing dimension in the results should be a validationError
    ordered_dimensions = structure if "statistic" in structure else [*structure, "statistic"]

    yield from _walk_results(ordered_dimensions, self.RESULTS, {})
393+
394+
395+
def _walk_results(
    dimensions: list[str], results: dict[str, Any], metadata: dict[str, str]
) -> Generator[MetricValue]:
    """
    Recursively flatten nested results into individual metric values

    Each nesting level of ``results`` is paired with the next entry of ``dimensions``.
    The attributes entry of a level is not treated as a result;
    it is attached to the leaf values found at that level instead.

    Parameters
    ----------
    dimensions
        Names of the dimensions that have not been consumed yet, outermost first
    results
        The (sub-)tree of results to walk
    metadata
        Dimension values selected on the way down to ``results``.
        This mapping is not modified.

    Yields
    ------
    MetricValue
        One value per leaf (string or number) found in ``results``

    Raises
    ------
    ValueError
        If ``results`` is nested more deeply than there are dimensions
    """
    if not dimensions:
        # Raised explicitly rather than asserted so the check survives ``python -O``
        raise ValueError("Not enough dimensions")

    dimension, *remaining = dimensions
    attributes_key = MetricCV.ATTRIBUTES.value
    # The attributes are shared by every leaf at this level, so look them up once
    attributes = results.get(attributes_key)

    for key, value in results.items():
        if key == attributes_key:
            continue
        # Build a fresh mapping per entry so neither the caller's dict
        # nor sibling entries are affected
        selection = {**metadata, dimension: key}
        # ``int`` is listed explicitly: an integer is not an instance of ``float``
        # and would otherwise be walked as if it were a nested mapping
        if isinstance(value, str | int | float):
            yield MetricValue(dimensions=selection, value=value, attributes=attributes)
        else:
            yield from _walk_results(remaining, value, selection)
410+
359411

360412
class CMECGenerateJsonSchema(GenerateJsonSchema):
361413
"""
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
dimensions:
2+
- name: model
3+
long_name: model_id
4+
description: ""
5+
allow_extra_values: true
6+
required: false
7+
- name: source_id
8+
long_name: source_id
9+
description: ""
10+
allow_extra_values: true
11+
required: false
12+
- name: metric
13+
long_name: ""
14+
description: ""
15+
required: true
16+
allow_extra_values: true
17+
- name: statistic
18+
long_name: ""
19+
description: ""
20+
required: true
21+
allow_extra_values: false
22+
values:
23+
- name: rmse
24+
long_name: Root Mean Square Error
25+
description: ""
26+
units: dimensionless
27+
- name: overall score
28+
long_name: Overall Score
29+
description: ""
30+
units: dimensionless
31+
- name: bias
32+
long_name: Bias
33+
description: ""
34+
units: dimensionless
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,25 @@
1+
import json
12
import pathlib
23

34
import pytest
45

6+
from cmip_ref_core.pycmec.metric import CMECMetric
7+
58

69
# Update the original_datadir to specify where the expected values go
710
@pytest.fixture(scope="session")
def original_datadir():
    """Directory, next to this file, that holds the CMEC test data"""
    here = pathlib.Path(__file__).parent
    return here.joinpath("cmec_testdata")
13+
14+
15+
@pytest.fixture
def cmec_right_metric_dict(datadir):
    """Contents of the sample CMEC metric bundle, parsed from JSON"""
    # JSON is exchanged as UTF-8; do not depend on the platform's locale encoding
    with open(datadir / "cmec_metric_sample.json", encoding="utf-8") as fh:
        return json.load(fh)
21+
22+
23+
@pytest.fixture
def cmec_metric(cmec_right_metric_dict):
    """The sample CMEC metric bundle as a ``CMECMetric`` instance"""
    bundle = CMECMetric(**cmec_right_metric_dict)
    return bundle

packages/ref-core/tests/unit/pycmec/test_cmec_metric.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,6 @@
1010
)
1111

1212

13-
@pytest.fixture
14-
def cmec_right_metric_dict(datadir):
15-
with open(datadir / "cmec_metric_sample.json") as fh:
16-
content = json.loads(fh.read())
17-
18-
return content
19-
20-
2113
@pytest.fixture(params=["dict", "CMECMetric"])
2214
def cmec_right_metric_data(request, cmec_right_metric_dict):
2315
if request.param == "dict":
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
import pytest
2+
3+
from cmip_ref_core.exceptions import ResultValidationError
4+
from cmip_ref_core.pycmec.controlled_vocabulary import CV
5+
from cmip_ref_core.pycmec.metric import CMECMetric
6+
7+
8+
@pytest.fixture
def cv(datadir):
    """Controlled vocabulary loaded from the sample YAML file in the test data directory"""
    sample_file = datadir / "cv_sample.yaml"
    return CV.load_from_file(sample_file)
11+
12+
13+
def test_load_from_file(datadir):
    """A CV can be loaded when the filename is given as a plain string"""
    filename = str(datadir / "cv_sample.yaml")
    loaded = CV.load_from_file(filename)

    assert len(loaded.dimensions) > 0
17+
18+
19+
def test_validate(cv, cmec_metric):
    """The sample metric bundle passes validation against the sample CV"""
    # Validation signals failure by raising; a valid bundle simply returns None
    outcome = cv.validate_metrics(cmec_metric)
    assert outcome is None
21+
22+
23+
def test_invalid_dimension(cv):
    """A bundle that uses a dimension missing from the CV is rejected"""
    # The bundle is built inline, so the shared ``cmec_metric`` fixture is not requested
    bundle = CMECMetric(
        DIMENSIONS={
            "json_structure": ["model", "extra"],
            "model": {
                "E3SM": {"name": "E3SM"},
            },
            "extra": {
                "Ecosystem and Carbon Cycle": {"name": "Ecosystem and Carbon Cycle"},
                "Hydrology Cycle": {"name": "Hydrology Cycle"},
            },
        },
        RESULTS={
            "E3SM": {
                "Ecosystem and Carbon Cycle": {"overall score": 0.11, "bias": 0.56},
                "Hydrology Cycle": {"overall score": 0.26, "bias": 0.70},
            },
        },
    )
    with pytest.raises(ResultValidationError, match="Unknown dimension: 'extra'"):
        cv.validate_metrics(bundle)
44+
45+
46+
def test_missing_value(cv):
    """A bundle that uses a statistic which is not in the CV is rejected"""
    # The bundle is built inline, so the shared ``cmec_metric`` fixture is not requested
    bundle = CMECMetric(
        DIMENSIONS={
            "json_structure": ["model", "metric"],
            "model": {
                "E3SM": {"name": "E3SM"},
            },
            "metric": {
                "Hydrology Cycle": {"name": "Hydrology Cycle"},
            },
        },
        RESULTS={
            "E3SM": {
                "Hydrology Cycle": {"unknown": 0.26, "bias": 0.70},
            },
        },
    )
    with pytest.raises(ResultValidationError, match="Unknown value 'unknown' for dimension 'statistic'"):
        cv.validate_metrics(bundle)

0 commit comments

Comments
 (0)