Skip to content

Commit 88bafa3

Browse files
Repair fix_file (#2993)
Co-authored-by: Manuel Schlund <32543114+schlunma@users.noreply.github.com>
1 parent f265989 commit 88bafa3

File tree

3 files changed

+152
-68
lines changed

3 files changed

+152
-68
lines changed

esmvalcore/cmor/_fixes/fix.py

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,6 @@
3535
if TYPE_CHECKING:
3636
from collections.abc import Sequence
3737

38-
import ncdata
39-
import xarray as xr
4038
from iris.coords import Coord
4139
from iris.cube import Cube
4240

@@ -84,10 +82,10 @@ def __init__(
8482

8583
def fix_file(
8684
self,
87-
file: str | Path | xr.Dataset | ncdata.NcData,
85+
file: Path,
8886
output_dir: Path, # noqa: ARG002
8987
add_unique_suffix: bool = False, # noqa: ARG002
90-
) -> str | Path | xr.Dataset | ncdata.NcData:
88+
) -> Path | Sequence[Cube]:
9189
"""Fix files before loading them into a :class:`~iris.cube.CubeList`.
9290
9391
This is mainly intended to fix errors that prevent loading the data
@@ -98,11 +96,12 @@ def fix_file(
9896
9997
Warning
10098
-------
101-
A path should only be returned if it points to the original (unchanged)
102-
file (i.e., a fix was not necessary). If a fix is necessary, this
103-
function should return a :class:`~ncdata.NcData` or
104-
:class:`~xarray.Dataset` object. Under no circumstances a copy of the
105-
input data should be created (this is very inefficient).
99+
For fix developers: a path should only be returned if it points to the
100+
original (unchanged) file (i.e., a fix was not necessary). If a fix is
101+
necessary, this function should return a :class:`~iris.cube.CubeList` or
102+
a :class:`~collections.abc.Sequence` of :class:`~iris.cube.Cube`
103+
objects. Under no circumstances a copy of the input data should be
104+
created (this is very inefficient).
106105
107106
Parameters
108107
----------
@@ -116,7 +115,7 @@ def fix_file(
116115
117116
Returns
118117
-------
119-
str | pathlib.Path | xr.Dataset | ncdata.NcData:
118+
:
120119
Fixed data or a path to them.
121120
122121
"""

esmvalcore/cmor/fix.py

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,16 @@
99

1010
import logging
1111
from collections import defaultdict
12+
from collections.abc import Sequence
13+
from pathlib import Path
1214
from typing import TYPE_CHECKING, Any
1315

14-
from iris.cube import CubeList
16+
from iris.cube import Cube, CubeList
1517

1618
from esmvalcore.cmor._fixes.fix import Fix
19+
from esmvalcore.io.local import LocalFile
1720

1821
if TYPE_CHECKING:
19-
from collections.abc import Sequence
20-
from pathlib import Path
21-
22-
import ncdata
23-
import xarray as xr
24-
from iris.cube import Cube
25-
2622
from esmvalcore.config import Session
2723

2824
logger = logging.getLogger(__name__)
@@ -39,22 +35,14 @@ def fix_file( # noqa: PLR0913
3935
session: Session | None = None,
4036
frequency: str | None = None,
4137
**extra_facets: Any,
42-
) -> str | Path | xr.Dataset | ncdata.NcData:
38+
) -> Path | Sequence[Cube]:
4339
"""Fix files before loading them into a :class:`~iris.cube.CubeList`.
4440
4541
This is mainly intended to fix errors that prevent loading the data with
4642
Iris (e.g., those related to ``missing_value`` or ``_FillValue``) or
4743
operations that are more efficient with other packages (e.g., loading files
4844
with lots of variables is much faster with Xarray than Iris).
4945
50-
Warning
51-
-------
52-
A path should only be returned if it points to the original (unchanged)
53-
file (i.e., a fix was not necessary). If a fix is necessary, this function
54-
should return a :class:`~ncdata.NcData` or :class:`~xarray.Dataset` object.
55-
Under no circumstances a copy of the input data should be created (this is
56-
very inefficient).
57-
5846
Parameters
5947
----------
6048
file:
@@ -80,10 +68,15 @@ def fix_file( # noqa: PLR0913
8068
8169
Returns
8270
-------
83-
str | pathlib.Path | xr.Dataset | ncdata.NcData:
71+
:
8472
Fixed data or a path to them.
8573
8674
"""
75+
if not isinstance(file, Path):
76+
# Skip this function for `esmvalcore.io.DataElement` that is not a path
77+
# to a file.
78+
return file
79+
8780
# Update extra_facets with variable information given as regular arguments
8881
# to this function
8982
extra_facets.update(
@@ -96,6 +89,7 @@ def fix_file( # noqa: PLR0913
9689
},
9790
)
9891

92+
result: Path | Sequence[Cube] = Path(file)
9993
for fix in Fix.get_fixes(
10094
project=project,
10195
dataset=dataset,
@@ -105,12 +99,34 @@ def fix_file( # noqa: PLR0913
10599
session=session,
106100
frequency=frequency,
107101
):
108-
file = fix.fix_file(
109-
file,
102+
result = fix.fix_file(
103+
result,
110104
output_dir,
111105
add_unique_suffix=add_unique_suffix,
112106
)
113-
return file
107+
108+
if isinstance(file, LocalFile):
109+
# This happens when this function is called from
110+
# `esmvalcore.dataset.Dataset.load`.
111+
if isinstance(result, Path):
112+
if result == file:
113+
# No fixes have been applied, return the original file.
114+
result = file
115+
else:
116+
# The file has been fixed and the result is a path to the fixed
117+
# file. The result needs to be loaded to read the global
118+
# attributes for recording provenance.
119+
fixed_file = LocalFile(result)
120+
fixed_file.facets = file.facets
121+
fixed_file.ignore_warnings = file.ignore_warnings
122+
result = fixed_file.to_iris()
123+
124+
if isinstance(result, Sequence) and isinstance(result[0], Cube):
125+
# Set the attributes for recording provenance here because
126+
# to_iris will not be called on the original file.
127+
file.attributes = result[0].attributes.globals.copy()
128+
129+
return result
114130

115131

116132
def fix_metadata(

tests/unit/cmor/test_fix.py

Lines changed: 106 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,22 @@
11
"""Unit tests for :mod:`esmvalcore.cmor.fix`."""
22

3+
from __future__ import annotations
4+
5+
from collections.abc import Sequence
36
from pathlib import Path
7+
from typing import TYPE_CHECKING
48
from unittest.mock import Mock, patch, sentinel
59

10+
import iris
11+
import iris.cube
612
import pytest
713

814
from esmvalcore.cmor.fix import Fix, fix_data, fix_file, fix_metadata
15+
from esmvalcore.io.local import LocalFile
16+
from esmvalcore.io.protocol import DataElement
17+
18+
if TYPE_CHECKING:
19+
from pytest_mock import MockerFixture
920

1021

1122
class TestFixFile:
@@ -14,9 +25,9 @@ class TestFixFile:
1425
@pytest.fixture(autouse=True)
1526
def setUp(self):
1627
"""Prepare for testing."""
17-
self.filename = "filename"
28+
self.filename = Path("filename")
1829
self.mock_fix = Mock()
19-
self.mock_fix.fix_file.return_value = "new_filename"
30+
self.mock_fix.fix_file.return_value = Path("new_filename")
2031
self.expected_get_fixes_call = {
2132
"project": "project",
2233
"dataset": "model",
@@ -33,48 +44,106 @@ def setUp(self):
3344
"frequency": "frequency",
3445
}
3546

36-
def test_fix(self):
47+
def test_fix(self, mocker: MockerFixture) -> None:
3748
"""Check that the returned fix is applied."""
38-
with patch(
49+
mock_get_fixes = mocker.patch(
3950
"esmvalcore.cmor._fixes.fix.Fix.get_fixes",
4051
return_value=[self.mock_fix],
41-
) as mock_get_fixes:
42-
file_returned = fix_file(
43-
file="filename",
44-
short_name="short_name",
45-
project="project",
46-
dataset="model",
47-
mip="mip",
48-
output_dir=Path("output_dir"),
49-
session=sentinel.session,
50-
frequency="frequency",
51-
)
52-
assert file_returned != self.filename
53-
assert file_returned == "new_filename"
54-
mock_get_fixes.assert_called_once_with(
55-
**self.expected_get_fixes_call,
56-
)
52+
)
53+
file_returned = fix_file(
54+
file=Path("filename"),
55+
short_name="short_name",
56+
project="project",
57+
dataset="model",
58+
mip="mip",
59+
output_dir=Path("output_dir"),
60+
session=sentinel.session,
61+
frequency="frequency",
62+
)
63+
assert file_returned != self.filename
64+
assert file_returned == Path("new_filename")
65+
mock_get_fixes.assert_called_once_with(
66+
**self.expected_get_fixes_call,
67+
)
5768

58-
def test_nofix(self):
69+
def test_fix_returns_cubes(
70+
self,
71+
mocker: MockerFixture,
72+
tmp_path: Path,
73+
) -> None:
74+
"""Check that the returned fix is applied."""
75+
# Prepare some mock fixed data and save it to a file.
76+
fixed_file = LocalFile(tmp_path / "new_filename.nc")
77+
fixed_cube = iris.cube.Cube([0], var_name="tas")
78+
fixed_cube.attributes.globals = {"a": "b"}
79+
iris.save(fixed_cube, fixed_file)
80+
81+
# Set up a mock fix to that returns this data.
82+
self.mock_fix.fix_file.return_value = fixed_file
83+
mock_get_fixes = mocker.patch(
84+
"esmvalcore.cmor._fixes.fix.Fix.get_fixes",
85+
return_value=[self.mock_fix],
86+
)
87+
88+
mock_input_file = LocalFile(self.filename)
89+
result = fix_file(
90+
file=mock_input_file,
91+
short_name="short_name",
92+
project="project",
93+
dataset="model",
94+
mip="mip",
95+
output_dir=Path("output_dir"),
96+
session=sentinel.session,
97+
frequency="frequency",
98+
)
99+
# Check that a sequence of cubes is returned and that the attributes
100+
# of the input file have been updated with the global attributes of the
101+
# fixed cube for recording provenance.
102+
assert isinstance(result, Sequence)
103+
assert len(result) == 1
104+
assert isinstance(result[0], iris.cube.Cube)
105+
assert result[0].var_name == "tas"
106+
assert "a" in mock_input_file.attributes
107+
assert mock_input_file.attributes["a"] == "b"
108+
mock_get_fixes.assert_called_once_with(
109+
**self.expected_get_fixes_call,
110+
)
111+
112+
def test_nofix(self, mocker: MockerFixture) -> None:
59113
"""Check that the same file is returned if no fix is available."""
60-
with patch(
114+
mock_get_fixes = mocker.patch(
61115
"esmvalcore.cmor._fixes.fix.Fix.get_fixes",
62116
return_value=[],
63-
) as mock_get_fixes:
64-
file_returned = fix_file(
65-
file="filename",
66-
short_name="short_name",
67-
project="project",
68-
dataset="model",
69-
mip="mip",
70-
output_dir=Path("output_dir"),
71-
session=sentinel.session,
72-
frequency="frequency",
73-
)
74-
assert file_returned == self.filename
75-
mock_get_fixes.assert_called_once_with(
76-
**self.expected_get_fixes_call,
77-
)
117+
)
118+
file_returned = fix_file(
119+
file=Path("filename"),
120+
short_name="short_name",
121+
project="project",
122+
dataset="model",
123+
mip="mip",
124+
output_dir=Path("output_dir"),
125+
session=sentinel.session,
126+
frequency="frequency",
127+
)
128+
assert file_returned == self.filename
129+
mock_get_fixes.assert_called_once_with(
130+
**self.expected_get_fixes_call,
131+
)
132+
133+
def test_nofix_if_not_path(self, mocker: MockerFixture) -> None:
134+
"""Check that the same object is returned if the input is not a Path."""
135+
mock_data_element = mocker.create_autospec(DataElement, instance=True)
136+
file_returned = fix_file(
137+
file=mock_data_element,
138+
short_name="short_name",
139+
project="project",
140+
dataset="model",
141+
mip="mip",
142+
output_dir=Path("output_dir"),
143+
session=sentinel.session,
144+
frequency="frequency",
145+
)
146+
assert file_returned is mock_data_element
78147

79148

80149
class TestGetCube:

0 commit comments

Comments
 (0)