-
Notifications
You must be signed in to change notification settings - Fork 45
Expand file tree
/
Copy pathlocal.py
More file actions
324 lines (276 loc) · 11.3 KB
/
local.py
File metadata and controls
324 lines (276 loc) · 11.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
"""Find files on the local filesystem.
.. deprecated:: 2.14.0
This module has been moved to :mod:`esmvalcore.io.local`. Importing it as
:mod:`esmvalcore.local` is deprecated and will be removed in version 2.16.0.
"""
from __future__ import annotations
import logging
import os.path
import textwrap
import warnings
from pathlib import Path
from typing import TYPE_CHECKING, Any
from esmvalcore.config import CFG
from esmvalcore.config._config import CFG as CONFIG_DEVELOPER
from esmvalcore.config._config import (
get_ignored_warnings,
get_project_config,
load_config_developer,
)
from esmvalcore.exceptions import RecipeError
from esmvalcore.io.local import (
LocalDataSource,
LocalFile,
_filter_versions_called_latest,
_MissingFacetError,
_replace_tags,
_select_latest_version,
)
if TYPE_CHECKING:
from esmvalcore.typing import FacetValue
__all__ = [
"DataSource",
"LocalDataSource",
"LocalFile",
"find_files",
]
logger = logging.getLogger(__name__)
def _ensure_config_developer_drs() -> Path:
"""Ensure that directory structure from config-developer.yml is loaded."""
config_developer_file = CFG.get("config_developer_file")
if not config_developer_file:
config_developer_file = Path(__file__).parent / "config-developer.yml"
if not CONFIG_DEVELOPER:
# Load the config-developer.yml file, but do not update the CMOR tables.
load_config_developer(config_developer_file, set_cmor_tables=False)
return config_developer_file
def _select_drs(input_type: str, project: str, structure: str) -> list[str]:
"""Select the directory structure of input path."""
cfg = get_project_config(project)
input_path_patterns = cfg[input_type]
if isinstance(input_path_patterns, str):
return [input_path_patterns]
if structure in input_path_patterns:
value = input_path_patterns[structure]
if isinstance(value, str):
value = [value]
return value
msg = f"drs {structure} for {project} project not specified in config-developer file"
raise KeyError(msg)
_ROOTPATH_WARNED: set[tuple[str, tuple[str]]] = set()
_LEGACY_DATA_SOURCES_WARNED: set[str] = set()
def _get_data_sources(project: str) -> list[LocalDataSource]:
"""Get a list of data sources."""
config_developer_file = _ensure_config_developer_drs()
rootpaths = CFG["rootpath"]
default_drs = {
"CMIP3": "ESGF",
"CMIP5": "ESGF",
"CMIP6": "ESGF",
"CORDEX": "ESGF",
"obs4MIPs": "ESGF",
}
for key in (project, "default"):
if key in rootpaths:
paths = rootpaths[key]
nonexistent = tuple(p for p in paths if not os.path.exists(p))
if nonexistent and (key, nonexistent) not in _ROOTPATH_WARNED:
logger.warning(
"Configured '%s' rootpaths '%s' do not exist",
key,
", ".join(str(p) for p in nonexistent),
)
_ROOTPATH_WARNED.add((key, nonexistent))
if isinstance(paths, list):
structure = CFG.get("drs", {}).get(
project,
default_drs.get(project, "default"),
)
paths = dict.fromkeys(paths, structure)
sources: list[LocalDataSource] = []
for path, structure in paths.items():
dir_templates = _select_drs("input_dir", project, structure)
file_templates = _select_drs("input_file", project, structure)
sources.extend(
LocalDataSource(
name="legacy-local",
project=project,
priority=1,
rootpath=Path(path),
dirname_template=d,
filename_template=f,
ignore_warnings=get_ignored_warnings(project, "load"),
)
for d in dir_templates
for f in file_templates
)
if project not in _LEGACY_DATA_SOURCES_WARNED:
logger.warning(
(
"Using legacy data sources for project '%s' using 'rootpath' "
"and 'drs' settings and the path templates from '%s'"
),
project,
config_developer_file,
)
_LEGACY_DATA_SOURCES_WARNED.add(project)
return sources
msg = (
f"No '{project}' or 'default' path specified under 'rootpath' in "
"the configuration."
)
raise KeyError(msg)
class DataSource(LocalDataSource):
"""Data source for finding files on a local filesystem.
.. deprecated:: 2.14.0
This class is deprecated and will be removed in version 2.16.0.
Please use :class:`esmvalcore.local.LocalDataSource` instead.
"""
def __init__(self, *args, **kwargs):
msg = (
"The 'esmvalcore.local.LocalDataSource' class is deprecated and will be "
"removed in version 2.16.0. Please use 'esmvalcore.local.LocalDataSource'"
)
warnings.warn(msg, DeprecationWarning, stacklevel=2)
super().__init__(*args, **kwargs)
@property
def regex_pattern(self) -> str:
"""Get regex pattern that can be used to extract facets from paths."""
return self._regex_pattern
def get_glob_patterns(self, **facets: FacetValue) -> list[Path]:
"""Compose the globs that will be used to look for files."""
try:
return self._get_glob_patterns(**facets)
except _MissingFacetError as exc:
raise RecipeError(exc.args[0]) from exc
def path2facets(self, path: Path, add_timerange: bool) -> dict[str, str]:
"""Extract facets from path."""
return self._path2facets(path, add_timerange)
def find_files(self, **facets: FacetValue) -> list[LocalFile]:
"""Find files."""
return self.find_data(**facets)
def find_files(
*,
debug: bool = False,
**facets: FacetValue,
) -> list[LocalFile] | tuple[list[LocalFile], list[Path]]:
"""Find files on the local filesystem.
.. deprecated:: 2.14.0
This function is deprecated and will be removed in version 2.16.0.
Please use :meth:`esmvalcore.local.LocalDataSource.find_data` instead.
The directories that are searched for files are defined in
:data:`esmvalcore.config.CFG` under the ``'rootpath'`` key using the
directory structure defined under the ``'drs'`` key.
If ``esmvalcore.config.CFG['rootpath']`` contains a key that matches the
value of the ``project`` facet, those paths will be used. If there is no
project specific key, the directories in
``esmvalcore.config.CFG['rootpath']['default']`` will be searched.
See :ref:`findingdata` for extensive instructions on configuring ESMValCore
so it can find files locally.
Parameters
----------
debug
When debug is set to :obj:`True`, the function will return a tuple
with the first element containing the files that were found
and the second element containing the :func:`glob.glob` patterns that
were used to search for files.
**facets
Facets used to search for files. An ``'*'`` can be used to match
any value. By default, only the latest version of a file will
be returned. To select all versions use ``version='*'``. It is also
possible to specify multiple values for a facet, e.g.
``exp=['historical', 'ssp585']`` will match any file that belongs
to either the historical or ssp585 experiment.
The ``timerange`` facet can be specified in `ISO 8601 format
<https://en.wikipedia.org/wiki/ISO_8601>`__.
Note
----
A value of ``timerange='*'`` is supported, but combining a ``'*'`` with
a time or period :ref:`as supported in the recipe <datasets>` is currently
not supported and will return all found files.
Examples
--------
Search for files containing surface air temperature from any CMIP6 model
for the historical experiment:
>>> esmvalcore.local.find_files(
... project='CMIP6',
... activity='CMIP',
... mip='Amon',
... short_name='tas',
... exp='historical',
... dataset='*',
... ensemble='*',
... grid='*',
... institute='*',
... ) # doctest: +SKIP
[LocalFile('/home/bandela/climate_data/CMIP6/CMIP/BCC/BCC-ESM1/historical/r1i1p1f1/Amon/tas/gn/v20181214/tas_Amon_BCC-ESM1_historical_r1i1p1f1_gn_185001-201412.nc')]
Returns
-------
list[LocalFile]
The files that were found.
"""
msg = (
"The function 'esmvalcore.local.find_files' is deprecated and will be removed "
"in version 2.16.0. Please use 'esmvalcore.local.LocalDataSource.find_data'"
)
warnings.warn(msg, DeprecationWarning, stacklevel=2)
facets = dict(facets)
if "original_short_name" in facets:
facets["short_name"] = facets["original_short_name"]
files = []
filter_latest = False
data_sources = _get_data_sources(facets["project"]) # type: ignore
for data_source in data_sources:
for file in data_source.find_data(**facets):
if file.facets.get("version") == "latest":
filter_latest = True
files.append(file)
if filter_latest:
files = _filter_versions_called_latest(files)
if "version" not in facets:
files = _select_latest_version(files)
files.sort() # sorting makes it easier to see what was found
if debug:
globs = []
for data_source in data_sources:
try:
globs.extend(data_source._get_glob_patterns(**facets)) # noqa: SLF001
except _MissingFacetError as exc:
raise RecipeError(exc.args[0]) from exc
return files, sorted(globs)
return files
_GET_OUTPUT_FILE_WARNED: set[str] = set()
def _get_output_file(variable: dict[str, Any], preproc_dir: Path) -> Path:
"""Return the full path to the output (preprocessed) file."""
_ensure_config_developer_drs()
project = variable["project"]
cfg = get_project_config(project)
if project not in _GET_OUTPUT_FILE_WARNED:
_GET_OUTPUT_FILE_WARNED.add(project)
msg = textwrap.dedent(
f"""
Defining 'output_file' in config-developer.yml is deprecated and will be removed in version 2.16.0. Please use the following configuration instead:
projects:
{variable["project"]}:
preprocessor_filename_template: "{cfg["output_file"]}"
""",
).rstrip()
logger.warning(msg)
# Join different experiment names
if isinstance(variable.get("exp"), (list, tuple)):
variable = dict(variable)
variable["exp"] = "-".join(variable["exp"])
try:
outfile = _replace_tags(cfg["output_file"], variable)[0]
except _MissingFacetError as exc:
raise RecipeError(exc.args[0]) from exc
if "timerange" in variable:
timerange = variable["timerange"].replace("/", "-")
outfile = Path(f"{outfile}_{timerange}")
outfile = Path(f"{outfile}.nc")
return Path(
preproc_dir,
variable.get("diagnostic", ""),
variable.get("variable_group", ""),
outfile,
)