|
1 | 1 | # Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
|
2 | 2 | # SPDX-License-Identifier: Apache-2.0
|
3 | 3 |
|
4 |
| -"""Utilities to handle MOM6 parameter files. |
| 4 | +"""Parser for MOM6 parameter files. |
5 | 5 |
|
6 | 6 | The MOM6 parameter file format is described here:
|
7 | 7 |
|
|
11 | 11 | - no opening nor closing clauses ('&NAME' and '/')
|
12 | 12 | - usage of an override directive ('#override')
|
13 | 13 | - some characters, like '*', are allowed in the MOM6 parameter files, but not in namelists
|
| 14 | + - keys are case-sensitive |
14 | 15 | We have also found C-style comments in MOM6 parameter files used by CESM. These are ignored by MOM6, but
|
15 | 16 | are not actually part of the specification.
|
16 |
| -
|
17 |
| -However, it is possible to preprocess the file to make it a conforming Fortran namelist and then use the f90nml |
18 |
| -package to read it. Similarly, one can use the f90nml package to write the file and then postprocess it. |
19 |
| -
|
20 |
| -This means that the path from a MOM6 parameter file to a Python dictionary requires the following steps: |
21 |
| - 1. read file and preprocess it to handle the directives and the C-style comments. |
22 |
| - 2. add opening and closing namelist clauses |
23 |
| - 3. parse the file with f90nml, which returns a Namelist object |
24 |
| - 4. convert the Namelist object to a Python dictionary |
25 |
| -
|
26 |
| -Similarly, to get write a Python dictionary as a MOM6 parameter file, one requires the following steps: |
27 |
| - 1. convert the Python dictionary into a Namelist object |
28 |
| - 2. write the Namelist object to a file |
29 |
| - 3. remove opening and closing namelist clauses |
30 |
| -
|
31 |
| - In the following, we use the following naming conventions: |
32 |
| - - 'mom6_input': the contents of the parameter file as a Python dictionary |
33 |
| - - 'mom6_input_str': the contents of the parameter file, stored as a string |
34 |
| - - 'nml_str': the contents of the file, patched to make it a conforming namelist, stored as a string |
35 |
| -
|
36 |
| -We then have utility functions to convert from one representation to another: |
37 |
| - - nml_str -> mom6_input (_nml_str_to_mom6_input) |
38 |
| - - mom6_input -> nml_str (_mom6_input_to_nml_str) |
39 |
| - - mom6_input_str -> nml_str (_mom6_input_str_to_nml_str + _patch_mom6_input_str) |
40 |
| - - nml_str -> mom6_input_str (_nml_str_to_mom6_input_str + _unpatch_mom6_input_str) |
41 |
| -
|
42 |
| -For round-trip parsing, one needs to keep track of the changes done to the file to make it a conforming Fortran |
43 |
| -namelist and then undo those changes. Since we use the f90mnml parser ability to patch a file as it is read, we also |
44 |
| -need to keep the original nml_str and a dictionary with all the changes done to mom6_input. We do this by introducing |
45 |
| -a MOM6Input class that extends the dict class. |
46 | 17 | """
|
47 | 18 |
|
48 |
| -from pathlib import Path |
49 |
| -import re |
50 |
| -from io import StringIO |
51 |
| - |
52 |
| -import f90nml |
53 |
| - |
54 |
| - |
55 |
| -def _patch_mom6_input_str(mom6_input_str: str) -> tuple[str, dict]: |
56 |
| - """Modify the contents of a MOM6 file into a Fortran namelist format readable by f90nml. |
57 |
| -
|
58 |
| - Currently, the "#override" directive is not properly supported. When parsing the file, we will treat variables with |
59 |
| - this directive as normal variables (i.e., we will pretend the directive is not there), but when writing the file |
60 |
| - back, the directive will be preserved. This might introduce unexpected changes. |
61 |
| -
|
62 |
| - Also includes fixes for some non-standard things we have come across. In particular: |
63 |
| - - C style comments (/* This is a comment */). These are added by CESM/CIME. We simply remove them and do not put |
64 |
| - them back when writing to a file. |
65 |
| - - "#" before a variable declaration (without the "override"). Some experiments suggest the following behaviour |
66 |
| - from the MOM6 parser: "# variable = 1" is equivalent to "variable = 1", while "#variable = 1" is equivalent to |
67 |
| - "!variable = 1". We try to handle them accordingly and to preserve them when writing the file back. |
68 |
| - (Reference: https://github.com/COSIMA/mom6-panan/commit/80e4a872f2b24f2e41da87439dd342df0c643d00#r130376163) |
69 |
| -
|
70 |
| - The changes are recorded as a "patch", which is a dictionary: the keys are the line numbers where changes |
71 |
| - were made, while the values are tuples containing a keyword describing the type of change and, optionally, a string. |
72 |
| -
|
73 |
| - Args: |
74 |
| - mom6_input_str (str): Contents of the MOM6 parameter file to patch. |
75 |
| -
|
76 |
| - Returns: |
77 |
| - tuple: Contents of the patched MOM6 parameter file and the patch that was applied. |
78 |
| - """ |
79 |
| - # Define several patterns that need to be matched |
80 |
| - comment_pattern = re.compile(r"/\*.*?\*/", flags=re.DOTALL) |
81 |
| - zstar_pattern = re.compile(r"Z\*") |
82 |
| - block_pattern = re.compile(r"KPP%|%KPP|CVMix_CONVECTION%|%CVMix_CONVECTION|CVMIX_DDIFF%|%CVMIX_DDIFF") |
83 |
| - override_directive_pattern = re.compile(r"^(#override\s*?)") |
84 |
| - incorrect_directive_pattern = re.compile(r"^(#\s+)") |
85 |
| - comment_directive_pattern = re.compile(r"^#((?!override)\w+\b\s*=\s*\w+$)") |
86 |
| - |
87 |
| - # Modify the input while recording the changes |
88 |
| - patch = {} |
89 |
| - output = "" |
90 |
| - lines = mom6_input_str.split("\n") |
91 |
| - for i in range(len(lines)): |
92 |
| - line = lines[i] + "\n" |
93 |
| - if zstar_pattern.search(line): |
94 |
| - patch[i] = ("zstar", line) |
95 |
| - output += zstar_pattern.sub("ZSTAR", line) |
96 |
| - elif block_pattern.search(line): |
97 |
| - patch[i] = ("block", line) |
98 |
| - output += block_pattern.sub("", line) |
99 |
| - elif override_directive_pattern.search(line): |
100 |
| - patch[i] = ("override", override_directive_pattern.match(line).group(0)) |
101 |
| - output += override_directive_pattern.sub("", line) |
102 |
| - elif incorrect_directive_pattern.search(line): |
103 |
| - patch[i] = ( |
104 |
| - "incorrect directive", |
105 |
| - incorrect_directive_pattern.match(line).group(0), |
106 |
| - ) |
107 |
| - output += incorrect_directive_pattern.sub("", line) |
108 |
| - elif comment_directive_pattern.search(line): |
109 |
| - patch[i] = ("comment_directive", line) |
110 |
| - output += "\n" |
111 |
| - else: |
112 |
| - output += line |
113 |
| - |
114 |
| - # Remove all C-style comments. These are not recorded and will not be undone. |
115 |
| - def replace_comment(match): |
116 |
| - return "\n" * match.group().count("\n") |
117 |
| - |
118 |
| - output = comment_pattern.sub(replace_comment, output) |
119 |
| - |
120 |
| - return output, patch |
121 |
| - |
| 19 | +from access.parsers.config import ConfigParser |
122 | 20 |
|
123 |
| -def _unpatch_mom6_input_str(mom6_input_str: str, patch: dict = None) -> str: |
124 |
| - """Undo the changes that were done to a MOM6 parameter file to make it into a conforming Fortran namelist. |
125 | 21 |
|
126 |
| - Args: |
127 |
| - mom6_input_str (str): Contents of the MOM6 parameter file to unpatch. |
128 |
| - patch (dict): A dict containing the patch to revert. |
| 22 | +class MOM6InputParser(ConfigParser): |
| 23 | + """MOM6 input file parser. |
129 | 24 |
|
130 |
| - Returns: |
131 |
| - str: Unpatched contents of the MOM6 parameter file. |
| 25 | + Note: The "#override" directive is currently not implemented. |
132 | 26 | """
|
133 |
| - output = "" |
134 |
| - lines = mom6_input_str.split("\n")[1:-2] |
135 |
| - for i in range(len(lines)): |
136 |
| - line = lines[i] + "\n" |
137 |
| - if i in patch: |
138 |
| - if patch[i][0] == "block": |
139 |
| - output += patch[i][1] |
140 |
| - elif patch[i][0] == "zstar": |
141 |
| - output += re.sub(r"ZSTAR", "Z*", line) |
142 |
| - elif patch[i][0] == "override": |
143 |
| - output += patch[i][1] + line |
144 |
| - elif patch[i][0] == "incorrect directive": |
145 |
| - output += patch[i][1] + line |
146 |
| - elif patch[i][0] == "comment_directive": |
147 |
| - output += patch[i][1] |
148 |
| - else: |
149 |
| - line = line.lstrip() if line != "\n" else line |
150 |
| - output += line |
151 |
| - return output |
152 | 27 |
|
153 |
| - |
154 |
| -def _mom6_input_str_to_nml_str(mom6_input_str: str) -> str: |
155 |
| - """Convert the MOM6 parameter file to a conforming Fortran namelist. |
156 |
| -
|
157 |
| - Args: |
158 |
| - mom6_input_str (str): Contents of the MOM6 parameter file. |
159 |
| -
|
160 |
| - Returns: |
161 |
| - str: Fortran namelist. |
162 |
| - """ |
163 |
| - return "&mom6\n" + mom6_input_str + "\n/" |
164 |
| - |
165 |
| - |
166 |
| -def _nml_str_to_mom6_input_str(nml_str: str) -> str: |
167 |
| - """Convert a Fortran namelist into a MOM6 parameter file. |
168 |
| -
|
169 |
| - Args: |
170 |
| - nml_str (str): Fortran namelist. |
171 |
| -
|
172 |
| - Returns: |
173 |
| - str: MOM6 parameter file. |
174 |
| - """ |
175 |
| - lines = nml_str.split("\n") |
176 |
| - lines = lines[1:-2] |
177 |
| - return "\n".join(lines) |
178 |
| - |
179 |
| - |
180 |
| -def _mom6_input_to_nml_str(mom6_input: dict) -> str: |
181 |
| - """Convert MOM6 parameters stored in a dictionary into a Fortran namelist. |
182 |
| -
|
183 |
| - Args: |
184 |
| - mom6_input (dict): Dictionary of MOM6 parameters. |
185 |
| -
|
186 |
| - Returns: |
187 |
| - str: Fortran namelist. |
188 |
| - """ |
189 |
| - output_file = StringIO("") |
190 |
| - nml = f90nml.Namelist({"mom6": mom6_input}) |
191 |
| - nml.uppercase = True |
192 |
| - nml.false_repr = "False" |
193 |
| - nml.true_repr = "True" |
194 |
| - nml.indent = 0 |
195 |
| - nml.write(output_file) |
196 |
| - return output_file.getvalue() |
197 |
| - |
198 |
| - |
199 |
| -def _nml_str_to_mom6_input(nml_str: str) -> dict: |
200 |
| - """Convert MOM6 parameters stored as a Fortran namelist into a dictionary. |
201 |
| -
|
202 |
| - Args: |
203 |
| - nml_str (str): Fortran namelist. |
204 |
| -
|
205 |
| - Returns: |
206 |
| - dict: Dictionary of MOM6 parameters. |
207 |
| - """ |
208 |
| - parser = f90nml.Parser() |
209 |
| - nml = parser.reads(nml_str) |
210 |
| - nml.uppercase = True |
211 |
| - return dict(nml.todict()["mom6"]) |
212 |
| - |
213 |
| - |
214 |
| -class Mom6Input(dict): |
215 |
| - """Class to read, store, modify and write a MOM6 parameter file. |
216 |
| -
|
217 |
| - This class is used to enable round-trip parsing of MOM6 parameter files. |
218 |
| - It overrides the dict methods to: |
219 |
| - - stored all the keys in upper case |
220 |
| - - keep track of the changes done to the original dictionary |
221 |
| -
|
222 |
| - It also stores the "patch" that was applied to the mom6_input_str to convert it to a conforming Fortran namelist. |
223 |
| - This is used to "undo" the changes when writing the file. |
224 |
| - """ |
225 |
| - |
226 |
| - # Patched contents of the file to make it look like proper f90 namelist |
227 |
| - _mom6_input_str_patched = None |
228 |
| - |
229 |
| - # Dictionary containing information that can be used to reconstruct the original file from the output of f90nml |
230 |
| - _file_patch = {} |
231 |
| - |
232 |
| - # A record of all the changes done to the dictionary that can be passed to f90nml to do round-trip parsing |
233 |
| - _nml_patch = None |
234 |
| - |
235 |
| - # A record of keys that have been deleted from the dictionary |
236 |
| - _deleted_keys = [] |
237 |
| - |
238 |
| - def __init__(self, file_name: str = None): |
239 |
| - """Read NOM6 parameters from file. |
240 |
| -
|
241 |
| - Args: |
242 |
| - file_name (str): Name of file to read. |
243 |
| - """ |
244 |
| - # Open file and read contents |
245 |
| - file = Path(file_name) |
246 |
| - if not file.is_file(): |
247 |
| - raise FileNotFoundError(f"File not found: {file.as_posix()}") |
248 |
| - |
249 |
| - with open(file, "r") as f: |
250 |
| - mom6_input_str = f.read() |
251 |
| - |
252 |
| - # Convert file contents to dictionary |
253 |
| - self._mom6_input_str_patched, self._file_patch = _patch_mom6_input_str(mom6_input_str) |
254 |
| - nml_str = _mom6_input_str_to_nml_str(self._mom6_input_str_patched) |
255 |
| - mom6_input = _nml_str_to_mom6_input(nml_str) |
256 |
| - |
257 |
| - # Initialize class dictionary |
258 |
| - super().__init__(mom6_input) |
259 |
| - self._keys_to_upper() |
260 |
| - |
261 |
| - # Initialize nml patch |
262 |
| - self._nml_patch = {"mom6": {}} |
263 |
| - |
264 |
| - def __setitem__(self, key, value): |
265 |
| - """Override method to add item to dict. |
266 |
| -
|
267 |
| - This method takes into account that all keys should be stored in uppercase. It also adds the new item to the |
268 |
| - namelist patch used for round-trip parsing. |
269 |
| - """ |
270 |
| - super().__setitem__(key.upper(), value) |
271 |
| - |
272 |
| - if key.upper() in self._deleted_keys: |
273 |
| - self._deleted_keys.remove(key.upper()) |
274 |
| - |
275 |
| - if self._nml_patch: |
276 |
| - self._nml_patch["mom6"][key.upper()] = value |
277 |
| - |
278 |
| - def __getitem__(self, key): |
279 |
| - """Override method to get item from dict, taking into account all keys are stored in uppercase.""" |
280 |
| - return super().__getitem__(key.upper()) |
281 |
| - |
282 |
| - def __delitem__(self, key): |
283 |
| - """Override method to delete item from dict, so that all keys are stored in uppercase.""" |
284 |
| - self._deleted_keys.append(key.upper()) |
285 |
| - super().__delitem__(key.upper()) |
286 |
| - |
287 |
| - def write(self, file: Path): |
288 |
| - """Write contents of MOM6Input to a file. |
289 |
| -
|
290 |
| - Args: |
291 |
| - file (Path): File to write to. |
292 |
| - """ |
293 |
| - # Streams to pass to f90nml |
294 |
| - nml_file = StringIO(_mom6_input_str_to_nml_str(self._mom6_input_str_patched)) |
295 |
| - tmp_file = StringIO("") |
296 |
| - |
297 |
| - parser = f90nml.Parser() |
298 |
| - parser.read(nml_file, self._nml_patch, tmp_file) |
299 |
| - mom6_input_str = _unpatch_mom6_input_str(tmp_file.getvalue(), self._file_patch) |
300 |
| - |
301 |
| - # Change keys to uppercase using a regex substitution, as there seems to be no way of doing this with f90nml |
302 |
| - # when applying a nml patch. |
303 |
| - mom6_input_str = re.sub(r"((?<=^)|(?<=\n))(\w+)", lambda pat: pat.group(2).upper(), mom6_input_str) |
304 |
| - |
305 |
| - # Explicitly removed keys from string |
306 |
| - for key in self._deleted_keys: |
307 |
| - mom6_input_str = re.sub(r"\s*" + f"{key}" + r"\s*=\s*\S*\s*\n", r"\n", mom6_input_str) |
308 |
| - |
309 |
| - file.write_text(mom6_input_str) |
310 |
| - |
311 |
| - def _keys_to_upper(self): |
312 |
| - """Change all keys in dictionary to uppercase.""" |
313 |
| - for key in list(self.keys()): |
314 |
| - if not key.isupper(): |
315 |
| - self[key.upper()] = self.pop(key) |
316 |
| - |
317 |
| - |
318 |
| -def read_mom6_input(file_name: str) -> Mom6Input: |
319 |
| - """Read the contents of a MOM6 parameter file and return its contents as an instance of the MOM6Input class. |
320 |
| -
|
321 |
| - Args: |
322 |
| - file_name: Name of MOM6 parameter file to read. |
323 |
| -
|
324 |
| - Returns: |
325 |
| - MOM6Input: Contents of parameter file. |
326 |
| - """ |
327 |
| - return Mom6Input(file_name) |
328 |
| - |
329 |
| - |
330 |
| -def write_mom6_input(mom_input: [dict | Mom6Input], file: Path): |
331 |
| - """Write MOM6 parameters stored either as a dict of a MOM6Input to a file. |
332 |
| -
|
333 |
| - Args: |
334 |
| - mom_input (dict|MOM6Input): MOM6 parameters. |
335 |
| - file (Path): File to write to. |
336 |
| - """ |
337 |
| - if isinstance(mom_input, Mom6Input): |
338 |
| - mom_input.write(file) |
339 |
| - else: |
340 |
| - nml_str = _mom6_input_to_nml_str(mom_input) |
341 |
| - mom6_input_str = _nml_str_to_mom6_input_str(nml_str) + "\n" |
342 |
| - file.write_text(mom6_input_str) |
| 28 | + @property |
| 29 | + def case_sensitive_keys(self) -> bool: |
| 30 | + return True |
| 31 | + |
| 32 | + @property |
| 33 | + def grammar(self) -> str: |
| 34 | + return """ |
| 35 | +?start: lines* |
| 36 | +
|
| 37 | +?lines: key_value |
| 38 | + | key_list |
| 39 | + | key_block |
| 40 | + | empty_line |
| 41 | +
|
| 42 | +key_value: key ws* "=" ws* value line_end |
| 43 | +key_list: key ws* "=" ws* value (ws* "," ws* value)+ line_end |
| 44 | +key_block: key "%" line_end block "%" key line_end |
| 45 | +
|
| 46 | +block: (key_value | key_list | empty_line)* |
| 47 | +
|
| 48 | +?value: bool |
| 49 | + | integer |
| 50 | + | float |
| 51 | + | string |
| 52 | +
|
| 53 | +empty_line: line_end |
| 54 | +line_end: (fortran_comment|ws*) NEWLINE |
| 55 | +
|
| 56 | +%import config.key |
| 57 | +%import config.bool |
| 58 | +%import config.integer |
| 59 | +%import config.float |
| 60 | +%import config.string |
| 61 | +%import config.fortran_comment |
| 62 | +%import config.ws |
| 63 | +%import config.NEWLINE |
| 64 | +""" |
0 commit comments