Skip to content

Commit 92b2cce

Browse files
Modify NUOPC config parser to use the new Lark-based infrastructure.
1 parent c5b472f commit 92b2cce

File tree

2 files changed

+174
-113
lines changed

2 files changed

+174
-113
lines changed

src/access/parsers/nuopc_config.py

Lines changed: 61 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
# Copyright 2025 ACCESS-NRI and contributors. See the top-level COPYRIGHT file for details.
22
# SPDX-License-Identifier: Apache-2.0
33

4-
"""Utilities to handle NUOPC configuration files.
4+
"""Parser for NUOPC configuration files.
55
6-
The `nuopc.runconfig` files used by the CESM driver, and thus by the NUOPC-based ACCESS models, are a mixture of formats.
7-
At the top-level, one has the Resource Files as implemented in ESMF. From the ESMF documentation:
6+
The `nuopc.runconfig` files used by the CESM driver, and thus by the NUOPC-based ACCESS models, are a mixture of
7+
formats. At the top-level, one has the Resource Files as implemented in ESMF. From the ESMF documentation:
88
99
A Resource File (RF) is a text file consisting of list of label-value pairs. There is a limit of 1024 characters per
1010
line and the Resource File can contain a maximum of 200 records. Each label should be followed by some data, the
@@ -48,7 +48,9 @@
4848
See https://earthsystemmodeling.org/docs/release/ESMF_8_6_0/ESMF_refdoc/node6.html#SECTION06090000000000000000 for
4949
further details.
5050
51-
The CESM driver then uses tables as defined in Resource Files to store lists of key-value pairs instead of simple values:
51+
52+
The CESM driver then uses tables as defined in Resource Files to store lists of key-value pairs instead of simple
53+
values:
5254
5355
DRIVER_attributes::
5456
Verbosity = off
@@ -67,82 +69,62 @@
6769
ocn2glc_levels = 1:10:19:26:30:33:35
6870
::
6971
70-
This format of key-value pairs does not seem to be documented and, although it resembles Fortran namlists, it is not.
72+
This key-value format does not seem to be documented and, although it resembles Fortran namelists, it is not one.
7173
For example, the keys are case-sensitive, which is not the case in Fortran namelists. The format used to store arrays
7274
of values is also not the same as in Fortran namelists.
7375
"""
7476

75-
from pathlib import Path
76-
import re
77-
78-
from access.parsers.utils import convert_from_string, convert_to_string
79-
80-
81-
def read_nuopc_config(file_name: str) -> dict:
82-
"""Read a NUOPC config file and return its contents as a dictionary.
83-
84-
Args:
85-
file_name (str): File to read.
86-
87-
Returns:
88-
dict: Contents of file.
89-
"""
90-
fname = Path(file_name)
91-
if not fname.is_file():
92-
raise FileNotFoundError(f"File not found: {fname.as_posix()}")
93-
94-
label_value_pattern = re.compile(r"\s*(\w+)\s*:\s*(.+)\s*")
95-
table_start_pattern = re.compile(r"\s*(\w+)\s*::\s*")
96-
table_end_pattern = re.compile(r"\s*::\s*")
97-
assignment_pattern = re.compile(r"\s*(\w+)\s*=\s*(\S+)\s*")
98-
99-
config = {}
100-
with open(fname, "r") as stream:
101-
reading_table = False
102-
label = None
103-
table = None
104-
for line in stream:
105-
line = re.sub(r"(#).*", "", line)
106-
if line.strip():
107-
if reading_table:
108-
if re.match(table_end_pattern, line):
109-
config[label] = table
110-
reading_table = False
111-
else:
112-
match = re.match(assignment_pattern, line)
113-
if match:
114-
table[match.group(1)] = convert_from_string(match.group(2))
115-
else:
116-
raise ValueError(
117-
f"Line: {line} in file {file_name} is not a valid NUOPC configuration specification"
118-
)
119-
120-
elif re.match(table_start_pattern, line):
121-
reading_table = True
122-
match = re.match(label_value_pattern, line)
123-
label = match.group(1)
124-
table = {}
125-
126-
elif re.match(label_value_pattern, line):
127-
match = re.match(label_value_pattern, line)
128-
config[match.group(1)] = [convert_from_string(string) for string in match.group(2).split()]
129-
130-
return config
131-
132-
133-
def write_nuopc_config(config: dict, file: Path):
134-
"""Write a dictionary to a NUOPC config file.
135-
136-
Args:
137-
config (dict): NUOPC configuration to write.
138-
file (Path): File to write to.
139-
"""
140-
with open(file, "w") as stream:
141-
for key, item in config.items():
142-
if isinstance(item, dict):
143-
stream.write(key + "::\n")
144-
for label, value in item.items():
145-
stream.write(" " + label + " = " + convert_to_string(value) + "\n")
146-
stream.write("::\n\n")
147-
else:
148-
stream.write(key + ": " + " ".join(map(convert_to_string, item)) + "\n")
77+
from access.parsers.config import ConfigParser
78+
79+
80+
class NUOPCParser(ConfigParser):
81+
"""NUOPC config parser."""
82+
83+
@property
84+
def case_sensitive_keys(self) -> bool:
85+
return True
86+
87+
@property
88+
def grammar(self) -> str:
89+
return """
90+
?start: lines*
91+
92+
?lines: rfile_key_value
93+
| rfile_key_list
94+
| rfile_key_block
95+
| empty_line
96+
97+
rfile_key_value: ws* key ":" ws* value line_end -> key_value
98+
rfile_key_list: ws* key ":" ws* value (ws* value)+ line_end -> key_list
99+
rfile_key_block: ws* key "::" line_end block "::" line_end -> key_block
100+
101+
block: block_line*
102+
103+
?block_line: block_key_value
104+
| block_key_list
105+
| empty_line
106+
107+
block_key_value : ws* key ws* "=" ws* value line_end -> key_value
108+
block_key_list : ws* key ws* "=" ws* value (":"value)+ line_end -> key_list
109+
110+
?value: logical
111+
| integer
112+
| float
113+
| double
114+
| identifier
115+
| path
116+
117+
empty_line: line_end
118+
line_end: (comment|ws*) NEWLINE
119+
120+
%import config.key
121+
%import config.logical
122+
%import config.integer
123+
%import config.float
124+
%import config.double
125+
%import config.identifier
126+
%import config.path
127+
%import config.comment
128+
%import config.ws
129+
%import config.NEWLINE
130+
"""

tests/test_nuopc_config.py

Lines changed: 113 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,26 @@
22
# SPDX-License-Identifier: Apache-2.0
33

44
import pytest
5-
from unittest.mock import mock_open, patch
5+
from lark.exceptions import UnexpectedCharacters, UnexpectedEOF
66
from pathlib import Path
77

8-
from access.parsers.nuopc_config import read_nuopc_config, write_nuopc_config
8+
from access.parsers.nuopc_config import NUOPCParser
99

1010

11-
@pytest.fixture()
12-
def simple_nuopc_config():
11+
@pytest.fixture(scope="module")
12+
def parser():
13+
"""Fixture instantiating the parser."""
14+
return NUOPCParser()
15+
16+
17+
@pytest.fixture(scope="module")
18+
def nuopc_config():
19+
"""Fixture returning a dict holding the parsed content of a NUOPC config file."""
1320
return dict(
1421
DRIVER_attributes={
1522
"Verbosity": "off",
1623
"cime_model": "cesm",
17-
"logFilePostFix": ".log",
24+
"logFilePostFix": Path(".log"),
1825
"pio_blocksize": -1,
1926
"pio_rearr_comm_enable_hs_comp2io": True,
2027
"pio_rearr_comm_enable_hs_io2comp": False,
@@ -23,73 +30,145 @@ def simple_nuopc_config():
2330
"wv_sat_transition_start": 20.0,
2431
},
2532
COMPONENTS=["atm", "ocn"],
33+
TEST="On",
2634
ALLCOMP_attributes={
2735
"ATM_model": "datm",
2836
"GLC_model": "sglc",
2937
"OCN_model": "mom",
30-
"ocn2glc_levels": "1:10:19:26:30:33:35",
38+
"ocn2glc_levels": [1, 10, 19, 26, 30, 33, 35],
3139
},
3240
)
3341

3442

35-
@pytest.fixture()
36-
def simple_nuopc_config_file():
37-
return """DRIVER_attributes::
43+
@pytest.fixture(scope="module")
44+
def nuopc_config_file():
45+
"""Fixture returning the content of a NUOPC config file."""
46+
return """DRIVER_attributes:: # Comment 1
47+
3848
Verbosity = off
39-
cime_model = cesm
49+
cime_model = cesm # Comment 2
50+
4051
logFilePostFix = .log
4152
pio_blocksize = -1
53+
54+
# Comment 3
55+
4256
pio_rearr_comm_enable_hs_comp2io = .true.
4357
pio_rearr_comm_enable_hs_io2comp = .false.
4458
reprosum_diffmax = -1.000000D-08
4559
wv_sat_table_spacing = 1.000000D+00
4660
wv_sat_transition_start = 2.000000D+01
4761
::
4862
49-
COMPONENTS: atm ocn
63+
TEST: On
64+
65+
# Comment 4
66+
# Comment 5
67+
68+
COMPONENTS: atm ocn # Comment 6
69+
5070
ALLCOMP_attributes::
71+
5172
ATM_model = datm
5273
GLC_model = sglc
5374
OCN_model = mom
5475
ocn2glc_levels = 1:10:19:26:30:33:35
76+
5577
::
5678
5779
"""
5880

5981

60-
@pytest.fixture()
61-
def invalid_nuopc_config_file():
62-
return """DRIVER_attributes::
63-
Verbosity: off
64-
cime_model - cesm
82+
@pytest.fixture(scope="module")
83+
def modified_nuopc_config_file():
84+
"""Fixture returning the content of the previous NUOPC config file, but with some modifications."""
85+
return """DRIVER_attributes:: # Comment 1
86+
87+
Verbosity = off
88+
cime_model = cesm # Comment 2
89+
90+
logFilePostFix = .log
91+
pio_blocksize = -1
92+
93+
# Comment 3
94+
95+
pio_rearr_comm_enable_hs_comp2io = .true.
96+
pio_rearr_comm_enable_hs_io2comp = .false.
97+
reprosum_diffmax = -1.000000D-08
98+
wv_sat_table_spacing = 1.000000D+00
99+
wv_sat_transition_start = 2.000000D+01
100+
::
101+
102+
TEST: Off
103+
104+
# Comment 4
105+
# Comment 5
106+
107+
COMPONENTS: atm um # Comment 6
108+
109+
ALLCOMP_attributes::
110+
111+
ATM_model = um
112+
GLC_model = sglc
113+
OCN_model = mom
114+
ocn2glc_levels = 1:10:19:26:30:33:36
115+
65116
::
66117
67-
COMPONENTS::: atm ocn
68118
"""
69119

70120

71-
@patch("pathlib.Path.is_file", new=lambda file: True)
72-
def test_read_nuopc_config(simple_nuopc_config, simple_nuopc_config_file):
73-
with patch("builtins.open", mock_open(read_data=simple_nuopc_config_file)) as m:
74-
config = read_nuopc_config(file_name="simple_nuopc_config_file")
121+
def test_valid_nuopc_config(parser):
122+
"""Test the basic grammar constructs."""
123+
assert dict(parser.parse("TEST: a")) == {"TEST": "a"}
124+
assert dict(parser.parse(" TEST:a")) == {"TEST": "a"}
125+
assert dict(parser.parse("TEST: a b")) == {"TEST": ["a", "b"]}
126+
assert dict(parser.parse("TEST1: a\n TEST2: b")) == {"TEST1": "a", "TEST2": "b"}
127+
assert dict(parser.parse("TEST1: a \nTEST2: b")) == {"TEST1": "a", "TEST2": "b"}
128+
assert dict(parser.parse("TEST1: a b \n TEST2: c")) == {"TEST1": ["a", "b"], "TEST2": "c"}
129+
assert dict(parser.parse("TEST::\na=1\n::")) == {"TEST": {"a": 1}}
130+
assert dict(parser.parse("TEST::\na=1:2:3\n::")) == {"TEST": {"a": [1, 2, 3]}}
131+
132+
133+
def test_invalid_nuopc_config(parser):
134+
"""Test that the parser rejects malformed expressions."""
135+
with pytest.raises(UnexpectedCharacters):
136+
parser.parse("TEST::\n cime_model - cesm")
137+
138+
with pytest.raises(UnexpectedCharacters):
139+
parser.parse("TEST:\n cime_model = cesm\n::")
140+
141+
with pytest.raises(UnexpectedEOF):
142+
parser.parse("TEST::\n cime_model = cesm")
143+
144+
with pytest.raises(UnexpectedCharacters):
145+
parser.parse("TEST::\n cime_model = cesm ATM_model = datm")
146+
75147

76-
assert config == simple_nuopc_config
148+
def test_nuopc_config_parse(parser, nuopc_config, nuopc_config_file):
149+
"""Test parsing of a file."""
150+
config = parser.parse(nuopc_config_file)
151+
assert dict(config) == nuopc_config
77152

78153

79-
def test_write_nuopc_config(simple_nuopc_config, simple_nuopc_config_file):
80-
with patch("builtins.open", mock_open()) as m:
81-
write_nuopc_config(simple_nuopc_config, Path("config_file"))
154+
def test_nuopc_config_roundtrip(parser, nuopc_config_file):
155+
"""Test round-trip parsing."""
156+
config = parser.parse(nuopc_config_file)
82157

83-
assert simple_nuopc_config_file == "".join(call.args[0] for call in m().write.mock_calls)
158+
assert str(config) == nuopc_config_file
84159

85160

86-
@patch("pathlib.Path.is_file", new=lambda file: True)
87-
def test_read_invalid_nuopc_config_file(invalid_nuopc_config_file):
88-
with patch("builtins.open", mock_open(read_data=invalid_nuopc_config_file)) as m:
89-
with pytest.raises(ValueError):
90-
read_nuopc_config(file_name="invalid_nuopc_config_file")
161+
def test_nuopc_config_roundtrip_with_mutation(parser, nuopc_config_file, modified_nuopc_config_file):
162+
"""Test round-trip parsing with mutation of the config."""
163+
config = parser.parse(nuopc_config_file)
91164

165+
# Scalar
166+
config["TEST"] = "Off"
167+
# List
168+
config["COMPONENTS"] = ["atm", "um"]
169+
# Scalar in table
170+
config["ALLCOMP_attributes"]["ATM_model"] = "um"
171+
# List in table
172+
config["ALLCOMP_attributes"]["ocn2glc_levels"] = [1, 10, 19, 26, 30, 33, 36]
92173

93-
def test_read_missing_nuopc_config_file():
94-
with pytest.raises(FileNotFoundError):
95-
read_nuopc_config(file_name="garbage")
174+
assert str(config) == modified_nuopc_config_file

0 commit comments

Comments
 (0)