Skip to content

Commit ef021f8

Browse files
authored
minimal lark parser (#175)
Add a generic parser that recognizes only blocks and the lines within them. A type-aware parser is coming soon, but we can fall back to this one whenever type information is not available. Type information will be available when calling load on a simulation, model, or package. However, we may also want a generic load that tries to infer the component from the input file name and contents; in that case we can connect this parser to a type-aware converter that matches fields against known types to recognize the component, then parses the fields before structuring. Also, eagerly import the submodules of flopy.mf6 so that all components are registered on first import of the top-level module.
1 parent e695f51 commit ef021f8

File tree

10 files changed

+574
-314
lines changed

10 files changed

+574
-314
lines changed

flopy4/mf6/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
from flopy4.mf6 import ( # noqa: F401
2+
gwf,
3+
ims,
4+
simulation,
5+
tdis,
6+
)
17
from flopy4.mf6.codec import dump
28
from flopy4.mf6.component import Component
39
from flopy4.mf6.converter import COMPONENT_CONVERTER
Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,46 @@
11
from os import PathLike
2+
from pathlib import Path
23
from typing import Any
34

5+
from flopy4.mf6.codec.reader.parser import make_generic_parser
6+
from flopy4.mf6.codec.reader.transformer import GenericTransformer
7+
48

59
def load(path: str | PathLike) -> Any:
    """
    Read an MF6 input file from disk and parse its contents.

    Parameters
    ----------
    path : str | PathLike
        Location of the MF6 input file.

    Returns
    -------
    Any
        Whatever ``loads`` returns for the text of the file.
    """
    # Read the whole file as text (platform default encoding), then
    # hand the string to the string-based entry point.
    text = Path(path).read_text()
    return loads(text)
827

928

1029
def loads(data: str) -> Any:
    """
    Parse the text of an MF6 input file.

    Parameters
    ----------
    data : str
        Contents of an MF6 input file.

    Returns
    -------
    Any
        Result of running ``GenericTransformer`` over the parse tree
        produced by the generic parser.
    """
    generic_parser = make_generic_parser()
    to_structure = GenericTransformer()
    tree = generic_parser.parse(data)
    return to_structure.transform(tree)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from os import PathLike
2+
from pathlib import Path
3+
4+
import jinja2
5+
from modflow_devtools.dfn import Dfn
6+
7+
from .filters import get_block_variables, get_blocks, get_variables, is_recarray_block
8+
9+
10+
def _get_template_env():
    """Build the Jinja2 environment used to render grammar templates.

    Templates are loaded from the ``templates`` directory next to this
    module, and the ``is_recarray_block`` and ``get_block_variables``
    helpers are registered as template filters.
    """
    template_dir = Path(__file__).parent / "templates"
    env = jinja2.Environment(
        loader=jinja2.FileSystemLoader(template_dir),
        trim_blocks=True,
        lstrip_blocks=True,
        keep_trailing_newline=True,
    )
    env.filters.update(
        is_recarray_block=is_recarray_block,
        get_block_variables=get_block_variables,
    )
    return env


# Shared environment, built once when the module is imported.
_TEMPLATE_ENV = _get_template_env()
24+
25+
26+
def make_grammar(dfn: Dfn, outdir: PathLike):
    """
    Generate a Lark grammar file for a single component.

    Parameters
    ----------
    dfn : Dfn
        Component definition. Its ``"name"`` entry is passed to the
        template as ``component`` and, with hyphens removed, is used
        as the stem of the output file name.
    outdir : PathLike
        Directory in which the ``.lark`` file is written. It is
        created (including parents) if it does not exist.
    """
    outdir = Path(outdir).expanduser().resolve().absolute()
    # Ensure the directory exists so the function also works when it
    # is called directly rather than through a caller that creates it.
    outdir.mkdir(parents=True, exist_ok=True)
    template = _TEMPLATE_ENV.get_template("component.lark.j2")
    # Render before touching the file system so a template error does
    # not leave an empty or truncated grammar file behind.
    rendered = template.render(
        component=dfn["name"],
        blocks=get_blocks(dfn),
        variables=get_variables(dfn),
    )
    target_path = outdir / f"{dfn['name'].replace('-', '')}.lark"
    # Explicit encoding keeps the generated file independent of locale.
    target_path.write_text(rendered, encoding="utf-8")
35+
36+
37+
def make_all_grammars(dfns: dict[str, Dfn], outdir: PathLike):
    """Generate one grammar file per component definition.

    The output directory is created (with parents) if needed, then
    ``make_grammar`` is called for every value in ``dfns``.
    """
    target_dir = Path(outdir).expanduser().resolve().absolute()
    target_dir.mkdir(parents=True, exist_ok=True)
    for component_dfn in dfns.values():
        make_grammar(component_dfn, target_dir)
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
start: [WS] [_NL*] (block [[WS] _NL*])+ [WS]
2+
block: "begin"i block_name _NL _content "end"i block_name _NL+
3+
block_name: CNAME [INT]
4+
_content: line*
5+
line: [WS] item+ _NL+
6+
item: word | NUMBER
7+
// A word is a run of letters, digits and the punctuation listed in the class.
// NOTE(review): inside the character class, `,-\\` looks like a range from
// ',' up to a backslash rather than a literal hyphen; if so it also admits
// characters such as '/', ':' and '='. Confirm whether that is intended
// (e.g. for file paths) before tightening the pattern.
word: /[a-zA-Z0-9._'~,-\\(\\)]+/
8+
9+
%import common.NEWLINE -> _NL
10+
%import common.WS
11+
%import common.WS_INLINE
12+
%import common.CNAME
13+
%import common.WORD
14+
%import common.NUMBER
15+
%import common.INT
16+
%import common.SH_COMMENT
17+
%import common._STRING_INNER
18+
%ignore WS_INLINE
19+
%ignore SH_COMMENT

flopy4/mf6/codec/reader/parser.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from pathlib import Path
2+
3+
from lark import Lark
4+
5+
6+
def make_generic_parser() -> Lark:
    """
    Build a Lark parser for the generic MF6 grammar.

    The grammar text is read from ``grammar/mf6.lark`` in the directory
    containing this module and compiled with the LALR parser.

    Returns
    -------
    Lark
        A new parser instance; a fresh one is built on every call.
    """
    grammar_path = Path(__file__).parent / "grammar" / "mf6.lark"
    # The grammar ships with the package, so decode it explicitly
    # rather than relying on the platform default encoding.
    grammar = grammar_path.read_text(encoding="utf-8")
    # Lark's debug option is meant for debugging sessions only, so it
    # is left at its default (off) for library use.
    return Lark(grammar, parser="lalr")
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
from typing import Any
2+
3+
from lark import Token, Transformer
4+
5+
6+
class GenericTransformer(Transformer):
    """
    Transformer for parse trees produced by the generic MF6 grammar.

    Reduces a tree to a dictionary keyed by block name. Each value is
    the list of lines in that block, and each line is a list of the
    items (words and numbers) found on it.
    """

    def start(self, items: list[Any]) -> dict[str, Any]:
        """Merge the per-block dictionaries into a single mapping."""
        merged = {}
        for child in items:
            # Anything that is not a block result is skipped.
            if isinstance(child, dict):
                name, lines = next(iter(child.items()))
                merged[name] = lines
        return merged

    def block(self, items: list[Any]) -> dict[str, Any]:
        """Return ``{name: lines}`` for one block.

        The first child is used as the block name; the last child is
        dropped and everything in between is kept as the block body.
        """
        return {items[0]: items[1:-1]}

    def block_name(self, items: list[Any]) -> str:
        """Join the non-``None`` parts of a block name with spaces."""
        return " ".join(str(part) for part in items if part is not None)

    def line(self, items: list[Any]) -> list[Any]:
        """Return the items of a line, without the first child.

        NOTE(review): the first child is presumably the slot for the
        optional leading whitespace in the grammar's ``line`` rule.
        """
        _, *rest = items[:1] + items[1:]
        return rest

    def item(self, items: list[Any]) -> str | float | int:
        """Unwrap the single child of an ``item`` node."""
        (first, *_) = items
        return first

    def word(self, items: list[Token]) -> str:
        """Convert a word token to a plain string."""
        (first, *_) = items
        return str(first)

    def NUMBER(self, token: Token) -> int | float:
        """Convert a numeric token to ``float`` or ``int``.

        Text containing a decimal point or an exponent marker becomes
        a float, anything else an int; if conversion raises
        ``ValueError`` a float conversion is attempted.
        """
        text = str(token)
        try:
            looks_real = "." in text or "e" in text.lower()
            return float(text) if looks_real else int(text)
        except ValueError:
            return float(text)

    def CNAME(self, token: Token) -> str:
        """Convert a name token to a plain string."""
        return str(token)

    def INT(self, token: Token) -> int:
        """Convert an integer token to ``int``."""
        return int(token)

flopy4/mf6/gwf/dis.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
@xattree
1515
class Dis(Package):
1616
length_units: str = field(
17+
block="options",
1718
default=None,
18-
metadata={"block": "options"},
1919
)
2020
nogrb: bool = field(block="options", default=False)
2121
xorigin: float = field(block="options", default=None)

0 commit comments

Comments
 (0)