|
| 1 | +from os import linesep |
| 2 | +from pathlib import Path |
| 3 | + |
| 4 | +import numpy as np |
| 5 | +from lark import Lark, Transformer |
| 6 | +from xattree import has_xats |
| 7 | + |
| 8 | +__all__ = ["make_parser", "MF6Transformer"] |
| 9 | + |
| 10 | + |
# Base grammar shared by every component parser. The terminals `PARAM`,
# `DICTBLOCK` and `LISTBLOCK` are referenced here but deliberately not
# defined: `make_parser` appends them, built from a component's spec.
#
# `NON_SEPARATOR_STRING` spells out `A-Z` and `_` explicitly; an `A-z`
# range would also admit the ASCII characters that sit between the two
# letter cases (`[`, `]`, `^` and backtick).
_GRAMMAR = r"""
// component
component: _NL* (block _NL+)+ _NL*

// block
block: _dictblock | _listblock
_dictblock: _BEGIN dictblock _NL dict _END dictblock
_listblock: _BEGIN listblock _NL list _END listblock
dictblock: DICTBLOCK
listblock: LISTBLOCK [_blockindex]
_blockindex: INT
_BEGIN: "begin"i
_END: "end"i

// dict
dict.1: (param _NL)*

// list adapted from https://github.com/lark-parser/lark/blob/master/examples/composition/csv.lark
// negative priority for records because the pattern is so indiscriminate
list.-1: record*
record.-1: _record+ _NL
_record: scalar

// parameter
param: key | _pair
_pair: key value
key: PARAM
?value: array
    | list
    | path
    | string
    | scalar
?scalar: int
    | float
    | word

// string
word: WORD
?string: word+
NON_SEPARATOR_STRING: /[a-zA-Z_.;\\\/]+/

// number
int: INT
float: FLOAT

// file path
path: INOUT PATH
PATH: [_PATHSEP] (NON_SEPARATOR_STRING [_PATHSEP]) [NON_SEPARATOR_STRING]
_PATHSEP: "/"
INOUT: "filein"i|"fileout"i

// array
array: constantarray | internalarray | externalarray
constantarray: "CONSTANT" float
internalarray: "INTERNAL" [factor] [iprn] (float* [_NL])*
externalarray: "OPEN/CLOSE" PATH [factor] ["binary"] [iprn]
factor: "FACTOR" NUMBER
iprn: "IPRN" INT

// newline
_NL: /(\r?\n[\t ]*)+/

%import common.SH_COMMENT -> COMMENT
%import common.SIGNED_NUMBER -> NUMBER
%import common.SIGNED_INT -> INT
%import common.SIGNED_FLOAT -> FLOAT
%import common.WORD
%import common.WS_INLINE

%ignore COMMENT
%ignore WS_INLINE
"""
"""
EBNF description for the MODFLOW 6 input language.
"""
| 86 | + |
| 87 | + |
def make_parser(cls: type, **kwargs) -> Lark:
    """
    Build a parser for the MODFLOW 6 input language, specialized to
    the parameters and blocks found in the specification of `cls`.

    The base grammar is extended with three terminals: `PARAM` (all
    parameter names in the flat spec), `DICTBLOCK` and `LISTBLOCK`
    (block names taken from each parameter's "block" metadata). Every
    name is emitted as a case-insensitive string literal.

    Parameters
    ----------
    cls : type
        Class for which `has_xats` is true.
    **kwargs
        Passed through unchanged to the `Lark` constructor.

    Returns
    -------
    Lark
        Parser with `component` as its start rule.

    Raises
    ------
    ValueError
        If `cls` is not a `xattree`.

    Notes
    -----
    Blocks holding only named parameters get their own terminal,
    apart from list blocks: the record pattern used for list elements
    matches more broadly than the parameter pattern, so a dictionary
    block could otherwise parse as a block with a list of records.
    """
    if not has_xats(cls):
        raise ValueError(f"Class '{cls.__name__}' is not a `xattree`.")

    def alternatives(names):
        # Case-insensitive string literals joined into one alternation.
        return "|".join(['"' + name + '"i' for name in names])

    spec = cls.__xattree__["spec"].flat
    param_alts = alternatives(spec.keys())

    # Distinct block names; parameters without a block contribute None.
    block_names = {xat.metadata.get("block", None) for xat in spec.values()}
    block_names.discard(None)

    # temp hack, TODO detect list blocks as blocks with a single
    # parameter with list or array type
    known_list_blocks = ["perioddata"]
    dict_names = []
    list_names = []
    for name in block_names:
        if name in known_list_blocks:
            list_names.append(name)
        else:
            dict_names.append(name)

    generated_terminals = [
        f"PARAM: ({param_alts})",
        f"DICTBLOCK: ({alternatives(dict_names)})",
        f"LISTBLOCK: ({alternatives(list_names)})",
    ]
    grammar = linesep.join([_GRAMMAR, *generated_terminals])
    return Lark(grammar, start="component", **kwargs)
| 121 | + |
| 122 | + |
def _parse_word(_, w):
    """Return the only child of a `word` node as a plain `str`."""
    # Destructuring doubles as a check that there is exactly one child.
    [token] = w
    return str(token)
| 126 | + |
| 127 | + |
def _parse_string(_, s):
    """Join the children of a `string` node with single spaces."""
    separator = " "
    return separator.join(s)
| 130 | + |
| 131 | + |
def _parse_int(_, i):
    """Convert the only child of an `int` node to a Python `int`."""
    # Destructuring doubles as a check that there is exactly one child.
    [token] = i
    return int(token)
| 135 | + |
| 136 | + |
def _parse_float(_, f):
    """Convert the only child of a `float` node to a Python `float`."""
    # Destructuring doubles as a check that there is exactly one child.
    [token] = f
    return float(token)
| 140 | + |
| 141 | + |
def _parse_array(_, a):
    """Wrap the only child of an `array` node in a NumPy array."""
    # Destructuring doubles as a check that there is exactly one child.
    [payload] = a
    return np.array(payload)
| 145 | + |
| 146 | + |
class MF6Transformer(Transformer):
    """
    Transforms a parse tree for the MODFLOW 6 input language
    into a nested dictionary AST suitable for structuring to
    a strongly-typed input data model.

    Notes
    -----
    Each function represents a node in the tree. Its argument
    is a list of its children. Nodes are processed bottom-up,
    so non-leaf functions can assume they will get a list of
    primitives which are already in the right representation.

    See https://lark-parser.readthedocs.io/en/stable/visitors.html#transformer
    for more info.
    """

    def key(self, k):
        """Return the parameter name as a lower-case string."""
        (k,) = k
        return str(k).lower()

    def factor(self, f):
        """
        Return the multiplier of a `factor` node as a float.

        Without this callback the node would reach `internalarray`
        as an untransformed tree, which cannot scale an array.
        """
        (f,) = f
        return float(f)

    def iprn(self, i):
        """Return the value of an `iprn` node as an int."""
        (i,) = i
        return int(i)

    def constantarray(self, a):
        """Placeholder for constant arrays; currently returns None."""
        # TODO factor out `ConstantArray`
        # array-like class from `MFArray`
        # with deferred shape and use it?
        pass

    def internalarray(self, a):
        """
        Build an array from the children of an `internalarray` node.

        The first two children are the optional `factor` and `iprn`
        entries (expected to be None when absent); everything after
        them is treated as array values. The values are multiplied by
        the factor when one is given. The `iprn` entry is not used.
        """
        factor = a[0]
        array = np.array(a[2:])
        if factor is not None:
            array *= factor
        return array

    def externalarray(self, a):
        """Placeholder for external arrays; currently returns None."""
        # TODO
        pass

    def path(self, p):
        """Return the file path of a `path` node, dropping its first child."""
        _, p = p
        return Path(p)

    def param(self, p):
        """
        Return a `(key, value)` pair for a parameter.

        A parameter given as a bare key, with no value, maps to True.
        """
        k = p[0]
        v = True if len(p) == 1 else p[1]
        return k, v

    def block(self, b):
        """Return the first two children of a block as a tuple."""
        return tuple(b[:2])

    def dictblock(self, b):
        """Return the dictionary block's name, lower-cased."""
        return str(b[0]).lower()

    def listblock(self, b):
        """
        Return the list block's name, lower-cased.

        When a second child (the block index) is present, the result
        is the name and the integer index separated by one space.
        """
        name = str(b[0])
        if len(b) == 2:
            index = int(b[1])
            name = f"{name} {index}"
        return name.lower()

    # Callbacks that reuse module-level helpers or builtin constructors.
    # Method bodies above still see the builtin `int`/`float`/`list`/`dict`:
    # names bound in a class body are not visible from inside its methods.
    word = _parse_word
    string = _parse_string
    int = _parse_int
    float = _parse_float
    array = _parse_array
    record = tuple
    list = list
    dict = dict
    params = dict
    component = dict
0 commit comments