cleanup

wpbonelli · wpbonelli · commit 5d1c290b7343 · 2025-10-29T15:34:20.000-04:00
diff --git a/flopy4/mf6/codec/reader/grammar/filters.py b/flopy4/mf6/codec/reader/grammar/filters.py
@@ -77,7 +77,9 @@ def get_recarray_name(block_name: str) -> str:
     return f"{block_name}data"
 
 
-def get_recarray_columns(field_names: list[str], block_fields: Mapping[str, FieldV2]) -> list[tuple[str, bool]]:
+def get_recarray_columns(
+    field_names: list[str], block_fields: Mapping[str, FieldV2]
+) -> list[tuple[str, bool]]:
     """
     Get column names for a recarray with optionality info.
 
@@ -102,7 +104,7 @@ def get_recarray_columns(field_names: list[str], block_fields: Mapping[str, Fiel
     # Add the field names as columns with their optionality
     for name in field_names:
         field = block_fields[name]
-        is_optional = getattr(field, 'optional', False)
+        is_optional = getattr(field, "optional", False)
         columns.append((name, is_optional))
 
     return columns
diff --git a/flopy4/mf6/codec/reader/grammar/generated/gwf-wel.lark b/flopy4/mf6/codec/reader/grammar/generated/gwf-wel.lark
@@ -1,6 +1,7 @@
 // Auto-generated grammar for MF6 GWF-WEL
 %import common.WS
 %import common.SH_COMMENT
+%import common.NEWLINE
 
 %ignore WS
 %ignore SH_COMMENT
@@ -16,52 +17,24 @@ dimensions_fields: (maxbound)*
 period_fields: (stress_period_data)*
 auxiliary: "auxiliary"i array
 auxmultname: "auxmultname"i string
-boundnames: "boundnames"i 
-print_input: "print_input"i 
-print_flows: "print_flows"i 
-save_flows: "save_flows"i 
+boundnames: "boundnames"i
+print_input: "print_input"i
+print_flows: "print_flows"i
+save_flows: "save_flows"i
 auto_flow_reduce: "auto_flow_reduce"i double
 afrcsv_filerecord: "auto_flow_reduce_csv"i "fileout"i string
 ts_filerecord: "ts6"i "filein"i string
 obs_filerecord: "filein"i "obs6"i string
-mover: "mover"i 
+mover: "mover"i
 maxbound: "maxbound"i integer
-stress_period_data: (number | simple_string)+
+stress_period_data: (number | simple_string)+ NEWLINE
 
-// Inline typed grammar rules
-// Named fields use specific types (integer/double) for validation
-// List/recarray data uses generic 'number' - structuring step handles type conversion
-integer: SIGNED_INT | INT
-double: SIGNED_NUMBER | NUMBER
-number: SIGNED_NUMBER | NUMBER
-string: ESCAPED_STRING | record
-simple_string: ESCAPED_STRING | _word
-record.1: _token+ _NL
-list: record*
-array: (single_array | layered_array)
-single_array: [netcdf] readarray
-layered_array: layered [netcdf] readarray+
-layered: "layered"i
-netcdf: "netcdf"i
-readarray: control [data]
-control: constant | internal | external
-constant: "constant"i double
-internal: "internal"i [factor] [iprn]
-external: "open/close"i filename [factor] [binary] [iprn]
-factor: "factor"i double
-iprn: "iprn"i integer
-binary: "(binary)"i
-filename: ESCAPED_STRING | _word
-data: double+
-
-_word: /[a-zA-Z0-9._'~,-\\(\\)]+/
-_token: _word | number
-
-%import common.NEWLINE -> _NL
-%import common.CNAME
-%import common.WORD
-%import common.ESCAPED_STRING
-%import common.NUMBER
-%import common.INT
-%import common.SIGNED_NUMBER
-%import common.SIGNED_INT
+// Import typed grammar rules
+%import typed.integer -> integer
+%import typed.double -> double
+%import typed.number -> number
+%import typed.string -> string
+%import typed.simple_string -> simple_string
+%import typed.record -> record
+%import typed.list -> list
+%import typed.array -> array
diff --git a/flopy4/mf6/codec/reader/grammar/templates/component.lark.jinja b/flopy4/mf6/codec/reader/grammar/templates/component.lark.jinja
@@ -1,6 +1,7 @@
 // Auto-generated grammar for MF6 {{ name|upper }}
 %import common.WS
 %import common.SH_COMMENT
+%import common.NEWLINE
 
 %ignore WS
 %ignore SH_COMMENT
@@ -82,44 +83,16 @@ opt.type }}{% endif %}
 {% set recarray_name = block_name|get_recarray_name %}
 {% set field_names = period_groups.values()|first %}
 {% set columns = field_names|get_recarray_columns(block_) %}
-{{ recarray_name}}: (number | simple_string)+
+{{ recarray_name}}: (number | simple_string)+ NEWLINE
 {% endif %}
 {% endfor %}
 
-// Inline typed grammar rules
-// Named fields use specific types (integer/double) for validation
-// List/recarray data uses generic 'number' - structuring step handles type conversion
-integer: SIGNED_INT | INT
-double: SIGNED_NUMBER | NUMBER
-number: SIGNED_NUMBER | NUMBER
-string: ESCAPED_STRING | record
-simple_string: ESCAPED_STRING | _word
-record.1: _token+ _NL
-list: record*
-array: (single_array | layered_array)
-single_array: [netcdf] readarray
-layered_array: layered [netcdf] readarray+
-layered: "layered"i
-netcdf: "netcdf"i
-readarray: control [data]
-control: constant | internal | external
-constant: "constant"i double
-internal: "internal"i [factor] [iprn]
-external: "open/close"i filename [factor] [binary] [iprn]
-factor: "factor"i double
-iprn: "iprn"i integer
-binary: "(binary)"i
-filename: ESCAPED_STRING | _word
-data: double+
-
-_word: /[a-zA-Z0-9._'~,-\\(\\)]+/
-_token: _word | number
-
-%import common.NEWLINE -> _NL
-%import common.CNAME
-%import common.WORD
-%import common.ESCAPED_STRING
-%import common.NUMBER
-%import common.INT
-%import common.SIGNED_NUMBER
-%import common.SIGNED_INT
+// Import typed grammar rules
+%import typed.integer -> integer
+%import typed.double -> double
+%import typed.number -> number
+%import typed.string -> string
+%import typed.simple_string -> simple_string
+%import typed.record -> record
+%import typed.list -> list
+%import typed.array -> array
diff --git a/flopy4/mf6/codec/reader/grammar/typed.lark b/flopy4/mf6/codec/reader/grammar/typed.lark
@@ -1,7 +1,12 @@
-integer: _integer
-double: _number
+// Typed grammar rules for MF6 input files
+// Named fields use specific types (integer/double) for validation
+// List/recarray data uses generic 'number' - structuring step handles type conversion
+integer: SIGNED_INT | INT
+double: SIGNED_NUMBER | NUMBER
+number: SIGNED_NUMBER | NUMBER
 string: ESCAPED_STRING | record
-record: _token+ _NL
+simple_string: ESCAPED_STRING | _word
+record.1: _token+ NEWLINE
 list: record*
 array: (single_array | layered_array)
 single_array: [netcdf] readarray
@@ -19,12 +24,10 @@ binary: "(binary)"i
 filename: ESCAPED_STRING | _word
 data: double+
 
-_word: /[a-zA-Z0-9._'~,-\\(\\)]+/
-_number: SIGNED_NUMBER | NUMBER
-_integer: SIGNED_INT | INT
-_token: _word | _number
+_word: /(?!(?i:begin|end))[a-zA-Z0-9._'~,-\\(\\)]+/
+_token: _word | number
 
-%import common.NEWLINE -> _NL
+%import common.NEWLINE
 %import common.WS
 %import common.WS_INLINE
 %import common.CNAME
diff --git a/flopy4/mf6/codec/reader/transformer.py b/flopy4/mf6/codec/reader/transformer.py
@@ -1,13 +1,10 @@
-from collections import ChainMap
-from collections.abc import Mapping
 from pathlib import Path
 from typing import Any
 
 import numpy as np
 import xarray as xr
 from lark import Token, Transformer
 from modflow_devtools.dfn import Dfn
-from modflow_devtools.dfn.schema.v2 import SCALAR_TYPES
 
 
 class BasicTransformer(Transformer):
@@ -76,22 +73,19 @@ def start(self, items: list[Any]) -> dict:
             if not isinstance(item, dict):
                 continue
             for block_name, block_data in item.items():
-                # Pluralize indexed blocks (e.g., period -> periods)
-                if isinstance(block_data, dict) and all(isinstance(k, int) for k in block_data.keys()):
-                    # This is an indexed block, use plural form
-                    if not block_name.endswith('s'):
-                        block_name = block_name + 's'
-
-                if block_name not in merged:
+                # Check if this is an indexed block (dict with integer keys)
+                if isinstance(block_data, dict) and all(
+                    isinstance(k, int) for k in block_data.keys()
+                ):
+                    # Flatten indexed blocks into separate keys like "period 1", "period 2"
+                    for index, data in block_data.items():
+                        indexed_key = f"{block_name} {index}"
+                        merged[indexed_key] = data
+                elif block_name not in merged:
                     merged[block_name] = block_data
                 else:
-                    # If both are dicts with integer keys (indexed blocks), merge them
-                    existing = merged[block_name]
-                    if (isinstance(existing, dict) and isinstance(block_data, dict) and
-                        all(isinstance(k, int) for k in existing.keys()) and
-                        all(isinstance(k, int) for k in block_data.keys())):
-                        existing.update(block_data)
-                    # Otherwise, indexed block overwrites (shouldn't happen for well-formed input)
+                    # This shouldn't happen for well-formed input
+                    pass
         return merged
 
     def block(self, items: list[Any]) -> dict:
@@ -160,7 +154,7 @@ def factor(self, items: list[Any]) -> dict[str, float]:
     def iprn(self, items: list[Any]) -> dict[str, int]:
         return {"iprn": items[0]}
 
-    def binary(self, items: list[Any]) -> dict[str, bool]:
+    def binary(self, _) -> dict[str, bool]:
         return {"binary": True}
 
     def filename(self, items: list[Any]) -> Path:
@@ -193,59 +187,20 @@ def number(self, items: list[Any]) -> int | float:
     def data(self, items: list[Any]) -> np.ndarray:
         return np.array(items)
 
-    def netcdf(self, items: list[Any]) -> dict[str, bool]:
+    def netcdf(self, _) -> dict[str, bool]:
         return {"netcdf": True}
 
-    # Handle typed__ prefixed rules from imports
-    def typed__single_array(self, items: list[Any]) -> dict:
-        return self.single_array(items)
-
-    def typed__layered_array(self, items: list[Any]) -> list[dict]:
-        return self.layered_array(items)
-
-    def typed__readarray(self, items: list[Any]) -> dict[str, Any]:
-        return self.readarray(items)
-
-    def typed__control(self, items: list[Any]) -> dict[str, Any]:
-        return self.control(items)
-
-    def typed__constant(self, items: list[Any]) -> dict[str, Any]:
-        return self.constant(items)
-
-    def typed__internal(self, items: list[Any]) -> dict[str, Any]:
-        return self.internal(items)
-
-    def typed__external(self, items: list[Any]) -> dict[str, Any]:
-        return self.external(items)
-
-    def typed__factor(self, items: list[Any]) -> dict[str, float]:
-        return self.factor(items)
-
-    def typed__iprn(self, items: list[Any]) -> dict[str, int]:
-        return self.iprn(items)
-
-    def typed__binary(self, items: list[Any]) -> dict[str, bool]:
-        return self.binary(items)
-
-    def typed__filename(self, items: list[Any]) -> Path:
-        return self.filename(items)
-
-    def typed__data(self, items: list[Any]) -> np.ndarray:
-        return self.data(items)
-
-    def typed__netcdf(self, items: list[Any]) -> dict[str, bool]:
-        return self.netcdf(items)
-
-    def typed__layered(self, items: list[Any]) -> dict[str, bool]:
-        return {"layered": True}
-
     def block_index(self, items: list[Any]) -> int:
         """Extract block index (e.g., period number)."""
         return items[0]
 
     def stress_period_data(self, items: list[Any]) -> list[Any]:
-        """Handle stress period data (list of values for a single record)."""
-        return items
+        """Handle stress period data (one row of values).
+
+        The parser gives us the values for one row plus a NEWLINE token.
+        Filter out the NEWLINE token and return just the data values.
+        """
+        return [item for item in items if not isinstance(item, Token) or item.type != "NEWLINE"]
 
     @staticmethod
     def try_create_dataarray(array_info: dict) -> dict:
@@ -263,7 +218,7 @@ def __default__(self, data, children, meta):
         if self.blocks is None or self.fields is None:
             return super().__default__(data, children, meta)
         if data.endswith("_block") and (block_name := data[:-6]) in self.blocks:
-            # Check if this is an indexed block (period blocks have 3 children: [index, fields, index])
+            # See if this is an indexed block (period blocks have 3 children: index, fields, index)
             if len(children) == 3 and isinstance(children[0], int) and isinstance(children[2], int):
                 # Indexed block: [index, fields, index]
                 block_index = children[0]
diff --git a/test/test_mf6_grammar_gen.py b/test/test_mf6_grammar_gen.py
@@ -59,7 +59,9 @@ def test_make_grammar_creates_file(tmp_path, minimal_dfn):
     assert expected_file.is_file()
     content = expected_file.read_text()
     assert "// Auto-generated grammar for MF6 TEST-COMPONENT" in content
-    assert '%import "typed.lark"' in content
+    # Grammar imports typed rules from typed.lark
+    assert "%import typed.integer -> integer" in content
+    assert "%import typed.double -> double" in content
     assert "start: block*" in content
     assert "options_block" in content
 
@@ -182,7 +184,11 @@ def test_make_grammar_with_period_block(tmp_path):
     lines = content.split("\n")
     period_fields_line = [l for l in lines if "period_fields:" in l][0]
     assert "stress_period_data" in period_fields_line
-    assert "cellid" in content.lower()
+
+    # stress_period_data should accept numbers and strings, one row per line
+    assert "stress_period_data:" in content
+    stress_period_data_line = [l for l in lines if l.strip().startswith("stress_period_data:")][0]
+    assert "NEWLINE" in stress_period_data_line
 
 
 def test_make_grammar_with_named_subfields(tmp_path):
@@ -206,6 +212,10 @@ def test_make_grammar_with_named_subfields(tmp_path):
     grammar_file = tmp_path / "gwf-rch.lark"
     content = grammar_file.read_text()
 
-    assert "stress_period_data: cellid recharge" in content
-    assert "cellid: integer+" in content
-    assert "recharge: double" in content
+    # stress_period_data should be a generic recarray accepting numbers/strings per line
+    assert "stress_period_data" in content
+    lines = content.split("\n")
+    stress_period_data_line = [l for l in lines if l.strip().startswith("stress_period_data:")][0]
+    assert "NEWLINE" in stress_period_data_line
+    # Should accept both numbers and simple strings
+    assert "number" in stress_period_data_line or "simple_string" in stress_period_data_line
diff --git a/test/test_mf6_reader.py b/test/test_mf6_reader.py
@@ -344,5 +344,14 @@ def test_transform_gwf_wel_file(model_workspace):
 
     # Check structure
     assert isinstance(result, dict)
-    assert "periods" in result  # WEL has periods
-    assert len(result["periods"]) > 0  # Should have at least one period
+
+    # Should have a period 2 entry (indexed period blocks are flattened to "period N" keys)
+    assert "period 2" in result
+    assert "stress_period_data" in result["period 2"]
+
+    # Should have 2 rows of data (MAXBOUND = 2)
+    assert len(result["period 2"]["stress_period_data"]) == 2
+
+    # Each row should have 4 values (cellid components + q value)
+    assert len(result["period 2"]["stress_period_data"][0]) == 4
+    assert len(result["period 2"]["stress_period_data"][1]) == 4