Sparse converter (#106)

deltamarnix · web-flow · commit 0db2281f9e9b · 2025-04-24T10:13:19.000-04:00
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -1,14 +1,26 @@
 {
+    "editor.formatOnSave": true,
+    "files.insertFinalNewline": true,
     "python.testing.pytestArgs": [
         "test"
     ],
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
     "[python]": {
-        "editor.formatOnSave": true,
         "editor.defaultFormatter": "charliermarsh.ruff",
         "editor.codeActionsOnSave": {
             "source.fixAll": "explicit"
         }
+    },
+    "mypy-type-checker.importStrategy": "fromEnvironment",
+    "files.exclude": {
+        "**/.git": true,
+        "**/.svn": true,
+        "**/.hg": true,
+        "**/.DS_Store": true,
+        "**/Thumbs.db": true,
+        ".pixi": true,
+        ".ruff_cache": true,
+        ".pytest_cache": true
     }
-}
+}
diff --git a/docs/examples/quickstart.py b/docs/examples/quickstart.py
@@ -29,7 +29,7 @@
 # check CHD
 assert chd.data["head"][0, 0].item() == 1.0
 assert chd.data["head"][0, 99].item() == 0.0
-assert np.allclose(chd.data["head"][:, 1:99], np.full(98, 1e30))
+assert np.allclose(chd.data["head"][:, 1:99].data.todense(), np.full(98, 1e30))
 
 # TODO: xarray index aliasing nlay/ncol/nrow to k/i/j?
 # assert chd.data["head"].loc(dict(k=0, i=0, j=0)) == 1.
diff --git a/flopy4/mf6/config.py b/flopy4/mf6/config.py
@@ -0,0 +1,3 @@
+# TODO use https://environ-config.readthedocs.io/en/stable/?
+
+SPARSE_THRESHOLD = 1000
diff --git a/flopy4/mf6/converters.py b/flopy4/mf6/converters.py
@@ -1,7 +1,11 @@
+from typing import Any, Tuple
+
 import numpy as np
+import sparse
 from numpy.typing import NDArray
 from xattree import _get_xatspec
 
+from flopy4.mf6.config import SPARSE_THRESHOLD
 from flopy4.mf6.constants import FILL_DNODATA
 
 
@@ -26,11 +30,29 @@ def convert_array(value, self_, field) -> NDArray:
     if any(unresolved):
         raise ValueError(f"Couldn't resolve dims: {unresolved}")
 
-    # create array
-    # TDOD: support other fill values, configurable by field?
-    a = np.full(
-        shape, fill_value=field.default or FILL_DNODATA
-    )  # , dtype=field.dtype)
+    if np.prod(shape) > SPARSE_THRESHOLD:
+        a: dict[Tuple[Any, ...], Any] = dict()
+
+        def set_(arr, val, *ind):
+            arr[tuple(ind)] = val
+
+        def final(arr):
+            coords = np.array(list(map(list, zip(*arr.keys()))))
+            return sparse.COO(
+                coords,
+                list(arr.values()),
+                shape=shape,
+                fill_value=field.default or FILL_DNODATA,
+            )
+    else:
+        a = np.full(shape, FILL_DNODATA, dtype=field.dtype)  # type: ignore
+
+        def set_(arr, val, *ind):
+            arr[ind] = val
+
+        def final(arr):
+            arr[arr == FILL_DNODATA] = field.default or FILL_DNODATA
+            return arr
 
     def _get_nn(cellid):
         match len(cellid):
@@ -53,16 +75,19 @@ def _get_nn(cellid):
                 kper = 0
             match len(shape):
                 case 1:
-                    a[kper] = period
+                    set_(a, period, kper)
+                    # a[(kper,)] = period
                 case _:
                     for cellid, v in period.items():
                         nn = _get_nn(cellid)
-                        a[kper, nn] = v
+                        set_(a, v, kper, nn)
+                        # a[(kper, nn)] = v
             if kper == "*":
                 break
     else:
         for cellid, v in value.items():
             nn = _get_nn(cellid)
-            a[nn] = v
+            set_(a, v, nn)
+            # a[(nn,)] = v
 
-    return a
+    return final(a)
diff --git a/pixi.lock b/pixi.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -34,18 +34,19 @@ classifiers = [
 ]
 requires-python = ">=3.11"
 dependencies = [
-    "attrs",                                # todo: bounds?
-    "cattrs",                               # todo: bounds?
+    "attrs",                                                                            # todo: bounds?
+    "cattrs",                                                                           # todo: bounds?
     "flopy",
     "Jinja2>=3.0",
     "numpy>=1.20.3",
     "pandas>=2.0.0",
     "toml>=0.10",
     "networkx>=3.4.2,<4",
-    "xarray[parallel,io]>=2024.11.0,<2025",
+    "xarray[parallel,io]>=2024.11.0",
     "scipy>=1.14.1,<2",
     "modflow-devtools[dfn] @ git+https://github.com/MODFLOW-USGS/modflow-devtools.git",
     "xattree @ git+https://github.com/modflowpy/xattree.git",
+    "sparse>=0.15.5,<1",
 ]
 dynamic = ["version"]
 
@@ -63,7 +64,7 @@ test = [
     "pytest!=8.1.0",
     "pytest-dotenv",
     "pytest-xdist",
-    "pytest-benchmark"
+    "pytest-benchmark",
 ]
 build = ["build", "twine"]
 
@@ -143,4 +144,4 @@ install = { cmd = "pre-commit install --install-hooks" }
 [tool.mypy]
 mypy_path = "flopy4"
 ignore_missing_imports = true
-warn_unreachable = true
+warn_unreachable = true
diff --git a/test/test_component.py b/test/test_component.py
@@ -140,6 +140,62 @@ def test_init_sim_explicit_dims():
     assert np.array_equal(sim.models["gwf"].npf.data.k, np.ones(100))
     assert chd.head[0, 0] == 1.0
     assert chd.head[0, 99] == 0.0
-    assert np.array_equal(chd.head[0, 1:99], np.full((98,), FILL_DNODATA))
-    assert np.array_equal(chd.head, chd.data.head)
-    assert np.array_equal(chd.head, sim.models["gwf"].chd[0].data.head)
+    assert np.array_equal(chd.head[0, 1:99].data, np.full((98,), FILL_DNODATA))
+    assert np.array_equal(chd.head.data, chd.data.head.data)
+    assert np.array_equal(
+        chd.head.data,
+        sim.models["gwf"].chd[0].data.head.data,
+    )
+
+
+def test_init_big_sim():
+    # if size over threshold, arrays should be sparse
+    time = ModelTime(perlen=[1.0], nstp=[1], tsmult=[1.0])
+    grid = StructuredGrid(nlay=1, nrow=100, ncol=100)
+    dims = {
+        "nlay": grid.nlay,
+        "nrow": grid.nrow,
+        "ncol": grid.ncol,
+    }
+    dis = Dis(**dims)
+    dims["nper"] = time.nper
+    dims["nnodes"] = grid.nnodes
+    ic = Ic(dims=dims)
+    oc = Oc(dims=dims)
+    npf = Npf(dims=dims)
+    chd = Chd(dims=dims, head={"*": {(0, 0, 0): 1.0, (0, 99, 99): 0.0}})
+    gwf = Gwf(
+        dis=dis,
+        ic=ic,
+        oc=oc,
+        npf=npf,
+        chd=[chd],
+        dims=dims,
+    )
+    tdis = Tdis(dims=dims)
+    sim = Simulation(tdis=tdis, models={"gwf": gwf})
+
+    assert sim.tdis is tdis
+    assert sim.models["gwf"] is gwf
+    assert isinstance(sim.data, DataTree)
+    assert sim.data.tdis is tdis.data
+    assert sim.data.gwf is gwf.data
+    assert gwf.dis is dis
+    assert gwf.ic is ic
+    assert gwf.oc is oc
+    assert gwf.npf is npf
+    assert gwf.chd[0] is chd
+    assert np.array_equal(sim.models["gwf"].npf.k, np.ones(10000))
+    assert np.array_equal(sim.models["gwf"].npf.data.k, np.ones(10000))
+    assert chd.head[0, 0] == 1.0
+    assert chd.head[0, 9999] == 0.0
+    assert np.array_equal(
+        chd.head[0, 1:9999].data.todense(), np.full((9998,), FILL_DNODATA)
+    )
+    assert np.array_equal(
+        chd.head.data.todense(), chd.data.head.data.todense()
+    )
+    assert np.array_equal(
+        chd.head.data.todense(),
+        sim.models["gwf"].chd[0].data.head.data.todense(),
+    )
diff --git a/uv.lock b/uv.lock

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -0,0 +1,3 @@` |
|  | `1` | `+# TODO use https://environ-config.readthedocs.io/en/stable/?` |
|  | `2` | `+` |
|  | `3` | `+SPARSE_THRESHOLD = 1000` |