Merge pull request #72 from nsidc/other-api-improvements

trey-stafford · web-flow · commit 1f8012191237 · 2025-06-16T16:14:24.000-06:00
Improvements to code organization & api/docs
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,9 @@
   those on to CMR. This provides much greater flexibility over data search and
   makes the interface more consistent with `icepyx` and `earthaccess`.
   https://github.com/nsidc/iceflow/issues/51.
+- Remove restrictive `fetch_iceflow_df` function from public API. Users should
+  utilize the search, download, and read functions described in
+  `docs/getting-started.md` instead.
 
 # v0.3.0
 
diff --git a/docs/getting-started.md b/docs/getting-started.md
@@ -39,17 +39,32 @@ By default, all iceflow-supported datasets are searched. To search for a
 specific subset of iceflow-supported datasets, use the `datasets` kwarg:
 
 ```
-from nsidc.iceflow import ILATM1BDataset
+from nsidc.iceflow import Dataset
 
 
 search_results = find_iceflow_data(
-    datasets=[ILATM1BDataset(version="1")],
+    datasets=[Dataset(short_name="ILATM1B", version="1")],
     # Lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat
     bounding_box=(-103.125559, -75.180563, -102.677327, -74.798063),
     temporal=(dt.date(2009, 11, 1), dt.date(2009, 12, 31)),
 )
 ```
 
+```{include} ../supported_datasets
+
+```
+
+`iceflow` currently supports the following datasets:
+
+| Dataset                                                  | Temporal Coverage             |
+| -------------------------------------------------------- | ----------------------------- |
+| [ILATM1B v1](https://nsidc.org/data/ilatm1b/versions/1)  | 2009-03-31 through 2012-11-08 |
+| [ILATM1B v2](https://nsidc.org/data/ilatm1b/versions/2)  | 2013-03-20 through 2019-11-20 |
+| [BLATM1B v1](https://nsidc.org/data/blatm1b/versions/1)  | 1993-06-23 through 2008-10-30 |
+| [ILVIS2 v1](https://nsidc.org/data/ilvis2/versions/1)    | 2009-04-14 through 2015-10-31 |
+| [ILVIS2 v2](https://nsidc.org/data/ilvis2/versions/2)    | 2017-08-25 through 2017-09-20 |
+| [GLAH06 v034](https://nsidc.org/data/glah06/versions/34) | 2003-02-20 through 2009-10-11 |
+
 All other keyword arguments to this function (e.g., `bounding_box`, `temporal`)
 map to [CMR](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html)
 search parameters, and are passed un-modified to
diff --git a/docs/notebooks/iceflow-example.ipynb b/docs/notebooks/iceflow-example.ipynb
@@ -43,7 +43,7 @@
     "import matplotlib.pyplot as plt\n",
     "\n",
     "from nsidc.iceflow import (\n",
-    "    ILATM1BDataset,\n",
+    "    Dataset,\n",
     "    download_iceflow_results,\n",
     "    find_iceflow_data,\n",
     "    read_iceflow_datafiles,\n",
@@ -77,10 +77,8 @@
     "data_path.mkdir(exist_ok=True)\n",
     "\n",
     "# Define the dataset that we want to search for.\n",
-    "atm1b_v1_dataset = ILATM1BDataset(version=\"1\")\n",
+    "atm1b_v1_dataset = Dataset(short_name=\"ILATM1B\", version=\"1\")\n",
     "\n",
-    "# Define the dataset that we want to search for.\n",
-    "atm1b_v1_dataset = ILATM1BDataset(version=\"1\")\n",
     "# Define a bounding box for our area of interest.\n",
     "BBOX = (\n",
     "    -103.125559,\n",
@@ -145,13 +143,13 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[32m2025-06-10 11:00:52.469\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mnsidc.iceflow.data.fetch\u001b[0m:\u001b[36m_download_iceflow_search_result\u001b[0m:\u001b[36m72\u001b[0m - \u001b[1mDownloading 1 granules to downloaded-data/ILATM1B_1.\u001b[0m\n"
+      "\u001b[32m2025-06-12 17:43:41.316\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mnsidc.iceflow.data.fetch\u001b[0m:\u001b[36m_download_iceflow_search_result\u001b[0m:\u001b[36m62\u001b[0m - \u001b[1mDownloading 1 granules to downloaded-data/ILATM1B_1.\u001b[0m\n"
      ]
     },
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "28d39c1f197042658119f2853eb69670",
+       "model_id": "f23a8e430d9e4a8db9a654bbbb9e86b8",
        "version_major": 2,
        "version_minor": 0
       },
@@ -165,7 +163,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1e4341335843469e8757c8ebe0c3ca3a",
+       "model_id": "11cf2a6ec02c404d81dc3a9635f6ed90",
        "version_major": 2,
        "version_minor": 0
       },
@@ -179,7 +177,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "b43146c391224e4494a864e6c3f0f364",
+       "model_id": "baaff268fc2c413e9606ccb33c431c38",
        "version_major": 2,
        "version_minor": 0
       },
diff --git a/docs/notebooks/iceflow-with-icepyx.ipynb b/docs/notebooks/iceflow-with-icepyx.ipynb
diff --git a/src/nsidc/iceflow/__init__.py b/src/nsidc/iceflow/__init__.py
@@ -20,22 +20,26 @@
 from nsidc.iceflow.api import make_iceflow_parquet
 from nsidc.iceflow.data.fetch import download_iceflow_results, find_iceflow_data
 from nsidc.iceflow.data.models import (
-    ALL_DATASETS,
+    Dataset,
+    IceflowDataFrame,
+)
+from nsidc.iceflow.data.read import read_iceflow_datafiles
+from nsidc.iceflow.data.supported_datasets import (
+    ALL_SUPPORTED_DATASETS,
     BLATM1BDataset,
     GLAH06Dataset,
-    IceflowDataFrame,
     ILATM1BDataset,
     ILVIS2Dataset,
 )
-from nsidc.iceflow.data.read import read_iceflow_datafiles
 from nsidc.iceflow.itrf.converter import transform_itrf
 
 __version__ = "v1.0.0"
 
 
 __all__ = [
-    "ALL_DATASETS",
+    "ALL_SUPPORTED_DATASETS",
     "BLATM1BDataset",
+    "Dataset",
     "GLAH06Dataset",
     "ILATM1BDataset",
     "ILVIS2Dataset",
diff --git a/src/nsidc/iceflow/api.py b/src/nsidc/iceflow/api.py
@@ -8,61 +8,11 @@
 import dask.dataframe as dd
 from loguru import logger
 
-from nsidc.iceflow.data.fetch import download_iceflow_results, find_iceflow_data
-from nsidc.iceflow.data.models import (
-    ALL_DATASETS,
-    BoundingBoxLike,
-    Dataset,
-    IceflowDataFrame,
-    TemporalRange,
-)
 from nsidc.iceflow.data.read import read_iceflow_datafiles
+from nsidc.iceflow.data.supported_datasets import ALL_SUPPORTED_DATASETS
 from nsidc.iceflow.itrf.converter import transform_itrf
 
 
-def fetch_iceflow_df(
-    *,
-    bounding_box: BoundingBoxLike,
-    temporal: TemporalRange,
-    datasets: list[Dataset] = ALL_DATASETS,
-    output_dir: Path,
-    # TODO: also add option for target epoch!!
-    output_itrf: str | None = None,
-) -> IceflowDataFrame:
-    """Search for data matching parameters and return an IceflowDataframe.
-
-    Optionally transform data to the given ITRF for consistency.
-
-    Note: a potentially large amount of data may be returned, especially if the
-    user requests a large spatial/temporal area across multiple datasets. The
-    result may not even fit in memory!
-
-    Consider using `make_iceflow_parquet` to store downloaded data in parquet
-    format.
-    """
-
-    iceflow_search_reuslts = find_iceflow_data(
-        bounding_box=bounding_box,
-        temporal=temporal,
-        datasets=datasets,
-    )
-
-    downloaded_files = download_iceflow_results(
-        iceflow_search_results=iceflow_search_reuslts,
-        output_dir=output_dir,
-    )
-
-    iceflow_df = read_iceflow_datafiles(downloaded_files)
-
-    if output_itrf is not None:
-        iceflow_df = transform_itrf(
-            data=iceflow_df,
-            target_itrf=output_itrf,
-        )
-
-    return iceflow_df
-
-
 def make_iceflow_parquet(
     *,
     data_dir: Path,
@@ -97,7 +47,7 @@ def make_iceflow_parquet(
 
     all_subdirs = [
         data_dir / ds.subdir_name
-        for ds in ALL_DATASETS
+        for ds in ALL_SUPPORTED_DATASETS
         if (data_dir / ds.subdir_name).is_dir()
     ]
     for subdir in all_subdirs:
diff --git a/src/nsidc/iceflow/data/__init__.py b/src/nsidc/iceflow/data/__init__.py
@@ -1,16 +1,18 @@
 from __future__ import annotations
 
 from nsidc.iceflow.data.models import (
-    ALL_DATASETS,
-    BLATM1BDataset,
     Dataset,
+)
+from nsidc.iceflow.data.supported_datasets import (
+    ALL_SUPPORTED_DATASETS,
+    BLATM1BDataset,
     GLAH06Dataset,
     ILATM1BDataset,
     ILVIS2Dataset,
 )
 
 __all__ = [
-    "ALL_DATASETS",
+    "ALL_SUPPORTED_DATASETS",
     "BLATM1BDataset",
     "Dataset",
     "GLAH06Dataset",
diff --git a/src/nsidc/iceflow/data/fetch.py b/src/nsidc/iceflow/data/fetch.py
@@ -3,14 +3,15 @@
 from pathlib import Path
 
 import earthaccess
+import pydantic
 from loguru import logger
 
 from nsidc.iceflow.data.models import (
-    ALL_DATASETS,
     Dataset,
     IceflowSearchResult,
     IceflowSearchResults,
 )
+from nsidc.iceflow.data.supported_datasets import ALL_SUPPORTED_DATASETS
 
 
 def _find_iceflow_data_for_dataset(
@@ -75,9 +76,10 @@ def _download_iceflow_search_result(
     return downloaded_filepaths
 
 
+@pydantic.validate_call()
 def find_iceflow_data(
     *,
-    datasets: list[Dataset] = ALL_DATASETS,
+    datasets: list[Dataset] = ALL_SUPPORTED_DATASETS,
     **search_kwargs,
 ) -> IceflowSearchResults:
     """Find iceflow-compatible data using search kwargs.
diff --git a/src/nsidc/iceflow/data/models.py b/src/nsidc/iceflow/data/models.py
@@ -205,46 +205,12 @@ class ATM1BDataset(Dataset):
     short_name: ATM1BShortName
 
 
-class ILATM1BDataset(ATM1BDataset):
-    short_name: ATM1BShortName = "ILATM1B"
-    version: Literal["1", "2"]
-
-
-class BLATM1BDataset(ATM1BDataset):
-    short_name: ATM1BShortName = "BLATM1B"
-    # There is only 1 version of BLATM1B
-    version: Literal["1"] = "1"
-
-
-class ILVIS2Dataset(Dataset):
-    short_name: DatasetShortName = "ILVIS2"
-    version: Literal["1", "2"]
-
-
-class GLAH06Dataset(Dataset):
-    short_name: DatasetShortName = "GLAH06"
-    # Note: some dataset versions are padded with zeros like GLAH06. NSIDC
-    # documentation refers to "version 34", but CMR only recognizes "034".  As a
-    # rule-of-thumb, ICESat-2, SMAP, and GLAH/GLA datasets have zero padding.
-    version: Literal["034"] = "034"
-
-
 # This mirrors the bounding box construct in `earthaccess` and `icepyx`: a
 # list/tuple of floats of len 4:
 # (lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)
 BoundingBoxLike = list[float] | tuple[float, float, float, float]
 
 
-ALL_DATASETS: list[Dataset] = [
-    ILATM1BDataset(version="1"),
-    ILATM1BDataset(version="2"),
-    BLATM1BDataset(version="1"),
-    ILVIS2Dataset(version="1"),
-    ILVIS2Dataset(version="2"),
-    GLAH06Dataset(),
-]
-
-
 TemporalRange = tuple[dt.datetime | dt.date, dt.datetime | dt.date]
 
 
diff --git a/src/nsidc/iceflow/data/supported_datasets.py b/src/nsidc/iceflow/data/supported_datasets.py
@@ -0,0 +1,44 @@
+from __future__ import annotations
+
+from typing import Literal
+
+from nsidc.iceflow.data.models import (
+    ATM1BDataset,
+    ATM1BShortName,
+    Dataset,
+    DatasetShortName,
+)
+
+
+class ILATM1BDataset(ATM1BDataset):
+    short_name: ATM1BShortName = "ILATM1B"
+    version: Literal["1", "2"]
+
+
+class BLATM1BDataset(ATM1BDataset):
+    short_name: ATM1BShortName = "BLATM1B"
+    # There is only 1 version of BLATM1B
+    version: Literal["1"] = "1"
+
+
+class ILVIS2Dataset(Dataset):
+    short_name: DatasetShortName = "ILVIS2"
+    version: Literal["1", "2"]
+
+
+class GLAH06Dataset(Dataset):
+    short_name: DatasetShortName = "GLAH06"
+    # Note: some dataset versions are padded with zeros like GLAH06. NSIDC
+    # documentation refers to "version 34", but CMR only recognizes "034".  As a
+    # rule-of-thumb, ICESat-2, SMAP, and GLAH/GLA datasets have zero padding.
+    version: Literal["034"] = "034"
+
+
+ALL_SUPPORTED_DATASETS: list[Dataset] = [
+    ILATM1BDataset(version="1"),
+    ILATM1BDataset(version="2"),
+    BLATM1BDataset(version="1"),
+    ILVIS2Dataset(version="1"),
+    ILVIS2Dataset(version="2"),
+    GLAH06Dataset(),
+]
diff --git a/tests/integration/test_e2e.py b/tests/integration/test_e2e.py