Skip to content

Commit 6c679c3

Browse files
Merge pull request #71 from nsidc/51-bbox-consistent-icepyx-earthaccess
51 bbox consistent icepyx earthaccess
2 parents 4a6415d + d5388ba commit 6c679c3

File tree

12 files changed

+139
-244
lines changed

12 files changed

+139
-244
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222

2323
- uses: actions/setup-python@v5
2424
with:
25-
python-version: "3.11"
25+
python-version: "3.12"
2626

2727
- name: Install package
2828
run: python -m pip install .[dev]

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@
77
Generate an Elevation Timeseries" notebook.
88
- Filter for cloud-hosted data, avoiding duplicate granule results from
99
`fetch.find_iceflow_data`
10+
- Pass through search kwargs to `earthaccess` without any type validation. This
11+
allows earthaccess to do whatever validation it needs to, and then it passes
12+
those on to CMR. This provides much greater flexibility over data search and
13+
makes the interface more consistent with `icepyx` and `earthaccess`.
14+
https://github.com/nsidc/iceflow/issues/51.
1015

1116
# v0.3.0
1217

docs/getting-started.md

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,21 +25,37 @@ To find `iceflow`-supported data for an area of interest and timeframe, use
2525
```
2626
import datetime as dt
2727
28-
from nsidc.iceflow import (
29-
find_iceflow_data,
30-
DatasetSearchParameters,
31-
BoundingBox,
28+
from nsidc.iceflow import find_iceflow_data
29+
30+
31+
search_results = find_iceflow_data(
32+
# Lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat
33+
bounding_box=(-103.125559, -75.180563, -102.677327, -74.798063),
34+
temporal=(dt.date(2009, 11, 1), dt.date(2009, 12, 31)),
3235
)
36+
```
37+
38+
By default, all iceflow-supported datasets are searched. To search for a
39+
specific subset of iceflow-supported datasets, use the `datasets` kwarg:
40+
41+
```
42+
from nsidc.iceflow import ILATM1BDataset
3343
3444
3545
search_results = find_iceflow_data(
36-
dataset_search_params=DatasetSearchParameters(
37-
bounding_box=BoundingBox(lower_left_lon=-103.125559, lower_left_lat=-75.180563, upper_right_lon=-102.677327, upper_right_lat=-74.798063),
38-
temporal=(dt.date(2009, 11, 1), dt.date(2009, 12, 31)),
39-
),
46+
datasets=[ILATM1BDataset(version="1")],
47+
# Lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat
48+
bounding_box=(-103.125559, -75.180563, -102.677327, -74.798063),
49+
temporal=(dt.date(2009, 11, 1), dt.date(2009, 12, 31)),
4050
)
4151
```
4252

53+
All other keyword arguments to this function (e.g., `bounding_box`, `temporal`)
54+
map to [CMR](https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html)
55+
search parameters, and are passed un-modified to
56+
[earthaccess.search_data](https://earthaccess.readthedocs.io/en/latest/user-reference/api/api/#earthaccess.api.search_data)
57+
to perform the search.
58+
4359
### Downloading data
4460

4561
Once search results have been found, download data with

docs/notebooks/iceflow-example.ipynb

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,6 @@
4343
"import matplotlib.pyplot as plt\n",
4444
"\n",
4545
"from nsidc.iceflow import (\n",
46-
" BoundingBox,\n",
47-
" DatasetSearchParameters,\n",
4846
" ILATM1BDataset,\n",
4947
" download_iceflow_results,\n",
5048
" find_iceflow_data,\n",
@@ -84,11 +82,11 @@
8482
"# Define the dataset that we want to search for.\n",
8583
"atm1b_v1_dataset = ILATM1BDataset(version=\"1\")\n",
8684
"# Define a bounding box for our area of interest.\n",
87-
"BBOX = BoundingBox(\n",
88-
" lower_left_lon=-103.125559,\n",
89-
" lower_left_lat=-75.180563,\n",
90-
" upper_right_lon=-102.677327,\n",
91-
" upper_right_lat=-74.798063,\n",
85+
"BBOX = (\n",
86+
" -103.125559,\n",
87+
" -75.180563,\n",
88+
" -102.677327,\n",
89+
" -74.798063,\n",
9290
")\n",
9391
"\n",
9492
"# We will define a short date range in 2009 to search for data.\n",
@@ -122,11 +120,9 @@
122120
],
123121
"source": [
124122
"search_results = find_iceflow_data(\n",
125-
" dataset_search_params=DatasetSearchParameters(\n",
126-
" datasets=[atm1b_v1_dataset],\n",
127-
" bounding_box=BBOX,\n",
128-
" temporal=date_range,\n",
129-
" ),\n",
123+
" datasets=[atm1b_v1_dataset],\n",
124+
" bounding_box=BBOX,\n",
125+
" temporal=date_range,\n",
130126
")\n",
131127
"len(search_results)"
132128
]

docs/notebooks/iceflow-with-icepyx.ipynb

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -763,8 +763,6 @@
763763
"import xarray as xr\n",
764764
"\n",
765765
"from nsidc.iceflow import (\n",
766-
" BoundingBox,\n",
767-
" DatasetSearchParameters,\n",
768766
" IceflowDataFrame,\n",
769767
" download_iceflow_results,\n",
770768
" find_iceflow_data,\n",
@@ -804,11 +802,11 @@
804802
"ICESAT2_ITRF = \"ITRF2014\"\n",
805803
"\n",
806804
"# This bounding box covers an area near Sermeq Kujalleq (Jakobshavn Isbrae)\n",
807-
"BBOX = BoundingBox(\n",
808-
" lower_left_lon=-49.149,\n",
809-
" lower_left_lat=69.186,\n",
810-
" upper_right_lon=-48.949,\n",
811-
" upper_right_lat=69.238,\n",
805+
"BBOX = (\n",
806+
" -49.149,\n",
807+
" 69.186,\n",
808+
" -48.949,\n",
809+
" 69.238,\n",
812810
")\n",
813811
"\n",
814812
"# Range of dates we want to evaluate\n",
@@ -822,7 +820,7 @@
822820
"source": [
823821
"Next we will use the `find_iceflow_data` function from the `iceflow` API to find data matching our area of interest.\n",
824822
"\n",
825-
"By default, `DatasetSearchParameters` will include all `iceflow` supported datasets, unless one or more are specified as a filter with the `datasets` kwarg. There may be warnings raised about there not being search results for specific datasets supported by `iceflow`."
823+
"By default, `find_iceflow_data` will include all `iceflow` supported datasets, unless one or more are specified as a filter with the `datasets` kwarg. There may be warnings raised about there not being search results for specific datasets supported by `iceflow`."
826824
]
827825
},
828826
{
@@ -851,10 +849,8 @@
851849
],
852850
"source": [
853851
"search_results = find_iceflow_data(\n",
854-
" dataset_search_params=DatasetSearchParameters(\n",
855-
" bounding_box=BBOX,\n",
856-
" temporal=DATE_RANGE,\n",
857-
" ),\n",
852+
" bounding_box=BBOX,\n",
853+
" temporal=DATE_RANGE,\n",
858854
")\n",
859855
"len(search_results)"
860856
]

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ Changelog = "https://github.com/NSIDC/iceflow/releases"
4848
dev = [
4949
"bump-my-version",
5050
"invoke",
51-
"mypy >=1.15.0",
51+
"mypy >=1.16.0",
5252
"pandas-stubs >=2.2",
5353
"pre-commit",
5454
"pytest",

src/nsidc/iceflow/__init__.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222
from nsidc.iceflow.data.models import (
2323
ALL_DATASETS,
2424
BLATM1BDataset,
25-
BoundingBox,
26-
DatasetSearchParameters,
2725
GLAH06Dataset,
2826
IceflowDataFrame,
2927
ILATM1BDataset,
@@ -32,16 +30,12 @@
3230
from nsidc.iceflow.data.read import read_iceflow_datafiles
3331
from nsidc.iceflow.itrf.converter import transform_itrf
3432

35-
# TODO: add bumpversion config to control this version number, and the conda
36-
# recipe/meta.yaml.
3733
__version__ = "v1.0.0"
3834

3935

4036
__all__ = [
4137
"ALL_DATASETS",
4238
"BLATM1BDataset",
43-
"BoundingBox",
44-
"DatasetSearchParameters",
4539
"GLAH06Dataset",
4640
"ILATM1BDataset",
4741
"ILVIS2Dataset",

src/nsidc/iceflow/api.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,20 @@
1111
from nsidc.iceflow.data.fetch import download_iceflow_results, find_iceflow_data
1212
from nsidc.iceflow.data.models import (
1313
ALL_DATASETS,
14-
DatasetSearchParameters,
14+
BoundingBoxLike,
15+
Dataset,
1516
IceflowDataFrame,
17+
TemporalRange,
1618
)
1719
from nsidc.iceflow.data.read import read_iceflow_datafiles
1820
from nsidc.iceflow.itrf.converter import transform_itrf
1921

2022

2123
def fetch_iceflow_df(
2224
*,
23-
dataset_search_params: DatasetSearchParameters,
25+
bounding_box: BoundingBoxLike,
26+
temporal: TemporalRange,
27+
datasets: list[Dataset] = ALL_DATASETS,
2428
output_dir: Path,
2529
# TODO: also add option for target epoch!!
2630
output_itrf: str | None = None,
@@ -38,7 +42,9 @@ def fetch_iceflow_df(
3842
"""
3943

4044
iceflow_search_reuslts = find_iceflow_data(
41-
dataset_search_params=dataset_search_params,
45+
bounding_box=bounding_box,
46+
temporal=temporal,
47+
datasets=datasets,
4248
)
4349

4450
downloaded_files = download_iceflow_results(

src/nsidc/iceflow/data/fetch.py

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,26 @@
11
from __future__ import annotations
22

3-
import datetime as dt
43
from pathlib import Path
54

65
import earthaccess
76
from loguru import logger
87

98
from nsidc.iceflow.data.models import (
10-
BoundingBox,
9+
ALL_DATASETS,
1110
Dataset,
12-
DatasetSearchParameters,
1311
IceflowSearchResult,
1412
IceflowSearchResults,
1513
)
1614

1715

18-
def _find_iceflow_data(
16+
def _find_iceflow_data_for_dataset(
1917
*,
2018
dataset: Dataset,
21-
bounding_box: BoundingBox,
22-
temporal: tuple[dt.datetime | dt.date, dt.datetime | dt.date],
19+
**search_kwargs,
2320
) -> IceflowSearchResult:
2421
earthaccess.login()
2522

26-
ctx_string = (
27-
f"{dataset.short_name=} {dataset.version=} with {bounding_box=} {temporal=}"
28-
)
23+
ctx_string = f"{dataset.short_name=} {dataset.version=} with {search_kwargs=}"
2924

3025
try:
3126
granules_list = earthaccess.search_data(
@@ -35,13 +30,7 @@ def _find_iceflow_data(
3530
# non-cloud, we may get duplicate granules as long as the ECS copy
3631
# remains.
3732
cloud_hosted=True,
38-
bounding_box=(
39-
bounding_box.lower_left_lon,
40-
bounding_box.lower_left_lat,
41-
bounding_box.upper_right_lon,
42-
bounding_box.upper_right_lat,
43-
),
44-
temporal=temporal,
33+
**search_kwargs,
4534
)
4635
except IndexError:
4736
# There's no data matching the given parameters.
@@ -88,14 +77,20 @@ def _download_iceflow_search_result(
8877

8978
def find_iceflow_data(
9079
*,
91-
dataset_search_params: DatasetSearchParameters,
80+
datasets: list[Dataset] = ALL_DATASETS,
81+
**search_kwargs,
9282
) -> IceflowSearchResults:
83+
"""Find iceflow-compatible data using search kwargs.
84+
85+
`search_kwargs` are passed to `earthaccess.search_data`, allowing for
86+
CMR-supported filters (see
87+
https://cmr.earthdata.nasa.gov/search/site/docs/search/api.html)
88+
"""
9389
iceflow_search_results = []
94-
for dataset in dataset_search_params.datasets:
95-
iceflow_search_result = _find_iceflow_data(
90+
for dataset in datasets:
91+
iceflow_search_result = _find_iceflow_data_for_dataset(
9692
dataset=dataset,
97-
bounding_box=dataset_search_params.bounding_box,
98-
temporal=dataset_search_params.temporal,
93+
**search_kwargs,
9994
)
10095
iceflow_search_results.append(iceflow_search_result)
10196

src/nsidc/iceflow/data/models.py

Lines changed: 5 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -229,60 +229,10 @@ class GLAH06Dataset(Dataset):
229229
version: Literal["034"] = "034"
230230

231231

232-
class BoundingBox(pydantic.BaseModel):
233-
lower_left_lon: float
234-
lower_left_lat: float
235-
upper_right_lon: float
236-
upper_right_lat: float
237-
238-
def __init__(self, *args, **kwargs):
239-
"""Accept either named args, one arg for each coord, or an iterable of
240-
`(lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)`."""
241-
if args:
242-
if len(args) == 1 and isinstance(args[0], list | tuple):
243-
# The first arg should be treated like a tuple of
244-
# (lower_left_lon, lower_left_lat, upper_right_lon,
245-
# upper_right_lat)
246-
args = tuple(args[0])
247-
# Each arg should be treated as one of (lower_left_lon,
248-
# lower_left_lat, upper_right_lon, upper_right_lat).
249-
if len(args) != 4:
250-
raise ValueError(
251-
"Expected four values for bounding box:"
252-
" (lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)"
253-
)
254-
# Set kwargs if args are given.
255-
kwargs = {
256-
"lower_left_lon": args[0],
257-
"lower_left_lat": args[1],
258-
"upper_right_lon": args[2],
259-
"upper_right_lat": args[3],
260-
}
261-
262-
# Initialize the model.
263-
super().__init__(**kwargs)
264-
265-
def __iter__(self):
266-
"""Return bounding box as a iter (list/tuple)."""
267-
return iter(
268-
(
269-
self.lower_left_lon,
270-
self.lower_left_lat,
271-
self.upper_right_lon,
272-
self.upper_right_lat,
273-
)
274-
)
275-
276-
def __getitem__(self, idx):
277-
if isinstance(idx, int):
278-
return list(self.__iter__())[idx]
279-
elif isinstance(idx, str):
280-
return getattr(self, idx)
281-
else:
282-
raise TypeError(
283-
"Getitem on BoundingBox must be int (e.g. 0)"
284-
" or str (e.g., 'lower_left_lon')."
285-
)
232+
# This mirrors the bounding box construct in `earthaccess` and `icepyx`: a
233+
# list/tuple of floats of len 4:
234+
# (lower_left_lon, lower_left_lat, upper_right_lon, upper_right_lat)
235+
BoundingBoxLike = list[float] | tuple[float, float, float, float]
286236

287237

288238
ALL_DATASETS: list[Dataset] = [
@@ -295,10 +245,7 @@ def __getitem__(self, idx):
295245
]
296246

297247

298-
class DatasetSearchParameters(pydantic.BaseModel):
299-
datasets: list[Dataset] = ALL_DATASETS
300-
bounding_box: BoundingBox
301-
temporal: tuple[dt.datetime | dt.date, dt.datetime | dt.date]
248+
TemporalRange = tuple[dt.datetime | dt.date, dt.datetime | dt.date]
302249

303250

304251
class IceflowSearchResult(pydantic.BaseModel):

0 commit comments

Comments
 (0)