Skip to content

Commit 5a3e6f4

Browse files
committed
Merge branch 'issue666-load_stac-temporal-metadata'
2 parents 37ba260 + 4cd6f59 commit 5a3e6f4

File tree

8 files changed

+76
-28
lines changed

8 files changed

+76
-28
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1717

1818
### Fixed
1919

20+
- `load_stac`: use a fallback temporal dimension (named "t", built from the collection's temporal extent) when the STAC Collection has no "cube:dimensions" ([#666](https://github.com/Open-EO/openeo-python-client/issues/666))
2021

2122
## [0.35.0] - 2024-11-19
2223

openeo/metadata.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
import pystac.extensions.item_assets
1212

1313
from openeo.internal.jupyter import render_component
14-
from openeo.util import deep_get
14+
from openeo.util import Rfc3339, deep_get
1515

1616
_log = logging.getLogger(__name__)
1717

@@ -691,6 +691,11 @@ def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalD
691691
if len(temporal_dims) == 1:
692692
name, extent = temporal_dims[0]
693693
return TemporalDimension(name=name, extent=extent)
694+
elif isinstance(stac_obj, pystac.Collection) and stac_obj.extent.temporal:
695+
# No explicit "cube:dimensions": build fallback from "extent.temporal",
696+
# with dimension name "t" (openEO API recommendation).
697+
extent = [Rfc3339(propagate_none=True).normalize(d) for d in stac_obj.extent.temporal.intervals[0]]
698+
return TemporalDimension(name="t", extent=extent)
694699
else:
695700
if isinstance(stac_obj, pystac.Item):
696701
cube_dimensions = stac_obj.properties.get("cube:dimensions", {})

openeo/rest/datacube.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ def load_collection(
203203
metadata = None
204204
if metadata:
205205
bands = [b if isinstance(b, str) else metadata.band_dimension.band_name(b) for b in bands]
206+
# TODO: also apply spatial/temporal filters to metadata?
206207
metadata = metadata.filter_bands(bands)
207208
arguments['bands'] = bands
208209

@@ -385,6 +386,9 @@ def load_stac(
385386
graph = PGNode("load_stac", arguments=arguments)
386387
try:
387388
metadata = metadata_from_stac(url)
389+
if bands:
390+
# TODO: also apply spatial/temporal filters to metadata?
391+
metadata = metadata.filter_bands(band_names=bands)
388392
except Exception:
389393
log.warning(f"Failed to extract cube metadata from STAC URL {url}", exc_info=True)
390394
metadata = None

openeo/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ def parse_date_or_datetime(
172172
@classmethod
173173
def _format_datetime(cls, d: dt.datetime) -> str:
174174
"""Format given datetime as RFC-3339 date-time string."""
175-
if d.tzinfo not in {None, dt.timezone.utc}:
175+
if not (d.tzinfo is None or d.tzinfo.tzname(d) == "UTC"):
176176
# TODO: add support for non-UTC timezones?
177177
raise ValueError(f"No support for non-UTC timezone {d.tzinfo}")
178178
return d.strftime(cls._FMT_DATETIME)

setup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
"pyproj>=3.2.0", # Pyproj is an optional, best-effort runtime dependency
3131
"dirty_equals>=0.8.0",
3232
"pyarrow>=10.0.1", # For Parquet read/write support in pandas
33+
"python-dateutil>=2.7.0",
3334
]
3435

3536
docs_require = [

tests/rest/test_connection.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import openeo
1919
from openeo.capabilities import ApiVersionException
2020
from openeo.internal.graph_building import FlatGraphableMixin, PGNode
21+
from openeo.metadata import _PYSTAC_1_9_EXTENSION_INTERFACE, TemporalDimension
2122
from openeo.rest import (
2223
CapabilitiesException,
2324
OpenEoApiError,
@@ -40,7 +41,7 @@
4041
)
4142
from openeo.rest.vectorcube import VectorCube
4243
from openeo.testing.stac import StacDummyBuilder
43-
from openeo.util import ContextTimer, dict_no_none
44+
from openeo.util import ContextTimer, deep_get, dict_no_none
4445

4546
from .auth.test_cli import auth_config, refresh_token_store
4647

@@ -2622,6 +2623,51 @@ def test_load_stac_reduce_temporal(self, con120, tmp_path, temporal_dim):
26222623
},
26232624
}
26242625

2626+
@pytest.mark.skipif(
2627+
not _PYSTAC_1_9_EXTENSION_INTERFACE,
2628+
reason="No backport of implementation/test below PySTAC 1.9 extension interface",
2629+
)
2630+
@pytest.mark.parametrize(
2631+
["collection_extent", "dim_extent"],
2632+
[
2633+
(
2634+
{"spatial": {"bbox": [[3, 4, 5, 6]]}, "temporal": {"interval": [["2024-01-01", "2024-05-05"]]}},
2635+
["2024-01-01T00:00:00Z", "2024-05-05T00:00:00Z"],
2636+
),
2637+
(
2638+
{"spatial": {"bbox": [[3, 4, 5, 6]]}, "temporal": {"interval": [[None, "2024-05-05"]]}},
2639+
[None, "2024-05-05T00:00:00Z"],
2640+
),
2641+
],
2642+
)
2643+
def test_load_stac_no_cube_extension_temporal_dimension(self, con120, tmp_path, collection_extent, dim_extent):
2644+
"""
2645+
Metadata detection when STAC metadata does not use "cube" extension
2646+
https://github.com/Open-EO/openeo-python-client/issues/666
2647+
"""
2648+
stac_path = tmp_path / "stac.json"
2649+
stac_data = StacDummyBuilder.collection(extent=collection_extent)
2650+
# No cube:dimensions, but at least "temporal" extent is set as indicator for having a temporal dimension
2651+
assert "cube:dimensions" not in stac_data
2652+
assert deep_get(stac_data, "extent", "temporal")
2653+
stac_path.write_text(json.dumps(stac_data))
2654+
2655+
cube = con120.load_stac(str(stac_path))
2656+
assert cube.metadata.temporal_dimension == TemporalDimension(name="t", extent=dim_extent)
2657+
2658+
def test_load_stac_band_filtering(self, con120, tmp_path):
2659+
stac_path = tmp_path / "stac.json"
2660+
stac_data = StacDummyBuilder.collection(
2661+
summaries={"eo:bands": [{"name": "B01"}, {"name": "B02"}, {"name": "B03"}]}
2662+
)
2663+
stac_path.write_text(json.dumps(stac_data))
2664+
2665+
cube = con120.load_stac(str(stac_path))
2666+
assert cube.metadata.band_names == ["B01", "B02", "B03"]
2667+
2668+
cube = con120.load_stac(str(stac_path), bands=["B03", "B02"])
2669+
assert cube.metadata.band_names == ["B03", "B02"]
2670+
26252671

26262672
@pytest.mark.parametrize(
26272673
"data",

tests/test_metadata.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -849,6 +849,10 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat
849849
assert warn_count == (0 if eo_extension_is_declared else 1)
850850

851851

852+
@pytest.mark.skipif(
853+
not _PYSTAC_1_9_EXTENSION_INTERFACE,
854+
reason="No backport of implementation/test below PySTAC 1.9 extension interface",
855+
)
852856
@pytest.mark.parametrize(
853857
["stac_dict", "expected"],
854858
[
@@ -868,7 +872,7 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat
868872
),
869873
(
870874
StacDummyBuilder.collection(),
871-
None,
875+
("t", ["2024-01-01T00:00:00Z", "2024-05-05T00:00:00Z"]),
872876
),
873877
(
874878
StacDummyBuilder.collection(

tests/test_util.py

Lines changed: 11 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import unittest.mock as mock
88
from typing import List, Union
99

10+
import dateutil.tz
1011
import pyproj
1112
import pytest
1213
import shapely.geometry
@@ -87,21 +88,12 @@ def test_datetime(self):
8788
assert "2020-03-17T12:34:56Z" == rfc3339.datetime([2020, 3, 17, 12, 34, 56])
8889
assert "2020-03-17T12:34:56Z" == rfc3339.datetime(2020, 3, 17, 12, 34, 56)
8990
assert "2020-03-17T12:34:00Z" == rfc3339.datetime(2020, 3, 17, 12, 34)
90-
assert "2020-03-17T12:34:56Z" == rfc3339.datetime(
91-
(2020, "3", 17, "12", "34", 56)
92-
)
93-
assert "2020-09-17T12:34:56Z" == rfc3339.datetime(
94-
[2020, "09", 17, "12", "34", 56]
95-
)
96-
assert "2020-09-17T12:34:56Z" == rfc3339.datetime(
97-
2020, "09", "17", "12", "34", 56
98-
)
99-
assert "2020-03-17T12:34:56Z" == rfc3339.datetime(
100-
dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None)
101-
)
102-
assert "2020-03-17T12:34:56Z" == rfc3339.datetime(
103-
dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc)
104-
)
91+
assert "2020-03-17T12:34:56Z" == rfc3339.datetime((2020, "3", 17, "12", "34", 56))
92+
assert "2020-09-17T12:34:56Z" == rfc3339.datetime([2020, "09", 17, "12", "34", 56])
93+
assert "2020-09-17T12:34:56Z" == rfc3339.datetime(2020, "09", "17", "12", "34", 56)
94+
assert "2020-03-17T12:34:56Z" == rfc3339.datetime(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None))
95+
assert "2020-03-17T12:34:56Z" == rfc3339.datetime(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc))
96+
assert "2020-03-17T12:34:56Z" == rfc3339.datetime(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dateutil.tz.UTC))
10597
assert "2020-03-17T12:34:56Z" == rfc3339.datetime(
10698
dt.datetime(
10799
*(2020, 3, 17, 12, 34, 56),
@@ -125,15 +117,10 @@ def test_normalize(self):
125117
"2020-03-17T12:34:56.44546546Z"
126118
)
127119
assert "2020-03-17" == rfc3339.normalize(dt.date(2020, 3, 17))
128-
assert "2020-03-17T12:34:56Z" == rfc3339.normalize(
129-
dt.datetime(2020, 3, 17, 12, 34, 56)
130-
)
131-
assert "2020-03-17T12:34:56Z" == rfc3339.normalize(
132-
dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None)
133-
)
134-
assert "2020-03-17T12:34:56Z" == rfc3339.normalize(
135-
dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc)
136-
)
120+
assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56))
121+
assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=None))
122+
assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dt.timezone.utc))
123+
assert "2020-03-17T12:34:56Z" == rfc3339.normalize(dt.datetime(2020, 3, 17, 12, 34, 56, tzinfo=dateutil.tz.UTC))
137124
assert "2020-03-17T12:34:56Z" == rfc3339.normalize(
138125
dt.datetime(
139126
*(2020, 3, 17, 12, 34, 56),

0 commit comments

Comments
 (0)