Skip to content

Commit 6a678e2

Browse files
committed
Issue #699 fully integrate item_asset handling
1 parent 16c5006 commit 6a678e2

File tree

3 files changed

+175
-107
lines changed

3 files changed

+175
-107
lines changed

openeo/metadata.py

Lines changed: 86 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -654,26 +654,8 @@ def metadata_from_stac(url: str) -> CubeMetadata:
654654
:param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection
655655
:return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url.
656656
"""
657-
658-
659657
stac_object = pystac.read_file(href=url)
660-
661-
if isinstance(stac_object, pystac.Item):
662-
bands = _StacMetadataParser().bands_from_stac_item(item=stac_object)
663-
664-
elif isinstance(stac_object, pystac.Collection):
665-
# TODO #699: migrate to _StacMetadataParser
666-
collection = stac_object
667-
bands = _StacMetadataParser().bands_from_stac_collection(collection=stac_object)
668-
669-
if _PYSTAC_1_9_EXTENSION_INTERFACE and collection.ext.has("item_assets"):
670-
# TODO #575 support unordered band names and avoid conversion to a list.
671-
bands = list(_StacMetadataParser().get_bands_from_item_assets(collection.ext.item_assets))
672-
673-
elif isinstance(stac_object, pystac.Catalog):
674-
bands = _StacMetadataParser().bands_from_stac_catalog(catalog=stac_object)
675-
else:
676-
raise ValueError(stac_object)
658+
bands = _StacMetadataParser().bands_from_stac_object(stac_object)
677659

678660
# At least assume there are spatial dimensions
679661
# TODO #743: are there conditions in which we even should not assume the presence of spatial dimensions?
@@ -698,6 +680,10 @@ def metadata_from_stac(url: str) -> CubeMetadata:
698680
# TODO: remove this once support for Python 3.7 and 3.8 is dropped
699681
_PYSTAC_1_9_EXTENSION_INTERFACE = hasattr(pystac.Item, "ext")
700682

683+
# Sniff for PySTAC support for Collection.item_assets (in STAC core since 1.1)
684+
# (supported since PySTAC 1.12.0, which requires Python>=3.10)
685+
_PYSTAC_1_12_ITEM_ASSETS = hasattr(pystac.Collection, "item_assets")
686+
701687

702688
class _BandList(list):
703689
"""Internal wrapper for list of ``Band`` objects"""
@@ -720,79 +706,14 @@ class _StacMetadataParser:
720706
Helper to extract openEO metadata from STAC metadata resource
721707
"""
722708

723-
724-
725-
def __init__(self, *, logger=_log, log_level=logging.DEBUG):
709+
def __init__(self, *, logger=_log, log_level=logging.DEBUG, supress_duplicate_warnings: bool = True):
726710
self._logger = logger
727711
self._log_level = log_level
728712
self._log = lambda msg, **kwargs: self._logger.log(msg=msg, level=self._log_level, **kwargs)
729713
self._warn = lambda msg, **kwargs: self._logger.warning(msg=msg, **kwargs)
730-
731-
def _get_band_from_eo_bands_item(self, eo_band: Union[dict, pystac.extensions.eo.Band]) -> Band:
732-
if isinstance(eo_band, pystac.extensions.eo.Band):
733-
return Band(
734-
name=eo_band.name,
735-
common_name=eo_band.common_name,
736-
wavelength_um=eo_band.center_wavelength,
737-
)
738-
elif isinstance(eo_band, dict) and "name" in eo_band:
739-
return Band(
740-
name=eo_band["name"],
741-
common_name=eo_band.get("common_name"),
742-
wavelength_um=eo_band.get("center_wavelength"),
743-
)
744-
else:
745-
raise ValueError(eo_band)
746-
747-
def get_bands_from_eo_bands(self, eo_bands: List[Union[dict, pystac.extensions.eo.Band]]) -> List[Band]:
748-
"""
749-
Extract bands from STAC `eo:bands` array
750-
751-
:param eo_bands: List of band objects, as dict or `pystac.extensions.eo.Band` instances
752-
"""
753-
# TODO: option to skip bands that failed to parse in some way?
754-
return [self._get_band_from_eo_bands_item(band) for band in eo_bands]
755-
756-
def _get_bands_from_item_asset(
757-
self,
758-
item_asset: pystac.extensions.item_assets.AssetDefinition,
759-
*,
760-
_warn: Optional[Callable[[str], None]] = None,
761-
) -> Union[List[Band], None]:
762-
"""Get bands from a STAC 'item_assets' asset definition."""
763-
if _PYSTAC_1_9_EXTENSION_INTERFACE and item_asset.ext.has("eo"):
764-
if item_asset.ext.eo.bands is not None:
765-
return self.get_bands_from_eo_bands(item_asset.ext.eo.bands)
766-
elif "eo:bands" in item_asset.properties:
767-
# TODO: skip this in strict mode?
768-
if _PYSTAC_1_9_EXTENSION_INTERFACE:
769-
(_warn or self._warn)(
770-
"Extracting band info from 'eo:bands' metadata, but 'eo' STAC extension was not declared."
771-
)
772-
return self.get_bands_from_eo_bands(item_asset.properties["eo:bands"])
773-
774-
return None
775-
776-
def get_bands_from_item_assets(
777-
self, item_assets: Dict[str, pystac.extensions.item_assets.AssetDefinition]
778-
) -> Set[Band]:
779-
"""
780-
Get bands extracted from "item_assets" objects (defined by "item-assets" extension,
781-
in combination with "eo" extension) at STAC Collection top-level,
782-
783-
Note that "item_assets" in STAC is a mapping, so the band order is undefined,
784-
which is why we return a set of bands here.
785-
786-
:param item_assets: a STAC `item_assets` mapping
787-
"""
788-
bands = set()
789-
# Trick to just warn once per collection
790-
_warn = functools.lru_cache()(self._warn)
791-
for item_asset in item_assets.values():
792-
asset_bands = self._get_bands_from_item_asset(item_asset, _warn=_warn)
793-
if asset_bands:
794-
bands.update(asset_bands)
795-
return bands
714+
if supress_duplicate_warnings:
715+
# Use caching trick to avoid duplicate warnings
716+
self._warn = functools.lru_cache(maxsize=1000)(self._warn)
796717

797718
def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalDimension, None]:
798719
"""
@@ -828,13 +749,22 @@ def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalD
828749
name, extent = temporal_dims[0]
829750
return TemporalDimension(name=name, extent=extent)
830751

831-
def _band_from_eo_bands_metadata(self, data: dict) -> Band:
752+
def _band_from_eo_bands_metadata(self, band: Union[dict, pystac.extensions.eo.Band]) -> Band:
832753
"""Construct band from eo v1.1 style band metadata (dict or ``pystac.extensions.eo.Band`` instance)"""
833-
return Band(
834-
name=data["name"],
835-
common_name=data.get("common_name"),
836-
wavelength_um=data.get("center_wavelength"),
837-
)
754+
if isinstance(band, pystac.extensions.eo.Band):
755+
return Band(
756+
name=band.name,
757+
common_name=band.common_name,
758+
wavelength_um=band.center_wavelength,
759+
)
760+
elif isinstance(band, dict) and "name" in band:
761+
return Band(
762+
name=band["name"],
763+
common_name=band.get("common_name"),
764+
wavelength_um=band.get("center_wavelength"),
765+
)
766+
else:
767+
raise ValueError(band)
838768

839769
def _band_from_common_bands_metadata(self, data: dict) -> Band:
840770
"""Construct band from metadata dict in STAC 1.1 + eo v2 style metadata"""
@@ -844,9 +774,7 @@ def _band_from_common_bands_metadata(self, data: dict) -> Band:
844774
wavelength_um=data.get("eo:center_wavelength"),
845775
)
846776

847-
def bands_from_stac_object(
848-
self, obj: Union[pystac.Catalog, pystac.Collection, pystac.Item, pystac.Asset]
849-
) -> _BandList:
777+
def bands_from_stac_object(self, obj: Union[pystac.STACObject, pystac.Asset]) -> _BandList:
850778
# Note: first check for Collection, as it is a subclass of Catalog
851779
if isinstance(obj, pystac.Collection):
852780
return self.bands_from_stac_collection(collection=obj)
@@ -857,7 +785,7 @@ def bands_from_stac_object(
857785
elif isinstance(obj, pystac.Asset):
858786
return self.bands_from_stac_asset(asset=obj)
859787
else:
860-
raise ValueError(obj)
788+
raise ValueError(f"Unsupported STAC object: {obj!r}")
861789

862790
def bands_from_stac_catalog(self, catalog: pystac.Catalog) -> _BandList:
863791
# TODO: "eo:bands" vs "bands" priority based on STAC and EO extension version information
@@ -877,10 +805,17 @@ def bands_from_stac_collection(
877805
) -> _BandList:
878806
# TODO: "eo:bands" vs "bands" priority based on STAC and EO extension version information
879807
self._log(f"bands_from_stac_collection with {collection.summaries.lists.keys()=}")
808+
# Look for band metadata in collection summaries
880809
if "eo:bands" in collection.summaries.lists:
881810
return _BandList(self._band_from_eo_bands_metadata(b) for b in collection.summaries.lists["eo:bands"])
882811
elif "bands" in collection.summaries.lists:
883812
return _BandList(self._band_from_common_bands_metadata(b) for b in collection.summaries.lists["bands"])
813+
# Check item assets if available
814+
elif _PYSTAC_1_12_ITEM_ASSETS and collection.item_assets:
815+
return self._bands_from_item_assets(collection.item_assets)
816+
elif _PYSTAC_1_9_EXTENSION_INTERFACE and collection.ext.has("item_assets") and collection.ext.item_assets:
817+
return self._bands_from_item_assets(collection.ext.item_assets)
818+
# If no band metadata so far: traverse items in collection
884819
elif consult_items:
885820
bands = _BandList.merge(
886821
self.bands_from_stac_item(item=i, consult_collection=False, consult_assets=consult_assets)
@@ -914,10 +849,17 @@ def bands_from_stac_item(
914849
self._warn("bands_from_stac_item: no band name source found")
915850
return _BandList([])
916851

852+
def _warn_undeclared_metadata(self, *, field: str, ext: str):
853+
"""Helper to warn about using metadata from undeclared STAC extension"""
854+
self._warn(f"Using {field!r} metadata, but STAC extension {ext} was not declared.")
855+
917856
def bands_from_stac_asset(self, asset: pystac.Asset) -> _BandList:
918857
# TODO: "eo:bands" vs "bands" priority based on STAC and EO extension version information
919-
self._log(f"bands_from_stac_asset with {asset.extra_fields.keys()=}")
920-
if "eo:bands" in asset.extra_fields:
858+
if _PYSTAC_1_9_EXTENSION_INTERFACE and asset.owner and asset.ext.has("eo") and asset.ext.eo.bands is not None:
859+
return _BandList(self._band_from_eo_bands_metadata(b) for b in asset.ext.eo.bands)
860+
elif "eo:bands" in asset.extra_fields:
861+
if _PYSTAC_1_9_EXTENSION_INTERFACE and asset.owner and not asset.ext.has("eo"):
862+
self._warn_undeclared_metadata(field="eo:bands", ext="eo")
921863
return _BandList(self._band_from_eo_bands_metadata(b) for b in asset.extra_fields["eo:bands"])
922864
elif "bands" in asset.extra_fields:
923865
# TODO: avoid extra_fields, but built-in "bands" support seems to be scheduled for pystac V2
@@ -926,3 +868,47 @@ def bands_from_stac_asset(self, asset: pystac.Asset) -> _BandList:
926868
# TODO: instead of warning: exception, or return None?
927869
self._warn("bands_from_stac_asset: no band name source found")
928870
return _BandList([])
871+
872+
def _bands_from_item_asset_definition(
    self,
    asset: Union[
        pystac.extensions.item_assets.AssetDefinition,
        "pystac.ItemAssetDefinition",  # TODO: non-string type hint once pystac dependency is bumped to at least 1.12
    ],
) -> _BandList:
    """
    Extract bands from a single asset definition of a collection's "item_assets" mapping.

    The band metadata fields are consulted in an order that depends on the type of the definition:
    "eo:bands" before "bands" for ``pystac.extensions.item_assets.AssetDefinition`` objects,
    and "bands" before "eo:bands" for ``pystac.ItemAssetDefinition`` objects
    (only checked when the installed PySTAC provides ``Collection.item_assets``).

    :param asset: the item asset definition to inspect
    :return: bands from the first field that is present in the definition's properties,
        or an empty band list when no field is present or the object type is not recognized.
    """
    # Note: the `AssetDefinition` check deliberately comes first, just like a plain if/elif chain would do.
    if isinstance(asset, pystac.extensions.item_assets.AssetDefinition):
        lookup_order = ("eo:bands", "bands")
    elif _PYSTAC_1_12_ITEM_ASSETS and isinstance(asset, pystac.ItemAssetDefinition):
        lookup_order = ("bands", "eo:bands")
    else:
        lookup_order = ()

    for field in lookup_order:
        if field not in asset.properties:
            continue
        if field == "bands":
            return _BandList(self._band_from_common_bands_metadata(entry) for entry in asset.properties["bands"])
        # "eo:bands" case: warn when the "eo" extension can be checked and turns out to be undeclared
        if _PYSTAC_1_9_EXTENSION_INTERFACE and asset.owner and not asset.ext.has("eo"):
            self._warn_undeclared_metadata(field="eo:bands", ext="eo")
        return _BandList(self._band_from_eo_bands_metadata(entry) for entry in asset.properties["eo:bands"])

    return _BandList([])
894+
895+
def _bands_from_item_assets(
    self,
    item_assets: Dict[
        str,
        Union[
            pystac.extensions.item_assets.AssetDefinition,
            "pystac.ItemAssetDefinition",  # TODO: non-string type hint once pystac dependency is bumped to at least 1.12
        ],
    ],
) -> _BandList:
    """
    Collect the bands of all asset definitions listed under a collection's "item_assets" field.

    A warning is emitted on each call, because "item_assets" is a mapping in STAC,
    so the order of the resulting band listing might be ill-defined.

    :param item_assets: mapping of asset key to item asset definition
    :return: merge (``_BandList.merge``) of the band lists extracted from each asset definition
    """
    self._warn("Deriving band listing from unordered `item_assets`")
    # TODO: filter on asset roles?
    per_asset_bands = map(self._bands_from_item_asset_definition, item_assets.values())
    return _BandList.merge(per_asset_bands)

openeo/testing/stac.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ def collection(
6666
cube_dimensions: Optional[dict] = None,
6767
summaries: Optional[dict] = None,
6868
links: Optional[List[dict]] = None,
69+
**kwargs,
6970
) -> dict:
7071
"""Create a STAC Collection represented as dictionary."""
7172
if extent is None:
@@ -79,6 +80,7 @@ def collection(
7980
"license": license,
8081
"extent": extent,
8182
"links": links or [],
83+
**kwargs,
8284
}
8385
if cube_dimensions is not None:
8486
d["cube:dimensions"] = cube_dimensions

tests/test_metadata.py

Lines changed: 87 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -980,8 +980,22 @@ def test_metadata_from_stac_bands(tmp_path, test_stac, expected):
980980

981981

982982
@pytest.mark.skipif(not _PYSTAC_1_9_EXTENSION_INTERFACE, reason="Requires PySTAC 1.9+ extension interface")
983-
@pytest.mark.parametrize("eo_extension_is_declared", [False, True])
984-
def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_path, eo_extension_is_declared, caplog):
983+
@pytest.mark.parametrize(
984+
["eo_extension_is_declared", "expected_warnings"],
985+
[
986+
(
987+
False,
988+
[
989+
"Deriving band listing from unordered `item_assets`",
990+
"Using 'eo:bands' metadata, but STAC extension eo was not declared.",
991+
],
992+
),
993+
(True, ["Deriving band listing from unordered `item_assets`"]),
994+
],
995+
)
996+
def test_metadata_from_stac_collection_bands_from_item_assets(
997+
test_data, tmp_path, eo_extension_is_declared, caplog, expected_warnings
998+
):
985999
stac_data = test_data.load_json("stac/collections/agera5_daily01.json")
9861000
stac_data["stac_extensions"] = [
9871001
ext
@@ -1003,11 +1017,7 @@ def test_metadata_from_stac_collection_bands_from_item_assets(test_data, tmp_pat
10031017
"vapour_pressure",
10041018
]
10051019

1006-
warn_count = sum(
1007-
"Extracting band info from 'eo:bands' metadata, but 'eo' STAC extension was not declared." in m
1008-
for m in caplog.messages
1009-
)
1010-
assert warn_count == (0 if eo_extension_is_declared else 1)
1020+
assert caplog.messages == expected_warnings
10111021

10121022

10131023
@pytest.mark.skipif(
@@ -1705,3 +1715,73 @@ def test_bands_from_stac_item_consult_assets(self, data, expected, kwargs):
17051715
def test_bands_from_stac_asset(self, data, expected):
17061716
asset = pystac.Asset.from_dict(data)
17071717
assert _StacMetadataParser().bands_from_stac_asset(asset=asset).band_names() == expected
1718+
1719+
@pytest.mark.parametrize(
    ["stac_data", "expected_bands", "expected_warnings"],
    [
        (
            # Legacy use case: STAC 1.0 Collection with "eo" and "item_assets" extensions
            StacDummyBuilder.collection(
                stac_version="1.0.0",
                stac_extensions=[
                    "https://stac-extensions.github.io/eo/v1.1.0/schema.json",
                    "https://stac-extensions.github.io/item-assets/v1.0.0/schema.json",
                ],
                item_assets={
                    "asset1": {"eo:bands": [{"name": "B03"}, {"name": "B02"}]},
                    "asset2": {"eo:bands": [{"name": "B04"}]},
                },
            ),
            ["B03", "B02", "B04"],
            ["Deriving band listing from unordered `item_assets`"],
        ),
        (
            # STAC 1.0, with "eo" extension is used for band metadata, but not declared
            StacDummyBuilder.collection(
                stac_version="1.0.0",
                item_assets={
                    "asset1": {"eo:bands": [{"name": "B03"}, {"name": "B02"}]},
                    "asset2": {"eo:bands": [{"name": "B04"}]},
                },
            ),
            ["B03", "B02", "B04"],
            [
                "Deriving band listing from unordered `item_assets`",
                "Using 'eo:bands' metadata, but STAC extension eo was not declared.",
            ],
        ),
        (
            # STAC 1.1 Collection with common/core "bands" metadata and item_assets
            StacDummyBuilder.collection(
                stac_version="1.1.0",
                item_assets={
                    "asset1": {"bands": [{"name": "B03"}, {"name": "B02"}]},
                    "asset2": {"bands": [{"name": "B04"}]},
                },
            ),
            ["B03", "B02", "B04"],
            ["Deriving band listing from unordered `item_assets`"],
        ),
        (
            # STAC 1.1 Collection with "eo" extension
            StacDummyBuilder.collection(
                stac_version="1.1.0",
                stac_extensions=[
                    "https://stac-extensions.github.io/eo/v1.1.0/schema.json",
                ],
                item_assets={
                    "asset1": {"eo:bands": [{"name": "B03"}, {"name": "B02"}]},
                    "asset2": {"eo:bands": [{"name": "B04"}]},
                },
            ),
            ["B03", "B02", "B04"],
            ["Deriving band listing from unordered `item_assets`"],
        ),
    ],
)
def test_bands_from_stac_collection_with_item_assets(self, caplog, stac_data, expected_bands, expected_warnings):
    """
    Band names are derived from the "item_assets" of a STAC Collection,
    and exactly the expected messages are logged while doing so.
    """
    # NOTE(review): the cases that do not declare the item-assets extension presumably rely on
    #   `pystac.Collection.item_assets` support (PySTAC>=1.12) -- confirm whether a skipif guard is needed.
    collection = pystac.Collection.from_dict(stac_data)
    bands = _StacMetadataParser().bands_from_stac_collection(collection)
    assert bands.band_names() == expected_bands
    assert caplog.messages == expected_warnings

0 commit comments

Comments
 (0)