@@ -654,26 +654,8 @@ def metadata_from_stac(url: str) -> CubeMetadata:
654654 :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog) or a specific STAC API Collection
655655 :return: A :py:class:`CubeMetadata` containing the DataCube band metadata from the url.
656656 """
657-
658-
659657 stac_object = pystac .read_file (href = url )
660-
661- if isinstance (stac_object , pystac .Item ):
662- bands = _StacMetadataParser ().bands_from_stac_item (item = stac_object )
663-
664- elif isinstance (stac_object , pystac .Collection ):
665- # TODO #699: migrate to _StacMetadataParser
666- collection = stac_object
667- bands = _StacMetadataParser ().bands_from_stac_collection (collection = stac_object )
668-
669- if _PYSTAC_1_9_EXTENSION_INTERFACE and collection .ext .has ("item_assets" ):
670- # TODO #575 support unordered band names and avoid conversion to a list.
671- bands = list (_StacMetadataParser ().get_bands_from_item_assets (collection .ext .item_assets ))
672-
673- elif isinstance (stac_object , pystac .Catalog ):
674- bands = _StacMetadataParser ().bands_from_stac_catalog (catalog = stac_object )
675- else :
676- raise ValueError (stac_object )
658+ bands = _StacMetadataParser ().bands_from_stac_object (stac_object )
677659
678660 # At least assume there are spatial dimensions
679661 # TODO #743: are there conditions in which we even should not assume the presence of spatial dimensions?
@@ -698,6 +680,10 @@ def metadata_from_stac(url: str) -> CubeMetadata:
698680# TODO: remove this once support for Python 3.7 and 3.8 is dropped
699681_PYSTAC_1_9_EXTENSION_INTERFACE = hasattr (pystac .Item , "ext" )
700682
683+ # Sniff for PySTAC support for Collection.item_assets (in STAC core since 1.1)
684+ # (supported since PySTAC 1.12.0, which requires Python>=3.10)
685+ _PYSTAC_1_12_ITEM_ASSETS = hasattr (pystac .Collection , "item_assets" )
686+
701687
702688class _BandList (list ):
703689 """Internal wrapper for list of ``Band`` objects"""
@@ -720,79 +706,14 @@ class _StacMetadataParser:
720706 Helper to extract openEO metadata from STAC metadata resource
721707 """
722708
723-
724-
725- def __init__ (self , * , logger = _log , log_level = logging .DEBUG ):
709+ def __init__ (self , * , logger = _log , log_level = logging .DEBUG , supress_duplicate_warnings : bool = True ):
726710 self ._logger = logger
727711 self ._log_level = log_level
728712 self ._log = lambda msg , ** kwargs : self ._logger .log (msg = msg , level = self ._log_level , ** kwargs )
729713 self ._warn = lambda msg , ** kwargs : self ._logger .warning (msg = msg , ** kwargs )
730-
731- def _get_band_from_eo_bands_item (self , eo_band : Union [dict , pystac .extensions .eo .Band ]) -> Band :
732- if isinstance (eo_band , pystac .extensions .eo .Band ):
733- return Band (
734- name = eo_band .name ,
735- common_name = eo_band .common_name ,
736- wavelength_um = eo_band .center_wavelength ,
737- )
738- elif isinstance (eo_band , dict ) and "name" in eo_band :
739- return Band (
740- name = eo_band ["name" ],
741- common_name = eo_band .get ("common_name" ),
742- wavelength_um = eo_band .get ("center_wavelength" ),
743- )
744- else :
745- raise ValueError (eo_band )
746-
747- def get_bands_from_eo_bands (self , eo_bands : List [Union [dict , pystac .extensions .eo .Band ]]) -> List [Band ]:
748- """
749- Extract bands from STAC `eo:bands` array
750-
751- :param eo_bands: List of band objects, as dict or `pystac.extensions.eo.Band` instances
752- """
753- # TODO: option to skip bands that failed to parse in some way?
754- return [self ._get_band_from_eo_bands_item (band ) for band in eo_bands ]
755-
756- def _get_bands_from_item_asset (
757- self ,
758- item_asset : pystac .extensions .item_assets .AssetDefinition ,
759- * ,
760- _warn : Optional [Callable [[str ], None ]] = None ,
761- ) -> Union [List [Band ], None ]:
762- """Get bands from a STAC 'item_assets' asset definition."""
763- if _PYSTAC_1_9_EXTENSION_INTERFACE and item_asset .ext .has ("eo" ):
764- if item_asset .ext .eo .bands is not None :
765- return self .get_bands_from_eo_bands (item_asset .ext .eo .bands )
766- elif "eo:bands" in item_asset .properties :
767- # TODO: skip this in strict mode?
768- if _PYSTAC_1_9_EXTENSION_INTERFACE :
769- (_warn or self ._warn )(
770- "Extracting band info from 'eo:bands' metadata, but 'eo' STAC extension was not declared."
771- )
772- return self .get_bands_from_eo_bands (item_asset .properties ["eo:bands" ])
773-
774- return None
775-
776- def get_bands_from_item_assets (
777- self , item_assets : Dict [str , pystac .extensions .item_assets .AssetDefinition ]
778- ) -> Set [Band ]:
779- """
780- Get bands extracted from "item_assets" objects (defined by "item-assets" extension,
781- in combination with "eo" extension) at STAC Collection top-level,
782-
783- Note that "item_assets" in STAC is a mapping, so the band order is undefined,
784- which is why we return a set of bands here.
785-
786- :param item_assets: a STAC `item_assets` mapping
787- """
788- bands = set ()
789- # Trick to just warn once per collection
790- _warn = functools .lru_cache ()(self ._warn )
791- for item_asset in item_assets .values ():
792- asset_bands = self ._get_bands_from_item_asset (item_asset , _warn = _warn )
793- if asset_bands :
794- bands .update (asset_bands )
795- return bands
714+ if supress_duplicate_warnings :
715+ # Use caching trick to avoid duplicate warnings
716+ self ._warn = functools .lru_cache (maxsize = 1000 )(self ._warn )
796717
797718 def get_temporal_dimension (self , stac_obj : pystac .STACObject ) -> Union [TemporalDimension , None ]:
798719 """
@@ -828,13 +749,22 @@ def get_temporal_dimension(self, stac_obj: pystac.STACObject) -> Union[TemporalD
828749 name , extent = temporal_dims [0 ]
829750 return TemporalDimension (name = name , extent = extent )
830751
831- def _band_from_eo_bands_metadata (self , data : dict ) -> Band :
752+ def _band_from_eo_bands_metadata (self , band : Union [ dict , pystac . extensions . eo . Band ] ) -> Band :
832753 """Construct band from metadata dict in eo v1.1 style"""
833- return Band (
834- name = data ["name" ],
835- common_name = data .get ("common_name" ),
836- wavelength_um = data .get ("center_wavelength" ),
837- )
754+ if isinstance (band , pystac .extensions .eo .Band ):
755+ return Band (
756+ name = band .name ,
757+ common_name = band .common_name ,
758+ wavelength_um = band .center_wavelength ,
759+ )
760+ elif isinstance (band , dict ) and "name" in band :
761+ return Band (
762+ name = band ["name" ],
763+ common_name = band .get ("common_name" ),
764+ wavelength_um = band .get ("center_wavelength" ),
765+ )
766+ else :
767+ raise ValueError (band )
838768
839769 def _band_from_common_bands_metadata (self , data : dict ) -> Band :
840770 """Construct band from metadata dict in STAC 1.1 + eo v2 style metadata"""
@@ -844,9 +774,7 @@ def _band_from_common_bands_metadata(self, data: dict) -> Band:
844774 wavelength_um = data .get ("eo:center_wavelength" ),
845775 )
846776
847- def bands_from_stac_object (
848- self , obj : Union [pystac .Catalog , pystac .Collection , pystac .Item , pystac .Asset ]
849- ) -> _BandList :
777+ def bands_from_stac_object (self , obj : Union [pystac .STACObject , pystac .Asset ]) -> _BandList :
850778 # Note: first check for Collection, as it is a subclass of Catalog
851779 if isinstance (obj , pystac .Collection ):
852780 return self .bands_from_stac_collection (collection = obj )
@@ -857,7 +785,7 @@ def bands_from_stac_object(
857785 elif isinstance (obj , pystac .Asset ):
858786 return self .bands_from_stac_asset (asset = obj )
859787 else :
860- raise ValueError (obj )
788+ raise ValueError (f"Unsupported STAC object: { obj !r } " )
861789
862790 def bands_from_stac_catalog (self , catalog : pystac .Catalog ) -> _BandList :
863791 # TODO: "eo:bands" vs "bands" priority based on STAC and EO extension version information
@@ -877,10 +805,17 @@ def bands_from_stac_collection(
877805 ) -> _BandList :
878806 # TODO: "eo:bands" vs "bands" priority based on STAC and EO extension version information
879807 self ._log (f"bands_from_stac_collection with { collection .summaries .lists .keys ()= } " )
808+ # Look for band metadata in collection summaries
880809 if "eo:bands" in collection .summaries .lists :
881810 return _BandList (self ._band_from_eo_bands_metadata (b ) for b in collection .summaries .lists ["eo:bands" ])
882811 elif "bands" in collection .summaries .lists :
883812 return _BandList (self ._band_from_common_bands_metadata (b ) for b in collection .summaries .lists ["bands" ])
813+ # Check item assets if available
814+ elif _PYSTAC_1_12_ITEM_ASSETS and collection .item_assets :
815+ return self ._bands_from_item_assets (collection .item_assets )
816+ elif _PYSTAC_1_9_EXTENSION_INTERFACE and collection .ext .has ("item_assets" ) and collection .ext .item_assets :
817+ return self ._bands_from_item_assets (collection .ext .item_assets )
818+ # If no band metadata so far: traverse items in collection
884819 elif consult_items :
885820 bands = _BandList .merge (
886821 self .bands_from_stac_item (item = i , consult_collection = False , consult_assets = consult_assets )
@@ -914,10 +849,17 @@ def bands_from_stac_item(
914849 self ._warn ("bands_from_stac_item: no band name source found" )
915850 return _BandList ([])
916851
852+ def _warn_undeclared_metadata (self , * , field : str , ext : str ):
853+ """Helper to warn about using metadata from undeclared STAC extension"""
854+ self ._warn (f"Using { field !r} metadata, but STAC extension { ext } was not declared." )
855+
917856 def bands_from_stac_asset (self , asset : pystac .Asset ) -> _BandList :
918857 # TODO: "eo:bands" vs "bands" priority based on STAC and EO extension version information
919- self ._log (f"bands_from_stac_asset with { asset .extra_fields .keys ()= } " )
920- if "eo:bands" in asset .extra_fields :
858+ if _PYSTAC_1_9_EXTENSION_INTERFACE and asset .owner and asset .ext .has ("eo" ) and asset .ext .eo .bands is not None :
859+ return _BandList (self ._band_from_eo_bands_metadata (b ) for b in asset .ext .eo .bands )
860+ elif "eo:bands" in asset .extra_fields :
861+ if _PYSTAC_1_9_EXTENSION_INTERFACE and asset .owner and not asset .ext .has ("eo" ):
862+ self ._warn_undeclared_metadata (field = "eo:bands" , ext = "eo" )
921863 return _BandList (self ._band_from_eo_bands_metadata (b ) for b in asset .extra_fields ["eo:bands" ])
922864 elif "bands" in asset .extra_fields :
923865 # TODO: avoid extra_fields, but built-in "bands" support seems to be scheduled for pystac V2
@@ -926,3 +868,47 @@ def bands_from_stac_asset(self, asset: pystac.Asset) -> _BandList:
926868 # TODO: instead of warning: exception, or return None?
927869 self ._warn ("bands_from_stac_asset: no band name source found" )
928870 return _BandList ([])
871+
def _bands_from_item_asset_definition(
    self,
    asset: Union[
        pystac.extensions.item_assets.AssetDefinition,
        "pystac.ItemAssetDefinition",  # TODO: non-string type hint once pystac dependency is bumped to at least 1.12
    ],
) -> _BandList:
    """
    Extract bands from a single asset definition of a collection's "item_assets" mapping.

    The metadata fields are consulted in an order that depends on the kind of asset definition:

    - ``pystac.extensions.item_assets.AssetDefinition``: "eo:bands" first, then "bands"
    - ``pystac.ItemAssetDefinition`` (only checked when PySTAC provides it): "bands" first, then "eo:bands"

    :param asset: the asset definition to inspect
    :return: bands from the first field found in ``asset.properties``,
        or an empty band list when nothing applies
    """
    # Note: keep the `AssetDefinition` check first, as in the original branch order.
    if isinstance(asset, pystac.extensions.item_assets.AssetDefinition):
        field_priority = ("eo:bands", "bands")
    elif _PYSTAC_1_12_ITEM_ASSETS and isinstance(asset, pystac.ItemAssetDefinition):
        field_priority = ("bands", "eo:bands")
    else:
        return _BandList([])

    for field_name in field_priority:
        if field_name not in asset.properties:
            continue
        if field_name == "bands":
            return _BandList(self._band_from_common_bands_metadata(b) for b in asset.properties["bands"])
        # Remaining case: "eo:bands", which should come with a declared "eo" extension
        if _PYSTAC_1_9_EXTENSION_INTERFACE and asset.owner and not asset.ext.has("eo"):
            self._warn_undeclared_metadata(field="eo:bands", ext="eo")
        return _BandList(self._band_from_eo_bands_metadata(b) for b in asset.properties["eo:bands"])

    return _BandList([])
894+
def _bands_from_item_assets(
    self,
    item_assets: Dict[
        str,
        Union[
            pystac.extensions.item_assets.AssetDefinition,
            "pystac.ItemAssetDefinition",  # TODO: non-string type hint once pystac dependency is bumped to at least 1.12
        ],
    ],
) -> _BandList:
    """
    Collect the bands of all asset definitions listed under
    a collection's "item_assets" field and merge them into one band list.

    Because "item_assets" is a mapping in STAC, the resulting band order
    might be ill-defined, which is why a warning is emitted.

    :param item_assets: mapping of asset key to asset definition
    :return: merged band list of all asset definitions
    """
    self._warn("Deriving band listing from unordered `item_assets`")
    # TODO: filter on asset roles?
    bands_per_asset = (
        self._bands_from_item_asset_definition(definition) for definition in item_assets.values()
    )
    return _BandList.merge(bands_per_asset)
0 commit comments