diff --git a/mapchete_eo/io/assets.py b/mapchete_eo/io/assets.py index 69d7a59..6f421be 100644 --- a/mapchete_eo/io/assets.py +++ b/mapchete_eo/io/assets.py @@ -32,7 +32,7 @@ class STACRasterBandProperties(BaseModel): - nodata: NodataVal = None + nodata: Optional[NodataVal] = None data_type: Optional[str] = None scale: float = 1.0 offset: float = 0.0 @@ -40,9 +40,9 @@ class STACRasterBandProperties(BaseModel): @staticmethod def from_asset( asset: pystac.Asset, - nodataval: NodataVal = None, + nodataval: Optional[NodataVal] = None, ) -> STACRasterBandProperties: - if asset.extra_fields.get("raster:offset") is not None: + if asset.extra_fields.get("raster:offset", {}): properties = dict( offset=asset.extra_fields.get("raster:offset"), scale=asset.extra_fields.get("raster:scale"), @@ -87,16 +87,21 @@ def asset_to_np_array( ) logger.debug("reading asset %s and indexes %s ...", asset, indexes) - data = read_raster( + array = read_raster( inp=path, indexes=indexes, grid=grid, resampling=resampling.name, dst_nodata=band_properties.nodata, - ).data - + ).array if apply_offset and band_properties.offset: - data_type = band_properties.data_type or data.dtype + logger.debug( + "apply offset %s and scale %s to asset %s", + band_properties.offset, + band_properties.scale, + asset, + ) + data_type = band_properties.data_type or array.dtype # determine value range for the target data_type clip_min, clip_max = dtype_ranges[str(data_type)] @@ -105,9 +110,9 @@ def asset_to_np_array( if clip_min == band_properties.nodata: clip_min += 1 - data[:] = ( + array[~array.mask] = ( ( - ((data * band_properties.scale) + band_properties.offset) + ((array[~array.mask] * band_properties.scale) + band_properties.offset) / band_properties.scale ) .round() @@ -115,8 +120,7 @@ def asset_to_np_array( .astype(data_type, copy=False) .data ) - - return data + return array def get_assets( diff --git a/mapchete_eo/search/config.py b/mapchete_eo/search/config.py index 0f3f791..6f4ad0b 100644 --- 
# Bookkeeping shared by every active ``patch_invalid_assets`` context:
# ``depth`` counts the contexts currently open, ``original`` holds the pristine
# ``FileExtensionHooks.migrate`` that has to be put back once the last one exits.
_PATCH_STATE = {"depth": 0, "original": None}


@contextmanager
def patch_invalid_assets():
    """
    Context manager/decorator to fix pystac crash on malformed assets (strings instead of dicts).

    While at least one context is open, ``FileExtensionHooks.migrate`` is
    replaced by a wrapper that removes every entry of ``obj["assets"]`` which
    is not a dict and then delegates to the original ``migrate``.

    The patch is reference counted: the first context to be entered installs
    the wrapper and the last one to be left restores the original method.
    This matters because a context may be kept open across generator yields,
    so several contexts can overlap and be closed in any order. Saving and
    restoring the attribute per context would then either drop the patch
    while another context still needs it or leave the wrapper installed for
    good.

    If ``pystac.extensions.file`` cannot be imported, nothing is patched and
    the context is a no-op.

    NOTE: the counter is not protected by a lock.
    """
    try:
        from pystac.extensions.file import FileExtensionHooks
    except ImportError:  # pragma: no cover
        yield
        return

    logger = logging.getLogger(__name__)

    if _PATCH_STATE["depth"] == 0:
        original_migrate = FileExtensionHooks.migrate

        def _safe_migrate(self, obj, version, info):
            assets = obj.get("assets")
            if isinstance(assets, dict):
                # collect first, delete afterwards: a dict must not change
                # size while it is being iterated
                bad_keys = [
                    key for key, asset in assets.items() if not isinstance(asset, dict)
                ]
                for key in bad_keys:
                    logger.debug(
                        "Removing malformed asset '%s' (type %s) from item %s",
                        key,
                        type(assets[key]),
                        obj.get("id", "unknown"),
                    )
                    del assets[key]

            return original_migrate(self, obj, version, info)

        # Apply patch (only the first active context does this)
        _PATCH_STATE["original"] = original_migrate
        FileExtensionHooks.migrate = _safe_migrate

    _PATCH_STATE["depth"] += 1
    try:
        yield
    finally:
        _PATCH_STATE["depth"] -= 1
        if _PATCH_STATE["depth"] == 0:
            # Restore original (only the last active context does this)
            FileExtensionHooks.migrate = _PATCH_STATE["original"]
            _PATCH_STATE["original"] = None
in _searches(): - for item in search.items(): - if item.get_self_href() in self.blacklist: # pragma: no cover - logger.debug( - "item %s found in blacklist and skipping", item.get_self_href() - ) - continue - yield item + with patch_invalid_assets(): + for search in _searches(): + for item in search.items(): + if item.get_self_href() in self.blacklist: # pragma: no cover + logger.debug( + "item %s found in blacklist and skipping", + item.get_self_href(), + ) + continue + yield item @cached_property def default_search_params(self): diff --git a/tests/conftest.py b/tests/conftest.py index 3dc82a4..2448d61 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -400,7 +400,6 @@ def s2_l2a_safe_metadata(s2_testdata_dir): ) -@pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_metadata_remote(): return guess_s2metadata_from_metadata_xml( @@ -408,7 +407,6 @@ def s2_l2a_metadata_remote(): ) -@pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_roda_metadata_remote(): """Same content as s2_l2a_metadata_remote, but hosted on different server.""" @@ -417,7 +415,6 @@ def s2_l2a_roda_metadata_remote(): ) -@pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_roda_metadata_jp2_masks_remote(): """From about 2022 on, ahte masks are now encoded as JP2 (rasters), not as GMLs (features).""" @@ -426,7 +423,6 @@ def s2_l2a_roda_metadata_jp2_masks_remote(): ) -@pytest.mark.remote @pytest.fixture() def s2_l2a_earthsearch_xml_remote(): """Metadata used by Earth-Search V1 endpoint""" @@ -435,7 +431,6 @@ def s2_l2a_earthsearch_xml_remote(): ) -@pytest.mark.remote @pytest.fixture() def s2_l2a_earthsearch_xml_remote_broken(): """Metadata used by Earth-Search V1 endpoint""" @@ -444,14 +439,12 @@ def s2_l2a_earthsearch_xml_remote_broken(): ) -@pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_earthsearch_remote(s2_l2a_earthsearch_remote_item): """Metadata used by Earth-Search V1 endpoint""" return 
guess_s2metadata_from_item(s2_l2a_earthsearch_remote_item) -@pytest.mark.remote @pytest.fixture(scope="session") def s2_l2a_earthsearch_remote_item(): """Metadata used by Earth-Search V1 endpoint"""