Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions mapchete_eo/io/assets.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,17 @@


class STACRasterBandProperties(BaseModel):
nodata: NodataVal = None
nodata: Optional[NodataVal] = None
data_type: Optional[str] = None
scale: float = 1.0
offset: float = 0.0

@staticmethod
def from_asset(
asset: pystac.Asset,
nodataval: NodataVal = None,
nodataval: Optional[NodataVal] = None,
) -> STACRasterBandProperties:
if asset.extra_fields.get("raster:offset") is not None:
if asset.extra_fields.get("raster:offset", {}):
properties = dict(
offset=asset.extra_fields.get("raster:offset"),
scale=asset.extra_fields.get("raster:scale"),
Expand Down Expand Up @@ -87,16 +87,21 @@ def asset_to_np_array(
)

logger.debug("reading asset %s and indexes %s ...", asset, indexes)
data = read_raster(
array = read_raster(
inp=path,
indexes=indexes,
grid=grid,
resampling=resampling.name,
dst_nodata=band_properties.nodata,
).data

).array
if apply_offset and band_properties.offset:
data_type = band_properties.data_type or data.dtype
logger.debug(
"apply offset %s and scale %s to asset %s",
band_properties.offset,
band_properties.scale,
asset,
)
data_type = band_properties.data_type or array.dtype

# determine value range for the target data_type
clip_min, clip_max = dtype_ranges[str(data_type)]
Expand All @@ -105,18 +110,17 @@ def asset_to_np_array(
if clip_min == band_properties.nodata:
clip_min += 1

data[:] = (
array[~array.mask] = (
(
((data * band_properties.scale) + band_properties.offset)
((array[~array.mask] * band_properties.scale) + band_properties.offset)
/ band_properties.scale
)
.round()
.clip(clip_min, clip_max)
.astype(data_type, copy=False)
.data
)

return data
return array


def get_assets(
Expand Down
47 changes: 47 additions & 0 deletions mapchete_eo/search/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
import logging


from contextlib import contextmanager
from typing import Optional, Dict, Any

from mapchete.path import MPath, MPathLike
Expand Down Expand Up @@ -61,3 +65,46 @@ def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
),
)
search_index: Optional[MPathLike] = None


@contextmanager
def patch_invalid_assets():
    """
    Temporarily make pystac tolerate malformed assets (strings instead of dicts).

    While the context is active, ``FileExtensionHooks.migrate`` is replaced by a
    wrapper which drops every entry of ``obj["assets"]`` that is not a dict and
    then delegates to the original ``migrate``. The original method is put back
    when the context exits, also if the body raised.

    If ``pystac.extensions.file`` cannot be imported, nothing is patched and the
    body of the context simply runs as is.

    Can be used as a context manager or as a decorator.
    """
    try:
        from pystac.extensions.file import FileExtensionHooks
    except ImportError:  # pragma: no cover
        yield
        return

    logger = logging.getLogger(__name__)
    unpatched_migrate = FileExtensionHooks.migrate

    def _safe_migrate(self, obj, version, info):
        assets = obj["assets"] if "assets" in obj else None
        if isinstance(assets, dict):
            # Collect offenders first: entries must not be removed from the
            # mapping while it is being iterated.
            malformed = {
                name: asset
                for name, asset in assets.items()
                if not isinstance(asset, dict)
            }
            for name, asset in malformed.items():
                logger.debug(
                    "Removing malformed asset '%s' (type %s) from item %s",
                    name,
                    type(asset),
                    obj.get("id", "unknown"),
                )
            for name in malformed:
                del assets[name]

        return unpatched_migrate(self, obj, version, info)

    # install the tolerant wrapper for the lifetime of the context
    FileExtensionHooks.migrate = _safe_migrate
    try:
        yield
    finally:
        # always hand back the untouched pystac implementation
        FileExtensionHooks.migrate = unpatched_migrate
20 changes: 11 additions & 9 deletions mapchete_eo/search/stac_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from mapchete_eo.product import blacklist_products
from mapchete_eo.search.base import CollectionSearcher, StaticCollectionWriterMixin
from mapchete_eo.search.config import StacSearchConfig
from mapchete_eo.search.config import StacSearchConfig, patch_invalid_assets
from mapchete_eo.settings import mapchete_eo_settings
from mapchete_eo.types import TimeRange

Expand Down Expand Up @@ -138,14 +138,16 @@ def _search_chunks(
query=query,
)

for search in _searches():
for item in search.items():
if item.get_self_href() in self.blacklist: # pragma: no cover
logger.debug(
"item %s found in blacklist and skipping", item.get_self_href()
)
continue
yield item
with patch_invalid_assets():
for search in _searches():
for item in search.items():
if item.get_self_href() in self.blacklist: # pragma: no cover
logger.debug(
"item %s found in blacklist and skipping",
item.get_self_href(),
)
continue
yield item

@cached_property
def default_search_params(self):
Expand Down
7 changes: 0 additions & 7 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,15 +400,13 @@ def s2_l2a_safe_metadata(s2_testdata_dir):
)


@pytest.mark.remote
@pytest.fixture(scope="session")
def s2_l2a_metadata_remote():
    """Result of guess_s2metadata_from_metadata_xml for a metadata.xml in the sentinel-s2-l2a bucket."""
    metadata_xml = "s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/metadata.xml"
    return guess_s2metadata_from_metadata_xml(metadata_xml)


@pytest.mark.remote
@pytest.fixture(scope="session")
def s2_l2a_roda_metadata_remote():
"""Same content as s2_l2a_metadata_remote, but hosted on different server."""
Expand All @@ -417,7 +415,6 @@ def s2_l2a_roda_metadata_remote():
)


@pytest.mark.remote
@pytest.fixture(scope="session")
def s2_l2a_roda_metadata_jp2_masks_remote():
"""From about 2022 on, the masks are encoded as JP2 (rasters), not as GMLs (features)."""
Expand All @@ -426,7 +423,6 @@ def s2_l2a_roda_metadata_jp2_masks_remote():
)


@pytest.mark.remote
@pytest.fixture()
def s2_l2a_earthsearch_xml_remote():
"""Metadata used by Earth-Search V1 endpoint"""
Expand All @@ -435,7 +431,6 @@ def s2_l2a_earthsearch_xml_remote():
)


@pytest.mark.remote
@pytest.fixture()
def s2_l2a_earthsearch_xml_remote_broken():
"""Metadata used by Earth-Search V1 endpoint"""
Expand All @@ -444,14 +439,12 @@ def s2_l2a_earthsearch_xml_remote_broken():
)


@pytest.mark.remote
@pytest.fixture(scope="session")
def s2_l2a_earthsearch_remote(s2_l2a_earthsearch_remote_item):
    """Metadata used by Earth-Search V1 endpoint, derived from the remote item fixture."""
    remote_item = s2_l2a_earthsearch_remote_item
    return guess_s2metadata_from_item(remote_item)


@pytest.mark.remote
@pytest.fixture(scope="session")
def s2_l2a_earthsearch_remote_item():
"""Metadata used by Earth-Search V1 endpoint"""
Expand Down