Skip to content

Commit 07dc37d

Browse files
authored
Merge pull request #21 from mapchete/fix_apply_offset
make sure array mask is propagated after applying offset to band
2 parents d389f8a + cfeb625 commit 07dc37d

File tree

4 files changed

+73
-27
lines changed

4 files changed

+73
-27
lines changed

mapchete_eo/io/assets.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,17 +32,17 @@
3232

3333

3434
class STACRasterBandProperties(BaseModel):
35-
nodata: NodataVal = None
35+
nodata: Optional[NodataVal] = None
3636
data_type: Optional[str] = None
3737
scale: float = 1.0
3838
offset: float = 0.0
3939

4040
@staticmethod
4141
def from_asset(
4242
asset: pystac.Asset,
43-
nodataval: NodataVal = None,
43+
nodataval: Optional[NodataVal] = None,
4444
) -> STACRasterBandProperties:
45-
if asset.extra_fields.get("raster:offset") is not None:
45+
if asset.extra_fields.get("raster:offset", {}):
4646
properties = dict(
4747
offset=asset.extra_fields.get("raster:offset"),
4848
scale=asset.extra_fields.get("raster:scale"),
@@ -87,16 +87,21 @@ def asset_to_np_array(
8787
)
8888

8989
logger.debug("reading asset %s and indexes %s ...", asset, indexes)
90-
data = read_raster(
90+
array = read_raster(
9191
inp=path,
9292
indexes=indexes,
9393
grid=grid,
9494
resampling=resampling.name,
9595
dst_nodata=band_properties.nodata,
96-
).data
97-
96+
).array
9897
if apply_offset and band_properties.offset:
99-
data_type = band_properties.data_type or data.dtype
98+
logger.debug(
99+
"apply offset %s and scale %s to asset %s",
100+
band_properties.offset,
101+
band_properties.scale,
102+
asset,
103+
)
104+
data_type = band_properties.data_type or array.dtype
100105

101106
# determine value range for the target data_type
102107
clip_min, clip_max = dtype_ranges[str(data_type)]
@@ -105,18 +110,17 @@ def asset_to_np_array(
105110
if clip_min == band_properties.nodata:
106111
clip_min += 1
107112

108-
data[:] = (
113+
array[~array.mask] = (
109114
(
110-
((data * band_properties.scale) + band_properties.offset)
115+
((array[~array.mask] * band_properties.scale) + band_properties.offset)
111116
/ band_properties.scale
112117
)
113118
.round()
114119
.clip(clip_min, clip_max)
115120
.astype(data_type, copy=False)
116121
.data
117122
)
118-
119-
return data
123+
return array
120124

121125

122126
def get_assets(

mapchete_eo/search/config.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import logging
2+
3+
4+
from contextlib import contextmanager
15
from typing import Optional, Dict, Any
26

37
from mapchete.path import MPath, MPathLike
@@ -61,3 +65,46 @@ def deprecate_max_cloud_cover(cls, values: Dict[str, Any]) -> Dict[str, Any]:
6165
),
6266
)
6367
search_index: Optional[MPathLike] = None
68+
69+
70+
@contextmanager
71+
def patch_invalid_assets():
72+
"""
73+
Context manager/decorator to fix pystac crash on malformed assets (strings instead of dicts).
74+
75+
"""
76+
try:
77+
from pystac.extensions.file import FileExtensionHooks
78+
except ImportError: # pragma: no cover
79+
yield
80+
return
81+
82+
logger = logging.getLogger(__name__)
83+
84+
_original_migrate = FileExtensionHooks.migrate
85+
86+
def _safe_migrate(self, obj, version, info):
87+
if "assets" in obj and isinstance(obj["assets"], dict):
88+
bad_keys = []
89+
for key, asset in obj["assets"].items():
90+
if not isinstance(asset, dict):
91+
logger.debug(
92+
"Removing malformed asset '%s' (type %s) from item %s",
93+
key,
94+
type(asset),
95+
obj.get("id", "unknown"),
96+
)
97+
bad_keys.append(key)
98+
99+
for key in bad_keys:
100+
del obj["assets"][key]
101+
102+
return _original_migrate(self, obj, version, info)
103+
104+
# Apply patch
105+
FileExtensionHooks.migrate = _safe_migrate
106+
try:
107+
yield
108+
finally:
109+
# Restore original
110+
FileExtensionHooks.migrate = _original_migrate

mapchete_eo/search/stac_search.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
from mapchete_eo.product import blacklist_products
1717
from mapchete_eo.search.base import CollectionSearcher, StaticCollectionWriterMixin
18-
from mapchete_eo.search.config import StacSearchConfig
18+
from mapchete_eo.search.config import StacSearchConfig, patch_invalid_assets
1919
from mapchete_eo.settings import mapchete_eo_settings
2020
from mapchete_eo.types import TimeRange
2121

@@ -138,14 +138,16 @@ def _search_chunks(
138138
query=query,
139139
)
140140

141-
for search in _searches():
142-
for item in search.items():
143-
if item.get_self_href() in self.blacklist: # pragma: no cover
144-
logger.debug(
145-
"item %s found in blacklist and skipping", item.get_self_href()
146-
)
147-
continue
148-
yield item
141+
with patch_invalid_assets():
142+
for search in _searches():
143+
for item in search.items():
144+
if item.get_self_href() in self.blacklist: # pragma: no cover
145+
logger.debug(
146+
"item %s found in blacklist and skipping",
147+
item.get_self_href(),
148+
)
149+
continue
150+
yield item
149151

150152
@cached_property
151153
def default_search_params(self):

tests/conftest.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -400,15 +400,13 @@ def s2_l2a_safe_metadata(s2_testdata_dir):
400400
)
401401

402402

403-
@pytest.mark.remote
404403
@pytest.fixture(scope="session")
405404
def s2_l2a_metadata_remote():
406405
return guess_s2metadata_from_metadata_xml(
407406
"s3://sentinel-s2-l2a/tiles/51/K/XR/2020/7/31/0/metadata.xml"
408407
)
409408

410409

411-
@pytest.mark.remote
412410
@pytest.fixture(scope="session")
413411
def s2_l2a_roda_metadata_remote():
414412
"""Same content as s2_l2a_metadata_remote, but hosted on a different server."""
@@ -417,7 +415,6 @@ def s2_l2a_roda_metadata_remote():
417415
)
418416

419417

420-
@pytest.mark.remote
421418
@pytest.fixture(scope="session")
422419
def s2_l2a_roda_metadata_jp2_masks_remote():
423420
"""From about 2022 on, the masks are now encoded as JP2 (rasters), not as GMLs (features)."""
@@ -426,7 +423,6 @@ def s2_l2a_roda_metadata_jp2_masks_remote():
426423
)
427424

428425

429-
@pytest.mark.remote
430426
@pytest.fixture()
431427
def s2_l2a_earthsearch_xml_remote():
432428
"""Metadata used by Earth-Search V1 endpoint"""
@@ -435,7 +431,6 @@ def s2_l2a_earthsearch_xml_remote():
435431
)
436432

437433

438-
@pytest.mark.remote
439434
@pytest.fixture()
440435
def s2_l2a_earthsearch_xml_remote_broken():
441436
"""Metadata used by Earth-Search V1 endpoint"""
@@ -444,14 +439,12 @@ def s2_l2a_earthsearch_xml_remote_broken():
444439
)
445440

446441

447-
@pytest.mark.remote
448442
@pytest.fixture(scope="session")
449443
def s2_l2a_earthsearch_remote(s2_l2a_earthsearch_remote_item):
450444
"""Metadata used by Earth-Search V1 endpoint"""
451445
return guess_s2metadata_from_item(s2_l2a_earthsearch_remote_item)
452446

453447

454-
@pytest.mark.remote
455448
@pytest.fixture(scope="session")
456449
def s2_l2a_earthsearch_remote_item():
457450
"""Metadata used by Earth-Search V1 endpoint"""

0 commit comments

Comments
 (0)