Skip to content

Commit 1168bc5

Browse files
committed
Merge branch 'issue298-stac-bands'
2 parents 9136699 + bf2df5c commit 1168bc5

File tree

8 files changed

+215
-34
lines changed

8 files changed

+215
-34
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ and start a new "In Progress" section above it.
2828
- Preserve original non-spatial dimensions in `resample_cube_spatial` dry run ([#397](https://github.com/Open-EO/openeo-python-driver/issues/397))
2929
- Fix compatibility with Shapely2 ([#158](https://github.com/Open-EO/openeo-python-driver/issues/158))
3030
- Allow `overlap` in `apply_neighborhood` to be not specified ([#401](https://github.com/Open-EO/openeo-python-driver/issues/401))
31+
- Start including STAC-1.1.0-style "bands" metadata in assets in batch job results ([#298](https://github.com/Open-EO/openeo-python-driver/issues/298))
32+
- Start including STAC-1.1.0-style "bands" summaries in collection metadata ([#298](https://github.com/Open-EO/openeo-python-driver/issues/298))
3133

3234

3335
## 0.133.0

openeo_driver/constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
class STAC_EXTENSION:
    """
    Schema URLs of the STAC extensions that can be listed
    in the "stac_extensions" field of STAC metadata documents.
    """

    PROCESSING = "https://stac-extensions.github.io/processing/v1.1.0/schema.json"

    # Version-pinned "eo" extension schemas.
    EO_V110 = "https://stac-extensions.github.io/eo/v1.1.0/schema.json"
    EO_V200 = "https://stac-extensions.github.io/eo/v2.0.0/schema.json"
    # Unversioned legacy name: explicit alias of the v1.1.0 schema,
    # so both names can never drift apart.
    EO = EO_V110

    FILEINFO = "https://stac-extensions.github.io/file/v2.1.0/schema.json"
    PROJECTION = "https://stac-extensions.github.io/projection/v1.1.0/schema.json"
    DATACUBE = "https://stac-extensions.github.io/datacube/v2.2.0/schema.json"
    MLMODEL = "https://stac-extensions.github.io/ml-model/v1.0.0/schema.json"
    CARD4LOPTICAL = "https://stac-extensions.github.io/card4l/v0.1.0/optical/schema.json"
    CARD4LSAR = "https://stac-extensions.github.io/card4l/v0.1.0/sar/schema.json"

    # Version-pinned "raster" extension schemas.
    RASTER_V110 = "https://stac-extensions.github.io/raster/v1.1.0/schema.json"
    RASTER_V200 = "https://stac-extensions.github.io/raster/v2.0.0/schema.json"
1014

1115

1216
class JOB_STATUS:

openeo_driver/save_result.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,8 @@ def __init__(
306306
warnings.warn("AggregatePolygonResult: GeometryCollection or DriverVectorCube expected but got {t}".format(t=type(regions)))
307307
self._regions = regions
308308
self._metadata = metadata
309+
# TODO #298 this "raster:bands" helper is old-style
310+
# and just used for "statistics" which moved to the common metadata in v2
309311
self.raster_bands = None
310312

311313
def get_data(self):
@@ -662,7 +664,6 @@ class AggregatePolygonResultCSV(AggregatePolygonResult):
662664
def __init__(self, csv_dir, regions: Union[GeometryCollection, DriverVectorCube, DelayedVector, BaseGeometry], metadata: CollectionMetadata = None):
663665
super().__init__(timeseries=None, regions=regions, metadata=metadata)
664666
self._csv_dir = csv_dir
665-
self.raster_bands = None
666667

667668
def get_data(self):
668669
if self.data is None:
@@ -717,6 +718,8 @@ def stats(band):
717718
stats["stddev"] = series.std()
718719
stats["valid_percent"] = ((100.0 * len(series.dropna()) / len(series)) if len(series) else None)
719720
return {"statistics": stats}
721+
722+
# TODO #298 `raster:bands>statistics` has moved to common STAC in raster extension 2.0.0
720723
self.raster_bands = [stats(b) for b in bands]
721724

722725
if self.is_format('covjson', 'coveragejson'):

openeo_driver/util/stac.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
"""
2+
Generic helpers to handle/consume/produce STAC items, collections, metadata constructs.
3+
"""
4+
from typing import Any
5+
6+
import collections.abc
7+
8+
9+
def sniff_stac_extension_prefix(data: Any, prefix: str) -> bool:
    """
    Recursively walk through a data structure to
    find a particular STAC extension prefix
    in object keys (e.g. "eo:" in a "eo:bands" field of an asset).

    Only keys of mappings are matched against the prefix:
    string values (e.g. an href starting with "eo:") never count as a hit.

    :param data: data structure to scan: arbitrary nesting of mappings,
        non-string iterables and scalar values
    :param prefix: STAC extension prefix to look for,
        e.g. "eo:", "raster:", "proj:", ...
    :return: True if at least one string key starting with `prefix`
        is found at any nesting level, False otherwise
    """
    # Check against the abstract `Mapping` (not just `dict`), so that
    # dict-like containers are not mistaken for a plain iterable of keys,
    # which would silently skip the key inspection.
    if isinstance(data, collections.abc.Mapping):
        if any(isinstance(k, str) and k.startswith(prefix) for k in data):
            return True
        return any(sniff_stac_extension_prefix(data=v, prefix=prefix) for v in data.values())
    # Strings and bytes are iterable too, but must be treated as scalars here.
    if isinstance(data, collections.abc.Iterable) and not isinstance(data, (str, bytes)):
        return any(sniff_stac_extension_prefix(data=x, prefix=prefix) for x in data)
    return False

openeo_driver/views.py

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@
7272
from openeo_driver.users.auth import HttpAuthHandler
7373
from openeo_driver.util.geometry import BoundingBox, reproject_geometry
7474
from openeo_driver.util.logging import ExtraLoggingFilter, FlaskRequestCorrelationIdLogging
75+
from openeo_driver.util.stac import sniff_stac_extension_prefix
7576
from openeo_driver.utils import EvalEnv, filter_supported_kwargs, smart_bool
7677

7778
_log = logging.getLogger(__name__)
@@ -1166,7 +1167,9 @@ def job_results_canonical_url() -> str:
11661167
)
11671168

11681169
assets = {
1169-
filename: _asset_object(job_id, user_id, filename, asset_metadata, job_info)
1170+
filename: _asset_object(
1171+
job_id=job_id, user_id=user_id, filename=filename, asset_metadata=asset_metadata, job_info=job_info
1172+
)
11701173
for filename, asset_metadata in result_assets.items()
11711174
if asset_metadata.get("asset", True)
11721175
}
@@ -1211,7 +1214,7 @@ def job_result_item_url(item_id) -> str:
12111214
"type": "Collection",
12121215
"stac_version": "1.0.0",
12131216
"stac_extensions": [
1214-
STAC_EXTENSION.EO,
1217+
STAC_EXTENSION.EO_V110,
12151218
STAC_EXTENSION.FILEINFO,
12161219
STAC_EXTENSION.PROCESSING,
12171220
STAC_EXTENSION.PROJECTION,
@@ -1274,8 +1277,8 @@ def job_result_item_url(item_id) -> str:
12741277
STAC_EXTENSION.FILEINFO,
12751278
]
12761279

1277-
if any("eo:bands" in asset_object for asset_object in result["assets"].values()):
1278-
result["stac_extensions"].append(STAC_EXTENSION.EO)
1280+
if sniff_stac_extension_prefix(result["assets"].values(), prefix="eo:"):
1281+
result["stac_extensions"].append(STAC_EXTENSION.EO_V110)
12791282

12801283
if any(key.startswith("proj:") for key in result["properties"]) or any(
12811284
key.startswith("proj:") for key in result["assets"]
@@ -1439,9 +1442,9 @@ def _get_job_result_item(job_id, item_id, user_id):
14391442
"type": "Feature",
14401443
"stac_version": "1.0.0",
14411444
"stac_extensions": [
1442-
STAC_EXTENSION.EO,
1445+
STAC_EXTENSION.EO_V110,
14431446
STAC_EXTENSION.FILEINFO,
1444-
STAC_EXTENSION.PROJECTION
1447+
STAC_EXTENSION.PROJECTION,
14451448
],
14461449
"id": item_id,
14471450
"geometry": geometry,
@@ -1514,6 +1517,7 @@ def _asset_object(job_id, user_id, filename: str, asset_metadata: dict, job_info
15141517
),
15151518
"type": asset_metadata.get("type", asset_metadata.get("media_type", "application/octet-stream")),
15161519
"roles": asset_metadata.get("roles", ["data"]),
1520+
# TODO: eliminate this legacy "raster:bands" construct at some point?
15171521
"raster:bands": asset_metadata.get("raster:bands"),
15181522
"file:size": asset_metadata.get("file:size"),
15191523
"alternate": asset_metadata.get("alternate"),
@@ -1524,20 +1528,31 @@ def _asset_object(job_id, user_id, filename: str, asset_metadata: dict, job_info
15241528
return result_dict
15251529
bands = asset_metadata.get("bands")
15261530

1531+
if bands:
1532+
# TODO: eliminate this legacy "eo:bands" construct at some point?
1533+
result_dict["eo:bands"] = [
1534+
dict_no_none(
1535+
{
1536+
"name": band.name,
1537+
"center_wavelength": band.wavelength_um,
1538+
}
1539+
)
1540+
for band in bands
1541+
]
1542+
# TODO: "bands" is a STAC>=1.1 feature, but here we don't know what version we are in.
1543+
result_dict["bands"] = [
1544+
dict_no_none(
1545+
{
1546+
"name": band.name,
1547+
"eo:center_wavelength": band.wavelength_um,
1548+
}
1549+
)
1550+
for band in bands
1551+
]
1552+
15271553
result_dict.update(
15281554
dict_no_none(
15291555
**{
1530-
"eo:bands": [
1531-
dict_no_none(
1532-
**{
1533-
"name": band.name,
1534-
"center_wavelength": band.wavelength_um,
1535-
}
1536-
)
1537-
for band in bands
1538-
]
1539-
if bands
1540-
else None,
15411556
"proj:bbox": asset_metadata.get("proj:bbox", job_info.proj_bbox),
15421557
"proj:epsg": asset_metadata.get("proj:epsg", job_info.epsg),
15431558
"proj:shape": asset_metadata.get("proj:shape", job_info.proj_shape),
@@ -1835,7 +1850,9 @@ def _normalize_collection_metadata(metadata: dict, api_version: ComparableVersio
18351850
# Version dependent metadata conversions
18361851
cube_dims_100 = deep_get(metadata, "cube:dimensions", default=None)
18371852
cube_dims_040 = deep_get(metadata, "properties", "cube:dimensions", default=None)
1853+
bands_110 = deep_get(metadata, "summaries", "bands", default=None)
18381854
eo_bands_100 = deep_get(metadata, "summaries", "eo:bands", default=None)
1855+
# TODO do we still need normalization of openEO 0.4 style eo:bands?
18391856
eo_bands_040 = deep_get(metadata, "properties", "eo:bands", default=None)
18401857
extent_spatial_100 = deep_get(metadata, "extent", "spatial", "bbox", default=None)
18411858
extent_spatial_040 = deep_get(metadata, "extent", "spatial", default=None)
@@ -1845,6 +1862,19 @@ def _normalize_collection_metadata(metadata: dict, api_version: ComparableVersio
18451862
if full and not cube_dims_100 and cube_dims_040:
18461863
_log.warning("Collection metadata 'cube:dimensions' in API 0.4 style instead of 1.0 style")
18471864
metadata["cube:dimensions"] = cube_dims_040
1865+
if full and not bands_110 and eo_bands_100:
1866+
_log.warning("_normalize_collection_metadata: converting eo:bands to bands metadata")
1867+
# TODO #298/#363: "bands" is a STAC>=1.1 feature, but here we don't know what version we are in.
1868+
metadata["summaries"]["bands"] = [
1869+
dict_no_none(
1870+
{
1871+
"name": b.get("name"),
1872+
"eo:common_name": b.get("common_name"),
1873+
"eo:center_wavelength": b.get("center_wavelength"),
1874+
}
1875+
)
1876+
for b in eo_bands_100
1877+
]
18481878
if full and not eo_bands_100 and eo_bands_040:
18491879
_log.warning("Collection metadata 'eo:bands' in API 0.4 style instead of 1.0 style")
18501880
metadata.setdefault("summaries", {})
@@ -1872,13 +1902,14 @@ def _normalize_collection_metadata(metadata: dict, api_version: ComparableVersio
18721902
dim["extent"] = interval
18731903

18741904
# Make sure some required fields are set.
1905+
# TODO #363 bump stac_version default to 1.0.0 or even 1.1.0?
18751906
metadata.setdefault("stac_version", "0.9.0")
18761907
metadata.setdefault(
18771908
"stac_extensions",
18781909
[
18791910
# TODO: enable these extensions only when necessary?
18801911
STAC_EXTENSION.DATACUBE,
1881-
STAC_EXTENSION.EO,
1912+
STAC_EXTENSION.EO_V110,
18821913
],
18831914
)
18841915

tests/test_save_result.py

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import numpy as np
88
import pytest
99
from shapely.geometry import GeometryCollection, Polygon
10+
import dirty_equals
1011

1112
from openeo.metadata import CollectionMetadata
1213
from openeo_driver.datacube import DriverVectorCube
@@ -181,8 +182,6 @@ def test_aggregate_polygon_result_inconsistent_bands():
181182

182183

183184
def test_aggregate_polygon_result_CSV(tmp_path):
184-
185-
186185
metadata = CollectionMetadata({
187186
"cube:dimensions": {
188187
"x": {"type": "spatial"},
@@ -196,28 +195,66 @@ def test_aggregate_polygon_result_CSV(tmp_path):
196195
Polygon([(6, 1), (1, 7), (9, 9)])
197196
])
198197

199-
200198
result = AggregatePolygonResultCSV(csv_dir=Path(__file__).parent / "data" /"aggregate_spatial_spacetime_cube", regions=regions_with_nonexistant, metadata=metadata)
201199
result.set_format("json")
202200

203201
assets = result.write_assets(tmp_path / "ignored")
204-
theAsset = assets.popitem()[1]
205-
filename = theAsset['href']
202+
[(_, asset_metadata)] = assets.items()
206203

207-
assert 'application/json' == theAsset['type']
208-
assert ["red", "green", "blue"] == [b['name'] for b in theAsset['bands']]
209-
assert 'raster:bands' in theAsset
210-
assert 'file:size' in theAsset
204+
assert "application/json" == asset_metadata["type"]
205+
assert asset_metadata["bands"] == [
206+
dirty_equals.IsPartialDict(name="red"),
207+
dirty_equals.IsPartialDict(name="green"),
208+
dirty_equals.IsPartialDict(name="blue"),
209+
]
210+
assert asset_metadata["raster:bands"] == [
211+
{
212+
"statistics": {
213+
"minimum": pytest.approx(4646, rel=0.1),
214+
"mean": pytest.approx(4646, rel=0.1),
215+
"maximum": pytest.approx(4646, rel=0.1),
216+
"stddev": pytest.approx(0.31, rel=0.1),
217+
"valid_percent": 100.0,
218+
}
219+
},
220+
{
221+
"statistics": {
222+
"minimum": pytest.approx(4865, rel=0.1),
223+
"mean": pytest.approx(4865, rel=0.1),
224+
"maximum": pytest.approx(4865, rel=0.1),
225+
"stddev": pytest.approx(0.265, rel=0.1),
226+
"valid_percent": 100.0,
227+
}
228+
},
229+
{
230+
"statistics": {
231+
"minimum": pytest.approx(5178, rel=0.1),
232+
"mean": pytest.approx(5178, rel=0.1),
233+
"maximum": pytest.approx(5178, rel=0.1),
234+
"stddev": pytest.approx(0.41, rel=0.1),
235+
"valid_percent": 100.0,
236+
}
237+
},
238+
]
211239

212-
assert 'mean' in theAsset['raster:bands'][0]["statistics"]
213-
assert 'minimum' in theAsset['raster:bands'][0]["statistics"]
214-
assert 100.0 == theAsset['raster:bands'][0]["statistics"]['valid_percent']
240+
assert "file:size" in asset_metadata
215241

216-
expected = {'2017-09-05T00:00:00Z': [[4646.262612301313, 4865.926572218383, 5178.517363510712], [None, None, None], [4645.719597475695, 4865.467252259935, 5177.803342998465]], '2017-09-06T00:00:00Z': [[None, None, None], [None, None, None], [4645.719597475695, 4865.467252259935, 5177.803342998465]]}
242+
filename = asset_metadata["href"]
243+
expected = {
244+
"2017-09-05T00:00:00Z": [
245+
[4646.262612301313, 4865.926572218383, 5178.517363510712],
246+
[None, None, None],
247+
[4645.719597475695, 4865.467252259935, 5177.803342998465],
248+
],
249+
"2017-09-06T00:00:00Z": [
250+
[None, None, None],
251+
[None, None, None],
252+
[4645.719597475695, 4865.467252259935, 5177.803342998465],
253+
],
254+
}
217255
with open(filename) as f:
218-
219256
timeseries_ds = json.load(f)
220-
assert expected == timeseries_ds
257+
assert timeseries_ds == expected
221258

222259
class TestAggregatePolygonSpatialResult:
223260

0 commit comments

Comments
 (0)