Commit 631f4af

Issue #104/#457 replace read_vector with load_url in DataCube._get_geometry_argument
1 parent 5a3e6f4 commit 631f4af

File tree: 8 files changed, +137 -59 lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions

@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed

 - `MultiBackendJobManager`: costs has been added as a column in tracking databases ([[#588](https://github.com/Open-EO/openeo-python-client/issues/588)])
+- Eliminate usage of non-standard `read_vector` process in `DataCube.aggregate_spatial`, `DataCube.mask_polygon`, ..., and replace with standard `load_url` ([#104](https://github.com/Open-EO/openeo-python-client/issues/104))

 ### Removed

openeo/rest/datacube.py

Lines changed: 47 additions & 20 deletions

@@ -12,7 +12,9 @@
 import datetime
 import logging
 import pathlib
+import re
 import typing
+import urllib.parse
 import warnings
 from builtins import staticmethod
 from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
@@ -57,7 +59,13 @@
 from openeo.rest.service import Service
 from openeo.rest.udp import RESTUserDefinedProcess
 from openeo.rest.vectorcube import VectorCube
-from openeo.util import dict_no_none, guess_format, normalize_crs, rfc3339
+from openeo.util import (
+    dict_no_none,
+    guess_format,
+    load_json_resource,
+    normalize_crs,
+    rfc3339,
+)

 if typing.TYPE_CHECKING:
     # Imports for type checking only (circular import issue at runtime).
@@ -1035,7 +1043,7 @@ def _merge_operator_binary_cubes(

     def _get_geometry_argument(
         self,
-        geometry: Union[
+        argument: Union[
             shapely.geometry.base.BaseGeometry,
             dict,
             str,
@@ -1047,30 +1055,47 @@ def _get_geometry_argument(
         crs: Optional[str] = None,
     ) -> Union[dict, Parameter, PGNode]:
         """
-        Convert input to a geometry as "geojson" subtype object.
+        Convert input to a geometry as "geojson" subtype object or vector cube.

         :param crs: value that encodes a coordinate reference system.
             See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
         """
-        if isinstance(geometry, (str, pathlib.Path)):
-            # Assumption: `geometry` is path to polygon is a path to vector file at backend.
-            # TODO #104: `read_vector` is non-standard process.
-            # TODO: If path exists client side: load it client side?
-            return PGNode(process_id="read_vector", arguments={"filename": str(geometry)})
-        elif isinstance(geometry, Parameter):
-            return geometry
-        elif isinstance(geometry, _FromNodeMixin):
-            return geometry.from_node()
-
-        if isinstance(geometry, shapely.geometry.base.BaseGeometry):
-            geometry = mapping(geometry)
-        if not isinstance(geometry, dict):
-            raise OpenEoClientException("Invalid geometry argument: {g!r}".format(g=geometry))
+        # First handle (abstract) references, e.g. parameter or back-end side vector cube
+        if isinstance(argument, Parameter):
+            return argument
+        elif isinstance(argument, _FromNodeMixin):
+            return argument.from_node()
+
+        if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I):
+            # Geometry provided as URL: load with `load_url` (with best-effort format guess)
+            url = urllib.parse.urlparse(argument)
+            suffix = pathlib.Path(url.path.lower()).suffix
+            format = {
+                ".json": "GeoJSON",
+                ".geojson": "GeoJSON",
+                ".pq": "Parquet",
+                ".parquet": "Parquet",
+                ".geoparquet": "Parquet",
+            }.get(suffix, suffix.split(".")[-1])
+            return self.connection.load_url(url=argument, format=format)
+
+        if isinstance(argument, str):
+            geometry = load_json_resource(argument)
+        elif isinstance(argument, pathlib.Path):
+            geometry = load_json_resource(argument)
+        elif isinstance(argument, shapely.geometry.base.BaseGeometry):
+            geometry = mapping(argument)
+        elif isinstance(argument, dict):
+            geometry = argument
+        else:
+            raise OpenEoClientException(f"Invalid geometry argument: {argument!r}")

         if geometry.get("type") not in valid_geojson_types:
-            raise OpenEoClientException("Invalid geometry type {t!r}, must be one of {s}".format(
-                t=geometry.get("type"), s=valid_geojson_types
-            ))
+            raise OpenEoClientException(
+                f"Invalid geometry type {geometry.get('type')!r}, must be one of {valid_geojson_types}"
+            )
+
+        # TODO #671 get rid of this ad-hoc, inconsistent `crs` handling
         if crs:
             # TODO: don't warn when the crs is Lon-Lat like EPSG:4326?
             warnings.warn(f"Geometry with non-Lon-Lat CRS {crs!r} is only supported by specific back-ends.")
@@ -1099,6 +1124,7 @@ def aggregate_spatial(
         ],
         reducer: Union[str, typing.Callable, PGNode],
         target_dimension: Optional[str] = None,
+        # TODO #671 deprecate/remove `crs` argument here
         crs: Optional[Union[int, str]] = None,
         context: Optional[dict] = None,
         # TODO arguments: target dimension, context
@@ -1943,6 +1969,7 @@ def mask(self, mask: DataCube = None, replacement=None) -> DataCube:
     def mask_polygon(
         self,
         mask: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter, VectorCube],
+        # TODO #671 deprecate/remove `srs` argument here
         srs: str = None,
         replacement=None,
         inside: bool = None,
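For illustration, a minimal sketch of the resulting client-side behaviour, mirroring the updated tests (the back-end URL, collection id and geometry locations below are placeholders):

    import openeo

    con = openeo.connect("https://openeo.example")  # placeholder back-end URL
    cube = con.load_collection("S2")

    # A URL argument is now translated to a standard `load_url` node; the format is
    # guessed from the suffix (".json"/".geojson" -> "GeoJSON",
    # ".pq"/".parquet"/".geoparquet" -> "Parquet", otherwise the bare suffix is used).
    aggregated = cube.aggregate_spatial(
        geometries="https://example.com/geometries.geojson",
        reducer="mean",
    )

    # A local path (str or pathlib.Path) is loaded client-side with `load_json_resource`
    # and inlined as GeoJSON in the process graph.
    masked = cube.mask_polygon(mask="path/to/polygon.geojson")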

openeo/util.py

Lines changed: 2 additions & 1 deletion

@@ -441,6 +441,7 @@ def guess_format(filename: Union[str, Path]) -> Union[str, None]:


 def load_json(path: Union[Path, str]) -> dict:
+    """Load JSON serialized data from a local file"""
     with Path(path).open("r", encoding="utf-8") as f:
         return json.load(f)

@@ -459,7 +460,7 @@ def load_json_resource(src: Union[str, Path]) -> dict:
     elif isinstance(src, str) and re.match(r"^https?://", src, flags=re.I):
         # URL to remote JSON resource
         return requests.get(src).json()
-    elif isinstance(src, Path) or (isinstance(src, str) and src.endswith(".json")):
+    elif isinstance(src, Path) or (isinstance(src, str) and Path(src).suffix.lower() in {".json", ".geojson"}):
         # Assume source is a local JSON file path
         return load_json(src)
     raise ValueError(src)
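A quick sketch of the updated helper (the file path below is an illustrative placeholder): a string ending in ".json" or ".geojson" is now read from disk, while http(s) URLs are fetched and parsed as JSON:

    from openeo.util import load_json_resource

    # Local ".json"/".geojson" path: loaded from disk via `load_json`.
    polygon = load_json_resource("path/to/polygon.geojson")

    # http(s) URL: fetched with `requests` and parsed as JSON.
    remote = load_json_resource("https://example.com/geometries.geojson")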

tests/data/1.0.0/aggregate_zonal_path.json

Lines changed: 5 additions & 4 deletions

@@ -21,10 +21,11 @@
       }
     }
   },
-  "readvector1": {
-    "process_id": "read_vector",
+  "loadurl1": {
+    "process_id": "load_url",
     "arguments": {
-      "filename": "/some/path/to/GeometryCollection.geojson"
+      "url": "https://example.com/geometries.geojson",
+      "format": "GeoJSON"
     }
   },
   "aggregatespatial1": {
@@ -34,7 +35,7 @@
         "from_node": "filterbbox1"
       },
       "geometries": {
-        "from_node": "readvector1"
+        "from_node": "loadurl1"
      },
      "reducer": {
        "process_graph": {

tests/data/geojson/polygon02.json

Lines changed: 23 additions & 0 deletions

@@ -0,0 +1,23 @@
+{
+  "type": "Polygon",
+  "coordinates": [
+    [
+      [
+        3,
+        50
+      ],
+      [
+        4,
+        50
+      ],
+      [
+        4,
+        51
+      ],
+      [
+        3,
+        50
+      ]
+    ]
+  ]
+}

tests/rest/__init__.py

Lines changed: 5 additions & 2 deletions

@@ -1,13 +1,16 @@
 import json
+from typing import Union

 import mock

-from openeo.rest.datacube import DataCube
+from openeo import DataCube, VectorCube

 # TODO: move (some of) these to openeo.testing?


-def get_download_graph(cube: DataCube, *, drop_save_result: bool = False, drop_load_collection: bool = False) -> dict:
+def get_download_graph(
+    cube: Union[DataCube, VectorCube], *, drop_save_result: bool = False, drop_load_collection: bool = False
+) -> dict:
     """
     Do fake download of a cube and intercept the process graph
     :param cube: cube to download
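A minimal usage sketch of this helper in the style of the updated tests below (`con100` is the connection fixture used throughout this test suite):

    cube = con100.load_collection("S2")
    result = cube.aggregate_spatial(geometries="https://example.com/geometries.geojson", reducer="mean")
    # Fake-download the cube, intercept the generated process graph, and strip the
    # load_collection/save_result nodes so only the nodes under test remain.
    graph = get_download_graph(result, drop_save_result=True, drop_load_collection=True)
    assert set(graph) == {"loadurl1", "aggregatespatial1"}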

tests/rest/datacube/test_datacube100.py

Lines changed: 48 additions & 26 deletions

@@ -595,6 +595,33 @@ def test_aggregate_spatial_geometry_from_node(con100: Connection, get_geometries
 }


+def test_aggregate_spatial_geometry_url(con100: Connection):
+    cube = con100.load_collection("S2")
+    result = cube.aggregate_spatial(geometries="https://example.com/geometry.json", reducer="mean")
+    assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
+        "loadurl1": {
+            "process_id": "load_url",
+            "arguments": {"url": "https://example.com/geometry.json", "format": "GeoJSON"},
+        },
+        "aggregatespatial1": {
+            "process_id": "aggregate_spatial",
+            "arguments": {
+                "data": {"from_node": "loadcollection1"},
+                "geometries": {"from_node": "loadurl1"},
+                "reducer": {
+                    "process_graph": {
+                        "mean1": {
+                            "process_id": "mean",
+                            "arguments": {"data": {"from_parameter": "data"}},
+                            "result": True,
+                        }
+                    }
+                },
+            },
+        },
+    }
+
+
 def test_aggregate_spatial_window(con100: Connection):
     img = con100.load_collection("S2")
     size = [5, 3]
@@ -763,21 +790,19 @@ def test_mask_polygon_parameter(con100: Connection):
 }


-def test_mask_polygon_path(con100: Connection):
-    img = con100.load_collection("S2")
-    masked = img.mask_polygon(mask="path/to/polygon.json")
-    assert sorted(masked.flat_graph().keys()) == ["loadcollection1", "maskpolygon1", "readvector1"]
-    assert masked.flat_graph()["maskpolygon1"] == {
-        "process_id": "mask_polygon",
-        "arguments": {
-            "data": {"from_node": "loadcollection1"},
-            "mask": {"from_node": "readvector1"},
+@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
+def test_mask_polygon_path(con100: Connection, path_factory, test_data):
+    path = path_factory(test_data.get_path("geojson/polygon02.json"))
+    cube = con100.load_collection("S2")
+    masked = cube.mask_polygon(mask=path)
+    assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == {
+        "maskpolygon1": {
+            "process_id": "mask_polygon",
+            "arguments": {
+                "data": {"from_node": "loadcollection1"},
+                "mask": {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
+            },
         },
-        "result": True,
-    }
-    assert masked.flat_graph()["readvector1"] == {
-        "process_id": "read_vector",
-        "arguments": {"filename": "path/to/polygon.json"},
     }
@@ -1490,18 +1515,19 @@ def test_chunk_polygon_parameter(con100: Connection):
 }


-def test_chunk_polygon_path(con100: Connection):
+@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
+def test_chunk_polygon_path(con100: Connection, test_data, path_factory):
+    path = path_factory(test_data.get_path("geojson/polygon02.json"))
     cube = con100.load_collection("S2")
     process = lambda data: data.run_udf(udf="myfancycode", runtime="Python")
     with pytest.warns(UserDeprecationWarning, match="Use `apply_polygon`"):
-        result = cube.chunk_polygon(chunks="path/to/polygon.json", process=process)
+        result = cube.chunk_polygon(chunks=path, process=process)
     assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
-        "readvector1": {"process_id": "read_vector", "arguments": {"filename": "path/to/polygon.json"}},
         "chunkpolygon1": {
             "process_id": "chunk_polygon",
             "arguments": {
                 "data": {"from_node": "loadcollection1"},
-                "chunks": {"from_node": "readvector1"},
+                "chunks": {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
                 "process": {
                     "process_graph": {
                         "runudf1": {
@@ -1704,21 +1730,17 @@ def test_apply_polygon_parameter(con100: Connection, geometries_argument, geomet
         ("geometries", "geometries"),
     ],
 )
-def test_apply_polygon_path(con100: Connection, geometries_argument, geometries_parameter):
+def test_apply_polygon_path(con100: Connection, geometries_argument, geometries_parameter, test_data):
+    path = test_data.get_path("geojson/polygon02.json")
     cube = con100.load_collection("S2")
     process = UDF(code="myfancycode", runtime="Python")
-    result = cube.apply_polygon(**{geometries_argument: "path/to/polygon.json"}, process=process)
+    result = cube.apply_polygon(**{geometries_argument: path}, process=process)
     assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
-        "readvector1": {
-            # TODO #104 #457 get rid of non-standard read_vector
-            "process_id": "read_vector",
-            "arguments": {"filename": "path/to/polygon.json"},
-        },
         "applypolygon1": {
             "process_id": "apply_polygon",
             "arguments": {
                 "data": {"from_node": "loadcollection1"},
-                geometries_parameter: {"from_node": "readvector1"},
+                geometries_parameter: {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
                 "process": {
                     "process_graph": {
                         "runudf1": {

tests/rest/datacube/test_zonal_stats.py

Lines changed: 6 additions & 6 deletions

@@ -26,19 +26,19 @@ def test_aggregate_spatial(connection, api_version, reducer, test_data):

 def test_polygon_timeseries_path(connection, api_version, test_data):
     res = (
-        connection.load_collection('S2')
-            .filter_bbox(west=3, east=6, north=52, south=50)
-            .polygonal_mean_timeseries(polygon="/some/path/to/GeometryCollection.geojson")
+        connection.load_collection("S2")
+        .filter_bbox(west=3, east=6, north=52, south=50)
+        .polygonal_mean_timeseries(polygon="https://example.com/geometries.geojson")
     )
     assert get_execute_graph(res) == test_data.load_json("%s/aggregate_zonal_path.json" % api_version)


 @pytest.mark.parametrize("reducer", ["mean", openeo.processes.mean, lambda x: x.mean()])
-def test_aggregate_spatial_read_vector(connection, api_version, reducer, test_data):
+def test_aggregate_spatial_with_geometry_url(connection, api_version, reducer, test_data):
     res = (
         connection.load_collection("S2")
-            .filter_bbox(3, 6, 52, 50)
-            .aggregate_spatial(geometries="/some/path/to/GeometryCollection.geojson", reducer=reducer)
+        .filter_bbox(3, 6, 52, 50)
+        .aggregate_spatial(geometries="https://example.com/geometries.geojson", reducer=reducer)
     )
     assert get_execute_graph(res) == test_data.load_json("%s/aggregate_zonal_path.json" % api_version)