Skip to content

Commit 60be18f

Browse files
committed
Issue #104 drop usage of non-standard read_vector process
instead, support reading GeoJSON from local path
1 parent 491143e commit 60be18f

File tree

6 files changed

+156
-55
lines changed

6 files changed

+156
-55
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1414
### Changed
1515

1616
- `MultiBackendJobManager`: costs has been added as a column in tracking databases ([#588](https://github.com/Open-EO/openeo-python-client/issues/588))
17+
- When passing a path/string as `geometry` to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc.: this is no longer translated automatically to the deprecated, non-standard `read_vector` usage. Instead, if it is a local GeoJSON file, the GeoJSON data will be loaded directly client-side. ([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))
1718

1819
### Removed
1920

openeo/rest/datacube.py

Lines changed: 37 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
from openeo.rest.service import Service
6060
from openeo.rest.udp import RESTUserDefinedProcess
6161
from openeo.rest.vectorcube import VectorCube
62-
from openeo.util import dict_no_none, guess_format, normalize_crs, rfc3339
62+
from openeo.util import dict_no_none, guess_format, load_json, normalize_crs, rfc3339
6363

6464
if typing.TYPE_CHECKING:
6565
# Imports for type checking only (circular import issue at runtime).
@@ -609,7 +609,8 @@ def filter_spatial(
609609
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
610610
e.g. GeoJSON, GeoParquet, etc.
611611
A ``load_url`` process will automatically be added to the process graph.
612-
- a path (that is valid for the back-end) to a GeoJSON file.
612+
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
613+
which will be loaded automatically to get the geometries as GeoJSON construct.
613614
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
614615
- a :py:class:`~openeo.api.process.Parameter` instance.
615616
@@ -619,6 +620,11 @@ def filter_spatial(
619620
620621
.. versionchanged:: 0.36.0
621622
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
623+
624+
.. versionchanged:: 0.36.0
625+
Support for passing a backend-side path as ``geometries`` argument was removed.
626+
Instead, it's possible to provide a client-side path to a GeoJSON file
627+
(which will be loaded client-side to get the geometries as GeoJSON construct).
622628
"""
623629
valid_geojson_types = [
624630
"Point", "MultiPoint", "LineString", "MultiLineString",
@@ -1065,7 +1071,7 @@ def _get_geometry_argument(
10651071
crs: Optional[str] = None,
10661072
) -> Union[dict, Parameter, PGNode]:
10671073
"""
1068-
Convert input to a geometry as "geojson" subtype object.
1074+
Convert input to a geometry as "geojson" subtype object or vectorcube.
10691075
10701076
:param crs: value that encodes a coordinate reference system.
10711077
See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
@@ -1088,13 +1094,13 @@ def _get_geometry_argument(
10881094
}.get(suffix, suffix.split(".")[-1])
10891095
return self.connection.load_url(url=argument, format=format)
10901096

1091-
if isinstance(argument, (str, pathlib.Path)):
1092-
# Assumption: `geometry` is path to polygon is a path to vector file at backend.
1093-
# TODO #104: `read_vector` is non-standard process.
1094-
# TODO: If path exists client side: load it client side?
1095-
return PGNode(process_id="read_vector", arguments={"filename": str(argument)})
1096-
1097-
if isinstance(argument, shapely.geometry.base.BaseGeometry):
1097+
if (
1098+
isinstance(argument, (str, pathlib.Path))
1099+
and pathlib.Path(argument).is_file()
1100+
and pathlib.Path(argument).suffix.lower() in [".json", ".geojson"]
1101+
):
1102+
geometry = load_json(argument)
1103+
elif isinstance(argument, shapely.geometry.base.BaseGeometry):
10981104
geometry = mapping(argument)
10991105
elif isinstance(argument, dict):
11001106
geometry = argument
@@ -1149,7 +1155,8 @@ def aggregate_spatial(
11491155
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
11501156
e.g. GeoJSON, GeoParquet, etc.
11511157
A ``load_url`` process will automatically be added to the process graph.
1152-
- a path (that is valid for the back-end) to a GeoJSON file.
1158+
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
1159+
which will be loaded automatically to get the geometries as GeoJSON construct.
11531160
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
11541161
- a :py:class:`~openeo.api.process.Parameter` instance.
11551162
@@ -1179,6 +1186,11 @@ def aggregate_spatial(
11791186
11801187
.. versionchanged:: 0.36.0
11811188
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
1189+
1190+
.. versionchanged:: 0.36.0
1191+
Support for passing a backend-side path as ``geometries`` argument was removed.
1192+
Instead, it's possible to provide a client-side path to a GeoJSON file
1193+
(which will be loaded client-side to get the geometries as GeoJSON construct).
11821194
"""
11831195
valid_geojson_types = [
11841196
"Point", "MultiPoint", "LineString", "MultiLineString",
@@ -1504,7 +1516,8 @@ def apply_polygon(
15041516
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
15051517
e.g. GeoJSON, GeoParquet, etc.
15061518
A ``load_url`` process will automatically be added to the process graph.
1507-
- a path (that is valid for the back-end) to a GeoJSON file.
1519+
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
1520+
which will be loaded automatically to get the geometries as GeoJSON construct.
15081521
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
15091522
- a :py:class:`~openeo.api.process.Parameter` instance.
15101523
@@ -1521,6 +1534,11 @@ def apply_polygon(
15211534
15221535
.. versionchanged:: 0.36.0
15231536
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
1537+
1538+
.. versionchanged:: 0.36.0
1539+
Support for passing a backend-side path as ``geometries`` argument was removed.
1540+
Instead, it's possible to provide a client-side path to a GeoJSON file
1541+
(which will be loaded client-side to get the geometries as GeoJSON construct).
15241542
"""
15251543
# TODO drop support for legacy `polygons` argument:
15261544
# remove `kwargs`, remove default `None` value for `geometries` and `process`
@@ -2013,7 +2031,8 @@ def mask_polygon(
20132031
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
20142032
e.g. GeoJSON, GeoParquet, etc.
20152033
A ``load_url`` process will automatically be added to the process graph.
2016-
- a path (that is valid for the back-end) to a GeoJSON file.
2034+
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
2035+
which will be loaded automatically to get the geometries as GeoJSON construct.
20172036
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
20182037
- a :py:class:`~openeo.api.process.Parameter` instance.
20192038
@@ -2026,6 +2045,11 @@ def mask_polygon(
20262045
20272046
.. versionchanged:: 0.36.0
20282047
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
2048+
2049+
.. versionchanged:: 0.36.0
2050+
Support for passing a backend-side path as ``geometries`` argument was removed.
2051+
Instead, it's possible to provide a client-side path to a GeoJSON file
2052+
(which will be loaded client-side to get the geometries as GeoJSON construct).
20292053
"""
20302054
valid_geojson_types = ["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"]
20312055
mask = self._get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, crs=srs)

tests/data/1.0.0/aggregate_zonal_path.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@
2121
}
2222
}
2323
},
24-
"readvector1": {
25-
"process_id": "read_vector",
24+
"loadurl1": {
25+
"process_id": "load_url",
2626
"arguments": {
27-
"filename": "/some/path/to/GeometryCollection.geojson"
28-
}
27+
"url": "https://example.com/geometries.geojson",
28+
"format": "GeoJSON"}
2929
},
3030
"aggregatespatial1": {
3131
"process_id": "aggregate_spatial",
@@ -34,7 +34,7 @@
3434
"from_node": "filterbbox1"
3535
},
3636
"geometries": {
37-
"from_node": "readvector1"
37+
"from_node": "loadurl1"
3838
},
3939
"reducer": {
4040
"process_graph": {

tests/data/geojson/polygon02.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"type": "Polygon",
3+
"coordinates": [
4+
[
5+
[
6+
3,
7+
50
8+
],
9+
[
10+
4,
11+
50
12+
],
13+
[
14+
4,
15+
51
16+
],
17+
[
18+
3,
19+
50
20+
]
21+
]
22+
]
23+
}

tests/rest/datacube/test_datacube100.py

Lines changed: 87 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,25 @@ def test_filter_spatial(con100: Connection):
365365
}
366366

367367

368+
@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
369+
def test_filter_spatial_local_path(con100: Connection, path_factory, test_data):
370+
path = path_factory(test_data.get_path("geojson/polygon02.json"))
371+
cube = con100.load_collection("S2")
372+
masked = cube.filter_spatial(geometries=path)
373+
assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == {
374+
"filterspatial1": {
375+
"process_id": "filter_spatial",
376+
"arguments": {
377+
"data": {"from_node": "loadcollection1"},
378+
"geometries": {
379+
"type": "Polygon",
380+
"coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]],
381+
},
382+
},
383+
}
384+
}
385+
386+
368387
@pytest.mark.parametrize(
369388
["url", "expected_format"],
370389
[
@@ -659,6 +678,38 @@ def test_aggregate_spatial_geometry_url(con100: Connection, url, expected_format
659678
}
660679

661680

681+
@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
682+
def test_aggregate_spatial_geometry_local_path(con100: Connection, path_factory, test_data):
683+
cube = con100.load_collection("S2")
684+
path = path_factory(test_data.get_path("geojson/polygon02.json"))
685+
result = cube.aggregate_spatial(geometries=path, reducer="mean")
686+
assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
687+
"aggregatespatial1": {
688+
"process_id": "aggregate_spatial",
689+
"arguments": {
690+
"data": {"from_node": "loadcollection1"},
691+
"geometries": {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
692+
"reducer": {
693+
"process_graph": {
694+
"mean1": {
695+
"process_id": "mean",
696+
"arguments": {"data": {"from_parameter": "data"}},
697+
"result": True,
698+
}
699+
}
700+
},
701+
},
702+
},
703+
}
704+
705+
706+
def test_aggregate_spatial_geometry_local_path_invalid(con100: Connection):
707+
path = "nope/invalid:path%here.json"
708+
cube = con100.load_collection("S2")
709+
with pytest.raises(OpenEoClientException, match="Invalid geometry argument"):
710+
_ = cube.aggregate_spatial(geometries=path, reducer="mean")
711+
712+
662713
def test_aggregate_spatial_window(con100: Connection):
663714
img = con100.load_collection("S2")
664715
size = [5, 3]
@@ -827,24 +878,29 @@ def test_mask_polygon_parameter(con100: Connection):
827878
}
828879

829880

830-
def test_mask_polygon_path(con100: Connection):
831-
img = con100.load_collection("S2")
832-
masked = img.mask_polygon(mask="path/to/polygon.json")
833-
assert sorted(masked.flat_graph().keys()) == ["loadcollection1", "maskpolygon1", "readvector1"]
834-
assert masked.flat_graph()["maskpolygon1"] == {
835-
"process_id": "mask_polygon",
836-
"arguments": {
837-
"data": {"from_node": "loadcollection1"},
838-
"mask": {"from_node": "readvector1"},
881+
@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
882+
def test_mask_polygon_geometry_local_path(con100: Connection, path_factory, test_data):
883+
path = path_factory(test_data.get_path("geojson/polygon02.json"))
884+
cube = con100.load_collection("S2")
885+
masked = cube.mask_polygon(mask=path)
886+
assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == {
887+
"maskpolygon1": {
888+
"process_id": "mask_polygon",
889+
"arguments": {
890+
"data": {"from_node": "loadcollection1"},
891+
"mask": {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
892+
},
839893
},
840-
"result": True,
841-
}
842-
assert masked.flat_graph()["readvector1"] == {
843-
"process_id": "read_vector",
844-
"arguments": {"filename": "path/to/polygon.json"},
845894
}
846895

847896

897+
def test_mask_polygon_geometry_local_path_invalid(con100: Connection):
898+
path = "nope/invalid:path%here.json"
899+
cube = con100.load_collection("S2")
900+
with pytest.raises(OpenEoClientException, match="Invalid geometry argument"):
901+
_ = cube.mask_polygon(mask=path)
902+
903+
848904
@pytest.mark.parametrize("get_geometries", [
849905
lambda c: PGNode("load_vector", url="https://geo.test/features.json"),
850906
lambda c: openeo.processes.process("load_vector", url="https://geo.test/features.json"),
@@ -1583,18 +1639,19 @@ def test_chunk_polygon_parameter(con100: Connection):
15831639
}
15841640

15851641

1586-
def test_chunk_polygon_path(con100: Connection):
1642+
@pytest.mark.parametrize("path_factory", [str, pathlib.Path])
1643+
def test_chunk_polygon_path(con100: Connection, test_data, path_factory):
1644+
path = path_factory(test_data.get_path("geojson/polygon02.json"))
15871645
cube = con100.load_collection("S2")
15881646
process = lambda data: data.run_udf(udf="myfancycode", runtime="Python")
15891647
with pytest.warns(UserDeprecationWarning, match="Use `apply_polygon`"):
1590-
result = cube.chunk_polygon(chunks="path/to/polygon.json", process=process)
1648+
result = cube.chunk_polygon(chunks=path, process=process)
15911649
assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
1592-
"readvector1": {"process_id": "read_vector", "arguments": {"filename": "path/to/polygon.json"}},
15931650
"chunkpolygon1": {
15941651
"process_id": "chunk_polygon",
15951652
"arguments": {
15961653
"data": {"from_node": "loadcollection1"},
1597-
"chunks": {"from_node": "readvector1"},
1654+
"chunks": {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
15981655
"process": {
15991656
"process_graph": {
16001657
"runudf1": {
@@ -1797,21 +1854,17 @@ def test_apply_polygon_parameter(con100: Connection, geometries_argument, geomet
17971854
("geometries", "geometries"),
17981855
],
17991856
)
1800-
def test_apply_polygon_path(con100: Connection, geometries_argument, geometries_parameter):
1857+
def test_apply_polygon_local_path(con100: Connection, geometries_argument, geometries_parameter, test_data):
1858+
path = test_data.get_path("geojson/polygon02.json")
18011859
cube = con100.load_collection("S2")
18021860
process = UDF(code="myfancycode", runtime="Python")
1803-
result = cube.apply_polygon(**{geometries_argument: "path/to/polygon.json"}, process=process)
1861+
result = cube.apply_polygon(**{geometries_argument: path}, process=process)
18041862
assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
1805-
"readvector1": {
1806-
# TODO #104 #457 get rid of non-standard read_vector
1807-
"process_id": "read_vector",
1808-
"arguments": {"filename": "path/to/polygon.json"},
1809-
},
18101863
"applypolygon1": {
18111864
"process_id": "apply_polygon",
18121865
"arguments": {
18131866
"data": {"from_node": "loadcollection1"},
1814-
geometries_parameter: {"from_node": "readvector1"},
1867+
geometries_parameter: {"type": "Polygon", "coordinates": [[[3, 50], [4, 50], [4, 51], [3, 50]]]},
18151868
"process": {
18161869
"process_graph": {
18171870
"runudf1": {
@@ -1830,6 +1883,14 @@ def test_apply_polygon_path(con100: Connection, geometries_argument, geometries_
18301883
}
18311884

18321885

1886+
def test_apply_polygon_local_path_invalid(con100: Connection):
1887+
path = "nope/invalid:path%here.json"
1888+
cube = con100.load_collection("S2")
1889+
process = UDF(code="myfancycode", runtime="Python")
1890+
with pytest.raises(OpenEoClientException, match="Invalid geometry argument"):
1891+
_ = cube.apply_polygon(geometries=path, process=process)
1892+
1893+
18331894
@pytest.mark.parametrize(
18341895
["geometries_argument", "geometries_parameter"],
18351896
[

tests/rest/datacube/test_zonal_stats.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,13 @@ def test_aggregate_spatial(connection, api_version, reducer, test_data):
2424
assert get_execute_graph(res) == test_data.load_json("%s/aggregate_zonal_polygon.json" % api_version)
2525

2626

27-
def test_polygon_timeseries_path(connection, api_version, test_data):
28-
res = (
29-
connection.load_collection('S2')
30-
.filter_bbox(west=3, east=6, north=52, south=50)
31-
.polygonal_mean_timeseries(polygon="/some/path/to/GeometryCollection.geojson")
32-
)
33-
assert get_execute_graph(res) == test_data.load_json("%s/aggregate_zonal_path.json" % api_version)
34-
3527

3628
@pytest.mark.parametrize("reducer", ["mean", openeo.processes.mean, lambda x: x.mean()])
37-
def test_aggregate_spatial_read_vector(connection, api_version, reducer, test_data):
29+
def test_aggregate_spatial_with_geometry_url(connection, api_version, reducer, test_data):
3830
res = (
3931
connection.load_collection("S2")
40-
.filter_bbox(3, 6, 52, 50)
41-
.aggregate_spatial(geometries="/some/path/to/GeometryCollection.geojson", reducer=reducer)
32+
.filter_bbox(3, 6, 52, 50)
33+
.aggregate_spatial(geometries="https://example.com/geometries.geojson", reducer=reducer)
4234
)
4335
assert get_execute_graph(res) == test_data.load_json("%s/aggregate_zonal_path.json" % api_version)
4436

0 commit comments

Comments
 (0)