Skip to content

Commit e8017eb

Browse files
committed
Merge branch 'issue104-drop-read_vector2'
2 parents bb4b368 + ced3faa commit e8017eb

File tree

7 files changed

+224
-66
lines changed

7 files changed

+224
-66
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1414
### Changed
1515

1616
- `MultiBackendJobManager`: costs has been added as a column in tracking databases ([#588](https://github.com/Open-EO/openeo-python-client/issues/588))
17+
- When passing a path/string as `geometry` to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc.:
18+
this is not translated automatically anymore to deprecated, non-standard `read_vector` usage.
19+
Instead, if it is a local GeoJSON file, the GeoJSON data will be loaded directly client-side.
20+
([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))
1721

1822
### Removed
1923

docs/cookbook/tricks.rst

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,3 +80,51 @@ For example:
8080
8181
# `create_job` with URL to JSON file
8282
job = connection.create_job("https://jsonbin.example/my/process-graph.json")
83+
84+
85+
.. _legacy_read_vector:
86+
87+
88+
Legacy ``read_vector`` usage
89+
----------------------------
90+
91+
In versions up to 0.35.0 of the openEO Python client library,
92+
there was an old, deprecated feature in geometry handling
93+
of :py:class:`~openeo.rest.datacube.DataCube` methods like
94+
:py:meth:`~openeo.rest.datacube.DataCube.aggregate_spatial()` and
95+
:py:meth:`~openeo.rest.datacube.DataCube.mask_polygon()`
96+
where you could pass a *backend-side* path as ``geometries``, e.g.:
97+
98+
.. code-block:: python
99+
100+
cube = cube.aggregate_spatial(
101+
geometries="/backend/path/to/geometries.json",
102+
reducer="mean"
103+
)
104+
105+
The client would handle this by automatically adding a ``read_vector`` process
106+
in the process graph, with that path as argument, to instruct the backend to load the geometries from there.
107+
This ``read_vector`` process was however a backend-specific, experimental and now deprecated process.
108+
Moreover, it assumes that the user has access to (or at least knowledge of) the backend's file system,
109+
which violates the openEO principle of abstracting away backend-specific details.
110+
111+
In version 0.36.0, this old deprecated ``read_vector`` feature has been *removed*,
112+
to allow other and better convenience functionality
113+
when providing a string in the ``geometries`` argument:
114+
e.g. load from a URL with standard process ``load_url``,
115+
or load GeoJSON from a local client-side path.
116+
117+
If your workflow however depends on the old, deprecated ``read_vector`` functionality,
118+
it is possible to reconstruct that by manually adding a ``read_vector`` process in your workflow,
119+
for example as follows:
120+
121+
.. code-block:: python
122+
123+
from openeo.processes import process
124+
125+
cube = cube.aggregate_spatial(
126+
geometries=process("read_vector", filename="/backend/path/to/geometries.json"),
127+
reducer="mean"
128+
)
129+
130+
Note that this also works with older versions of the openEO Python client library.

openeo/rest/datacube.py

Lines changed: 54 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
from openeo.rest.service import Service
6060
from openeo.rest.udp import RESTUserDefinedProcess
6161
from openeo.rest.vectorcube import VectorCube
62-
from openeo.util import dict_no_none, guess_format, normalize_crs, rfc3339
62+
from openeo.util import dict_no_none, guess_format, load_json, normalize_crs, rfc3339
6363

6464
if typing.TYPE_CHECKING:
6565
# Imports for type checking only (circular import issue at runtime).
@@ -609,7 +609,8 @@ def filter_spatial(
609609
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
610610
e.g. GeoJSON, GeoParquet, etc.
611611
A ``load_url`` process will automatically be added to the process graph.
612-
- a path (that is valid for the back-end) to a GeoJSON file.
612+
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
613+
which will be loaded automatically to get the geometries as GeoJSON construct.
613614
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
614615
- a :py:class:`~openeo.api.process.Parameter` instance.
615616
@@ -619,6 +620,12 @@ def filter_spatial(
619620
620621
.. versionchanged:: 0.36.0
621622
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
623+
624+
.. versionchanged:: 0.36.0
625+
Support for passing a backend-side path as ``geometries`` argument was removed
626+
(also see :ref:`legacy_read_vector`).
627+
Instead, it's possible to provide a client-side path to a GeoJSON file
628+
(which will be loaded client-side to get the geometries as GeoJSON construct).
622629
"""
623630
valid_geojson_types = [
624631
"Point", "MultiPoint", "LineString", "MultiLineString",
@@ -1053,7 +1060,7 @@ def _merge_operator_binary_cubes(
10531060

10541061
def _get_geometry_argument(
10551062
self,
1056-
geometry: Union[
1063+
argument: Union[
10571064
shapely.geometry.base.BaseGeometry,
10581065
dict,
10591066
str,
@@ -1065,19 +1072,19 @@ def _get_geometry_argument(
10651072
crs: Optional[str] = None,
10661073
) -> Union[dict, Parameter, PGNode]:
10671074
"""
1068-
Convert input to a geometry as "geojson" subtype object.
1075+
Convert input to a geometry as "geojson" subtype object or vectorcube.
10691076
10701077
:param crs: value that encodes a coordinate reference system.
10711078
See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
10721079
"""
1073-
if isinstance(geometry, Parameter):
1074-
return geometry
1075-
elif isinstance(geometry, _FromNodeMixin):
1076-
return geometry.from_node()
1080+
if isinstance(argument, Parameter):
1081+
return argument
1082+
elif isinstance(argument, _FromNodeMixin):
1083+
return argument.from_node()
10771084

1078-
if isinstance(geometry, str) and re.match(r"^https?://", geometry, flags=re.I):
1085+
if isinstance(argument, str) and re.match(r"^https?://", argument, flags=re.I):
10791086
# Geometry provided as URL: load with `load_url` (with best-effort format guess)
1080-
url = urllib.parse.urlparse(geometry)
1087+
url = urllib.parse.urlparse(argument)
10811088
suffix = pathlib.Path(url.path.lower()).suffix
10821089
format = {
10831090
".json": "GeoJSON",
@@ -1086,18 +1093,20 @@ def _get_geometry_argument(
10861093
".parquet": "Parquet",
10871094
".geoparquet": "Parquet",
10881095
}.get(suffix, suffix.split(".")[-1])
1089-
return self.connection.load_url(url=geometry, format=format)
1090-
1091-
if isinstance(geometry, (str, pathlib.Path)):
1092-
# Assumption: `geometry` is path to polygon is a path to vector file at backend.
1093-
# TODO #104: `read_vector` is non-standard process.
1094-
# TODO: If path exists client side: load it client side?
1095-
return PGNode(process_id="read_vector", arguments={"filename": str(geometry)})
1096+
return self.connection.load_url(url=argument, format=format)
10961097

1097-
if isinstance(geometry, shapely.geometry.base.BaseGeometry):
1098-
geometry = mapping(geometry)
1099-
if not isinstance(geometry, dict):
1100-
raise OpenEoClientException("Invalid geometry argument: {g!r}".format(g=geometry))
1098+
if (
1099+
isinstance(argument, (str, pathlib.Path))
1100+
and pathlib.Path(argument).is_file()
1101+
and pathlib.Path(argument).suffix.lower() in [".json", ".geojson"]
1102+
):
1103+
geometry = load_json(argument)
1104+
elif isinstance(argument, shapely.geometry.base.BaseGeometry):
1105+
geometry = mapping(argument)
1106+
elif isinstance(argument, dict):
1107+
geometry = argument
1108+
else:
1109+
raise OpenEoClientException(f"Invalid geometry argument: {argument!r}")
11011110

11021111
if geometry.get("type") not in valid_geojson_types:
11031112
raise OpenEoClientException("Invalid geometry type {t!r}, must be one of {s}".format(
@@ -1147,7 +1156,8 @@ def aggregate_spatial(
11471156
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
11481157
e.g. GeoJSON, GeoParquet, etc.
11491158
A ``load_url`` process will automatically be added to the process graph.
1150-
- a path (that is valid for the back-end) to a GeoJSON file.
1159+
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
1160+
which will be loaded automatically to get the geometries as GeoJSON construct.
11511161
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
11521162
- a :py:class:`~openeo.api.process.Parameter` instance.
11531163
@@ -1177,6 +1187,12 @@ def aggregate_spatial(
11771187
11781188
.. versionchanged:: 0.36.0
11791189
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
1190+
1191+
.. versionchanged:: 0.36.0
1192+
Support for passing a backend-side path as ``geometries`` argument was removed
1193+
(also see :ref:`legacy_read_vector`).
1194+
Instead, it's possible to provide a client-side path to a GeoJSON file
1195+
(which will be loaded client-side to get the geometries as GeoJSON construct).
11801196
"""
11811197
valid_geojson_types = [
11821198
"Point", "MultiPoint", "LineString", "MultiLineString",
@@ -1502,7 +1518,8 @@ def apply_polygon(
15021518
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
15031519
e.g. GeoJSON, GeoParquet, etc.
15041520
A ``load_url`` process will automatically be added to the process graph.
1505-
- a path (that is valid for the back-end) to a GeoJSON file.
1521+
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
1522+
which will be loaded automatically to get the geometries as GeoJSON construct.
15061523
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
15071524
- a :py:class:`~openeo.api.process.Parameter` instance.
15081525
@@ -1519,6 +1536,12 @@ def apply_polygon(
15191536
15201537
.. versionchanged:: 0.36.0
15211538
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
1539+
1540+
.. versionchanged:: 0.36.0
1541+
Support for passing a backend-side path as ``geometries`` argument was removed
1542+
(also see :ref:`legacy_read_vector`).
1543+
Instead, it's possible to provide a client-side path to a GeoJSON file
1544+
(which will be loaded client-side to get the geometries as GeoJSON construct).
15221545
"""
15231546
# TODO drop support for legacy `polygons` argument:
15241547
# remove `kwargs, remove default `None` value for `geometries` and `process`
@@ -2011,7 +2034,8 @@ def mask_polygon(
20112034
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
20122035
e.g. GeoJSON, GeoParquet, etc.
20132036
A ``load_url`` process will automatically be added to the process graph.
2014-
- a path (that is valid for the back-end) to a GeoJSON file.
2037+
- a path (:py:class:`str` or :py:class:`~pathlib.Path`) to a local, client-side GeoJSON file,
2038+
which will be loaded automatically to get the geometries as GeoJSON construct.
20152039
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
20162040
- a :py:class:`~openeo.api.process.Parameter` instance.
20172041
@@ -2024,6 +2048,12 @@ def mask_polygon(
20242048
20252049
.. versionchanged:: 0.36.0
20262050
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
2051+
2052+
.. versionchanged:: 0.36.0
2053+
Support for passing a backend-side path as ``geometries`` argument was removed
2054+
(also see :ref:`legacy_read_vector`).
2055+
Instead, it's possible to provide a client-side path to a GeoJSON file
2056+
(which will be loaded client-side to get the geometries as GeoJSON construct).
20272057
"""
20282058
valid_geojson_types = ["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"]
20292059
mask = self._get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, crs=srs)

tests/data/1.0.0/aggregate_zonal_path.json

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,11 @@
2121
}
2222
}
2323
},
24-
"readvector1": {
25-
"process_id": "read_vector",
24+
"loadurl1": {
25+
"process_id": "load_url",
2626
"arguments": {
27-
"filename": "/some/path/to/GeometryCollection.geojson"
28-
}
27+
"url": "https://example.com/geometries.geojson",
28+
"format": "GeoJSON"}
2929
},
3030
"aggregatespatial1": {
3131
"process_id": "aggregate_spatial",
@@ -34,7 +34,7 @@
3434
"from_node": "filterbbox1"
3535
},
3636
"geometries": {
37-
"from_node": "readvector1"
37+
"from_node": "loadurl1"
3838
},
3939
"reducer": {
4040
"process_graph": {

tests/data/geojson/polygon02.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"type": "Polygon",
3+
"coordinates": [
4+
[
5+
[
6+
3,
7+
50
8+
],
9+
[
10+
4,
11+
50
12+
],
13+
[
14+
4,
15+
51
16+
],
17+
[
18+
3,
19+
50
20+
]
21+
]
22+
]
23+
}

0 commit comments

Comments
 (0)