
Commit 4487ab7

Merge branch 'master' into stac_jobdb

2 parents: 949e7a9 + a811bff

File tree

14 files changed: +279 −81 lines

CHANGELOG.md

Lines changed: 3 additions & 0 deletions

@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 
 - Automatically use `load_url` when providing a URL as geometries to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc. ([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))
+- Allow specifying `limit` when listing batch jobs with `Connection.list_jobs()` ([#677](https://github.com/Open-EO/openeo-python-client/issues/677))
 
 ### Changed
 
@@ -18,12 +19,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   this is not translated automatically anymore to deprecated, non-standard `read_vector` usage.
   Instead, if it is a local GeoJSON file, the GeoJSON data will be loaded directly client-side.
   ([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))
+- Move `read()` method from general `JobDatabaseInterface` to more specific `FullDataFrameJobDatabase` ([#680](https://github.com/Open-EO/openeo-python-client/issues/680))
 
 ### Removed
 
 ### Fixed
 
 - `load_stac`: use fallback temporal dimension when no "cube:dimensions" in STAC Collection ([#666](https://github.com/Open-EO/openeo-python-client/issues/666))
+- Fix usage of `Parameter.spatial_extent()` with `load_collection` and `filter_bbox` ([#676](https://github.com/Open-EO/openeo-python-client/issues/676))
 
 ## [0.35.0] - 2024-11-19
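To illustrate the `load_url` convenience mentioned in the context lines above: a rough sketch, assuming a generic backend URL, collection id and geometry URL (all placeholders):

```python
import openeo

connection = openeo.connect("https://openeo.example.org")
cube = connection.load_collection("SENTINEL2_L2A")  # placeholder collection id

# Passing a URL as geometries: the client now inserts a `load_url` node automatically,
# instead of the deprecated, non-standard `read_vector` approach.
aggregated = cube.aggregate_spatial(
    geometries="https://example.org/fields.geojson",  # placeholder URL
    reducer="mean",
)
```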

docs/udp.rst

Lines changed: 1 addition & 1 deletion

@@ -134,7 +134,7 @@ Some useful parameter helpers (class methods of the :py:class:`~openeo.api.proces
 - :py:meth:`Parameter.geojson() <openeo.api.process.Parameter.geojson>` to create
   a parameter for specifying a GeoJSON geometry.
 - :py:meth:`Parameter.spatial_extent() <openeo.api.process.Parameter.spatial_extent>` to create
-  a spatial_extent parameter that is exactly the same as the corresponding parameter in `load_collection` and `load_stac`.
+  a spatial_extent parameter that is exactly the same as the corresponding parameter in ``load_collection`` and ``load_stac``.
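For context, a minimal sketch of how `Parameter.spatial_extent()` can be used when publishing a user-defined process; the backend URL, collection id and UDP id are illustrative assumptions, not part of this commit:

```python
import openeo
from openeo.api.process import Parameter

connection = openeo.connect("https://openeo.example.org")  # placeholder backend

# Defaults to name="spatial_extent" and the standard description added in this commit.
spatial_extent = Parameter.spatial_extent()

cube = connection.load_collection(
    "SENTINEL2_L2A",  # placeholder collection id
    spatial_extent=spatial_extent,
)

connection.save_user_defined_process(
    user_defined_process_id="filtered_s2",  # hypothetical UDP id
    process_graph=cube,
    parameters=[spatial_extent],
)
```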

openeo/api/process.py

Lines changed: 47 additions & 11 deletions

@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import textwrap
 import warnings
 from typing import List, Optional, Union

@@ -279,23 +280,15 @@ def bounding_box(
         }
         return cls(name=name, description=description, schema=schema, **kwargs)
 
-    _spatial_extent_description = """Limits the data to process to the specified bounding box or polygons.
-
-        For raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).
-        For vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been provided.
-
-        Empty geometries are ignored.
-        Set this parameter to null to set no limit for the spatial extent. """
-
     @classmethod
     def spatial_extent(
         cls,
         name: str = "spatial_extent",
-        description: str = _spatial_extent_description,
+        description: Optional[str] = None,
         **kwargs,
     ) -> Parameter:
         """
-        Helper to easily create a 'spatial_extent' parameter, which is compatible with the 'load_collection' argument of
+        Helper to easily create a 'spatial_extent' parameter, which is compatible with the ``load_collection`` argument of
         the same name. This allows to conveniently create user-defined processes that can be applied to a bounding box and vector data
         for spatial filtering. It is also possible for users to set to null, and define spatial filtering using other processes.

@@ -307,6 +300,26 @@ def spatial_extent(
 
         .. versionadded:: 0.32.0
         """
+        if description is None:
+            description = textwrap.dedent(
+                """
+                Limits the data to process to the specified bounding box or polygons.
+
+                For raster data, the process loads the pixel into the data cube if the point
+                at the pixel center intersects with the bounding box or any of the polygons
+                (as defined in the Simple Features standard by the OGC).
+
+                For vector data, the process loads the geometry into the data cube if the geometry
+                is fully within the bounding box or any of the polygons (as defined in the
+                Simple Features standard by the OGC). Empty geometries may only be in the
+                data cube if no spatial extent has been provided.
+
+                Empty geometries are ignored.
+
+                Set this parameter to null to set no limit for the spatial extent.
+                """
+            ).strip()
+
         schema = [
             {
                 "title": "Bounding Box",

@@ -410,7 +423,7 @@ def geojson(cls, name: str, description: str = "Geometries specified as GeoJSON
     @classmethod
     def temporal_interval(
         cls,
-        name: str,
+        name: str = "temporal_extent",
         description: str = "Temporal extent specified as two-element array with start and end date/date-time.",
         **kwargs,
     ) -> Parameter:

@@ -441,3 +454,26 @@ def temporal_interval(
             },
         }
         return cls(name=name, description=description, schema=schema, **kwargs)
+
+
+def schema_supports(schema: Union[dict, List[dict]], type: str, subtype: Optional[str] = None) -> bool:
+    """Helper to check if parameter schema supports given type/subtype"""
+    # TODO: support checking item type in arrays
+    if isinstance(schema, dict):
+        actual_type = schema.get("type")
+        if isinstance(actual_type, str):
+            if actual_type != type:
+                return False
+        elif isinstance(actual_type, list):
+            if type not in actual_type:
+                return False
+        else:
+            raise ValueError(actual_type)
+        if subtype:
+            if schema.get("subtype") != subtype:
+                return False
+        return True
+    elif isinstance(schema, list):
+        return any(schema_supports(s, type=type, subtype=subtype) for s in schema)
+    else:
+        raise ValueError(schema)
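A few examples of what the new `schema_supports()` helper evaluates to; the schema dicts below are made-up inputs for demonstration:

```python
from openeo.api.process import Parameter, schema_supports

# Single schema dict: the "type" field must match (or list) the requested type.
assert schema_supports({"type": "object", "subtype": "geojson"}, type="object")
assert not schema_supports({"type": "string"}, type="object")

# List of alternative schemas, as produced by Parameter.spatial_extent():
# one matching entry is enough.
spatial_extent = Parameter.spatial_extent()
assert schema_supports(spatial_extent.schema, type="object")
```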
Lines changed: 15 additions & 3 deletions

@@ -41,6 +41,7 @@
 
 _log = logging.getLogger(__name__)
 
+
 class _Backend(NamedTuple):
     """Container for backend info/settings"""

@@ -70,7 +71,6 @@ def exists(self) -> bool:
         """Does the job database already exist, to read job data from?"""
         ...
 
-
     @abc.abstractmethod
     def persist(self, df: pd.DataFrame):
         """

@@ -370,9 +370,9 @@ def start_job_thread(self, start_job: Callable[[], BatchJob], job_db: JobDatabas
 
         # Resume from existing db
         _log.info(f"Resuming `run_jobs` from existing {job_db}")
-        df = job_db.read()
 
         self._stop_thread = False
+
         def run_loop():
 
             # TODO: support user-provided `stats`

@@ -816,6 +816,15 @@ def initialize_from_df(self, df: pd.DataFrame, *, on_exists: str = "error"):
         # Return self to allow chaining with constructor.
         return self
 
+    @abc.abstractmethod
+    def read(self) -> pd.DataFrame:
+        """
+        Read job data from the database as pandas DataFrame.
+
+        :return: loaded job data.
+        """
+        ...
+
     @property
     def df(self) -> pd.DataFrame:
         if self._df is None:

@@ -862,6 +871,7 @@ class CsvJobDatabase(FullDataFrameJobDatabase):
 
     .. versionadded:: 0.31.0
     """
+
     def __init__(self, path: Union[str, Path]):
         super().__init__()
         self.path = Path(path)

@@ -918,6 +928,7 @@ class ParquetJobDatabase(FullDataFrameJobDatabase):
 
     .. versionadded:: 0.31.0
     """
+
     def __init__(self, path: Union[str, Path]):
         super().__init__()
         self.path = Path(path)

@@ -940,6 +951,7 @@ def read(self) -> pd.DataFrame:
         metadata = pyarrow.parquet.read_metadata(self.path)
         if b"geo" in metadata.metadata:
             import geopandas
+
             return geopandas.read_parquet(self.path)
         else:
             return pd.read_parquet(self.path)

@@ -1051,6 +1063,7 @@ class ProcessBasedJobCreator:
     `feedback and suggestions for improvement <https://github.com/Open-EO/openeo-python-client/issues>`_.
 
     """
+
     def __init__(
         self,
         *,

@@ -1083,7 +1096,6 @@ def _get_process_definition(self, connection: Connection) -> Process:
                 f"Unsupported process definition source udp_id={self._process_id!r} namespace={self._namespace!r}"
             )
 
-
    def start_job(self, row: pd.Series, connection: Connection, **_) -> BatchJob:
        """
        Implementation of the ``start_job`` callable interface
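Since `read()` now lives on `FullDataFrameJobDatabase` rather than the general `JobDatabaseInterface`, a custom full-dataframe job database only needs `exists()`, `read()` and `persist()`. A rough sketch under that assumption; the pickle-based storage and class name are purely illustrative:

```python
from pathlib import Path

import pandas as pd

from openeo.extra.job_management import FullDataFrameJobDatabase


class PickleJobDatabase(FullDataFrameJobDatabase):
    """Toy job database keeping the full job DataFrame in a single pickle file."""

    def __init__(self, path):
        super().__init__()
        self.path = Path(path)

    def exists(self) -> bool:
        return self.path.exists()

    def read(self) -> pd.DataFrame:
        # Abstract on FullDataFrameJobDatabase after this change: load the full DataFrame.
        return pd.read_pickle(self.path)

    def persist(self, df: pd.DataFrame):
        df.to_pickle(self.path)
```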

openeo/extra/stac_job_db.py renamed to openeo/extra/job_management/stac_job_db.py

Lines changed: 4 additions & 7 deletions

@@ -53,8 +53,6 @@ def __init__(
         self.base_url = stac_root_url
         self.bulk_size = 500
 
-
-
     def exists(self) -> bool:
         return len([c.id for c in self.client.get_collections() if c.id == self.collection_id]) > 0

@@ -153,14 +151,13 @@ def item_from(self, series: pd.Series) -> pystac.Item:
         return item
 
     def count_by_status(self, statuses: Iterable[str] = ()) -> dict:
-        items = self.get_by_status(statuses,max=200)
+        items = self.get_by_status(statuses, max=200)
         if items is None:
             return {k: 0 for k in statuses}
         else:
             return items["status"].value_counts().to_dict()
 
     def get_by_status(self, statuses: Iterable[str], max=None) -> pd.DataFrame:
-
         if isinstance(statuses, str):
             statuses = {statuses}
         statuses = set(statuses)

@@ -198,7 +195,6 @@ def handle_row(series):
             item = self.item_from(series)
             all_items.append(item)
 
-
         df.apply(handle_row, axis=1)
 
         self._upload_items_bulk(self.collection_id, all_items)

@@ -269,13 +265,13 @@ def _create_collection(self, collection: Collection) -> dict:
         default_auth = {
             "_auth": {
                 "read": ["anonymous"],
-                "write": ["stac-openeo-admin", "stac-openeo-editor"]
+                "write": ["stac-openeo-admin", "stac-openeo-editor"],
             }
         }
 
         coll_dict.update(default_auth)
 
-        response = requests.post(self.join_url("collections"), auth=self._auth,json=coll_dict)
+        response = requests.post(self.join_url("collections"), auth=self._auth, json=coll_dict)
         _check_response_status(response, _EXPECTED_STATUS_POST)
 
         return response.json()

@@ -287,6 +283,7 @@ def _create_collection(self, collection: Collection) -> dict:
     requests.status_codes.codes.accepted,
 ]
 
+
def _check_response_status(response: requests.Response, expected_status_codes: List[int], raise_exc: bool = False):
    if response.status_code not in expected_status_codes:
        message = (

openeo/rest/connection.py

Lines changed: 31 additions & 7 deletions

@@ -147,6 +147,7 @@ def request(
         method: str,
         path: str,
         *,
+        params: Optional[dict] = None,
         headers: Optional[dict] = None,
         auth: Optional[AuthBase] = None,
         check_error: bool = True,

@@ -159,13 +160,21 @@ def request(
         auth = auth or (self.auth if not self._is_external(url) else None)
         slow_response_threshold = kwargs.pop("slow_response_threshold", self.slow_response_threshold)
         if _log.isEnabledFor(logging.DEBUG):
-            _log.debug("Request `{m} {u}` with headers {h}, auth {a}, kwargs {k}".format(
-                m=method.upper(), u=url, h=headers and headers.keys(), a=type(auth).__name__, k=list(kwargs.keys()))
+            _log.debug(
+                "Request `{m} {u}` with params {p}, headers {h}, auth {a}, kwargs {k}".format(
+                    m=method.upper(),
+                    u=url,
+                    p=params,
+                    h=headers and headers.keys(),
+                    a=type(auth).__name__,
+                    k=list(kwargs.keys()),
+                )
             )
         with ContextTimer() as timer:
             resp = self.session.request(
                 method=method,
                 url=url,
+                params=params,
                 headers=self._merged_headers(headers),
                 auth=auth,
                 timeout=kwargs.pop("timeout", self.default_timeout),

@@ -227,16 +236,25 @@ def _raise_api_error(self, response: requests.Response):
 
         raise OpenEoApiPlainError(message=text, http_status_code=status_code, error_message=error_message)
 
-    def get(self, path: str, stream: bool = False, auth: Optional[AuthBase] = None, **kwargs) -> Response:
+    def get(
+        self,
+        path: str,
+        *,
+        params: Optional[dict] = None,
+        stream: bool = False,
+        auth: Optional[AuthBase] = None,
+        **kwargs,
+    ) -> Response:
         """
         Do GET request to REST API.
 
         :param path: API path (without root url)
+        :param params: Additional query parameters
         :param stream: True if the get request should be streamed, else False
         :param auth: optional custom authentication to use instead of the default one
         :return: response: Response
         """
-        return self.request("get", path=path, stream=stream, auth=auth, **kwargs)
+        return self.request("get", path=path, params=params, stream=stream, auth=auth, **kwargs)
 
     def post(self, path: str, json: Optional[dict] = None, **kwargs) -> Response:
         """

@@ -1047,18 +1065,24 @@ def describe_process(self, id: str, namespace: Optional[str] = None) -> dict:
 
         raise OpenEoClientException("Process does not exist.")
 
-    def list_jobs(self) -> List[dict]:
+    def list_jobs(self, limit: Union[int, None] = None) -> List[dict]:
         """
         Lists all jobs of the authenticated user.
 
+        :param limit: maximum number of jobs to return. Setting this limit enables pagination.
+
         :return: job_list: Dict of all jobs of the user.
+
+        .. versionadded:: 0.36.0
+            Added ``limit`` argument
         """
-        # TODO: Parse the result so that there get Job classes returned?
-        resp = self.get('/jobs', expected_status=200).json()
+        # TODO: Parse the result so that Job classes get returned?
+        resp = self.get("/jobs", params={"limit": limit}, expected_status=200).json()
         if resp.get("federation:missing"):
             _log.warning("Partial user job listing due to missing federation components: {c}".format(
                 c=",".join(resp["federation:missing"])
             ))
+        # TODO: when pagination is enabled: how to expose link to next page?
         jobs = resp["jobs"]
         return VisualList("data-table", data=jobs, parameters={'columns': 'jobs'})
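A short usage sketch of the new `limit` argument on `Connection.list_jobs()`; the backend URL is a placeholder and the exact pagination behaviour depends on the backend:

```python
import openeo

connection = openeo.connect("https://openeo.example.org").authenticate_oidc()

# Setting a limit enables pagination on the backend side; the value ends up as
# a query parameter (GET /jobs?limit=10) via the new `params` support in `get()`.
jobs = connection.list_jobs(limit=10)
for job in jobs:
    print(job["id"], job.get("status"))
```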

openeo/rest/datacube.py

Lines changed: 6 additions & 6 deletions

@@ -25,7 +25,7 @@
 import shapely.geometry.base
 from shapely.geometry import MultiPolygon, Polygon, mapping
 
-from openeo.api.process import Parameter
+from openeo.api.process import Parameter, schema_supports
 from openeo.dates import get_temporal_extent
 from openeo.internal.documentation import openeo_process
 from openeo.internal.graph_building import PGNode, ReduceNode, _FromNodeMixin

@@ -182,10 +182,10 @@ def load_collection(
         temporal_extent = cls._get_temporal_extent(extent=temporal_extent)
 
         if isinstance(spatial_extent, Parameter):
-            if spatial_extent.schema.get("type") != "object":
+            if not schema_supports(spatial_extent.schema, type="object"):
                 warnings.warn(
                     "Unexpected parameterized `spatial_extent` in `load_collection`:"
-                    f" expected schema with type 'object' but got {spatial_extent.schema!r}."
+                    f" expected schema compatible with type 'object' but got {spatial_extent.schema!r}."
                 )
         arguments = {
             'id': collection_id,

@@ -481,7 +481,7 @@ def filter_bbox(
         crs: Optional[Union[int, str]] = None,
         base: Optional[float] = None,
         height: Optional[float] = None,
-        bbox: Optional[Sequence[float]] = None,
+        bbox: Union[Sequence[float], Parameter, None] = None,
     ) -> DataCube:
         """
         Limits the data cube to the specified bounding box.

@@ -555,10 +555,10 @@ def filter_bbox(
             raise ValueError(args)
 
         if isinstance(bbox, Parameter):
-            if bbox.schema.get("type") != "object":
+            if not schema_supports(bbox.schema, type="object"):
                 warnings.warn(
                     "Unexpected parameterized `extent` in `filter_bbox`:"
-                    f" expected schema with type 'object' but got {bbox.schema!r}."
+                    f" expected schema compatible with type 'object' but got {bbox.schema!r}."
                 )
             extent = bbox
         else: