Skip to content

Commit ec66b31

Browse files
committed
Issue #683/#681 align additional/job_options argument in create_job, download, ...
1 parent a811bff commit ec66b31

File tree

9 files changed

+336
-26
lines changed

9 files changed

+336
-26
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111

1212
- Automatically use `load_url` when providing a URL as geometries to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc. ([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))
1313
- Allow specifying `limit` when listing batch jobs with `Connection.list_jobs()` ([#677](https://github.com/Open-EO/openeo-python-client/issues/677))
14+
- Add `additional` and `job_options` arguments to `Connection.download()`, `DataCube.download()` and related ([#681](https://github.com/Open-EO/openeo-python-client/issues/681))
1415

1516
### Changed
1617

@@ -20,6 +21,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2021
Instead, if it is a local GeoJSON file, the GeoJSON data will be loaded directly client-side.
2122
([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))
2223
- Move `read()` method from general `JobDatabaseInterface` to more specific `FullDataFrameJobDatabase` ([#680](https://github.com/Open-EO/openeo-python-client/issues/680))
24+
- Align `additional` and `job_options` arguments in `Connection.create_job()`, `DataCube.create_job()` and related.
25+
Also, follow official spec more closely. ([#683](https://github.com/Open-EO/openeo-python-client/issues/683), [Open-EO/openeo-api#276](https://github.com/Open-EO/openeo-api/issues/276))
2326

2427
### Removed
2528

openeo/rest/_testing.py

Lines changed: 26 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,17 @@ class DummyBackend:
3232
"""
3333

3434
# TODO: move to openeo.testing
35+
# TODO: unify "batch_jobs", "batch_jobs_full" and "extra_job_metadata_fields"?
36+
# TODO: unify "sync_requests" and "sync_requests_full"?
3537

3638
__slots__ = (
3739
"_requests_mock",
3840
"connection",
3941
"file_formats",
4042
"sync_requests",
43+
"sync_requests_full",
4144
"batch_jobs",
45+
"batch_jobs_full",
4246
"validation_requests",
4347
"next_result",
4448
"next_validation_errors",
@@ -60,7 +64,9 @@ def __init__(
6064
self.connection = connection
6165
self.file_formats = {"input": {}, "output": {}}
6266
self.sync_requests = []
67+
self.sync_requests_full = []
6368
self.batch_jobs = {}
69+
self.batch_jobs_full = {}
6470
self.validation_requests = []
6571
self.next_result = self.DEFAULT_RESULT
6672
self.next_validation_errors = []
@@ -163,7 +169,9 @@ def setup_file_format(self, name: str, type: str = "output", gis_data_types: Ite
163169

164170
def _handle_post_result(self, request, context):
165171
"""handler of `POST /result` (synchronous execute)"""
166-
pg = request.json()["process"]["process_graph"]
172+
post_data = request.json()
173+
pg = post_data["process"]["process_graph"]
174+
self.sync_requests_full.append(post_data)
167175
self.sync_requests.append(pg)
168176
result = self.next_result
169177
if isinstance(result, (dict, list)):
@@ -185,6 +193,10 @@ def _handle_post_jobs(self, request, context):
185193
job_id = f"job-{len(self.batch_jobs):03d}"
186194
assert job_id not in self.batch_jobs
187195

196+
# Full post data dump
197+
self.batch_jobs_full[job_id] = post_data
198+
199+
# Batch job essentials
188200
job_data = {"job_id": job_id, "pg": pg, "status": "created"}
189201
for field in ["title", "description"]:
190202
if field in post_data:
@@ -272,6 +284,11 @@ def get_sync_pg(self) -> dict:
272284
assert len(self.sync_requests) == 1
273285
return self.sync_requests[0]
274286

287+
def get_sync_post_data(self) -> dict:
288+
"""Get post data of the one and only synchronous job."""
289+
assert len(self.sync_requests_full) == 1
290+
return self.sync_requests_full[0]
291+
275292
def get_batch_pg(self) -> dict:
276293
"""
277294
Get process graph of the one and only batch job.
@@ -280,6 +297,14 @@ def get_batch_pg(self) -> dict:
280297
assert len(self.batch_jobs) == 1
281298
return self.batch_jobs[max(self.batch_jobs.keys())]["pg"]
282299

300+
def get_batch_post_data(self) -> dict:
301+
"""
302+
Get post data of the one and only batch job.
303+
Fails when there is none or more than one.
304+
"""
305+
assert len(self.batch_jobs_full) == 1
306+
return self.batch_jobs_full[max(self.batch_jobs_full.keys())]
307+
283308
def get_validation_pg(self) -> dict:
284309
"""
285310
Get process graph of the one and only validation request.

openeo/rest/connection.py

Lines changed: 52 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1644,6 +1644,8 @@ def upload_file(
16441644
def _build_request_with_process_graph(
16451645
self,
16461646
process_graph: Union[dict, FlatGraphableMixin, str, Path, List[FlatGraphableMixin]],
1647+
additional: Optional[dict] = None,
1648+
job_options: Optional[dict] = None,
16471649
**kwargs,
16481650
) -> dict:
16491651
"""
@@ -1655,6 +1657,15 @@ def _build_request_with_process_graph(
16551657
if any(c != self for c in connections):
16561658
raise OpenEoClientException(f"Mixing different connections: {self} and {connections}.")
16571659
result = kwargs
1660+
1661+
if additional:
1662+
result.update(additional)
1663+
if job_options is not None:
1664+
# Note: this "job_options" top-level property is not in official openEO API spec,
1665+
# but a commonly used convention, e.g. in openeo-python-driver based deployments.
1666+
assert "job_options" not in result
1667+
result["job_options"] = job_options
1668+
16581669
process_graph = as_flat_graph(process_graph)
16591670
if "process_graph" not in process_graph:
16601671
process_graph = {"process_graph": process_graph}
@@ -1702,6 +1713,8 @@ def download(
17021713
timeout: Optional[int] = None,
17031714
validate: Optional[bool] = None,
17041715
chunk_size: int = DEFAULT_DOWNLOAD_CHUNK_SIZE,
1716+
additional: Optional[dict] = None,
1717+
job_options: Optional[dict] = None,
17051718
) -> Union[None, bytes]:
17061719
"""
17071720
Downloads the result of a process graph synchronously,
@@ -1715,8 +1728,16 @@ def download(
17151728
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
17161729
(overruling the connection's ``auto_validate`` setting).
17171730
:param chunk_size: chunk size for streaming response.
1731+
:param additional: additional (top-level) properties to set in the request body
1732+
:param job_options: dictionary of job options to pass to the backend
1733+
(under top-level property "job_options")
1734+
1735+
.. versionadded:: 0.36.0
1736+
Added arguments ``additional`` and ``job_options``.
17181737
"""
1719-
pg_with_metadata = self._build_request_with_process_graph(process_graph=graph)
1738+
pg_with_metadata = self._build_request_with_process_graph(
1739+
process_graph=graph, additional=additional, job_options=job_options
1740+
)
17201741
self._preflight_validation(pg_with_metadata=pg_with_metadata, validate=validate)
17211742
response = self.post(
17221743
path="/result",
@@ -1740,6 +1761,8 @@ def execute(
17401761
timeout: Optional[int] = None,
17411762
validate: Optional[bool] = None,
17421763
auto_decode: bool = True,
1764+
additional: Optional[dict] = None,
1765+
job_options: Optional[dict] = None,
17431766
) -> Union[dict, requests.Response]:
17441767
"""
17451768
Execute a process graph synchronously and return the result. If the result is a JSON object, it will be parsed.
@@ -1749,10 +1772,18 @@ def execute(
17491772
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
17501773
(overruling the connection's ``auto_validate`` setting).
17511774
:param auto_decode: Boolean flag to enable/disable automatic JSON decoding of the response. Defaults to True.
1775+
:param additional: additional (top-level) properties to set in the request body
1776+
:param job_options: dictionary of job options to pass to the backend
1777+
(under top-level property "job_options")
17521778
17531779
:return: parsed JSON response as a dict if auto_decode is True, otherwise response object
1780+
1781+
.. versionadded:: 0.36.0
1782+
Added arguments ``additional`` and ``job_options``.
17541783
"""
1755-
pg_with_metadata = self._build_request_with_process_graph(process_graph=process_graph)
1784+
pg_with_metadata = self._build_request_with_process_graph(
1785+
process_graph=process_graph, additional=additional, job_options=job_options
1786+
)
17561787
self._preflight_validation(pg_with_metadata=pg_with_metadata, validate=validate)
17571788
response = self.post(
17581789
path="/result",
@@ -1779,6 +1810,7 @@ def create_job(
17791810
plan: Optional[str] = None,
17801811
budget: Optional[float] = None,
17811812
additional: Optional[dict] = None,
1813+
job_options: Optional[dict] = None,
17821814
validate: Optional[bool] = None,
17831815
) -> BatchJob:
17841816
"""
@@ -1795,23 +1827,27 @@ def create_job(
17951827
:param plan: The billing plan to process and charge the job with
17961828
:param budget: Maximum budget to be spent on executing the job.
17971829
Note that some backends do not honor this limit.
1798-
:param additional: additional job options to pass to the backend
1830+
:param additional: additional (top-level) properties to set in the request body
1831+
:param job_options: dictionary of job options to pass to the backend
1832+
(under top-level property "job_options")
17991833
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
18001834
(overruling the connection's ``auto_validate`` setting).
18011835
:return: Created job
18021836
18031837
.. versionchanged:: 0.35.0
18041838
Add :ref:`multi-result support <multi-result-process-graphs>`.
1839+
1840+
.. versionadded:: 0.36.0
1841+
Added argument ``job_options``.
18051842
"""
18061843
# TODO move all this (BatchJob factory) logic to BatchJob?
18071844

18081845
pg_with_metadata = self._build_request_with_process_graph(
18091846
process_graph=process_graph,
1847+
additional=additional,
1848+
job_options=job_options,
18101849
**dict_no_none(title=title, description=description, plan=plan, budget=budget)
18111850
)
1812-
if additional:
1813-
# TODO: get rid of this non-standard field? https://github.com/Open-EO/openeo-api/issues/276
1814-
pg_with_metadata["job_options"] = additional
18151851

18161852
self._preflight_validation(pg_with_metadata=pg_with_metadata, validate=validate)
18171853
response = self.post("/jobs", json=pg_with_metadata, expected_status=201)
@@ -1871,9 +1907,12 @@ def load_disk_collection(
18711907
def as_curl(
18721908
self,
18731909
data: Union[dict, DataCube, FlatGraphableMixin],
1910+
*,
18741911
path="/result",
18751912
method="POST",
18761913
obfuscate_auth: bool = False,
1914+
additional: Optional[dict] = None,
1915+
job_options: Optional[dict] = None,
18771916
) -> str:
18781917
"""
18791918
Build curl command to evaluate given process graph or data cube
@@ -1891,14 +1930,20 @@ def as_curl(
18911930
or ``"/jobs"`` for batch jobs
18921931
:param method: HTTP method to use (typically ``"POST"``)
18931932
:param obfuscate_auth: don't show actual bearer token
1933+
:param additional: additional (top-level) properties to set in the request body
1934+
:param job_options: dictionary of job options to pass to the backend
1935+
(under top-level property "job_options")
18941936
18951937
:return: curl command as a string
1938+
1939+
.. versionadded:: 0.36.0
1940+
Added arguments ``additional`` and ``job_options``.
18961941
"""
18971942
cmd = ["curl", "-i", "-X", method]
18981943
cmd += ["-H", "Content-Type: application/json"]
18991944
if isinstance(self.auth, BearerAuth):
19001945
cmd += ["-H", f"Authorization: Bearer {'...' if obfuscate_auth else self.auth.bearer}"]
1901-
pg_with_metadata = self._build_request_with_process_graph(data)
1946+
pg_with_metadata = self._build_request_with_process_graph(data, additional=additional, job_options=job_options)
19021947
if path == "/validation":
19031948
pg_with_metadata = pg_with_metadata["process"]
19041949
post_json = json.dumps(pg_with_metadata, separators=(",", ":"))

openeo/rest/datacube.py

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2329,6 +2329,8 @@ def download(
23292329
*,
23302330
validate: Optional[bool] = None,
23312331
auto_add_save_result: bool = True,
2332+
additional: Optional[dict] = None,
2333+
job_options: Optional[dict] = None,
23322334
) -> Union[None, bytes]:
23332335
"""
23342336
Execute synchronously and download the raster data cube, e.g. as GeoTIFF.
@@ -2342,11 +2344,17 @@ def download(
23422344
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
23432345
(overruling the connection's ``auto_validate`` setting).
23442346
:param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
2347+
:param additional: additional (top-level) properties to set in the request body
2348+
:param job_options: dictionary of job options to pass to the backend
2349+
(under top-level property "job_options")
23452350
23462351
:return: None if the result is stored to disk, or a bytes object returned by the backend.
23472352
23482353
.. versionchanged:: 0.32.0
23492354
Added ``auto_add_save_result`` option
2355+
2356+
.. versionadded:: 0.36.0
2357+
Added arguments ``additional`` and ``job_options``.
23502358
"""
23512359
# TODO #278 centralize download/create_job/execute_job logic in DataCube, VectorCube, MlModel, ...
23522360
cube = self
@@ -2359,7 +2367,9 @@ def download(
23592367
default_format=self._DEFAULT_RASTER_FORMAT,
23602368
method="DataCube.download()",
23612369
)
2362-
return self._connection.download(cube.flat_graph(), outputfile, validate=validate)
2370+
return self._connection.download(
2371+
cube.flat_graph(), outputfile, validate=validate, additional=additional, job_options=job_options
2372+
)
23632373

23642374
def validate(self) -> List[dict]:
23652375
"""
@@ -2463,6 +2473,7 @@ def execute_batch(
24632473
print: typing.Callable[[str], None] = print,
24642474
max_poll_interval: float = 60,
24652475
connection_retry_interval: float = 30,
2476+
additional: Optional[dict] = None,
24662477
job_options: Optional[dict] = None,
24672478
validate: Optional[bool] = None,
24682479
auto_add_save_result: bool = True,
@@ -2477,13 +2488,18 @@ def execute_batch(
24772488
24782489
:param outputfile: The path of a file to which a result can be written
24792490
:param out_format: (optional) File format to use for the job result.
2480-
:param job_options:
2491+
:param additional: additional (top-level) properties to set in the request body
2492+
:param job_options: dictionary of job options to pass to the backend
2493+
(under top-level property "job_options")
24812494
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
24822495
(overruling the connection's ``auto_validate`` setting).
24832496
:param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
24842497
24852498
.. versionchanged:: 0.32.0
24862499
Added ``auto_add_save_result`` option
2500+
2501+
.. versionadded:: 0.36.0
2502+
Added argument ``additional``.
24872503
"""
24882504
# TODO: start showing deprecation warnings about these inconsistent argument names
24892505
if "format" in format_options and not out_format:
@@ -2506,6 +2522,7 @@ def execute_batch(
25062522
description=description,
25072523
plan=plan,
25082524
budget=budget,
2525+
additional=additional,
25092526
job_options=job_options,
25102527
validate=validate,
25112528
auto_add_save_result=False,
@@ -2523,6 +2540,7 @@ def create_job(
25232540
description: Optional[str] = None,
25242541
plan: Optional[str] = None,
25252542
budget: Optional[float] = None,
2543+
additional: Optional[dict] = None,
25262544
job_options: Optional[dict] = None,
25272545
validate: Optional[bool] = None,
25282546
auto_add_save_result: bool = True,
@@ -2543,15 +2561,20 @@ def create_job(
25432561
:param plan: The billing plan to process and charge the job with
25442562
:param budget: Maximum budget to be spent on executing the job.
25452563
Note that some backends do not honor this limit.
2546-
:param job_options: custom job options.
2564+
:param additional: additional (top-level) properties to set in the request body
2565+
:param job_options: dictionary of job options to pass to the backend
2566+
(under top-level property "job_options")
25472567
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
25482568
(overruling the connection's ``auto_validate`` setting).
25492569
:param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
25502570
25512571
:return: Created job.
25522572
2553-
.. versionchanged:: 0.32.0
2573+
.. versionadded:: 0.32.0
25542574
Added ``auto_add_save_result`` option
2575+
2576+
.. versionadded:: 0.36.0
2577+
Added ``additional`` argument.
25552578
"""
25562579
# TODO: add option to also automatically start the job?
25572580
# TODO: avoid using all kwargs as format_options
@@ -2572,7 +2595,8 @@ def create_job(
25722595
plan=plan,
25732596
budget=budget,
25742597
validate=validate,
2575-
additional=job_options,
2598+
additional=additional,
2599+
job_options=job_options,
25762600
)
25772601

25782602
send_job = legacy_alias(create_job, name="send_job", since="0.10.0")
@@ -2617,6 +2641,7 @@ def execute(self, *, validate: Optional[bool] = None, auto_decode: bool = True)
26172641
26182642
:return: parsed JSON response as a dict if auto_decode is True, otherwise response object
26192643
"""
2644+
# TODO: deprecate this. It's ill-defined how to "execute" a data cube without downloading it.
26202645
return self._connection.execute(self.flat_graph(), validate=validate, auto_decode=auto_decode)
26212646

26222647
@staticmethod

0 commit comments

Comments
 (0)