Commit 53936a6 (2 parents: 71b2b79 + d5180c3)

Merge branch 'master' into issue693-sar_backscatter-get-coefficients-from-schema

12 files changed: 208 additions & 30 deletions

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Added
 
+- Added `show_error_logs` argument to `cube.execute_batch()`/`job.start_and_wait()`/... to toggle the automatic printing of error logs on failure ([#505](https://github.com/Open-EO/openeo-python-client/issues/505))
+
 ### Changed
 
 ### Removed
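
For context, a minimal sketch of how the new flag is meant to be used (the backend URL and collection id are hypothetical placeholders):

    import openeo

    connection = openeo.connect("openeo.example.com")  # hypothetical backend URL
    cube = connection.load_collection("SENTINEL2_L2A")  # hypothetical collection id

    # Opt out of the automatic printing of error logs on failure,
    # e.g. in non-interactive/scripted contexts:
    job = cube.execute_batch(outputfile="result.tiff", show_error_logs=False)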

docs/batch_jobs.rst

Lines changed: 2 additions & 2 deletions
@@ -292,8 +292,8 @@ When using
 :py:meth:`job.start_and_wait() <openeo.rest.job.BatchJob.start_and_wait>`
 or :py:meth:`cube.execute_batch() <openeo.rest.datacube.DataCube.execute_batch>`
 to run a batch job and it fails,
-the openEO Python client library will automatically
-print the batch job logs and instructions to help with further investigation:
+the openEO Python client library will print (by default)
+the batch job's error logs and instructions to help with further investigation:
 
 .. code-block:: pycon
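
The docs excerpt cuts off just before its pycon example. Illustratively, the default behaviour looks like this (the printed messages are taken verbatim from openeo/rest/job.py below; the job id is hypothetical, and a JobFailedException is raised afterwards):

    >>> job.start_and_wait()
    Your batch job 'j-123abc' failed. Error logs:
    [...]
    Full logs can be inspected in an openEO (web) editor or with `connection.job('j-123abc').logs()`.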

openeo/extra/job_management/__init__.py

Lines changed: 22 additions & 6 deletions
@@ -658,16 +658,25 @@ def on_job_cancel(self, job: BatchJob, row):
 
     def _cancel_prolonged_job(self, job: BatchJob, row):
         """Cancel the job if it has been running for too long."""
-        job_running_start_time = rfc3339.parse_datetime(row["running_start_time"], with_timezone=True)
-        elapsed = datetime.datetime.now(tz=datetime.timezone.utc) - job_running_start_time
-        if elapsed > self._cancel_running_job_after:
-            try:
+        try:
+            # Ensure running start time is valid
+            job_running_start_time = rfc3339.parse_datetime(row.get("running_start_time"), with_timezone=True)
+
+            # Parse the current time into a datetime object with timezone info
+            current_time = rfc3339.parse_datetime(rfc3339.utcnow(), with_timezone=True)
+
+            # Calculate the elapsed time between job start and now
+            elapsed = current_time - job_running_start_time
+
+            if elapsed > self._cancel_running_job_after:
+
                 _log.info(
                     f"Cancelling long-running job {job.job_id} (after {elapsed}, running since {job_running_start_time})"
                 )
                 job.stop()
-            except OpenEoApiError as e:
-                _log.error(f"Failed to cancel long-running job {job.job_id}: {e}")
+
+        except Exception as e:
+            _log.error(f"Unexpected error while handling job {job.job_id}: {e}")
 
     def get_job_dir(self, job_id: str) -> Path:
         """Path to directory where job metadata, results and error logs are saved."""

@@ -728,6 +737,13 @@ def _track_statuses(self, job_db: JobDatabaseInterface, stats: Optional[dict] =
                 self.on_job_cancel(the_job, active.loc[i])
 
             if self._cancel_running_job_after and new_status == "running":
+                if (not active.loc[i, "running_start_time"] or pd.isna(active.loc[i, "running_start_time"])):
+                    _log.warning(
+                        f"Unknown 'running_start_time' for running job {job_id}. Using current time as an approximation."
+                    )
+                    stats["job started running"] += 1
+                    active.loc[i, "running_start_time"] = rfc3339.utcnow()
+
                 self._cancel_prolonged_job(the_job, active.loc[i])
 
             active.loc[i, "status"] = new_status
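
The reworked cancellation logic boils down to an RFC 3339 parse-and-compare. A standalone sketch of that elapsed-time check, using the same `rfc3339` helper from `openeo.util` that the diff uses (the threshold and timestamp are arbitrary examples):

    import datetime

    from openeo.util import rfc3339

    cancel_running_job_after = datetime.timedelta(hours=6)  # arbitrary example threshold

    # Both timestamps are parsed to timezone-aware datetimes, so subtraction is safe:
    running_start_time = rfc3339.parse_datetime("2024-09-01T10:00:00Z", with_timezone=True)
    current_time = rfc3339.parse_datetime(rfc3339.utcnow(), with_timezone=True)

    elapsed = current_time - running_start_time
    if elapsed > cancel_running_job_after:
        print(f"Job would be cancelled (running for {elapsed})")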

openeo/rest/connection.py

Lines changed: 4 additions & 1 deletion
@@ -84,6 +84,7 @@
     ContextTimer,
     LazyLoadCache,
     dict_no_none,
+    ensure_dir,
     ensure_list,
     load_json_resource,
     repr_truncate,

@@ -1771,7 +1772,9 @@ def download(
         )
 
         if outputfile is not None:
-            with Path(outputfile).open(mode="wb") as f:
+            target = Path(outputfile)
+            ensure_dir(target.parent)
+            with target.open(mode="wb") as f:
                 for chunk in response.iter_content(chunk_size=chunk_size):
                     f.write(chunk)
         else:
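
`ensure_dir` is imported from `openeo.util`; creating the parent directory up front means `connection.download(..., outputfile=...)` no longer fails when the target directory does not exist yet. Roughly, a sketch of the behaviour relied on here (not the actual implementation):

    from pathlib import Path

    def ensure_dir(path) -> Path:
        # Create the directory (and any missing parents) if needed, then return it.
        path = Path(path)
        path.mkdir(parents=True, exist_ok=True)
        return path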

openeo/rest/datacube.py

Lines changed: 9 additions & 1 deletion
@@ -2484,6 +2484,7 @@ def execute_batch(
         job_options: Optional[dict] = None,
         validate: Optional[bool] = None,
         auto_add_save_result: bool = True,
+        show_error_logs: bool = True,
         # TODO: deprecate `format_options` as keyword arguments
         **format_options,
     ) -> BatchJob:

@@ -2501,12 +2502,16 @@ def execute_batch(
         :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
             (overruling the connection's ``auto_validate`` setting).
         :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
+        :param show_error_logs: whether to automatically print error logs when the batch job failed.
 
         .. versionchanged:: 0.32.0
             Added ``auto_add_save_result`` option
 
         .. versionadded:: 0.36.0
             Added argument ``additional``.
+
+        .. versionchanged:: 0.37.0
+            Added argument ``show_error_logs``.
         """
         # TODO: start showing deprecation warnings about these inconsistent argument names
         if "format" in format_options and not out_format:

@@ -2536,7 +2541,10 @@
         )
         return job.run_synchronous(
             outputfile=outputfile,
-            print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval
+            print=print,
+            max_poll_interval=max_poll_interval,
+            connection_retry_interval=connection_retry_interval,
+            show_error_logs=show_error_logs,
         )
 
     def create_job(

openeo/rest/job.py

Lines changed: 38 additions & 12 deletions
@@ -235,20 +235,43 @@ def logs(
         return VisualList("logs", data=entries)
 
     def run_synchronous(
-        self, outputfile: Union[str, Path, None] = None,
-        print=print, max_poll_interval=60, connection_retry_interval=30
+        self,
+        outputfile: Union[str, Path, None] = None,
+        print=print,
+        max_poll_interval=60,
+        connection_retry_interval=30,
+        show_error_logs: bool = True,
     ) -> BatchJob:
-        """Start the job, wait for it to finish and download result"""
+        """
+        Start the job, wait for it to finish and download result
+
+        :param outputfile: The path of a file to which a result can be written
+        :param print: print/logging function to show progress/status
+        :param max_poll_interval: maximum number of seconds to sleep between status polls
+        :param connection_retry_interval: how long to wait when status poll failed due to connection issue
+        :param show_error_logs: whether to automatically print error logs when the batch job failed.
+
+        .. versionchanged:: 0.37.0
+            Added argument ``show_error_logs``.
+        """
         self.start_and_wait(
-            print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval
+            print=print,
+            max_poll_interval=max_poll_interval,
+            connection_retry_interval=connection_retry_interval,
+            show_error_logs=show_error_logs,
         )
         # TODO #135 support multi file result sets too?
         if outputfile is not None:
             self.download_result(outputfile)
         return self
 
     def start_and_wait(
-        self, print=print, max_poll_interval: int = 60, connection_retry_interval: int = 30, soft_error_max=10
+        self,
+        print=print,
+        max_poll_interval: int = 60,
+        connection_retry_interval: int = 30,
+        soft_error_max=10,
+        show_error_logs: bool = True,
     ) -> BatchJob:
         """
         Start the batch job, poll its status and wait till it finishes (or fails)

@@ -257,7 +280,10 @@ def start_and_wait(
         :param max_poll_interval: maximum number of seconds to sleep between status polls
         :param connection_retry_interval: how long to wait when status poll failed due to connection issue
         :param soft_error_max: maximum number of soft errors (e.g. temporary connection glitches) to allow
-        :return:
+        :param show_error_logs: whether to automatically print error logs when the batch job failed.
+
+        .. versionchanged:: 0.37.0
+            Added argument ``show_error_logs``.
         """
         # TODO rename `connection_retry_interval` to something more generic?
         start_time = time.time()

@@ -314,13 +340,13 @@ def soft_error(message: str):
                 poll_interval = min(1.25 * poll_interval, max_poll_interval)
 
         if status != "finished":
-            # TODO: allow to disable this printing logs (e.g. in non-interactive contexts)?
             # TODO: render logs jupyter-aware in a notebook context?
-            print(f"Your batch job {self.job_id!r} failed. Error logs:")
-            print(self.logs(level=logging.ERROR))
-            print(
-                f"Full logs can be inspected in an openEO (web) editor or with `connection.job({self.job_id!r}).logs()`."
-            )
+            if show_error_logs:
+                print(f"Your batch job {self.job_id!r} failed. Error logs:")
+                print(self.logs(level=logging.ERROR))
+                print(
+                    f"Full logs can be inspected in an openEO (web) editor or with `connection.job({self.job_id!r}).logs()`."
+                )
             raise JobFailedException(
                 f"Batch job {self.job_id!r} didn't finish successfully. Status: {status} (after {elapsed()}).",
                 job=self,
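
This resolves the first TODO: non-interactive callers can now opt out of the printing and route error logs wherever they want. A sketch, assuming an already-created `job` (a `BatchJob`) and that `JobFailedException` is importable from `openeo.rest`:

    import logging

    from openeo.rest import JobFailedException  # assumed import location

    try:
        job.start_and_wait(show_error_logs=False)
    except JobFailedException as e:
        # Forward error logs to a logger instead of printing to stdout:
        for entry in e.job.logs(level=logging.ERROR):
            logging.getLogger("my_app").error(str(entry))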

openeo/rest/mlmodel.py

Lines changed: 10 additions & 2 deletions
@@ -71,12 +71,13 @@ def execute_batch(
         connection_retry_interval=30,
         additional: Optional[dict] = None,
         job_options: Optional[dict] = None,
+        show_error_logs: bool = True,
     ) -> BatchJob:
         """
         Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
         This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.
 
-        For very long running jobs, you probably do not want to keep the client running.
+        For very long-running jobs, you probably do not want to keep the client running.
 
         :param job_options:
         :param outputfile: The path of a file to which a result can be written

@@ -85,9 +86,13 @@ def execute_batch(
         :param additional: additional (top-level) properties to set in the request body
         :param job_options: dictionary of job options to pass to the backend
             (under top-level property "job_options")
+        :param show_error_logs: whether to automatically print error logs when the batch job failed.
 
         .. versionadded:: 0.36.0
             Added argument ``additional``.
+
+        .. versionchanged:: 0.37.0
+            Added argument ``show_error_logs``.
         """
         job = self.create_job(
             title=title,

@@ -100,7 +105,10 @@
         return job.run_synchronous(
             # TODO #135 support multi file result sets too
             outputfile=outputfile,
-            print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval
+            print=print,
+            max_poll_interval=max_poll_interval,
+            connection_retry_interval=connection_retry_interval,
+            show_error_logs=show_error_logs,
         )
 
     def create_job(

openeo/rest/vectorcube.py

Lines changed: 9 additions & 1 deletion
@@ -259,6 +259,7 @@ def execute_batch(
         job_options: Optional[dict] = None,
         validate: Optional[bool] = None,
         auto_add_save_result: bool = True,
+        show_error_logs: bool = True,
         # TODO: avoid using kwargs as format options
         **format_options,
     ) -> BatchJob:

@@ -277,6 +278,7 @@
         :param validate: Optional toggle to enable/prevent validation of the process graphs before execution
             (overruling the connection's ``auto_validate`` setting).
         :param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
+        :param show_error_logs: whether to automatically print error logs when the batch job failed.
 
         .. versionchanged:: 0.21.0
             When not specified explicitly, output format is guessed from output file extension.

@@ -286,6 +288,9 @@
 
         .. versionadded:: 0.36.0
             Added argument ``additional``.
+
+        .. versionchanged:: 0.37.0
+            Added argument ``show_error_logs``.
         """
         cube = self
         if auto_add_save_result:

@@ -310,7 +315,10 @@
         return job.run_synchronous(
             # TODO #135 support multi file result sets too
             outputfile=outputfile,
-            print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval
+            print=print,
+            max_poll_interval=max_poll_interval,
+            connection_retry_interval=connection_retry_interval,
+            show_error_logs=show_error_logs,
         )
 
     def create_job(

setup.py

Lines changed: 1 addition & 0 deletions
@@ -22,6 +22,7 @@
     "mock",
     "requests-mock>=1.8.0",
     "httpretty>=1.1.4",
+    "urllib3<2.3.0",  # httpretty doesn't work properly with urllib3>=2.3.0. See #700 and https://github.com/gabrielfalcao/HTTPretty/issues/484
    "netCDF4>=1.7.0",
     "matplotlib",  # TODO: eliminate matplotlib as test dependency
     "geopandas",

tests/extra/job_management/test_job_management.py

Lines changed: 76 additions & 0 deletions
@@ -7,6 +7,7 @@
 from time import sleep
 from typing import Callable, Union
 from unittest import mock
+import datetime
 
 import dirty_equals
 import geopandas

@@ -554,6 +555,7 @@ def start_job(row, connection_provider, connection, **kwargs):
             12 * 60 * 60,
             "finished",
         ),
+
     ],
 )
 def test_automatic_cancel_of_too_long_running_jobs(

@@ -645,6 +647,80 @@ def test_status_logging(self, tmp_path, job_manager, job_manager_root_dir, sleep
         assert needle.search(caplog.text)
 
 
+    @pytest.mark.parametrize(
+        ["create_time", "start_time", "running_start_time", "end_time", "end_status", "cancel_after_seconds"],
+        [
+            # Scenario 1: missing running_start_time (None)
+            (
+                "2024-09-01T09:00:00Z",  # job creation time
+                "2024-09-01T09:00:00Z",  # job start time
+                None,  # missing running_start_time
+                "2024-09-01T20:00:00Z",  # job end time
+                "finished",  # final job status
+                6 * 60 * 60,  # cancel after 6 hours
+            ),
+            # Scenario 2: NaN running_start_time
+            (
+                "2024-09-01T09:00:00Z",
+                "2024-09-01T09:00:00Z",
+                float("nan"),  # NaN running_start_time
+                "2024-09-01T20:00:00Z",
+                "finished",
+                6 * 60 * 60,
+            ),
+        ],
+    )
+    def test_ensure_running_start_time_is_datetime(
+        self,
+        tmp_path,
+        time_machine,
+        create_time,
+        start_time,
+        running_start_time,
+        end_time,
+        end_status,
+        cancel_after_seconds,
+        dummy_backend_foo,
+        job_manager_root_dir,
+    ):
+        def get_status(job_id, current_status):
+            if rfc3339.utcnow() < start_time:
+                return "queued"
+            elif rfc3339.utcnow() < end_time:
+                return "running"
+            return end_status
+
+        # Set the job status updater function for the mock backend
+        dummy_backend_foo.job_status_updater = get_status
+
+        job_manager = MultiBackendJobManager(
+            root_dir=job_manager_root_dir, cancel_running_job_after=cancel_after_seconds
+        )
+        job_manager.add_backend("foo", connection=dummy_backend_foo.connection)
+
+        # Create a DataFrame representing the job database
+        df = pd.DataFrame({
+            "year": [2024],
+            "running_start_time": [running_start_time],  # initial running_start_time
+        })
+
+        # Move the time machine to the job creation time
+        time_machine.move_to(create_time)
+
+        job_db_path = tmp_path / "jobs.csv"
+
+        # Mock sleep() to skip one hour at a time instead of actually sleeping
+        with mock.patch.object(openeo.extra.job_management.time, "sleep", new=lambda s: time_machine.shift(60 * 60)):
+            job_manager.run_jobs(df=df, start_job=self._create_year_job, job_db=job_db_path)
+
+        final_df = CsvJobDatabase(job_db_path).read()
+
+        # Validate that running_start_time was filled in with a valid datetime
+        filled_running_start_time = final_df.iloc[0]["running_start_time"]
+        assert isinstance(rfc3339.parse_datetime(filled_running_start_time), datetime.datetime)
+
+
 JOB_DB_DF_BASICS = pd.DataFrame(
     {
         "numbers": [3, 2, 1],
