Skip to content

Commit bd9160c

Browse files
committed
Issue #505 finetune #687
- add changelog entry - doc finetuning - cover mlmodel and vectorcube too - darker code style - make test more to the point
1 parent f18dc62 commit bd9160c

File tree

7 files changed

+71
-54
lines changed

7 files changed

+71
-54
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12+
- Added `show_error_logs` argument to `cube.execute_batch()`/`job.start_and_wait()`/... to toggle the automatic printing of error logs on failure ([#505](https://github.com/Open-EO/openeo-python-client/issues/505))
13+
1214
### Changed
1315

1416
### Removed

docs/batch_jobs.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,8 @@ When using
292292
:py:meth:`job.start_and_wait() <openeo.rest.job.BatchJob.start_and_wait>`
293293
or :py:meth:`cube.execute_batch() <openeo.rest.datacube.DataCube.execute_batch>`
294294
to run a batch job and it fails,
295-
the openEO Python client library will automatically
296-
print the batch job logs and instructions to help with further investigation:
295+
the openEO Python client library will print (by default)
296+
the batch job's error logs and instructions to help with further investigation:
297297
298298
.. code-block:: pycon
299299

openeo/rest/datacube.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2502,6 +2502,9 @@ def execute_batch(
25022502
25032503
.. versionadded:: 0.36.0
25042504
Added argument ``additional``.
2505+
2506+
.. versionchanged:: 0.37.0
2507+
Added argument ``show_error_logs``.
25052508
"""
25062509
# TODO: start showing deprecation warnings about these inconsistent argument names
25072510
if "format" in format_options and not out_format:
@@ -2531,8 +2534,10 @@ def execute_batch(
25312534
)
25322535
return job.run_synchronous(
25332536
outputfile=outputfile,
2534-
print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval,
2535-
show_error_logs=show_error_logs
2537+
print=print,
2538+
max_poll_interval=max_poll_interval,
2539+
connection_retry_interval=connection_retry_interval,
2540+
show_error_logs=show_error_logs,
25362541
)
25372542

25382543
def create_job(

openeo/rest/job.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -235,8 +235,12 @@ def logs(
235235
return VisualList("logs", data=entries)
236236

237237
def run_synchronous(
238-
self, outputfile: Union[str, Path, None] = None,
239-
print=print, max_poll_interval=60, connection_retry_interval=30, show_error_logs: bool = True
238+
self,
239+
outputfile: Union[str, Path, None] = None,
240+
print=print,
241+
max_poll_interval=60,
242+
connection_retry_interval=30,
243+
show_error_logs: bool = True,
240244
) -> BatchJob:
241245
"""
242246
Start the job, wait for it to finish and download result
@@ -246,20 +250,28 @@ def run_synchronous(
246250
:param max_poll_interval: maximum number of seconds to sleep between status polls
247251
:param connection_retry_interval: how long to wait when status poll failed due to connection issue
248252
:param show_error_logs: whether to automatically print error logs when the batch job failed.
249-
:return:
253+
254+
.. versionchanged:: 0.37.0
255+
Added argument ``show_error_logs``.
250256
"""
251257
self.start_and_wait(
252-
print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval,
253-
show_error_logs=show_error_logs
258+
print=print,
259+
max_poll_interval=max_poll_interval,
260+
connection_retry_interval=connection_retry_interval,
261+
show_error_logs=show_error_logs,
254262
)
255263
# TODO #135 support multi file result sets too?
256264
if outputfile is not None:
257265
self.download_result(outputfile)
258266
return self
259267

260268
def start_and_wait(
261-
self, print=print, max_poll_interval: int = 60, connection_retry_interval: int = 30, soft_error_max=10,
262-
show_error_logs: bool = True
269+
self,
270+
print=print,
271+
max_poll_interval: int = 60,
272+
connection_retry_interval: int = 30,
273+
soft_error_max=10,
274+
show_error_logs: bool = True,
263275
) -> BatchJob:
264276
"""
265277
Start the batch job, poll its status and wait till it finishes (or fails)
@@ -269,7 +281,9 @@ def start_and_wait(
269281
:param connection_retry_interval: how long to wait when status poll failed due to connection issue
270282
:param soft_error_max: maximum number of soft errors (e.g. temporary connection glitches) to allow
271283
:param show_error_logs: whether to automatically print error logs when the batch job failed.
272-
:return:
284+
285+
.. versionchanged:: 0.37.0
286+
Added argument ``show_error_logs``.
273287
"""
274288
# TODO rename `connection_retry_interval` to something more generic?
275289
start_time = time.time()

openeo/rest/mlmodel.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,13 @@ def execute_batch(
7171
connection_retry_interval=30,
7272
additional: Optional[dict] = None,
7373
job_options: Optional[dict] = None,
74+
show_error_logs: bool = True,
7475
) -> BatchJob:
7576
"""
7677
Evaluate the process graph by creating a batch job, and retrieving the results when it is finished.
7778
This method is mostly recommended if the batch job is expected to run in a reasonable amount of time.
7879
79-
For very long running jobs, you probably do not want to keep the client running.
80+
For very long-running jobs, you probably do not want to keep the client running.
8081
8182
:param job_options:
8283
:param outputfile: The path of a file to which a result can be written
@@ -85,9 +86,13 @@ def execute_batch(
8586
:param additional: additional (top-level) properties to set in the request body
8687
:param job_options: dictionary of job options to pass to the backend
8788
(under top-level property "job_options")
89+
:param show_error_logs: whether to automatically print error logs when the batch job failed.
8890
8991
.. versionadded:: 0.36.0
9092
Added argument ``additional``.
93+
94+
.. versionchanged:: 0.37.0
95+
Added argument ``show_error_logs``.
9196
"""
9297
job = self.create_job(
9398
title=title,
@@ -100,7 +105,10 @@ def execute_batch(
100105
return job.run_synchronous(
101106
# TODO #135 support multi file result sets too
102107
outputfile=outputfile,
103-
print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval
108+
print=print,
109+
max_poll_interval=max_poll_interval,
110+
connection_retry_interval=connection_retry_interval,
111+
show_error_logs=show_error_logs,
104112
)
105113

106114
def create_job(

openeo/rest/vectorcube.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,7 @@ def execute_batch(
259259
job_options: Optional[dict] = None,
260260
validate: Optional[bool] = None,
261261
auto_add_save_result: bool = True,
262+
show_error_logs: bool = True,
262263
# TODO: avoid using kwargs as format options
263264
**format_options,
264265
) -> BatchJob:
@@ -277,6 +278,7 @@ def execute_batch(
277278
:param validate: Optional toggle to enable/prevent validation of the process graphs before execution
278279
(overruling the connection's ``auto_validate`` setting).
279280
:param auto_add_save_result: Automatically add a ``save_result`` node to the process graph if there is none yet.
281+
:param show_error_logs: whether to automatically print error logs when the batch job failed.
280282
281283
.. versionchanged:: 0.21.0
282284
When not specified explicitly, output format is guessed from output file extension.
@@ -286,6 +288,9 @@ def execute_batch(
286288
287289
.. versionadded:: 0.36.0
288290
Added argument ``additional``.
291+
292+
.. versionchanged:: 0.37.0
293+
Added argument ``show_error_logs``.
289294
"""
290295
cube = self
291296
if auto_add_save_result:
@@ -310,7 +315,10 @@ def execute_batch(
310315
return job.run_synchronous(
311316
# TODO #135 support multi file result sets too
312317
outputfile=outputfile,
313-
print=print, max_poll_interval=max_poll_interval, connection_retry_interval=connection_retry_interval
318+
print=print,
319+
max_poll_interval=max_poll_interval,
320+
connection_retry_interval=connection_retry_interval,
321+
show_error_logs=show_error_logs,
314322
)
315323

316324
def create_job(

tests/rest/test_job.py

Lines changed: 19 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -150,56 +150,36 @@ def test_execute_batch_with_error(con100, requests_mock, tmpdir):
150150
"Full logs can be inspected in an openEO (web) editor or with `connection.job('f00ba5').logs()`.",
151151
]
152152

153-
def test_execute_batch_with_error_with_error_logs_disabled(con100, requests_mock, tmpdir):
153+
154+
@pytest.mark.parametrize("show_error_logs", [True, False])
155+
def test_execute_batch_show_error_logs(con100, requests_mock, show_error_logs):
154156
requests_mock.get(API_URL + "/file_formats", json={"output": {"GTiff": {"gis_data_types": ["raster"]}}})
155157
requests_mock.get(API_URL + "/collections/SENTINEL2", json={"foo": "bar"})
156158
requests_mock.post(API_URL + "/jobs", status_code=201, headers={"OpenEO-Identifier": "f00ba5"})
157159
requests_mock.post(API_URL + "/jobs/f00ba5/results", status_code=202)
158-
requests_mock.get(
159-
API_URL + "/jobs/f00ba5",
160-
[
161-
{"json": {"status": "submitted"}},
162-
{"json": {"status": "queued"}},
163-
{"json": {"status": "running", "progress": 15}},
164-
{"json": {"status": "running", "progress": 80}},
165-
{"json": {"status": "error", "progress": 100}},
166-
],
167-
)
160+
requests_mock.get(API_URL + "/jobs/f00ba5", json={"status": "error", "progress": 100})
168161
requests_mock.get(
169162
API_URL + "/jobs/f00ba5/logs",
170-
json={
171-
"logs": [
172-
{"id": "12", "level": "info", "message": "starting"},
173-
{"id": "34", "level": "error", "message": "nope"},
174-
]
175-
},
163+
json={"logs": [{"id": "34", "level": "error", "message": "nope"}]},
176164
)
177165

178-
path = tmpdir.join("tmp.tiff")
179-
log = []
180-
181-
try:
182-
with fake_time():
183-
con100.load_collection("SENTINEL2").execute_batch(
184-
outputfile=path, out_format="GTIFF",
185-
max_poll_interval=.1, print=log.append, show_error_logs=False
186-
)
187-
pytest.fail("execute_batch should fail")
188-
except JobFailedException as e:
189-
assert e.job.status() == "error"
190-
assert [(l.level, l.message) for l in e.job.logs()] == [
191-
("info", "starting"),
192-
("error", "nope"),
193-
]
166+
stdout = []
167+
with fake_time(), pytest.raises(JobFailedException):
168+
con100.load_collection("SENTINEL2").execute_batch(
169+
max_poll_interval=0.1, print=stdout.append, show_error_logs=show_error_logs
170+
)
194171

195-
assert log == [
172+
expected = [
196173
"0:00:01 Job 'f00ba5': send 'start'",
197-
"0:00:02 Job 'f00ba5': submitted (progress N/A)",
198-
"0:00:04 Job 'f00ba5': queued (progress N/A)",
199-
"0:00:07 Job 'f00ba5': running (progress 15%)",
200-
"0:00:12 Job 'f00ba5': running (progress 80%)",
201-
"0:00:20 Job 'f00ba5': error (progress 100%)",
174+
"0:00:02 Job 'f00ba5': error (progress 100%)",
202175
]
176+
if show_error_logs:
177+
expected += [
178+
"Your batch job 'f00ba5' failed. Error logs:",
179+
[{"id": "34", "level": "error", "message": "nope"}],
180+
"Full logs can be inspected in an openEO (web) editor or with `connection.job('f00ba5').logs()`.",
181+
]
182+
assert stdout == expected
203183

204184

205185
@pytest.mark.parametrize(["error_response", "expected"], [

0 commit comments

Comments
 (0)