Skip to content

Commit 7c9206e

Browse files
Fetch and display OETC worker error logs (#512)
* Fetch and display OETC worker error logs
* Fix pre-commit dependencies version mismatch
1 parent 9f7f88b commit 7c9206e

File tree

3 files changed

+291
-1
lines changed

3 files changed

+291
-1
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ repos:
2424
rev: v0.4.1
2525
hooks:
2626
- id: blackdoc
27+
additional_dependencies: ['black==24.8.0']
2728
- repo: https://github.com/codespell-project/codespell
2829
rev: v2.4.1
2930
hooks:

linopy/remote/oetc.py

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,6 +268,47 @@ def _submit_job_to_compute_service(self, input_file_name: str) -> str:
268268
except Exception as e:
269269
raise Exception(f"Error submitting job to compute service: {e}")
270270

271+
def _get_job_logs(self, job_uuid: str) -> str:
272+
"""
273+
Fetch logs for a compute job.
274+
275+
Args:
276+
job_uuid: UUID of the job to fetch logs for
277+
278+
Returns:
279+
str: The job logs content as a string
280+
281+
Raises:
282+
Exception: If fetching logs fails
283+
"""
284+
try:
285+
logger.info(f"OETC - Fetching logs for job {job_uuid}...")
286+
287+
response = requests.get(
288+
f"{self.settings.orchestrator_server_url}/compute-job/{job_uuid}/get-logs",
289+
headers={
290+
"Authorization": f"{self.jwt.token_type} {self.jwt.token}",
291+
"Content-Type": "application/json",
292+
},
293+
timeout=30,
294+
)
295+
296+
response.raise_for_status()
297+
logs_data = response.json()
298+
299+
# Extract content from the response structure
300+
logs_content = logs_data.get("content", "")
301+
302+
logger.info(f"OETC - Successfully fetched logs for job {job_uuid}")
303+
return logs_content
304+
305+
except RequestException as e:
306+
logger.warning(f"OETC - Failed to fetch logs for job {job_uuid}: {e}")
307+
return f"[Unable to fetch logs: {e}]"
308+
except Exception as e:
309+
logger.warning(f"OETC - Error fetching logs for job {job_uuid}: {e}")
310+
return f"[Error fetching logs: {e}]"
311+
271312
def wait_and_get_job_data(
272313
self,
273314
job_uuid: str,
@@ -342,7 +383,14 @@ def wait_and_get_job_data(
342383
raise Exception(error_msg)
343384

344385
elif job_result.status == "RUNTIME_ERROR":
345-
error_msg = f"Job failed during execution (status: {job_result.status}). Please check the OETC logs for details."
386+
# Fetch and display logs
387+
logs = self._get_job_logs(job_uuid)
388+
logger.error(f"OETC - Job {job_uuid} logs:\n{logs}")
389+
390+
error_msg = (
391+
f"Job failed during execution (status: {job_result.status}).\n"
392+
f"Logs:\n{logs}"
393+
)
346394
logger.error(f"OETC Error: {error_msg}")
347395
raise Exception(error_msg)
348396

test/remote/test_oetc.py

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import base64
22
import json
33
from datetime import datetime
4+
from typing import Any
45
from unittest.mock import Mock, patch
56

67
import pytest
@@ -513,6 +514,246 @@ def test_is_expired_true_when_exactly_expired(
513514
assert auth_result.is_expired is True
514515

515516

517+
class TestGetJobLogs:
518+
@pytest.fixture
519+
def handler_with_auth_setup(self, sample_jwt_token: str) -> OetcHandler:
520+
"""Create handler with authentication setup for testing log fetching"""
521+
credentials = OetcCredentials(
522+
email="test@example.com", password="test_password"
523+
)
524+
settings = OetcSettings(
525+
credentials=credentials,
526+
name="Test Job",
527+
authentication_server_url="https://auth.example.com",
528+
orchestrator_server_url="https://orchestrator.example.com",
529+
compute_provider=ComputeProvider.GCP,
530+
)
531+
532+
mock_auth_result = AuthenticationResult(
533+
token=sample_jwt_token,
534+
token_type="Bearer",
535+
expires_in=3600,
536+
authenticated_at=datetime.now(),
537+
)
538+
539+
handler = OetcHandler.__new__(OetcHandler)
540+
handler.settings = settings
541+
handler.jwt = mock_auth_result
542+
handler.cloud_provider_credentials = Mock()
543+
544+
return handler
545+
546+
@patch("linopy.remote.oetc.requests.get")
547+
def test_get_job_logs_success(
548+
self, mock_get: Mock, handler_with_auth_setup: OetcHandler
549+
) -> None:
550+
"""Test successful job logs fetching"""
551+
# Setup
552+
job_uuid = "test-job-uuid-123"
553+
expected_logs = "Error: Solver failed\nTraceback: ...\nSolver output: ..."
554+
555+
mock_response = Mock()
556+
mock_response.json.return_value = {"content": expected_logs}
557+
mock_response.raise_for_status.return_value = None
558+
mock_get.return_value = mock_response
559+
560+
# Execute
561+
result = handler_with_auth_setup._get_job_logs(job_uuid)
562+
563+
# Verify request
564+
mock_get.assert_called_once_with(
565+
f"https://orchestrator.example.com/compute-job/{job_uuid}/get-logs",
566+
headers={
567+
"Authorization": f"Bearer {handler_with_auth_setup.jwt.token}",
568+
"Content-Type": "application/json",
569+
},
570+
timeout=30,
571+
)
572+
573+
# Verify result
574+
assert result == expected_logs
575+
576+
@patch("linopy.remote.oetc.requests.get")
577+
def test_get_job_logs_http_error(
578+
self, mock_get: Mock, handler_with_auth_setup: OetcHandler
579+
) -> None:
580+
"""Test job logs fetching with HTTP error"""
581+
# Setup
582+
job_uuid = "test-job-uuid-123"
583+
mock_response = Mock()
584+
mock_response.raise_for_status.side_effect = requests.HTTPError("404 Not Found")
585+
mock_get.return_value = mock_response
586+
587+
# Execute
588+
result = handler_with_auth_setup._get_job_logs(job_uuid)
589+
590+
# Verify - should return error message instead of raising
591+
assert "[Unable to fetch logs:" in result
592+
assert "404 Not Found" in result
593+
594+
@patch("linopy.remote.oetc.requests.get")
595+
def test_get_job_logs_empty_content(
596+
self, mock_get: Mock, handler_with_auth_setup: OetcHandler
597+
) -> None:
598+
"""Test job logs fetching with empty content"""
599+
# Setup
600+
job_uuid = "test-job-uuid-123"
601+
mock_response = Mock()
602+
mock_response.json.return_value = {"content": ""}
603+
mock_response.raise_for_status.return_value = None
604+
mock_get.return_value = mock_response
605+
606+
# Execute
607+
result = handler_with_auth_setup._get_job_logs(job_uuid)
608+
609+
# Verify
610+
assert result == ""
611+
612+
613+
class TestWaitAndGetJobDataWithLogs:
614+
@pytest.fixture
615+
def handler_with_auth_setup(self, sample_jwt_token: str) -> OetcHandler:
616+
"""Create handler with authentication setup for testing job waiting"""
617+
credentials = OetcCredentials(
618+
email="test@example.com", password="test_password"
619+
)
620+
settings = OetcSettings(
621+
credentials=credentials,
622+
name="Test Job",
623+
authentication_server_url="https://auth.example.com",
624+
orchestrator_server_url="https://orchestrator.example.com",
625+
compute_provider=ComputeProvider.GCP,
626+
)
627+
628+
mock_auth_result = AuthenticationResult(
629+
token=sample_jwt_token,
630+
token_type="Bearer",
631+
expires_in=3600,
632+
authenticated_at=datetime.now(),
633+
)
634+
635+
handler = OetcHandler.__new__(OetcHandler)
636+
handler.settings = settings
637+
handler.jwt = mock_auth_result
638+
handler.cloud_provider_credentials = Mock()
639+
640+
return handler
641+
642+
@patch("linopy.remote.oetc.requests.get")
643+
def test_wait_runtime_error_with_logs(
644+
self, mock_get: Mock, handler_with_auth_setup: OetcHandler
645+
) -> None:
646+
"""Test wait_and_get_job_data for RUNTIME_ERROR with successful log fetching"""
647+
# Setup
648+
job_uuid = "test-job-uuid-123"
649+
expected_logs = "Error: Solver crashed\nMemory limit exceeded\nExit code: 137"
650+
651+
# First call returns RUNTIME_ERROR status
652+
mock_status_response = Mock()
653+
mock_status_response.json.return_value = {
654+
"uuid": job_uuid,
655+
"status": "RUNTIME_ERROR",
656+
"name": "Test Job",
657+
}
658+
mock_status_response.raise_for_status.return_value = None
659+
660+
# Second call returns logs
661+
mock_logs_response = Mock()
662+
mock_logs_response.json.return_value = {"content": expected_logs}
663+
mock_logs_response.raise_for_status.return_value = None
664+
665+
mock_get.side_effect = [mock_status_response, mock_logs_response]
666+
667+
# Execute and verify exception
668+
with pytest.raises(Exception) as exc_info:
669+
handler_with_auth_setup.wait_and_get_job_data(job_uuid)
670+
671+
# Verify exception contains logs
672+
error_message = str(exc_info.value)
673+
assert "Job failed during execution" in error_message
674+
assert "RUNTIME_ERROR" in error_message
675+
assert expected_logs in error_message
676+
677+
# Verify both API calls were made
678+
assert mock_get.call_count == 2
679+
# First call - get job status
680+
assert (
681+
f"https://orchestrator.example.com/compute-job/{job_uuid}"
682+
in mock_get.call_args_list[0][0][0]
683+
)
684+
# Second call - get logs
685+
assert (
686+
f"https://orchestrator.example.com/compute-job/{job_uuid}/get-logs"
687+
in mock_get.call_args_list[1][0][0]
688+
)
689+
690+
@patch("linopy.remote.oetc.requests.get")
691+
def test_wait_runtime_error_logs_fetch_fails(
692+
self, mock_get: Mock, handler_with_auth_setup: OetcHandler
693+
) -> None:
694+
"""Test wait_and_get_job_data for RUNTIME_ERROR when log fetching fails"""
695+
# Setup
696+
job_uuid = "test-job-uuid-123"
697+
698+
# First call returns RUNTIME_ERROR status
699+
mock_status_response = Mock()
700+
mock_status_response.json.return_value = {
701+
"uuid": job_uuid,
702+
"status": "RUNTIME_ERROR",
703+
"name": "Test Job",
704+
}
705+
mock_status_response.raise_for_status.return_value = None
706+
707+
# Second call for logs fails
708+
def side_effect_func(*args: Any, **kwargs: Any) -> Mock:
709+
if "get-logs" in args[0]:
710+
raise RequestException("Log service unavailable")
711+
return mock_status_response
712+
713+
mock_get.side_effect = side_effect_func
714+
715+
# Execute and verify exception
716+
with pytest.raises(Exception) as exc_info:
717+
handler_with_auth_setup.wait_and_get_job_data(job_uuid)
718+
719+
# Verify exception contains error message about log fetching failure
720+
error_message = str(exc_info.value)
721+
assert "Job failed during execution" in error_message
722+
assert "RUNTIME_ERROR" in error_message
723+
assert "[Unable to fetch logs:" in error_message
724+
assert "Log service unavailable" in error_message
725+
726+
@patch("linopy.remote.oetc.requests.get")
727+
def test_wait_setup_error_no_logs_fetched(
728+
self, mock_get: Mock, handler_with_auth_setup: OetcHandler
729+
) -> None:
730+
"""Test wait_and_get_job_data for SETUP_ERROR does not fetch logs"""
731+
# Setup
732+
job_uuid = "test-job-uuid-123"
733+
734+
# First call returns SETUP_ERROR status
735+
mock_status_response = Mock()
736+
mock_status_response.json.return_value = {
737+
"uuid": job_uuid,
738+
"status": "SETUP_ERROR",
739+
"name": "Test Job",
740+
}
741+
mock_status_response.raise_for_status.return_value = None
742+
mock_get.return_value = mock_status_response
743+
744+
# Execute and verify exception
745+
with pytest.raises(Exception) as exc_info:
746+
handler_with_auth_setup.wait_and_get_job_data(job_uuid)
747+
748+
# Verify exception does not try to fetch logs
749+
error_message = str(exc_info.value)
750+
assert "Job failed during setup phase" in error_message
751+
assert "SETUP_ERROR" in error_message
752+
753+
# Verify only one API call was made (status check, no logs fetch)
754+
assert mock_get.call_count == 1
755+
756+
516757
class TestFileCompression:
517758
@pytest.fixture
518759
def handler_with_mocked_auth(self) -> OetcHandler:

0 commit comments

Comments
 (0)