|
10 | 10 |
|
11 | 11 | import asyncio |
12 | 12 | import logging |
13 | | -import traceback |
14 | 13 | from collections.abc import Callable, Iterable |
15 | 14 | from dataclasses import dataclass |
16 | 15 | from http.client import HTTPException |
|
57 | 56 | from models_library.users import UserID |
58 | 57 | from pydantic import ValidationError |
59 | 58 | from pydantic.networks import AnyUrl |
| 59 | +from servicelib.logging_errors import create_troubleshootting_log_kwargs |
60 | 60 | from servicelib.logging_utils import log_context |
61 | 61 | from settings_library.s3 import S3Settings |
62 | 62 | from simcore_sdk.node_ports_common.exceptions import NodeportsException |
@@ -457,20 +457,40 @@ async def _get_task_state(job_id: str) -> RunningState: |
457 | 457 | assert isinstance(exception, Exception) # nosec |
458 | 458 |
|
459 | 459 | if isinstance(exception, TaskCancelledError): |
| 460 | + _logger.info( |
| 461 | + **create_troubleshootting_log_kwargs( |
| 462 | + f"Task {job_id} was aborted by user", |
| 463 | + error=exception, |
| 464 | + error_context={ |
| 465 | + "job_id": job_id, |
| 466 | + "dask-scheduler": self.backend.scheduler_id, |
| 467 | + }, |
| 468 | + ) |
| 469 | + ) |
460 | 470 | return RunningState.ABORTED |
461 | 471 | assert exception # nosec |
462 | | - _logger.warning( |
463 | | - "Task %s completed in error:\n%s\nTrace:\n%s", |
464 | | - job_id, |
465 | | - exception, |
466 | | - "".join(traceback.format_exception(exception)), |
| 472 | + _logger.info( |
| 473 | + **create_troubleshootting_log_kwargs( |
| 474 | + f"Task {job_id} completed with an error", |
| 475 | + error=exception, |
| 476 | + error_context={ |
| 477 | + "job_id": job_id, |
| 478 | + "dask-scheduler": self.backend.scheduler_id, |
| 479 | + }, |
| 480 | + ) |
467 | 481 | ) |
468 | 482 | return RunningState.FAILED |
469 | | - except TimeoutError: |
470 | | - _logger.warning( |
471 | | - "Task %s could not be retrieved from dask-scheduler, it is lost\n" |
472 | | - "TIP:If the task was unpublished this can happen, or if the dask-scheduler was restarted.", |
473 | | - job_id, |
| 483 | + except TimeoutError as exc: |
| 484 | + _logger.exception( |
| 485 | + **create_troubleshootting_log_kwargs( |
| 486 | + f"Task {job_id} exception could not be retrieved due to timeout", |
| 487 | + error=exc, |
| 488 | + error_context={ |
| 489 | + "job_id": job_id, |
| 490 | + "dask-scheduler": self.backend.scheduler_id, |
| 491 | + }, |
| 492 | + tip="The dask-scheduler is probably under load, this should resolve itself later.", |
| 493 | + ), |
474 | 494 | ) |
475 | 495 | return RunningState.UNKNOWN |
476 | 496 |
|
|
0 commit comments