Skip to content

Commit cd9de5b

Browse files
committed
refactor
1 parent 49e6e0b commit cd9de5b

File tree

1 file changed

+31
-11
lines changed
  • services/director-v2/src/simcore_service_director_v2/modules

1 file changed

+31
-11
lines changed

services/director-v2/src/simcore_service_director_v2/modules/dask_client.py

Lines changed: 31 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,6 @@
1010

1111
import asyncio
1212
import logging
13-
import traceback
1413
from collections.abc import Callable, Iterable
1514
from dataclasses import dataclass
1615
from http.client import HTTPException
@@ -57,6 +56,7 @@
5756
from models_library.users import UserID
5857
from pydantic import ValidationError
5958
from pydantic.networks import AnyUrl
59+
from servicelib.logging_errors import create_troubleshootting_log_kwargs
6060
from servicelib.logging_utils import log_context
6161
from settings_library.s3 import S3Settings
6262
from simcore_sdk.node_ports_common.exceptions import NodeportsException
@@ -457,20 +457,40 @@ async def _get_task_state(job_id: str) -> RunningState:
457457
assert isinstance(exception, Exception) # nosec
458458

459459
if isinstance(exception, TaskCancelledError):
460+
_logger.info(
461+
**create_troubleshootting_log_kwargs(
462+
f"Task {job_id} was aborted by user",
463+
error=exception,
464+
error_context={
465+
"job_id": job_id,
466+
"dask-scheduler": self.backend.scheduler_id,
467+
},
468+
)
469+
)
460470
return RunningState.ABORTED
461471
assert exception # nosec
462-
_logger.warning(
463-
"Task %s completed in error:\n%s\nTrace:\n%s",
464-
job_id,
465-
exception,
466-
"".join(traceback.format_exception(exception)),
472+
_logger.info(
473+
**create_troubleshootting_log_kwargs(
474+
f"Task {job_id} completed with an error",
475+
error=exception,
476+
error_context={
477+
"job_id": job_id,
478+
"dask-scheduler": self.backend.scheduler_id,
479+
},
480+
)
467481
)
468482
return RunningState.FAILED
469-
except TimeoutError:
470-
_logger.warning(
471-
"Task %s could not be retrieved from dask-scheduler, it is lost\n"
472-
"TIP:If the task was unpublished this can happen, or if the dask-scheduler was restarted.",
473-
job_id,
483+
except TimeoutError as exc:
484+
_logger.exception(
485+
**create_troubleshootting_log_kwargs(
486+
f"Task {job_id} exception could not be retrieved due to timeout",
487+
error=exc,
488+
error_context={
489+
"job_id": job_id,
490+
"dask-scheduler": self.backend.scheduler_id,
491+
},
492+
tip="The dask-scheduler is probably under load, this should resolve itself later.",
493+
),
474494
)
475495
return RunningState.UNKNOWN
476496

0 commit comments

Comments
 (0)