Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from servicelib.rest_constants import RESPONSE_MODEL_POLICY
from servicelib.status_codes_utils import is_5xx_server_error

from ..logging_errors import create_troubleshotting_log_kwargs
from ..logging_errors import create_troubleshootting_log_kwargs
from ..mimetype_constants import MIMETYPE_APPLICATION_JSON
from ..rest_responses import is_enveloped_from_map, is_enveloped_from_text
from ..status_codes_utils import get_code_description
Expand Down Expand Up @@ -72,7 +72,7 @@ def _log_5xx_server_error(
error_code, error_context = _create_error_context(request, exception)

_logger.exception(
**create_troubleshotting_log_kwargs(
**create_troubleshootting_log_kwargs(
user_error_msg,
error=exception,
error_context=error_context,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from collections.abc import Callable, Coroutine
from typing import Any, ParamSpec, TypeVar

from servicelib.exception_utils import silence_exceptions
from servicelib.exception_utils import suppress_exceptions
from servicelib.redis._errors import CouldNotAcquireLockError

from .background_task import periodic
Expand Down Expand Up @@ -39,10 +39,11 @@ def _decorator(
coro: Callable[P, Coroutine[Any, Any, None]],
) -> Callable[P, Coroutine[Any, Any, None]]:
@periodic(interval=retry_after)
@silence_exceptions(
@suppress_exceptions(
# Replicas will raise CouldNotAcquireLockError
# SEE https://github.com/ITISFoundation/osparc-simcore/issues/7574
(CouldNotAcquireLockError,)
(CouldNotAcquireLockError,),
reason="Multiple instances of the periodic task `{coro.__module__}.{coro.__name__}` are running.",
)
@exclusive(
redis_client,
Expand Down
89 changes: 85 additions & 4 deletions packages/service-library/src/servicelib/exception_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any, Final, ParamSpec, TypeVar

from pydantic import BaseModel, Field, NonNegativeFloat, PrivateAttr
from servicelib.logging_errors import create_troubleshootting_log_kwargs

_logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -76,17 +77,85 @@ def else_reset(self) -> None:
F = TypeVar("F", bound=Callable[..., Any])


def silence_exceptions(exceptions: tuple[type[BaseException], ...]) -> Callable[[F], F]:
def _decorator(func_or_coro: F) -> F:
def _should_suppress_exception(
exc: BaseException,
predicate: Callable[[BaseException], bool] | None,
func_name: str,
) -> bool:
if predicate is None:
# No predicate provided, suppress all exceptions
return True

try:
return predicate(exc)
except Exception as predicate_exc: # pylint: disable=broad-except
# the predicate function raised an exception
# log it and do not suppress the original exception
_logger.warning(
**create_troubleshootting_log_kwargs(
f"Predicate function raised exception {type(predicate_exc).__name__}:{predicate_exc} in {func_name}. "
f"Original exception will be re-raised: {type(exc).__name__}",
error=predicate_exc,
error_context={
"func_name": func_name,
"original_exception": f"{type(exc).__name__}",
},
tip="Predicate raised, please fix it.",
)
)
return False


def suppress_exceptions(
exceptions: tuple[type[BaseException], ...],
*,
reason: str,
predicate: Callable[[BaseException], bool] | None = None,
) -> Callable[[F], F]:
"""
Decorator to suppress specified exceptions.

Args:
exceptions: Tuple of exception types to suppress
reason: Reason for suppression (for logging)
predicate: Optional function to check exception attributes.
If provided, exception is only suppressed if predicate returns True.

Example:
# Suppress all ConnectionError exceptions
@suppress_exceptions((ConnectionError,), reason="Network issues")
def my_func(): ...

# Suppress only ConnectionError with specific errno
@suppress_exceptions(
(ConnectionError,),
reason="Specific network error",
predicate=lambda e: hasattr(e, 'errno') and e.errno == 104
)
def my_func(): ...
"""

def _decorator(func_or_coro: F) -> F:
if inspect.iscoroutinefunction(func_or_coro):

@wraps(func_or_coro)
async def _async_wrapper(*args, **kwargs) -> Any:
try:
assert inspect.iscoroutinefunction(func_or_coro) # nosec
return await func_or_coro(*args, **kwargs)
except exceptions:
except exceptions as exc:
# Check if exception should be suppressed
if not _should_suppress_exception(
exc, predicate, func_or_coro.__name__
):
raise # Re-raise if predicate returns False or fails

_logger.debug(
"Caught suppressed exception %s in %s: TIP: %s",
exc,
func_or_coro.__name__,
reason,
)
return None

return _async_wrapper # type: ignore[return-value] # decorators typing is hard
Expand All @@ -95,7 +164,19 @@ async def _async_wrapper(*args, **kwargs) -> Any:
def _sync_wrapper(*args, **kwargs) -> Any:
try:
return func_or_coro(*args, **kwargs)
except exceptions:
except exceptions as exc:
# Check if exception should be suppressed
if not _should_suppress_exception(
exc, predicate, func_or_coro.__name__
):
raise # Re-raise if predicate returns False or fails

_logger.debug(
"Caught suppressed exception %s in %s: TIP: %s",
exc,
func_or_coro.__name__,
reason,
)
return None

return _sync_wrapper # type: ignore[return-value] # decorators typing is hard
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from fastapi.responses import JSONResponse
from pydantic import ValidationError

from ..logging_errors import create_troubleshotting_log_kwargs
from ..logging_errors import create_troubleshootting_log_kwargs
from ..status_codes_utils import is_5xx_server_error

validation_error_response_definition["properties"] = {
Expand Down Expand Up @@ -50,7 +50,7 @@ async def _http_error_handler(request: Request, exc: Exception) -> JSONResponse:

if is_5xx_server_error(status_code):
_logger.exception(
create_troubleshotting_log_kwargs(
create_troubleshootting_log_kwargs(
"Unexpected error happened in the Resource Usage Tracker. Please contact support.",
error=exc,
error_context={
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from typing import Any, Final

from pydantic import PositiveFloat
from servicelib.logging_errors import create_troubleshotting_log_message
from servicelib.logging_errors import create_troubleshootting_log_message

from ...long_running_tasks.errors import TaskClientTimeoutError, TaskExceptionError
from ...long_running_tasks.models import (
Expand Down Expand Up @@ -130,7 +130,7 @@ async def _wait_for_task_result() -> Any:
except Exception as e:
error = TaskExceptionError(task_id=task_id, exception=e, traceback="")
_logger.warning(
create_troubleshotting_log_message(
create_troubleshootting_log_message(
user_error_msg=f"{task_id=} raised an exception",
error=e,
tip=f"Check the logs of the service responding to '{client.base_url}'",
Expand Down
12 changes: 8 additions & 4 deletions packages/service-library/src/servicelib/logging_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
_logger = logging.getLogger(__name__)


def create_troubleshotting_log_message(
def create_troubleshootting_log_message(
user_error_msg: str,
*,
error: BaseException,
Expand Down Expand Up @@ -57,7 +57,7 @@ class LogKwargs(TypedDict):
extra: LogExtra | None


def create_troubleshotting_log_kwargs(
def create_troubleshootting_log_kwargs(
user_error_msg: str,
*,
error: BaseException,
Expand All @@ -76,7 +76,11 @@ def create_troubleshotting_log_kwargs(
_logger.exception(
**create_troubleshotting_log_kwargs(
user_error_msg=frontend_msg,
exception=exc,
error=exc,
error_context={
"user_id": user_id,
"product_name": product_name,
},
tip="Check row in `groups_extra_properties` for this product. It might be missing.",
)
)
Expand All @@ -88,7 +92,7 @@ def create_troubleshotting_log_kwargs(
context.update(error.error_context())

# compose as log message
log_msg = create_troubleshotting_log_message(
log_msg = create_troubleshootting_log_message(
user_error_msg,
error=error,
error_code=error_code,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Any

from common_library.error_codes import create_error_code
from servicelib.logging_errors import create_troubleshotting_log_kwargs
from servicelib.logging_errors import create_troubleshootting_log_kwargs

from .errors import TaskNotCompletedError, TaskNotFoundError
from .models import TaskBase, TaskId, TaskStatus
Expand Down Expand Up @@ -43,7 +43,7 @@ async def get_task_result(
raise
except Exception as exc:
_logger.exception(
**create_troubleshotting_log_kwargs(
**create_troubleshootting_log_kwargs(
user_error_msg=f"{task_id=} raised an exception",
error=exc,
error_code=create_error_code(exc),
Expand Down
26 changes: 21 additions & 5 deletions packages/service-library/src/servicelib/redis/_decorators.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import asyncio
import contextlib
import functools
import logging
import socket
Expand All @@ -10,6 +9,7 @@
import arrow
import redis.exceptions
from redis.asyncio.lock import Lock
from servicelib.logging_errors import create_troubleshootting_log_kwargs

from ..background_task import periodic
from ._client import RedisClientSDK
Expand All @@ -23,9 +23,9 @@
R = TypeVar("R")

_EXCLUSIVE_TASK_NAME: Final[str] = "exclusive/{module_name}.{func_name}"
_EXCLUSIVE_AUTO_EXTEND_TASK_NAME: Final[
str
] = "exclusive/autoextend_lock_{redis_lock_key}"
_EXCLUSIVE_AUTO_EXTEND_TASK_NAME: Final[str] = (
"exclusive/autoextend_lock_{redis_lock_key}"
)


@periodic(interval=DEFAULT_LOCK_TTL / 2, raise_on_error=True)
Expand Down Expand Up @@ -134,10 +134,26 @@ async def _wrapper(*args: P.args, **kwargs: P.kwargs) -> R:
assert len(lock_lost_errors.exceptions) == 1 # nosec
raise lock_lost_errors.exceptions[0] from eg
finally:
with contextlib.suppress(redis.exceptions.LockNotOwnedError):
try:
# in the case where the lock would have been lost,
# this would raise again and is not necessary
await lock.release()
except redis.exceptions.LockNotOwnedError as exc:
_logger.exception(
**create_troubleshootting_log_kwargs(
f"Unexpected error while releasing lock '{redis_lock_key}'",
error=exc,
error_context={
"redis_lock_key": redis_lock_key,
"lock_value": lock_value,
"client_name": client.client_name,
"hostname": socket.gethostname(),
"coroutine": coro.__name__,
},
tip="This might happen if the lock was lost before releasing it. "
"Look for synchronous code that prevents refreshing the lock or asyncio loop overload.",
)
)

return _wrapper

Expand Down
Loading
Loading