diff --git a/changes/7803.feature.md b/changes/7803.feature.md new file mode 100644 index 00000000000..26721d959de --- /dev/null +++ b/changes/7803.feature.md @@ -0,0 +1 @@ +Implement `ErrorLog` Service, Repository Layer diff --git a/src/ai/backend/common/metrics/metric.py b/src/ai/backend/common/metrics/metric.py index e69dcbefd1f..eec0c37e5e4 100644 --- a/src/ai/backend/common/metrics/metric.py +++ b/src/ai/backend/common/metrics/metric.py @@ -410,6 +410,7 @@ class LayerType(enum.StrEnum): CONTAINER_REGISTRY_REPOSITORY = "container_registry_repository" DEPLOYMENT_REPOSITORY = "deployment_repository" DOMAIN_REPOSITORY = "domain_repository" + ERROR_LOG_REPOSITORY = "error_log_repository" GROUP_REPOSITORY = "group_repository" HUGGINGFACE_REGISTRY_REPOSITORY = "huggingface_registry_repository" IMAGE_REPOSITORY = "image_repository" @@ -438,6 +439,7 @@ class LayerType(enum.StrEnum): AUTH_DB_SOURCE = "auth_db_source" AGENT_DB_SOURCE = "agent_db_source" DEPLOYMENT_DB_SOURCE = "deployment_db_source" + ERROR_LOG_DB_SOURCE = "error_log_db_source" PERMISSION_CONTROLLER_DB_SOURCE = "permission_controller_db_source" RESOURCE_PRESET_DB_SOURCE = "resource_preset_db_source" SCHEDULE_DB_SOURCE = "schedule_db_source" diff --git a/src/ai/backend/manager/data/error_log/__init__.py b/src/ai/backend/manager/data/error_log/__init__.py new file mode 100644 index 00000000000..65d04219c2b --- /dev/null +++ b/src/ai/backend/manager/data/error_log/__init__.py @@ -0,0 +1,8 @@ +from .types import ErrorLogContent, ErrorLogData, ErrorLogMeta, ErrorLogSeverity + +__all__ = ( + "ErrorLogContent", + "ErrorLogData", + "ErrorLogMeta", + "ErrorLogSeverity", +) diff --git a/src/ai/backend/manager/data/error_log/types.py b/src/ai/backend/manager/data/error_log/types.py new file mode 100644 index 00000000000..3836cb97fb6 --- /dev/null +++ b/src/ai/backend/manager/data/error_log/types.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +import enum +import uuid +from dataclasses import dataclass +from datetime import datetime +from typing import Any + + +class ErrorLogSeverity(enum.StrEnum): + CRITICAL = "critical" + ERROR = "error" + WARNING = "warning" + + +@dataclass +class ErrorLogMeta: + created_at: datetime + user: uuid.UUID | None + source: str + is_read: bool + is_cleared: bool + context_lang: str + context_env: dict[str, Any] + request_url: str | None + request_status: int | None + + +@dataclass +class ErrorLogContent: + severity: ErrorLogSeverity + message: str + traceback: str | None + + +@dataclass +class ErrorLogData: + id: uuid.UUID + meta: ErrorLogMeta + content: ErrorLogContent diff --git a/src/ai/backend/manager/models/error_logs.py b/src/ai/backend/manager/models/error_logs.py index 6375927f49d..115b55673e3 100644 --- a/src/ai/backend/manager/models/error_logs.py +++ b/src/ai/backend/manager/models/error_logs.py @@ -1,15 +1,29 @@ +from __future__ import annotations + +import uuid +from datetime import datetime +from typing import Any + import sqlalchemy as sa from sqlalchemy.dialects import postgresql -from .base import GUID, IDColumn, metadata +from ai.backend.manager.data.error_log.types import ( + ErrorLogContent, + ErrorLogData, + ErrorLogMeta, + ErrorLogSeverity, +) + +from .base import GUID, Base, IDColumn, mapper_registry __all__ = [ "error_logs", + "ErrorLogRow", ] error_logs = sa.Table( "error_logs", - metadata, + mapper_registry.metadata, IDColumn(), sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), index=True), sa.Column( @@ -26,3 +40,57 @@ sa.Column("request_status", sa.Integer, nullable=True), sa.Column("traceback", sa.Text, nullable=True), ) + + +class ErrorLogRow(Base): + __table__ = error_logs + + def __init__( + self, + severity: ErrorLogSeverity, + source: str, + message: str, + context_lang: str, + context_env: dict[str, Any], + user: uuid.UUID | None = None, + is_read: bool = False, + is_cleared: bool = False, + request_url: str | None = None, + request_status: int | None = None, + traceback: str | None = None, + created_at: datetime | None = None, + ) -> None: + self.severity = severity.value + self.source = source + self.user = user + self.is_read = is_read + self.is_cleared = is_cleared + self.message = message + self.context_lang = context_lang + self.context_env = context_env + self.request_url = request_url + self.request_status = request_status + self.traceback = traceback + if created_at: + self.created_at = created_at + + def to_dataclass(self) -> ErrorLogData: + return ErrorLogData( + id=self.id, + meta=ErrorLogMeta( + created_at=self.created_at, + user=self.user, + source=self.source, + is_read=self.is_read, + is_cleared=self.is_cleared, + context_lang=self.context_lang, + context_env=self.context_env, + request_url=self.request_url, + request_status=self.request_status, + ), + content=ErrorLogContent( + severity=ErrorLogSeverity(self.severity), + message=self.message, + traceback=self.traceback, + ), + ) diff --git a/src/ai/backend/manager/repositories/error_log/__init__.py b/src/ai/backend/manager/repositories/error_log/__init__.py new file mode 100644 index 00000000000..f0ff45475d3 --- /dev/null +++ b/src/ai/backend/manager/repositories/error_log/__init__.py @@ -0,0 +1,9 @@ +from .creators import ErrorLogCreatorSpec +from .repositories import ErrorLogRepositories +from .repository import ErrorLogRepository + +__all__ = ( + "ErrorLogCreatorSpec", + "ErrorLogRepositories", + "ErrorLogRepository", +) diff --git a/src/ai/backend/manager/repositories/error_log/creators.py b/src/ai/backend/manager/repositories/error_log/creators.py new file mode 100644 index 00000000000..295eb383650 --- /dev/null +++ b/src/ai/backend/manager/repositories/error_log/creators.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +import uuid +from dataclasses import dataclass +from datetime import datetime +from typing import Any, override + +from ai.backend.manager.data.error_log.types import ErrorLogSeverity +from ai.backend.manager.models.error_logs import ErrorLogRow +from ai.backend.manager.repositories.base import CreatorSpec + +__all__ = ("ErrorLogCreatorSpec",) + + +@dataclass +class ErrorLogCreatorSpec(CreatorSpec[ErrorLogRow]): + severity: ErrorLogSeverity + source: str + message: str + context_lang: str + context_env: dict[str, Any] + user: uuid.UUID | None = None + is_read: bool = False + is_cleared: bool = False + request_url: str | None = None + request_status: int | None = None + traceback: str | None = None + created_at: datetime | None = None + + @override + def build_row(self) -> ErrorLogRow: + return ErrorLogRow( + severity=self.severity, + source=self.source, + message=self.message, + context_lang=self.context_lang, + context_env=self.context_env, + user=self.user, + is_read=self.is_read, + is_cleared=self.is_cleared, + request_url=self.request_url, + request_status=self.request_status, + traceback=self.traceback, + created_at=self.created_at, + ) diff --git a/src/ai/backend/manager/repositories/error_log/db_source/__init__.py b/src/ai/backend/manager/repositories/error_log/db_source/__init__.py new file mode 100644 index 00000000000..ca4a6410e6f --- /dev/null +++ b/src/ai/backend/manager/repositories/error_log/db_source/__init__.py @@ -0,0 +1,3 @@ +from .db_source import ErrorLogDBSource + +__all__ = ("ErrorLogDBSource",) diff --git a/src/ai/backend/manager/repositories/error_log/db_source/db_source.py b/src/ai/backend/manager/repositories/error_log/db_source/db_source.py new file mode 100644 index 00000000000..7256563866a --- /dev/null +++ b/src/ai/backend/manager/repositories/error_log/db_source/db_source.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ai.backend.common.exception import BackendAIError +from ai.backend.common.metrics.metric import DomainType, LayerType +from ai.backend.common.resilience.policies.metrics import MetricArgs, MetricPolicy +from ai.backend.common.resilience.policies.retry import BackoffStrategy, RetryArgs, RetryPolicy +from ai.backend.common.resilience.resilience import Resilience +from ai.backend.manager.data.error_log.types import ErrorLogData +from ai.backend.manager.models.error_logs import ErrorLogRow +from ai.backend.manager.repositories.base import ( + Creator, + execute_creator, +) + +if TYPE_CHECKING: + from ai.backend.manager.models.utils import ExtendedAsyncSAEngine + +__all__ = ("ErrorLogDBSource",) + +error_log_db_source_resilience = Resilience( + policies=[ + MetricPolicy(MetricArgs(domain=DomainType.DB_SOURCE, layer=LayerType.ERROR_LOG_DB_SOURCE)), + RetryPolicy( + RetryArgs( + max_retries=5, + retry_delay=0.1, + backoff_strategy=BackoffStrategy.FIXED, + non_retryable_exceptions=(BackendAIError,), + ) + ), + ] +) + + +class ErrorLogDBSource: + _db: ExtendedAsyncSAEngine + + def __init__(self, db: ExtendedAsyncSAEngine) -> None: + self._db = db + + @error_log_db_source_resilience.apply() + async def create(self, creator: Creator[ErrorLogRow]) -> ErrorLogData: + async with self._db.begin_session() as db_sess: + result = await execute_creator(db_sess, creator) + return result.row.to_dataclass() diff --git a/src/ai/backend/manager/repositories/error_log/repositories.py b/src/ai/backend/manager/repositories/error_log/repositories.py new file mode 100644 index 00000000000..fd3ed2576ca --- /dev/null +++ b/src/ai/backend/manager/repositories/error_log/repositories.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Self + +if TYPE_CHECKING: + from ai.backend.manager.repositories.types import RepositoryArgs + +from .repository import ErrorLogRepository + + +@dataclass +class ErrorLogRepositories: + repository: ErrorLogRepository + + @classmethod + def create(cls, args: RepositoryArgs) -> Self: + return cls( + repository=ErrorLogRepository(db=args.db), + ) diff --git a/src/ai/backend/manager/repositories/error_log/repository.py b/src/ai/backend/manager/repositories/error_log/repository.py new file mode 100644 index 00000000000..ce681ec009e --- /dev/null +++ b/src/ai/backend/manager/repositories/error_log/repository.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from ai.backend.common.exception import BackendAIError +from ai.backend.common.metrics.metric import DomainType, LayerType +from ai.backend.common.resilience.policies.metrics import MetricArgs, MetricPolicy +from ai.backend.common.resilience.policies.retry import BackoffStrategy, RetryArgs, RetryPolicy +from ai.backend.common.resilience.resilience import Resilience +from ai.backend.manager.data.error_log.types import ErrorLogData +from ai.backend.manager.models.error_logs import ErrorLogRow +from ai.backend.manager.repositories.base import Creator + +from .db_source import ErrorLogDBSource + +if TYPE_CHECKING: + from ai.backend.manager.models.utils import ExtendedAsyncSAEngine + +__all__ = ("ErrorLogRepository",) + +error_log_repository_resilience = Resilience( + policies=[ + MetricPolicy( + MetricArgs(domain=DomainType.REPOSITORY, layer=LayerType.ERROR_LOG_REPOSITORY) + ), + RetryPolicy( + RetryArgs( + max_retries=10, + retry_delay=0.1, + backoff_strategy=BackoffStrategy.FIXED, + non_retryable_exceptions=(BackendAIError,), + ) + ), + ] +) + + +class ErrorLogRepository: + _db_source: ErrorLogDBSource + + def __init__(self, db: ExtendedAsyncSAEngine) -> None: + self._db_source = ErrorLogDBSource(db) + + @error_log_repository_resilience.apply() + async def create(self, creator: Creator[ErrorLogRow]) -> ErrorLogData: + return await self._db_source.create(creator) diff --git a/src/ai/backend/manager/repositories/repositories.py b/src/ai/backend/manager/repositories/repositories.py index 9e4904770ae..0533cde4110 100644 --- a/src/ai/backend/manager/repositories/repositories.py +++ b/src/ai/backend/manager/repositories/repositories.py @@ -14,6 +14,7 @@ ) from ai.backend.manager.repositories.deployment.repositories import DeploymentRepositories from ai.backend.manager.repositories.domain.repositories import DomainRepositories +from ai.backend.manager.repositories.error_log.repositories import ErrorLogRepositories from ai.backend.manager.repositories.group.repositories import GroupRepositories from ai.backend.manager.repositories.huggingface_registry.repositories import ( HuggingFaceRegistryRepositories, @@ -63,6 +64,7 @@ class Repositories: container_registry: ContainerRegistryRepositories deployment: DeploymentRepositories domain: DomainRepositories + error_log: ErrorLogRepositories group: GroupRepositories image: ImageRepositories keypair_resource_policy: KeypairResourcePolicyRepositories @@ -97,6 +99,7 @@ def create(cls, args: RepositoryArgs) -> Self: container_registry_repositories = ContainerRegistryRepositories.create(args) deployment_repositories = DeploymentRepositories.create(args) domain_repositories = DomainRepositories.create(args) + error_log_repositories = ErrorLogRepositories.create(args) group_repositories = GroupRepositories.create(args) image_repositories = ImageRepositories.create(args) keypair_resource_policy_repositories = KeypairResourcePolicyRepositories.create(args) @@ -130,6 +133,7 @@ def create(cls, args: RepositoryArgs) -> Self: container_registry=container_registry_repositories, deployment=deployment_repositories, domain=domain_repositories, + error_log=error_log_repositories, group=group_repositories, image=image_repositories, keypair_resource_policy=keypair_resource_policy_repositories, diff --git a/src/ai/backend/manager/services/error_log/__init__.py b/src/ai/backend/manager/services/error_log/__init__.py new file mode 100644 index 00000000000..bf21551162f --- /dev/null +++ b/src/ai/backend/manager/services/error_log/__init__.py @@ -0,0 +1,12 @@ +from __future__ import annotations + +from .actions import CreateErrorLogAction, CreateErrorLogActionResult +from .processors import ErrorLogProcessors +from .service import ErrorLogService + +__all__ = ( + "CreateErrorLogAction", + "CreateErrorLogActionResult", + "ErrorLogProcessors", + "ErrorLogService", +) diff --git a/src/ai/backend/manager/services/error_log/actions/__init__.py b/src/ai/backend/manager/services/error_log/actions/__init__.py new file mode 100644 index 00000000000..676c252b75b --- /dev/null +++ b/src/ai/backend/manager/services/error_log/actions/__init__.py @@ -0,0 +1,8 @@ +from .base import ErrorLogAction +from .create import CreateErrorLogAction, CreateErrorLogActionResult + +__all__ = ( + "ErrorLogAction", + "CreateErrorLogAction", + "CreateErrorLogActionResult", +) diff --git a/src/ai/backend/manager/services/error_log/actions/base.py b/src/ai/backend/manager/services/error_log/actions/base.py new file mode 100644 index 00000000000..74f7d2964ff --- /dev/null +++ b/src/ai/backend/manager/services/error_log/actions/base.py @@ -0,0 +1,16 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import override + +from ai.backend.manager.actions.action import BaseAction + + +@dataclass +class ErrorLogAction(BaseAction): + """Base action class for error log operations.""" + + @override + @classmethod + def entity_type(cls) -> str: + return "error_log" diff --git a/src/ai/backend/manager/services/error_log/actions/create.py b/src/ai/backend/manager/services/error_log/actions/create.py new file mode 100644 index 00000000000..9b36343d445 --- /dev/null +++ b/src/ai/backend/manager/services/error_log/actions/create.py @@ -0,0 +1,40 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING, Optional, override + +from ai.backend.manager.actions.action import BaseActionResult +from ai.backend.manager.data.error_log.types import ErrorLogData +from ai.backend.manager.repositories.base import Creator + +from .base import ErrorLogAction + +if TYPE_CHECKING: + from ai.backend.manager.models.error_logs import ErrorLogRow + + +@dataclass +class CreateErrorLogAction(ErrorLogAction): + """Action to create an error log.""" + + creator: Creator[ErrorLogRow] + + @override + @classmethod + def operation_type(cls) -> str: + return "create" + + @override + def entity_id(self) -> Optional[str]: + return None + + +@dataclass +class CreateErrorLogActionResult(BaseActionResult): + """Result of creating an error log.""" + + error_log_data: ErrorLogData + + @override + def entity_id(self) -> Optional[str]: + return str(self.error_log_data.id) diff --git a/src/ai/backend/manager/services/error_log/processors.py b/src/ai/backend/manager/services/error_log/processors.py new file mode 100644 index 00000000000..a30d464bcdd --- /dev/null +++ b/src/ai/backend/manager/services/error_log/processors.py @@ -0,0 +1,27 @@ +from __future__ import annotations + +from typing import override + +from ai.backend.manager.actions.monitors.monitor import ActionMonitor +from ai.backend.manager.actions.processor import ActionProcessor +from ai.backend.manager.actions.types import AbstractProcessorPackage, ActionSpec + +from .actions import CreateErrorLogAction, CreateErrorLogActionResult +from .service import ErrorLogService + +__all__ = ("ErrorLogProcessors",) + + +class ErrorLogProcessors(AbstractProcessorPackage): + """Processor package for error log operations.""" + + create: ActionProcessor[CreateErrorLogAction, CreateErrorLogActionResult] + + def __init__(self, service: ErrorLogService, action_monitors: list[ActionMonitor]) -> None: + self.create = ActionProcessor(service.create, action_monitors) + + @override + def supported_actions(self) -> list[ActionSpec]: + return [ + CreateErrorLogAction.spec(), + ] diff --git a/src/ai/backend/manager/services/error_log/service.py b/src/ai/backend/manager/services/error_log/service.py new file mode 100644 index 00000000000..9cde809cc59 --- /dev/null +++ b/src/ai/backend/manager/services/error_log/service.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from .actions import CreateErrorLogAction, CreateErrorLogActionResult + +if TYPE_CHECKING: + from ai.backend.manager.repositories.error_log import ErrorLogRepository + +__all__ = ("ErrorLogService",) + + +@dataclass +class ErrorLogService: + """Service for error log operations.""" + + _repository: ErrorLogRepository + + def __init__(self, repository: ErrorLogRepository) -> None: + self._repository = repository + + async def create(self, action: CreateErrorLogAction) -> CreateErrorLogActionResult: + """Creates a new error log.""" + error_log_data = await self._repository.create(action.creator) + return CreateErrorLogActionResult(error_log_data=error_log_data) diff --git a/src/ai/backend/manager/services/processors.py b/src/ai/backend/manager/services/processors.py index b27e16b06f4..8b8f9222e1a 100644 --- a/src/ai/backend/manager/services/processors.py +++ b/src/ai/backend/manager/services/processors.py @@ -43,6 +43,8 @@ from ai.backend.manager.services.deployment.service import DeploymentService from ai.backend.manager.services.domain.processors import DomainProcessors from ai.backend.manager.services.domain.service import DomainService +from ai.backend.manager.services.error_log.processors import ErrorLogProcessors +from ai.backend.manager.services.error_log.service import ErrorLogService from ai.backend.manager.services.group.processors import GroupProcessors from ai.backend.manager.services.group.service import GroupService from ai.backend.manager.services.image.processors import ImageProcessors @@ -134,6 +136,7 @@ class Services: agent: AgentService app_config: AppConfigService domain: DomainService + error_log: ErrorLogService group: GroupService user: UserService image: ImageService @@ -181,6 +184,9 @@ def create(cls, args: ServiceArgs) -> Self: domain_service = DomainService( repositories.domain.repository, repositories.domain.admin_repository ) + error_log_service = ErrorLogService( + repository=repositories.error_log.repository, + ) group_service = GroupService( args.storage_manager, args.config_provider, @@ -333,6 +339,7 @@ def create(cls, args: ServiceArgs) -> Self: agent=agent_service, app_config=app_config_service, domain=domain_service, + error_log=error_log_service, group=group_service, user=user_service, image=image_service, @@ -373,6 +380,7 @@ class Processors(AbstractProcessorPackage): agent: AgentProcessors app_config: AppConfigProcessors domain: DomainProcessors + error_log: ErrorLogProcessors group: GroupProcessors user: UserProcessors image: ImageProcessors @@ -407,6 +415,7 @@ def create(cls, args: ProcessorArgs, action_monitors: list[ActionMonitor]) -> Se agent_processors = AgentProcessors(services.agent, action_monitors) app_config_processors = AppConfigProcessors(services.app_config, action_monitors) domain_processors = DomainProcessors(services.domain, action_monitors) + error_log_processors = ErrorLogProcessors(services.error_log, action_monitors) group_processors = GroupProcessors(services.group, action_monitors) user_processors = UserProcessors(services.user, action_monitors) image_processors = ImageProcessors(services.image, action_monitors) @@ -467,6 +476,7 @@ def create(cls, args: ProcessorArgs, action_monitors: list[ActionMonitor]) -> Se agent=agent_processors, app_config=app_config_processors, domain=domain_processors, + error_log=error_log_processors, group=group_processors, user=user_processors, image=image_processors, @@ -502,6 +512,7 @@ def supported_actions(self) -> list[ActionSpec]: *self.agent.supported_actions(), *self.app_config.supported_actions(), *self.domain.supported_actions(), + *self.error_log.supported_actions(), *self.group.supported_actions(), *self.user.supported_actions(), *self.image.supported_actions(), diff --git a/tests/unit/manager/repositories/error_log/BUILD b/tests/unit/manager/repositories/error_log/BUILD new file mode 100644 index 00000000000..75b8f46de9b --- /dev/null +++ b/tests/unit/manager/repositories/error_log/BUILD @@ -0,0 +1 @@ +python_tests(name="tests") diff --git a/tests/unit/manager/repositories/error_log/test_error_log_repository.py b/tests/unit/manager/repositories/error_log/test_error_log_repository.py new file mode 100644 index 00000000000..c12ded38286 --- /dev/null +++ b/tests/unit/manager/repositories/error_log/test_error_log_repository.py @@ -0,0 +1,252 @@ +""" +Tests for ErrorLogRepository functionality. +Tests the repository layer with real database operations. +""" + +from __future__ import annotations + +import uuid +from collections.abc import AsyncGenerator + +import pytest + +from ai.backend.common.types import BinarySize +from ai.backend.manager.data.error_log.types import ( + ErrorLogData, + ErrorLogSeverity, +) +from ai.backend.manager.models.agent import AgentRow +from ai.backend.manager.models.deployment_auto_scaling_policy import DeploymentAutoScalingPolicyRow +from ai.backend.manager.models.deployment_policy import DeploymentPolicyRow +from ai.backend.manager.models.deployment_revision import DeploymentRevisionRow +from ai.backend.manager.models.domain import DomainRow +from ai.backend.manager.models.endpoint import EndpointRow +from ai.backend.manager.models.error_logs import ErrorLogRow +from ai.backend.manager.models.group import GroupRow +from ai.backend.manager.models.image import ImageRow +from ai.backend.manager.models.keypair import KeyPairRow +from ai.backend.manager.models.rbac_models import UserRoleRow +from ai.backend.manager.models.resource_policy import ( + KeyPairResourcePolicyRow, + ProjectResourcePolicyRow, + UserResourcePolicyRow, +) +from ai.backend.manager.models.resource_preset import ResourcePresetRow +from ai.backend.manager.models.scaling_group import ScalingGroupRow +from ai.backend.manager.models.user import ( + PasswordHashAlgorithm, + PasswordInfo, + UserRole, + UserRow, + UserStatus, +) +from ai.backend.manager.models.utils import ExtendedAsyncSAEngine +from ai.backend.manager.models.vfolder import VFolderRow +from ai.backend.manager.repositories.base import Creator +from ai.backend.manager.repositories.error_log import ErrorLogCreatorSpec, ErrorLogRepository +from ai.backend.testutils.db import with_tables + + +class TestErrorLogRepository: + """Test cases for ErrorLogRepository""" + + @pytest.fixture + async def db_with_cleanup( + self, + database_connection: ExtendedAsyncSAEngine, + ) -> AsyncGenerator[ExtendedAsyncSAEngine, None]: + """Database connection with tables created.""" + async with with_tables( + database_connection, + [ + # FK dependency order: parents before children + DomainRow, + ScalingGroupRow, + UserResourcePolicyRow, + ProjectResourcePolicyRow, + KeyPairResourcePolicyRow, + UserRoleRow, + UserRow, + KeyPairRow, + GroupRow, + AgentRow, + VFolderRow, + ImageRow, + ResourcePresetRow, + EndpointRow, + DeploymentRevisionRow, + DeploymentAutoScalingPolicyRow, + DeploymentPolicyRow, + ErrorLogRow, + ], + ): + yield database_connection + + @pytest.fixture + async def test_domain_name( + self, + db_with_cleanup: ExtendedAsyncSAEngine, + ) -> str: + """Create test domain and return domain name""" + domain_name = f"test-domain-{uuid.uuid4().hex[:8]}" + + async with db_with_cleanup.begin_session() as db_sess: + domain = DomainRow( + name=domain_name, + description="Test domain for error log", + is_active=True, + total_resource_slots={}, + allowed_vfolder_hosts={}, + allowed_docker_registries=[], + ) + db_sess.add(domain) + await db_sess.commit() + + return domain_name + + @pytest.fixture + async def test_resource_policy_name( + self, + db_with_cleanup: ExtendedAsyncSAEngine, + ) -> str: + """Create test resource policy and return policy name""" + policy_name = f"test-policy-{uuid.uuid4().hex[:8]}" + + async with db_with_cleanup.begin_session() as db_sess: + policy = UserResourcePolicyRow( + name=policy_name, + max_vfolder_count=10, + max_quota_scope_size=BinarySize.finite_from_str("10GiB"), + max_session_count_per_model_session=5, + max_customized_image_count=3, + ) + db_sess.add(policy) + await db_sess.commit() + + return policy_name + + @pytest.fixture + async def test_user_id( + self, + db_with_cleanup: ExtendedAsyncSAEngine, + test_domain_name: str, + test_resource_policy_name: str, + ) -> uuid.UUID: + """Create test user and return user UUID""" + user_uuid = uuid.uuid4() + + password_info = PasswordInfo( + password="dummy", + algorithm=PasswordHashAlgorithm.PBKDF2_SHA256, + rounds=600_000, + salt_size=32, + ) + + async with db_with_cleanup.begin_session() as db_sess: + user = UserRow( + uuid=user_uuid, + username=f"testuser-{user_uuid.hex[:8]}", + email=f"test-{user_uuid.hex[:8]}@example.com", + password=password_info, + need_password_change=False, + status=UserStatus.ACTIVE, + status_info="active", + domain_name=test_domain_name, + role=UserRole.USER, + resource_policy=test_resource_policy_name, + ) + db_sess.add(user) + await db_sess.commit() + + return user_uuid + + @pytest.fixture + def error_log_repository( + self, + db_with_cleanup: ExtendedAsyncSAEngine, + ) -> ErrorLogRepository: + """Create ErrorLogRepository instance with database""" + return ErrorLogRepository(db=db_with_cleanup) + + @pytest.mark.asyncio + async def test_create_multiple_error_logs( + self, + error_log_repository: ErrorLogRepository, + test_user_id: uuid.UUID, + ) -> None: + """Test creating multiple error logs and verifying them""" + error_log_specs = [ + ErrorLogCreatorSpec( + severity=ErrorLogSeverity.CRITICAL, + source="manager", + user=test_user_id, + message="Critical error occurred", + context_lang="en", + context_env={"version": "1.0.0"}, + request_url="/api/v1/test", + request_status=500, + traceback="Traceback: ...", + ), + ErrorLogCreatorSpec( + severity=ErrorLogSeverity.ERROR, + source="agent", + user=test_user_id, + message="Error in agent", + context_lang="en", + context_env={"agent_id": "agent-001"}, + ), + ErrorLogCreatorSpec( + severity=ErrorLogSeverity.WARNING, + source="storage", + message="Storage warning", + context_lang="ko", + context_env={"storage_id": "storage-001"}, + request_url="/api/v1/storage", + request_status=400, + ), + ] + + created_logs: list[ErrorLogData] = [] + + for spec in error_log_specs: + creator = Creator(spec=spec) + result = await error_log_repository.create(creator) + created_logs.append(result) + + # Verify all logs were created with correct data + assert len(created_logs) == 3 + + # Verify first log (CRITICAL) + assert created_logs[0].content.severity == ErrorLogSeverity.CRITICAL + assert created_logs[0].meta.source == "manager" + assert created_logs[0].meta.user == test_user_id + assert created_logs[0].content.message == "Critical error occurred" + assert created_logs[0].meta.context_lang == "en" + assert created_logs[0].meta.context_env == {"version": "1.0.0"} + assert created_logs[0].meta.request_url == "/api/v1/test" + assert created_logs[0].meta.request_status == 500 + assert created_logs[0].content.traceback == "Traceback: ..." + assert created_logs[0].meta.is_read is False + assert created_logs[0].meta.is_cleared is False + assert created_logs[0].id is not None + assert created_logs[0].meta.created_at is not None + + # Verify second log (ERROR) + assert created_logs[1].content.severity == ErrorLogSeverity.ERROR + assert created_logs[1].meta.source == "agent" + assert created_logs[1].meta.user == test_user_id + assert created_logs[1].content.message == "Error in agent" + assert created_logs[1].meta.request_url is None + assert created_logs[1].meta.request_status is None + assert created_logs[1].content.traceback is None + + # Verify third log (WARNING, no user) + assert created_logs[2].content.severity == ErrorLogSeverity.WARNING + assert created_logs[2].meta.source == "storage" + assert created_logs[2].meta.user is None + assert created_logs[2].content.message == "Storage warning" + assert created_logs[2].meta.context_lang == "ko" + + # Verify all IDs are unique + ids = [log.id for log in created_logs] + assert len(ids) == len(set(ids)) diff --git a/tests/unit/manager/services/error_log/BUILD b/tests/unit/manager/services/error_log/BUILD new file mode 100644 index 00000000000..b5e1930af77 --- /dev/null +++ b/tests/unit/manager/services/error_log/BUILD @@ -0,0 +1,6 @@ +python_tests( + name="tests", + dependencies=[ + "src/ai/backend/manager:src", + ], +) diff --git a/tests/unit/manager/services/error_log/test_error_log_service.py b/tests/unit/manager/services/error_log/test_error_log_service.py new file mode 100644 index 00000000000..cf2adeec6fe --- /dev/null +++ b/tests/unit/manager/services/error_log/test_error_log_service.py @@ -0,0 +1,92 @@ +""" +Tests for ErrorLogService functionality. +Tests the service layer with mocked repository operations. +""" + +from __future__ import annotations + +import uuid +from datetime import UTC, datetime +from unittest.mock import AsyncMock, MagicMock + +import pytest + +from ai.backend.manager.data.error_log.types import ( + ErrorLogContent, + ErrorLogData, + ErrorLogMeta, + ErrorLogSeverity, +) +from ai.backend.manager.repositories.base import Creator +from ai.backend.manager.repositories.error_log import ErrorLogCreatorSpec, ErrorLogRepository +from ai.backend.manager.services.error_log.actions import CreateErrorLogAction +from ai.backend.manager.services.error_log.service import ErrorLogService + + +class TestErrorLogService: + """Test cases for ErrorLogService""" + + @pytest.fixture + def mock_repository(self) -> MagicMock: + """Create mocked ErrorLogRepository""" + return MagicMock(spec=ErrorLogRepository) + + @pytest.fixture + def error_log_service(self, mock_repository: MagicMock) -> ErrorLogService: + """Create ErrorLogService instance with mocked repository""" + return ErrorLogService(repository=mock_repository) + + @pytest.fixture + def sample_error_log_data(self) -> ErrorLogData: + """Create sample error log data""" + return ErrorLogData( + id=uuid.uuid4(), + meta=ErrorLogMeta( + created_at=datetime.now(tz=UTC), + user=uuid.uuid4(), + source="manager", + is_read=False, + is_cleared=False, + context_lang="en", + context_env={"test": "value"}, + request_url="/api/v1/test", + request_status=500, + ), + content=ErrorLogContent( + severity=ErrorLogSeverity.ERROR, + message="Test error message", + traceback="Traceback: ...", + ), + ) + + @pytest.mark.asyncio + async def test_create_error_log( + self, + error_log_service: ErrorLogService, + mock_repository: MagicMock, + sample_error_log_data: ErrorLogData, + ) -> None: + """Test creating an error log through service""" + mock_repository.create = AsyncMock(return_value=sample_error_log_data) + + creator = Creator( + spec=ErrorLogCreatorSpec( + severity=sample_error_log_data.content.severity, + source=sample_error_log_data.meta.source, + message=sample_error_log_data.content.message, + context_lang=sample_error_log_data.meta.context_lang, + context_env=sample_error_log_data.meta.context_env, + user=sample_error_log_data.meta.user, + is_read=sample_error_log_data.meta.is_read, + is_cleared=sample_error_log_data.meta.is_cleared, + request_url=sample_error_log_data.meta.request_url, + request_status=sample_error_log_data.meta.request_status, + traceback=sample_error_log_data.content.traceback, + ) + ) + action = CreateErrorLogAction(creator=creator) + + result = await error_log_service.create(action) + + assert result.error_log_data == sample_error_log_data + mock_repository.create.assert_called_once_with(creator)