Skip to content

Commit ca7f1eb

Browse files
authored
feat(BA-3726): Implement ErrorLog Service, Repository Layer (#7803)
1 parent bf2e8b4 commit ca7f1eb

File tree

23 files changed

+786
-2
lines changed

23 files changed

+786
-2
lines changed

changes/7803.feature.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implement `ErrorLog` Service, Repository Layer

src/ai/backend/common/metrics/metric.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,7 @@ class LayerType(enum.StrEnum):
410410
CONTAINER_REGISTRY_REPOSITORY = "container_registry_repository"
411411
DEPLOYMENT_REPOSITORY = "deployment_repository"
412412
DOMAIN_REPOSITORY = "domain_repository"
413+
ERROR_LOG_REPOSITORY = "error_log_repository"
413414
GROUP_REPOSITORY = "group_repository"
414415
HUGGINGFACE_REGISTRY_REPOSITORY = "huggingface_registry_repository"
415416
IMAGE_REPOSITORY = "image_repository"
@@ -438,6 +439,7 @@ class LayerType(enum.StrEnum):
438439
AUTH_DB_SOURCE = "auth_db_source"
439440
AGENT_DB_SOURCE = "agent_db_source"
440441
DEPLOYMENT_DB_SOURCE = "deployment_db_source"
442+
ERROR_LOG_DB_SOURCE = "error_log_db_source"
441443
PERMISSION_CONTROLLER_DB_SOURCE = "permission_controller_db_source"
442444
RESOURCE_PRESET_DB_SOURCE = "resource_preset_db_source"
443445
SCHEDULE_DB_SOURCE = "schedule_db_source"
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
from .types import ErrorLogContent, ErrorLogData, ErrorLogMeta, ErrorLogSeverity
2+
3+
__all__ = (
4+
"ErrorLogContent",
5+
"ErrorLogData",
6+
"ErrorLogMeta",
7+
"ErrorLogSeverity",
8+
)
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
from __future__ import annotations
2+
3+
import enum
4+
import uuid
5+
from dataclasses import dataclass
6+
from datetime import datetime
7+
from typing import Any
8+
9+
10+
class ErrorLogSeverity(enum.StrEnum):
11+
CRITICAL = "critical"
12+
ERROR = "error"
13+
WARNING = "warning"
14+
15+
16+
@dataclass
class ErrorLogMeta:
    """Bookkeeping fields of an error log entry.

    Holds everything about an entry except its severity, message and
    traceback, which are carried separately by ``ErrorLogContent``.
    """

    created_at: datetime
    user: uuid.UUID | None  # None when no user is associated with the entry
    source: str
    # Read/cleared flags of the entry.
    is_read: bool
    is_cleared: bool
    # Context in which the error was reported; the shape of ``context_env``
    # is not constrained here.
    context_lang: str
    context_env: dict[str, Any]
    # Request details; both are optional.
    request_url: str | None
    request_status: int | None
27+
28+
29+
@dataclass
class ErrorLogContent:
    """The payload of an error log entry: severity, message and traceback."""

    severity: ErrorLogSeverity
    message: str
    traceback: str | None  # None when no traceback was recorded
34+
35+
36+
@dataclass
class ErrorLogData:
    """A complete error log entry: its identifier, metadata and content."""

    id: uuid.UUID
    meta: ErrorLogMeta
    content: ErrorLogContent

src/ai/backend/manager/models/error_logs.py

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,29 @@
1+
from __future__ import annotations
2+
3+
import uuid
4+
from datetime import datetime
5+
from typing import Any
6+
17
import sqlalchemy as sa
28
from sqlalchemy.dialects import postgresql
39

4-
from .base import GUID, IDColumn, metadata
10+
from ai.backend.manager.data.error_log.types import (
11+
ErrorLogContent,
12+
ErrorLogData,
13+
ErrorLogMeta,
14+
ErrorLogSeverity,
15+
)
16+
17+
from .base import GUID, Base, IDColumn, mapper_registry
518

619
__all__ = [
720
"error_logs",
21+
"ErrorLogRow",
822
]
923

1024
error_logs = sa.Table(
1125
"error_logs",
12-
metadata,
26+
mapper_registry.metadata,
1327
IDColumn(),
1428
sa.Column("created_at", sa.DateTime(timezone=True), server_default=sa.func.now(), index=True),
1529
sa.Column(
@@ -26,3 +40,57 @@
2640
sa.Column("request_status", sa.Integer, nullable=True),
2741
sa.Column("traceback", sa.Text, nullable=True),
2842
)
43+
44+
45+
class ErrorLogRow(Base):
    """ORM row class bound to the ``error_logs`` table."""

    __table__ = error_logs

    def __init__(
        self,
        severity: ErrorLogSeverity,
        source: str,
        message: str,
        context_lang: str,
        context_env: dict[str, Any],
        user: uuid.UUID | None = None,
        is_read: bool = False,
        is_cleared: bool = False,
        request_url: str | None = None,
        request_status: int | None = None,
        traceback: str | None = None,
        created_at: datetime | None = None,
    ) -> None:
        """Populate the row's column attributes from the given values.

        Args:
            severity: Severity of the entry; stored as its string value.
            source: Value for the ``source`` column.
            message: Value for the ``message`` column.
            context_lang: Value for the ``context_lang`` column.
            context_env: Value for the ``context_env`` column.
            user: Optional user UUID.
            is_read: Initial read flag.
            is_cleared: Initial cleared flag.
            request_url: Optional request URL.
            request_status: Optional request status code.
            traceback: Optional traceback text.
            created_at: Explicit creation timestamp. When ``None`` the
                attribute is not assigned at all.
        """
        # The column holds the plain string, not the enum member.
        self.severity = severity.value
        self.source = source
        self.user = user
        self.is_read = is_read
        self.is_cleared = is_cleared
        self.message = message
        self.context_lang = context_lang
        self.context_env = context_env
        self.request_url = request_url
        self.request_status = request_status
        self.traceback = traceback
        # Assign only when a timestamp was actually supplied.  Leaving the
        # attribute unset lets the ``created_at`` column's server-side default
        # (``now()``) apply on insert.
        if created_at is not None:
            self.created_at = created_at

    def to_dataclass(self) -> ErrorLogData:
        """Convert this row into an ``ErrorLogData`` value object.

        Returns:
            An ``ErrorLogData`` whose ``meta`` and ``content`` parts are built
            from this row's attributes.

        Raises:
            ValueError: If the stored severity string is not a valid
                ``ErrorLogSeverity`` value.
        """
        meta = ErrorLogMeta(
            created_at=self.created_at,
            user=self.user,
            source=self.source,
            is_read=self.is_read,
            is_cleared=self.is_cleared,
            context_lang=self.context_lang,
            context_env=self.context_env,
            request_url=self.request_url,
            request_status=self.request_status,
        )
        content = ErrorLogContent(
            # Turn the stored string back into the enum member.
            severity=ErrorLogSeverity(self.severity),
            message=self.message,
            traceback=self.traceback,
        )
        return ErrorLogData(id=self.id, meta=meta, content=content)
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from .creators import ErrorLogCreatorSpec
2+
from .repositories import ErrorLogRepositories
3+
from .repository import ErrorLogRepository
4+
5+
__all__ = (
6+
"ErrorLogCreatorSpec",
7+
"ErrorLogRepositories",
8+
"ErrorLogRepository",
9+
)
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from __future__ import annotations
2+
3+
import uuid
4+
from dataclasses import dataclass
5+
from datetime import datetime
6+
from typing import Any, override
7+
8+
from ai.backend.manager.data.error_log.types import ErrorLogSeverity
9+
from ai.backend.manager.models.error_logs import ErrorLogRow
10+
from ai.backend.manager.repositories.base import CreatorSpec
11+
12+
__all__ = ("ErrorLogCreatorSpec",)
13+
14+
15+
@dataclass
class ErrorLogCreatorSpec(CreatorSpec[ErrorLogRow]):
    """Creator spec carrying the values needed to build an ``ErrorLogRow``.

    The fields and their defaults mirror the parameters of
    ``ErrorLogRow.__init__``.
    """

    # Required values.
    severity: ErrorLogSeverity
    source: str
    message: str
    context_lang: str
    context_env: dict[str, Any]
    # Optional values.
    user: uuid.UUID | None = None
    is_read: bool = False
    is_cleared: bool = False
    request_url: str | None = None
    request_status: int | None = None
    traceback: str | None = None
    created_at: datetime | None = None

    @override
    def build_row(self) -> ErrorLogRow:
        """Build a new ``ErrorLogRow`` populated from this spec's fields."""
        row = ErrorLogRow(
            severity=self.severity,
            source=self.source,
            message=self.message,
            context_lang=self.context_lang,
            context_env=self.context_env,
            user=self.user,
            is_read=self.is_read,
            is_cleared=self.is_cleared,
            request_url=self.request_url,
            request_status=self.request_status,
            traceback=self.traceback,
            created_at=self.created_at,
        )
        return row
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .db_source import ErrorLogDBSource
2+
3+
__all__ = ("ErrorLogDBSource",)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
5+
from ai.backend.common.exception import BackendAIError
6+
from ai.backend.common.metrics.metric import DomainType, LayerType
7+
from ai.backend.common.resilience.policies.metrics import MetricArgs, MetricPolicy
8+
from ai.backend.common.resilience.policies.retry import BackoffStrategy, RetryArgs, RetryPolicy
9+
from ai.backend.common.resilience.resilience import Resilience
10+
from ai.backend.manager.data.error_log.types import ErrorLogData
11+
from ai.backend.manager.models.error_logs import ErrorLogRow
12+
from ai.backend.manager.repositories.base import (
13+
Creator,
14+
execute_creator,
15+
)
16+
17+
if TYPE_CHECKING:
18+
from ai.backend.manager.models.utils import ExtendedAsyncSAEngine
19+
20+
__all__ = ("ErrorLogDBSource",)
21+
22+
# Resilience policies applied to ErrorLogDBSource operations: metric
# reporting under the error-log DB-source layer, plus retries.
error_log_db_source_resilience = Resilience(
    policies=[
        MetricPolicy(
            MetricArgs(
                domain=DomainType.DB_SOURCE,
                layer=LayerType.ERROR_LOG_DB_SOURCE,
            )
        ),
        RetryPolicy(
            RetryArgs(
                # Up to 5 retries with a fixed 0.1 delay between attempts
                # (presumably seconds -- confirm against RetryArgs).
                max_retries=5,
                retry_delay=0.1,
                backoff_strategy=BackoffStrategy.FIXED,
                # BackendAIError is excluded from retrying.
                non_retryable_exceptions=(BackendAIError,),
            )
        ),
    ]
)
35+
36+
37+
class ErrorLogDBSource:
    """Database access for error log entries."""

    _db: ExtendedAsyncSAEngine

    def __init__(self, db: ExtendedAsyncSAEngine) -> None:
        """Keep a reference to the engine used to open sessions."""
        self._db = db

    @error_log_db_source_resilience.apply()
    async def create(self, creator: Creator[ErrorLogRow]) -> ErrorLogData:
        """Run the given creator in a new session and return the result.

        Args:
            creator: Creator describing the ``ErrorLogRow`` to create.

        Returns:
            The created row converted to ``ErrorLogData``.
        """
        async with self._db.begin_session() as session:
            created = await execute_creator(session, creator)
            # Convert while the session is still open.
            data = created.row.to_dataclass()
            return data
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from __future__ import annotations
2+
3+
from dataclasses import dataclass
4+
from typing import TYPE_CHECKING, Self
5+
6+
if TYPE_CHECKING:
7+
from ai.backend.manager.repositories.types import RepositoryArgs
8+
9+
from .repository import ErrorLogRepository
10+
11+
12+
@dataclass
13+
class ErrorLogRepositories:
14+
repository: ErrorLogRepository
15+
16+
@classmethod
17+
def create(cls, args: RepositoryArgs) -> Self:
18+
return cls(
19+
repository=ErrorLogRepository(db=args.db),
20+
)

0 commit comments

Comments
 (0)