)
@@ -175,6 +174,5 @@ The platform is built on three main pillars:
- **Monitoring Tools**: Using OpenTelemetry and Grafana to keep an eye on system health.
- **Alerts**: Set up notifications for when things go wrong.
-Link for accessing Prometheus is shown in `/editor` web page.
-
+
diff --git a/backend/.env.test b/backend/.env.test
index 31c4437c..f942cc66 100644
--- a/backend/.env.test
+++ b/backend/.env.test
@@ -1,42 +1,39 @@
# Test environment configuration
-# This file is loaded by tests/conftest.py for integration tests
-
-# MongoDB Configuration
-MONGODB_URL="mongodb://localhost:27017"
-PROJECT_NAME="integr8scode_test"
-
-# Redis Configuration
-REDIS_URL="redis://localhost:6379/0"
-
-# Authentication
-SECRET_KEY="test-secret-key-for-testing-only"
-JWT_SECRET_KEY="test-jwt-secret-key-for-testing-only"
-JWT_ALGORITHM="HS256"
-ACCESS_TOKEN_EXPIRE_MINUTES=30
-
-# Rate Limiting - DISABLED for tests
-RATE_LIMIT_ENABLED=false
-RATE_LIMIT_DEFAULT_REQUESTS=1000
-RATE_LIMIT_DEFAULT_WINDOW=1
+PROJECT_NAME=integr8scode_test
+API_V1_STR=/api/v1
+SECRET_KEY=test-secret-key-for-testing-only-32chars!!
+ENVIRONMENT=testing
+TESTING=true
-# Disable tracing for tests
+# MongoDB - use localhost for tests
+MONGODB_URL=mongodb://root:rootpassword@localhost:27017/?authSource=admin
+MONGO_ROOT_USER=root
+MONGO_ROOT_PASSWORD=rootpassword
+
+# Redis - use localhost for tests
+REDIS_HOST=localhost
+REDIS_PORT=6379
+REDIS_DB=0
+REDIS_PASSWORD=
+REDIS_SSL=false
+REDIS_MAX_CONNECTIONS=50
+REDIS_DECODE_RESPONSES=true
+
+# Kafka - use localhost for tests
+KAFKA_BOOTSTRAP_SERVERS=localhost:9092
+SCHEMA_REGISTRY_URL=http://localhost:8081
+
+# Security
+SECURE_COOKIES=true
+CORS_ALLOWED_ORIGINS=["http://localhost:3000","https://localhost:3000"]
+
+# Features
+RATE_LIMIT_ENABLED=true
ENABLE_TRACING=false
OTEL_SDK_DISABLED=true
OTEL_METRICS_EXPORTER=none
OTEL_TRACES_EXPORTER=none
-# API Settings
-BACKEND_BASE_URL="https://[::1]:443"
-BACKEND_CORS_ORIGINS=["http://localhost:3000", "http://localhost:5173"]
-
-# Kafka Configuration (minimal for tests)
-KAFKA_BOOTSTRAP_SERVERS="localhost:9092"
-KAFKA_SECURITY_PROTOCOL="PLAINTEXT"
-
-# Kubernetes Configuration (mocked in tests)
-K8S_IN_CLUSTER=false
-K8S_NAMESPACE="default"
-
-# Test Mode
-TESTING=true
-DEBUG=false
\ No newline at end of file
+# Development
+DEVELOPMENT_MODE=false
+LOG_LEVEL=INFO
diff --git a/backend/Dockerfile.test b/backend/Dockerfile.test
new file mode 100644
index 00000000..21021354
--- /dev/null
+++ b/backend/Dockerfile.test
@@ -0,0 +1,25 @@
+# Test runner container - lightweight, uses same network as services
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+ gcc \
+ curl \
+ && rm -rf /var/lib/apt/lists/*
+
+# Copy requirements
+COPY requirements.txt requirements-dev.txt ./
+
+# Install Python dependencies
+RUN pip install --no-cache-dir -r requirements.txt -r requirements-dev.txt
+
+# Copy application code
+COPY . .
+
+# Set Python path
+ENV PYTHONPATH=/app
+
+# Default command runs all tests
+CMD ["pytest", "-v", "--tb=short"]
\ No newline at end of file
diff --git a/backend/alertmanager/alertmanager.yml b/backend/alertmanager/alertmanager.yml
deleted file mode 100644
index 4599dc37..00000000
--- a/backend/alertmanager/alertmanager.yml
+++ /dev/null
@@ -1,66 +0,0 @@
-global:
- smtp_smarthost: 'localhost:587'
- smtp_from: 'alertmanager@example.com'
-
-route:
- group_by: ['alertname', 'severity']
- group_wait: 30s
- group_interval: 5m
- repeat_interval: 4h
- receiver: 'default'
- routes:
- - match:
- severity: critical
- match_re:
- alertname: '(NetworkPolicyViolations|PrivilegeEscalationAttempts|CriticalMemoryUtilization|CriticalCPUUtilization|KafkaProducerCriticalLatency|KafkaProducerCriticalQueueBacklog|KafkaConsumerCriticalLag|CriticalEventProcessingFailureRate|CriticalEventProcessingTime|KafkaProducerDown|KafkaConsumerStalled)'
- group_wait: 10s
- group_interval: 1m
- repeat_interval: 30m
- receiver: 'critical-security'
-
- - match:
- severity: critical
- group_wait: 15s
- group_interval: 2m
- repeat_interval: 1h
- receiver: 'critical-infrastructure'
-
- - match:
- severity: warning
- group_wait: 1m
- group_interval: 10m
- repeat_interval: 6h
- receiver: 'warning'
-
-receivers:
- - name: 'default'
- webhook_configs:
- - url: 'https://backend:443/api/v1/alertmanager/webhook'
- send_resolved: true
- http_config:
- tls_config:
- insecure_skip_verify: true # Accept self-signed certificates for local development
-
- - name: 'critical-security'
- webhook_configs:
- - url: 'https://backend:443/api/v1/alertmanager/webhook'
- send_resolved: true
- http_config:
- tls_config:
- insecure_skip_verify: true # Accept self-signed certificates for local development
-
- - name: 'critical-infrastructure'
- webhook_configs:
- - url: 'https://backend:443/api/v1/alertmanager/webhook'
- send_resolved: true
- http_config:
- tls_config:
- insecure_skip_verify: true # Accept self-signed certificates for local development
-
- - name: 'warning'
- webhook_configs:
- - url: 'https://backend:443/api/v1/alertmanager/webhook'
- send_resolved: true
- http_config:
- tls_config:
- insecure_skip_verify: true # Accept self-signed certificates for local development
\ No newline at end of file
diff --git a/backend/app/api/dependencies.py b/backend/app/api/dependencies.py
index cc73634f..47c3c7ee 100644
--- a/backend/app/api/dependencies.py
+++ b/backend/app/api/dependencies.py
@@ -1,100 +1,24 @@
-from typing import Optional
-
from dishka import FromDishka
from dishka.integrations.fastapi import inject
-from fastapi import HTTPException, Request, status
-
-from app.core.logging import logger
-from app.core.security import security_service
-from app.db.repositories.user_repository import UserRepository
-from app.domain.enums.user import UserRole
-from app.schemas_pydantic.user import User, UserResponse
-
-
-class AuthService:
- def __init__(self, user_repo: UserRepository):
- self.user_repo = user_repo
-
- async def get_current_user(self, request: Request) -> UserResponse:
- try:
- token = request.cookies.get("access_token")
- if not token:
- raise HTTPException(
- status_code=status.HTTP_401_UNAUTHORIZED,
- detail="Not authenticated",
- headers={"WWW-Authenticate": "Bearer"},
- )
+from fastapi import Request
- user_in_db = await security_service.get_current_user(token, self.user_repo)
-
- return UserResponse(
- user_id=user_in_db.user_id,
- username=user_in_db.username,
- email=user_in_db.email,
- role=user_in_db.role,
- is_superuser=user_in_db.is_superuser,
- created_at=user_in_db.created_at,
- updated_at=user_in_db.updated_at
- )
- except Exception as e:
- logger.error(f"Authentication failed: {e}", exc_info=True)
- raise HTTPException(
- status_code=status.HTTP_401_UNAUTHORIZED,
- detail="Not authenticated",
- headers={"WWW-Authenticate": "Bearer"},
- ) from e
-
- async def require_admin(self, request: Request) -> UserResponse:
- user = await self.get_current_user(request)
- if user.role != UserRole.ADMIN:
- logger.warning(
- f"Admin access denied for user: {user.username} (role: {user.role})"
- )
- raise HTTPException(
- status_code=status.HTTP_403_FORBIDDEN,
- detail="Admin access required"
- )
- return user
-
-
-@inject
-async def require_auth_guard(
- request: Request,
- auth_service: FromDishka[AuthService],
-) -> None:
- await auth_service.get_current_user(request)
+from app.schemas_pydantic.user import UserResponse
+from app.services.auth_service import AuthService
@inject
-async def require_admin_guard(
- request: Request,
- auth_service: FromDishka[AuthService],
-) -> None:
- await auth_service.require_admin(request)
+async def current_user(
+ request: Request,
+ auth_service: FromDishka[AuthService]
+) -> UserResponse:
+ """Get authenticated user."""
+ return await auth_service.get_current_user(request)
@inject
-async def get_current_user_optional(
- request: Request,
- auth_service: FromDishka[AuthService],
-) -> Optional[User]:
- """
- Get current user if authenticated, otherwise return None.
- This is used for optional authentication, like rate limiting.
- """
- try:
- user_response = await auth_service.get_current_user(request)
- # Convert UserResponse to User for compatibility
- return User(
- user_id=user_response.user_id,
- username=user_response.username,
- email=user_response.email,
- role=user_response.role,
- is_active=True, # If they can authenticate, they're active
- is_superuser=user_response.is_superuser,
- created_at=user_response.created_at,
- updated_at=user_response.updated_at
- )
- except HTTPException:
- # User is not authenticated, return None
- return None
+async def admin_user(
+ request: Request,
+ auth_service: FromDishka[AuthService]
+) -> UserResponse:
+ """Get authenticated admin user."""
+ return await auth_service.get_admin(request)
diff --git a/backend/app/api/rate_limit.py b/backend/app/api/rate_limit.py
deleted file mode 100644
index ea0b406c..00000000
--- a/backend/app/api/rate_limit.py
+++ /dev/null
@@ -1,96 +0,0 @@
-from typing import Optional
-
-from dishka import FromDishka
-from dishka.integrations.fastapi import inject
-from fastapi import Depends, HTTPException, Request
-
-from app.api.dependencies import get_current_user_optional
-from app.core.logging import logger
-from app.core.utils import get_client_ip
-from app.schemas_pydantic.user import User
-from app.services.rate_limit_service import RateLimitService
-
-
-@inject
-async def check_rate_limit(
- request: Request,
- rate_limit_service: FromDishka[RateLimitService],
- current_user: Optional[User] = Depends(get_current_user_optional),
-) -> None:
- """
- Rate limiting dependency for API endpoints.
-
- Features:
- - User-based limits for authenticated users
- - IP-based limits for anonymous users (50% of normal limits)
- - Dynamic configuration from Redis
- - Detailed error responses
-
- Usage:
- @router.get("/endpoint", dependencies=[Depends(check_rate_limit)])
- async def my_endpoint():
- ...
- """
- # Determine identifier and multiplier
- if current_user:
- identifier = current_user.user_id
- username = current_user.username
- multiplier = 1.0
- else:
- identifier = f"ip:{get_client_ip(request)}"
- username = None
- multiplier = 0.5 # Anonymous users get half the limit
-
- # Check rate limit
- status = await rate_limit_service.check_rate_limit(
- user_id=identifier,
- endpoint=request.url.path,
- username=username
- )
-
- # Apply multiplier for anonymous users
- if not current_user and multiplier < 1.0:
- status.limit = max(1, int(status.limit * multiplier))
- status.remaining = min(status.remaining, status.limit)
-
- # Add headers to response (via request state)
- request.state.rate_limit_headers = {
- "X-RateLimit-Limit": str(status.limit),
- "X-RateLimit-Remaining": str(status.remaining),
- "X-RateLimit-Reset": str(int(status.reset_at.timestamp())),
- "X-RateLimit-Algorithm": status.algorithm
- }
-
- # Enforce limit
- if not status.allowed:
- logger.warning(
- f"Rate limit exceeded for {identifier} on {request.url.path}",
- extra={
- "identifier": identifier,
- "endpoint": request.url.path,
- "limit": status.limit,
- "algorithm": status.algorithm.value
- }
- )
-
- raise HTTPException(
- status_code=429,
- detail={
- "message": "Rate limit exceeded",
- "retry_after": status.retry_after,
- "reset_at": status.reset_at.isoformat(),
- "limit": status.limit,
- "remaining": 0,
- "algorithm": status.algorithm.value
- },
- headers={
- "X-RateLimit-Limit": str(status.limit),
- "X-RateLimit-Remaining": "0",
- "X-RateLimit-Reset": str(int(status.reset_at.timestamp())),
- "Retry-After": str(status.retry_after or 60)
- }
- )
-
-
-# Alias for backward compatibility
-DynamicRateLimiter = check_rate_limit
diff --git a/backend/app/api/routes/admin/events.py b/backend/app/api/routes/admin/events.py
index dfd614a3..591a4050 100644
--- a/backend/app/api/routes/admin/events.py
+++ b/backend/app/api/routes/admin/events.py
@@ -1,30 +1,23 @@
-import csv
-import json
-from datetime import datetime, timezone
-from io import StringIO
+from datetime import datetime
+from typing import Annotated
from dishka import FromDishka
from dishka.integrations.fastapi import DishkaRoute
-from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query, Request
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Query
from fastapi.encoders import jsonable_encoder
from fastapi.responses import StreamingResponse
-from app.api.dependencies import AuthService, require_admin_guard
+from app.api.dependencies import admin_user
from app.core.correlation import CorrelationContext
-from app.core.logging import logger
-from app.core.service_dependencies import AdminEventsRepositoryDep
-from app.domain.admin.replay_models import ReplayQuery, ReplaySessionFields
-from app.domain.enums.replay import ReplayTarget, ReplayType
-from app.domain.events.event_models import EventFilter, ReplaySessionStatus
-from app.domain.replay.models import ReplayConfig, ReplayFilter
-from app.infrastructure.mappers.event_mapper import (
+from app.domain.enums.events import EventType
+from app.infrastructure.mappers import (
+ AdminReplayApiMapper,
EventDetailMapper,
- EventExportRowMapper,
+ EventFilterMapper,
EventMapper,
EventStatisticsMapper,
- EventSummaryMapper,
+ ReplaySessionMapper,
)
-from app.infrastructure.mappers.replay_mapper import ReplaySessionMapper
from app.schemas_pydantic.admin_events import (
EventBrowseRequest,
EventBrowseResponse,
@@ -35,35 +28,27 @@
EventReplayStatusResponse,
EventStatsResponse,
)
-from app.services.replay_service import ReplayService
+from app.schemas_pydantic.admin_events import EventFilter as AdminEventFilter
+from app.schemas_pydantic.user import UserResponse
+from app.services.admin import AdminEventsService
router = APIRouter(
prefix="/admin/events",
tags=["admin-events"],
route_class=DishkaRoute,
- dependencies=[Depends(require_admin_guard)]
+ dependencies=[Depends(admin_user)]
)
@router.post("/browse")
async def browse_events(
request: EventBrowseRequest,
- repository: AdminEventsRepositoryDep
+ service: FromDishka[AdminEventsService]
) -> EventBrowseResponse:
try:
- # Convert request to domain model
- event_filter = EventFilter(
- event_types=request.filters.event_types,
- aggregate_id=request.filters.aggregate_id,
- correlation_id=request.filters.correlation_id,
- user_id=request.filters.user_id,
- service_name=request.filters.service_name,
- start_time=request.filters.start_time,
- end_time=request.filters.end_time,
- search_text=request.filters.search_text
- )
+ event_filter = EventFilterMapper.from_admin_pydantic(request.filters)
- result = await repository.browse_events(
+ result = await service.browse_events(
filter=event_filter,
skip=request.skip,
limit=request.limit,
@@ -71,7 +56,6 @@ async def browse_events(
sort_order=request.sort_order
)
- # Convert domain model to response
event_mapper = EventMapper()
return EventBrowseResponse(
events=[jsonable_encoder(event_mapper.to_dict(event)) for event in result.events],
@@ -81,37 +65,34 @@ async def browse_events(
)
except Exception as e:
- logger.error(f"Error browsing events: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/stats")
async def get_event_stats(
- repository: AdminEventsRepositoryDep,
+ service: FromDishka[AdminEventsService],
hours: int = Query(default=24, le=168),
) -> EventStatsResponse:
try:
- stats = await repository.get_event_stats(hours=hours)
+ stats = await service.get_event_stats(hours=hours)
stats_mapper = EventStatisticsMapper()
return EventStatsResponse(**stats_mapper.to_dict(stats))
except Exception as e:
- logger.error(f"Error getting event stats: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/{event_id}")
async def get_event_detail(
event_id: str,
- repository: AdminEventsRepositoryDep
+ service: FromDishka[AdminEventsService]
) -> EventDetailResponse:
try:
- result = await repository.get_event_detail(event_id)
+ result = await service.get_event_detail(event_id)
if not result:
raise HTTPException(status_code=404, detail="Event not found")
- # Convert domain model to response
detail_mapper = EventDetailMapper()
serialized_result = jsonable_encoder(detail_mapper.to_dict(result))
return EventDetailResponse(
@@ -123,7 +104,6 @@ async def get_event_detail(
except HTTPException:
raise
except Exception as e:
- logger.error(f"Error getting event detail: {e}")
raise HTTPException(status_code=500, detail=str(e))
@@ -131,110 +111,51 @@ async def get_event_detail(
async def replay_events(
request: EventReplayRequest,
background_tasks: BackgroundTasks,
- repository: AdminEventsRepositoryDep,
- replay_service: FromDishka[ReplayService]
+ service: FromDishka[AdminEventsService]
) -> EventReplayResponse:
try:
- # Build query from request
- replay_query = ReplayQuery(
- event_ids=request.event_ids,
- correlation_id=request.correlation_id,
- aggregate_id=request.aggregate_id,
- start_time=request.start_time,
- end_time=request.end_time
- )
- query = repository.build_replay_query(replay_query)
-
- if not query:
- raise HTTPException(
- status_code=400,
- detail="Must specify at least one filter for replay"
- )
-
replay_correlation_id = f"replay_{CorrelationContext.get_correlation_id()}"
-
- # Prepare replay session
+ rq = AdminReplayApiMapper.request_to_query(request)
try:
- session_data = await repository.prepare_replay_session(
- query=query,
+ result = await service.prepare_or_schedule_replay(
+ replay_query=rq,
dry_run=request.dry_run,
replay_correlation_id=replay_correlation_id,
- max_events=1000
+ target_service=request.target_service,
)
except ValueError as e:
- if "No events found" in str(e):
- raise HTTPException(status_code=404, detail=str(e))
- elif "Too many events" in str(e):
- raise HTTPException(status_code=400, detail=str(e))
+ msg = str(e)
+ if "No events found" in msg:
+ raise HTTPException(status_code=404, detail=msg)
+ if "Too many events" in msg:
+ raise HTTPException(status_code=400, detail=msg)
raise
- # If dry run, return preview
- if request.dry_run:
- summary_mapper = EventSummaryMapper()
- return EventReplayResponse(
- dry_run=True,
- total_events=session_data.total_events,
- replay_correlation_id=replay_correlation_id,
- status="Preview",
- events_preview=[jsonable_encoder(summary_mapper.to_dict(e)) for e in session_data.events_preview]
- )
-
- # Create replay configuration with custom query
- logger.info(f"Replay query for session: {query}")
- replay_filter = ReplayFilter(custom_query=query)
- replay_config = ReplayConfig(
- replay_type=ReplayType.QUERY,
- target=ReplayTarget.KAFKA if request.target_service else ReplayTarget.TEST,
- filter=replay_filter,
- speed_multiplier=1.0,
- preserve_timestamps=False,
- batch_size=100,
- max_events=1000,
- skip_errors=True
- )
-
- # Create replay session using the config
- replay_response = await replay_service.create_session(replay_config)
- session_id = replay_response.session_id
-
- # Update the existing replay session with additional metadata
- await repository.update_replay_session(
- session_id=str(session_id),
- updates={
- ReplaySessionFields.TOTAL_EVENTS: session_data.total_events,
- ReplaySessionFields.CORRELATION_ID: replay_correlation_id,
- ReplaySessionFields.STATUS: ReplaySessionStatus.SCHEDULED
- }
- )
-
- # Start the replay session
- background_tasks.add_task(
- replay_service.start_session,
- session_id
- )
+ if not result.dry_run and result.session_id:
+ background_tasks.add_task(service.start_replay_session, result.session_id)
return EventReplayResponse(
- dry_run=False,
- total_events=session_data.total_events,
- replay_correlation_id=replay_correlation_id,
- session_id=str(session_id),
- status="Replay scheduled in background"
+ dry_run=result.dry_run,
+ total_events=result.total_events,
+ replay_correlation_id=result.replay_correlation_id,
+ session_id=result.session_id,
+ status=result.status,
+ events_preview=result.events_preview,
)
except HTTPException:
raise
except Exception as e:
- logger.error(f"Error replaying events: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/replay/{session_id}/status")
async def get_replay_status(
session_id: str,
- repository: AdminEventsRepositoryDep
+ service: FromDishka[AdminEventsService]
) -> EventReplayStatusResponse:
try:
- status = await repository.get_replay_status_with_progress(session_id)
+ status = await service.get_replay_status(session_id)
if not status:
raise HTTPException(status_code=404, detail="Replay session not found")
@@ -245,45 +166,20 @@ async def get_replay_status(
except HTTPException:
raise
except Exception as e:
- logger.error(f"Error getting replay status: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.delete("/{event_id}")
async def delete_event(
event_id: str,
- repository: AdminEventsRepositoryDep,
- request: Request,
- auth_service: FromDishka[AuthService]
+ admin: Annotated[UserResponse, Depends(admin_user)],
+ service: FromDishka[AdminEventsService]
) -> EventDeleteResponse:
- current_user = await auth_service.require_admin(request)
try:
- logger.warning(
- f"Admin {current_user.email} attempting to delete event {event_id}"
- )
-
- # Get event details first for archiving
- event_detail = await repository.get_event_detail(event_id)
- if not event_detail:
- raise HTTPException(status_code=404, detail="Event not found")
-
- # Archive the event before deletion
- await repository.archive_event(event_detail.event, current_user.email)
-
- # Delete the event
- deleted = await repository.delete_event(event_id)
-
+ deleted = await service.delete_event(event_id=event_id, deleted_by=admin.email)
if not deleted:
raise HTTPException(status_code=500, detail="Failed to delete event")
- logger.info(
- f"Event {event_id} deleted by {current_user.email}",
- extra={
- "event_type": event_detail.event.event_type,
- "correlation_id": event_detail.event.correlation_id
- }
- )
-
return EventDeleteResponse(
message="Event deleted and archived",
event_id=event_id
@@ -292,132 +188,67 @@ async def delete_event(
except HTTPException:
raise
except Exception as e:
- logger.error(f"Error deleting event: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/export/csv")
async def export_events_csv(
- repository: AdminEventsRepositoryDep,
- event_types: str | None = Query(None, description="Comma-separated event types"),
- start_time: float | None = None,
- end_time: float | None = None,
+ service: FromDishka[AdminEventsService],
+ event_types: list[EventType] | None = Query(None, description="Event types (repeat param for multiple)"),
+ start_time: datetime | None = Query(None, description="Start time"),
+ end_time: datetime | None = Query(None, description="End time"),
limit: int = Query(default=10000, le=50000),
) -> StreamingResponse:
try:
- # Create filter for export
- export_filter = EventFilter(
- event_types=event_types.split(",") if event_types else None,
- start_time=datetime.fromtimestamp(start_time, tz=timezone.utc) if start_time else None,
- end_time=datetime.fromtimestamp(end_time, tz=timezone.utc) if end_time else None
+ export_filter = EventFilterMapper.from_admin_pydantic(
+ AdminEventFilter(
+ event_types=event_types,
+ start_time=start_time,
+ end_time=end_time,
+ )
)
-
- export_rows = await repository.export_events_csv(export_filter)
-
- output = StringIO()
- writer = csv.DictWriter(output, fieldnames=[
- "Event ID", "Event Type", "Timestamp", "Correlation ID",
- "Aggregate ID", "User ID", "Service", "Status", "Error"
- ])
-
- writer.writeheader()
- row_mapper = EventExportRowMapper()
- for row in export_rows[:limit]:
- writer.writerow(row_mapper.to_dict(row))
-
- output.seek(0)
- filename = f"events_export_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}.csv"
-
+ result = await service.export_events_csv_content(filter=export_filter, limit=limit)
return StreamingResponse(
- iter([output.getvalue()]),
- media_type="text/csv",
- headers={
- "Content-Disposition": f"attachment; filename={filename}"
- }
+ iter([result.content]),
+ media_type=result.media_type,
+ headers={"Content-Disposition": f"attachment; filename={result.filename}"},
)
except Exception as e:
- logger.error(f"Error exporting events: {e}")
raise HTTPException(status_code=500, detail=str(e))
@router.get("/export/json")
async def export_events_json(
- repository: AdminEventsRepositoryDep,
- event_types: str | None = Query(None, description="Comma-separated event types"),
+ service: FromDishka[AdminEventsService],
+ event_types: list[EventType] | None = Query(None, description="Event types (repeat param for multiple)"),
aggregate_id: str | None = Query(None, description="Aggregate ID filter"),
correlation_id: str | None = Query(None, description="Correlation ID filter"),
user_id: str | None = Query(None, description="User ID filter"),
service_name: str | None = Query(None, description="Service name filter"),
- start_time: str | None = Query(None, description="Start time (ISO format)"),
- end_time: str | None = Query(None, description="End time (ISO format)"),
+ start_time: datetime | None = Query(None, description="Start time"),
+ end_time: datetime | None = Query(None, description="End time"),
limit: int = Query(default=10000, le=50000),
) -> StreamingResponse:
"""Export events as JSON with comprehensive filtering."""
try:
- # Create filter for export
- export_filter = EventFilter(
- event_types=event_types.split(",") if event_types else None,
- aggregate_id=aggregate_id,
- correlation_id=correlation_id,
- user_id=user_id,
- service_name=service_name,
- start_time=datetime.fromisoformat(start_time) if start_time else None,
- end_time=datetime.fromisoformat(end_time) if end_time else None
- )
-
- # Get events from repository
- result = await repository.browse_events(
- filter=export_filter,
- skip=0,
- limit=limit,
- sort_by="timestamp",
- sort_order=-1
+ export_filter = EventFilterMapper.from_admin_pydantic(
+ AdminEventFilter(
+ event_types=event_types,
+ aggregate_id=aggregate_id,
+ correlation_id=correlation_id,
+ user_id=user_id,
+ service_name=service_name,
+ start_time=start_time,
+ end_time=end_time,
+ )
)
-
- # Convert events to JSON-serializable format
- event_mapper = EventMapper()
- events_data = []
- for event in result.events:
- event_dict = event_mapper.to_dict(event)
- # Convert datetime fields to ISO format for JSON serialization
- # MongoDB always returns datetime objects, so we can use isinstance
- for field in ["timestamp", "created_at", "updated_at", "stored_at", "ttl_expires_at"]:
- if field in event_dict and isinstance(event_dict[field], datetime):
- event_dict[field] = event_dict[field].isoformat()
- events_data.append(event_dict)
-
- # Create export metadata
- export_data = {
- "export_metadata": {
- "exported_at": datetime.now(timezone.utc).isoformat(),
- "total_events": len(events_data),
- "filters_applied": {
- "event_types": event_types.split(",") if event_types else None,
- "aggregate_id": aggregate_id,
- "correlation_id": correlation_id,
- "user_id": user_id,
- "service_name": service_name,
- "start_time": start_time,
- "end_time": end_time
- },
- "export_limit": limit
- },
- "events": events_data
- }
-
- # Convert to JSON string with pretty formatting
- json_content = json.dumps(export_data, indent=2, default=str)
- filename = f"events_export_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}.json"
-
+ result = await service.export_events_json_content(filter=export_filter, limit=limit)
return StreamingResponse(
- iter([json_content]),
- media_type="application/json",
- headers={
- "Content-Disposition": f"attachment; filename={filename}"
- }
+ iter([result.content]),
+ media_type=result.media_type,
+ headers={"Content-Disposition": f"attachment; filename={result.filename}"},
)
except Exception as e:
- logger.error(f"Error exporting events as JSON: {e}")
raise HTTPException(status_code=500, detail=str(e))
diff --git a/backend/app/api/routes/admin/settings.py b/backend/app/api/routes/admin/settings.py
index 4b2f9180..e254b6f5 100644
--- a/backend/app/api/routes/admin/settings.py
+++ b/backend/app/api/routes/admin/settings.py
@@ -1,127 +1,80 @@
+from typing import Annotated
+
from dishka import FromDishka
from dishka.integrations.fastapi import DishkaRoute
-from fastapi import APIRouter, Depends, HTTPException, Request
+from fastapi import APIRouter, Depends, HTTPException
from pydantic import ValidationError
-from app.api.dependencies import AuthService, require_admin_guard
-from app.core.logging import logger
-from app.core.service_dependencies import AdminSettingsRepositoryDep
-from app.infrastructure.mappers.admin_mapper import SettingsMapper
+from app.api.dependencies import admin_user
+from app.infrastructure.mappers import SettingsMapper
from app.schemas_pydantic.admin_settings import SystemSettings
+from app.schemas_pydantic.user import UserResponse
+from app.services.admin import AdminSettingsService
router = APIRouter(
prefix="/admin/settings",
tags=["admin", "settings"],
route_class=DishkaRoute,
- dependencies=[Depends(require_admin_guard)]
+ dependencies=[Depends(admin_user)]
)
@router.get("/", response_model=SystemSettings)
async def get_system_settings(
- repository: AdminSettingsRepositoryDep,
-
- request: Request, auth_service: FromDishka[AuthService],
+ admin: Annotated[UserResponse, Depends(admin_user)],
+ service: FromDishka[AdminSettingsService],
) -> SystemSettings:
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin retrieving system settings",
- extra={"admin_username": current_user.username}
- )
-
try:
- domain_settings = await repository.get_system_settings()
- # Convert domain model to pydantic schema
+ domain_settings = await service.get_system_settings(admin.username)
settings_mapper = SettingsMapper()
return SystemSettings(**settings_mapper.system_settings_to_pydantic_dict(domain_settings))
- except Exception as e:
- logger.error(f"Failed to retrieve system settings: {str(e)}", exc_info=True)
+ except Exception:
raise HTTPException(status_code=500, detail="Failed to retrieve settings")
@router.put("/", response_model=SystemSettings)
async def update_system_settings(
+ admin: Annotated[UserResponse, Depends(admin_user)],
settings: SystemSettings,
- repository: AdminSettingsRepositoryDep,
-
- request: Request, auth_service: FromDishka[AuthService],
+ service: FromDishka[AdminSettingsService],
) -> SystemSettings:
- current_user = await auth_service.require_admin(request)
- # Validate settings completeness
- try:
- settings_dict = settings.model_dump()
- if not settings_dict:
- raise ValueError("Empty settings payload")
- except Exception as e:
- logger.warning(f"Invalid settings payload from {current_user.username}: {str(e)}")
- raise HTTPException(status_code=400, detail="Invalid settings payload")
-
- logger.info(
- "Admin updating system settings",
- extra={
- "admin_username": current_user.username,
- "settings": settings_dict
- }
- )
-
- # Validate and convert to domain model
try:
settings_mapper = SettingsMapper()
- domain_settings = settings_mapper.system_settings_from_pydantic(settings_dict)
+ domain_settings = settings_mapper.system_settings_from_pydantic(settings.model_dump())
except (ValueError, ValidationError, KeyError) as e:
- logger.warning(
- f"Settings validation failed for {current_user.username}: {str(e)}",
- extra={"settings": settings_dict}
- )
raise HTTPException(
status_code=422,
detail=f"Invalid settings: {str(e)}"
)
- except Exception as e:
- logger.error(f"Unexpected error during settings validation: {str(e)}", exc_info=True)
+ except Exception:
raise HTTPException(status_code=400, detail="Invalid settings format")
# Perform the update
try:
- updated_domain_settings = await repository.update_system_settings(
- settings=domain_settings,
- updated_by=current_user.username,
- user_id=current_user.user_id
+ updated_domain_settings = await service.update_system_settings(
+ domain_settings,
+ updated_by=admin.username,
+ user_id=admin.user_id,
)
- logger.info("System settings updated successfully")
# Convert back to pydantic schema for response
settings_mapper = SettingsMapper()
return SystemSettings(**settings_mapper.system_settings_to_pydantic_dict(updated_domain_settings))
- except Exception as e:
- logger.error(f"Failed to update system settings: {str(e)}", exc_info=True)
+ except Exception:
raise HTTPException(status_code=500, detail="Failed to update settings")
@router.post("/reset", response_model=SystemSettings)
async def reset_system_settings(
- repository: AdminSettingsRepositoryDep,
-
- request: Request, auth_service: FromDishka[AuthService],
+ admin: Annotated[UserResponse, Depends(admin_user)],
+ service: FromDishka[AdminSettingsService],
) -> SystemSettings:
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin resetting system settings to defaults",
- extra={"admin_username": current_user.username}
- )
-
try:
- reset_domain_settings = await repository.reset_system_settings(
- username=current_user.username,
- user_id=current_user.user_id
- )
-
- logger.info("System settings reset to defaults")
+ reset_domain_settings = await service.reset_system_settings(admin.username, admin.user_id)
settings_mapper = SettingsMapper()
return SystemSettings(**settings_mapper.system_settings_to_pydantic_dict(reset_domain_settings))
- except Exception as e:
- logger.error(f"Failed to reset system settings: {str(e)}", exc_info=True)
+ except Exception:
raise HTTPException(status_code=500, detail="Failed to reset settings")
diff --git a/backend/app/api/routes/admin/users.py b/backend/app/api/routes/admin/users.py
index e3d8acee..b10216d8 100644
--- a/backend/app/api/routes/admin/users.py
+++ b/backend/app/api/routes/admin/users.py
@@ -1,24 +1,16 @@
-import uuid
-from datetime import datetime, timezone
+from typing import Annotated
from dishka import FromDishka
from dishka.integrations.fastapi import DishkaRoute
-from fastapi import APIRouter, Depends, HTTPException, Query, Request
+from fastapi import APIRouter, Depends, HTTPException, Query
-from app.api.dependencies import AuthService, require_admin_guard
-from app.core.logging import logger
-from app.core.security import SecurityService
-from app.core.service_dependencies import AdminUserRepositoryDep
-from app.domain.admin.user_models import (
- PasswordReset,
-)
-from app.domain.admin.user_models import (
+from app.api.dependencies import admin_user
+from app.db.repositories.admin.admin_user_repository import AdminUserRepository
+from app.domain.rate_limit import UserRateLimit
+from app.domain.user import (
UserUpdate as DomainUserUpdate,
)
-from app.domain.rate_limit import UserRateLimit
-from app.infrastructure.mappers.admin_mapper import UserMapper
-from app.infrastructure.mappers.admin_overview_api_mapper import AdminOverviewApiMapper
-from app.infrastructure.mappers.rate_limit_mapper import UserRateLimitMapper
+from app.infrastructure.mappers import AdminOverviewApiMapper, UserMapper
from app.schemas_pydantic.admin_user_overview import AdminUserOverview
from app.schemas_pydantic.user import (
MessageResponse,
@@ -29,449 +21,193 @@
UserRole,
UserUpdate,
)
-from app.services.admin_user_service import AdminUserService
+from app.services.admin import AdminUserService
from app.services.rate_limit_service import RateLimitService
router = APIRouter(
prefix="/admin/users",
tags=["admin", "users"],
route_class=DishkaRoute,
- dependencies=[Depends(require_admin_guard)]
+ dependencies=[Depends(admin_user)]
)
@router.get("/", response_model=UserListResponse)
async def list_users(
- request: Request,
- user_repo: AdminUserRepositoryDep,
- auth_service: FromDishka[AuthService],
+ admin: Annotated[UserResponse, Depends(admin_user)],
+ admin_user_service: FromDishka[AdminUserService],
rate_limit_service: FromDishka[RateLimitService],
limit: int = Query(default=100, le=1000),
offset: int = Query(default=0, ge=0),
search: str | None = None,
- role: str | None = None,
+ role: UserRole | None = None,
) -> UserListResponse:
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin listing users",
- extra={
- "admin_username": current_user.username,
- "limit": limit,
- "offset": offset,
- "search": search,
- "role": role,
- },
+ result = await admin_user_service.list_users(
+ admin_username=admin.username,
+ limit=limit,
+ offset=offset,
+ search=search,
+ role=role,
)
- try:
- result = await user_repo.list_users(
- limit=limit,
- offset=offset,
- search=search,
- role=role
- )
-
- # Convert domain users to response models with rate limit data
- user_mapper = UserMapper()
- user_responses = []
- for user in result.users:
- user_dict = user_mapper.to_response_dict(user)
-
- # Add rate limit summary data
- user_rate_limit = await rate_limit_service.get_user_rate_limit(user.user_id)
- if user_rate_limit:
- user_dict["bypass_rate_limit"] = user_rate_limit.bypass_rate_limit
- user_dict["global_multiplier"] = user_rate_limit.global_multiplier
- user_dict["has_custom_limits"] = bool(user_rate_limit.rules)
- else:
- user_dict["bypass_rate_limit"] = False
- user_dict["global_multiplier"] = 1.0
- user_dict["has_custom_limits"] = False
-
- user_responses.append(UserResponse(**user_dict))
-
- return UserListResponse(
- users=user_responses,
- total=result.total,
- offset=result.offset,
- limit=result.limit
- )
-
- except Exception as e:
- logger.error(f"Failed to list users: {str(e)}", exc_info=True)
- raise HTTPException(status_code=500, detail="Failed to list users")
+ user_mapper = UserMapper()
+ summaries = await rate_limit_service.get_user_rate_limit_summaries([u.user_id for u in result.users])
+ user_responses: list[UserResponse] = []
+ for user in result.users:
+ user_dict = user_mapper.to_response_dict(user)
+ summary = summaries.get(user.user_id)
+ if summary:
+ user_dict["bypass_rate_limit"] = summary.bypass_rate_limit
+ user_dict["global_multiplier"] = summary.global_multiplier
+ user_dict["has_custom_limits"] = summary.has_custom_limits
+ user_responses.append(UserResponse(**user_dict))
+
+ return UserListResponse(
+ users=user_responses,
+ total=result.total,
+ offset=result.offset,
+ limit=result.limit,
+ )
@router.post("/", response_model=UserResponse)
async def create_user(
- request: Request,
+ admin: Annotated[UserResponse, Depends(admin_user)],
user_data: UserCreate,
- user_repo: AdminUserRepositoryDep,
- auth_service: FromDishka[AuthService],
+ admin_user_service: FromDishka[AdminUserService],
) -> UserResponse:
"""Create a new user (admin only)."""
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin creating new user",
- extra={
- "admin_username": current_user.username,
- "new_username": user_data.username,
- },
- )
-
+ # Delegate to service; map known validation error to 400
try:
- # Check if user already exists by searching for username
- search_result = await user_repo.list_users(
- limit=1,
- offset=0,
- search=user_data.username
- )
-
- # Check if exact username match exists
- for user in search_result.users:
- if user.username == user_data.username:
- raise HTTPException(
- status_code=400,
- detail="Username already exists"
- )
-
- # Hash the password
- security_service = SecurityService()
- hashed_password = security_service.get_password_hash(user_data.password)
-
- # Create user with proper typing
- user_id = str(uuid.uuid4())
- username = user_data.username
- email = user_data.email
- role = getattr(user_data, 'role', UserRole.USER)
- is_active = getattr(user_data, 'is_active', True)
- is_superuser = False # Default for new users
- created_at = datetime.now(timezone.utc)
- updated_at = datetime.now(timezone.utc)
-
- # Create user document for MongoDB
- user_doc = {
- "user_id": user_id,
- "username": username,
- "email": email,
- "hashed_password": hashed_password,
- "role": role,
- "is_active": is_active,
- "is_superuser": is_superuser,
- "created_at": created_at,
- "updated_at": updated_at
- }
-
- # Insert directly to MongoDB
- await user_repo.users_collection.insert_one(user_doc)
-
- logger.info(f"User {username} created successfully by {current_user.username}")
-
- return UserResponse(
- user_id=user_id,
- username=username,
- email=email,
- role=role,
- is_active=is_active,
- is_superuser=is_superuser,
- created_at=created_at,
- updated_at=updated_at
- )
-
- except HTTPException:
- raise
- except Exception as e:
- logger.error(f"Failed to create user: {str(e)}", exc_info=True)
- raise HTTPException(status_code=500, detail="Failed to create user")
+ domain_user = await admin_user_service.create_user(admin_username=admin.username, user_data=user_data)
+ except ValueError as ve:
+ raise HTTPException(status_code=400, detail=str(ve))
+ user_mapper = UserMapper()
+ return UserResponse(**user_mapper.to_response_dict(domain_user))
@router.get("/{user_id}", response_model=UserResponse)
async def get_user(
+ admin: Annotated[UserResponse, Depends(admin_user)],
user_id: str,
- user_repo: AdminUserRepositoryDep,
- request: Request,
- auth_service: FromDishka[AuthService],
+ admin_user_service: FromDishka[AdminUserService],
) -> UserResponse:
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin getting user details",
- extra={
- "admin_username": current_user.username,
- "target_user_id": user_id,
- },
- )
-
- try:
- user = await user_repo.get_user_by_id(user_id)
- if not user:
- raise HTTPException(status_code=404, detail="User not found")
-
- user_mapper = UserMapper()
- return UserResponse(**user_mapper.to_response_dict(user))
+ user = await admin_user_service.get_user(admin_username=admin.username, user_id=user_id)
+ if not user:
+ raise HTTPException(status_code=404, detail="User not found")
- except HTTPException:
- raise
- except Exception as e:
- logger.error(f"Failed to get user {user_id}: {str(e)}", exc_info=True)
- raise HTTPException(status_code=500, detail="Failed to get user")
+ user_mapper = UserMapper()
+ return UserResponse(**user_mapper.to_response_dict(user))
@router.get("/{user_id}/overview", response_model=AdminUserOverview)
async def get_user_overview(
+ admin: Annotated[UserResponse, Depends(admin_user)],
user_id: str,
- request: Request,
- auth_service: FromDishka[AuthService],
admin_user_service: FromDishka[AdminUserService],
) -> AdminUserOverview:
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin getting user overview",
- extra={
- "admin_username": current_user.username,
- "target_user_id": user_id,
- },
- )
-
+ # Service raises ValueError if not found -> map to 404
try:
domain = await admin_user_service.get_user_overview(user_id=user_id, hours=24)
- mapper = AdminOverviewApiMapper()
- return mapper.to_response(domain)
except ValueError:
raise HTTPException(status_code=404, detail="User not found")
- except Exception as e:
- logger.error(f"Failed to get user overview for {user_id}: {str(e)}", exc_info=True)
- raise HTTPException(status_code=500, detail="Failed to get user overview")
+ mapper = AdminOverviewApiMapper()
+ return mapper.to_response(domain)
@router.put("/{user_id}", response_model=UserResponse)
async def update_user(
+ admin: Annotated[UserResponse, Depends(admin_user)],
user_id: str,
user_update: UserUpdate,
- user_repo: AdminUserRepositoryDep,
- request: Request,
- auth_service: FromDishka[AuthService],
+ user_repo: FromDishka[AdminUserRepository],
+ admin_user_service: FromDishka[AdminUserService],
) -> UserResponse:
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin updating user",
- extra={
- "admin_username": current_user.username,
- "target_user_id": user_id,
- "updates": user_update.model_dump(exclude_unset=True),
- },
- )
-
- try:
- # Get existing user
- existing_user = await user_repo.get_user_by_id(user_id)
- if not existing_user:
- raise HTTPException(status_code=404, detail="User not found")
-
- # Convert pydantic update to domain update
- update_dict = user_update.model_dump(exclude_unset=True)
- domain_update = DomainUserUpdate(
- username=update_dict.get("username"),
- email=update_dict.get("email"),
- role=UserRole(update_dict["role"]) if "role" in update_dict else None,
- is_active=update_dict.get("is_active"),
- password=update_dict.get("password")
- )
-
- updated_user = await user_repo.update_user(user_id, domain_update)
- if not updated_user:
- raise HTTPException(status_code=500, detail="Failed to update user")
+ # Get existing user (explicit 404), then update
+ existing_user = await user_repo.get_user_by_id(user_id)
+ if not existing_user:
+ raise HTTPException(status_code=404, detail="User not found")
- user_mapper = UserMapper()
- return UserResponse(**user_mapper.to_response_dict(updated_user))
+ update_dict = user_update.model_dump(exclude_unset=True)
+ domain_update = DomainUserUpdate(
+ username=update_dict.get("username"),
+ email=update_dict.get("email"),
+ role=UserRole(update_dict["role"]) if "role" in update_dict else None,
+ is_active=update_dict.get("is_active"),
+ password=update_dict.get("password"),
+ )
- except HTTPException:
- raise
- except Exception as e:
- logger.error(f"Failed to update user {user_id}: {str(e)}", exc_info=True)
+ updated_user = await admin_user_service.update_user(
+ admin_username=admin.username, user_id=user_id, update=domain_update
+ )
+ if not updated_user:
raise HTTPException(status_code=500, detail="Failed to update user")
+ user_mapper = UserMapper()
+ return UserResponse(**user_mapper.to_response_dict(updated_user))
+
@router.delete("/{user_id}")
async def delete_user(
+ admin: Annotated[UserResponse, Depends(admin_user)],
user_id: str,
- user_repo: AdminUserRepositoryDep,
- request: Request,
- auth_service: FromDishka[AuthService],
- rate_limit_service: FromDishka[RateLimitService],
+ admin_user_service: FromDishka[AdminUserService],
cascade: bool = Query(default=True, description="Cascade delete user's data"),
) -> dict:
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin deleting user",
- extra={
- "admin_username": current_user.username,
- "target_user_id": user_id,
- "cascade": cascade,
- },
- )
-
- try:
- # Prevent self-deletion
- if current_user.user_id == user_id:
- raise HTTPException(status_code=400, detail="Cannot delete your own account")
-
- # Get existing user
- existing_user = await user_repo.get_user_by_id(user_id)
- if not existing_user:
- raise HTTPException(status_code=404, detail="User not found")
-
- # Reset rate limits for user if service available
- await rate_limit_service.reset_user_limits(user_id)
-
- # Delete user with cascade
- deleted_counts = await user_repo.delete_user(user_id, cascade=cascade)
-
- if deleted_counts.get("user", 0) == 0:
- raise HTTPException(status_code=500, detail="Failed to delete user")
-
- return {
- "message": f"User {existing_user.username} deleted successfully",
- "deleted_counts": deleted_counts
- }
+ # Prevent self-deletion; delegate to service
+ if admin.user_id == user_id:
+ raise HTTPException(status_code=400, detail="Cannot delete your own account")
- except HTTPException:
- raise
- except Exception as e:
- logger.error(f"Failed to delete user {user_id}: {str(e)}", exc_info=True)
+ deleted_counts = await admin_user_service.delete_user(
+ admin_username=admin.username, user_id=user_id, cascade=cascade
+ )
+ if deleted_counts.get("user", 0) == 0:
raise HTTPException(status_code=500, detail="Failed to delete user")
+ return {"message": f"User {user_id} deleted successfully", "deleted_counts": deleted_counts}
+
@router.post("/{user_id}/reset-password", response_model=MessageResponse)
async def reset_user_password(
+ admin: Annotated[UserResponse, Depends(admin_user)],
+ admin_user_service: FromDishka[AdminUserService],
user_id: str,
password_request: PasswordResetRequest,
- request: Request,
- user_repo: AdminUserRepositoryDep,
- auth_service: FromDishka[AuthService],
) -> MessageResponse:
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin resetting user password",
- extra={
- "admin_username": current_user.username,
- "target_user_id": user_id,
- },
+ success = await admin_user_service.reset_user_password(
+ admin_username=admin.username, user_id=user_id, new_password=password_request.new_password
)
-
- try:
- # Get existing user
- existing_user = await user_repo.get_user_by_id(user_id)
- if not existing_user:
- raise HTTPException(status_code=404, detail="User not found")
-
- # Create password reset domain model
- password_reset = PasswordReset(
- user_id=user_id,
- new_password=password_request.new_password
- )
-
- success = await user_repo.reset_user_password(password_reset)
- if not success:
- raise HTTPException(status_code=500, detail="Failed to reset password")
-
- return MessageResponse(message=f"Password reset successfully for user {existing_user.username}")
-
- except HTTPException:
- raise
- except Exception as e:
- logger.error(f"Failed to reset password for user {user_id}: {str(e)}", exc_info=True)
+ if not success:
raise HTTPException(status_code=500, detail="Failed to reset password")
+ return MessageResponse(message=f"Password reset successfully for user {user_id}")
@router.get("/{user_id}/rate-limits")
async def get_user_rate_limits(
+ admin: Annotated[UserResponse, Depends(admin_user)],
+ admin_user_service: FromDishka[AdminUserService],
user_id: str,
- request: Request,
- auth_service: FromDishka[AuthService],
- rate_limit_service: FromDishka[RateLimitService],
) -> dict:
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin getting user rate limits",
- extra={
- "admin_username": current_user.username,
- "target_user_id": user_id,
- },
- )
-
- try:
- user_limit = await rate_limit_service.get_user_rate_limit(user_id)
- usage_stats = await rate_limit_service.get_usage_stats(user_id)
-
- rate_limit_mapper = UserRateLimitMapper()
- return {
- "user_id": user_id,
- "rate_limit_config": rate_limit_mapper.to_dict(user_limit) if user_limit else None,
- "current_usage": usage_stats
- }
-
- except Exception as e:
- logger.error(f"Failed to get rate limits for user {user_id}: {str(e)}", exc_info=True)
- raise HTTPException(status_code=500, detail="Failed to get rate limits")
+ return await admin_user_service.get_user_rate_limits(admin_username=admin.username, user_id=user_id)
@router.put("/{user_id}/rate-limits")
async def update_user_rate_limits(
+ admin: Annotated[UserResponse, Depends(admin_user)],
+ admin_user_service: FromDishka[AdminUserService],
user_id: str,
rate_limit_config: UserRateLimit,
- request: Request,
- auth_service: FromDishka[AuthService],
- rate_limit_service: FromDishka[RateLimitService],
) -> dict:
- current_user = await auth_service.require_admin(request)
- rate_limit_mapper = UserRateLimitMapper()
- logger.info(
- "Admin updating user rate limits",
- extra={
- "admin_username": current_user.username,
- "target_user_id": user_id,
- "config": rate_limit_mapper.to_dict(rate_limit_config),
- },
+ return await admin_user_service.update_user_rate_limits(
+ admin_username=admin.username, user_id=user_id, config=rate_limit_config
)
- try:
- # Ensure user_id matches
- rate_limit_config.user_id = user_id
-
- await rate_limit_service.update_user_rate_limit(user_id, rate_limit_config)
-
- rate_limit_mapper = UserRateLimitMapper()
- return {
- "message": "Rate limits updated successfully",
- "config": rate_limit_mapper.to_dict(rate_limit_config)
- }
-
- except Exception as e:
- logger.error(f"Failed to update rate limits for user {user_id}: {str(e)}", exc_info=True)
- raise HTTPException(status_code=500, detail="Failed to update rate limits")
-
@router.post("/{user_id}/rate-limits/reset")
async def reset_user_rate_limits(
+ admin: Annotated[UserResponse, Depends(admin_user)],
+ admin_user_service: FromDishka[AdminUserService],
user_id: str,
- request: Request,
- auth_service: FromDishka[AuthService],
- rate_limit_service: FromDishka[RateLimitService],
) -> MessageResponse:
- current_user = await auth_service.require_admin(request)
- logger.info(
- "Admin resetting user rate limits",
- extra={
- "admin_username": current_user.username,
- "target_user_id": user_id,
- },
- )
-
- try:
- await rate_limit_service.reset_user_limits(user_id)
-
- return MessageResponse(message=f"Rate limits reset successfully for user {user_id}")
-
- except Exception as e:
- logger.error(f"Failed to reset rate limits for user {user_id}: {str(e)}", exc_info=True)
- raise HTTPException(status_code=500, detail="Failed to reset rate limits")
+ await admin_user_service.reset_user_rate_limits(admin_username=admin.username, user_id=user_id)
+ return MessageResponse(message=f"Rate limits reset successfully for user {user_id}")
diff --git a/backend/app/api/routes/alertmanager.py b/backend/app/api/routes/alertmanager.py
deleted file mode 100644
index d09181f1..00000000
--- a/backend/app/api/routes/alertmanager.py
+++ /dev/null
@@ -1,144 +0,0 @@
-from typing import Any, Dict
-
-from dishka import FromDishka
-from dishka.integrations.fastapi import DishkaRoute
-from fastapi import APIRouter, BackgroundTasks
-
-from app.core.correlation import CorrelationContext
-from app.core.logging import logger
-from app.domain.enums.user import UserRole
-from app.schemas_pydantic.alertmanager import AlertmanagerWebhook, AlertResponse
-from app.services.notification_service import NotificationService
-
-router = APIRouter(prefix="/alertmanager",
- tags=["alertmanager"],
- route_class=DishkaRoute)
-
-
-@router.post("/webhook", response_model=AlertResponse)
-async def receive_alerts(
- webhook_payload: AlertmanagerWebhook,
- background_tasks: BackgroundTasks,
- notification_service: FromDishka[NotificationService]
-) -> AlertResponse:
- correlation_id = CorrelationContext.get_correlation_id()
-
- logger.info(
- "Received Alertmanager webhook",
- extra={
- "correlation_id": correlation_id,
- "receiver": webhook_payload.receiver,
- "status": webhook_payload.status,
- "alerts_count": len(webhook_payload.alerts),
- "group_key": webhook_payload.group_key,
- "group_labels": webhook_payload.group_labels
- }
- )
-
- errors: list[str] = []
- processed_count = 0
-
- # Process each alert
- for alert in webhook_payload.alerts:
- try:
- # Determine severity from labels
- severity = alert.labels.get("severity", "warning")
- alert_name = alert.labels.get("alertname", "Unknown Alert")
-
- # Create notification message
-            title = f"🚨 Alert: {alert_name}"
- if alert.status == "resolved":
-                title = f"✅ Resolved: {alert_name}"
-
- message = alert.annotations.get("summary", "Alert triggered")
- description = alert.annotations.get("description", "")
-
- if description:
- message = f"{message}\n\n{description}"
-
- # Add labels info
- labels_text = "\n".join(
- [f"{k}: {v}" for k, v in alert.labels.items() if k not in ["alertname", "severity"]])
- if labels_text:
- message = f"{message}\n\nLabels:\n{labels_text}"
-
- # Map severity to notification type
- notification_type = "error" if severity in ["critical", "error"] else "warning"
- if alert.status == "resolved":
- notification_type = "success"
-
- # Create system-wide notification
- background_tasks.add_task(
- notification_service.create_system_notification,
- title=title,
- message=message,
- notification_type=notification_type,
- metadata={
- "alert_fingerprint": alert.fingerprint,
- "alert_status": alert.status,
- "severity": severity,
- "generator_url": alert.generator_url,
- "starts_at": alert.starts_at,
- "ends_at": alert.ends_at,
- "receiver": webhook_payload.receiver,
- "group_key": webhook_payload.group_key,
- "correlation_id": correlation_id
- },
- # For critical alerts, notify all active users
- # For other alerts, notify only admin and moderator users
- target_roles=[UserRole.ADMIN, UserRole.MODERATOR] if severity not in ["critical", "error"] else None
- )
-
- processed_count += 1
-
- logger.info(
- f"Processing alert: {alert_name}",
- extra={
- "correlation_id": correlation_id,
- "alert_fingerprint": alert.fingerprint,
- "alert_status": alert.status,
- "severity": severity,
- "starts_at": alert.starts_at
- }
- )
-
- except Exception as e:
- error_msg = f"Failed to process alert {alert.fingerprint}: {str(e)}"
- errors.append(error_msg)
- logger.error(
- error_msg,
- extra={
- "correlation_id": correlation_id,
- "alert_fingerprint": alert.fingerprint,
- "error": str(e)
- },
- exc_info=True
- )
-
- # Log final status
- logger.info(
- "Alertmanager webhook processing completed",
- extra={
- "correlation_id": correlation_id,
- "alerts_received": len(webhook_payload.alerts),
- "alerts_processed": processed_count,
- "errors_count": len(errors)
- }
- )
-
- return AlertResponse(
- message="Webhook received and processed",
- alerts_received=len(webhook_payload.alerts),
- alerts_processed=processed_count,
- errors=errors
- )
-
-
-@router.get("/test")
-async def test_alertmanager_endpoint() -> Dict[str, Any]:
- """Test endpoint to verify Alertmanager route is accessible"""
- return {
- "status": "ok",
- "message": "Alertmanager webhook endpoint is ready",
- "webhook_url": "/api/v1/alertmanager/webhook"
- }
diff --git a/backend/app/api/routes/auth.py b/backend/app/api/routes/auth.py
index 52fa7b57..a4d46953 100644
--- a/backend/app/api/routes/auth.py
+++ b/backend/app/api/routes/auth.py
@@ -1,18 +1,19 @@
-from datetime import timedelta
+from datetime import datetime, timedelta, timezone
from typing import Dict, Union
+from uuid import uuid4
from dishka import FromDishka
from dishka.integrations.fastapi import DishkaRoute
from fastapi import APIRouter, Depends, HTTPException, Request, Response
from fastapi.security import OAuth2PasswordRequestForm
-from app.api.dependencies import AuthService
-from app.api.rate_limit import DynamicRateLimiter
from app.core.logging import logger
from app.core.security import security_service
-from app.core.service_dependencies import UserRepositoryDep
from app.core.utils import get_client_ip
-from app.schemas_pydantic.user import UserCreate, UserInDB, UserResponse
+from app.db.repositories import UserRepository
+from app.domain.user import User as DomainAdminUser
+from app.schemas_pydantic.user import UserCreate, UserResponse
+from app.services.auth_service import AuthService
from app.settings import get_settings
router = APIRouter(prefix="/auth",
@@ -20,11 +21,11 @@
route_class=DishkaRoute)
-@router.post("/login", dependencies=[Depends(DynamicRateLimiter)])
+@router.post("/login")
async def login(
request: Request,
response: Response,
- user_repo: UserRepositoryDep,
+ user_repo: FromDishka[UserRepository],
form_data: OAuth2PasswordRequestForm = Depends(),
) -> Dict[str, str]:
logger.info(
@@ -121,11 +122,11 @@ async def login(
}
-@router.post("/register", response_model=UserResponse, dependencies=[Depends(DynamicRateLimiter)])
+@router.post("/register", response_model=UserResponse)
async def register(
request: Request,
user: UserCreate,
- user_repo: UserRepositoryDep,
+ user_repo: FromDishka[UserRepository],
) -> UserResponse:
logger.info(
"Registration attempt",
@@ -151,11 +152,19 @@ async def register(
try:
hashed_password = security_service.get_password_hash(user.password)
- db_user = UserInDB(
- **user.model_dump(exclude={"password"}),
- hashed_password=hashed_password
+ now = datetime.now(timezone.utc)
+ domain_user = DomainAdminUser(
+ user_id=str(uuid4()),
+ username=user.username,
+ email=str(user.email),
+ role=user.role,
+ is_active=True,
+ is_superuser=False,
+ hashed_password=hashed_password,
+ created_at=now,
+ updated_at=now,
)
- created_user = await user_repo.create_user(db_user)
+ created_user = await user_repo.create_user(domain_user)
logger.info(
"Registration successful",
@@ -166,7 +175,15 @@ async def register(
},
)
- return UserResponse.model_validate(created_user.model_dump())
+ return UserResponse(
+ user_id=created_user.user_id,
+ username=created_user.username,
+ email=created_user.email,
+ role=created_user.role,
+ is_superuser=created_user.is_superuser,
+ created_at=created_user.created_at,
+ updated_at=created_user.updated_at,
+ )
except Exception as e:
logger.error(
@@ -183,7 +200,7 @@ async def register(
raise HTTPException(status_code=500, detail="Error creating user") from e
-@router.get("/me", response_model=UserResponse, dependencies=[Depends(DynamicRateLimiter)])
+@router.get("/me", response_model=UserResponse)
async def get_current_user_profile(
request: Request,
response: Response,
@@ -207,7 +224,7 @@ async def get_current_user_profile(
return current_user
-@router.get("/verify-token", dependencies=[Depends(DynamicRateLimiter)])
+@router.get("/verify-token")
async def verify_token(
request: Request,
auth_service: FromDishka[AuthService],
@@ -261,7 +278,7 @@ async def verify_token(
-@router.post("/logout", dependencies=[Depends(DynamicRateLimiter)])
+@router.post("/logout")
async def logout(
request: Request,
response: Response,
diff --git a/backend/app/api/routes/dlq.py b/backend/app/api/routes/dlq.py
index 40a20234..beb422d0 100644
--- a/backend/app/api/routes/dlq.py
+++ b/backend/app/api/routes/dlq.py
@@ -5,9 +5,10 @@
from dishka.integrations.fastapi import DishkaRoute
from fastapi import APIRouter, Depends, HTTPException, Query
-from app.api.dependencies import require_auth_guard
-from app.core.service_dependencies import DLQRepositoryDep
-from app.dlq.manager import DLQManager, RetryPolicy
+from app.api.dependencies import current_user
+from app.db.repositories.dlq_repository import DLQRepository
+from app.dlq import RetryPolicy
+from app.dlq.manager import DLQManager
from app.schemas_pydantic.dlq import (
DLQBatchRetryResponse,
DLQMessageDetail,
@@ -25,28 +26,31 @@
prefix="/dlq",
tags=["Dead Letter Queue"],
route_class=DishkaRoute,
- dependencies=[Depends(require_auth_guard)]
+ dependencies=[Depends(current_user)]
)
@router.get("/stats", response_model=DLQStats)
async def get_dlq_statistics(
- repository: DLQRepositoryDep
+ repository: FromDishka[DLQRepository]
) -> DLQStats:
stats = await repository.get_dlq_stats()
- # Convert DLQStatistics to DLQStats
return DLQStats(
by_status=stats.by_status,
- by_topic=[item.to_dict() for item in stats.by_topic],
- by_event_type=[item.to_dict() for item in stats.by_event_type],
- age_stats=stats.age_stats.to_dict() if stats.age_stats else {},
- timestamp=stats.timestamp
+ by_topic=[{"topic": t.topic, "count": t.count, "avg_retry_count": t.avg_retry_count} for t in stats.by_topic],
+ by_event_type=[{"event_type": e.event_type, "count": e.count} for e in stats.by_event_type],
+ age_stats={
+ "min_age": stats.age_stats.min_age_seconds,
+ "max_age": stats.age_stats.max_age_seconds,
+ "avg_age": stats.age_stats.avg_age_seconds,
+ } if stats.age_stats else {},
+ timestamp=stats.timestamp,
)
@router.get("/messages", response_model=DLQMessagesResponse)
async def get_dlq_messages(
- repository: DLQRepositoryDep,
+ repository: FromDishka[DLQRepository],
status: DLQMessageStatus | None = Query(None),
topic: str | None = None,
event_type: str | None = None,
@@ -94,7 +98,7 @@ async def get_dlq_messages(
@router.get("/messages/{event_id}", response_model=DLQMessageDetail)
async def get_dlq_message(
event_id: str,
- repository: DLQRepositoryDep
+ repository: FromDishka[DLQRepository]
) -> DLQMessageDetail:
message = await repository.get_message_by_id(event_id)
if not message:
@@ -125,7 +129,7 @@ async def get_dlq_message(
@router.post("/retry", response_model=DLQBatchRetryResponse)
async def retry_dlq_messages(
retry_request: ManualRetryRequest,
- repository: DLQRepositoryDep,
+ repository: FromDishka[DLQRepository],
dlq_manager: FromDishka[DLQManager]
) -> DLQBatchRetryResponse:
result = await repository.retry_messages_batch(retry_request.event_ids, dlq_manager)
@@ -133,7 +137,8 @@ async def retry_dlq_messages(
total=result.total,
successful=result.successful,
failed=result.failed,
- details=[d.to_dict() for d in result.details]
+ details=[{"event_id": d.event_id, "status": d.status, **({"error": d.error} if d.error else {})} for d in
+ result.details],
)
@@ -161,11 +166,11 @@ async def set_retry_policy(
@router.delete("/messages/{event_id}", response_model=MessageResponse)
async def discard_dlq_message(
event_id: str,
- repository: DLQRepositoryDep,
+ repository: FromDishka[DLQRepository],
dlq_manager: FromDishka[DLQManager],
reason: str = Query(..., description="Reason for discarding")
) -> MessageResponse:
- message_data = await repository.get_message_for_retry(event_id)
+ message_data = await repository.get_message_by_id(event_id)
if not message_data:
raise HTTPException(status_code=404, detail="Message not found")
@@ -176,7 +181,7 @@ async def discard_dlq_message(
@router.get("/topics", response_model=List[DLQTopicSummaryResponse])
async def get_dlq_topics(
- repository: DLQRepositoryDep
+ repository: FromDishka[DLQRepository]
) -> List[DLQTopicSummaryResponse]:
topics = await repository.get_topics_summary()
return [
diff --git a/backend/app/api/routes/events.py b/backend/app/api/routes/events.py
index d30d4a6a..c9f73531 100644
--- a/backend/app/api/routes/events.py
+++ b/backend/app/api/routes/events.py
@@ -1,17 +1,18 @@
import asyncio
from datetime import datetime, timedelta, timezone
-from typing import Any, Dict, List
+from typing import Annotated, Any, Dict, List
from dishka import FromDishka
from dishka.integrations.fastapi import DishkaRoute
from fastapi import APIRouter, Depends, HTTPException, Query, Request
-from app.api.dependencies import AuthService
-from app.api.rate_limit import check_rate_limit
+from app.api.dependencies import admin_user, current_user
from app.core.correlation import CorrelationContext
from app.core.logging import logger
+from app.core.utils import get_client_ip
from app.domain.events.event_models import EventFilter
-from app.infrastructure.mappers.event_mapper import EventMapper, EventStatisticsMapper
+from app.infrastructure.kafka.events.metadata import EventMetadata
+from app.infrastructure.mappers import EventMapper, EventStatisticsMapper
from app.schemas_pydantic.events import (
DeleteEventResponse,
EventAggregationRequest,
@@ -24,8 +25,10 @@
ReplayAggregateResponse,
SortOrder,
)
+from app.schemas_pydantic.user import UserResponse
from app.services.event_service import EventService
from app.services.kafka_event_service import KafkaEventService
+from app.settings import get_settings
router = APIRouter(prefix="/events",
tags=["events"],
@@ -33,19 +36,16 @@
@router.get("/executions/{execution_id}/events",
- response_model=EventListResponse,
- dependencies=[Depends(check_rate_limit)])
+ response_model=EventListResponse)
async def get_execution_events(
execution_id: str,
+ current_user: Annotated[UserResponse, Depends(current_user)],
event_service: FromDishka[EventService],
- request: Request,
- auth_service: FromDishka[AuthService],
include_system_events: bool = Query(
False,
description="Include system-generated events"
)
) -> EventListResponse:
- current_user = await auth_service.get_current_user(request)
mapper = EventMapper()
events = await event_service.get_execution_events(
execution_id=execution_id,
@@ -70,9 +70,8 @@ async def get_execution_events(
@router.get("/user", response_model=EventListResponse)
async def get_user_events(
+ current_user: Annotated[UserResponse, Depends(current_user)],
event_service: FromDishka[EventService],
- request: Request,
- auth_service: FromDishka[AuthService],
event_types: List[str] | None = Query(None),
start_time: datetime | None = Query(None),
end_time: datetime | None = Query(None),
@@ -81,7 +80,6 @@ async def get_user_events(
sort_order: SortOrder = Query(SortOrder.DESC)
) -> EventListResponse:
"""Get events for the current user"""
- current_user = await auth_service.get_current_user(request)
mapper = EventMapper()
result = await event_service.get_user_events_paginated(
user_id=current_user.user_id,
@@ -106,12 +104,10 @@ async def get_user_events(
@router.post("/query", response_model=EventListResponse)
async def query_events(
- event_service: FromDishka[EventService],
+ current_user: Annotated[UserResponse, Depends(current_user)],
filter_request: EventFilterRequest,
- request: Request,
- auth_service: FromDishka[AuthService],
+ event_service: FromDishka[EventService],
) -> EventListResponse:
- current_user = await auth_service.get_current_user(request)
mapper = EventMapper()
event_filter = EventFilter(
event_types=[str(et) for et in filter_request.event_types] if filter_request.event_types else None,
@@ -153,16 +149,14 @@ async def query_events(
@router.get("/correlation/{correlation_id}", response_model=EventListResponse)
async def get_events_by_correlation(
correlation_id: str,
+ current_user: Annotated[UserResponse, Depends(current_user)],
event_service: FromDishka[EventService],
- request: Request,
- auth_service: FromDishka[AuthService],
include_all_users: bool = Query(
False,
description="Include events from all users (admin only)"
),
limit: int = Query(100, ge=1, le=1000)
) -> EventListResponse:
- current_user = await auth_service.get_current_user(request)
mapper = EventMapper()
events = await event_service.get_events_by_correlation(
correlation_id=correlation_id,
@@ -185,12 +179,10 @@ async def get_events_by_correlation(
@router.get("/current-request", response_model=EventListResponse)
async def get_current_request_events(
- request: Request,
+ current_user: Annotated[UserResponse, Depends(current_user)],
event_service: FromDishka[EventService],
- auth_service: FromDishka[AuthService],
limit: int = Query(100, ge=1, le=1000),
) -> EventListResponse:
- current_user = await auth_service.get_current_user(request)
mapper = EventMapper()
correlation_id = CorrelationContext.get_correlation_id()
if not correlation_id:
@@ -223,9 +215,8 @@ async def get_current_request_events(
@router.get("/statistics", response_model=EventStatistics)
async def get_event_statistics(
- request: Request,
+ current_user: Annotated[UserResponse, Depends(current_user)],
event_service: FromDishka[EventService],
- auth_service: FromDishka[AuthService],
start_time: datetime | None = Query(
None,
description="Start time for statistics (defaults to 24 hours ago)"
@@ -239,7 +230,6 @@ async def get_event_statistics(
description="Include stats from all users (admin only)"
),
) -> EventStatistics:
- current_user = await auth_service.get_current_user(request)
if not start_time:
start_time = datetime.now(timezone.utc) - timedelta(days=1) # 24 hours ago
if not end_time:
@@ -260,12 +250,10 @@ async def get_event_statistics(
@router.get("/{event_id}", response_model=EventResponse)
async def get_event(
event_id: str,
- event_service: FromDishka[EventService],
- request: Request,
- auth_service: FromDishka[AuthService]
+ current_user: Annotated[UserResponse, Depends(current_user)],
+ event_service: FromDishka[EventService]
) -> EventResponse:
"""Get a specific event by ID"""
- current_user = await auth_service.get_current_user(request)
mapper = EventMapper()
event = await event_service.get_event(
event_id=event_id,
@@ -279,21 +267,29 @@ async def get_event(
@router.post("/publish", response_model=PublishEventResponse)
async def publish_custom_event(
+ admin: Annotated[UserResponse, Depends(admin_user)],
event_request: PublishEventRequest,
request: Request,
- event_service: FromDishka[KafkaEventService],
- auth_service: FromDishka[AuthService]
+ event_service: FromDishka[KafkaEventService]
) -> PublishEventResponse:
- current_user = await auth_service.require_admin(request)
+ settings = get_settings()
+ base_meta = EventMetadata(
+ service_name=settings.SERVICE_NAME,
+ service_version=settings.SERVICE_VERSION,
+ user_id=admin.user_id,
+ ip_address=get_client_ip(request),
+ user_agent=request.headers.get("user-agent"),
+ )
+ # Merge any additional metadata provided in request (extra allowed)
+ if event_request.metadata:
+ base_meta = base_meta.model_copy(update=event_request.metadata)
event_id = await event_service.publish_event(
event_type=event_request.event_type,
payload=event_request.payload,
aggregate_id=event_request.aggregate_id,
correlation_id=event_request.correlation_id,
- metadata=event_request.metadata,
- user_id=current_user.user_id,
- request=request
+ metadata=base_meta,
)
return PublishEventResponse(
@@ -305,12 +301,10 @@ async def publish_custom_event(
@router.post("/aggregate", response_model=List[Dict[str, Any]])
async def aggregate_events(
+ current_user: Annotated[UserResponse, Depends(current_user)],
aggregation: EventAggregationRequest,
event_service: FromDishka[EventService],
- request: Request,
- auth_service: FromDishka[AuthService],
) -> List[Dict[str, Any]]:
- current_user = await auth_service.get_current_user(request)
result = await event_service.aggregate_events(
user_id=current_user.user_id,
user_role=current_user.role,
@@ -323,11 +317,9 @@ async def aggregate_events(
@router.get("/types/list", response_model=List[str])
async def list_event_types(
- event_service: FromDishka[EventService],
- request: Request,
- auth_service: FromDishka[AuthService]
+ current_user: Annotated[UserResponse, Depends(current_user)],
+ event_service: FromDishka[EventService]
) -> List[str]:
- current_user = await auth_service.get_current_user(request)
event_types = await event_service.list_event_types(
user_id=current_user.user_id,
user_role=current_user.role
@@ -338,21 +330,19 @@ async def list_event_types(
@router.delete("/{event_id}", response_model=DeleteEventResponse)
async def delete_event(
event_id: str,
+ admin: Annotated[UserResponse, Depends(admin_user)],
event_service: FromDishka[EventService],
- request: Request,
- auth_service: FromDishka[AuthService],
) -> DeleteEventResponse:
- current_user = await auth_service.require_admin(request)
result = await event_service.delete_event_with_archival(
event_id=event_id,
- deleted_by=str(current_user.email)
+ deleted_by=str(admin.email)
)
if result is None:
raise HTTPException(status_code=404, detail="Event not found")
logger.warning(
- f"Event {event_id} deleted by admin {current_user.email}",
+ f"Event {event_id} deleted by admin {admin.email}",
extra={
"event_type": result.event_type,
"aggregate_id": result.aggregate_id,
@@ -370,10 +360,9 @@ async def delete_event(
@router.post("/replay/{aggregate_id}", response_model=ReplayAggregateResponse)
async def replay_aggregate_events(
aggregate_id: str,
- request: Request,
+ admin: Annotated[UserResponse, Depends(admin_user)],
event_service: FromDishka[EventService],
kafka_event_service: FromDishka[KafkaEventService],
- auth_service: FromDishka[AuthService],
target_service: str | None = Query(
None,
description="Service to replay events to"
@@ -383,7 +372,6 @@ async def replay_aggregate_events(
description="If true, only show what would be replayed"
),
) -> ReplayAggregateResponse:
- current_user = await auth_service.require_admin(request)
replay_info = await event_service.get_aggregate_replay_info(aggregate_id)
if not replay_info:
raise HTTPException(
@@ -411,18 +399,18 @@ async def replay_aggregate_events(
await asyncio.sleep(0.1)
try:
+ settings = get_settings()
+ meta = EventMetadata(
+ service_name=settings.SERVICE_NAME,
+ service_version=settings.SERVICE_VERSION,
+ user_id=admin.user_id,
+ )
await kafka_event_service.publish_event(
event_type=f"replay.{event.event_type}",
payload=event.payload,
aggregate_id=aggregate_id,
correlation_id=replay_correlation_id,
- metadata={
- "original_event_id": event.event_id,
- "replay_target": target_service,
- "replayed_by": current_user.email,
- "replayed_at": datetime.now(timezone.utc)
- },
- user_id=current_user.user_id
+ metadata=meta,
)
replayed_count += 1
except Exception as e:
diff --git a/backend/app/api/routes/execution.py b/backend/app/api/routes/execution.py
index b19c3283..8218b2cc 100644
--- a/backend/app/api/routes/execution.py
+++ b/backend/app/api/routes/execution.py
@@ -6,8 +6,7 @@
from dishka.integrations.fastapi import DishkaRoute, inject
from fastapi import APIRouter, Depends, Header, HTTPException, Path, Query, Request
-from app.api.dependencies import AuthService
-from app.api.rate_limit import DynamicRateLimiter
+from app.api.dependencies import admin_user, current_user
from app.core.exceptions import IntegrationException
from app.core.tracing import EventAttributes, add_span_attributes
from app.core.utils import get_client_ip
@@ -18,7 +17,7 @@
from app.domain.enums.user import UserRole
from app.infrastructure.kafka.events.base import BaseEvent
from app.infrastructure.kafka.events.metadata import EventMetadata
-from app.infrastructure.mappers.execution_api_mapper import ExecutionApiMapper
+from app.infrastructure.mappers import ExecutionApiMapper
from app.schemas_pydantic.execution import (
CancelExecutionRequest,
CancelResponse,
@@ -33,10 +32,12 @@
ResourceLimits,
RetryExecutionRequest,
)
+from app.schemas_pydantic.user import UserResponse
from app.services.event_service import EventService
from app.services.execution_service import ExecutionService
from app.services.idempotency import IdempotencyManager
from app.services.kafka_event_service import KafkaEventService
+from app.settings import get_settings
router = APIRouter(route_class=DishkaRoute)
@@ -44,11 +45,9 @@
@inject
async def get_execution_with_access(
execution_id: Annotated[str, Path()],
- request: Request,
+ current_user: Annotated[UserResponse, Depends(current_user)],
execution_service: FromDishka[ExecutionService],
- auth_service: FromDishka[AuthService],
) -> ExecutionInDB:
- current_user = await auth_service.get_current_user(request)
domain_exec = await execution_service.get_execution_result(execution_id)
if domain_exec.user_id and domain_exec.user_id != current_user.user_id and current_user.role != UserRole.ADMIN:
@@ -65,8 +64,8 @@ async def get_execution_with_access(
execution_id=domain_exec.execution_id,
script=domain_exec.script,
status=domain_exec.status,
- output=domain_exec.output,
- errors=domain_exec.errors,
+ stdout=domain_exec.stdout,
+ stderr=domain_exec.stderr,
lang=domain_exec.lang,
lang_version=domain_exec.lang_version,
resource_usage=ru,
@@ -78,17 +77,15 @@ async def get_execution_with_access(
)
-@router.post("/execute", response_model=ExecutionResponse, dependencies=[Depends(DynamicRateLimiter)])
+@router.post("/execute", response_model=ExecutionResponse)
async def create_execution(
request: Request,
+ current_user: Annotated[UserResponse, Depends(current_user)],
execution: ExecutionRequest,
execution_service: FromDishka[ExecutionService],
- auth_service: FromDishka[AuthService],
idempotency_manager: FromDishka[IdempotencyManager],
idempotency_key: Annotated[str | None, Header(alias="Idempotency-Key")] = None,
) -> ExecutionResponse:
- current_user = await auth_service.get_current_user(request)
-
add_span_attributes(
**{
"http.method": "POST",
@@ -125,14 +122,13 @@ async def create_execution(
ttl_seconds=86400 # 24 hours TTL for HTTP idempotency
)
- if idempotency_result.is_duplicate and idempotency_result.result:
- # Return cached result if available
- cached_result = idempotency_result.result
- if isinstance(cached_result, dict):
- return ExecutionResponse(
- execution_id=cached_result.get("execution_id", ""),
- status=cached_result.get("status", ExecutionStatus.QUEUED)
- )
+ if idempotency_result.is_duplicate:
+ cached_json = await idempotency_manager.get_cached_json(
+ event=pseudo_event,
+ key_strategy="custom",
+ custom_key=f"http:{current_user.user_id}:{idempotency_key}",
+ )
+ return ExecutionResponse.model_validate_json(cached_json)
try:
client_ip = get_client_ip(request)
@@ -148,12 +144,10 @@ async def create_execution(
# Store result for idempotency if key was provided
if idempotency_key and pseudo_event:
- await idempotency_manager.mark_completed(
+ response_model = ExecutionApiMapper.to_response(exec_result)
+ await idempotency_manager.mark_completed_with_json(
event=pseudo_event,
- result={
- "execution_id": exec_result.execution_id,
- "status": exec_result.status
- },
+ cached_json=response_model.model_dump_json(),
key_strategy="custom",
custom_key=f"http:{current_user.user_id}:{idempotency_key}"
)
@@ -185,24 +179,20 @@ async def create_execution(
) from e
-@router.get("/result/{execution_id}", response_model=ExecutionResult, dependencies=[Depends(DynamicRateLimiter)])
+@router.get("/result/{execution_id}", response_model=ExecutionResult)
async def get_result(
execution: Annotated[ExecutionInDB, Depends(get_execution_with_access)],
- request: Request,
) -> ExecutionResult:
return ExecutionResult.model_validate(execution)
-@router.post("/{execution_id}/cancel", response_model=CancelResponse, dependencies=[Depends(DynamicRateLimiter)])
+@router.post("/{execution_id}/cancel", response_model=CancelResponse)
async def cancel_execution(
execution: Annotated[ExecutionInDB, Depends(get_execution_with_access)],
+ current_user: Annotated[UserResponse, Depends(current_user)],
cancel_request: CancelExecutionRequest,
- request: Request,
event_service: FromDishka[KafkaEventService],
- auth_service: FromDishka[AuthService],
) -> CancelResponse:
- current_user = await auth_service.get_current_user(request)
-
# Handle terminal states
terminal_states = [ExecutionStatus.COMPLETED, ExecutionStatus.FAILED, ExecutionStatus.TIMEOUT]
@@ -221,15 +211,23 @@ async def cancel_execution(
event_id="-1" # exact event_id unknown
)
- event_id = await event_service.publish_execution_event(
- event_type=EventType.EXECUTION_CANCELLED,
- execution_id=execution.execution_id,
- status=ExecutionStatus.CANCELLED,
+ settings = get_settings()
+ payload = {
+ "execution_id": execution.execution_id,
+ "status": str(ExecutionStatus.CANCELLED),
+ "reason": cancel_request.reason or "User requested cancellation",
+ "previous_status": str(execution.status),
+ }
+ meta = EventMetadata(
+ service_name=settings.SERVICE_NAME,
+ service_version=settings.SERVICE_VERSION,
user_id=current_user.user_id,
- metadata={
- "reason": cancel_request.reason or "User requested cancellation",
- "previous_status": execution.status,
- }
+ )
+ event_id = await event_service.publish_event(
+ event_type=EventType.EXECUTION_CANCELLED,
+ payload=payload,
+ aggregate_id=execution.execution_id,
+ metadata=meta,
)
return CancelResponse(
@@ -240,16 +238,15 @@ async def cancel_execution(
)
-@router.post("/{execution_id}/retry", response_model=ExecutionResponse, dependencies=[Depends(DynamicRateLimiter)])
+@router.post("/{execution_id}/retry", response_model=ExecutionResponse)
async def retry_execution(
original_execution: Annotated[ExecutionInDB, Depends(get_execution_with_access)],
+ current_user: Annotated[UserResponse, Depends(current_user)],
retry_request: RetryExecutionRequest,
request: Request,
execution_service: FromDishka[ExecutionService],
- auth_service: FromDishka[AuthService],
) -> ExecutionResponse:
"""Retry a failed or completed execution."""
- current_user = await auth_service.get_current_user(request)
if original_execution.status in [ExecutionStatus.RUNNING, ExecutionStatus.QUEUED]:
raise HTTPException(
@@ -272,12 +269,10 @@ async def retry_execution(
@router.get("/executions/{execution_id}/events",
- response_model=list[ExecutionEventResponse],
- dependencies=[Depends(DynamicRateLimiter)])
+ response_model=list[ExecutionEventResponse])
async def get_execution_events(
execution: Annotated[ExecutionInDB, Depends(get_execution_with_access)],
event_service: FromDishka[EventService],
- request: Request,
event_types: str | None = Query(
None, description="Comma-separated event types to filter"
),
@@ -305,11 +300,10 @@ async def get_execution_events(
]
-@router.get("/user/executions", response_model=ExecutionListResponse, dependencies=[Depends(DynamicRateLimiter)])
+@router.get("/user/executions", response_model=ExecutionListResponse)
async def get_user_executions(
- request: Request,
+ current_user: Annotated[UserResponse, Depends(current_user)],
execution_service: FromDishka[ExecutionService],
- auth_service: FromDishka[AuthService],
status: ExecutionStatus | None = Query(None),
lang: str | None = Query(None),
start_time: datetime | None = Query(None),
@@ -318,7 +312,6 @@ async def get_user_executions(
skip: int = Query(0, ge=0),
) -> ExecutionListResponse:
"""Get executions for the current user."""
- current_user = await auth_service.get_current_user(request)
executions = await execution_service.get_user_executions(
user_id=current_user.user_id,
@@ -370,15 +363,13 @@ async def get_k8s_resource_limits(
) from e
-@router.delete("/{execution_id}", response_model=DeleteResponse, dependencies=[Depends(DynamicRateLimiter)])
+@router.delete("/{execution_id}", response_model=DeleteResponse)
async def delete_execution(
execution_id: str,
- request: Request,
+ admin: Annotated[UserResponse, Depends(admin_user)],
execution_service: FromDishka[ExecutionService],
- auth_service: FromDishka[AuthService],
) -> DeleteResponse:
"""Delete an execution and its associated data (admin only)."""
- _ = await auth_service.require_admin(request)
await execution_service.delete_execution(execution_id)
return DeleteResponse(
message="Execution deleted successfully",
diff --git a/backend/app/api/routes/grafana_alerts.py b/backend/app/api/routes/grafana_alerts.py
new file mode 100644
index 00000000..8a8614e6
--- /dev/null
+++ b/backend/app/api/routes/grafana_alerts.py
@@ -0,0 +1,39 @@
+from dishka import FromDishka
+from dishka.integrations.fastapi import DishkaRoute
+from fastapi import APIRouter
+
+from app.core.correlation import CorrelationContext
+from app.schemas_pydantic.grafana import AlertResponse, GrafanaWebhook
+from app.services.grafana_alert_processor import GrafanaAlertProcessor
+
+router = APIRouter(prefix="/alerts", tags=["alerts"], route_class=DishkaRoute)
+
+
+@router.post("/grafana", response_model=AlertResponse)
+async def receive_grafana_alerts(
+ webhook_payload: GrafanaWebhook,
+ processor: FromDishka[GrafanaAlertProcessor],
+) -> AlertResponse:
+ correlation_id = CorrelationContext.get_correlation_id()
+
+ processed_count, errors = await processor.process_webhook(
+ webhook_payload, correlation_id
+ )
+
+ alerts_count = len(webhook_payload.alerts or [])
+
+ return AlertResponse(
+ message="Webhook received and processed",
+ alerts_received=alerts_count,
+ alerts_processed=processed_count,
+ errors=errors,
+ )
+
+
+@router.get("/grafana/test")
+async def test_grafana_alert_endpoint() -> dict[str, str]:
+ return {
+ "status": "ok",
+ "message": "Grafana webhook endpoint is ready",
+ "webhook_url": "/api/v1/alerts/grafana",
+ }
diff --git a/backend/app/api/routes/notifications.py b/backend/app/api/routes/notifications.py
index d7e5addd..2550a36c 100644
--- a/backend/app/api/routes/notifications.py
+++ b/backend/app/api/routes/notifications.py
@@ -1,10 +1,8 @@
from dishka import FromDishka
from dishka.integrations.fastapi import DishkaRoute
-from fastapi import APIRouter, Depends, Query, Request, Response
+from fastapi import APIRouter, Query, Request, Response
-from app.api.dependencies import AuthService
-from app.api.rate_limit import check_rate_limit
-from app.infrastructure.mappers.notification_api_mapper import NotificationApiMapper
+from app.infrastructure.mappers import NotificationApiMapper
from app.schemas_pydantic.notification import (
DeleteNotificationResponse,
NotificationChannel,
@@ -15,17 +13,21 @@
SubscriptionUpdate,
UnreadCountResponse,
)
+from app.services.auth_service import AuthService
from app.services.notification_service import NotificationService
router = APIRouter(prefix="/notifications", tags=["notifications"], route_class=DishkaRoute)
-@router.get("", response_model=NotificationListResponse, dependencies=[Depends(check_rate_limit)])
+@router.get("", response_model=NotificationListResponse)
async def get_notifications(
request: Request,
notification_service: FromDishka[NotificationService],
auth_service: FromDishka[AuthService],
status: NotificationStatus | None = Query(None),
+ include_tags: list[str] | None = Query(None, description="Only notifications with any of these tags"),
+ exclude_tags: list[str] | None = Query(None, description="Exclude notifications with any of these tags"),
+ tag_prefix: str | None = Query(None, description="Only notifications having a tag starting with this prefix"),
limit: int = Query(50, ge=1, le=100),
offset: int = Query(0, ge=0),
) -> NotificationListResponse:
@@ -35,11 +37,14 @@ async def get_notifications(
status=status,
limit=limit,
offset=offset,
+ include_tags=include_tags,
+ exclude_tags=exclude_tags,
+ tag_prefix=tag_prefix,
)
return NotificationApiMapper.list_result_to_response(result)
-@router.put("/{notification_id}/read", status_code=204, dependencies=[Depends(check_rate_limit)])
+@router.put("/{notification_id}/read", status_code=204)
async def mark_notification_read(
notification_id: str,
notification_service: FromDishka[NotificationService],
@@ -93,7 +98,9 @@ async def update_subscription(
enabled=subscription.enabled,
webhook_url=subscription.webhook_url,
slack_webhook=subscription.slack_webhook,
- notification_types=subscription.notification_types
+ severities=subscription.severities,
+ include_tags=subscription.include_tags,
+ exclude_tags=subscription.exclude_tags,
)
return NotificationApiMapper.subscription_to_pydantic(updated_sub)
diff --git a/backend/app/api/routes/replay.py b/backend/app/api/routes/replay.py
index 1204655b..d53795aa 100644
--- a/backend/app/api/routes/replay.py
+++ b/backend/app/api/routes/replay.py
@@ -1,10 +1,10 @@
+from dishka import FromDishka
from dishka.integrations.fastapi import DishkaRoute
from fastapi import APIRouter, Depends, Query
-from app.api.dependencies import require_admin_guard
-from app.core.service_dependencies import FromDishka
+from app.api.dependencies import admin_user
from app.domain.enums.replay import ReplayStatus
-from app.infrastructure.mappers.replay_api_mapper import ReplayApiMapper
+from app.infrastructure.mappers import ReplayApiMapper
from app.schemas_pydantic.replay import (
CleanupResponse,
ReplayRequest,
@@ -17,7 +17,7 @@
router = APIRouter(prefix="/replay",
tags=["Event Replay"],
route_class=DishkaRoute,
- dependencies=[Depends(require_admin_guard)])
+ dependencies=[Depends(admin_user)])
@router.post("/sessions", response_model=ReplayResponse)
@@ -26,7 +26,7 @@ async def create_replay_session(
service: FromDishka[ReplayService],
) -> ReplayResponse:
cfg = ReplayApiMapper.request_to_config(replay_request)
- result = await service.create_session(cfg)
+ result = await service.create_session_from_config(cfg)
return ReplayApiMapper.op_to_response(result.session_id, result.status, result.message)
diff --git a/backend/app/api/routes/saga.py b/backend/app/api/routes/saga.py
index 037606bc..30089720 100644
--- a/backend/app/api/routes/saga.py
+++ b/backend/app/api/routes/saga.py
@@ -1,25 +1,23 @@
from dishka import FromDishka
from dishka.integrations.fastapi import DishkaRoute
-from fastapi import APIRouter, Depends, Query, Request
+from fastapi import APIRouter, Query, Request
-from app.api.dependencies import AuthService
-from app.api.rate_limit import check_rate_limit
from app.domain.enums.saga import SagaState
-from app.infrastructure.mappers.admin_mapper import UserMapper as AdminUserMapper
-from app.infrastructure.mappers.saga_mapper import SagaResponseMapper
+from app.infrastructure.mappers import SagaResponseMapper
+from app.infrastructure.mappers import UserMapper as AdminUserMapper
from app.schemas_pydantic.saga import (
SagaCancellationResponse,
SagaListResponse,
SagaStatusResponse,
)
from app.schemas_pydantic.user import User
-from app.services.saga_service import SagaService
+from app.services.auth_service import AuthService
+from app.services.saga.saga_service import SagaService
router = APIRouter(
prefix="/sagas",
tags=["sagas"],
route_class=DishkaRoute,
- dependencies=[Depends(check_rate_limit)]
)
diff --git a/backend/app/api/routes/saved_scripts.py b/backend/app/api/routes/saved_scripts.py
index 99485d57..67689ff2 100644
--- a/backend/app/api/routes/saved_scripts.py
+++ b/backend/app/api/routes/saved_scripts.py
@@ -1,20 +1,19 @@
from dishka import FromDishka
from dishka.integrations.fastapi import DishkaRoute
-from fastapi import APIRouter, Depends, Request
+from fastapi import APIRouter, Request
-from app.api.dependencies import AuthService
-from app.api.rate_limit import DynamicRateLimiter
-from app.infrastructure.mappers.saved_script_api_mapper import SavedScriptApiMapper
+from app.infrastructure.mappers import SavedScriptApiMapper
from app.schemas_pydantic.saved_script import (
SavedScriptCreateRequest,
SavedScriptResponse,
)
+from app.services.auth_service import AuthService
from app.services.saved_script_service import SavedScriptService
router = APIRouter(route_class=DishkaRoute)
-@router.post("/scripts", response_model=SavedScriptResponse, dependencies=[Depends(DynamicRateLimiter)])
+@router.post("/scripts", response_model=SavedScriptResponse)
async def create_saved_script(
request: Request,
saved_script: SavedScriptCreateRequest,
@@ -30,7 +29,7 @@ async def create_saved_script(
return SavedScriptApiMapper.to_response(domain)
-@router.get("/scripts", response_model=list[SavedScriptResponse], dependencies=[Depends(DynamicRateLimiter)])
+@router.get("/scripts", response_model=list[SavedScriptResponse])
async def list_saved_scripts(
request: Request,
saved_script_service: FromDishka[SavedScriptService],
@@ -41,7 +40,7 @@ async def list_saved_scripts(
return SavedScriptApiMapper.list_to_response(items)
-@router.get("/scripts/{script_id}", response_model=SavedScriptResponse, dependencies=[Depends(DynamicRateLimiter)])
+@router.get("/scripts/{script_id}", response_model=SavedScriptResponse)
async def get_saved_script(
request: Request,
script_id: str,
@@ -57,7 +56,7 @@ async def get_saved_script(
return SavedScriptApiMapper.to_response(domain)
-@router.put("/scripts/{script_id}", response_model=SavedScriptResponse, dependencies=[Depends(DynamicRateLimiter)])
+@router.put("/scripts/{script_id}", response_model=SavedScriptResponse)
async def update_saved_script(
request: Request,
script_id: str,
@@ -76,7 +75,7 @@ async def update_saved_script(
return SavedScriptApiMapper.to_response(domain)
-@router.delete("/scripts/{script_id}", status_code=204, dependencies=[Depends(DynamicRateLimiter)])
+@router.delete("/scripts/{script_id}", status_code=204)
async def delete_saved_script(
request: Request,
script_id: str,
diff --git a/backend/app/api/routes/sse.py b/backend/app/api/routes/sse.py
index 2c6b7e01..b51865ad 100644
--- a/backend/app/api/routes/sse.py
+++ b/backend/app/api/routes/sse.py
@@ -3,9 +3,9 @@
from fastapi import APIRouter, Request
from sse_starlette.sse import EventSourceResponse
-from app.api.dependencies import AuthService
-from app.domain.sse.models import SSEHealthDomain
+from app.domain.sse import SSEHealthDomain
from app.schemas_pydantic.sse import SSEHealthResponse
+from app.services.auth_service import AuthService
from app.services.sse.sse_service import SSEService
router = APIRouter(
diff --git a/backend/app/api/routes/user_settings.py b/backend/app/api/routes/user_settings.py
index 9edd647b..ef323ad0 100644
--- a/backend/app/api/routes/user_settings.py
+++ b/backend/app/api/routes/user_settings.py
@@ -1,9 +1,12 @@
+from typing import Annotated
+
+from dishka import FromDishka
from dishka.integrations.fastapi import DishkaRoute
-from fastapi import APIRouter, Request
+from fastapi import APIRouter, Depends
-from app.api.dependencies import AuthService
-from app.core.service_dependencies import FromDishka
-from app.infrastructure.mappers.user_settings_api_mapper import UserSettingsApiMapper
+from app.api.dependencies import current_user
+from app.infrastructure.mappers import UserSettingsApiMapper
+from app.schemas_pydantic.user import UserResponse
from app.schemas_pydantic.user_settings import (
EditorSettings,
NotificationSettings,
@@ -22,23 +25,19 @@
@router.get("/", response_model=UserSettings)
async def get_user_settings(
+ current_user: Annotated[UserResponse, Depends(current_user)],
settings_service: FromDishka[UserSettingsService],
- request: Request,
- auth_service: FromDishka[AuthService]
) -> UserSettings:
- current_user = await auth_service.get_current_user(request)
domain = await settings_service.get_user_settings(current_user.user_id)
return UserSettingsApiMapper.to_api_settings(domain)
@router.put("/", response_model=UserSettings)
async def update_user_settings(
+ current_user: Annotated[UserResponse, Depends(current_user)],
updates: UserSettingsUpdate,
settings_service: FromDishka[UserSettingsService],
- request: Request,
- auth_service: FromDishka[AuthService]
) -> UserSettings:
- current_user = await auth_service.get_current_user(request)
domain_updates = UserSettingsApiMapper.to_domain_update(updates)
domain = await settings_service.update_user_settings(current_user.user_id, domain_updates)
return UserSettingsApiMapper.to_api_settings(domain)
@@ -46,24 +45,20 @@ async def update_user_settings(
@router.put("/theme", response_model=UserSettings)
async def update_theme(
- request: Request,
+ current_user: Annotated[UserResponse, Depends(current_user)],
update_request: ThemeUpdateRequest,
settings_service: FromDishka[UserSettingsService],
- auth_service: FromDishka[AuthService]
) -> UserSettings:
- current_user = await auth_service.get_current_user(request)
domain = await settings_service.update_theme(current_user.user_id, update_request.theme)
return UserSettingsApiMapper.to_api_settings(domain)
@router.put("/notifications", response_model=UserSettings)
async def update_notification_settings(
+ current_user: Annotated[UserResponse, Depends(current_user)],
notifications: NotificationSettings,
settings_service: FromDishka[UserSettingsService],
- request: Request,
- auth_service: FromDishka[AuthService]
) -> UserSettings:
- current_user = await auth_service.get_current_user(request)
domain = await settings_service.update_notification_settings(
current_user.user_id,
UserSettingsApiMapper._to_domain_notifications(notifications),
@@ -73,12 +68,10 @@ async def update_notification_settings(
@router.put("/editor", response_model=UserSettings)
async def update_editor_settings(
+ current_user: Annotated[UserResponse, Depends(current_user)],
editor: EditorSettings,
settings_service: FromDishka[UserSettingsService],
- request: Request,
- auth_service: FromDishka[AuthService]
) -> UserSettings:
- current_user = await auth_service.get_current_user(request)
domain = await settings_service.update_editor_settings(
current_user.user_id,
UserSettingsApiMapper._to_domain_editor(editor),
@@ -88,36 +81,30 @@ async def update_editor_settings(
@router.get("/history", response_model=SettingsHistoryResponse)
async def get_settings_history(
- request: Request,
+ current_user: Annotated[UserResponse, Depends(current_user)],
settings_service: FromDishka[UserSettingsService],
- auth_service: FromDishka[AuthService],
limit: int = 50,
) -> SettingsHistoryResponse:
- current_user = await auth_service.get_current_user(request)
history = await settings_service.get_settings_history(current_user.user_id, limit=limit)
return UserSettingsApiMapper.history_to_api(history)
@router.post("/restore", response_model=UserSettings)
async def restore_settings(
- request: Request,
+ current_user: Annotated[UserResponse, Depends(current_user)],
restore_request: RestoreSettingsRequest,
settings_service: FromDishka[UserSettingsService],
- auth_service: FromDishka[AuthService]
) -> UserSettings:
- current_user = await auth_service.get_current_user(request)
domain = await settings_service.restore_settings_to_point(current_user.user_id, restore_request.timestamp)
return UserSettingsApiMapper.to_api_settings(domain)
@router.put("/custom/{key}")
async def update_custom_setting(
+ current_user: Annotated[UserResponse, Depends(current_user)],
key: str,
value: dict[str, object],
settings_service: FromDishka[UserSettingsService],
- request: Request,
- auth_service: FromDishka[AuthService]
) -> UserSettings:
- current_user = await auth_service.get_current_user(request)
domain = await settings_service.update_custom_setting(current_user.user_id, key, value)
return UserSettingsApiMapper.to_api_settings(domain)
diff --git a/backend/app/core/adaptive_sampling.py b/backend/app/core/adaptive_sampling.py
index a242e9e6..ecb2700e 100644
--- a/backend/app/core/adaptive_sampling.py
+++ b/backend/app/core/adaptive_sampling.py
@@ -1,3 +1,4 @@
+import logging
import threading
import time
from collections import deque
@@ -8,7 +9,6 @@
from opentelemetry.trace import Link, SpanKind, TraceState, get_current_span
from opentelemetry.util.types import Attributes
-from app.core.logging import logger
from app.settings import get_settings
@@ -67,7 +67,9 @@ def __init__(
self._adjustment_thread = threading.Thread(target=self._adjustment_loop, daemon=True)
self._adjustment_thread.start()
- logger.info(f"Adaptive sampler initialized with base rate: {base_rate}")
+ logging.getLogger("integr8scode").info(
+ f"Adaptive sampler initialized with base rate: {base_rate}"
+ )
def should_sample(
self,
@@ -208,7 +210,7 @@ def _adjust_sampling_rate(self) -> None:
# Scale up based on error rate
error_multiplier: float = min(10.0, 1 + (error_rate / self.error_rate_threshold))
new_rate = min(self.max_rate, self.base_rate * error_multiplier)
- logger.warning(
+ logging.getLogger("integr8scode").warning(
f"High error rate detected ({error_rate:.1%}), "
f"increasing sampling to {new_rate:.1%}"
)
@@ -218,7 +220,7 @@ def _adjust_sampling_rate(self) -> None:
# Scale down based on traffic
traffic_divisor = request_rate / self.high_traffic_threshold
new_rate = max(self.min_rate, self.base_rate / traffic_divisor)
- logger.info(
+ logging.getLogger("integr8scode").info(
f"High traffic detected ({request_rate} req/min), "
f"decreasing sampling to {new_rate:.1%}"
)
@@ -231,7 +233,7 @@ def _adjust_sampling_rate(self) -> None:
self._current_rate + (new_rate - self._current_rate) * change_rate
)
- logger.info(
+ logging.getLogger("integr8scode").info(
f"Adjusted sampling rate to {self._current_rate:.1%} "
f"(error_rate: {error_rate:.1%}, request_rate: {request_rate} req/min)"
)
@@ -244,7 +246,9 @@ def _adjustment_loop(self) -> None:
try:
self._adjust_sampling_rate()
except Exception as e:
- logger.error(f"Error adjusting sampling rate: {e}")
+ logging.getLogger("integr8scode").error(
+ f"Error adjusting sampling rate: {e}"
+ )
def shutdown(self) -> None:
"""Shutdown the sampler"""
diff --git a/backend/app/core/container.py b/backend/app/core/container.py
index a2135117..fef3e1b3 100644
--- a/backend/app/core/container.py
+++ b/backend/app/core/container.py
@@ -11,6 +11,7 @@
EventProvider,
MessagingProvider,
RedisProvider,
+ ResultProcessorProvider,
SettingsProvider,
UserServicesProvider,
)
@@ -41,17 +42,12 @@ def create_result_processor_container() -> AsyncContainer:
Create a minimal DI container for the ResultProcessor worker.
Includes only settings, database, event/kafka, and required repositories.
"""
- from app.core.providers import (
- DatabaseProvider,
- EventProvider,
- MessagingProvider,
- ResultProcessorProvider,
- SettingsProvider,
- )
-
return make_async_container(
SettingsProvider(),
DatabaseProvider(),
+ CoreServicesProvider(),
+ ConnectionProvider(),
+ RedisProvider(),
EventProvider(),
MessagingProvider(),
ResultProcessorProvider(),
diff --git a/backend/app/core/correlation.py b/backend/app/core/correlation.py
index eaff474e..6dd452fd 100644
--- a/backend/app/core/correlation.py
+++ b/backend/app/core/correlation.py
@@ -1,10 +1,9 @@
import uuid
from datetime import datetime, timezone
-from typing import Any, Awaitable, Callable, Dict
+from typing import Any, Dict
-from fastapi import Request
-from starlette.middleware.base import BaseHTTPMiddleware
-from starlette.responses import Response
+from starlette.datastructures import MutableHeaders
+from starlette.types import ASGIApp, Message, Receive, Scope, Send
from app.core.logging import correlation_id_context, logger, request_metadata_context
@@ -40,18 +39,26 @@ def clear() -> None:
logger.debug("Cleared correlation context")
-class CorrelationMiddleware(BaseHTTPMiddleware):
+class CorrelationMiddleware:
CORRELATION_HEADER = "X-Correlation-ID"
REQUEST_ID_HEADER = "X-Request-ID"
- async def dispatch(self, request: Request, call_next: Callable[[Request], Awaitable[Response]]) -> Response:
+ def __init__(self, app: ASGIApp) -> None:
+ self.app = app
+
+ async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
+ if scope["type"] != "http":
+ await self.app(scope, receive, send)
+ return
+
# Try to get correlation ID from headers
- correlation_id = (
- request.headers.get(self.CORRELATION_HEADER) or
- request.headers.get(self.REQUEST_ID_HEADER) or
- request.headers.get("x-correlation-id") or
- request.headers.get("x-request-id")
- )
+ headers = dict(scope["headers"])
+ correlation_id = None
+
+ for header_name in [b"x-correlation-id", b"x-request-id"]:
+ if header_name in headers:
+ correlation_id = headers[header_name].decode("latin-1")
+ break
# Generate correlation ID if not provided
if not correlation_id:
@@ -61,25 +68,27 @@ async def dispatch(self, request: Request, call_next: Callable[[Request], Awaita
correlation_id = CorrelationContext.set_correlation_id(correlation_id)
# Set request metadata
- client_ip = request.client.host if request.client else None
+ client = scope.get("client")
+ client_ip = client[0] if client else None
metadata = {
- "method": request.method,
- "path": request.url.path,
+ "method": scope["method"],
+ "path": scope["path"],
"client": {
"host": client_ip
} if client_ip else None
}
CorrelationContext.set_request_metadata(metadata)
- # Process request
- try:
- response = await call_next(request)
+ # Add correlation ID to response headers
+ async def send_wrapper(message: Message) -> None:
+ if message["type"] == "http.response.start":
+ headers = MutableHeaders(scope=message)
+ headers[self.CORRELATION_HEADER] = correlation_id
+ await send(message)
- # Add correlation ID to response headers
- response.headers[self.CORRELATION_HEADER] = correlation_id
-
- return response
- finally:
- # Clear context after request
- CorrelationContext.clear()
+ # Process request
+ await self.app(scope, receive, send_wrapper)
+
+ # Clear context after request
+ CorrelationContext.clear()
diff --git a/backend/app/core/database_context.py b/backend/app/core/database_context.py
index 9a350187..a8b53e9c 100644
--- a/backend/app/core/database_context.py
+++ b/backend/app/core/database_context.py
@@ -4,7 +4,11 @@
from dataclasses import dataclass
from typing import Any, AsyncContextManager, Protocol, TypeVar, runtime_checkable
-from motor.motor_asyncio import AsyncIOMotorClient, AsyncIOMotorClientSession, AsyncIOMotorDatabase
+from motor.motor_asyncio import (
+ AsyncIOMotorClient,
+ AsyncIOMotorClientSession,
+ AsyncIOMotorDatabase,
+)
from pymongo.errors import ServerSelectionTimeoutError
from app.core.logging import logger
@@ -95,7 +99,9 @@ async def connect(self) -> None:
logger.info(f"Connecting to MongoDB database: {self._db_name}")
- # Create client with configuration
+ # Always explicitly bind to current event loop for consistency
+ import asyncio
+
client: AsyncIOMotorClient = AsyncIOMotorClient(
self._config.mongodb_url,
serverSelectionTimeoutMS=self._config.server_selection_timeout_ms,
@@ -106,6 +112,7 @@ async def connect(self) -> None:
retryReads=self._config.retry_reads,
w=self._config.write_concern,
journal=self._config.journal,
+ io_loop=asyncio.get_running_loop() # Always bind to current loop
)
# Verify connection
@@ -204,6 +211,9 @@ def session(self) -> AsyncContextManager[DBSession]:
return self._connection.session()
+
+
+
class DatabaseConnectionPool:
def __init__(self) -> None:
self._connections: dict[str, AsyncDatabaseConnection] = {}
diff --git a/backend/app/core/dishka_lifespan.py b/backend/app/core/dishka_lifespan.py
index b3da5454..deb6961d 100644
--- a/backend/app/core/dishka_lifespan.py
+++ b/backend/app/core/dishka_lifespan.py
@@ -11,7 +11,7 @@
from app.core.tracing import init_tracing
from app.db.schema.schema_manager import SchemaManager
from app.events.schema.schema_registry import SchemaRegistryManager, initialize_event_schemas
-from app.services.sse.partitioned_event_router import PartitionedSSERouter
+from app.services.sse.kafka_redis_bridge import SSEKafkaRedisBridge
from app.settings import get_settings
@@ -34,8 +34,8 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
},
)
- # Metrics are now initialized directly by each service that needs them
- logger.info("OpenTelemetry metrics will be initialized by individual services")
+ # Metrics setup moved to app creation to allow middleware registration
+ logger.info("Lifespan start: tracing and services initialization")
# Initialize tracing
instrumentation_report = init_tracing(
@@ -62,7 +62,7 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
schema_registry = await container.get(SchemaRegistryManager)
await initialize_event_schemas(schema_registry)
- # Initialize database schema once per app startup
+ # Initialize database schema at application scope using app-scoped DB
database = await container.get(AsyncIOMotorDatabase)
schema_manager = SchemaManager(database)
await schema_manager.apply_all()
@@ -78,15 +78,15 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
await initialize_rate_limits(redis_client, settings)
logger.info("Rate limits initialized in Redis")
- # Start SSE router to ensure consumers are running before any events are published
- _ = await container.get(PartitionedSSERouter)
- logger.info("SSE router started with consumer pool")
+ # Rate limit middleware added during app creation; service resolved lazily at runtime
+
+    # Start SSE Kafka→Redis bridge to ensure consumers are running before any events are published
+ _ = await container.get(SSEKafkaRedisBridge)
+ logger.info("SSE KafkaโRedis bridge started with consumer pool")
# All services initialized by dishka providers
logger.info("All services initialized by dishka providers")
- # Note: Daemonset creation is now handled by k8s_worker service
-
try:
yield
finally:
diff --git a/backend/app/core/logging.py b/backend/app/core/logging.py
index 544e61ee..6ece3d5d 100644
--- a/backend/app/core/logging.py
+++ b/backend/app/core/logging.py
@@ -5,6 +5,8 @@
from datetime import datetime, timezone
from typing import Any, Dict
+from opentelemetry import trace
+
from app.settings import get_settings
correlation_id_context: contextvars.ContextVar[str | None] = contextvars.ContextVar(
@@ -83,6 +85,12 @@ def format(self, record: logging.LogRecord) -> str:
if hasattr(record, 'client_host'):
log_data['client_host'] = record.client_host
+ # OpenTelemetry trace context (hexadecimal ids)
+ if hasattr(record, 'trace_id'):
+ log_data['trace_id'] = record.trace_id
+ if hasattr(record, 'span_id'):
+ log_data['span_id'] = record.span_id
+
if record.exc_info:
exc_text = self.formatException(record.exc_info)
log_data['exc_info'] = self._sanitize_sensitive_data(exc_text)
@@ -106,6 +114,25 @@ def setup_logger() -> logging.Logger:
correlation_filter = CorrelationFilter()
console_handler.addFilter(correlation_filter)
+ class TracingFilter(logging.Filter):
+ def filter(self, record: logging.LogRecord) -> bool:
+ # Inline minimal helpers to avoid circular import on tracing.utils
+ span = trace.get_current_span()
+ trace_id = None
+ span_id = None
+ if span and span.is_recording():
+ span_context = span.get_span_context()
+ if span_context.is_valid:
+ trace_id = format(span_context.trace_id, '032x')
+ span_id = format(span_context.span_id, '016x')
+ if trace_id:
+ record.trace_id = trace_id
+ if span_id:
+ record.span_id = span_id
+ return True
+
+ console_handler.addFilter(TracingFilter())
+
logger.addHandler(console_handler)
# Get log level from configuration
diff --git a/backend/app/core/metrics/__init__.py b/backend/app/core/metrics/__init__.py
index 5937450a..16f45150 100644
--- a/backend/app/core/metrics/__init__.py
+++ b/backend/app/core/metrics/__init__.py
@@ -1,16 +1,3 @@
-"""
-Metrics package for application monitoring and observability.
-
-This package provides a modular metrics collection system organized by domain:
-- execution: Script execution metrics
-- events: Event processing and Kafka metrics
-- health: Health check metrics
-- connections: SSE/WebSocket connection metrics
-- database: Database operation metrics
-- kubernetes: Kubernetes and pod metrics
-- security: Security-related metrics
-"""
-
from app.core.metrics.base import BaseMetrics, MetricsConfig
from app.core.metrics.connections import ConnectionMetrics
from app.core.metrics.coordinator import CoordinatorMetrics
diff --git a/backend/app/core/metrics/notifications.py b/backend/app/core/metrics/notifications.py
index 2b80cddb..9797c270 100644
--- a/backend/app/core/metrics/notifications.py
+++ b/backend/app/core/metrics/notifications.py
@@ -43,10 +43,10 @@ def _create_instruments(self) -> None:
unit="1"
)
- # Priority metrics
- self.notifications_by_priority = self._meter.create_counter(
- name="notifications.by.priority.total",
- description="Total notifications by priority level",
+ # Severity metrics
+ self.notifications_by_severity = self._meter.create_counter(
+ name="notifications.by.severity.total",
+ description="Total notifications by severity level",
unit="1"
)
@@ -192,25 +192,25 @@ def _create_instruments(self) -> None:
)
def record_notification_sent(self, notification_type: str, channel: str = "in_app",
- priority: str = "medium") -> None:
+ severity: str = "medium") -> None:
self.notifications_sent.add(
1,
- attributes={"type": notification_type}
+ attributes={"category": notification_type}
)
self.notifications_by_channel.add(
1,
attributes={
"channel": channel,
- "type": notification_type
+ "category": notification_type
}
)
- self.notifications_by_priority.add(
+ self.notifications_by_severity.add(
1,
attributes={
- "priority": priority,
- "type": notification_type
+ "severity": severity,
+ "category": notification_type
}
)
@@ -218,7 +218,7 @@ def record_notification_failed(self, notification_type: str, error: str, channel
self.notifications_failed.add(
1,
attributes={
- "type": notification_type,
+ "category": notification_type,
"error": error
}
)
@@ -235,14 +235,14 @@ def record_notification_delivery_time(self, duration_seconds: float, notificatio
channel: str = "in_app") -> None:
self.notification_delivery_time.record(
duration_seconds,
- attributes={"type": notification_type}
+ attributes={"category": notification_type}
)
self.channel_delivery_time.record(
duration_seconds,
attributes={
"channel": channel,
- "type": notification_type
+ "category": notification_type
}
)
@@ -269,18 +269,18 @@ def record_notification_status_change(self, notification_id: str, from_status: s
def record_notification_read(self, notification_type: str, time_to_read_seconds: float) -> None:
self.notifications_read.add(
1,
- attributes={"type": notification_type}
+ attributes={"category": notification_type}
)
self.time_to_read.record(
time_to_read_seconds,
- attributes={"type": notification_type}
+ attributes={"category": notification_type}
)
def record_notification_clicked(self, notification_type: str) -> None:
self.notifications_clicked.add(
1,
- attributes={"type": notification_type}
+ attributes={"category": notification_type}
)
def update_unread_count(self, user_id: str, count: int) -> None:
@@ -299,7 +299,7 @@ def record_notification_throttled(self, notification_type: str, user_id: str) ->
self.notifications_throttled.add(
1,
attributes={
- "type": notification_type,
+ "category": notification_type,
"user_id": user_id
}
)
@@ -314,7 +314,7 @@ def record_notification_retry(self, notification_type: str, attempt_number: int,
self.notification_retries.add(
1,
attributes={
- "type": notification_type,
+ "category": notification_type,
"attempt": str(attempt_number),
"success": str(success)
}
@@ -323,24 +323,24 @@ def record_notification_retry(self, notification_type: str, attempt_number: int,
if attempt_number > 1: # Only record retry success rate for actual retries
self.retry_success_rate.record(
100.0 if success else 0.0,
- attributes={"type": notification_type}
+ attributes={"category": notification_type}
)
def record_batch_processed(self, batch_size_count: int, processing_time_seconds: float,
notification_type: str = "mixed") -> None:
self.batch_notifications_processed.add(
batch_size_count,
- attributes={"type": notification_type}
+ attributes={"category": notification_type}
)
self.batch_processing_time.record(
processing_time_seconds,
- attributes={"type": notification_type}
+ attributes={"category": notification_type}
)
self.batch_size.record(
batch_size_count,
- attributes={"type": notification_type}
+ attributes={"category": notification_type}
)
def record_template_render(self, duration_seconds: float, template_name: str, success: bool) -> None:
@@ -411,7 +411,7 @@ def record_subscription_change(self, user_id: str, notification_type: str, actio
1,
attributes={
"user_id": user_id,
- "type": notification_type,
+ "category": notification_type,
"action": action # "subscribe" or "unsubscribe"
}
)
diff --git a/backend/app/core/metrics/rate_limit.py b/backend/app/core/metrics/rate_limit.py
index d1e43ffb..89665023 100644
--- a/backend/app/core/metrics/rate_limit.py
+++ b/backend/app/core/metrics/rate_limit.py
@@ -103,14 +103,5 @@ def _create_instruments(self) -> None:
unit="1",
)
- # IP vs User metrics
- self.ip_checks = self._meter.create_counter(
- name="rate_limit.ip.checks.total",
- description="Number of IP-based rate limit checks",
- unit="1",
- )
- self.user_checks = self._meter.create_counter(
- name="rate_limit.user.checks.total",
- description="Number of user-based rate limit checks",
- unit="1",
- )
+ # Authenticated vs anonymous checks can be derived from the labels on requests_total;
+ # no separate IP/user counters are kept, which avoids duplicated metrics.
diff --git a/backend/app/core/middlewares/__init__.py b/backend/app/core/middlewares/__init__.py
index e69de29b..a1a2441d 100644
--- a/backend/app/core/middlewares/__init__.py
+++ b/backend/app/core/middlewares/__init__.py
@@ -0,0 +1,13 @@
+from .cache import CacheControlMiddleware
+from .metrics import MetricsMiddleware, create_system_metrics, setup_metrics
+from .rate_limit import RateLimitMiddleware
+from .request_size_limit import RequestSizeLimitMiddleware
+
+__all__ = [
+ "CacheControlMiddleware",
+ "MetricsMiddleware",
+ "setup_metrics",
+ "create_system_metrics",
+ "RequestSizeLimitMiddleware",
+ "RateLimitMiddleware",
+]
diff --git a/backend/app/core/middlewares/cache.py b/backend/app/core/middlewares/cache.py
index 65e74ee9..e2e8a780 100644
--- a/backend/app/core/middlewares/cache.py
+++ b/backend/app/core/middlewares/cache.py
@@ -1,13 +1,12 @@
-from typing import Awaitable, Callable, Dict
+from typing import Dict
-from fastapi import Request, Response
-from starlette.middleware.base import BaseHTTPMiddleware
-from starlette.types import ASGIApp
+from starlette.datastructures import MutableHeaders
+from starlette.types import ASGIApp, Message, Receive, Scope, Send
-class CacheControlMiddleware(BaseHTTPMiddleware):
+class CacheControlMiddleware:
def __init__(self, app: ASGIApp):
- super().__init__(app)
+ self.app = app
self.cache_policies: Dict[str, str] = {
"/api/v1/k8s-limits": "public, max-age=300", # 5 minutes
"/api/v1/example-scripts": "public, max-age=600", # 10 minutes
@@ -16,23 +15,39 @@ def __init__(self, app: ASGIApp):
"/api/v1/notifications/unread-count": "private, no-cache", # Always revalidate
}
- async def dispatch(self, request: Request, call_next: Callable[[Request], Awaitable[Response]]) -> Response:
- response: Response = await call_next(request)
-
- # Only add cache headers for successful GET requests
- if request.method == "GET" and response.status_code == 200:
- path = request.url.path
-
- # Find matching cache policy
- cache_control = self._get_cache_policy(path)
- if cache_control:
- response.headers["Cache-Control"] = cache_control
-
- # Add ETag support for better caching
- if "public" in cache_control:
- response.headers["Vary"] = "Accept-Encoding"
-
- return response
+ async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
+ if scope["type"] != "http":
+ await self.app(scope, receive, send)
+ return
+
+ method = scope["method"]
+ path = scope["path"]
+
+ # Only modify headers for GET requests
+ if method != "GET":
+ await self.app(scope, receive, send)
+ return
+
+ cache_control = self._get_cache_policy(path)
+ if not cache_control:
+ await self.app(scope, receive, send)
+ return
+
+ async def send_wrapper(message: Message) -> None:
+ if message["type"] == "http.response.start":
+ # Only add cache headers for 200 responses
+ status_code = message.get("status", 200)
+ if status_code == 200:
+ headers = MutableHeaders(scope=message)
+ headers["Cache-Control"] = cache_control
+
+ # Add ETag support for better caching
+ if "public" in cache_control:
+ headers["Vary"] = "Accept-Encoding"
+
+ await send(message)
+
+ await self.app(scope, receive, send_wrapper)
def _get_cache_policy(self, path: str) -> str | None:
# Exact match first
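The rewritten CacheControlMiddleware is a plain ASGI callable, so it is still registered the usual way. A minimal wiring sketch, not taken from this patch (the FastAPI app and route below are hypothetical; only the /api/v1/k8s-limits policy comes from the table above):

    from fastapi import FastAPI

    from app.core.middlewares import CacheControlMiddleware

    app = FastAPI()
    # Starlette instantiates the class with the downstream ASGI app as its argument.
    app.add_middleware(CacheControlMiddleware)

    @app.get("/api/v1/k8s-limits")
    async def k8s_limits() -> dict[str, int]:
        # A successful GET here is served with "Cache-Control: public, max-age=300"
        # and "Vary: Accept-Encoding", per the policy table in the middleware.
        return {"max_cpu": 2}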
diff --git a/backend/app/core/middlewares/metrics.py b/backend/app/core/middlewares/metrics.py
index 9f47982a..58920c8e 100644
--- a/backend/app/core/middlewares/metrics.py
+++ b/backend/app/core/middlewares/metrics.py
@@ -1,27 +1,26 @@
-"""OpenTelemetry metrics configuration and setup."""
import os
+import re
import time
-from typing import Callable, cast
import psutil
-from fastapi import FastAPI, Request, Response
+from fastapi import FastAPI
from opentelemetry import metrics
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.metrics import CallbackOptions, Observation
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import SERVICE_NAME, SERVICE_VERSION, Resource
-from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.types import ASGIApp, Message, Receive, Scope, Send
from app.core.logging import logger
from app.settings import get_settings
-class MetricsMiddleware(BaseHTTPMiddleware):
+class MetricsMiddleware:
"""Middleware to collect HTTP metrics using OpenTelemetry."""
- def __init__(self, app: FastAPI) -> None:
- super().__init__(app)
+ def __init__(self, app: ASGIApp) -> None:
+ self.app = app
self.meter = metrics.get_meter(__name__)
# Create metrics instruments
@@ -55,26 +54,27 @@ def __init__(self, app: FastAPI) -> None:
unit="requests"
)
- async def dispatch(self, request: Request, call_next: Callable) -> Response:
- """Process request and collect metrics."""
- # Skip metrics endpoint to avoid recursion
- if request.url.path == "/metrics":
- response = await call_next(request)
- return cast(Response, response)
+ async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
+ if scope["type"] != "http":
+ await self.app(scope, receive, send)
+ return
- # Extract labels
- method = request.method
- path = request.url.path
+ path = scope["path"]
+
+ # Skip metrics endpoint to avoid recursion
+ if path == "/metrics":
+ await self.app(scope, receive, send)
+ return
- # Clean path for cardinality (remove IDs)
- # e.g., /api/v1/users/123 -> /api/v1/users/{id}
+ method = scope["method"]
path_template = self._get_path_template(path)
# Increment active requests
self.active_requests.add(1, {"method": method, "path": path_template})
# Record request size
- content_length = request.headers.get("content-length")
+ headers = dict(scope["headers"])
+ content_length = headers.get(b"content-length")
if content_length:
self.request_size.record(
int(content_length),
@@ -83,57 +83,45 @@ async def dispatch(self, request: Request, call_next: Callable) -> Response:
# Time the request
start_time = time.time()
+ status_code = 500 # Default to error if not set
+ response_content_length = None
- try:
- response = await call_next(request)
- status_code = response.status_code
-
- # Record metrics
- duration = time.time() - start_time
-
- labels = {
- "method": method,
- "path": path_template,
- "status": str(status_code)
- }
-
- self.request_counter.add(1, labels)
- self.request_duration.record(duration, labels)
-
- # Record response size if available
- response_headers = getattr(response, "headers", None)
- if response_headers and "content-length" in response_headers:
- self.response_size.record(
- int(response_headers["content-length"]),
- labels
- )
+ async def send_wrapper(message: Message) -> None:
+ nonlocal status_code, response_content_length
+
+ if message["type"] == "http.response.start":
+ status_code = message["status"]
+ response_headers = dict(message.get("headers", []))
+ content_length_header = response_headers.get(b"content-length")
+ if content_length_header:
+ response_content_length = int(content_length_header)
+
+ await send(message)
- return cast(Response, response)
+ await self.app(scope, receive, send_wrapper)
- except Exception:
- # Record error metrics
- duration = time.time() - start_time
+ # Record metrics after response
+ duration = time.time() - start_time
- labels = {
- "method": method,
- "path": path_template,
- "status": "500"
- }
+ labels = {
+ "method": method,
+ "path": path_template,
+ "status": str(status_code)
+ }
- self.request_counter.add(1, labels)
- self.request_duration.record(duration, labels)
+ self.request_counter.add(1, labels)
+ self.request_duration.record(duration, labels)
- raise
+ if response_content_length is not None:
+ self.response_size.record(response_content_length, labels)
- finally:
- # Decrement active requests
- self.active_requests.add(-1, {"method": method, "path": path_template})
+ # Decrement active requests
+ self.active_requests.add(-1, {"method": method, "path": path_template})
@staticmethod
def _get_path_template(path: str) -> str:
"""Convert path to template for lower cardinality."""
# Common patterns to replace
- import re
# UUID pattern
path = re.sub(
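The _get_path_template hunk is cut off above; the idea is to collapse volatile path segments with a couple of re.sub passes so metric labels stay low-cardinality. A rough standalone sketch of that idea (the exact patterns in the repo are not shown here, so these regexes are illustrative):

    import re

    _UUID = re.compile(
        r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"
    )
    _NUMERIC_ID = re.compile(r"/\d+(?=/|$)")

    def path_template(path: str) -> str:
        """Collapse UUIDs and numeric ids so metric labels stay low-cardinality."""
        path = _UUID.sub("{id}", path)
        path = _NUMERIC_ID.sub("/{id}", path)
        return path

    assert path_template("/api/v1/users/123") == "/api/v1/users/{id}"
    assert path_template(
        "/api/v1/executions/550e8400-e29b-41d4-a716-446655440000"
    ) == "/api/v1/executions/{id}"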
diff --git a/backend/app/core/middlewares/rate_limit.py b/backend/app/core/middlewares/rate_limit.py
index f9ebd367..e21098d1 100644
--- a/backend/app/core/middlewares/rate_limit.py
+++ b/backend/app/core/middlewares/rate_limit.py
@@ -1,12 +1,13 @@
-from datetime import datetime, timedelta, timezone
-from typing import Awaitable, Callable, Optional
+from datetime import datetime, timezone
-from fastapi import FastAPI, Request, Response
-from fastapi.responses import JSONResponse
+from starlette.datastructures import MutableHeaders
+from starlette.requests import Request
+from starlette.responses import JSONResponse
+from starlette.types import ASGIApp, Message, Receive, Scope, Send
-from app.core.logging import logger
from app.core.utils import get_client_ip
-from app.domain.rate_limit import RateLimitAlgorithm, RateLimitStatus
+from app.domain.rate_limit import RateLimitStatus
+from app.domain.user.user_models import User
from app.services.rate_limit_service import RateLimitService
from app.settings import Settings
@@ -21,7 +22,7 @@ class RateLimitMiddleware:
- Dynamic configuration via Redis
- Graceful degradation on errors
"""
-
+
# Paths exempt from rate limiting
EXCLUDED_PATHS = frozenset({
"/health",
@@ -33,97 +34,97 @@ class RateLimitMiddleware:
"/api/v1/auth/register",
"/api/v1/auth/logout"
})
-
+
def __init__(
self,
- app: FastAPI,
- rate_limit_service: RateLimitService,
- settings: Settings
- ):
+ app: ASGIApp,
+ rate_limit_service: RateLimitService | None = None,
+ settings: Settings | None = None,
+ ) -> None:
self.app = app
self.rate_limit_service = rate_limit_service
self.settings = settings
- self.enabled = settings.RATE_LIMIT_ENABLED
-
- async def __call__(
- self,
- request: Request,
- call_next: Callable[[Request], Awaitable[Response]]
- ) -> Response:
- """Process request through rate limiting."""
-
- # Fast path: skip if disabled or excluded
- if not self.enabled or request.url.path in self.EXCLUDED_PATHS:
- return await call_next(request)
-
- # Extract identifier
- identifier = self._extract_identifier(request)
- username = self._extract_username(request)
-
- # Check rate limit
- status = await self._check_rate_limit(identifier, request.url.path, username)
+ # Default to enabled unless settings says otherwise
+ self.enabled = bool(settings.RATE_LIMIT_ENABLED) if settings else True
+
+ async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
+ if scope["type"] != "http":
+ await self.app(scope, receive, send)
+ return
+
+ path = scope["path"]
- # Handle rate limit exceeded
+ if not self.enabled or path in self.EXCLUDED_PATHS:
+ await self.app(scope, receive, send)
+ return
+
+ # Lazily resolve the service from the app's dishka container if it was not injected
+ if self.rate_limit_service is None:
+ asgi_app = scope.get("app")
+ if asgi_app:
+ container = asgi_app.state.dishka_container
+ async with container() as container_scope:
+ self.rate_limit_service = await container_scope.get(RateLimitService)
+
+ if self.rate_limit_service is None:
+ await self.app(scope, receive, send)
+ return
+
+ # Build request object to access state
+ request = Request(scope, receive=receive)
+ user_id = self._extract_user_id(request)
+
+ status = await self._check_rate_limit(user_id, path)
+
if not status.allowed:
- return self._rate_limit_exceeded_response(status)
-
- # Process request and add headers
- response = await call_next(request)
- self._add_rate_limit_headers(response, status)
-
- return response
-
- def _extract_identifier(self, request: Request) -> str:
- """Extract user ID or IP address as identifier."""
- # Check for authenticated user in request state
- if hasattr(request.state, "user") and request.state.user:
- return str(request.state.user.user_id)
-
- # Fall back to IP address for anonymous users
+ response = self._rate_limit_exceeded_response(status)
+ await response(scope, receive, send)
+ return
+
+ # Add rate limit headers to response
+ async def send_wrapper(message: Message) -> None:
+ if message["type"] == "http.response.start":
+ headers = MutableHeaders(scope=message)
+ headers["X-RateLimit-Limit"] = str(status.limit)
+ headers["X-RateLimit-Remaining"] = str(status.remaining)
+ headers["X-RateLimit-Reset"] = str(int(status.reset_at.timestamp()))
+ await send(message)
+
+ await self.app(scope, receive, send_wrapper)
+
+ def _extract_user_id(self, request: Request) -> str:
+ user: User | None = request.state.__dict__.get("user")
+ if user:
+ return str(user.user_id)
return f"ip:{get_client_ip(request)}"
-
- def _extract_username(self, request: Request) -> Optional[str]:
- """Extract username if authenticated."""
- if hasattr(request.state, "user") and request.state.user:
- return getattr(request.state.user, "username", None)
- return None
-
+
async def _check_rate_limit(
- self,
- identifier: str,
- endpoint: str,
- username: Optional[str]
+ self,
+ user_id: str,
+ endpoint: str
) -> RateLimitStatus:
- """Check rate limit, with fallback on errors."""
- try:
- return await self.rate_limit_service.check_rate_limit(
- user_id=identifier,
- endpoint=endpoint,
- username=username
- )
- except Exception as e:
- # Log error but don't block request
- logger.error(f"Rate limit check failed for {identifier}: {e}")
- # Return unlimited status on error (fail open)
+ # The service should be available by now; if it is not, fail open and allow the request
+ if self.rate_limit_service is None:
return RateLimitStatus(
allowed=True,
- limit=999999,
- remaining=999999,
- reset_at=datetime.now(timezone.utc) + timedelta(hours=1),
- retry_after=None,
- matched_rule=None,
- algorithm=RateLimitAlgorithm.SLIDING_WINDOW
+ limit=0,
+ remaining=0,
+ reset_at=datetime.now(timezone.utc),
)
-
- def _rate_limit_exceeded_response(self, status: RateLimitStatus) -> Response:
- """Create 429 response for rate limit exceeded."""
+
+ return await self.rate_limit_service.check_rate_limit(
+ user_id=user_id,
+ endpoint=endpoint
+ )
+
+ def _rate_limit_exceeded_response(self, status: RateLimitStatus) -> JSONResponse:
headers = {
"X-RateLimit-Limit": str(status.limit),
"X-RateLimit-Remaining": "0",
"X-RateLimit-Reset": str(int(status.reset_at.timestamp())),
"Retry-After": str(status.retry_after or 60)
}
-
+
return JSONResponse(
status_code=429,
content={
@@ -133,9 +134,3 @@ def _rate_limit_exceeded_response(self, status: RateLimitStatus) -> Response:
},
headers=headers
)
-
- def _add_rate_limit_headers(self, response: Response, status: RateLimitStatus) -> None:
- """Add rate limit headers to response."""
- response.headers["X-RateLimit-Limit"] = str(status.limit)
- response.headers["X-RateLimit-Remaining"] = str(status.remaining)
- response.headers["X-RateLimit-Reset"] = str(int(status.reset_at.timestamp()))
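Both this middleware and the cache middleware now inject headers by wrapping send and mutating the raw http.response.start message through MutableHeaders. A tiny standalone illustration of that mechanism (the header values are made up):

    from starlette.datastructures import MutableHeaders

    message = {"type": "http.response.start", "status": 200, "headers": []}
    headers = MutableHeaders(scope=message)
    headers["X-RateLimit-Limit"] = "100"
    headers["X-RateLimit-Remaining"] = "42"
    # MutableHeaders writes lower-cased byte pairs into the ASGI header list in place:
    assert (b"x-ratelimit-limit", b"100") in message["headers"]
    assert (b"x-ratelimit-remaining", b"42") in message["headers"]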
diff --git a/backend/app/core/middlewares/request_size_limit.py b/backend/app/core/middlewares/request_size_limit.py
index 3ef95860..a4ff33b0 100644
--- a/backend/app/core/middlewares/request_size_limit.py
+++ b/backend/app/core/middlewares/request_size_limit.py
@@ -1,25 +1,32 @@
-from typing import Awaitable, Callable
+from starlette.responses import JSONResponse
+from starlette.types import ASGIApp, Receive, Scope, Send
-from fastapi import FastAPI, HTTPException, Request
-from starlette.middleware.base import BaseHTTPMiddleware
-from starlette.responses import Response
-
-class RequestSizeLimitMiddleware(BaseHTTPMiddleware):
+class RequestSizeLimitMiddleware:
"""Middleware to limit request size, default 10MB"""
- def __init__(self, app: FastAPI, max_size_mb: int = 10) -> None:
- super().__init__(app)
+ def __init__(self, app: ASGIApp, max_size_mb: int = 10) -> None:
+ self.app = app
self.max_size_bytes = max_size_mb * 1024 * 1024
- async def dispatch(
- self, request: Request, call_next: Callable[[Request], Awaitable[Response]]
- ) -> Response:
- if request.headers.get("content-length"):
- content_length = int(request.headers.get("content-length", 0))
+ async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
+ if scope["type"] != "http":
+ await self.app(scope, receive, send)
+ return
+
+ headers = dict(scope["headers"])
+ content_length_header = headers.get(b"content-length")
+
+ if content_length_header:
+ content_length = int(content_length_header)
if content_length > self.max_size_bytes:
- raise HTTPException(
+ response = JSONResponse(
status_code=413,
- detail=f"Request too large. Maximum size is {self.max_size_bytes / 1024 / 1024}MB",
+ content={
+ "detail": f"Request too large. Maximum size is {self.max_size_bytes / 1024 / 1024}MB"
+ }
)
- return await call_next(request)
+ await response(scope, receive, send)
+ return
+
+ await self.app(scope, receive, send)
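Since the 413 is now returned as a JSONResponse instead of a raised HTTPException, it works even outside FastAPI's exception-handling layer. A rough pytest-style check, not taken from this patch (assumes Starlette's httpx-based TestClient and a throwaway route):

    from fastapi import FastAPI
    from fastapi.testclient import TestClient

    from app.core.middlewares import RequestSizeLimitMiddleware

    app = FastAPI()
    app.add_middleware(RequestSizeLimitMiddleware, max_size_mb=1)

    @app.post("/echo")
    async def echo() -> dict[str, bool]:
        return {"ok": True}

    client = TestClient(app)
    # A Content-Length above the 1 MB limit is rejected before reaching the route.
    assert client.post("/echo", content=b"x" * (2 * 1024 * 1024)).status_code == 413
    assert client.post("/echo", content=b"tiny").status_code == 200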
diff --git a/backend/app/core/providers.py b/backend/app/core/providers.py
index ae7c32a3..356098e5 100644
--- a/backend/app/core/providers.py
+++ b/backend/app/core/providers.py
@@ -1,8 +1,9 @@
+from typing import AsyncIterator
+
import redis.asyncio as redis
from dishka import Provider, Scope, provide
from motor.motor_asyncio import AsyncIOMotorDatabase
-from app.api.dependencies import AuthService
from app.core.database_context import (
AsyncDatabaseConnection,
DatabaseConfig,
@@ -36,32 +37,35 @@
from app.db.repositories.admin.admin_settings_repository import AdminSettingsRepository
from app.db.repositories.admin.admin_user_repository import AdminUserRepository
from app.db.repositories.dlq_repository import DLQRepository
-from app.db.repositories.idempotency_repository import IdempotencyRepository
from app.db.repositories.replay_repository import ReplayRepository
from app.db.repositories.resource_allocation_repository import ResourceAllocationRepository
from app.db.repositories.user_settings_repository import UserSettingsRepository
-from app.dlq.consumer import DLQConsumerRegistry
from app.dlq.manager import DLQManager, create_dlq_manager
-from app.events.core.producer import ProducerConfig, UnifiedProducer
+from app.domain.saga.models import SagaConfig
+from app.events.core import ProducerConfig, UnifiedProducer
from app.events.event_store import EventStore, create_event_store
from app.events.event_store_consumer import EventStoreConsumer, create_event_store_consumer
from app.events.schema.schema_registry import SchemaRegistryManager, create_schema_registry_manager
from app.infrastructure.kafka.topics import get_all_topics
-from app.services.admin_user_service import AdminUserService
+from app.services.admin import AdminEventsService, AdminSettingsService, AdminUserService
+from app.services.auth_service import AuthService
from app.services.coordinator.coordinator import ExecutionCoordinator
from app.services.event_bus import EventBusManager
from app.services.event_replay.replay_service import EventReplayService
from app.services.event_service import EventService
from app.services.execution_service import ExecutionService
+from app.services.grafana_alert_processor import GrafanaAlertProcessor
from app.services.idempotency import IdempotencyConfig, IdempotencyManager
+from app.services.idempotency.idempotency_manager import create_idempotency_manager
+from app.services.idempotency.redis_repository import RedisIdempotencyRepository
from app.services.kafka_event_service import KafkaEventService
from app.services.notification_service import NotificationService
from app.services.rate_limit_service import RateLimitService
from app.services.replay_service import ReplayService
-from app.services.saga.saga_orchestrator import SagaOrchestrator, create_saga_orchestrator
-from app.services.saga_service import SagaService
+from app.services.saga import SagaOrchestrator, create_saga_orchestrator
+from app.services.saga.saga_service import SagaService
from app.services.saved_script_service import SavedScriptService
-from app.services.sse.partitioned_event_router import PartitionedSSERouter, create_partitioned_sse_router
+from app.services.sse.kafka_redis_bridge import SSEKafkaRedisBridge, create_sse_kafka_redis_bridge
from app.services.sse.redis_bus import SSERedisBus
from app.services.sse.sse_service import SSEService
from app.services.sse.sse_shutdown_manager import SSEShutdownManager, create_sse_shutdown_manager
@@ -80,8 +84,8 @@ def get_settings(self) -> Settings:
class DatabaseProvider(Provider):
scope = Scope.APP
- @provide
- async def get_database_connection(self, settings: Settings) -> AsyncDatabaseConnection:
+ @provide(scope=Scope.APP)
+ async def get_database_connection(self, settings: Settings) -> AsyncIterator[AsyncDatabaseConnection]:
db_config = DatabaseConfig(
mongodb_url=settings.MONGODB_URL,
db_name=settings.PROJECT_NAME + "_test" if settings.TESTING else settings.PROJECT_NAME,
@@ -93,7 +97,10 @@ async def get_database_connection(self, settings: Settings) -> AsyncDatabaseConn
db_connection = create_database_connection(db_config)
await db_connection.connect()
- return db_connection
+ try:
+ yield db_connection
+ finally:
+ await db_connection.disconnect()
@provide
def get_database(self, db_connection: AsyncDatabaseConnection) -> AsyncIOMotorDatabase:
@@ -104,7 +111,8 @@ class RedisProvider(Provider):
scope = Scope.APP
@provide
- async def get_redis_client(self, settings: Settings) -> redis.Redis:
+ async def get_redis_client(self, settings: Settings) -> AsyncIterator[redis.Redis]:
+ # Create Redis client - it will automatically use the current event loop
client = redis.Redis(
host=settings.REDIS_HOST,
port=settings.REDIS_PORT,
@@ -118,7 +126,10 @@ async def get_redis_client(self, settings: Settings) -> redis.Redis:
)
# Test connection
await client.ping()
- return client
+ try:
+ yield client
+ finally:
+ await client.aclose()
@provide
def get_rate_limit_service(
@@ -146,33 +157,42 @@ async def get_kafka_producer(
self,
settings: Settings,
schema_registry: SchemaRegistryManager
- ) -> UnifiedProducer:
+ ) -> AsyncIterator[UnifiedProducer]:
config = ProducerConfig(
bootstrap_servers=settings.KAFKA_BOOTSTRAP_SERVERS
)
producer = UnifiedProducer(config, schema_registry)
await producer.start()
- return producer
+ try:
+ yield producer
+ finally:
+ await producer.stop()
@provide
- async def get_dlq_manager(self, database: AsyncIOMotorDatabase) -> DLQManager:
+ async def get_dlq_manager(self, database: AsyncIOMotorDatabase) -> AsyncIterator[DLQManager]:
manager = create_dlq_manager(database)
await manager.start()
- return manager
+ try:
+ yield manager
+ finally:
+ await manager.stop()
@provide
- def get_dlq_consumer_registry(self) -> DLQConsumerRegistry:
- return DLQConsumerRegistry()
+ def get_idempotency_repository(self,
+ redis_client: redis.Redis) -> RedisIdempotencyRepository:
+ return RedisIdempotencyRepository(redis_client,
+ key_prefix="idempotency")
@provide
- def get_idempotency_repository(self, database: AsyncIOMotorDatabase) -> IdempotencyRepository:
- return IdempotencyRepository(database)
-
- @provide
- async def get_idempotency_manager(self, idempotency_repository: IdempotencyRepository) -> IdempotencyManager:
- manager = IdempotencyManager(IdempotencyConfig(), idempotency_repository)
+ async def get_idempotency_manager(self,
+ repo: RedisIdempotencyRepository) -> AsyncIterator[IdempotencyManager]:
+ manager = create_idempotency_manager(repository=repo,
+ config=IdempotencyConfig())
await manager.initialize()
- return manager
+ try:
+ yield manager
+ finally:
+ await manager.close()
class EventProvider(Provider):
@@ -201,7 +221,7 @@ async def get_event_store_consumer(
event_store: EventStore,
schema_registry: SchemaRegistryManager,
kafka_producer: UnifiedProducer
- ) -> EventStoreConsumer:
+ ) -> AsyncIterator[EventStoreConsumer]:
topics = get_all_topics()
consumer = create_event_store_consumer(
event_store=event_store,
@@ -210,7 +230,10 @@ async def get_event_store_consumer(
producer=kafka_producer
)
await consumer.start()
- return consumer
+ try:
+ yield consumer
+ finally:
+ await consumer.stop()
@provide
def get_event_bus_manager(self) -> EventBusManager:
@@ -272,31 +295,29 @@ def get_replay_metrics(self) -> ReplayMetrics:
def get_security_metrics(self) -> SecurityMetrics:
return SecurityMetrics()
- @provide
+ @provide(scope=Scope.REQUEST)
def get_sse_shutdown_manager(self) -> SSEShutdownManager:
return create_sse_shutdown_manager()
@provide(scope=Scope.APP)
- async def get_partitioned_sse_router(
+ async def get_sse_kafka_redis_bridge(
self,
schema_registry: SchemaRegistryManager,
settings: Settings,
event_metrics: EventMetrics,
- connection_metrics: ConnectionMetrics,
- shutdown_manager: SSEShutdownManager,
sse_redis_bus: SSERedisBus,
- ) -> PartitionedSSERouter:
- router = create_partitioned_sse_router(
+ ) -> AsyncIterator[SSEKafkaRedisBridge]:
+ router = create_sse_kafka_redis_bridge(
schema_registry=schema_registry,
settings=settings,
event_metrics=event_metrics,
- connection_metrics=connection_metrics,
sse_bus=sse_redis_bus,
)
- # Connect shutdown manager with router for coordination
- shutdown_manager.set_router(router)
await router.start()
- return router
+ try:
+ yield router
+ finally:
+ await router.stop()
@provide
def get_sse_repository(
@@ -306,18 +327,21 @@ def get_sse_repository(
return SSERepository(database)
@provide
- def get_sse_redis_bus(self, redis_client: redis.Redis) -> SSERedisBus:
- return SSERedisBus(redis_client)
+ async def get_sse_redis_bus(self, redis_client: redis.Redis) -> AsyncIterator[SSERedisBus]:
+ bus = SSERedisBus(redis_client)
+ yield bus
- @provide
+ @provide(scope=Scope.REQUEST)
def get_sse_service(
self,
sse_repository: SSERepository,
- router: PartitionedSSERouter,
+ router: SSEKafkaRedisBridge,
sse_redis_bus: SSERedisBus,
shutdown_manager: SSEShutdownManager,
settings: Settings
) -> SSEService:
+ # Ensure shutdown manager coordinates with the router in this request scope
+ shutdown_manager.set_router(router)
return SSEService(
repository=sse_repository,
router=router,
@@ -369,9 +393,11 @@ async def get_kafka_event_service(
async def get_user_settings_service(
self,
repository: UserSettingsRepository,
- kafka_event_service: KafkaEventService
+ kafka_event_service: KafkaEventService,
+ event_bus_manager: EventBusManager
) -> UserSettingsService:
service = UserSettingsService(repository, kafka_event_service)
+ await service.initialize(event_bus_manager)
return service
@@ -382,15 +408,29 @@ class AdminServicesProvider(Provider):
def get_admin_events_repository(self, database: AsyncIOMotorDatabase) -> AdminEventsRepository:
return AdminEventsRepository(database)
+ @provide(scope=Scope.REQUEST)
+ def get_admin_events_service(
+ self,
+ admin_events_repository: AdminEventsRepository,
+ replay_service: ReplayService,
+ ) -> AdminEventsService:
+ return AdminEventsService(admin_events_repository, replay_service)
+
@provide
def get_admin_settings_repository(self, database: AsyncIOMotorDatabase) -> AdminSettingsRepository:
return AdminSettingsRepository(database)
+ @provide
+ def get_admin_settings_service(
+ self,
+ admin_settings_repository: AdminSettingsRepository,
+ ) -> AdminSettingsService:
+ return AdminSettingsService(admin_settings_repository)
+
@provide
def get_admin_user_repository(self, database: AsyncIOMotorDatabase) -> AdminUserRepository:
return AdminUserRepository(database)
-
@provide
def get_saga_repository(self, database: AsyncIOMotorDatabase) -> SagaRepository:
return SagaRepository(database)
@@ -400,22 +440,33 @@ def get_notification_repository(self, database: AsyncIOMotorDatabase) -> Notific
return NotificationRepository(database)
@provide
- async def get_notification_service(
+ def get_notification_service(
self,
notification_repository: NotificationRepository,
kafka_event_service: KafkaEventService,
event_bus_manager: EventBusManager,
- schema_registry: SchemaRegistryManager
+ schema_registry: SchemaRegistryManager,
+ sse_redis_bus: SSERedisBus,
+ settings: Settings,
) -> NotificationService:
service = NotificationService(
notification_repository=notification_repository,
event_service=kafka_event_service,
event_bus_manager=event_bus_manager,
- schema_registry_manager=schema_registry
+ schema_registry_manager=schema_registry,
+ sse_bus=sse_redis_bus,
+ settings=settings,
)
- await service.initialize()
+ service.initialize()
return service
+ @provide
+ def get_grafana_alert_processor(
+ self,
+ notification_service: NotificationService,
+ ) -> GrafanaAlertProcessor:
+ return GrafanaAlertProcessor(notification_service)
+
class BusinessServicesProvider(Provider):
scope = Scope.REQUEST
@@ -441,7 +492,7 @@ def get_replay_repository(self, database: AsyncIOMotorDatabase) -> ReplayReposit
return ReplayRepository(database)
@provide
- def get_saga_orchestrator(
+ async def get_saga_orchestrator(
self,
saga_repository: SagaRepository,
kafka_producer: UnifiedProducer,
@@ -449,8 +500,7 @@ def get_saga_orchestrator(
idempotency_manager: IdempotencyManager,
resource_allocation_repository: ResourceAllocationRepository,
settings: Settings,
- ) -> SagaOrchestrator:
- from app.domain.saga.models import SagaConfig
+ ) -> AsyncIterator[SagaOrchestrator]:
config = SagaConfig(
name="main-orchestrator",
timeout_seconds=300,
@@ -460,7 +510,7 @@ def get_saga_orchestrator(
store_events=True,
publish_commands=True,
)
- return create_saga_orchestrator(
+ orchestrator = create_saga_orchestrator(
saga_repository=saga_repository,
producer=kafka_producer,
event_store=event_store,
@@ -468,6 +518,10 @@ def get_saga_orchestrator(
resource_allocation_repository=resource_allocation_repository,
config=config,
)
+ try:
+ yield orchestrator
+ finally:
+ await orchestrator.stop()
@provide
def get_saga_service(
@@ -534,21 +588,25 @@ def get_admin_user_service(
)
@provide
- def get_execution_coordinator(
+ async def get_execution_coordinator(
self,
kafka_producer: UnifiedProducer,
schema_registry: SchemaRegistryManager,
event_store: EventStore,
execution_repository: ExecutionRepository,
idempotency_manager: IdempotencyManager,
- ) -> ExecutionCoordinator:
- return ExecutionCoordinator(
+ ) -> AsyncIterator[ExecutionCoordinator]:
+ coordinator = ExecutionCoordinator(
producer=kafka_producer,
schema_registry_manager=schema_registry,
event_store=event_store,
execution_repository=execution_repository,
idempotency_manager=idempotency_manager,
)
+ try:
+ yield coordinator
+ finally:
+ await coordinator.stop()
class ResultProcessorProvider(Provider):
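The recurring change in this provider module is replacing "return resource" with an async generator that yields the started resource and stops it in a finally block, so the container owns the lifecycle. A self-contained sketch of that pattern (FakeClient is hypothetical; dishka's make_async_container and close are assumed to drive the finalizer):

    import asyncio
    from typing import AsyncIterator

    from dishka import Provider, Scope, make_async_container, provide

    class FakeClient:
        async def start(self) -> None:
            print("started")

        async def stop(self) -> None:
            print("stopped")

    class ExampleProvider(Provider):
        scope = Scope.APP

        @provide(scope=Scope.APP)
        async def get_client(self) -> AsyncIterator[FakeClient]:
            client = FakeClient()
            await client.start()
            try:
                yield client            # consumers receive the started client
            finally:
                await client.stop()     # runs when the container is closed

    async def main() -> None:
        container = make_async_container(ExampleProvider())
        client = await container.get(FakeClient)
        assert isinstance(client, FakeClient)
        await container.close()         # triggers the finally-block cleanup

    asyncio.run(main())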
diff --git a/backend/app/core/security.py b/backend/app/core/security.py
index 4066102a..eb9b362f 100644
--- a/backend/app/core/security.py
+++ b/backend/app/core/security.py
@@ -6,7 +6,7 @@
from fastapi.security import OAuth2PasswordBearer
from passlib.context import CryptContext
-from app.schemas_pydantic.user import UserInDB
+from app.domain.user import User as DomainAdminUser
from app.settings import get_settings
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="/api/v1/login")
@@ -49,7 +49,7 @@ async def get_current_user(
self,
token: str,
user_repo: Any, # Avoid circular import by using Any
- ) -> UserInDB:
+ ) -> DomainAdminUser:
credentials_exception = HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Could not validate credentials",
diff --git a/backend/app/core/service_dependencies.py b/backend/app/core/service_dependencies.py
deleted file mode 100644
index cbf4f5ff..00000000
--- a/backend/app/core/service_dependencies.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from dishka import FromDishka
-
-from app.db.repositories import (
- UserRepository,
-)
-from app.db.repositories.admin.admin_events_repository import AdminEventsRepository
-from app.db.repositories.admin.admin_settings_repository import AdminSettingsRepository
-from app.db.repositories.admin.admin_user_repository import AdminUserRepository
-from app.db.repositories.dlq_repository import DLQRepository
-
-# Repositories (request-scoped)
-UserRepositoryDep = FromDishka[UserRepository]
-DLQRepositoryDep = FromDishka[DLQRepository]
-AdminEventsRepositoryDep = FromDishka[AdminEventsRepository]
-AdminSettingsRepositoryDep = FromDishka[AdminSettingsRepository]
-AdminUserRepositoryDep = FromDishka[AdminUserRepository]
diff --git a/backend/app/db/__init__.py b/backend/app/db/__init__.py
index e69de29b..110071b1 100644
--- a/backend/app/db/__init__.py
+++ b/backend/app/db/__init__.py
@@ -0,0 +1,29 @@
+from app.db.repositories import (
+ AdminSettingsRepository,
+ AdminUserRepository,
+ EventRepository,
+ ExecutionRepository,
+ NotificationRepository,
+ ReplayRepository,
+ SagaRepository,
+ SavedScriptRepository,
+ SSERepository,
+ UserRepository,
+ UserSettingsRepository,
+)
+from app.db.schema.schema_manager import SchemaManager
+
+__all__ = [
+ "AdminSettingsRepository",
+ "AdminUserRepository",
+ "EventRepository",
+ "ExecutionRepository",
+ "NotificationRepository",
+ "ReplayRepository",
+ "SagaRepository",
+ "SavedScriptRepository",
+ "SSERepository",
+ "UserRepository",
+ "UserSettingsRepository",
+ "SchemaManager",
+]
diff --git a/backend/app/db/repositories/__init__.py b/backend/app/db/repositories/__init__.py
index 917f5518..1e985797 100644
--- a/backend/app/db/repositories/__init__.py
+++ b/backend/app/db/repositories/__init__.py
@@ -3,10 +3,12 @@
from app.db.repositories.event_repository import EventRepository
from app.db.repositories.execution_repository import ExecutionRepository
from app.db.repositories.notification_repository import NotificationRepository
+from app.db.repositories.replay_repository import ReplayRepository
from app.db.repositories.saga_repository import SagaRepository
from app.db.repositories.saved_script_repository import SavedScriptRepository
from app.db.repositories.sse_repository import SSERepository
from app.db.repositories.user_repository import UserRepository
+from app.db.repositories.user_settings_repository import UserSettingsRepository
__all__ = [
"AdminSettingsRepository",
@@ -14,8 +16,10 @@
"EventRepository",
"ExecutionRepository",
"NotificationRepository",
+ "ReplayRepository",
"SagaRepository",
"SavedScriptRepository",
"SSERepository",
+ "UserSettingsRepository",
"UserRepository",
]
diff --git a/backend/app/db/repositories/admin/__init__.py b/backend/app/db/repositories/admin/__init__.py
index 9c03ee24..24ab6877 100644
--- a/backend/app/db/repositories/admin/__init__.py
+++ b/backend/app/db/repositories/admin/__init__.py
@@ -1 +1,9 @@
-"""Admin repositories package."""
+from app.db.repositories.admin.admin_events_repository import AdminEventsRepository
+from app.db.repositories.admin.admin_settings_repository import AdminSettingsRepository
+from app.db.repositories.admin.admin_user_repository import AdminUserRepository
+
+__all__ = [
+ "AdminEventsRepository",
+ "AdminSettingsRepository",
+ "AdminUserRepository",
+]
diff --git a/backend/app/db/repositories/admin/admin_events_repository.py b/backend/app/db/repositories/admin/admin_events_repository.py
index 805579f4..80f4dc24 100644
--- a/backend/app/db/repositories/admin/admin_events_repository.py
+++ b/backend/app/db/repositories/admin/admin_events_repository.py
@@ -4,15 +4,15 @@
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
from pymongo import ReturnDocument
-from app.core.logging import logger
-from app.domain.admin.replay_models import (
+from app.domain.admin import (
ReplayQuery,
ReplaySession,
ReplaySessionData,
ReplaySessionFields,
- ReplaySessionStatus,
ReplaySessionStatusDetail,
)
+from app.domain.admin.replay_updates import ReplaySessionUpdate
+from app.domain.enums.replay import ReplayStatus
from app.domain.events.event_models import (
CollectionNames,
Event,
@@ -30,8 +30,14 @@
from app.domain.events.query_builders import (
EventStatsAggregation,
)
-from app.infrastructure.mappers.event_mapper import EventMapper, EventSummaryMapper
-from app.infrastructure.mappers.replay_mapper import ReplayQueryMapper, ReplaySessionMapper
+from app.infrastructure.mappers import (
+ EventExportRowMapper,
+ EventFilterMapper,
+ EventMapper,
+ EventSummaryMapper,
+ ReplayQueryMapper,
+ ReplaySessionMapper,
+)
class AdminEventsRepository:
@@ -41,6 +47,11 @@ def __init__(self, db: AsyncIOMotorDatabase):
self.db = db
self.events_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EVENTS)
self.event_store_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EVENT_STORE)
+ # Bind related collections used by this repository
+ self.executions_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EXECUTIONS)
+ self.events_archive_collection: AsyncIOMotorCollection = self.db.get_collection(
+ CollectionNames.EVENTS_ARCHIVE
+ )
self.replay_mapper = ReplaySessionMapper()
self.replay_query_mapper = ReplayQueryMapper()
self.replay_sessions_collection: AsyncIOMotorCollection = self.db.get_collection(
@@ -57,396 +68,334 @@ async def browse_events(
sort_order: int = SortDirection.DESCENDING
) -> EventBrowseResult:
"""Browse events with filters using domain models."""
- try:
-
- # Convert filter to MongoDB query
- query = filter.to_query()
+ query = EventFilterMapper.to_mongo_query(filter)
- # Get total count
- total = await self.events_collection.count_documents(query)
+ # Get total count
+ total = await self.events_collection.count_documents(query)
- # Execute query with pagination
- cursor = self.events_collection.find(query)
- cursor = cursor.sort(sort_by, sort_order)
- cursor = cursor.skip(skip).limit(limit)
+ # Execute query with pagination
+ cursor = self.events_collection.find(query)
+ cursor = cursor.sort(sort_by, sort_order)
+ cursor = cursor.skip(skip).limit(limit)
- # Fetch events and convert to domain models
- event_docs = await cursor.to_list(length=limit)
- events = [self.mapper.from_mongo_document(doc) for doc in event_docs]
+ # Fetch events and convert to domain models
+ event_docs = await cursor.to_list(length=limit)
+ events = [self.mapper.from_mongo_document(doc) for doc in event_docs]
- return EventBrowseResult(
- events=events,
- total=total,
- skip=skip,
- limit=limit
- )
- except Exception as e:
- logger.error(f"Error browsing events: {e}")
- raise
+ return EventBrowseResult(
+ events=events,
+ total=total,
+ skip=skip,
+ limit=limit
+ )
async def get_event_detail(self, event_id: str) -> EventDetail | None:
"""Get detailed information about an event."""
- try:
- # Find event by ID
- event_doc = await self.events_collection.find_one({EventFields.EVENT_ID: event_id})
+ event_doc = await self.events_collection.find_one({EventFields.EVENT_ID: event_id})
- if not event_doc:
- return None
+ if not event_doc:
+ return None
- event = self.mapper.from_mongo_document(event_doc)
+ event = self.mapper.from_mongo_document(event_doc)
- # Get related events if correlation ID exists
- related_events: List[EventSummary] = []
- if event.correlation_id:
- cursor = self.events_collection.find({
- EventFields.METADATA_CORRELATION_ID: event.correlation_id,
- EventFields.EVENT_ID: {"$ne": event_id}
- }).sort(EventFields.TIMESTAMP, SortDirection.ASCENDING).limit(10)
+ # Get related events
+ cursor = self.events_collection.find({
+ EventFields.METADATA_CORRELATION_ID: event.correlation_id,
+ EventFields.EVENT_ID: {"$ne": event_id}
+ }).sort(EventFields.TIMESTAMP, SortDirection.ASCENDING).limit(10)
- related_docs = await cursor.to_list(length=10)
- related_events = [self.summary_mapper.from_mongo_document(doc) for doc in related_docs]
+ related_docs = await cursor.to_list(length=10)
+ related_events = [self.summary_mapper.from_mongo_document(doc) for doc in related_docs]
- # Build timeline (could be expanded with more logic)
- timeline = related_events[:5] # Simple timeline for now
+ # Build timeline (could be expanded with more logic)
+ timeline = related_events[:5] # Simple timeline for now
- detail = EventDetail(
- event=event,
- related_events=related_events,
- timeline=timeline
- )
+ detail = EventDetail(
+ event=event,
+ related_events=related_events,
+ timeline=timeline
+ )
- return detail
-
- except Exception as e:
- logger.error(f"Error getting event detail: {e}")
- raise
+ return detail
async def delete_event(self, event_id: str) -> bool:
"""Delete an event."""
- try:
- result = await self.events_collection.delete_one({EventFields.EVENT_ID: event_id})
- return result.deleted_count > 0
- except Exception as e:
- logger.error(f"Error deleting event: {e}")
- raise
+ result = await self.events_collection.delete_one({EventFields.EVENT_ID: event_id})
+ return result.deleted_count > 0
async def get_event_stats(self, hours: int = 24) -> EventStatistics:
"""Get event statistics for the last N hours."""
- try:
- start_time = datetime.now(timezone.utc) - timedelta(hours=hours)
-
- # Get overview statistics
- overview_pipeline = EventStatsAggregation.build_overview_pipeline(start_time)
- overview_result = await self.events_collection.aggregate(overview_pipeline).to_list(1)
-
- stats = overview_result[0] if overview_result else {
- "total_events": 0,
- "event_type_count": 0,
- "unique_user_count": 0,
- "service_count": 0
+ start_time = datetime.now(timezone.utc) - timedelta(hours=hours)
+
+ # Get overview statistics
+ overview_pipeline = EventStatsAggregation.build_overview_pipeline(start_time)
+ overview_result = await self.events_collection.aggregate(overview_pipeline).to_list(1)
+
+ stats = overview_result[0] if overview_result else {
+ "total_events": 0,
+ "event_type_count": 0,
+ "unique_user_count": 0,
+ "service_count": 0
+ }
+
+ # Get error rate
+ error_count = await self.events_collection.count_documents({
+ EventFields.TIMESTAMP: {"$gte": start_time},
+ EventFields.EVENT_TYPE: {"$regex": "failed|error|timeout", "$options": "i"}
+ })
+
+ error_rate = (error_count / stats["total_events"] * 100) if stats["total_events"] > 0 else 0
+
+ # Get event types with counts
+ type_pipeline = EventStatsAggregation.build_event_types_pipeline(start_time)
+ top_types = await self.events_collection.aggregate(type_pipeline).to_list(10)
+ events_by_type = {t["_id"]: t["count"] for t in top_types}
+
+ # Get events by hour
+ hourly_pipeline = EventStatsAggregation.build_hourly_events_pipeline(start_time)
+ hourly_cursor = self.events_collection.aggregate(hourly_pipeline)
+ events_by_hour: list[HourlyEventCount | dict[str, Any]] = [
+ HourlyEventCount(hour=doc["_id"], count=doc["count"])
+ async for doc in hourly_cursor
+ ]
+
+ # Get top users
+ user_pipeline = EventStatsAggregation.build_top_users_pipeline(start_time)
+ top_users_cursor = self.events_collection.aggregate(user_pipeline)
+ top_users = [
+ UserEventCount(user_id=doc["_id"], event_count=doc["count"])
+ async for doc in top_users_cursor
+ if doc["_id"] # Filter out None user_ids
+ ]
+
+ # Get average processing time from executions collection
+ # Since execution timing data is stored in executions, not events
+ executions_collection = self.executions_collection
+
+ # Calculate average execution time from completed executions within the requested window
+ exec_pipeline: list[dict[str, Any]] = [
+ {
+ "$match": {
+ "created_at": {"$gte": start_time},
+ "status": "completed",
+ "resource_usage.execution_time_wall_seconds": {"$exists": True}
+ }
+ },
+ {
+ "$group": {
+ "_id": None,
+ "avg_duration": {"$avg": "$resource_usage.execution_time_wall_seconds"}
+ }
}
+ ]
- # Get error rate
- error_count = await self.events_collection.count_documents({
- EventFields.TIMESTAMP: {"$gte": start_time},
- EventFields.EVENT_TYPE: {"$regex": "failed|error|timeout", "$options": "i"}
- })
-
- error_rate = (error_count / stats["total_events"] * 100) if stats["total_events"] > 0 else 0
-
- # Get event types with counts
- type_pipeline = EventStatsAggregation.build_event_types_pipeline(start_time)
- top_types = await self.events_collection.aggregate(type_pipeline).to_list(10)
- events_by_type = {t["_id"]: t["count"] for t in top_types}
-
- # Get events by hour
- hourly_pipeline = EventStatsAggregation.build_hourly_events_pipeline(start_time)
- hourly_cursor = self.events_collection.aggregate(hourly_pipeline)
- events_by_hour: list[HourlyEventCount | dict[str, Any]] = [
- HourlyEventCount(hour=doc["_id"], count=doc["count"])
- async for doc in hourly_cursor
- ]
-
- # Get top users
- user_pipeline = EventStatsAggregation.build_top_users_pipeline(start_time)
- top_users_cursor = self.events_collection.aggregate(user_pipeline)
- top_users = [
- UserEventCount(user_id=doc["_id"], event_count=doc["count"])
- async for doc in top_users_cursor
- if doc["_id"] # Filter out None user_ids
- ]
-
- # Get average processing time from executions collection
- # Since execution timing data is stored in executions, not events
- executions_collection = self.db.get_collection("executions")
-
- # Calculate average execution time from completed executions in the last 24 hours
- exec_pipeline: list[dict[str, Any]] = [
- {
- "$match": {
- "created_at": {"$gte": start_time},
- "status": "completed",
- "resource_usage.execution_time_wall_seconds": {"$exists": True}
- }
- },
- {
- "$group": {
- "_id": None,
- "avg_duration": {"$avg": "$resource_usage.execution_time_wall_seconds"}
- }
- }
- ]
-
- exec_result = await executions_collection.aggregate(exec_pipeline).to_list(1)
- avg_processing_time = exec_result[0]["avg_duration"] if exec_result and exec_result[0].get(
- "avg_duration") else 0
-
- statistics = EventStatistics(
- total_events=stats["total_events"],
- events_by_type=events_by_type,
- events_by_hour=events_by_hour,
- top_users=top_users,
- error_rate=round(error_rate, 2),
- avg_processing_time=round(avg_processing_time, 2)
- )
+ exec_result = await executions_collection.aggregate(exec_pipeline).to_list(1)
+ avg_processing_time = exec_result[0]["avg_duration"] if exec_result and exec_result[0].get(
+ "avg_duration") else 0
- return statistics
+ statistics = EventStatistics(
+ total_events=stats["total_events"],
+ events_by_type=events_by_type,
+ events_by_hour=events_by_hour,
+ top_users=top_users,
+ error_rate=round(error_rate, 2),
+ avg_processing_time=round(avg_processing_time, 2)
+ )
- except Exception as e:
- logger.error(f"Error getting event stats: {e}")
- raise
+ return statistics
async def export_events_csv(self, filter: EventFilter) -> List[EventExportRow]:
"""Export events as CSV data."""
- try:
-
- query = filter.to_query()
-
- cursor = self.events_collection.find(query).sort(
- EventFields.TIMESTAMP,
- SortDirection.DESCENDING
- ).limit(10000)
+ query = EventFilterMapper.to_mongo_query(filter)
- event_docs = await cursor.to_list(length=10000)
+ cursor = self.events_collection.find(query).sort(
+ EventFields.TIMESTAMP,
+ SortDirection.DESCENDING
+ ).limit(10000)
- # Convert to export rows
- export_rows = []
- for doc in event_docs:
- event = self.mapper.from_mongo_document(doc)
- export_row = EventExportRow.from_event(event)
- export_rows.append(export_row)
+ event_docs = await cursor.to_list(length=10000)
- return export_rows
+ # Convert to export rows
+ export_rows = []
+ for doc in event_docs:
+ event = self.mapper.from_mongo_document(doc)
+ export_row = EventExportRowMapper.from_event(event)
+ export_rows.append(export_row)
- except Exception as e:
- logger.error(f"Error exporting events: {e}")
- raise
+ return export_rows
async def archive_event(self, event: Event, deleted_by: str) -> bool:
"""Archive an event before deletion."""
- try:
+ # Add deletion metadata
+ event_dict = self.mapper.to_mongo_document(event)
+ event_dict["_deleted_at"] = datetime.now(timezone.utc)
+ event_dict["_deleted_by"] = deleted_by
- # Add deletion metadata
- event_dict = self.mapper.to_mongo_document(event)
- event_dict["_deleted_at"] = datetime.now(timezone.utc)
- event_dict["_deleted_by"] = deleted_by
-
- # Create events_archive collection if it doesn't exist
- events_archive = self.db.get_collection(CollectionNames.EVENTS_ARCHIVE)
- await events_archive.insert_one(event_dict)
- return True
- except Exception as e:
- logger.error(f"Error archiving event: {e}")
- raise
+ # Insert into bound archive collection
+ result = await self.events_archive_collection.insert_one(event_dict)
+ return result.inserted_id is not None
async def create_replay_session(self, session: ReplaySession) -> str:
"""Create a new replay session."""
- try:
-
- session_dict = self.replay_mapper.to_dict(session)
- await self.replay_sessions_collection.insert_one(session_dict)
- return session.session_id
- except Exception as e:
- logger.error(f"Error creating replay session: {e}")
- raise
+ session_dict = self.replay_mapper.to_dict(session)
+ await self.replay_sessions_collection.insert_one(session_dict)
+ return session.session_id
async def get_replay_session(self, session_id: str) -> ReplaySession | None:
"""Get replay session by ID."""
- try:
- doc = await self.replay_sessions_collection.find_one({
- ReplaySessionFields.SESSION_ID: session_id
- })
- return self.replay_mapper.from_dict(doc) if doc else None
- except Exception as e:
- logger.error(f"Error getting replay session: {e}")
- raise
-
- async def update_replay_session(self, session_id: str, updates: Dict[str, Any]) -> bool:
+ doc = await self.replay_sessions_collection.find_one({
+ ReplaySessionFields.SESSION_ID: session_id
+ })
+ return self.replay_mapper.from_dict(doc) if doc else None
+
+ async def update_replay_session(self, session_id: str, updates: ReplaySessionUpdate) -> bool:
"""Update replay session fields."""
- try:
- # Convert field names to use str() for MongoDB
- mongo_updates = {}
- for key, value in updates.items():
- mongo_updates[str(key)] = value
-
- result = await self.replay_sessions_collection.update_one(
- {ReplaySessionFields.SESSION_ID: session_id},
- {"$set": mongo_updates}
- )
- return result.modified_count > 0
- except Exception as e:
- logger.error(f"Error updating replay session: {e}")
- raise
+ if not updates.has_updates():
+ return False
+
+ mongo_updates = updates.to_dict()
+
+ result = await self.replay_sessions_collection.update_one(
+ {ReplaySessionFields.SESSION_ID: session_id},
+ {"$set": mongo_updates}
+ )
+ return result.modified_count > 0
async def get_replay_status_with_progress(self, session_id: str) -> ReplaySessionStatusDetail | None:
"""Get replay session status with progress updates."""
- try:
- doc = await self.replay_sessions_collection.find_one({
- ReplaySessionFields.SESSION_ID: session_id
- })
-
- if not doc:
- return None
-
- session = self.replay_mapper.from_dict(doc)
- current_time = datetime.now(timezone.utc)
-
- # Update status based on time if needed
- if session.status == ReplaySessionStatus.SCHEDULED and session.created_at:
- time_since_created = current_time - session.created_at
- if time_since_created.total_seconds() > 2:
- # Use atomic update to prevent race conditions
- update_result = await self.replay_sessions_collection.find_one_and_update(
- {
- ReplaySessionFields.SESSION_ID: session_id,
- ReplaySessionFields.STATUS: ReplaySessionStatus.SCHEDULED
- },
- {
- "$set": {
- ReplaySessionFields.STATUS: ReplaySessionStatus.RUNNING,
- ReplaySessionFields.STARTED_AT: current_time
- }
- },
- return_document=ReturnDocument.AFTER
- )
- if update_result:
- # Update local session object with the atomically updated values
- session = self.replay_mapper.from_dict(update_result)
-
- # Simulate progress if running
- if session.is_running and session.started_at:
- time_since_started = current_time - session.started_at
- # Assume 10 events per second processing rate
- estimated_progress = min(
- int(time_since_started.total_seconds() * 10),
- session.total_events
+ doc = await self.replay_sessions_collection.find_one({
+ ReplaySessionFields.SESSION_ID: session_id
+ })
+ if not doc:
+ return None
+
+ session = self.replay_mapper.from_dict(doc)
+ current_time = datetime.now(timezone.utc)
+
+ # Update status based on time if needed
+ if session.status == ReplayStatus.SCHEDULED and session.created_at:
+ time_since_created = current_time - session.created_at
+ if time_since_created.total_seconds() > 2:
+ # Use atomic update to prevent race conditions
+ update_result = await self.replay_sessions_collection.find_one_and_update(
+ {
+ ReplaySessionFields.SESSION_ID: session_id,
+ ReplaySessionFields.STATUS: ReplayStatus.SCHEDULED
+ },
+ {
+ "$set": {
+ ReplaySessionFields.STATUS: ReplayStatus.RUNNING,
+ ReplaySessionFields.STARTED_AT: current_time
+ }
+ },
+ return_document=ReturnDocument.AFTER
)
+ if update_result:
+ # Update local session object with the atomically updated values
+ session = self.replay_mapper.from_dict(update_result)
+
+ # Simulate progress if running
+ if session.is_running and session.started_at:
+ time_since_started = current_time - session.started_at
+ # Assume 10 events per second processing rate
+ estimated_progress = min(
+ int(time_since_started.total_seconds() * 10),
+ session.total_events
+ )
- # Update progress - returns new instance
- updated_session = session.update_progress(estimated_progress)
-
- # Update in database
- updates: Dict[str, Any] = {
- ReplaySessionFields.REPLAYED_EVENTS: updated_session.replayed_events
- }
+ # Update progress - returns new instance
+ updated_session = session.update_progress(estimated_progress)
- if updated_session.is_completed:
- updates[ReplaySessionFields.STATUS] = updated_session.status
- updates[ReplaySessionFields.COMPLETED_AT] = updated_session.completed_at
-
- await self.update_replay_session(session_id, updates)
-
- # Use the updated session for the rest of the method
- session = updated_session
-
- # Calculate estimated completion
- estimated_completion = None
- if session.is_running and session.replayed_events > 0 and session.started_at:
- rate = session.replayed_events / (current_time - session.started_at).total_seconds()
- remaining = session.total_events - session.replayed_events
- if rate > 0:
- estimated_completion = current_time + timedelta(seconds=remaining / rate)
-
- # Fetch execution results from the original events that were replayed
- execution_results = []
- # Get the query that was used for replay from the session's config
- original_query = {}
- if doc and "config" in doc:
- config = doc.get("config", {})
- filter_config = config.get("filter", {})
- original_query = filter_config.get("custom_query", {})
-
- if original_query:
- # Find the original events that were replayed
- original_events = await self.events_collection.find(original_query).to_list(10)
-
- # Get unique execution IDs from original events
- execution_ids = set()
- for event in original_events:
- # Try to get execution_id from various locations
- exec_id = event.get("execution_id")
- if not exec_id and event.get("payload"):
- exec_id = event.get("payload", {}).get("execution_id")
- if not exec_id:
- exec_id = event.get("aggregate_id")
- if exec_id:
- execution_ids.add(exec_id)
-
- # Fetch execution details
- if execution_ids:
- executions_collection = self.db.get_collection("executions")
- for exec_id in list(execution_ids)[:10]: # Limit to 10
- exec_doc = await executions_collection.find_one({"execution_id": exec_id})
- if exec_doc:
- execution_results.append({
- "execution_id": exec_doc.get("execution_id"),
- "status": exec_doc.get("status"),
- "output": exec_doc.get("output"),
- "errors": exec_doc.get("errors"),
- "exit_code": exec_doc.get("exit_code"),
- "execution_time": exec_doc.get("execution_time"),
- "lang": exec_doc.get("lang"),
- "lang_version": exec_doc.get("lang_version"),
- "created_at": exec_doc.get("created_at"),
- "updated_at": exec_doc.get("updated_at")
- })
-
- return ReplaySessionStatusDetail(
- session=session,
- estimated_completion=estimated_completion,
- execution_results=execution_results
+ # Update in database
+ session_update = ReplaySessionUpdate(
+ replayed_events=updated_session.replayed_events
)
- except Exception as e:
- logger.error(f"Error getting replay status with progress: {e}")
- raise
+ if updated_session.is_completed:
+ session_update.status = updated_session.status
+ session_update.completed_at = updated_session.completed_at
+
+ await self.update_replay_session(session_id, session_update)
+
+ # Use the updated session for the rest of the method
+ session = updated_session
+
+ # Calculate estimated completion
+ estimated_completion = None
+ if session.is_running and session.replayed_events > 0 and session.started_at:
+ rate = session.replayed_events / (current_time - session.started_at).total_seconds()
+ remaining = session.total_events - session.replayed_events
+ if rate > 0:
+ estimated_completion = current_time + timedelta(seconds=remaining / rate)
+
+ # Fetch execution results from the original events that were replayed
+ execution_results = []
+ # Get the query that was used for replay from the session's config
+ original_query = {}
+ if doc and "config" in doc:
+ config = doc.get("config", {})
+ filter_config = config.get("filter", {})
+ original_query = filter_config.get("custom_query", {})
+
+ if original_query:
+ # Find the original events that were replayed
+ original_events = await self.events_collection.find(original_query).to_list(10)
+
+ # Get unique execution IDs from original events
+ execution_ids = set()
+ for event in original_events:
+ # Try to get execution_id from various locations
+ exec_id = event.get("execution_id")
+ if not exec_id and event.get("payload"):
+ exec_id = event.get("payload", {}).get("execution_id")
+ if not exec_id:
+ exec_id = event.get("aggregate_id")
+ if exec_id:
+ execution_ids.add(exec_id)
+
+ # Fetch execution details
+ if execution_ids:
+ executions_collection = self.executions_collection
+ for exec_id in list(execution_ids)[:10]: # Limit to 10
+ exec_doc = await executions_collection.find_one({"execution_id": exec_id})
+ if exec_doc:
+ execution_results.append({
+ "execution_id": exec_doc.get("execution_id"),
+ "status": exec_doc.get("status"),
+ "stdout": exec_doc.get("stdout"),
+ "stderr": exec_doc.get("stderr"),
+ "exit_code": exec_doc.get("exit_code"),
+ "execution_time": exec_doc.get("execution_time"),
+ "lang": exec_doc.get("lang"),
+ "lang_version": exec_doc.get("lang_version"),
+ "created_at": exec_doc.get("created_at"),
+ "updated_at": exec_doc.get("updated_at")
+ })
+
+ return ReplaySessionStatusDetail(
+ session=session,
+ estimated_completion=estimated_completion,
+ execution_results=execution_results
+ )
async def count_events_for_replay(self, query: Dict[str, Any]) -> int:
"""Count events matching replay query."""
- try:
- return await self.events_collection.count_documents(query)
- except Exception as e:
- logger.error(f"Error counting events for replay: {e}")
- raise
+ return await self.events_collection.count_documents(query)
async def get_events_preview_for_replay(self, query: Dict[str, Any], limit: int = 100) -> List[Dict[str, Any]]:
"""Get preview of events for replay."""
- try:
- cursor = self.events_collection.find(query).limit(limit)
- event_docs = await cursor.to_list(length=limit)
-
- # Convert to event summaries
- summaries: List[Dict[str, Any]] = []
- for doc in event_docs:
- summary = self.summary_mapper.from_mongo_document(doc)
- summary_dict = self.summary_mapper.to_dict(summary)
- # Convert EventFields enum keys to strings
- summaries.append({str(k): v for k, v in summary_dict.items()})
-
- return summaries
- except Exception as e:
- logger.error(f"Error getting events preview: {e}")
- raise
+ cursor = self.events_collection.find(query).limit(limit)
+ event_docs = await cursor.to_list(length=limit)
+
+ # Convert to event summaries
+ summaries: List[Dict[str, Any]] = []
+ for doc in event_docs:
+ summary = self.summary_mapper.from_mongo_document(doc)
+ summary_dict = self.summary_mapper.to_dict(summary)
+ # Convert EventFields enum keys to strings
+ summaries.append({str(k): v for k, v in summary_dict.items()})
+
+ return summaries
def build_replay_query(self, replay_query: ReplayQuery) -> Dict[str, Any]:
"""Build MongoDB query from replay query model."""
@@ -460,36 +409,28 @@ async def prepare_replay_session(
max_events: int = 1000
) -> ReplaySessionData:
"""Prepare replay session with validation and preview."""
- try:
- # Count matching events
- event_count = await self.count_events_for_replay(query)
-
- if event_count == 0:
- raise ValueError("No events found matching the criteria")
-
- if event_count > max_events and not dry_run:
- raise ValueError(f"Too many events to replay ({event_count}). Maximum is {max_events}.")
-
- # Get events preview for dry run
- events_preview: List[EventSummary] = []
- if dry_run:
- preview_docs = await self.get_events_preview_for_replay(query, limit=100)
- events_preview = [self.summary_mapper.from_mongo_document(e) for e in preview_docs]
-
- # Return unified session data
- session_data = ReplaySessionData(
- total_events=event_count,
- replay_correlation_id=replay_correlation_id,
- dry_run=dry_run,
- query=query,
- events_preview=events_preview
- )
-
- return session_data
-
- except Exception as e:
- logger.error(f"Error preparing replay session: {e}")
- raise
+ event_count = await self.count_events_for_replay(query)
+ if event_count == 0:
+ raise ValueError("No events found matching the criteria")
+ if event_count > max_events and not dry_run:
+ raise ValueError(f"Too many events to replay ({event_count}). Maximum is {max_events}.")
+
+ # Get events preview for dry run
+ events_preview: List[EventSummary] = []
+ if dry_run:
+ preview_docs = await self.get_events_preview_for_replay(query, limit=100)
+ events_preview = [self.summary_mapper.from_mongo_document(e) for e in preview_docs]
+
+ # Return unified session data
+ session_data = ReplaySessionData(
+ total_events=event_count,
+ replay_correlation_id=replay_correlation_id,
+ dry_run=dry_run,
+ query=query,
+ events_preview=events_preview
+ )
+
+ return session_data
async def get_replay_events_preview(
self,
@@ -498,33 +439,28 @@ async def get_replay_events_preview(
aggregate_id: str | None = None
) -> Dict[str, Any]:
"""Get preview of events that would be replayed - backward compatibility."""
- try:
- replay_query = ReplayQuery(
- event_ids=event_ids,
- correlation_id=correlation_id,
- aggregate_id=aggregate_id
- )
-
- query = self.replay_query_mapper.to_mongodb_query(replay_query)
+ replay_query = ReplayQuery(
+ event_ids=event_ids,
+ correlation_id=correlation_id,
+ aggregate_id=aggregate_id
+ )
- if not query:
- return {"events": [], "total": 0}
+ query = self.replay_query_mapper.to_mongodb_query(replay_query)
- total = await self.event_store_collection.count_documents(query)
+ if not query:
+ return {"events": [], "total": 0}
- cursor = self.event_store_collection.find(query).sort(
- EventFields.TIMESTAMP,
- SortDirection.ASCENDING
- ).limit(100)
+ total = await self.event_store_collection.count_documents(query)
- # Batch fetch all events from cursor
- events = await cursor.to_list(length=100)
+ cursor = self.event_store_collection.find(query).sort(
+ EventFields.TIMESTAMP,
+ SortDirection.ASCENDING
+ ).limit(100)
- return {
- "events": events,
- "total": total
- }
+ # Batch fetch all events from cursor
+ events = await cursor.to_list(length=100)
- except Exception as e:
- logger.error(f"Error getting replay preview: {e}")
- raise
+ return {
+ "events": events,
+ "total": total
+ }
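The replay repository above replaces ad-hoc dict updates with a typed ReplaySessionUpdate and guards the SCHEDULED-to-RUNNING flip with find_one_and_update so two concurrent readers cannot both claim the transition. A minimal standalone sketch of that atomic transition, assuming a generic Motor collection and plain string field names and statuses in place of ReplaySessionFields/ReplayStatus:

from datetime import datetime, timezone

from pymongo import ReturnDocument


async def start_if_scheduled(collection, session_id: str) -> dict | None:
    # The filter pins the expected current status, so only one caller wins
    # the transition; losers get None and simply re-read the session.
    return await collection.find_one_and_update(
        {"session_id": session_id, "status": "scheduled"},
        {"$set": {"status": "running", "started_at": datetime.now(timezone.utc)}},
        return_document=ReturnDocument.AFTER,
    )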
diff --git a/backend/app/db/repositories/admin/admin_settings_repository.py b/backend/app/db/repositories/admin/admin_settings_repository.py
index a049cdef..04323046 100644
--- a/backend/app/db/repositories/admin/admin_settings_repository.py
+++ b/backend/app/db/repositories/admin/admin_settings_repository.py
@@ -3,12 +3,12 @@
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
from app.core.logging import logger
-from app.domain.admin.settings_models import (
+from app.domain.admin import (
AuditAction,
AuditLogEntry,
SystemSettings,
)
-from app.infrastructure.mappers.admin_mapper import AuditLogMapper, SettingsMapper
+from app.infrastructure.mappers import AuditLogMapper, SettingsMapper
class AdminSettingsRepository:
@@ -27,11 +27,11 @@ async def get_system_settings(self) -> SystemSettings:
# Create default settings
default_settings = SystemSettings()
settings_dict = self.settings_mapper.system_settings_to_dict(default_settings)
-
+
# Insert default settings
await self.settings_collection.insert_one(settings_dict)
return default_settings
-
+
return self.settings_mapper.system_settings_from_dict(settings_doc)
async def update_system_settings(
@@ -41,57 +41,47 @@ async def update_system_settings(
user_id: str
) -> SystemSettings:
"""Update system-wide settings."""
- try:
- # Update settings metadata
- settings.updated_at = datetime.now(timezone.utc)
-
- # Convert to dict and save
- settings_dict = self.settings_mapper.system_settings_to_dict(settings)
-
- await self.settings_collection.replace_one(
- {"_id": "global"},
- settings_dict,
- upsert=True
- )
-
- # Create audit log entry
- audit_entry = AuditLogEntry(
- action=AuditAction.SYSTEM_SETTINGS_UPDATED,
- user_id=user_id,
- username=updated_by,
- timestamp=datetime.now(timezone.utc),
- changes=settings_dict
- )
-
- await self.audit_log_collection.insert_one(
- self.audit_mapper.to_dict(audit_entry)
- )
-
- return settings
-
- except Exception as e:
- logger.error(f"Error updating system settings: {e}")
- raise
+ # Update settings metadata
+ settings.updated_at = datetime.now(timezone.utc)
+
+ # Convert to dict and save
+ settings_dict = self.settings_mapper.system_settings_to_dict(settings)
+
+ await self.settings_collection.replace_one(
+ {"_id": "global"},
+ settings_dict,
+ upsert=True
+ )
+
+ # Create audit log entry
+ audit_entry = AuditLogEntry(
+ action=AuditAction.SYSTEM_SETTINGS_UPDATED,
+ user_id=user_id,
+ username=updated_by,
+ timestamp=datetime.now(timezone.utc),
+ changes=settings_dict
+ )
+
+ await self.audit_log_collection.insert_one(
+ self.audit_mapper.to_dict(audit_entry)
+ )
+
+ return settings
async def reset_system_settings(self, username: str, user_id: str) -> SystemSettings:
"""Reset system settings to defaults."""
- try:
- # Delete current settings
- await self.settings_collection.delete_one({"_id": "global"})
-
- # Create audit log entry
- audit_entry = AuditLogEntry(
- action=AuditAction.SYSTEM_SETTINGS_RESET,
- user_id=user_id,
- username=username,
- timestamp=datetime.now(timezone.utc)
- )
-
- await self.audit_log_collection.insert_one(self.audit_mapper.to_dict(audit_entry))
-
- # Return default settings
- return SystemSettings()
-
- except Exception as e:
- logger.error(f"Error resetting system settings: {e}")
- raise
+ # Delete current settings
+ await self.settings_collection.delete_one({"_id": "global"})
+
+ # Create audit log entry
+ audit_entry = AuditLogEntry(
+ action=AuditAction.SYSTEM_SETTINGS_RESET,
+ user_id=user_id,
+ username=username,
+ timestamp=datetime.now(timezone.utc)
+ )
+
+ await self.audit_log_collection.insert_one(self.audit_mapper.to_dict(audit_entry))
+
+ # Return default settings
+ return SystemSettings()
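As elsewhere in this diff, the settings repository drops its try/except-log-raise wrappers so driver errors propagate unchanged and get reported once at the calling layer. A small sketch of that split, with hypothetical function names:

import logging

logger = logging.getLogger(__name__)


async def update_settings(collection, settings_doc: dict) -> None:
    # Repository level: no exception handling, errors bubble up as-is.
    await collection.replace_one({"_id": "global"}, settings_doc, upsert=True)


async def handle_update(collection, settings_doc: dict) -> bool:
    # Calling layer: the single place that logs and maps failures.
    try:
        await update_settings(collection, settings_doc)
        return True
    except Exception:
        logger.exception("Failed to update system settings")
        return False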
diff --git a/backend/app/db/repositories/admin/admin_user_repository.py b/backend/app/db/repositories/admin/admin_user_repository.py
index fcac7fa8..ef04ed37 100644
--- a/backend/app/db/repositories/admin/admin_user_repository.py
+++ b/backend/app/db/repositories/admin/admin_user_repository.py
@@ -2,9 +2,10 @@
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
-from app.core.logging import logger
from app.core.security import SecurityService
-from app.domain.admin.user_models import (
+from app.domain.enums import UserRole
+from app.domain.events.event_models import CollectionNames
+from app.domain.user import (
PasswordReset,
User,
UserFields,
@@ -12,13 +13,21 @@
UserSearchFilter,
UserUpdate,
)
-from app.infrastructure.mappers.admin_mapper import UserMapper
+from app.infrastructure.mappers import UserMapper
class AdminUserRepository:
def __init__(self, db: AsyncIOMotorDatabase):
self.db = db
- self.users_collection: AsyncIOMotorCollection = self.db.get_collection("users")
+ self.users_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.USERS)
+
+ # Related collections used by this repository (e.g., cascade deletes)
+ self.executions_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EXECUTIONS)
+ self.saved_scripts_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.SAVED_SCRIPTS)
+ self.notifications_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.NOTIFICATIONS)
+ self.user_settings_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.USER_SETTINGS)
+ self.events_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EVENTS)
+ self.sagas_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.SAGAS)
self.security_service = SecurityService()
self.mapper = UserMapper()
@@ -27,51 +36,40 @@ async def list_users(
limit: int = 100,
offset: int = 0,
search: str | None = None,
- role: str | None = None
+ role: UserRole | None = None
) -> UserListResult:
"""List all users with optional filtering."""
- try:
- # Create search filter
- from app.domain.enums.user import UserRole
- search_filter = UserSearchFilter(
- search_text=search,
- role=UserRole(role) if role else None
- )
+ # Create search filter
+ search_filter = UserSearchFilter(
+ search_text=search,
+ role=role
+ )
- query = search_filter.to_query()
+ query = self.mapper.search_filter_to_query(search_filter)
- # Get total count
- total = await self.users_collection.count_documents(query)
+ # Get total count
+ total = await self.users_collection.count_documents(query)
- # Get users with pagination
- cursor = self.users_collection.find(query).skip(offset).limit(limit)
+ # Get users with pagination
+ cursor = self.users_collection.find(query).skip(offset).limit(limit)
- users = []
- async for user_doc in cursor:
- users.append(self.mapper.from_mongo_document(user_doc))
+ users = []
+ async for user_doc in cursor:
+ users.append(self.mapper.from_mongo_document(user_doc))
- return UserListResult(
- users=users,
- total=total,
- offset=offset,
- limit=limit
- )
-
- except Exception as e:
- logger.error(f"Error listing users: {e}")
- raise
+ return UserListResult(
+ users=users,
+ total=total,
+ offset=offset,
+ limit=limit
+ )
async def get_user_by_id(self, user_id: str) -> User | None:
"""Get user by ID."""
- try:
- user_doc = await self.users_collection.find_one({UserFields.USER_ID: user_id})
- if user_doc:
- return self.mapper.from_mongo_document(user_doc)
- return None
-
- except Exception as e:
- logger.error(f"Error getting user by ID: {e}")
- raise
+ user_doc = await self.users_collection.find_one({UserFields.USER_ID: user_id})
+ if user_doc:
+ return self.mapper.from_mongo_document(user_doc)
+ return None
async def update_user(
self,
@@ -79,106 +77,80 @@ async def update_user(
update_data: UserUpdate
) -> User | None:
"""Update user details."""
- try:
- if not update_data.has_updates():
- return await self.get_user_by_id(user_id)
-
- # Get update dict
- update_dict = self.mapper.to_update_dict(update_data)
+ if not update_data.has_updates():
+ return await self.get_user_by_id(user_id)
- # Hash password if provided
- if update_data.password:
- update_dict[UserFields.HASHED_PASSWORD] = self.security_service.get_password_hash(update_data.password)
- # Ensure no plaintext password field is persisted
- update_dict.pop("password", None)
+ # Get update dict
+ update_dict = self.mapper.to_update_dict(update_data)
- # Add updated_at timestamp
- update_dict[UserFields.UPDATED_AT] = datetime.now(timezone.utc)
+ # Hash password if provided
+ if update_data.password:
+ update_dict[UserFields.HASHED_PASSWORD] = self.security_service.get_password_hash(update_data.password)
+ # Ensure no plaintext password field is persisted
+ update_dict.pop("password", None)
- result = await self.users_collection.update_one(
- {UserFields.USER_ID: user_id},
- {"$set": update_dict}
- )
+ # Add updated_at timestamp
+ update_dict[UserFields.UPDATED_AT] = datetime.now(timezone.utc)
- if result.modified_count > 0:
- return await self.get_user_by_id(user_id)
+ result = await self.users_collection.update_one(
+ {UserFields.USER_ID: user_id},
+ {"$set": update_dict}
+ )
- return None
+ if result.modified_count > 0:
+ return await self.get_user_by_id(user_id)
- except Exception as e:
- logger.error(f"Error updating user: {e}")
- raise
+ return None
async def delete_user(self, user_id: str, cascade: bool = True) -> dict[str, int]:
"""Delete user with optional cascade deletion of related data."""
- try:
- deleted_counts = {}
-
- if cascade:
- # Delete user's executions
- executions_result = await self.db.get_collection("executions").delete_many(
- {"user_id": user_id}
- )
- deleted_counts["executions"] = executions_result.deleted_count
-
- # Delete user's saved scripts
- scripts_result = await self.db.get_collection("saved_scripts").delete_many(
- {"user_id": user_id}
- )
- deleted_counts["saved_scripts"] = scripts_result.deleted_count
-
- # Delete user's notifications
- notifications_result = await self.db.get_collection("notifications").delete_many(
- {"user_id": user_id}
- )
- deleted_counts["notifications"] = notifications_result.deleted_count
-
- # Delete user's settings
- settings_result = await self.db.get_collection("user_settings").delete_many(
- {"user_id": user_id}
- )
- deleted_counts["user_settings"] = settings_result.deleted_count
-
- # Delete user's events (if needed)
- events_result = await self.db.get_collection("events").delete_many(
- {"metadata.user_id": user_id}
- )
- deleted_counts["events"] = events_result.deleted_count
-
- # Delete user's sagas
- sagas_result = await self.db.get_collection("sagas").delete_many(
- {"user_id": user_id}
- )
- deleted_counts["sagas"] = sagas_result.deleted_count
-
- # Delete the user
- result = await self.users_collection.delete_one({UserFields.USER_ID: user_id})
- deleted_counts["user"] = result.deleted_count
-
+ deleted_counts = {}
+
+ result = await self.users_collection.delete_one({UserFields.USER_ID: user_id})
+ deleted_counts["user"] = result.deleted_count
+
+ if not cascade:
return deleted_counts
- except Exception as e:
- logger.error(f"Error deleting user: {e}")
- raise
+ # Delete user's executions
+ executions_result = await self.executions_collection.delete_many({"user_id": user_id})
+ deleted_counts["executions"] = executions_result.deleted_count
+
+ # Delete user's saved scripts
+ scripts_result = await self.saved_scripts_collection.delete_many({"user_id": user_id})
+ deleted_counts["saved_scripts"] = scripts_result.deleted_count
+
+ # Delete user's notifications
+ notifications_result = await self.notifications_collection.delete_many({"user_id": user_id})
+ deleted_counts["notifications"] = notifications_result.deleted_count
+
+ # Delete user's settings
+ settings_result = await self.user_settings_collection.delete_many({"user_id": user_id})
+ deleted_counts["user_settings"] = settings_result.deleted_count
+
+ # Delete user's events (if needed)
+ events_result = await self.events_collection.delete_many({"user_id": user_id})
+ deleted_counts["events"] = events_result.deleted_count
+
+ # Delete user's sagas
+ sagas_result = await self.sagas_collection.delete_many({"user_id": user_id})
+ deleted_counts["sagas"] = sagas_result.deleted_count
+
+ return deleted_counts
async def reset_user_password(self, password_reset: PasswordReset) -> bool:
"""Reset user password."""
- try:
- if not password_reset.is_valid():
- raise ValueError("Invalid password reset data")
-
- hashed_password = self.security_service.get_password_hash(password_reset.new_password)
+ if not password_reset.is_valid():
+ raise ValueError("Invalid password reset data")
- result = await self.users_collection.update_one(
- {UserFields.USER_ID: password_reset.user_id},
- {"$set": {
- UserFields.HASHED_PASSWORD: hashed_password,
- UserFields.UPDATED_AT: datetime.now(timezone.utc)
- }}
- )
+ hashed_password = self.security_service.get_password_hash(password_reset.new_password)
- return result.modified_count > 0
+ result = await self.users_collection.update_one(
+ {UserFields.USER_ID: password_reset.user_id},
+ {"$set": {
+ UserFields.HASHED_PASSWORD: hashed_password,
+ UserFields.UPDATED_AT: datetime.now(timezone.utc)
+ }}
+ )
- except Exception as e:
- logger.error(f"Error resetting user password: {e}")
- raise
+ return result.modified_count > 0
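The user repository now resolves every related collection from CollectionNames up front and cascades deletes across them, returning per-collection counts. A sketch of that cascade, assuming plain string collection names and a shared user_id filter (the events filter in the actual change differs from the old metadata.user_id path):

async def cascade_delete_user(db, user_id: str) -> dict[str, int]:
    related = {
        "executions": {"user_id": user_id},
        "saved_scripts": {"user_id": user_id},
        "notifications": {"user_id": user_id},
        "user_settings": {"user_id": user_id},
        "events": {"user_id": user_id},
        "sagas": {"user_id": user_id},
    }
    counts: dict[str, int] = {}
    for name, query in related.items():
        result = await db.get_collection(name).delete_many(query)
        counts[name] = result.deleted_count
    return counts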
diff --git a/backend/app/db/repositories/dlq_repository.py b/backend/app/db/repositories/dlq_repository.py
index 9789c95d..cb15f9cd 100644
--- a/backend/app/db/repositories/dlq_repository.py
+++ b/backend/app/db/repositories/dlq_repository.py
@@ -4,8 +4,7 @@
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
from app.core.logging import logger
-from app.dlq.manager import DLQManager
-from app.dlq.models import (
+from app.dlq import (
AgeStatistics,
DLQBatchRetryResult,
DLQFields,
@@ -19,106 +18,104 @@
EventTypeStatistic,
TopicStatistic,
)
+from app.dlq.manager import DLQManager
+from app.domain.events.event_models import CollectionNames
+from app.infrastructure.mappers.dlq_mapper import DLQMapper
class DLQRepository:
def __init__(self, db: AsyncIOMotorDatabase):
self.db = db
- self.dlq_collection: AsyncIOMotorCollection = self.db.get_collection("dlq_messages")
+ self.dlq_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.DLQ_MESSAGES)
async def get_dlq_stats(self) -> DLQStatistics:
- try:
- # Get counts by status
- status_pipeline: list[Mapping[str, object]] = [
- {"$group": {
- "_id": f"${DLQFields.STATUS}",
- "count": {"$sum": 1}
- }}
- ]
-
- status_results = []
- async for doc in self.dlq_collection.aggregate(status_pipeline):
- status_results.append(doc)
-
- # Convert status results to dict
- by_status: Dict[str, int] = {}
- for doc in status_results:
- if doc["_id"]:
- by_status[doc["_id"]] = doc["count"]
-
- # Get counts by topic
- topic_pipeline: list[Mapping[str, object]] = [
- {"$group": {
- "_id": f"${DLQFields.ORIGINAL_TOPIC}",
- "count": {"$sum": 1},
- "avg_retry_count": {"$avg": f"${DLQFields.RETRY_COUNT}"}
- }},
- {"$sort": {"count": -1}},
- {"$limit": 10}
- ]
-
- by_topic: List[TopicStatistic] = []
- async for doc in self.dlq_collection.aggregate(topic_pipeline):
- by_topic.append(TopicStatistic(
- topic=doc["_id"],
- count=doc["count"],
- avg_retry_count=round(doc["avg_retry_count"], 2)
+ # Get counts by status
+ status_pipeline: list[Mapping[str, object]] = [
+ {"$group": {
+ "_id": f"${DLQFields.STATUS}",
+ "count": {"$sum": 1}
+ }}
+ ]
+
+ status_results = []
+ async for doc in self.dlq_collection.aggregate(status_pipeline):
+ status_results.append(doc)
+
+ # Convert status results to dict
+ by_status: Dict[str, int] = {}
+ for doc in status_results:
+ if doc["_id"]:
+ by_status[doc["_id"]] = doc["count"]
+
+ # Get counts by topic
+ topic_pipeline: list[Mapping[str, object]] = [
+ {"$group": {
+ "_id": f"${DLQFields.ORIGINAL_TOPIC}",
+ "count": {"$sum": 1},
+ "avg_retry_count": {"$avg": f"${DLQFields.RETRY_COUNT}"}
+ }},
+ {"$sort": {"count": -1}},
+ {"$limit": 10}
+ ]
+
+ by_topic: List[TopicStatistic] = []
+ async for doc in self.dlq_collection.aggregate(topic_pipeline):
+ by_topic.append(TopicStatistic(
+ topic=doc["_id"],
+ count=doc["count"],
+ avg_retry_count=round(doc["avg_retry_count"], 2)
+ ))
+
+ # Get counts by event type
+ event_type_pipeline: list[Mapping[str, object]] = [
+ {"$group": {
+ "_id": f"${DLQFields.EVENT_TYPE}",
+ "count": {"$sum": 1}
+ }},
+ {"$sort": {"count": -1}},
+ {"$limit": 10}
+ ]
+
+ by_event_type: List[EventTypeStatistic] = []
+ async for doc in self.dlq_collection.aggregate(event_type_pipeline):
+ if doc["_id"]: # Skip null event types
+ by_event_type.append(EventTypeStatistic(
+ event_type=doc["_id"],
+ count=doc["count"]
))
- # Get counts by event type
- event_type_pipeline: list[Mapping[str, object]] = [
- {"$group": {
- "_id": f"${DLQFields.EVENT_TYPE}",
- "count": {"$sum": 1}
- }},
- {"$sort": {"count": -1}},
- {"$limit": 10}
- ]
-
- by_event_type: List[EventTypeStatistic] = []
- async for doc in self.dlq_collection.aggregate(event_type_pipeline):
- if doc["_id"]: # Skip null event types
- by_event_type.append(EventTypeStatistic(
- event_type=doc["_id"],
- count=doc["count"]
- ))
+ # Get age statistics
+ age_pipeline: list[Mapping[str, object]] = [
+ {"$project": {
+ "age_seconds": {
+ "$divide": [
+ {"$subtract": [datetime.now(timezone.utc), f"${DLQFields.FAILED_AT}"]},
+ 1000
+ ]
+ }
+ }},
+ {"$group": {
+ "_id": None,
+ "min_age": {"$min": "$age_seconds"},
+ "max_age": {"$max": "$age_seconds"},
+ "avg_age": {"$avg": "$age_seconds"}
+ }}
+ ]
+
+ age_result = await self.dlq_collection.aggregate(age_pipeline).to_list(1)
+ age_stats_data = age_result[0] if age_result else {}
+ age_stats = AgeStatistics(
+ min_age_seconds=age_stats_data.get("min_age", 0.0),
+ max_age_seconds=age_stats_data.get("max_age", 0.0),
+ avg_age_seconds=age_stats_data.get("avg_age", 0.0)
+ )
- # Get age statistics
- age_pipeline: list[Mapping[str, object]] = [
- {"$project": {
- "age_seconds": {
- "$divide": [
- {"$subtract": [datetime.now(timezone.utc), f"${DLQFields.FAILED_AT}"]},
- 1000
- ]
- }
- }},
- {"$group": {
- "_id": None,
- "min_age": {"$min": "$age_seconds"},
- "max_age": {"$max": "$age_seconds"},
- "avg_age": {"$avg": "$age_seconds"}
- }}
- ]
-
- age_result = await self.dlq_collection.aggregate(age_pipeline).to_list(1)
- age_stats_data = age_result[0] if age_result else {}
- age_stats = AgeStatistics(
- min_age_seconds=age_stats_data.get("min_age", 0.0),
- max_age_seconds=age_stats_data.get("max_age", 0.0),
- avg_age_seconds=age_stats_data.get("avg_age", 0.0)
- )
-
- return DLQStatistics(
- by_status=by_status,
- by_topic=by_topic,
- by_event_type=by_event_type,
- age_stats=age_stats
- )
-
- except Exception as e:
- logger.error(f"Error getting DLQ stats: {e}")
- raise
+ return DLQStatistics(
+ by_status=by_status,
+ by_topic=by_topic,
+ by_event_type=by_event_type,
+ age_stats=age_stats
+ )
async def get_messages(
self,
@@ -128,137 +125,98 @@ async def get_messages(
limit: int = 50,
offset: int = 0
) -> DLQMessageListResult:
- try:
- # Create filter
- filter = DLQMessageFilter(
- status=DLQMessageStatus(status) if status else None,
- topic=topic,
- event_type=event_type
- )
-
- query = filter.to_query()
- total_count = await self.dlq_collection.count_documents(query)
-
- cursor = self.dlq_collection.find(query).sort(
- DLQFields.FAILED_AT, -1
- ).skip(offset).limit(limit)
-
- messages = []
- async for doc in cursor:
- messages.append(DLQMessage.from_dict(doc))
-
- return DLQMessageListResult(
- messages=messages,
- total=total_count,
- offset=offset,
- limit=limit
- )
-
- except Exception as e:
- logger.error(f"Error getting DLQ messages: {e}")
- raise
-
- async def get_message_by_id(self, event_id: str) -> DLQMessage | None:
- try:
- doc = await self.dlq_collection.find_one({DLQFields.EVENT_ID: event_id})
-
- if not doc:
- return None
+ # Create filter
+ filter = DLQMessageFilter(
+ status=DLQMessageStatus(status) if status else None,
+ topic=topic,
+ event_type=event_type
+ )
- return DLQMessage.from_dict(doc)
+ query = DLQMapper.filter_to_query(filter)
+ total_count = await self.dlq_collection.count_documents(query)
- except Exception as e:
- logger.error(f"Error getting DLQ message {event_id}: {e}")
- raise
+ cursor = self.dlq_collection.find(query).sort(
+ DLQFields.FAILED_AT, -1
+ ).skip(offset).limit(limit)
- async def get_message_for_retry(self, event_id: str) -> DLQMessage | None:
- try:
- doc = await self.dlq_collection.find_one({DLQFields.EVENT_ID: event_id})
+ messages = []
+ async for doc in cursor:
+ messages.append(DLQMapper.from_mongo_document(doc))
- if not doc:
- return None
+ return DLQMessageListResult(
+ messages=messages,
+ total=total_count,
+ offset=offset,
+ limit=limit
+ )
- return DLQMessage.from_dict(doc)
+ async def get_message_by_id(self, event_id: str) -> DLQMessage | None:
+ doc = await self.dlq_collection.find_one({DLQFields.EVENT_ID: event_id})
+ if not doc:
+ return None
- except Exception as e:
- logger.error(f"Error getting message for retry {event_id}: {e}")
- raise
+ return DLQMapper.from_mongo_document(doc)
async def get_topics_summary(self) -> list[DLQTopicSummary]:
- try:
- pipeline: list[Mapping[str, object]] = [
- {"$group": {
- "_id": f"${DLQFields.ORIGINAL_TOPIC}",
- "count": {"$sum": 1},
- "statuses": {"$push": f"${DLQFields.STATUS}"},
- "oldest_message": {"$min": f"${DLQFields.FAILED_AT}"},
- "newest_message": {"$max": f"${DLQFields.FAILED_AT}"},
- "avg_retry_count": {"$avg": f"${DLQFields.RETRY_COUNT}"},
- "max_retry_count": {"$max": f"${DLQFields.RETRY_COUNT}"}
- }},
- {"$sort": {"count": -1}}
- ]
-
- topics = []
- async for result in self.dlq_collection.aggregate(pipeline):
- status_counts: dict[str, int] = {}
- for status in result["statuses"]:
- status_counts[status] = status_counts.get(status, 0) + 1
-
- topics.append(DLQTopicSummary(
- topic=result["_id"],
- total_messages=result["count"],
- status_breakdown=status_counts,
- oldest_message=result["oldest_message"],
- newest_message=result["newest_message"],
- avg_retry_count=round(result["avg_retry_count"], 2),
- max_retry_count=result["max_retry_count"]
- ))
-
- return topics
-
- except Exception as e:
- logger.error(f"Error getting DLQ topics summary: {e}")
- raise
+ pipeline: list[Mapping[str, object]] = [
+ {"$group": {
+ "_id": f"${DLQFields.ORIGINAL_TOPIC}",
+ "count": {"$sum": 1},
+ "statuses": {"$push": f"${DLQFields.STATUS}"},
+ "oldest_message": {"$min": f"${DLQFields.FAILED_AT}"},
+ "newest_message": {"$max": f"${DLQFields.FAILED_AT}"},
+ "avg_retry_count": {"$avg": f"${DLQFields.RETRY_COUNT}"},
+ "max_retry_count": {"$max": f"${DLQFields.RETRY_COUNT}"}
+ }},
+ {"$sort": {"count": -1}}
+ ]
+
+ topics = []
+ async for result in self.dlq_collection.aggregate(pipeline):
+ status_counts: dict[str, int] = {}
+ for status in result["statuses"]:
+ status_counts[status] = status_counts.get(status, 0) + 1
+
+ topics.append(DLQTopicSummary(
+ topic=result["_id"],
+ total_messages=result["count"],
+ status_breakdown=status_counts,
+ oldest_message=result["oldest_message"],
+ newest_message=result["newest_message"],
+ avg_retry_count=round(result["avg_retry_count"], 2),
+ max_retry_count=result["max_retry_count"]
+ ))
+
+ return topics
async def mark_message_retried(self, event_id: str) -> bool:
- try:
- now = datetime.now(timezone.utc)
- result = await self.dlq_collection.update_one(
- {DLQFields.EVENT_ID: event_id},
- {
- "$set": {
- DLQFields.STATUS: DLQMessageStatus.RETRIED,
- DLQFields.RETRIED_AT: now,
- DLQFields.LAST_UPDATED: now
- }
+ now = datetime.now(timezone.utc)
+ result = await self.dlq_collection.update_one(
+ {DLQFields.EVENT_ID: event_id},
+ {
+ "$set": {
+ DLQFields.STATUS: DLQMessageStatus.RETRIED,
+ DLQFields.RETRIED_AT: now,
+ DLQFields.LAST_UPDATED: now
}
- )
- return result.modified_count > 0
-
- except Exception as e:
- logger.error(f"Error marking message as retried {event_id}: {e}")
- raise
+ }
+ )
+ return result.modified_count > 0
async def mark_message_discarded(self, event_id: str, reason: str) -> bool:
- try:
- now = datetime.now(timezone.utc)
- result = await self.dlq_collection.update_one(
- {DLQFields.EVENT_ID: event_id},
- {
- "$set": {
- DLQFields.STATUS: DLQMessageStatus.DISCARDED.value,
- DLQFields.DISCARDED_AT: now,
- DLQFields.DISCARD_REASON: reason,
- DLQFields.LAST_UPDATED: now
- }
+ now = datetime.now(timezone.utc)
+ result = await self.dlq_collection.update_one(
+ {DLQFields.EVENT_ID: event_id},
+ {
+ "$set": {
+ DLQFields.STATUS: DLQMessageStatus.DISCARDED.value,
+ DLQFields.DISCARDED_AT: now,
+ DLQFields.DISCARD_REASON: reason,
+ DLQFields.LAST_UPDATED: now
}
- )
- return result.modified_count > 0
-
- except Exception as e:
- logger.error(f"Error marking message as discarded {event_id}: {e}")
- raise
+ }
+ )
+ return result.modified_count > 0
async def retry_messages_batch(self, event_ids: list[str], dlq_manager: DLQManager) -> DLQBatchRetryResult:
"""Retry a batch of DLQ messages."""
@@ -269,7 +227,7 @@ async def retry_messages_batch(self, event_ids: list[str], dlq_manager: DLQManag
for event_id in event_ids:
try:
# Get message from repository
- message = await self.get_message_for_retry(event_id)
+ message = await self.get_message_by_id(event_id)
if not message:
failed += 1
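The DLQ repository keeps its statistics as aggregation pipelines: counts by status, topic, and event type, plus min/max/avg age over FAILED_AT. A minimal sketch of the per-status grouping, assuming a raw "status" field name rather than DLQFields:

async def dlq_counts_by_status(collection) -> dict[str, int]:
    pipeline = [{"$group": {"_id": "$status", "count": {"$sum": 1}}}]
    counts: dict[str, int] = {}
    async for doc in collection.aggregate(pipeline):
        if doc["_id"]:  # skip documents with a missing status
            counts[doc["_id"]] = doc["count"]
    return counts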
diff --git a/backend/app/db/repositories/event_repository.py b/backend/app/db/repositories/event_repository.py
index 790e8290..2d2789a9 100644
--- a/backend/app/db/repositories/event_repository.py
+++ b/backend/app/db/repositories/event_repository.py
@@ -1,13 +1,14 @@
-import time
from dataclasses import replace
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
+from types import MappingProxyType
from typing import Any, AsyncIterator, Mapping
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
from pymongo import ASCENDING, DESCENDING
-from pymongo.errors import DuplicateKeyError
from app.core.logging import logger
+from app.core.tracing import EventAttributes
+from app.core.tracing.utils import add_span_attributes
from app.domain.enums.user import UserRole
from app.domain.events import (
ArchivedEvent,
@@ -19,26 +20,20 @@
EventReplayInfo,
EventStatistics,
)
-from app.infrastructure.mappers.event_mapper import ArchivedEventMapper, EventMapper
+from app.domain.events.event_models import CollectionNames
+from app.infrastructure.mappers import ArchivedEventMapper, EventFilterMapper, EventMapper
class EventRepository:
def __init__(self, database: AsyncIOMotorDatabase) -> None:
self.database = database
- self._collection: AsyncIOMotorCollection | None = None
self.mapper = EventMapper()
-
- @property
- def collection(self) -> AsyncIOMotorCollection:
- if self._collection is None:
- self._collection = self.database.events
- return self._collection
-
+ self._collection: AsyncIOMotorCollection = self.database.get_collection(CollectionNames.EVENTS)
def _build_time_filter(
self,
- start_time: datetime | float | None,
- end_time: datetime | float | None
+ start_time: datetime | None,
+ end_time: datetime | None
) -> dict[str, object]:
"""Build time range filter, eliminating if-else branching."""
return {
@@ -48,28 +43,6 @@ def _build_time_filter(
}.items() if value is not None
}
- def _build_query(self, **filters: object) -> dict[str, object]:
- """Build MongoDB query from non-None filters, eliminating if-else branching."""
- query: dict[str, object] = {}
-
- # Handle special cases
- for key, value in filters.items():
- if value is None:
- continue
-
- if key == "time_range" and isinstance(value, tuple):
- start_time, end_time = value
- time_filter = self._build_time_filter(start_time, end_time)
- if time_filter:
- query[EventFields.TIMESTAMP] = time_filter
- elif key == "event_types" and isinstance(value, list):
- query[EventFields.EVENT_TYPE] = {"$in": value}
- else:
- # Direct field mapping
- query[key] = value
-
- return query
-
async def store_event(self, event: Event) -> str:
"""
Store an event in the collection
@@ -83,22 +56,21 @@ async def store_event(self, event: Event) -> str:
Raises:
DuplicateKeyError: If event with same ID already exists
"""
- try:
- if not event.stored_at:
- event = replace(event, stored_at=datetime.now(timezone.utc))
-
- event_doc = self.mapper.to_mongo_document(event)
- _ = await self.collection.insert_one(event_doc)
-
- logger.debug(f"Stored event {event.event_id} of type {event.event_type}")
- return event.event_id
+ if not event.stored_at:
+ event = replace(event, stored_at=datetime.now(timezone.utc))
+
+ event_doc = self.mapper.to_mongo_document(event)
+ add_span_attributes(
+ **{
+ str(EventAttributes.EVENT_TYPE): event.event_type,
+ str(EventAttributes.EVENT_ID): event.event_id,
+ str(EventAttributes.EXECUTION_ID): event.aggregate_id or "",
+ }
+ )
+ _ = await self._collection.insert_one(event_doc)
- except DuplicateKeyError:
- logger.warning(f"Duplicate event ID: {event.event_id}")
- raise
- except Exception as e:
- logger.error(f"Failed to store event: {e}")
- raise
+ logger.debug(f"Stored event {event.event_id} of type {event.event_type}")
+ return event.event_id
async def store_events_batch(self, events: list[Event]) -> list[str]:
"""
@@ -112,38 +84,26 @@ async def store_events_batch(self, events: list[Event]) -> list[str]:
"""
if not events:
return []
+ now = datetime.now(timezone.utc)
+ event_docs = []
+ for event in events:
+ if not event.stored_at:
+ event = replace(event, stored_at=now)
+ event_docs.append(self.mapper.to_mongo_document(event))
- try:
- now = datetime.now(timezone.utc)
- event_docs = []
- for event in events:
- if not event.stored_at:
- event = replace(event, stored_at=now)
- event_docs.append(self.mapper.to_mongo_document(event))
-
- result = await self.collection.insert_many(event_docs, ordered=False)
-
- logger.info(f"Stored {len(result.inserted_ids)} events in batch")
- return [event.event_id for event in events]
-
- except Exception as e:
- logger.error(f"Failed to store event batch: {e}")
- stored_ids = []
- for event in events:
- try:
- await self.store_event(event)
- stored_ids.append(event.event_id)
- except DuplicateKeyError:
- continue
- return stored_ids
+ result = await self._collection.insert_many(event_docs, ordered=False)
+ add_span_attributes(
+ **{
+ "events.batch.count": len(event_docs),
+ }
+ )
+
+ logger.info(f"Stored {len(result.inserted_ids)} events in batch")
+ return [event.event_id for event in events]
async def get_event(self, event_id: str) -> Event | None:
- try:
- result = await self.collection.find_one({EventFields.EVENT_ID: event_id})
- return self.mapper.from_mongo_document(result) if result else None
- except Exception as e:
- logger.error(f"Failed to get event: {e}")
- return None
+ result = await self._collection.find_one({EventFields.EVENT_ID: event_id})
+ return self.mapper.from_mongo_document(result) if result else None
async def get_events_by_type(
self,
@@ -158,7 +118,7 @@ async def get_events_by_type(
if time_filter:
query[EventFields.TIMESTAMP] = time_filter
- cursor = self.collection.find(query).sort(EventFields.TIMESTAMP, DESCENDING).skip(skip).limit(limit)
+ cursor = self._collection.find(query).sort(EventFields.TIMESTAMP, DESCENDING).skip(skip).limit(limit)
docs = await cursor.to_list(length=limit)
return [self.mapper.from_mongo_document(doc) for doc in docs]
@@ -168,24 +128,20 @@ async def get_events_by_aggregate(
event_types: list[str] | None = None,
limit: int = 100
) -> list[Event]:
- try:
- query: dict[str, Any] = {EventFields.AGGREGATE_ID: aggregate_id}
- if event_types:
- query[EventFields.EVENT_TYPE] = {"$in": event_types}
-
- cursor = self.collection.find(query).sort(EventFields.TIMESTAMP, ASCENDING).limit(limit)
- docs = await cursor.to_list(length=limit)
- return [self.mapper.from_mongo_document(doc) for doc in docs]
- except Exception as e:
- logger.error(f"Failed to get events by aggregate: {e}")
- return []
+ query: dict[str, Any] = {EventFields.AGGREGATE_ID: aggregate_id}
+ if event_types:
+ query[EventFields.EVENT_TYPE] = {"$in": event_types}
+
+ cursor = self._collection.find(query).sort(EventFields.TIMESTAMP, ASCENDING).limit(limit)
+ docs = await cursor.to_list(length=limit)
+ return [self.mapper.from_mongo_document(doc) for doc in docs]
async def get_events_by_correlation(
self,
correlation_id: str,
limit: int = 100
) -> list[Event]:
- cursor = (self.collection.find({EventFields.METADATA_CORRELATION_ID: correlation_id})
+ cursor = (self._collection.find({EventFields.METADATA_CORRELATION_ID: correlation_id})
.sort(EventFields.TIMESTAMP, ASCENDING).limit(limit))
docs = await cursor.to_list(length=limit)
return [self.mapper.from_mongo_document(doc) for doc in docs]
@@ -206,7 +162,7 @@ async def get_events_by_user(
if time_filter:
query[EventFields.TIMESTAMP] = time_filter
- cursor = self.collection.find(query).sort(EventFields.TIMESTAMP, DESCENDING).skip(skip).limit(limit)
+ cursor = self._collection.find(query).sort(EventFields.TIMESTAMP, DESCENDING).skip(skip).limit(limit)
docs = await cursor.to_list(length=limit)
return [self.mapper.from_mongo_document(doc) for doc in docs]
@@ -222,7 +178,7 @@ async def get_execution_events(
]
}
- cursor = self.collection.find(query).sort(EventFields.TIMESTAMP, ASCENDING).limit(limit)
+ cursor = self._collection.find(query).sort(EventFields.TIMESTAMP, ASCENDING).limit(limit)
docs = await cursor.to_list(length=limit)
return [self.mapper.from_mongo_document(doc) for doc in docs]
@@ -237,14 +193,14 @@ async def search_events(
if filters:
query.update(filters)
- cursor = self.collection.find(query).sort(EventFields.TIMESTAMP, DESCENDING).skip(skip).limit(limit)
+ cursor = self._collection.find(query).sort(EventFields.TIMESTAMP, DESCENDING).skip(skip).limit(limit)
docs = await cursor.to_list(length=limit)
return [self.mapper.from_mongo_document(doc) for doc in docs]
async def get_event_statistics(
self,
- start_time: float | None = None,
- end_time: float | None = None
+ start_time: datetime | None = None,
+ end_time: datetime | None = None
) -> EventStatistics:
pipeline: list[Mapping[str, object]] = []
@@ -284,7 +240,7 @@ async def get_event_statistics(
}
])
- result = await self.collection.aggregate(pipeline).to_list(length=1)
+ result = await self._collection.aggregate(pipeline).to_list(length=1)
if result:
stats = result[0]
@@ -304,7 +260,7 @@ async def get_event_statistics(
async def get_event_statistics_filtered(
self,
- match: dict[str, object] | None = None,
+ match: Mapping[str, object] = MappingProxyType({}),
start_time: datetime | None = None,
end_time: datetime | None = None,
) -> EventStatistics:
@@ -312,7 +268,7 @@ async def get_event_statistics_filtered(
and_clauses: list[dict[str, object]] = []
if match:
- and_clauses.append(match)
+ and_clauses.append(dict(match))
time_filter = self._build_time_filter(start_time, end_time)
if time_filter:
and_clauses.append({EventFields.TIMESTAMP: time_filter})
@@ -351,7 +307,7 @@ async def get_event_statistics_filtered(
}
])
- result = await self.collection.aggregate(pipeline).to_list(length=1)
+ result = await self._collection.aggregate(pipeline).to_list(length=1)
if result:
stats = result[0]
return EventStatistics(
@@ -378,7 +334,7 @@ async def stream_events(
if filters:
pipeline.append({"$match": filters})
- async with self.collection.watch(
+ async with self._collection.watch(
pipeline,
start_after=start_after,
full_document="updateLookup"
@@ -404,18 +360,18 @@ async def cleanup_old_events(
Returns:
Number of events deleted (or would be deleted if dry_run)
"""
- cutoff_timestamp = time.time() - (older_than_days * 24 * 60 * 60)
+ cutoff_dt = datetime.now(timezone.utc) - timedelta(days=older_than_days)
- query: dict[str, Any] = {EventFields.TIMESTAMP: {"$lt": cutoff_timestamp}}
+ query: dict[str, Any] = {EventFields.TIMESTAMP: {"$lt": cutoff_dt}}
if event_types:
query[EventFields.EVENT_TYPE] = {"$in": event_types}
if dry_run:
- count = await self.collection.count_documents(query)
+ count = await self._collection.count_documents(query)
logger.info(f"Would delete {count} events older than {older_than_days} days")
return count
- result = await self.collection.delete_many(query)
+ result = await self._collection.delete_many(query)
logger.info(f"Deleted {result.deleted_count} events older than {older_than_days} days")
return result.deleted_count
@@ -439,10 +395,10 @@ async def get_user_events_paginated(
if time_filter:
query[EventFields.TIMESTAMP] = time_filter
- total_count = await self.collection.count_documents(query)
+ total_count = await self._collection.count_documents(query)
sort_direction = DESCENDING if sort_order == "desc" else ASCENDING
- cursor = self.collection.find(query)
+ cursor = self._collection.find(query)
cursor = cursor.sort(EventFields.TIMESTAMP, sort_direction)
cursor = cursor.skip(skip).limit(limit)
@@ -475,16 +431,16 @@ async def query_events_advanced(
elif user_role != UserRole.ADMIN:
query[EventFields.METADATA_USER_ID] = user_id
- # Apply filters using EventFilter's to_query method
- base_query = filters.to_query()
+ # Apply filters using mapper from domain filter
+ base_query = EventFilterMapper.to_mongo_query(filters)
query.update(base_query)
- total_count = await self.collection.count_documents(query)
+ total_count = await self._collection.count_documents(query)
sort_field = EventFields.TIMESTAMP
sort_direction = DESCENDING
- cursor = self.collection.find(query)
+ cursor = self._collection.find(query)
cursor = cursor.sort(sort_field, sort_direction)
cursor = cursor.skip(0).limit(100)
@@ -492,21 +448,15 @@ async def query_events_advanced(
async for doc in cursor:
docs.append(doc)
- return EventListResult(
+ result_obj = EventListResult(
events=[self.mapper.from_mongo_document(doc) for doc in docs],
total=total_count,
skip=0,
limit=100,
has_more=100 < total_count
)
-
- # Access checks are handled in the service layer.
-
- # Access checks are handled in the service layer.
-
- # Access checks are handled in the service layer.
-
- # Access checks are handled in the service layer.
+ add_span_attributes(**{"events.query.total": total_count})
+ return result_obj
async def aggregate_events(
self,
@@ -517,25 +467,23 @@ async def aggregate_events(
pipeline.append({"$limit": limit})
results = []
- async for doc in self.collection.aggregate(pipeline):
+ async for doc in self._collection.aggregate(pipeline):
if "_id" in doc and isinstance(doc["_id"], dict):
doc["_id"] = str(doc["_id"])
results.append(doc)
return EventAggregationResult(results=results, pipeline=pipeline)
- # Access checks are handled in the service layer.
-
- async def list_event_types(self, match: dict[str, object] | None = None) -> list[str]:
+ async def list_event_types(self, match: Mapping[str, object] = MappingProxyType({})) -> list[str]:
pipeline: list[Mapping[str, object]] = []
if match:
- pipeline.append({"$match": match})
+ pipeline.append({"$match": dict(match)})
pipeline.extend([
{"$group": {"_id": f"${EventFields.EVENT_TYPE}"}},
{"$sort": {"_id": 1}}
])
event_types: list[str] = []
- async for doc in self.collection.aggregate(pipeline):
+ async for doc in self._collection.aggregate(pipeline):
event_types.append(doc["_id"])
return event_types
@@ -547,9 +495,9 @@ async def query_events_generic(
skip: int,
limit: int,
) -> EventListResult:
- total_count = await self.collection.count_documents(query)
+ total_count = await self._collection.count_documents(query)
- cursor = self.collection.find(query)
+ cursor = self._collection.find(query)
cursor = cursor.sort(sort_field, sort_direction)
cursor = cursor.skip(skip).limit(limit)
@@ -596,12 +544,12 @@ async def delete_event_with_archival(
)
# Archive the event
- archive_collection = self.database["events_archive"]
+ archive_collection = self.database.get_collection(CollectionNames.EVENTS_ARCHIVE)
archived_mapper = ArchivedEventMapper()
await archive_collection.insert_one(archived_mapper.to_mongo_document(archived_event))
# Delete from main collection
- result = await self.collection.delete_one({EventFields.EVENT_ID: event_id})
+ result = await self._collection.delete_one({EventFields.EVENT_ID: event_id})
if result.deleted_count == 0:
raise Exception("Failed to delete event")
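The event repository switches its retention cutoff from epoch floats to timezone-aware datetimes compared directly against EventFields.TIMESTAMP. A sketch of that cleanup path, assuming a literal "timestamp" field name:

from datetime import datetime, timedelta, timezone


async def cleanup_old_events(collection, older_than_days: int, dry_run: bool = True) -> int:
    cutoff = datetime.now(timezone.utc) - timedelta(days=older_than_days)
    query = {"timestamp": {"$lt": cutoff}}
    if dry_run:
        # Report what would be removed without touching the data.
        return await collection.count_documents(query)
    result = await collection.delete_many(query)
    return result.deleted_count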
diff --git a/backend/app/db/repositories/execution_repository.py b/backend/app/db/repositories/execution_repository.py
index d0f8d676..55fad752 100644
--- a/backend/app/db/repositories/execution_repository.py
+++ b/backend/app/db/repositories/execution_repository.py
@@ -3,91 +3,124 @@
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
from app.core.logging import logger
-from app.domain.execution.models import DomainExecution, ExecutionResultDomain, ResourceUsageDomain
+from app.domain.enums.execution import ExecutionStatus
+from app.domain.events.event_models import CollectionNames
+from app.domain.execution import DomainExecution, ExecutionResultDomain, ResourceUsageDomain
class ExecutionRepository:
def __init__(self, db: AsyncIOMotorDatabase):
self.db = db
- self.collection: AsyncIOMotorCollection = self.db.get_collection("executions")
- self.results_collection: AsyncIOMotorCollection = self.db.get_collection("execution_results")
+ self.collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EXECUTIONS)
+ self.results_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EXECUTION_RESULTS)
async def create_execution(self, execution: DomainExecution) -> DomainExecution:
- try:
- execution_dict = {
- "execution_id": execution.execution_id,
- "script": execution.script,
- "status": execution.status,
- "output": execution.output,
- "errors": execution.errors,
- "lang": execution.lang,
- "lang_version": execution.lang_version,
- "created_at": execution.created_at,
- "updated_at": execution.updated_at,
- "resource_usage": execution.resource_usage.to_dict() if execution.resource_usage else None,
- "user_id": execution.user_id,
- "exit_code": execution.exit_code,
- "error_type": execution.error_type,
- }
- logger.info(f"Inserting execution {execution.execution_id} into MongoDB")
- result = await self.collection.insert_one(execution_dict)
- logger.info(f"Inserted execution {execution.execution_id} with _id: {result.inserted_id}")
- return execution
- except Exception as e:
- logger.error(f"Database error creating execution {execution.execution_id}: {type(e).__name__}",
- exc_info=True)
- raise
+ execution_dict = {
+ "execution_id": execution.execution_id,
+ "script": execution.script,
+ "status": execution.status,
+ "stdout": execution.stdout,
+ "stderr": execution.stderr,
+ "lang": execution.lang,
+ "lang_version": execution.lang_version,
+ "created_at": execution.created_at,
+ "updated_at": execution.updated_at,
+ "resource_usage": execution.resource_usage.to_dict() if execution.resource_usage else None,
+ "user_id": execution.user_id,
+ "exit_code": execution.exit_code,
+ "error_type": execution.error_type,
+ }
+ logger.info(f"Inserting execution {execution.execution_id} into MongoDB")
+ result = await self.collection.insert_one(execution_dict)
+ logger.info(f"Inserted execution {execution.execution_id} with _id: {result.inserted_id}")
+ return execution
async def get_execution(self, execution_id: str) -> DomainExecution | None:
- try:
- logger.info(f"Searching for execution {execution_id} in MongoDB")
- document = await self.collection.find_one({"execution_id": execution_id})
- if document:
- logger.info(f"Found execution {execution_id} in MongoDB")
- from app.domain.enums.execution import ExecutionStatus
- sv = document.get("status")
- try:
- st = sv if isinstance(sv, ExecutionStatus) else ExecutionStatus(str(sv))
- except Exception:
- st = ExecutionStatus.QUEUED
- return DomainExecution(
- execution_id=document.get("execution_id"),
- script=document.get("script", ""),
- status=st,
- output=document.get("output"),
- errors=document.get("errors"),
- lang=document.get("lang", "python"),
- lang_version=document.get("lang_version", "3.11"),
- created_at=document.get("created_at", datetime.now(timezone.utc)),
- updated_at=document.get("updated_at", datetime.now(timezone.utc)),
- resource_usage=(
- ResourceUsageDomain.from_dict(document.get("resource_usage"))
- if document.get("resource_usage") is not None
- else None
- ),
- user_id=document.get("user_id"),
- exit_code=document.get("exit_code"),
- error_type=document.get("error_type"),
- )
- else:
- logger.warning(f"Execution {execution_id} not found in MongoDB")
- return None
- except Exception as e:
- logger.error(f"Database error fetching execution {execution_id}: {type(e).__name__}", exc_info=True)
+ logger.info(f"Searching for execution {execution_id} in MongoDB")
+ document = await self.collection.find_one({"execution_id": execution_id})
+ if not document:
+ logger.warning(f"Execution {execution_id} not found in MongoDB")
return None
- async def update_execution(self, execution_id: str, update_data: dict) -> bool:
- try:
- update_data.setdefault("updated_at", datetime.now(timezone.utc))
- update_payload = {"$set": update_data}
+ logger.info(f"Found execution {execution_id} in MongoDB")
+
+ result_doc = await self.results_collection.find_one({"execution_id": execution_id})
+ if result_doc:
+ document["stdout"] = result_doc.get("stdout")
+ document["stderr"] = result_doc.get("stderr")
+ document["exit_code"] = result_doc.get("exit_code")
+ document["resource_usage"] = result_doc.get("resource_usage")
+ document["error_type"] = result_doc.get("error_type")
+ if result_doc.get("status"):
+ document["status"] = result_doc.get("status")
+
+ sv = document.get("status")
+ return DomainExecution(
+ execution_id=document.get("execution_id"),
+ script=document.get("script", ""),
+ status=ExecutionStatus(str(sv)),
+ stdout=document.get("stdout"),
+ stderr=document.get("stderr"),
+ lang=document.get("lang", "python"),
+ lang_version=document.get("lang_version", "3.11"),
+ created_at=document.get("created_at", datetime.now(timezone.utc)),
+ updated_at=document.get("updated_at", datetime.now(timezone.utc)),
+ resource_usage=(
+ ResourceUsageDomain.from_dict(document.get("resource_usage"))
+ if document.get("resource_usage") is not None
+ else None
+ ),
+ user_id=document.get("user_id"),
+ exit_code=document.get("exit_code"),
+ error_type=document.get("error_type"),
+ )
- result = await self.collection.update_one(
- {"execution_id": execution_id}, update_payload
- )
- return result.matched_count > 0
- except Exception as e:
- logger.error(f"Database error updating execution {execution_id}: {type(e).__name__}", exc_info=True)
- return False
+ async def update_execution(self, execution_id: str, update_data: dict) -> bool:
+ update_data.setdefault("updated_at", datetime.now(timezone.utc))
+ update_payload = {"$set": update_data}
+
+ result = await self.collection.update_one(
+ {"execution_id": execution_id}, update_payload
+ )
+ return result.matched_count > 0
+
+ async def write_terminal_result(self, exec_result: ExecutionResultDomain) -> bool:
+ base = await self.collection.find_one({"execution_id": exec_result.execution_id}, {"user_id": 1}) or {}
+ user_id = base.get("user_id")
+
+ doc = {
+ "_id": exec_result.execution_id,
+ "execution_id": exec_result.execution_id,
+ "status": exec_result.status.value,
+ "exit_code": exec_result.exit_code,
+ "stdout": exec_result.stdout,
+ "stderr": exec_result.stderr,
+ "resource_usage": exec_result.resource_usage.to_dict(),
+ "created_at": exec_result.created_at,
+ "metadata": exec_result.metadata,
+ }
+ if exec_result.error_type is not None:
+ doc["error_type"] = exec_result.error_type
+ if user_id is not None:
+ doc["user_id"] = user_id
+
+ await self.results_collection.replace_one({"_id": exec_result.execution_id}, doc, upsert=True)
+
+ update_data = {
+ "status": exec_result.status.value,
+ "updated_at": datetime.now(timezone.utc),
+ "stdout": exec_result.stdout,
+ "stderr": exec_result.stderr,
+ "exit_code": exec_result.exit_code,
+ "resource_usage": exec_result.resource_usage.to_dict(),
+ }
+ if exec_result.error_type is not None:
+ update_data["error_type"] = exec_result.error_type
+
+ res = await self.collection.update_one({"execution_id": exec_result.execution_id}, {"$set": update_data})
+ if res.matched_count == 0:
+ logger.warning(f"No execution found to patch for {exec_result.execution_id} after result upsert")
+ return True
async def get_executions(
self,
@@ -96,80 +129,41 @@ async def get_executions(
skip: int = 0,
sort: list | None = None
) -> list[DomainExecution]:
- try:
- cursor = self.collection.find(query)
- if sort:
- cursor = cursor.sort(sort)
- cursor = cursor.skip(skip).limit(limit)
-
- executions: list[DomainExecution] = []
- async for doc in cursor:
- from app.domain.enums.execution import ExecutionStatus
- sv = doc.get("status")
- try:
- st = sv if isinstance(sv, ExecutionStatus) else ExecutionStatus(str(sv))
- except Exception:
- st = ExecutionStatus.QUEUED
- executions.append(
- DomainExecution(
- execution_id=doc.get("execution_id"),
- script=doc.get("script", ""),
- status=st,
- output=doc.get("output"),
- errors=doc.get("errors"),
- lang=doc.get("lang", "python"),
- lang_version=doc.get("lang_version", "3.11"),
- created_at=doc.get("created_at", datetime.now(timezone.utc)),
- updated_at=doc.get("updated_at", datetime.now(timezone.utc)),
- resource_usage=ResourceUsageDomain.from_dict(doc.get("resource_usage")),
- user_id=doc.get("user_id"),
- exit_code=doc.get("exit_code"),
- error_type=doc.get("error_type"),
- )
+ cursor = self.collection.find(query)
+ if sort:
+ cursor = cursor.sort(sort)
+ cursor = cursor.skip(skip).limit(limit)
+
+ executions: list[DomainExecution] = []
+ async for doc in cursor:
+ sv = doc.get("status")
+ executions.append(
+ DomainExecution(
+ execution_id=doc.get("execution_id"),
+ script=doc.get("script", ""),
+ status=ExecutionStatus(str(sv)),
+ stdout=doc.get("stdout"),
+ stderr=doc.get("stderr"),
+ lang=doc.get("lang", "python"),
+ lang_version=doc.get("lang_version", "3.11"),
+ created_at=doc.get("created_at", datetime.now(timezone.utc)),
+ updated_at=doc.get("updated_at", datetime.now(timezone.utc)),
+ resource_usage=(
+ ResourceUsageDomain.from_dict(doc.get("resource_usage"))
+ if doc.get("resource_usage") is not None
+ else None
+ ),
+ user_id=doc.get("user_id"),
+ exit_code=doc.get("exit_code"),
+ error_type=doc.get("error_type"),
)
+ )
- return executions
- except Exception as e:
- logger.error(f"Database error fetching executions: {type(e).__name__}", exc_info=True)
- return []
+ return executions
async def count_executions(self, query: dict) -> int:
- try:
- return await self.collection.count_documents(query)
- except Exception as e:
- logger.error(f"Database error counting executions: {type(e).__name__}", exc_info=True)
- return 0
+ return await self.collection.count_documents(query)
async def delete_execution(self, execution_id: str) -> bool:
- try:
- result = await self.collection.delete_one({"execution_id": execution_id})
- return result.deleted_count > 0
- except Exception as e:
- logger.error(f"Database error deleting execution {execution_id}: {type(e).__name__}", exc_info=True)
- return False
-
- async def upsert_result(self, result: ExecutionResultDomain) -> bool:
- """Create or update an execution result record.
-
- Stored in the dedicated 'execution_results' collection.
- """
- try:
- doc = {
- "_id": result.execution_id,
- "execution_id": result.execution_id,
- "status": result.status.value,
- "exit_code": result.exit_code,
- "stdout": result.stdout,
- "stderr": result.stderr,
- "resource_usage": result.resource_usage.to_dict(),
- "created_at": result.created_at,
- "metadata": result.metadata,
- }
- if result.error_type is not None:
- doc["error_type"] = result.error_type
-
- await self.results_collection.replace_one({"_id": result.execution_id}, doc, upsert=True)
- return True
- except Exception as e:
- logger.error(f"Database error upserting result {result.execution_id}: {type(e).__name__}", exc_info=True)
- return False
+ result = await self.collection.delete_one({"execution_id": execution_id})
+ return result.deleted_count > 0
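
A note on the read/write split above: terminal output now lives in a separate `execution_results` collection, and `write_terminal_result` persists it in two steps (idempotent result upsert, then a patch of the parent execution). The following is a minimal standalone sketch of that pattern; the Motor client, collection names, and the plain-dict payload are assumptions for illustration, not the repository's actual code.

```python
# Sketch of the terminal-result write-through shown above (assumptions: a Motor
# database handle, collections named "executions" and "execution_results",
# and a flat dict in place of ExecutionResultDomain).
from datetime import datetime, timezone

from motor.motor_asyncio import AsyncIOMotorDatabase


async def write_terminal_result(db: AsyncIOMotorDatabase, execution_id: str,
                                status: str, exit_code: int,
                                stdout: str, stderr: str) -> None:
    results = db.get_collection("execution_results")
    executions = db.get_collection("executions")

    doc = {
        "_id": execution_id,          # one result document per execution
        "execution_id": execution_id,
        "status": status,
        "exit_code": exit_code,
        "stdout": stdout,
        "stderr": stderr,
        "created_at": datetime.now(timezone.utc),
    }
    # Idempotent: a re-delivered terminal event overwrites the same _id.
    await results.replace_one({"_id": execution_id}, doc, upsert=True)

    # Patch the parent execution so list/read paths see the terminal state.
    await executions.update_one(
        {"execution_id": execution_id},
        {"$set": {
            "status": status,
            "exit_code": exit_code,
            "updated_at": datetime.now(timezone.utc),
        }},
    )

# Usage (given a connected AsyncIOMotorClient `client`):
#   await write_terminal_result(client["integr8scode_test"], "exec-1", "completed", 0, "hello\n", "")
```
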
diff --git a/backend/app/db/repositories/idempotency_repository.py b/backend/app/db/repositories/idempotency_repository.py
deleted file mode 100644
index e6042eb2..00000000
--- a/backend/app/db/repositories/idempotency_repository.py
+++ /dev/null
@@ -1,65 +0,0 @@
-from __future__ import annotations
-
-from datetime import datetime
-
-from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
-
-
-class IdempotencyRepository:
- """Repository for idempotency key persistence.
-
- Encapsulates all Mongo operations and document mapping to keep
- services free of database concerns.
- """
-
- def __init__(self, db: AsyncIOMotorDatabase, collection_name: str = "idempotency_keys") -> None:
- self._db = db
- self._collection: AsyncIOMotorCollection = self._db.get_collection(collection_name)
-
- async def find_by_key(self, key: str) -> dict[str, object] | None:
- return await self._collection.find_one({"key": key})
-
- async def insert_processing(
- self,
- *,
- key: str,
- event_type: str,
- event_id: str,
- created_at: datetime,
- ttl_seconds: int,
- ) -> None:
- doc = {
- "key": key,
- "status": "processing",
- "event_type": event_type,
- "event_id": event_id,
- "created_at": created_at,
- "ttl_seconds": ttl_seconds,
- }
- await self._collection.insert_one(doc)
-
- async def update_set(self, key: str, fields: dict[str, object]) -> int:
- """Apply $set update. Returns modified count."""
- res = await self._collection.update_one({"key": key}, {"$set": fields})
- return getattr(res, "modified_count", 0) or 0
-
- async def delete_key(self, key: str) -> int:
- res = await self._collection.delete_one({"key": key})
- return getattr(res, "deleted_count", 0) or 0
-
- async def aggregate_status_counts(self, key_prefix: str) -> dict[str, int]:
- pipeline: list[dict[str, object]] = [
- {"$match": {"key": {"$regex": f"^{key_prefix}:"}}},
- {"$group": {"_id": "$status", "count": {"$sum": 1}}},
- ]
- counts: dict[str, int] = {}
- async for doc in self._collection.aggregate(pipeline):
- status = str(doc.get("_id"))
- count = int(doc.get("count", 0))
- counts[status] = count
- return counts
-
- async def health_check(self) -> None:
- # A lightweight op to verify connectivity/permissions
- await self._collection.find_one({}, {"_id": 1})
-
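
For reference, the status-count query that the removed `IdempotencyRepository.aggregate_status_counts` performed can be reproduced with a short pipeline like the one below. The collection name and the `re.escape` hardening are assumptions added for the sketch (the removed code interpolated the prefix directly).

```python
# Sketch of the status-count aggregation from the removed repository.
# Assumption: keys are stored as "<prefix>:<suffix>" in a collection such as "idempotency_keys".
import re

from motor.motor_asyncio import AsyncIOMotorCollection


async def aggregate_status_counts(collection: AsyncIOMotorCollection, key_prefix: str) -> dict[str, int]:
    pipeline = [
        # re.escape keeps the prefix literal even if it contains regex metacharacters
        {"$match": {"key": {"$regex": f"^{re.escape(key_prefix)}:"}}},
        {"$group": {"_id": "$status", "count": {"$sum": 1}}},
    ]
    counts: dict[str, int] = {}
    async for doc in collection.aggregate(pipeline):
        counts[str(doc["_id"])] = int(doc.get("count", 0))
    return counts
```
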
diff --git a/backend/app/db/repositories/notification_repository.py b/backend/app/db/repositories/notification_repository.py
index b78851a5..dfc99308 100644
--- a/backend/app/db/repositories/notification_repository.py
+++ b/backend/app/db/repositories/notification_repository.py
@@ -4,192 +4,68 @@
from pymongo import ASCENDING, DESCENDING, IndexModel
from app.core.logging import logger
-from app.domain.admin.user_models import UserFields
from app.domain.enums.notification import (
NotificationChannel,
NotificationStatus,
- NotificationType,
)
from app.domain.enums.user import UserRole
-from app.domain.notification.models import (
- DomainNotification,
- DomainNotificationRule,
- DomainNotificationSubscription,
- DomainNotificationTemplate,
-)
+from app.domain.events.event_models import CollectionNames
+from app.domain.notification import DomainNotification, DomainNotificationSubscription
+from app.domain.user import UserFields
+from app.infrastructure.mappers import NotificationMapper
class NotificationRepository:
def __init__(self, database: AsyncIOMotorDatabase):
self.db: AsyncIOMotorDatabase = database
- # Collections
- self.notifications_collection: AsyncIOMotorCollection = self.db.notifications
- self.templates_collection: AsyncIOMotorCollection = self.db.notification_templates
- self.subscriptions_collection: AsyncIOMotorCollection = self.db.notification_subscriptions
- self.rules_collection: AsyncIOMotorCollection = self.db.notification_rules
+ self.notifications_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.NOTIFICATIONS)
+ self.subscriptions_collection: AsyncIOMotorCollection = self.db.get_collection(
+ CollectionNames.NOTIFICATION_SUBSCRIPTIONS)
+ self.mapper = NotificationMapper()
async def create_indexes(self) -> None:
- try:
- # Create indexes if only _id exists
- notif_indexes = await self.notifications_collection.list_indexes().to_list(None)
- if len(notif_indexes) <= 1:
- await self.notifications_collection.create_indexes([
- IndexModel([("user_id", ASCENDING), ("created_at", DESCENDING)]),
- IndexModel([("status", ASCENDING), ("scheduled_for", ASCENDING)]),
- IndexModel([("created_at", ASCENDING)]),
- IndexModel([("notification_id", ASCENDING)], unique=True),
- ])
-
- rules_indexes = await self.rules_collection.list_indexes().to_list(None)
- if len(rules_indexes) <= 1:
- await self.rules_collection.create_indexes([
- IndexModel([("event_types", ASCENDING)]),
- IndexModel([("enabled", ASCENDING)]),
- ])
-
- subs_indexes = await self.subscriptions_collection.list_indexes().to_list(None)
- if len(subs_indexes) <= 1:
- await self.subscriptions_collection.create_indexes([
- IndexModel([("user_id", ASCENDING), ("channel", ASCENDING)], unique=True),
- IndexModel([("enabled", ASCENDING)]),
- ])
- except Exception as e:
- logger.error(f"Error creating notification indexes: {e}")
- raise
-
- # Templates
- async def upsert_template(self, template: DomainNotificationTemplate) -> None:
- await self.templates_collection.update_one(
- {"notification_type": template.notification_type},
- {"$set": {
- "notification_type": template.notification_type,
- "channels": template.channels,
- "priority": template.priority,
- "subject_template": template.subject_template,
- "body_template": template.body_template,
- "action_url_template": template.action_url_template,
- "metadata": template.metadata,
- }},
- upsert=True,
- )
-
- async def bulk_upsert_templates(self, templates: list[DomainNotificationTemplate]) -> None:
- for t in templates:
- await self.upsert_template(t)
- logger.info(f"Bulk upserted {len(templates)} templates")
-
- async def get_template(self, notification_type: NotificationType) -> DomainNotificationTemplate | None:
- doc = await self.templates_collection.find_one({"notification_type": notification_type})
- if not doc:
- return None
- return DomainNotificationTemplate(
- notification_type=doc.get("notification_type"),
- channels=doc.get("channels", []),
- priority=doc.get("priority"),
- subject_template=doc.get("subject_template", ""),
- body_template=doc.get("body_template", ""),
- action_url_template=doc.get("action_url_template"),
- metadata=doc.get("metadata", {}),
- )
+ # Create indexes if only _id exists
+ notif_indexes = await self.notifications_collection.list_indexes().to_list(None)
+ if len(notif_indexes) <= 1:
+ await self.notifications_collection.create_indexes([
+ IndexModel([("user_id", ASCENDING), ("created_at", DESCENDING)]),
+ IndexModel([("status", ASCENDING), ("scheduled_for", ASCENDING)]),
+ IndexModel([("created_at", ASCENDING)]),
+ IndexModel([("notification_id", ASCENDING)], unique=True),
+ # Multikey index to speed up tag queries (include/exclude/prefix)
+ IndexModel([("tags", ASCENDING)]),
+ ])
+
+ subs_indexes = await self.subscriptions_collection.list_indexes().to_list(None)
+ if len(subs_indexes) <= 1:
+ await self.subscriptions_collection.create_indexes([
+ IndexModel([("user_id", ASCENDING), ("channel", ASCENDING)], unique=True),
+ IndexModel([("enabled", ASCENDING)]),
+ IndexModel([("include_tags", ASCENDING)]),
+ IndexModel([("severities", ASCENDING)]),
+ ])
# Notifications
async def create_notification(self, notification: DomainNotification) -> str:
- result = await self.notifications_collection.insert_one({
- "notification_id": notification.notification_id,
- "user_id": notification.user_id,
- "notification_type": notification.notification_type,
- "channel": notification.channel,
- "priority": notification.priority,
- "status": notification.status,
- "subject": notification.subject,
- "body": notification.body,
- "action_url": notification.action_url,
- "created_at": notification.created_at,
- "scheduled_for": notification.scheduled_for,
- "sent_at": notification.sent_at,
- "delivered_at": notification.delivered_at,
- "read_at": notification.read_at,
- "clicked_at": notification.clicked_at,
- "failed_at": notification.failed_at,
- "retry_count": notification.retry_count,
- "max_retries": notification.max_retries,
- "error_message": notification.error_message,
- "correlation_id": notification.correlation_id,
- "related_entity_id": notification.related_entity_id,
- "related_entity_type": notification.related_entity_type,
- "metadata": notification.metadata,
- "webhook_url": notification.webhook_url,
- "webhook_headers": notification.webhook_headers,
- })
+ doc = self.mapper.to_mongo_document(notification)
+ result = await self.notifications_collection.insert_one(doc)
return str(result.inserted_id)
async def update_notification(self, notification: DomainNotification) -> bool:
- update = {
- "user_id": notification.user_id,
- "notification_type": notification.notification_type,
- "channel": notification.channel,
- "priority": notification.priority,
- "status": notification.status,
- "subject": notification.subject,
- "body": notification.body,
- "action_url": notification.action_url,
- "created_at": notification.created_at,
- "scheduled_for": notification.scheduled_for,
- "sent_at": notification.sent_at,
- "delivered_at": notification.delivered_at,
- "read_at": notification.read_at,
- "clicked_at": notification.clicked_at,
- "failed_at": notification.failed_at,
- "retry_count": notification.retry_count,
- "max_retries": notification.max_retries,
- "error_message": notification.error_message,
- "correlation_id": notification.correlation_id,
- "related_entity_id": notification.related_entity_id,
- "related_entity_type": notification.related_entity_type,
- "metadata": notification.metadata,
- "webhook_url": notification.webhook_url,
- "webhook_headers": notification.webhook_headers,
- }
+ update = self.mapper.to_update_dict(notification)
result = await self.notifications_collection.update_one(
{"notification_id": str(notification.notification_id)}, {"$set": update}
)
return result.modified_count > 0
async def get_notification(self, notification_id: str, user_id: str) -> DomainNotification | None:
- doc = await self.notifications_collection.find_one({
- "notification_id": notification_id,
- "user_id": user_id,
- })
+ doc = await self.notifications_collection.find_one(
+ {"notification_id": notification_id, "user_id": user_id}
+ )
if not doc:
return None
- return DomainNotification(
- notification_id=doc.get("notification_id"),
- user_id=doc.get("user_id"),
- notification_type=doc.get("notification_type"),
- channel=doc.get("channel"),
- priority=doc.get("priority"),
- status=doc.get("status"),
- subject=doc.get("subject", ""),
- body=doc.get("body", ""),
- action_url=doc.get("action_url"),
- created_at=doc.get("created_at", datetime.now(UTC)),
- scheduled_for=doc.get("scheduled_for"),
- sent_at=doc.get("sent_at"),
- delivered_at=doc.get("delivered_at"),
- read_at=doc.get("read_at"),
- clicked_at=doc.get("clicked_at"),
- failed_at=doc.get("failed_at"),
- retry_count=doc.get("retry_count", 0),
- max_retries=doc.get("max_retries", 3),
- error_message=doc.get("error_message"),
- correlation_id=doc.get("correlation_id"),
- related_entity_id=doc.get("related_entity_id"),
- related_entity_type=doc.get("related_entity_type"),
- metadata=doc.get("metadata", {}),
- webhook_url=doc.get("webhook_url"),
- webhook_headers=doc.get("webhook_headers"),
- )
+ return self.mapper.from_mongo_document(doc)
async def mark_as_read(self, notification_id: str, user_id: str) -> bool:
result = await self.notifications_collection.update_one(
@@ -200,7 +76,7 @@ async def mark_as_read(self, notification_id: str, user_id: str) -> bool:
async def mark_all_as_read(self, user_id: str) -> int:
result = await self.notifications_collection.update_many(
- {"user_id": user_id, "status": {"$in": [NotificationStatus.SENT, NotificationStatus.DELIVERED]}},
+ {"user_id": user_id, "status": {"$in": [NotificationStatus.DELIVERED]}},
{"$set": {"status": NotificationStatus.READ, "read_at": datetime.now(UTC)}},
)
return result.modified_count
@@ -212,18 +88,31 @@ async def delete_notification(self, notification_id: str, user_id: str) -> bool:
return result.deleted_count > 0
async def list_notifications(
- self,
- user_id: str,
- status: NotificationStatus | None = None,
- skip: int = 0,
- limit: int = 20,
+ self,
+ user_id: str,
+ status: NotificationStatus | None = None,
+ skip: int = 0,
+ limit: int = 20,
+ include_tags: list[str] | None = None,
+ exclude_tags: list[str] | None = None,
+ tag_prefix: str | None = None,
) -> list[DomainNotification]:
- query: dict[str, object] = {"user_id": user_id}
+ base: dict[str, object] = {"user_id": user_id}
if status:
- query["status"] = status
+ base["status"] = status
+ query: dict[str, object] | None = base
+ tag_filters: list[dict[str, object]] = []
+ if include_tags:
+ tag_filters.append({"tags": {"$in": include_tags}})
+ if exclude_tags:
+ tag_filters.append({"tags": {"$nin": exclude_tags}})
+ if tag_prefix:
+ tag_filters.append({"tags": {"$elemMatch": {"$regex": f"^{tag_prefix}"}}})
+ if tag_filters:
+ query = {"$and": [base] + tag_filters}
cursor = (
- self.notifications_collection.find(query)
+ self.notifications_collection.find(query or base)
.sort("created_at", DESCENDING)
.skip(skip)
.limit(limit)
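
The tag filters introduced in `list_notifications` compose into a single `$and` query. A minimal sketch of the same construction as a pure function, so it can be unit-tested without a database (the function name is illustrative; field names follow the diff):

```python
# Sketch of the tag-filter query built in list_notifications above.
def build_notification_query(
    user_id: str,
    status: str | None = None,
    include_tags: list[str] | None = None,
    exclude_tags: list[str] | None = None,
    tag_prefix: str | None = None,
) -> dict:
    base: dict = {"user_id": user_id}
    if status:
        base["status"] = status

    tag_filters: list[dict] = []
    if include_tags:
        tag_filters.append({"tags": {"$in": include_tags}})    # at least one matching tag
    if exclude_tags:
        tag_filters.append({"tags": {"$nin": exclude_tags}})   # none of these tags
    if tag_prefix:
        # Prefix is taken literally here, matching the diff; escape it (e.g. re.escape)
        # if prefixes may contain regex metacharacters.
        tag_filters.append({"tags": {"$elemMatch": {"$regex": f"^{tag_prefix}"}}})

    return {"$and": [base, *tag_filters]} if tag_filters else base


# Example: notifications for a user, scoped to execution-related tags
# build_notification_query("user-1", include_tags=["execution:exec-1"], tag_prefix="execution:")
```
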
@@ -231,39 +120,26 @@ async def list_notifications(
items: list[DomainNotification] = []
async for doc in cursor:
- items.append(
- DomainNotification(
- notification_id=doc.get("notification_id"),
- user_id=doc.get("user_id"),
- notification_type=doc.get("notification_type"),
- channel=doc.get("channel"),
- priority=doc.get("priority"),
- status=doc.get("status"),
- subject=doc.get("subject", ""),
- body=doc.get("body", ""),
- action_url=doc.get("action_url"),
- created_at=doc.get("created_at", datetime.now(UTC)),
- scheduled_for=doc.get("scheduled_for"),
- sent_at=doc.get("sent_at"),
- delivered_at=doc.get("delivered_at"),
- read_at=doc.get("read_at"),
- clicked_at=doc.get("clicked_at"),
- failed_at=doc.get("failed_at"),
- retry_count=doc.get("retry_count", 0),
- max_retries=doc.get("max_retries", 3),
- error_message=doc.get("error_message"),
- correlation_id=doc.get("correlation_id"),
- related_entity_id=doc.get("related_entity_id"),
- related_entity_type=doc.get("related_entity_type"),
- metadata=doc.get("metadata", {}),
- webhook_url=doc.get("webhook_url"),
- webhook_headers=doc.get("webhook_headers"),
- )
- )
+ items.append(self.mapper.from_mongo_document(doc))
return items
+ async def list_notifications_by_tag(
+ self,
+ user_id: str,
+ tag: str,
+ skip: int = 0,
+ limit: int = 20,
+ ) -> list[DomainNotification]:
+ """Convenience helper to list notifications filtered by a single exact tag."""
+ return await self.list_notifications(
+ user_id=user_id,
+ skip=skip,
+ limit=limit,
+ include_tags=[tag],
+ )
+
async def count_notifications(
- self, user_id: str, additional_filters: dict[str, object] | None = None
+ self, user_id: str, additional_filters: dict[str, object] | None = None
) -> int:
query: dict[str, object] = {"user_id": user_id}
if additional_filters:
@@ -274,10 +150,27 @@ async def get_unread_count(self, user_id: str) -> int:
return await self.notifications_collection.count_documents(
{
"user_id": user_id,
- "status": {"$in": [NotificationStatus.SENT, NotificationStatus.DELIVERED]},
+ "status": {"$in": [NotificationStatus.DELIVERED]},
}
)
+ async def try_claim_pending(self, notification_id: str) -> bool:
+ """Atomically claim a pending notification for delivery.
+
+ Transitions PENDING -> SENDING when scheduled_for is None or due.
+ Returns True if the document was claimed by this caller.
+ """
+ now = datetime.now(UTC)
+ result = await self.notifications_collection.update_one(
+ {
+ "notification_id": notification_id,
+ "status": NotificationStatus.PENDING,
+ "$or": [{"scheduled_for": None}, {"scheduled_for": {"$lte": now}}],
+ },
+ {"$set": {"status": NotificationStatus.SENDING, "sent_at": now}},
+ )
+ return result.modified_count > 0
+
async def find_pending_notifications(self, batch_size: int = 10) -> list[DomainNotification]:
cursor = self.notifications_collection.find(
{
@@ -288,35 +181,7 @@ async def find_pending_notifications(self, batch_size: int = 10) -> list[DomainN
items: list[DomainNotification] = []
async for doc in cursor:
- items.append(
- DomainNotification(
- notification_id=doc.get("notification_id"),
- user_id=doc.get("user_id"),
- notification_type=doc.get("notification_type"),
- channel=doc.get("channel"),
- priority=doc.get("priority"),
- status=doc.get("status"),
- subject=doc.get("subject", ""),
- body=doc.get("body", ""),
- action_url=doc.get("action_url"),
- created_at=doc.get("created_at", datetime.now(UTC)),
- scheduled_for=doc.get("scheduled_for"),
- sent_at=doc.get("sent_at"),
- delivered_at=doc.get("delivered_at"),
- read_at=doc.get("read_at"),
- clicked_at=doc.get("clicked_at"),
- failed_at=doc.get("failed_at"),
- retry_count=doc.get("retry_count", 0),
- max_retries=doc.get("max_retries", 3),
- error_message=doc.get("error_message"),
- correlation_id=doc.get("correlation_id"),
- related_entity_id=doc.get("related_entity_id"),
- related_entity_type=doc.get("related_entity_type"),
- metadata=doc.get("metadata", {}),
- webhook_url=doc.get("webhook_url"),
- webhook_headers=doc.get("webhook_headers"),
- )
- )
+ items.append(self.mapper.from_mongo_document(doc))
return items
async def find_scheduled_notifications(self, batch_size: int = 10) -> list[DomainNotification]:
@@ -329,35 +194,7 @@ async def find_scheduled_notifications(self, batch_size: int = 10) -> list[Domai
items: list[DomainNotification] = []
async for doc in cursor:
- items.append(
- DomainNotification(
- notification_id=doc.get("notification_id"),
- user_id=doc.get("user_id"),
- notification_type=doc.get("notification_type"),
- channel=doc.get("channel"),
- priority=doc.get("priority"),
- status=doc.get("status"),
- subject=doc.get("subject", ""),
- body=doc.get("body", ""),
- action_url=doc.get("action_url"),
- created_at=doc.get("created_at", datetime.now(UTC)),
- scheduled_for=doc.get("scheduled_for"),
- sent_at=doc.get("sent_at"),
- delivered_at=doc.get("delivered_at"),
- read_at=doc.get("read_at"),
- clicked_at=doc.get("clicked_at"),
- failed_at=doc.get("failed_at"),
- retry_count=doc.get("retry_count", 0),
- max_retries=doc.get("max_retries", 3),
- error_message=doc.get("error_message"),
- correlation_id=doc.get("correlation_id"),
- related_entity_id=doc.get("related_entity_id"),
- related_entity_type=doc.get("related_entity_type"),
- metadata=doc.get("metadata", {}),
- webhook_url=doc.get("webhook_url"),
- webhook_headers=doc.get("webhook_headers"),
- )
- )
+ items.append(self.mapper.from_mongo_document(doc))
return items
async def cleanup_old_notifications(self, days: int = 30) -> int:
@@ -367,59 +204,31 @@ async def cleanup_old_notifications(self, days: int = 30) -> int:
# Subscriptions
async def get_subscription(
- self, user_id: str, channel: NotificationChannel
+ self, user_id: str, channel: NotificationChannel
) -> DomainNotificationSubscription | None:
doc = await self.subscriptions_collection.find_one(
{"user_id": user_id, "channel": channel}
)
if not doc:
return None
- return DomainNotificationSubscription(
- user_id=doc.get("user_id"),
- channel=doc.get("channel"),
- enabled=doc.get("enabled", True),
- notification_types=doc.get("notification_types", []),
- webhook_url=doc.get("webhook_url"),
- slack_webhook=doc.get("slack_webhook"),
- quiet_hours_enabled=doc.get("quiet_hours_enabled", False),
- quiet_hours_start=doc.get("quiet_hours_start"),
- quiet_hours_end=doc.get("quiet_hours_end"),
- timezone=doc.get("timezone", "UTC"),
- batch_interval_minutes=doc.get("batch_interval_minutes", 60),
- created_at=doc.get("created_at", datetime.now(UTC)),
- updated_at=doc.get("updated_at", datetime.now(UTC)),
- )
+ return self.mapper.subscription_from_mongo_document(doc)
async def upsert_subscription(
- self,
- user_id: str,
- channel: NotificationChannel,
- subscription: DomainNotificationSubscription,
+ self,
+ user_id: str,
+ channel: NotificationChannel,
+ subscription: DomainNotificationSubscription,
) -> None:
subscription.user_id = user_id
subscription.channel = channel
subscription.updated_at = datetime.now(UTC)
- doc = {
- "user_id": subscription.user_id,
- "channel": subscription.channel,
- "enabled": subscription.enabled,
- "notification_types": subscription.notification_types,
- "webhook_url": subscription.webhook_url,
- "slack_webhook": subscription.slack_webhook,
- "quiet_hours_enabled": subscription.quiet_hours_enabled,
- "quiet_hours_start": subscription.quiet_hours_start,
- "quiet_hours_end": subscription.quiet_hours_end,
- "timezone": subscription.timezone,
- "batch_interval_minutes": subscription.batch_interval_minutes,
- "created_at": subscription.created_at,
- "updated_at": subscription.updated_at,
- }
+ doc = self.mapper.subscription_to_mongo_document(subscription)
await self.subscriptions_collection.replace_one(
{"user_id": user_id, "channel": channel}, doc, upsert=True
)
async def get_all_subscriptions(
- self, user_id: str
+ self, user_id: str
) -> dict[str, DomainNotificationSubscription]:
subs: dict[str, DomainNotificationSubscription] = {}
for channel in NotificationChannel:
@@ -427,101 +236,13 @@ async def get_all_subscriptions(
{"user_id": user_id, "channel": channel}
)
if doc:
- subs[str(channel)] = DomainNotificationSubscription(
- user_id=doc.get("user_id"),
- channel=doc.get("channel"),
- enabled=doc.get("enabled", True),
- notification_types=doc.get("notification_types", []),
- webhook_url=doc.get("webhook_url"),
- slack_webhook=doc.get("slack_webhook"),
- quiet_hours_enabled=doc.get("quiet_hours_enabled", False),
- quiet_hours_start=doc.get("quiet_hours_start"),
- quiet_hours_end=doc.get("quiet_hours_end"),
- timezone=doc.get("timezone", "UTC"),
- batch_interval_minutes=doc.get("batch_interval_minutes", 60),
- created_at=doc.get("created_at", datetime.now(UTC)),
- updated_at=doc.get("updated_at", datetime.now(UTC)),
- )
+ subs[channel] = self.mapper.subscription_from_mongo_document(doc)
else:
- subs[str(channel)] = DomainNotificationSubscription(
- user_id=user_id, channel=channel, enabled=True, notification_types=[]
+ subs[channel] = DomainNotificationSubscription(
+ user_id=user_id, channel=channel, enabled=True
)
return subs
- # Rules
- async def create_rule(self, rule: DomainNotificationRule) -> str:
- doc = {
- "rule_id": rule.rule_id,
- "name": rule.name,
- "description": rule.description,
- "enabled": rule.enabled,
- "event_types": rule.event_types,
- "conditions": rule.conditions,
- "notification_type": rule.notification_type,
- "channels": rule.channels,
- "priority": rule.priority,
- "template_id": rule.template_id,
- "throttle_minutes": rule.throttle_minutes,
- "max_per_hour": rule.max_per_hour,
- "max_per_day": rule.max_per_day,
- "created_at": rule.created_at,
- "updated_at": rule.updated_at,
- "created_by": rule.created_by,
- }
- result = await self.rules_collection.insert_one(doc)
- return str(result.inserted_id)
-
- async def get_rules_for_event(self, event_type: str) -> list[DomainNotificationRule]:
- cursor = self.rules_collection.find({"event_types": event_type, "enabled": True})
- rules: list[DomainNotificationRule] = []
- async for doc in cursor:
- rules.append(
- DomainNotificationRule(
- rule_id=doc.get("rule_id"),
- name=doc.get("name", ""),
- description=doc.get("description"),
- enabled=doc.get("enabled", True),
- event_types=list(doc.get("event_types", [])),
- conditions=dict(doc.get("conditions", {})),
- notification_type=doc.get("notification_type"),
- channels=list(doc.get("channels", [])),
- priority=doc.get("priority"),
- template_id=doc.get("template_id"),
- throttle_minutes=doc.get("throttle_minutes"),
- max_per_hour=doc.get("max_per_hour"),
- max_per_day=doc.get("max_per_day"),
- created_at=doc.get("created_at", datetime.now(UTC)),
- updated_at=doc.get("updated_at", datetime.now(UTC)),
- created_by=doc.get("created_by"),
- )
- )
- return rules
-
- async def update_rule(self, rule_id: str, rule: DomainNotificationRule) -> bool:
- update = {
- "name": rule.name,
- "description": rule.description,
- "enabled": rule.enabled,
- "event_types": rule.event_types,
- "conditions": rule.conditions,
- "notification_type": rule.notification_type,
- "channels": rule.channels,
- "priority": rule.priority,
- "template_id": rule.template_id,
- "throttle_minutes": rule.throttle_minutes,
- "max_per_hour": rule.max_per_hour,
- "max_per_day": rule.max_per_day,
- "updated_at": datetime.now(UTC),
- }
- result = await self.rules_collection.update_one(
- {"rule_id": rule_id}, {"$set": update}
- )
- return result.modified_count > 0
-
- async def delete_rule(self, rule_id: str) -> bool:
- result = await self.rules_collection.delete_one({"rule_id": rule_id})
- return result.deleted_count > 0
-
# User query operations for system notifications
async def get_users_by_roles(self, roles: list[UserRole]) -> list[str]:
users_collection = self.db.users
@@ -570,4 +291,3 @@ async def get_active_users(self, days: int = 30) -> list[str]:
user_ids.add(execution["user_id"])
return list(user_ids)
-
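
`try_claim_pending` (added above) relies on MongoDB's single-document atomicity: the status check and the PENDING → SENDING transition happen in one `update_one`, so two concurrent workers cannot both claim the same notification. A sketch of the same pattern, with plain string statuses standing in for `NotificationStatus`:

```python
# Sketch of the atomic claim used by try_claim_pending above.
# Assumptions: a Motor collection, string statuses instead of the NotificationStatus enum.
from datetime import datetime, timezone

from motor.motor_asyncio import AsyncIOMotorCollection


async def try_claim(collection: AsyncIOMotorCollection, notification_id: str) -> bool:
    now = datetime.now(timezone.utc)
    result = await collection.update_one(
        {
            "notification_id": notification_id,
            "status": "pending",  # only claim if still pending
            "$or": [{"scheduled_for": None}, {"scheduled_for": {"$lte": now}}],  # due now
        },
        {"$set": {"status": "sending", "sent_at": now}},
    )
    # modified_count is 1 only for the caller whose filter matched first;
    # every other concurrent worker sees 0 and skips this notification.
    return result.modified_count > 0
```
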
diff --git a/backend/app/db/repositories/replay_repository.py b/backend/app/db/repositories/replay_repository.py
index 2fd22c66..6f5620d1 100644
--- a/backend/app/db/repositories/replay_repository.py
+++ b/backend/app/db/repositories/replay_repository.py
@@ -1,43 +1,40 @@
-from datetime import datetime, timezone
from typing import Any, AsyncIterator, Dict, List
-from motor.motor_asyncio import AsyncIOMotorDatabase
+from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
from pymongo import ASCENDING, DESCENDING
from app.core.logging import logger
-from app.domain.enums.events import EventType
-from app.domain.enums.replay import ReplayStatus
-from app.domain.replay.models import ReplayConfig, ReplayFilter, ReplaySessionState
+from app.domain.admin.replay_updates import ReplaySessionUpdate
+from app.domain.events.event_models import CollectionNames
+from app.domain.replay import ReplayFilter, ReplaySessionState
+from app.infrastructure.mappers import ReplayStateMapper
class ReplayRepository:
def __init__(self, database: AsyncIOMotorDatabase) -> None:
self.db = database
+ self.replay_collection: AsyncIOMotorCollection = database.get_collection(CollectionNames.REPLAY_SESSIONS)
+ self.events_collection: AsyncIOMotorCollection = database.get_collection(CollectionNames.EVENTS)
+ self._mapper = ReplayStateMapper()
async def create_indexes(self) -> None:
- try:
- # Replay sessions indexes
- collection = self.db.replay_sessions
- await collection.create_index([("session_id", ASCENDING)], unique=True)
- await collection.create_index([("status", ASCENDING)])
- await collection.create_index([("created_at", DESCENDING)])
- await collection.create_index([("user_id", ASCENDING)])
-
- # Events collection indexes for replay queries
- events_collection = self.db.events
- await events_collection.create_index([("execution_id", 1), ("timestamp", 1)])
- await events_collection.create_index([("event_type", 1), ("timestamp", 1)])
- await events_collection.create_index([("metadata.user_id", 1), ("timestamp", 1)])
-
- logger.info("Replay repository indexes created successfully")
- except Exception as e:
- logger.error(f"Error creating replay repository indexes: {e}")
- raise
+ # Replay sessions indexes
+ await self.replay_collection.create_index([("session_id", ASCENDING)], unique=True)
+ await self.replay_collection.create_index([("status", ASCENDING)])
+ await self.replay_collection.create_index([("created_at", DESCENDING)])
+ await self.replay_collection.create_index([("user_id", ASCENDING)])
+
+ # Events collection indexes for replay queries
+ await self.events_collection.create_index([("execution_id", 1), ("timestamp", 1)])
+ await self.events_collection.create_index([("event_type", 1), ("timestamp", 1)])
+ await self.events_collection.create_index([("metadata.user_id", 1), ("timestamp", 1)])
+
+ logger.info("Replay repository indexes created successfully")
async def save_session(self, session: ReplaySessionState) -> None:
"""Save or update a replay session (domain โ persistence)."""
- doc = self._session_state_to_doc(session)
- await self.db.replay_sessions.update_one(
+ doc = self._mapper.to_mongo_document(session)
+ await self.replay_collection.update_one(
{"session_id": session.session_id},
{"$set": doc},
upsert=True
@@ -45,8 +42,8 @@ async def save_session(self, session: ReplaySessionState) -> None:
async def get_session(self, session_id: str) -> ReplaySessionState | None:
"""Get a replay session by ID (persistence โ domain)."""
- data = await self.db.replay_sessions.find_one({"session_id": session_id})
- return self._doc_to_session_state(data) if data else None
+ data = await self.replay_collection.find_one({"session_id": session_id})
+ return self._mapper.from_mongo_document(data) if data else None
async def list_sessions(
self,
@@ -55,7 +52,7 @@ async def list_sessions(
limit: int = 100,
skip: int = 0
) -> list[ReplaySessionState]:
- collection = self.db.replay_sessions
+ collection = self.replay_collection
query = {}
if status:
@@ -66,14 +63,12 @@ async def list_sessions(
cursor = collection.find(query).sort("created_at", DESCENDING).skip(skip).limit(limit)
sessions: list[ReplaySessionState] = []
async for doc in cursor:
- state = self._doc_to_session_state(doc)
- if state:
- sessions.append(state)
+ sessions.append(self._mapper.from_mongo_document(doc))
return sessions
async def update_session_status(self, session_id: str, status: str) -> bool:
"""Update the status of a replay session"""
- result = await self.db.replay_sessions.update_one(
+ result = await self.replay_collection.update_one(
{"session_id": session_id},
{"$set": {"status": status}}
)
@@ -81,7 +76,7 @@ async def update_session_status(self, session_id: str, status: str) -> bool:
async def delete_old_sessions(self, cutoff_time: str) -> int:
"""Delete old completed/failed/cancelled sessions"""
- result = await self.db.replay_sessions.delete_many({
+ result = await self.replay_collection.delete_many({
"created_at": {"$lt": cutoff_time},
"status": {"$in": ["completed", "failed", "cancelled"]}
})
@@ -89,121 +84,29 @@ async def delete_old_sessions(self, cutoff_time: str) -> int:
async def count_sessions(self, query: dict[str, object] | None = None) -> int:
"""Count sessions matching the given query"""
- return await self.db.replay_sessions.count_documents(query or {})
-
+ return await self.replay_collection.count_documents(query or {})
+
async def update_replay_session(
self,
session_id: str,
- updates: Dict[str, Any]
+ updates: ReplaySessionUpdate
) -> bool:
"""Update specific fields of a replay session"""
- result = await self.db.replay_sessions.update_one(
+ if not updates.has_updates():
+ return False
+
+ mongo_updates = updates.to_dict()
+ result = await self.replay_collection.update_one(
{"session_id": session_id},
- {"$set": updates}
+ {"$set": mongo_updates}
)
return result.modified_count > 0
- def _session_state_to_doc(self, s: ReplaySessionState) -> Dict[str, Any]:
- """Serialize domain session state to a MongoDB document."""
- cfg = s.config
- flt = cfg.filter
- return {
- "session_id": s.session_id,
- "status": s.status,
- "total_events": s.total_events,
- "replayed_events": s.replayed_events,
- "failed_events": s.failed_events,
- "skipped_events": s.skipped_events,
- "created_at": s.created_at,
- "started_at": s.started_at,
- "completed_at": s.completed_at,
- "last_event_at": s.last_event_at,
- "errors": s.errors,
- "config": {
- "replay_type": cfg.replay_type,
- "target": cfg.target,
- "speed_multiplier": cfg.speed_multiplier,
- "preserve_timestamps": cfg.preserve_timestamps,
- "batch_size": cfg.batch_size,
- "max_events": cfg.max_events,
- "skip_errors": cfg.skip_errors,
- "retry_failed": cfg.retry_failed,
- "retry_attempts": cfg.retry_attempts,
- "target_file_path": cfg.target_file_path,
- "target_topics": {k: v for k, v in (cfg.target_topics or {}).items()},
- "filter": {
- "execution_id": flt.execution_id,
- "event_types": flt.event_types if flt.event_types else None,
- "exclude_event_types": flt.exclude_event_types if flt.exclude_event_types else None,
- "start_time": flt.start_time,
- "end_time": flt.end_time,
- "user_id": flt.user_id,
- "service_name": flt.service_name,
- "custom_query": flt.custom_query,
- },
- },
- }
-
- def _doc_to_session_state(self, doc: Dict[str, Any]) -> ReplaySessionState | None:
- try:
- cfg_dict = doc.get("config", {})
- flt_dict = cfg_dict.get("filter", {})
-
- # Rehydrate domain filter/config
- event_types = [EventType(et) for et in flt_dict.get("event_types", [])] \
- if flt_dict.get("event_types") else None
- exclude_event_types = [EventType(et) for et in flt_dict.get("exclude_event_types", [])] \
- if flt_dict.get("exclude_event_types") else None
- flt = ReplayFilter(
- execution_id=flt_dict.get("execution_id"),
- event_types=event_types,
- start_time=flt_dict.get("start_time"),
- end_time=flt_dict.get("end_time"),
- user_id=flt_dict.get("user_id"),
- service_name=flt_dict.get("service_name"),
- custom_query=flt_dict.get("custom_query"),
- exclude_event_types=exclude_event_types,
- )
- cfg = ReplayConfig(
- replay_type=cfg_dict.get("replay_type"),
- target=cfg_dict.get("target"),
- filter=flt,
- speed_multiplier=cfg_dict.get("speed_multiplier", 1.0),
- preserve_timestamps=cfg_dict.get("preserve_timestamps", False),
- batch_size=cfg_dict.get("batch_size", 100),
- max_events=cfg_dict.get("max_events"),
- target_topics=None, # string-keyed map not used by domain; optional override remains None
- target_file_path=cfg_dict.get("target_file_path"),
- skip_errors=cfg_dict.get("skip_errors", True),
- retry_failed=cfg_dict.get("retry_failed", False),
- retry_attempts=cfg_dict.get("retry_attempts", 3),
- enable_progress_tracking=cfg_dict.get("enable_progress_tracking", True),
- )
- status_str = doc.get("status", ReplayStatus.CREATED)
- status = status_str if isinstance(status_str, ReplayStatus) else ReplayStatus(str(status_str))
- return ReplaySessionState(
- session_id=doc.get("session_id", ""),
- config=cfg,
- status=status,
- total_events=doc.get("total_events", 0),
- replayed_events=doc.get("replayed_events", 0),
- failed_events=doc.get("failed_events", 0),
- skipped_events=doc.get("skipped_events", 0),
- created_at=doc.get("created_at", datetime.now(timezone.utc)),
- started_at=doc.get("started_at"),
- completed_at=doc.get("completed_at"),
- last_event_at=doc.get("last_event_at"),
- errors=doc.get("errors", []),
- )
- except Exception as e:
- logger.error(f"Failed to deserialize replay session document: {e}")
- return None
-
async def count_events(self, filter: ReplayFilter) -> int:
"""Count events matching the given filter"""
query = filter.to_mongo_query()
- return await self.db.events.count_documents(query)
-
+ return await self.events_collection.count_documents(query)
+
async def fetch_events(
self,
filter: ReplayFilter,
@@ -212,14 +115,14 @@ async def fetch_events(
) -> AsyncIterator[List[Dict[str, Any]]]:
"""Fetch events in batches based on filter"""
query = filter.to_mongo_query()
- cursor = self.db.events.find(query).sort("timestamp", 1).skip(skip)
-
+ cursor = self.events_collection.find(query).sort("timestamp", 1).skip(skip)
+
batch = []
async for doc in cursor:
batch.append(doc)
if len(batch) >= batch_size:
yield batch
batch = []
-
+
if batch:
yield batch
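
`fetch_events` streams matching events in fixed-size batches rather than materializing the full result set. The same generator shape, detached from the repository so the batching is easy to see (collection and query are parameters here; names are illustrative):

```python
# Sketch of the batched async generator used by fetch_events above.
from typing import Any, AsyncIterator

from motor.motor_asyncio import AsyncIOMotorCollection


async def fetch_in_batches(
    collection: AsyncIOMotorCollection,
    query: dict[str, Any],
    batch_size: int = 100,
    skip: int = 0,
) -> AsyncIterator[list[dict[str, Any]]]:
    cursor = collection.find(query).sort("timestamp", 1).skip(skip)
    batch: list[dict[str, Any]] = []
    async for doc in cursor:
        batch.append(doc)
        if len(batch) >= batch_size:
            yield batch      # hand a full batch to the caller, keep the cursor open
            batch = []
    if batch:
        yield batch          # trailing partial batch

# Usage:
#   async for events in fetch_in_batches(db.events, {"execution_id": "exec-1"}, batch_size=500):
#       await replay(events)
```
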
diff --git a/backend/app/db/repositories/resource_allocation_repository.py b/backend/app/db/repositories/resource_allocation_repository.py
index 2de64c46..56a4d5a9 100644
--- a/backend/app/db/repositories/resource_allocation_repository.py
+++ b/backend/app/db/repositories/resource_allocation_repository.py
@@ -2,7 +2,7 @@
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
-from app.core.logging import logger
+from app.domain.events.event_models import CollectionNames
class ResourceAllocationRepository:
@@ -10,28 +10,24 @@ class ResourceAllocationRepository:
def __init__(self, database: AsyncIOMotorDatabase):
self._db = database
- self._collection: AsyncIOMotorCollection = self._db.get_collection("resource_allocations")
+ self._collection: AsyncIOMotorCollection = self._db.get_collection(CollectionNames.RESOURCE_ALLOCATIONS)
async def count_active(self, language: str) -> int:
- try:
- return await self._collection.count_documents({
- "status": "active",
- "language": language,
- })
- except Exception as e:
- logger.error(f"Failed to count active allocations: {e}")
- return 0
+ return await self._collection.count_documents({
+ "status": "active",
+ "language": language,
+ })
async def create_allocation(
- self,
- allocation_id: str,
- *,
- execution_id: str,
- language: str,
- cpu_request: str,
- memory_request: str,
- cpu_limit: str,
- memory_limit: str,
+ self,
+ allocation_id: str,
+ *,
+ execution_id: str,
+ language: str,
+ cpu_request: str,
+ memory_request: str,
+ cpu_limit: str,
+ memory_limit: str,
) -> bool:
doc = {
"_id": allocation_id,
@@ -44,21 +40,12 @@ async def create_allocation(
"status": "active",
"allocated_at": datetime.now(timezone.utc),
}
- try:
- await self._collection.insert_one(doc)
- return True
- except Exception as e:
- logger.error(f"Failed to create resource allocation for {allocation_id}: {e}")
- return False
+ result = await self._collection.insert_one(doc)
+ return result.inserted_id is not None
async def release_allocation(self, allocation_id: str) -> bool:
- try:
- result = await self._collection.update_one(
- {"_id": allocation_id},
- {"$set": {"status": "released", "released_at": datetime.now(timezone.utc)}}
- )
- return result.modified_count > 0
- except Exception as e:
- logger.error(f"Failed to release resource allocation {allocation_id}: {e}")
- return False
-
+ result = await self._collection.update_one(
+ {"_id": allocation_id},
+ {"$set": {"status": "released", "released_at": datetime.now(timezone.utc)}}
+ )
+ return result.modified_count > 0
diff --git a/backend/app/db/repositories/saga_repository.py b/backend/app/db/repositories/saga_repository.py
index 0ac38c93..6ee276dc 100644
--- a/backend/app/db/repositories/saga_repository.py
+++ b/backend/app/db/repositories/saga_repository.py
@@ -1,12 +1,12 @@
-from datetime import datetime
+from datetime import datetime, timezone
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
from pymongo import DESCENDING
-from app.core.logging import logger
from app.domain.enums.saga import SagaState
+from app.domain.events.event_models import CollectionNames
from app.domain.saga.models import Saga, SagaFilter, SagaListResult
-from app.infrastructure.mappers.saga_mapper import SagaFilterMapper, SagaMapper
+from app.infrastructure.mappers import SagaFilterMapper, SagaMapper
class SagaRepository:
@@ -18,100 +18,44 @@ class SagaRepository:
"""
def __init__(self, database: AsyncIOMotorDatabase):
- """Initialize saga repository.
-
- Args:
- database: MongoDB database instance
- """
self.db = database
- self.collection: AsyncIOMotorCollection = self.db.get_collection("sagas")
+ self.sagas: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.SAGAS)
+ self.executions: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EXECUTIONS)
self.mapper = SagaMapper()
self.filter_mapper = SagaFilterMapper()
async def upsert_saga(self, saga: Saga) -> bool:
- """Create or update a saga document from domain.
-
- Args:
- saga: Domain saga to persist
-
- Returns:
- True if upsert acknowledged
- """
- try:
- doc = self.mapper.to_mongo(saga)
- _ = await self.collection.replace_one(
- {"saga_id": saga.saga_id},
- doc,
- upsert=True,
- )
- return True
- except Exception as e:
- logger.error(f"Error upserting saga {saga.saga_id}: {e}")
- return False
+ doc = self.mapper.to_mongo(saga)
+ result = await self.sagas.replace_one(
+ {"saga_id": saga.saga_id},
+ doc,
+ upsert=True,
+ )
+        return result.upserted_id is not None or result.matched_count > 0
async def get_saga_by_execution_and_name(self, execution_id: str, saga_name: str) -> Saga | None:
- """Fetch a saga by execution and saga name.
-
- Args:
- execution_id: Execution identifier
- saga_name: Saga type/name
-
- Returns:
- Saga if found, else None
- """
- try:
- doc = await self.collection.find_one({
- "execution_id": execution_id,
- "saga_name": saga_name,
- })
- return self.mapper.from_mongo(doc) if doc else None
- except Exception as e:
- logger.error(
- f"Error getting saga for execution {execution_id} and name {saga_name}: {e}"
- )
- return None
+ doc = await self.sagas.find_one({
+ "execution_id": execution_id,
+ "saga_name": saga_name,
+ })
+ return self.mapper.from_mongo(doc) if doc else None
async def get_saga(self, saga_id: str) -> Saga | None:
- """Get saga by ID.
-
- Args:
- saga_id: The saga identifier
-
- Returns:
- Saga domain model if found, None otherwise
- """
- try:
- doc = await self.collection.find_one({"saga_id": saga_id})
- return self.mapper.from_mongo(doc) if doc else None
- except Exception as e:
- logger.error(f"Error getting saga {saga_id}: {e}")
- return None
+ doc = await self.sagas.find_one({"saga_id": saga_id})
+ return self.mapper.from_mongo(doc) if doc else None
async def get_sagas_by_execution(
self,
execution_id: str,
state: str | None = None
) -> list[Saga]:
- """Get all sagas for an execution.
-
- Args:
- execution_id: The execution identifier
- state: Optional state filter
-
- Returns:
- List of saga domain models, sorted by created_at descending
- """
- try:
- query: dict[str, object] = {"execution_id": execution_id}
- if state:
- query["state"] = state
+ query: dict[str, object] = {"execution_id": execution_id}
+ if state:
+ query["state"] = state
- cursor = self.collection.find(query).sort("created_at", DESCENDING)
- docs = await cursor.to_list(length=None)
- return [self.mapper.from_mongo(doc) for doc in docs]
- except Exception as e:
- logger.error(f"Error getting sagas for execution {execution_id}: {e}")
- return []
+ cursor = self.sagas.find(query).sort("created_at", DESCENDING)
+ docs = await cursor.to_list(length=None)
+ return [self.mapper.from_mongo(doc) for doc in docs]
async def list_sagas(
self,
@@ -119,40 +63,26 @@ async def list_sagas(
limit: int = 100,
skip: int = 0
) -> SagaListResult:
- """List sagas with filtering and pagination.
-
- Args:
- filter: Filter criteria for sagas
- limit: Maximum number of results
- skip: Number of results to skip
-
- Returns:
- SagaListResult with sagas and pagination info
- """
- try:
- query = self.filter_mapper.to_mongodb_query(filter)
+ query = self.filter_mapper.to_mongodb_query(filter)
- # Get total count
- total = await self.collection.count_documents(query)
+ # Get total count
+ total = await self.sagas.count_documents(query)
- # Get sagas with pagination
- cursor = (self.collection.find(query)
- .sort("created_at", DESCENDING)
- .skip(skip)
- .limit(limit))
- docs = await cursor.to_list(length=limit)
+ # Get sagas with pagination
+ cursor = (self.sagas.find(query)
+ .sort("created_at", DESCENDING)
+ .skip(skip)
+ .limit(limit))
+ docs = await cursor.to_list(length=limit)
- sagas = [self.mapper.from_mongo(doc) for doc in docs]
+ sagas = [self.mapper.from_mongo(doc) for doc in docs]
- return SagaListResult(
- sagas=sagas,
- total=total,
- skip=skip,
- limit=limit
- )
- except Exception as e:
- logger.error(f"Error listing sagas: {e}")
- return SagaListResult(sagas=[], total=0, skip=skip, limit=limit)
+ return SagaListResult(
+ sagas=sagas,
+ total=total,
+ skip=skip,
+ limit=limit
+ )
async def update_saga_state(
self,
@@ -160,83 +90,42 @@ async def update_saga_state(
state: str,
error_message: str | None = None
) -> bool:
- """Update saga state.
-
- Args:
- saga_id: The saga identifier
- state: New state value
- error_message: Optional error message
-
- Returns:
- True if updated successfully, False otherwise
- """
- try:
- from datetime import datetime, timezone
+ update_data = {
+ "state": state,
+ "updated_at": datetime.now(timezone.utc)
+ }
- update_data = {
- "state": state,
- "updated_at": datetime.now(timezone.utc)
- }
+ if error_message:
+ update_data["error_message"] = error_message
- if error_message:
- update_data["error_message"] = error_message
+ result = await self.sagas.update_one(
+ {"saga_id": saga_id},
+ {"$set": update_data}
+ )
- result = await self.collection.update_one(
- {"saga_id": saga_id},
- {"$set": update_data}
- )
-
- return result.modified_count > 0
- except Exception as e:
- logger.error(f"Error updating saga {saga_id} state: {e}")
- return False
+ return result.modified_count > 0
async def get_user_execution_ids(self, user_id: str) -> list[str]:
- """Get execution IDs accessible by a user.
-
- This is a helper method that queries executions collection
- to find executions owned by a user.
-
- Args:
- user_id: The user identifier
-
- Returns:
- List of execution IDs
- """
- try:
- executions_collection = self.db.get_collection("executions")
- cursor = executions_collection.find(
- {"user_id": user_id},
- {"execution_id": 1}
- )
- docs = await cursor.to_list(length=None)
- return [doc["execution_id"] for doc in docs]
- except Exception as e:
- logger.error(f"Error getting user execution IDs: {e}")
- return []
+ cursor = self.executions.find(
+ {"user_id": user_id},
+ {"execution_id": 1}
+ )
+ docs = await cursor.to_list(length=None)
+ return [doc["execution_id"] for doc in docs]
async def count_sagas_by_state(self) -> dict[str, int]:
- """Get count of sagas by state.
-
- Returns:
- Dictionary mapping state to count
- """
- try:
- pipeline = [
- {"$group": {
- "_id": "$state",
- "count": {"$sum": 1}
- }}
- ]
+ pipeline = [
+ {"$group": {
+ "_id": "$state",
+ "count": {"$sum": 1}
+ }}
+ ]
- result = {}
- async for doc in self.collection.aggregate(pipeline):
- result[doc["_id"]] = doc["count"]
+ result = {}
+ async for doc in self.sagas.aggregate(pipeline):
+ result[doc["_id"]] = doc["count"]
- return result
- except Exception as e:
- logger.error(f"Error counting sagas by state: {e}")
- return {}
+ return result
async def find_timed_out_sagas(
self,
@@ -244,84 +133,58 @@ async def find_timed_out_sagas(
states: list[SagaState] | None = None,
limit: int = 100,
) -> list[Saga]:
- """Return sagas older than cutoff in provided states.
-
- Args:
- cutoff_time: datetime threshold for created_at
- states: filter states (defaults to RUNNING and COMPENSATING)
- limit: max items to return
-
- Returns:
- List of Saga domain objects
- """
- try:
- states = states or [SagaState.RUNNING, SagaState.COMPENSATING]
- query = {
- "state": {"$in": [s.value for s in states]},
- "created_at": {"$lt": cutoff_time},
- }
- cursor = self.collection.find(query)
- docs = await cursor.to_list(length=limit)
- return [self.mapper.from_mongo(doc) for doc in docs]
- except Exception as e:
- logger.error(f"Error finding timed out sagas: {e}")
- return []
+ states = states or [SagaState.RUNNING, SagaState.COMPENSATING]
+ query = {
+ "state": {"$in": [s.value for s in states]},
+ "created_at": {"$lt": cutoff_time},
+ }
+ cursor = self.sagas.find(query)
+ docs = await cursor.to_list(length=limit)
+ return [self.mapper.from_mongo(doc) for doc in docs]
async def get_saga_statistics(
self,
filter: SagaFilter | None = None
) -> dict[str, object]:
- """Get saga statistics.
-
- Args:
- filter: Optional filter criteria
-
- Returns:
- Dictionary with statistics
- """
- try:
- query = self.filter_mapper.to_mongodb_query(filter) if filter else {}
-
- # Basic counts
- total = await self.collection.count_documents(query)
-
- # State distribution
- state_pipeline = [
- {"$match": query},
- {"$group": {
- "_id": "$state",
- "count": {"$sum": 1}
- }}
- ]
-
- states = {}
- async for doc in self.collection.aggregate(state_pipeline):
- states[doc["_id"]] = doc["count"]
-
- # Average duration for completed sagas
- duration_pipeline = [
- {"$match": {**query, "state": "completed", "completed_at": {"$ne": None}}},
- {"$project": {
- "duration": {
- "$subtract": ["$completed_at", "$created_at"]
- }
- }},
- {"$group": {
- "_id": None,
- "avg_duration": {"$avg": "$duration"}
- }}
- ]
-
- avg_duration = 0.0
- async for doc in self.collection.aggregate(duration_pipeline):
- # Convert milliseconds to seconds
- avg_duration = doc["avg_duration"] / 1000.0 if doc["avg_duration"] else 0.0
-
- return {
- "total": total,
- "by_state": states,
- "average_duration_seconds": avg_duration
- }
- except Exception as e:
- logger.error(f"Error getting saga statistics: {e}")
- return {"total": 0, "by_state": {}, "average_duration_seconds": 0.0}
+ query = self.filter_mapper.to_mongodb_query(filter) if filter else {}
+
+ # Basic counts
+ total = await self.sagas.count_documents(query)
+
+ # State distribution
+ state_pipeline = [
+ {"$match": query},
+ {"$group": {
+ "_id": "$state",
+ "count": {"$sum": 1}
+ }}
+ ]
+
+ states = {}
+ async for doc in self.sagas.aggregate(state_pipeline):
+ states[doc["_id"]] = doc["count"]
+
+ # Average duration for completed sagas
+ duration_pipeline = [
+ {"$match": {**query, "state": "completed", "completed_at": {"$ne": None}}},
+ {"$project": {
+ "duration": {
+ "$subtract": ["$completed_at", "$created_at"]
+ }
+ }},
+ {"$group": {
+ "_id": None,
+ "avg_duration": {"$avg": "$duration"}
+ }}
+ ]
+
+ avg_duration = 0.0
+ async for doc in self.sagas.aggregate(duration_pipeline):
+ # Convert milliseconds to seconds
+ avg_duration = doc["avg_duration"] / 1000.0 if doc["avg_duration"] else 0.0
+
+ return {
+ "total": total,
+ "by_state": states,
+ "average_duration_seconds": avg_duration
+ }
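
The duration aggregation in `get_saga_statistics` subtracts BSON datetimes, which yields milliseconds, hence the division by 1000. A standalone sketch of that pipeline (collection and field names follow the diff; the helper name is illustrative):

```python
# Sketch of the completed-saga average-duration pipeline from get_saga_statistics above.
# Assumption: created_at / completed_at are stored as BSON datetimes, so $subtract
# returns a duration in milliseconds.
from motor.motor_asyncio import AsyncIOMotorCollection


async def average_completed_duration_seconds(sagas: AsyncIOMotorCollection) -> float:
    pipeline = [
        {"$match": {"state": "completed", "completed_at": {"$ne": None}}},
        {"$project": {"duration": {"$subtract": ["$completed_at", "$created_at"]}}},
        {"$group": {"_id": None, "avg_duration": {"$avg": "$duration"}}},
    ]
    async for doc in sagas.aggregate(pipeline):
        return (doc["avg_duration"] or 0.0) / 1000.0  # ms -> s
    return 0.0  # no completed sagas matched
```
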
diff --git a/backend/app/db/repositories/saved_script_repository.py b/backend/app/db/repositories/saved_script_repository.py
index 90358b44..6aa557af 100644
--- a/backend/app/db/repositories/saved_script_repository.py
+++ b/backend/app/db/repositories/saved_script_repository.py
@@ -1,110 +1,54 @@
-from datetime import datetime, timezone
-from uuid import uuid4
+from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
-from motor.motor_asyncio import AsyncIOMotorDatabase
-
-from app.domain.saved_script.models import (
+from app.domain.events.event_models import CollectionNames
+from app.domain.saved_script import (
DomainSavedScript,
DomainSavedScriptCreate,
DomainSavedScriptUpdate,
)
+from app.infrastructure.mappers import SavedScriptMapper
class SavedScriptRepository:
def __init__(self, database: AsyncIOMotorDatabase):
self.db = database
+ self.collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.SAVED_SCRIPTS)
+ self.mapper = SavedScriptMapper()
async def create_saved_script(self, saved_script: DomainSavedScriptCreate, user_id: str) -> DomainSavedScript:
# Build DB document with defaults
- now = datetime.now(timezone.utc)
- doc = {
- "script_id": str(uuid4()),
- "user_id": user_id,
- "name": saved_script.name,
- "script": saved_script.script,
- "lang": saved_script.lang,
- "lang_version": saved_script.lang_version,
- "description": saved_script.description,
- "created_at": now,
- "updated_at": now,
- }
-
- result = await self.db.saved_scripts.insert_one(doc)
+ doc = self.mapper.to_insert_document(saved_script, user_id)
- saved_doc = await self.db.saved_scripts.find_one({"_id": result.inserted_id})
- if not saved_doc:
- raise ValueError("Could not find saved script after insert")
-
- return DomainSavedScript(
- script_id=str(saved_doc.get("script_id")),
- user_id=str(saved_doc.get("user_id")),
- name=str(saved_doc.get("name")),
- script=str(saved_doc.get("script")),
- lang=str(saved_doc.get("lang")),
- lang_version=str(saved_doc.get("lang_version")),
- description=saved_doc.get("description"),
- created_at=saved_doc.get("created_at", now),
- updated_at=saved_doc.get("updated_at", now),
- )
+ result = await self.collection.insert_one(doc)
+ if result.inserted_id is None:
+ raise ValueError("Insert not acknowledged")
+ return self.mapper.from_mongo_document(doc)
async def get_saved_script(
self, script_id: str, user_id: str
) -> DomainSavedScript | None:
- saved_script = await self.db.saved_scripts.find_one(
- {"script_id": str(script_id), "user_id": user_id}
+ saved_script = await self.collection.find_one(
+ {"script_id": script_id, "user_id": user_id}
)
if not saved_script:
return None
- return DomainSavedScript(
- script_id=str(saved_script.get("script_id")),
- user_id=str(saved_script.get("user_id")),
- name=str(saved_script.get("name")),
- script=str(saved_script.get("script")),
- lang=str(saved_script.get("lang")),
- lang_version=str(saved_script.get("lang_version")),
- description=saved_script.get("description"),
- created_at=saved_script.get("created_at"),
- updated_at=saved_script.get("updated_at"),
- )
+ return self.mapper.from_mongo_document(saved_script)
async def update_saved_script(
self, script_id: str, user_id: str, update_data: DomainSavedScriptUpdate
) -> None:
- update: dict = {}
- if update_data.name is not None:
- update["name"] = update_data.name
- if update_data.script is not None:
- update["script"] = update_data.script
- if update_data.lang is not None:
- update["lang"] = update_data.lang
- if update_data.lang_version is not None:
- update["lang_version"] = update_data.lang_version
- if update_data.description is not None:
- update["description"] = update_data.description
- update["updated_at"] = datetime.now(timezone.utc)
+ update = self.mapper.to_update_dict(update_data)
- await self.db.saved_scripts.update_one(
- {"script_id": str(script_id), "user_id": user_id}, {"$set": update}
+ await self.collection.update_one(
+ {"script_id": script_id, "user_id": user_id}, {"$set": update}
)
async def delete_saved_script(self, script_id: str, user_id: str) -> None:
- await self.db.saved_scripts.delete_one({"script_id": str(script_id), "user_id": user_id})
+ await self.collection.delete_one({"script_id": script_id, "user_id": user_id})
async def list_saved_scripts(self, user_id: str) -> list[DomainSavedScript]:
- cursor = self.db.saved_scripts.find({"user_id": user_id})
+ cursor = self.collection.find({"user_id": user_id})
scripts: list[DomainSavedScript] = []
async for script in cursor:
- scripts.append(
- DomainSavedScript(
- script_id=str(script.get("script_id")),
- user_id=str(script.get("user_id")),
- name=str(script.get("name")),
- script=str(script.get("script")),
- lang=str(script.get("lang")),
- lang_version=str(script.get("lang_version")),
- description=script.get("description"),
- created_at=script.get("created_at"),
- updated_at=script.get("updated_at"),
- )
- )
+ scripts.append(self.mapper.from_mongo_document(script))
return scripts
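
The repository now delegates all document/domain conversion to `SavedScriptMapper`, whose implementation is not part of this hunk. A minimal sketch of what `to_insert_document` could look like, reconstructed from the inline code removed above (hypothetical, not the project's actual mapper):

    from datetime import datetime, timezone
    from uuid import uuid4

    class SavedScriptMapperSketch:
        """Hypothetical mapper writing the same fields as the removed inline dict."""

        def to_insert_document(self, saved_script, user_id: str) -> dict:
            now = datetime.now(timezone.utc)
            return {
                "script_id": str(uuid4()),
                "user_id": user_id,
                "name": saved_script.name,
                "script": saved_script.script,
                "lang": saved_script.lang,
                "lang_version": saved_script.lang_version,
                "description": saved_script.description,
                "created_at": now,
                "updated_at": now,
            }
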
diff --git a/backend/app/db/repositories/sse_repository.py b/backend/app/db/repositories/sse_repository.py
index 5be577da..536aa076 100644
--- a/backend/app/db/repositories/sse_repository.py
+++ b/backend/app/db/repositories/sse_repository.py
@@ -1,18 +1,17 @@
-from datetime import datetime, timezone
-from typing import Any, Dict
-
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
-from app.domain.enums.execution import ExecutionStatus
-from app.domain.execution.models import DomainExecution, ResourceUsageDomain
-from app.domain.sse.models import SSEEventDomain, SSEExecutionStatusDomain
+from app.domain.events.event_models import CollectionNames
+from app.domain.execution import DomainExecution
+from app.domain.sse import SSEEventDomain, SSEExecutionStatusDomain
+from app.infrastructure.mappers import SSEMapper
class SSERepository:
def __init__(self, database: AsyncIOMotorDatabase):
self.db = database
- self.executions_collection: AsyncIOMotorCollection = self.db.get_collection("executions")
- self.events_collection: AsyncIOMotorCollection = self.db.get_collection("events")
+ self.executions_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EXECUTIONS)
+ self.events_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EVENTS)
+ self.mapper = SSEMapper()
async def get_execution_status(self, execution_id: str) -> SSEExecutionStatusDomain | None:
execution = await self.executions_collection.find_one(
@@ -21,11 +20,7 @@ async def get_execution_status(self, execution_id: str) -> SSEExecutionStatusDom
)
if execution:
- return SSEExecutionStatusDomain(
- execution_id=execution_id,
- status=str(execution.get("status", "unknown")),
- timestamp=datetime.now(timezone.utc).isoformat(),
- )
+ return self.mapper.to_execution_status(execution_id, execution.get("status", "unknown"))
return None
async def get_execution_events(
@@ -40,10 +35,7 @@ async def get_execution_events(
events: list[SSEEventDomain] = []
async for event in cursor:
- events.append(SSEEventDomain(
- aggregate_id=str(event.get("aggregate_id", "")),
- timestamp=event.get("timestamp"),
- ))
+ events.append(self.mapper.event_from_mongo_document(event))
return events
async def get_execution_for_user(self, execution_id: str, user_id: str) -> DomainExecution | None:
@@ -53,7 +45,7 @@ async def get_execution_for_user(self, execution_id: str, user_id: str) -> Domai
})
if not doc:
return None
- return self._doc_to_execution(doc)
+ return self.mapper.execution_from_mongo_document(doc)
async def get_execution(self, execution_id: str) -> DomainExecution | None:
doc = await self.executions_collection.find_one({
@@ -61,26 +53,4 @@ async def get_execution(self, execution_id: str) -> DomainExecution | None:
})
if not doc:
return None
- return self._doc_to_execution(doc)
-
- def _doc_to_execution(self, doc: Dict[str, Any]) -> DomainExecution:
- sv = doc.get("status")
- try:
- st = sv if isinstance(sv, ExecutionStatus) else ExecutionStatus(str(sv))
- except Exception:
- st = ExecutionStatus.QUEUED
- return DomainExecution(
- execution_id=str(doc.get("execution_id")),
- script=str(doc.get("script", "")),
- status=st,
- output=doc.get("output"),
- errors=doc.get("errors"),
- lang=str(doc.get("lang", "python")),
- lang_version=str(doc.get("lang_version", "3.11")),
- created_at=doc.get("created_at", datetime.now(timezone.utc)),
- updated_at=doc.get("updated_at", datetime.now(timezone.utc)),
- resource_usage=ResourceUsageDomain.from_dict(doc.get("resource_usage") or {}),
- user_id=doc.get("user_id"),
- exit_code=doc.get("exit_code"),
- error_type=doc.get("error_type"),
- )
+ return self.mapper.execution_from_mongo_document(doc)
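
The deleted `_doc_to_execution` helper tolerated unknown status values by falling back to `QUEUED`; a mapper replacing it would typically keep that guard. A small sketch of just the status-parsing step, grounded in the removed code (the real `SSEMapper` is defined elsewhere):

    from app.domain.enums.execution import ExecutionStatus

    def parse_status(raw: object) -> ExecutionStatus:
        # Unknown or missing status values fall back to QUEUED, matching the
        # behaviour of the removed _doc_to_execution helper.
        if isinstance(raw, ExecutionStatus):
            return raw
        try:
            return ExecutionStatus(str(raw))
        except ValueError:
            return ExecutionStatus.QUEUED
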
diff --git a/backend/app/db/repositories/user_repository.py b/backend/app/db/repositories/user_repository.py
index f6808c52..64a761a9 100644
--- a/backend/app/db/repositories/user_repository.py
+++ b/backend/app/db/repositories/user_repository.py
@@ -1,33 +1,45 @@
import re
import uuid
+from datetime import datetime, timezone
-from motor.motor_asyncio import AsyncIOMotorDatabase
+from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
from app.domain.enums.user import UserRole
-from app.schemas_pydantic.user import UserInDB
+from app.domain.events.event_models import CollectionNames
+from app.domain.user import User as DomainAdminUser
+from app.domain.user import UserFields
+from app.domain.user import UserUpdate as DomainUserUpdate
+from app.infrastructure.mappers import UserMapper
class UserRepository:
def __init__(self, db: AsyncIOMotorDatabase):
self.db = db
+ self.collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.USERS)
+ self.mapper = UserMapper()
- async def get_user(self, username: str) -> UserInDB | None:
- user = await self.db.users.find_one({"username": username})
+ async def get_user(self, username: str) -> DomainAdminUser | None:
+ user = await self.collection.find_one({UserFields.USERNAME: username})
if user:
- return UserInDB(**user)
+ return self.mapper.from_mongo_document(user)
return None
- async def create_user(self, user: UserInDB) -> UserInDB:
+ async def create_user(self, user: DomainAdminUser) -> DomainAdminUser:
if not user.user_id:
user.user_id = str(uuid.uuid4())
- user_dict = user.model_dump()
- await self.db.users.insert_one(user_dict)
+ # Ensure timestamps
+ if not getattr(user, "created_at", None):
+ user.created_at = datetime.now(timezone.utc)
+ if not getattr(user, "updated_at", None):
+ user.updated_at = user.created_at
+ user_dict = self.mapper.to_mongo_document(user)
+ await self.collection.insert_one(user_dict)
return user
- async def get_user_by_id(self, user_id: str) -> UserInDB | None:
- user = await self.db.users.find_one({"user_id": user_id})
+ async def get_user_by_id(self, user_id: str) -> DomainAdminUser | None:
+ user = await self.collection.find_one({UserFields.USER_ID: user_id})
if user:
- return UserInDB(**user)
+ return self.mapper.from_mongo_document(user)
return None
async def list_users(
@@ -36,7 +48,7 @@ async def list_users(
offset: int = 0,
search: str | None = None,
role: UserRole | None = None
- ) -> list[UserInDB]:
+ ) -> list[DomainAdminUser]:
query: dict[str, object] = {}
if search:
@@ -50,24 +62,29 @@ async def list_users(
if role:
query["role"] = role.value
- cursor = self.db.users.find(query).skip(offset).limit(limit)
- users = []
+ cursor = self.collection.find(query).skip(offset).limit(limit)
+ users: list[DomainAdminUser] = []
async for user in cursor:
- users.append(UserInDB(**user))
+ users.append(self.mapper.from_mongo_document(user))
return users
- async def update_user(self, user_id: str, update_data: UserInDB) -> UserInDB | None:
- result = await self.db.users.update_one(
- {"user_id": user_id},
- {"$set": update_data.model_dump()}
+ async def update_user(self, user_id: str, update_data: DomainUserUpdate) -> DomainAdminUser | None:
+ update_dict = self.mapper.to_update_dict(update_data)
+ if not update_dict and update_data.password is None:
+ return await self.get_user_by_id(user_id)
+ # Handle password update separately if provided
+ if update_data.password:
+            update_dict[UserFields.HASHED_PASSWORD] = update_data.password  # caller must supply an already-hashed password
+ update_dict[UserFields.UPDATED_AT] = datetime.now(timezone.utc)
+ result = await self.collection.update_one(
+ {UserFields.USER_ID: user_id},
+ {"$set": update_dict}
)
-
if result.modified_count > 0:
return await self.get_user_by_id(user_id)
-
return None
async def delete_user(self, user_id: str) -> bool:
- result = await self.db.users.delete_one({"user_id": user_id})
+ result = await self.collection.delete_one({UserFields.USER_ID: user_id})
return result.deleted_count > 0
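
`update_user` writes `update_data.password` verbatim into `UserFields.HASHED_PASSWORD`, so hashing is the caller's responsibility. A minimal usage sketch, assuming passlib as the hasher and that `UserUpdate` accepts a `password` keyword (swap in whatever the project actually uses):

    from passlib.context import CryptContext  # assumption: replace with the project's real hasher

    from app.db.repositories.user_repository import UserRepository
    from app.domain.user import UserUpdate as DomainUserUpdate

    pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

    async def change_password(repo: UserRepository, user_id: str, new_password: str) -> None:
        # Hash before handing off: the repository stores the value as-is
        # under UserFields.HASHED_PASSWORD.
        hashed = pwd_context.hash(new_password)
        await repo.update_user(user_id, DomainUserUpdate(password=hashed))
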
diff --git a/backend/app/db/repositories/user_settings_repository.py b/backend/app/db/repositories/user_settings_repository.py
index 362f6551..dfda8cda 100644
--- a/backend/app/db/repositories/user_settings_repository.py
+++ b/backend/app/db/repositories/user_settings_repository.py
@@ -1,106 +1,52 @@
-from datetime import datetime, timezone
+from datetime import datetime
from typing import Any, Dict, List
-from motor.motor_asyncio import AsyncIOMotorDatabase
+from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
from pymongo import ASCENDING, DESCENDING, IndexModel
from app.core.logging import logger
-from app.domain.enums import Theme
from app.domain.enums.events import EventType
+from app.domain.events.event_models import CollectionNames
from app.domain.user.settings_models import (
- DomainEditorSettings,
- DomainNotificationSettings,
DomainSettingsEvent,
DomainUserSettings,
)
+from app.infrastructure.mappers import UserSettingsMapper
class UserSettingsRepository:
def __init__(self, database: AsyncIOMotorDatabase) -> None:
self.db = database
+ self.snapshots_collection: AsyncIOMotorCollection = self.db.get_collection(
+ CollectionNames.USER_SETTINGS_SNAPSHOTS
+ )
+ self.events_collection: AsyncIOMotorCollection = self.db.get_collection(CollectionNames.EVENTS)
+ self.mapper = UserSettingsMapper()
async def create_indexes(self) -> None:
- try:
- # Create indexes for settings snapshots
- await self.db.user_settings_snapshots.create_indexes([
- IndexModel([("user_id", ASCENDING)], unique=True),
- IndexModel([("updated_at", DESCENDING)]),
- ])
-
- # Create indexes for settings events
- await self.db.events.create_indexes([
- IndexModel([("event_type", ASCENDING), ("aggregate_id", ASCENDING)]),
- IndexModel([("aggregate_id", ASCENDING), ("timestamp", ASCENDING)]),
- ])
-
- logger.info("User settings repository indexes created successfully")
- except Exception as e:
- logger.error(f"Error creating user settings indexes: {e}")
- raise
+ # Create indexes for settings snapshots
+ await self.snapshots_collection.create_indexes([
+ IndexModel([("user_id", ASCENDING)], unique=True),
+ IndexModel([("updated_at", DESCENDING)]),
+ ])
+
+ # Create indexes for settings events
+ await self.events_collection.create_indexes([
+ IndexModel([("event_type", ASCENDING), ("aggregate_id", ASCENDING)]),
+ IndexModel([("aggregate_id", ASCENDING), ("timestamp", ASCENDING)]),
+ ])
+
+ logger.info("User settings repository indexes created successfully")
async def get_snapshot(self, user_id: str) -> DomainUserSettings | None:
- doc = await self.db.user_settings_snapshots.find_one({"user_id": user_id})
+ doc = await self.snapshots_collection.find_one({"user_id": user_id})
if not doc:
return None
- # Map DB -> domain with defaults
- notifications = doc.get("notifications", {})
- editor = doc.get("editor", {})
- theme_val = doc.get("theme")
- return DomainUserSettings(
- user_id=str(doc.get("user_id")),
- theme=Theme(theme_val),
- timezone=doc.get("timezone", "UTC"),
- date_format=doc.get("date_format", "YYYY-MM-DD"),
- time_format=doc.get("time_format", "24h"),
- notifications=DomainNotificationSettings(
- execution_completed=notifications.get("execution_completed", True),
- execution_failed=notifications.get("execution_failed", True),
- system_updates=notifications.get("system_updates", True),
- security_alerts=notifications.get("security_alerts", True),
- channels=notifications.get("channels", []),
- ),
- editor=DomainEditorSettings(
- theme=editor.get("theme", "one-dark"),
- font_size=editor.get("font_size", 14),
- tab_size=editor.get("tab_size", 4),
- use_tabs=editor.get("use_tabs", False),
- word_wrap=editor.get("word_wrap", True),
- show_line_numbers=editor.get("show_line_numbers", True),
- ),
- custom_settings=doc.get("custom_settings", {}),
- version=doc.get("version", 1),
- created_at=doc.get("created_at", datetime.now(timezone.utc)),
- updated_at=doc.get("updated_at", datetime.now(timezone.utc)),
- )
+ return self.mapper.from_snapshot_document(doc)
async def create_snapshot(self, settings: DomainUserSettings) -> None:
- doc = {
- "user_id": settings.user_id,
- "theme": settings.theme,
- "timezone": settings.timezone,
- "date_format": settings.date_format,
- "time_format": settings.time_format,
- "notifications": {
- "execution_completed": settings.notifications.execution_completed,
- "execution_failed": settings.notifications.execution_failed,
- "system_updates": settings.notifications.system_updates,
- "security_alerts": settings.notifications.security_alerts,
- "channels": settings.notifications.channels,
- },
- "editor": {
- "theme": settings.editor.theme,
- "font_size": settings.editor.font_size,
- "tab_size": settings.editor.tab_size,
- "use_tabs": settings.editor.use_tabs,
- "word_wrap": settings.editor.word_wrap,
- "show_line_numbers": settings.editor.show_line_numbers,
- },
- "custom_settings": settings.custom_settings,
- "version": settings.version,
- "created_at": settings.created_at,
- "updated_at": settings.updated_at,
- }
- await self.db.user_settings_snapshots.replace_one(
+ doc = self.mapper.to_snapshot_document(settings)
+ await self.snapshots_collection.replace_one(
{"user_id": settings.user_id},
doc,
upsert=True
@@ -128,42 +74,40 @@ async def get_settings_events(
timestamp_query["$lte"] = until
query["timestamp"] = timestamp_query
- cursor = self.db.events.find(query).sort("timestamp", ASCENDING)
-
+ cursor = self.events_collection.find(query).sort("timestamp", ASCENDING)
+
if limit:
cursor = cursor.limit(limit)
docs = await cursor.to_list(None)
- events: List[DomainSettingsEvent] = []
- for d in docs:
- et = d.get("event_type")
- try:
- et_parsed: EventType = EventType(et)
- except Exception:
- # Fallback to generic settings-updated when type is unknown
- et_parsed = EventType.USER_SETTINGS_UPDATED
- events.append(DomainSettingsEvent(
- event_type=et_parsed,
- timestamp=d.get("timestamp"),
- payload=d.get("payload", {}),
- correlation_id=d.get("correlation_id")
- ))
- return events
+ return [self.mapper.event_from_mongo_document(d) for d in docs]
async def count_events_since_snapshot(self, user_id: str) -> int:
snapshot = await self.get_snapshot(user_id)
-
+
if not snapshot:
- return await self.db.events.count_documents({
+ return await self.events_collection.count_documents({
"aggregate_id": f"user_settings_{user_id}"
})
- return await self.db.events.count_documents({
+ return await self.events_collection.count_documents({
"aggregate_id": f"user_settings_{user_id}",
"timestamp": {"$gt": snapshot.updated_at}
})
async def count_events_for_user(self, user_id: str) -> int:
- return await self.db.events.count_documents({
+ return await self.events_collection.count_documents({
+ "aggregate_id": f"user_settings_{user_id}"
+ })
+
+ async def delete_user_settings(self, user_id: str) -> None:
+ """Delete all settings data for a user (snapshot and events)."""
+ # Delete snapshot
+ await self.snapshots_collection.delete_one({"user_id": user_id})
+
+ # Delete all events
+ await self.events_collection.delete_many({
"aggregate_id": f"user_settings_{user_id}"
})
+
+ logger.info(f"Deleted all settings data for user {user_id}")
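
`count_events_since_snapshot` is the usual trigger for snapshot compaction in an event-sourced settings store like this one. A minimal sketch of a policy built on top of it; the threshold is an illustrative choice, not something defined in this diff:

    from app.db.repositories.user_settings_repository import UserSettingsRepository
    from app.domain.user.settings_models import DomainUserSettings

    SNAPSHOT_EVERY_N_EVENTS = 50  # illustrative threshold, not from the diff

    async def maybe_compact(repo: UserSettingsRepository, user_id: str, current: DomainUserSettings) -> None:
        # `current` is the settings state the service has already rebuilt by
        # replaying events on top of the last snapshot; persist it as the new
        # snapshot once enough events have accumulated.
        if await repo.count_events_since_snapshot(user_id) >= SNAPSHOT_EVERY_N_EVENTS:
            await repo.create_snapshot(current)
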
diff --git a/backend/app/db/schema/__init__.py b/backend/app/db/schema/__init__.py
index e69de29b..e3849b9b 100644
--- a/backend/app/db/schema/__init__.py
+++ b/backend/app/db/schema/__init__.py
@@ -0,0 +1,5 @@
+from app.db.schema.schema_manager import SchemaManager
+
+__all__ = [
+ "SchemaManager",
+]
diff --git a/backend/app/dlq/__init__.py b/backend/app/dlq/__init__.py
index ffafae37..084dbb3e 100644
--- a/backend/app/dlq/__init__.py
+++ b/backend/app/dlq/__init__.py
@@ -1 +1,43 @@
-"""Dead Letter Queue (DLQ) module for handling failed messages."""
+"""Dead Letter Queue (DLQ) public API.
+
+This package exposes DLQ models at import time.
+Import the manager explicitly from `app.dlq.manager` to avoid import cycles.
+"""
+
+from .models import (
+ AgeStatistics,
+ DLQBatchRetryResult,
+ DLQFields,
+ DLQMessage,
+ DLQMessageFilter,
+ DLQMessageListResult,
+ DLQMessageStatus,
+ DLQMessageUpdate,
+ DLQRetryResult,
+ DLQStatistics,
+ DLQTopicSummary,
+ EventTypeStatistic,
+ RetryPolicy,
+ RetryStrategy,
+ TopicStatistic,
+)
+
+__all__ = [
+ # Core models
+ "DLQMessageStatus",
+ "RetryStrategy",
+ "DLQFields",
+ "DLQMessage",
+ "DLQMessageUpdate",
+ "DLQMessageFilter",
+ "RetryPolicy",
+ # Stats models
+ "TopicStatistic",
+ "EventTypeStatistic",
+ "AgeStatistics",
+ "DLQStatistics",
+ "DLQRetryResult",
+ "DLQBatchRetryResult",
+ "DLQMessageListResult",
+ "DLQTopicSummary",
+]
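
Per the new package docstring, models are re-exported from the package root while the manager stays in its own module. A minimal import sketch consistent with that layout:

    from app.dlq import DLQMessage, DLQMessageStatus, RetryPolicy, RetryStrategy
    from app.dlq.manager import create_dlq_manager  # imported directly to avoid import cycles
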
diff --git a/backend/app/dlq/consumer.py b/backend/app/dlq/consumer.py
deleted file mode 100644
index 8372514b..00000000
--- a/backend/app/dlq/consumer.py
+++ /dev/null
@@ -1,446 +0,0 @@
-import asyncio
-from datetime import timedelta
-from typing import Any, Callable, Dict, List
-
-from confluent_kafka import OFFSET_BEGINNING, OFFSET_END, Message, TopicPartition
-
-from app.core.logging import logger
-from app.dlq.models import DLQMessage
-from app.domain.enums.events import EventType
-from app.domain.enums.kafka import GroupId, KafkaTopic
-from app.events.core.consumer import ConsumerConfig, UnifiedConsumer
-from app.events.core.dispatcher import EventDispatcher
-from app.events.core.producer import UnifiedProducer
-from app.events.schema.schema_registry import SchemaRegistryManager
-from app.infrastructure.kafka.events.base import BaseEvent
-from app.settings import get_settings
-
-
-class DLQConsumer:
- def __init__(
- self,
- dlq_topic: KafkaTopic,
- producer: UnifiedProducer,
- schema_registry_manager: SchemaRegistryManager,
- group_id: GroupId = GroupId.DLQ_PROCESSOR,
- max_retry_attempts: int = 5,
- retry_delay_hours: int = 1,
- max_age_days: int = 7,
- batch_size: int = 100,
- ):
- self.dlq_topic = dlq_topic
- self.group_id = group_id
- self.max_retry_attempts = max_retry_attempts
- self.retry_delay = timedelta(hours=retry_delay_hours)
- self.max_age = timedelta(days=max_age_days)
- self.batch_size = batch_size
-
- # Create consumer config
- settings = get_settings()
- self.config = ConsumerConfig(
- bootstrap_servers=settings.KAFKA_BOOTSTRAP_SERVERS,
- group_id=group_id,
- max_poll_records=batch_size,
- enable_auto_commit=False,
- )
-
- self.consumer: UnifiedConsumer | None = None
- self.producer: UnifiedProducer = producer
- self.schema_registry_manager = schema_registry_manager
- self.dispatcher = EventDispatcher()
- self._retry_handlers: Dict[str, Callable] = {}
- self._permanent_failure_handlers: List[Callable] = []
- self._running = False
- self._process_task: asyncio.Task | None = None
-
- # Statistics
- self.stats = {
- "processed": 0,
- "retried": 0,
- "permanently_failed": 0,
- "expired": 0,
- "errors": 0
- }
-
- async def start(self) -> None:
- """Start the DLQ consumer"""
- if self._running:
- return
-
- self.consumer = UnifiedConsumer(
- self.config,
- event_dispatcher=self.dispatcher
- )
-
- # Register handler for DLQ events through dispatcher
- # DLQ messages are generic, so we handle all event types
- for event_type in EventType:
- self.dispatcher.register(event_type)(self._process_dlq_event)
-
- await self.consumer.start([self.dlq_topic])
- self._running = True
-
- # Start periodic processing
- self._process_task = asyncio.create_task(self._periodic_process())
-
- logger.info(f"DLQ consumer started for topic: {self.dlq_topic}")
-
- async def _process_dlq_event(self, event: BaseEvent) -> None:
- """Process a single DLQ event from dispatcher."""
- try:
- # Extract DLQ-specific attributes from the event
- # These should be added by the producer when sending to DLQ
- original_topic = getattr(event, 'original_topic', str(event.topic))
- error = getattr(event, 'error', 'Unknown error')
- retry_count = getattr(event, 'retry_count', 0)
- producer_id = getattr(event, 'producer_id', 'unknown')
-
- # Create DLQMessage from the failed event
- dlq_message = DLQMessage.from_failed_event(
- event=event,
- original_topic=original_topic,
- error=error,
- producer_id=producer_id,
- retry_count=retry_count
- )
-
- # Process the message based on retry policy
- self.stats["processed"] += 1
-
- # Check if message is too old
- if dlq_message.age > self.max_age:
- await self._handle_expired_messages([dlq_message])
- return
-
- # Check retry count
- if dlq_message.retry_count >= self.max_retry_attempts:
- await self._handle_permanent_failures([dlq_message])
- return
-
- # Check if enough time has passed for retry
- if dlq_message.age >= self.retry_delay:
- await self._retry_messages([dlq_message])
- else:
- # Message is not ready for retry yet
- logger.debug(f"Message {dlq_message.event_id} not ready for retry yet")
-
- except Exception as e:
- logger.error(f"Failed to process DLQ event: {e}", exc_info=True)
- self.stats["errors"] += 1
-
- async def _process_dlq_message(self, message: Message) -> None:
- """Process a single DLQ message from confluent-kafka Message"""
- try:
- dlq_message = DLQMessage.from_kafka_message(message, self.schema_registry_manager)
-
- # Process individual message similar to batch processing
- self.stats["processed"] += 1
-
- # Check if message is too old
- if dlq_message.age > self.max_age:
- await self._handle_expired_messages([dlq_message])
- return
-
- # Check retry count
- if dlq_message.retry_count >= self.max_retry_attempts:
- await self._handle_permanent_failures([dlq_message])
- return
-
- # Check if enough time has passed for retry
- if dlq_message.age >= self.retry_delay:
- await self._retry_messages([dlq_message])
- else:
- # Message is not ready for retry yet, skip
- logger.debug(f"Message {dlq_message.event_id} not ready for retry yet")
-
- except Exception as e:
- logger.error(f"Failed to process DLQ message: {e}")
- self.stats["errors"] += 1
-
- async def stop(self) -> None:
- """Stop the DLQ consumer"""
- if not self._running:
- return
-
- self._running = False
-
- if self._process_task:
- self._process_task.cancel()
- try:
- await self._process_task
- except asyncio.CancelledError:
- pass
-
- if self.consumer:
- await self.consumer.stop()
-
- logger.info(f"DLQ consumer stopped. Stats: {self.stats}")
-
- def add_retry_handler(self, event_type: str, handler: Callable) -> None:
- self._retry_handlers[event_type] = handler
-
- def add_permanent_failure_handler(self, handler: Callable) -> None:
- self._permanent_failure_handlers.append(handler)
-
- async def _periodic_process(self) -> None:
- while self._running:
- try:
- # Process is triggered by the consumer's batch handler
- await asyncio.sleep(60) # Check every minute
-
- # Log statistics
- logger.info(f"DLQ stats: {self.stats}")
-
- except Exception as e:
- logger.error(f"Error in DLQ periodic process: {e}")
- await asyncio.sleep(60)
-
- async def _process_dlq_batch(self, events: List[tuple]) -> None:
- dlq_messages = []
-
- # Convert to DLQMessage objects
- for _, record in events:
- try:
- dlq_message = DLQMessage.from_kafka_message(record, self.schema_registry_manager)
- dlq_messages.append(dlq_message)
- except Exception as e:
- logger.error(f"Failed to parse DLQ message: {e}")
- self.stats["errors"] += 1
-
- # Group messages by action
- to_retry = []
- permanently_failed = []
- expired = []
-
- for msg in dlq_messages:
- self.stats["processed"] += 1
-
- # Check if message is too old
- if msg.age > self.max_age:
- expired.append(msg)
- continue
-
- # Check retry count
- if msg.retry_count >= self.max_retry_attempts:
- permanently_failed.append(msg)
- continue
-
- # Check if enough time has passed for retry
- if msg.age >= self.retry_delay:
- to_retry.append(msg)
- else:
- # Message is not ready for retry yet, skip
- continue
-
- # Process each group
- await self._retry_messages(to_retry)
- await self._handle_permanent_failures(permanently_failed)
- await self._handle_expired_messages(expired)
-
- async def _retry_messages(self, messages: List[DLQMessage]) -> None:
- if not messages:
- return
-
- for msg in messages:
- try:
- # Check if there's a custom retry handler
- handler = self._retry_handlers.get(msg.event_type)
-
- if handler:
- # Use custom handler
- if asyncio.iscoroutinefunction(handler):
- should_retry = await handler(msg)
- else:
- should_retry = await asyncio.to_thread(handler, msg)
-
- if not should_retry:
- logger.info(
- f"Custom handler rejected retry for event {msg.event_id}"
- )
- continue
-
- # Get the original event
- event = msg.event
- if not event:
- logger.error(f"Failed to get event {msg.event_id} for retry")
- self.stats["errors"] += 1
- continue
-
- # Add retry metadata to headers
- headers = {
- "retry_count": str(msg.retry_count + 1),
- "retry_from_dlq": "true",
- "original_error": msg.error[:100], # Truncate long errors
- "dlq_timestamp": msg.failed_at.isoformat()
- }
-
- # Send back to original topic
- await self.producer.produce(
- event_to_produce=event,
- headers=headers
- )
- success = True
-
- if success:
- logger.info(
- f"Retried event {msg.event_id} to topic {msg.original_topic} "
- f"(attempt {msg.retry_count + 1})"
- )
- self.stats["retried"] += 1
- else:
- logger.error(f"Failed to retry event {msg.event_id}")
- self.stats["errors"] += 1
-
- except Exception as e:
- logger.error(f"Error retrying message {msg.event_id}: {e}")
- self.stats["errors"] += 1
-
- async def _handle_permanent_failures(self, messages: List[DLQMessage]) -> None:
- if not messages:
- return
-
- for msg in messages:
- try:
- logger.warning(
- f"Event {msg.event_id} permanently failed after "
- f"{msg.retry_count} attempts. Error: {msg.error}"
- )
-
- # Call permanent failure handlers
- for handler in self._permanent_failure_handlers:
- try:
- if asyncio.iscoroutinefunction(handler):
- await handler(msg)
- else:
- await asyncio.to_thread(handler, msg)
- except Exception as e:
- logger.error(f"Permanent failure handler error: {e}")
-
- self.stats["permanently_failed"] += 1
-
- except Exception as e:
- logger.error(f"Error handling permanent failure: {e}")
- self.stats["errors"] += 1
-
- async def _handle_expired_messages(self, messages: List[DLQMessage]) -> None:
- if not messages:
- return
-
- for msg in messages:
- logger.warning(
- f"Event {msg.event_id} expired (age: {msg.age.days} days). "
- f"Will not retry."
- )
- self.stats["expired"] += 1
-
- async def reprocess_all(
- self,
- event_types: List[str] | None = None,
- force: bool = False
- ) -> Dict[str, int]:
- if not self.consumer:
- raise RuntimeError("Consumer not started")
-
- logger.info(
- f"Reprocessing all DLQ messages"
- f"{f' for types: {event_types}' if event_types else ''}"
- )
-
- # Seek to beginning using native confluent-kafka
- if self.consumer.consumer:
- try:
- # Get current assignment
- assignment = self.consumer.consumer.assignment()
- if assignment:
- for partition in assignment:
- # Create new TopicPartition with desired offset
- new_partition = TopicPartition(partition.topic, partition.partition, OFFSET_BEGINNING)
- self.consumer.consumer.seek(new_partition)
- logger.info(f"Seeked {len(assignment)} partitions to beginning")
- except Exception as e:
- logger.error(f"Failed to seek to beginning: {e}")
-
- # Temporarily adjust settings for bulk reprocessing
- original_retry_delay = self.retry_delay
- if force:
- self.retry_delay = timedelta(seconds=0)
-
- # Process until caught up
- reprocess_stats = {
- "total": 0,
- "retried": 0,
- "skipped": 0,
- "errors": 0
- }
-
- try:
- # Process messages
- # This will be handled by the batch processor
-
- # Wait for processing to complete
- await asyncio.sleep(5)
-
- # Copy stats
- reprocess_stats["total"] = self.stats["processed"]
- reprocess_stats["retried"] = self.stats["retried"]
- reprocess_stats["errors"] = self.stats["errors"]
-
- finally:
- # Restore original settings
- self.retry_delay = original_retry_delay
-
- # Seek back to end for normal processing using native confluent-kafka
- if self.consumer.consumer:
- try:
- # Get current assignment
- assignment = self.consumer.consumer.assignment()
- if assignment:
- for partition in assignment:
- # Create new TopicPartition with desired offset
- new_partition = TopicPartition(partition.topic, partition.partition, OFFSET_END)
- self.consumer.consumer.seek(new_partition)
- logger.info(f"Seeked {len(assignment)} partitions to end")
- except Exception as e:
- logger.error(f"Failed to seek to end: {e}")
-
- return reprocess_stats
-
- def get_stats(self) -> Dict[str, Any]:
- return {
- **self.stats,
- "topic": self.dlq_topic,
- "group_id": self.group_id,
- "running": self._running,
- "config": {
- "max_retry_attempts": self.max_retry_attempts,
- "retry_delay_hours": self.retry_delay.total_seconds() / 3600,
- "max_age_days": self.max_age.days,
- "batch_size": self.batch_size
- }
- }
-
-
-class DLQConsumerRegistry:
- def __init__(self) -> None:
- self._consumers: Dict[str, DLQConsumer] = {}
-
- def get(self, topic: str) -> DLQConsumer | None:
- return self._consumers.get(topic)
-
- def register(self, consumer: DLQConsumer) -> None:
- self._consumers[consumer.dlq_topic] = consumer
-
- async def start_all(self) -> None:
- for consumer in self._consumers.values():
- await consumer.start()
-
- async def stop_all(self) -> None:
- tasks = []
- for consumer in self._consumers.values():
- tasks.append(consumer.stop())
- if tasks:
- await asyncio.gather(*tasks, return_exceptions=True)
-
-
-def create_dlq_consumer_registry() -> DLQConsumerRegistry:
- return DLQConsumerRegistry()
diff --git a/backend/app/dlq/manager.py b/backend/app/dlq/manager.py
index 82578644..87c04e6c 100644
--- a/backend/app/dlq/manager.py
+++ b/backend/app/dlq/manager.py
@@ -3,20 +3,26 @@
from datetime import datetime, timezone
from typing import Any, Awaitable, Callable, Mapping, Sequence
-from confluent_kafka import Consumer, KafkaError, Producer
+from confluent_kafka import Consumer, KafkaError, Message, Producer
from motor.motor_asyncio import AsyncIOMotorCollection, AsyncIOMotorDatabase
+from opentelemetry.trace import SpanKind
from app.core.logging import logger
from app.core.metrics.context import get_dlq_metrics
+from app.core.tracing import EventAttributes
+from app.core.tracing.utils import extract_trace_context, get_tracer, inject_trace_context
from app.dlq.models import (
DLQFields,
DLQMessage,
DLQMessageStatus,
+ DLQMessageUpdate,
RetryPolicy,
RetryStrategy,
)
from app.domain.enums.kafka import GroupId, KafkaTopic
+from app.domain.events.event_models import CollectionNames
from app.events.schema.schema_registry import SchemaRegistryManager
+from app.infrastructure.mappers.dlq_mapper import DLQMapper
from app.settings import get_settings
@@ -24,11 +30,12 @@ class DLQManager:
def __init__(
self,
database: AsyncIOMotorDatabase,
+ consumer: Consumer,
+ producer: Producer,
dlq_topic: KafkaTopic = KafkaTopic.DEAD_LETTER_QUEUE,
retry_topic_suffix: str = "-retry",
default_retry_policy: RetryPolicy | None = None,
):
- self.database = database
self.metrics = get_dlq_metrics()
self.dlq_topic = dlq_topic
self.retry_topic_suffix = retry_topic_suffix
@@ -36,10 +43,9 @@ def __init__(
topic="default",
strategy=RetryStrategy.EXPONENTIAL_BACKOFF
)
-
- self.consumer: Consumer | None = None
- self.producer: Producer | None = None
- self.dlq_collection: AsyncIOMotorCollection[Any] = database.dlq_messages
+ self.consumer: Consumer = consumer
+ self.producer: Producer = producer
+ self.dlq_collection: AsyncIOMotorCollection[Any] = database.get_collection(CollectionNames.DLQ_MESSAGES)
self._running = False
self._process_task: asyncio.Task | None = None
@@ -63,34 +69,8 @@ async def start(self) -> None:
if self._running:
return
- if self.database is None:
- raise RuntimeError("Database not provided to DLQManager")
-
- settings = get_settings()
-
- # Initialize consumer
- self.consumer = Consumer({
- 'bootstrap.servers': settings.KAFKA_BOOTSTRAP_SERVERS,
- 'group.id': GroupId.DLQ_MANAGER,
- 'enable.auto.commit': False,
- 'auto.offset.reset': 'earliest',
- 'client.id': 'dlq-manager-consumer'
- })
self.consumer.subscribe([self.dlq_topic])
- # Initialize producer for retries
- self.producer = Producer({
- 'bootstrap.servers': settings.KAFKA_BOOTSTRAP_SERVERS,
- 'client.id': 'dlq-manager-producer',
- 'acks': 'all',
- 'enable.idempotence': True,
- 'compression.type': 'gzip',
- 'batch.size': 16384,
- 'linger.ms': 10
- })
-
- # Indexes ensured by SchemaManager at startup
-
self._running = True
# Start processing tasks
@@ -116,64 +96,91 @@ async def stop(self) -> None:
pass
# Stop Kafka clients
- if self.consumer:
- self.consumer.close()
-
- if self.producer:
- self.producer.flush(10) # Wait up to 10 seconds for pending messages
+ self.consumer.close()
+ self.producer.flush(10)
logger.info("DLQ Manager stopped")
- # Index creation handled by SchemaManager
-
async def _process_messages(self) -> None:
while self._running:
try:
- # Fetch messages using confluent-kafka poll
- if not self.consumer:
- logger.error("Consumer not initialized")
- continue
-
- # Poll for messages (non-blocking with asyncio)
- msg = await asyncio.to_thread(self.consumer.poll, timeout=1.0)
-
+ msg = await self._poll_message()
if msg is None:
continue
- if msg.error():
- error = msg.error()
- if error and error.code() == KafkaError._PARTITION_EOF:
- continue
- logger.error(f"Consumer error: {error}")
+ if not await self._validate_message(msg):
continue
start_time = asyncio.get_event_loop().time()
+ dlq_message = await self._parse_message(msg)
- schema_registry = SchemaRegistryManager()
- dlq_message = DLQMessage.from_kafka_message(msg, schema_registry)
-
- # Update metrics
- self.metrics.record_dlq_message_received(
- dlq_message.original_topic,
- dlq_message.event_type
- )
-
- self.metrics.record_dlq_message_age(dlq_message.age_seconds)
-
- # Process message
- await self._process_dlq_message(dlq_message)
-
- # Commit offset after successful processing
- await asyncio.to_thread(self.consumer.commit, asynchronous=False)
-
- # Record processing time
- duration = asyncio.get_event_loop().time() - start_time
- self.metrics.record_dlq_processing_duration(duration, "process")
+ await self._record_message_metrics(dlq_message)
+ await self._process_message_with_tracing(msg, dlq_message)
+ await self._commit_and_record_duration(start_time)
except Exception as e:
logger.error(f"Error in DLQ processing loop: {e}")
await asyncio.sleep(5)
+ async def _poll_message(self) -> Message | None:
+ """Poll for a message from Kafka."""
+ return await asyncio.to_thread(self.consumer.poll, timeout=1.0)
+
+ async def _validate_message(self, msg: Message) -> bool:
+ """Validate the Kafka message."""
+ if msg.error():
+ error = msg.error()
+ if error and error.code() == KafkaError._PARTITION_EOF:
+ return False
+ logger.error(f"Consumer error: {error}")
+ return False
+ return True
+
+ async def _parse_message(self, msg: Message) -> DLQMessage:
+ """Parse Kafka message into DLQMessage."""
+ schema_registry = SchemaRegistryManager()
+ return DLQMapper.from_kafka_message(msg, schema_registry)
+
+ def _extract_headers(self, msg: Message) -> dict[str, str]:
+ """Extract headers from Kafka message."""
+ headers_list = msg.headers() or []
+ headers: dict[str, str] = {}
+ for k, v in headers_list:
+ headers[str(k)] = v.decode("utf-8") if isinstance(v, (bytes, bytearray)) else (v or "")
+ return headers
+
+ async def _record_message_metrics(self, dlq_message: DLQMessage) -> None:
+ """Record metrics for received DLQ message."""
+ self.metrics.record_dlq_message_received(
+ dlq_message.original_topic,
+ dlq_message.event_type
+ )
+ self.metrics.record_dlq_message_age(dlq_message.age_seconds)
+
+ async def _process_message_with_tracing(self, msg: Message, dlq_message: DLQMessage) -> None:
+ """Process message with distributed tracing."""
+ headers = self._extract_headers(msg)
+ ctx = extract_trace_context(headers)
+ tracer = get_tracer()
+
+ with tracer.start_as_current_span(
+ name="dlq.consume",
+ context=ctx,
+ kind=SpanKind.CONSUMER,
+ attributes={
+ str(EventAttributes.KAFKA_TOPIC): str(self.dlq_topic),
+ str(EventAttributes.EVENT_TYPE): dlq_message.event_type,
+ str(EventAttributes.EVENT_ID): dlq_message.event_id or "",
+ },
+ ):
+ await self._process_dlq_message(dlq_message)
+
+ async def _commit_and_record_duration(self, start_time: float) -> None:
+ """Commit offset and record processing duration."""
+ await asyncio.to_thread(self.consumer.commit, asynchronous=False)
+ duration = asyncio.get_event_loop().time() - start_time
+ self.metrics.record_dlq_processing_duration(duration, "process")
+
async def _process_dlq_message(self, message: DLQMessage) -> None:
# Apply filters
for filter_func in self._filters:
@@ -199,12 +206,10 @@ async def _process_dlq_message(self, message: DLQMessage) -> None:
next_retry = retry_policy.get_next_retry_time(message)
# Update message status
- if message.event_id:
- await self._update_message_status(
- message.event_id,
- DLQMessageStatus.SCHEDULED,
- next_retry_at=next_retry
- )
+ await self._update_message_status(
+ message.event_id,
+ DLQMessageUpdate(status=DLQMessageStatus.SCHEDULED, next_retry_at=next_retry),
+ )
# If immediate retry, process now
if retry_policy.strategy == RetryStrategy.IMMEDIATE:
@@ -215,7 +220,7 @@ async def _store_message(self, message: DLQMessage) -> None:
message.status = DLQMessageStatus.PENDING
message.last_updated = datetime.now(timezone.utc)
- doc = message.to_dict()
+ doc = DLQMapper.to_mongo_document(message)
await self.dlq_collection.update_one(
{DLQFields.EVENT_ID: message.event_id},
@@ -223,38 +228,9 @@ async def _store_message(self, message: DLQMessage) -> None:
upsert=True
)
- async def _update_message_status(
- self,
- event_id: str,
- status: DLQMessageStatus,
- **kwargs: Any
- ) -> None:
- update_doc = {
- str(DLQFields.STATUS): status,
- str(DLQFields.LAST_UPDATED): datetime.now(timezone.utc)
- }
-
- # Add any additional fields
- for key, value in kwargs.items():
- if key == "next_retry_at":
- update_doc[str(DLQFields.NEXT_RETRY_AT)] = value
- elif key == "retried_at":
- update_doc[str(DLQFields.RETRIED_AT)] = value
- elif key == "discarded_at":
- update_doc[str(DLQFields.DISCARDED_AT)] = value
- elif key == "retry_count":
- update_doc[str(DLQFields.RETRY_COUNT)] = value
- elif key == "discard_reason":
- update_doc[str(DLQFields.DISCARD_REASON)] = value
- elif key == "last_error":
- update_doc[str(DLQFields.LAST_ERROR)] = value
- else:
- update_doc[key] = value
-
- await self.dlq_collection.update_one(
- {DLQFields.EVENT_ID: event_id},
- {"$set": update_doc}
- )
+ async def _update_message_status(self, event_id: str, update: DLQMessageUpdate) -> None:
+ update_doc = DLQMapper.update_to_mongo(update)
+ await self.dlq_collection.update_one({DLQFields.EVENT_ID: event_id}, {"$set": update_doc})
async def _retry_message(self, message: DLQMessage) -> None:
# Trigger before_retry callbacks
@@ -263,27 +239,14 @@ async def _retry_message(self, message: DLQMessage) -> None:
# Send to retry topic first (for monitoring)
retry_topic = f"{message.original_topic}{self.retry_topic_suffix}"
- # Prepare headers
- headers = [
- ("dlq_retry_count", str(message.retry_count + 1).encode()),
- ("dlq_original_error", message.error.encode()),
- ("dlq_retry_timestamp", datetime.now(timezone.utc).isoformat().encode()),
- ]
-
- # Send to retry topic
- if not self.producer:
- raise RuntimeError("Producer not initialized")
-
- if not message.event_id:
- raise ValueError("Message event_id is required")
-
- # Send to retry topic using confluent-kafka producer
- def delivery_callback(err: Any, msg: Any) -> None:
- if err:
- logger.error(f"Failed to deliver message to retry topic: {err}")
-
- # Convert headers to the format expected by confluent-kafka
- kafka_headers: list[tuple[str, str | bytes]] = [(k, v) for k, v in headers]
+ hdrs: dict[str, str] = {
+ "dlq_retry_count": str(message.retry_count + 1),
+ "dlq_original_error": message.error,
+ "dlq_retry_timestamp": datetime.now(timezone.utc).isoformat(),
+ }
+ hdrs = inject_trace_context(hdrs)
+ from typing import cast
+ kafka_headers = cast(list[tuple[str, str | bytes]], [(k, v.encode()) for k, v in hdrs.items()])
# Get the original event
event = message.event
@@ -294,7 +257,6 @@ def delivery_callback(err: Any, msg: Any) -> None:
value=json.dumps(event.to_dict()).encode(),
key=message.event_id.encode(),
headers=kafka_headers,
- callback=delivery_callback
)
# Send to original topic
@@ -304,7 +266,6 @@ def delivery_callback(err: Any, msg: Any) -> None:
value=json.dumps(event.to_dict()).encode(),
key=message.event_id.encode(),
headers=kafka_headers,
- callback=delivery_callback
)
# Flush to ensure messages are sent
@@ -318,13 +279,14 @@ def delivery_callback(err: Any, msg: Any) -> None:
)
# Update status
- if message.event_id:
- await self._update_message_status(
- message.event_id,
- DLQMessageStatus.RETRIED,
+ await self._update_message_status(
+ message.event_id,
+ DLQMessageUpdate(
+ status=DLQMessageStatus.RETRIED,
retried_at=datetime.now(timezone.utc),
- retry_count=message.retry_count + 1
- )
+ retry_count=message.retry_count + 1,
+ ),
+ )
# Trigger after_retry callbacks
await self._trigger_callbacks("after_retry", message, success=True)
@@ -340,13 +302,14 @@ async def _discard_message(self, message: DLQMessage, reason: str) -> None:
)
# Update status
- if message.event_id:
- await self._update_message_status(
- message.event_id,
- DLQMessageStatus.DISCARDED,
+ await self._update_message_status(
+ message.event_id,
+ DLQMessageUpdate(
+ status=DLQMessageStatus.DISCARDED,
discarded_at=datetime.now(timezone.utc),
- discard_reason=reason
- )
+ discard_reason=reason,
+ ),
+ )
# Trigger callbacks
await self._trigger_callbacks("on_discard", message, reason)
@@ -366,7 +329,7 @@ async def _monitor_dlq(self) -> None:
async for doc in cursor:
# Recreate DLQ message from MongoDB document
- message = DLQMessage.from_dict(doc)
+ message = DLQMapper.from_mongo_document(doc)
# Retry message
await self._retry_message(message)
@@ -419,83 +382,49 @@ async def retry_message_manually(self, event_id: str) -> bool:
logger.error(f"Message {event_id} not found in DLQ")
return False
- message = DLQMessage.from_dict(doc)
+    # Skip messages already in a terminal state (discarded or already retried)
+ status = doc.get(str(DLQFields.STATUS))
+ if status in {DLQMessageStatus.DISCARDED, DLQMessageStatus.RETRIED}:
+ logger.info(f"Skipping manual retry for {event_id}: status={status}")
+ return False
+
+ message = DLQMapper.from_mongo_document(doc)
await self._retry_message(message)
return True
- async def get_dlq_stats(self) -> dict[str, Any]:
- pipeline = [
- {"$facet": {
- "by_status": [
- {"$group": {
- "_id": f"${DLQFields.STATUS}",
- "count": {"$sum": 1}
- }}
- ],
- "by_topic": [
- {"$group": {
- "_id": f"${DLQFields.ORIGINAL_TOPIC}",
- "count": {"$sum": 1},
- "avg_retry_count": {"$avg": f"${DLQFields.RETRY_COUNT}"},
- "max_retry_count": {"$max": f"${DLQFields.RETRY_COUNT}"}
- }}
- ],
- "by_event_type": [
- {"$group": {
- "_id": f"${DLQFields.EVENT_TYPE}",
- "count": {"$sum": 1}
- }}
- ],
- "age_stats": [
- {"$group": {
- "_id": None,
- "oldest_message": {"$min": f"${DLQFields.FAILED_AT}"},
- "newest_message": {"$max": f"${DLQFields.FAILED_AT}"},
- "total_count": {"$sum": 1}
- }}
- ]
- }}
- ]
-
- cursor = self.dlq_collection.aggregate(pipeline)
- results = await cursor.to_list(1)
-
- # Handle empty collection case
- if not results:
- return {
- "by_status": {},
- "by_topic": [],
- "by_event_type": [],
- "age_stats": {},
- "timestamp": datetime.now(timezone.utc)
- }
-
- result = results[0]
-
- return {
- "by_status": {item["_id"]: item["count"] for item in result["by_status"]},
- "by_topic": result["by_topic"],
- "by_event_type": result["by_event_type"],
- "age_stats": result["age_stats"][0] if result["age_stats"] else {},
- "timestamp": datetime.now(timezone.utc)
- }
-
-
def create_dlq_manager(
database: AsyncIOMotorDatabase,
dlq_topic: KafkaTopic = KafkaTopic.DEAD_LETTER_QUEUE,
retry_topic_suffix: str = "-retry",
default_retry_policy: RetryPolicy | None = None,
) -> DLQManager:
+ settings = get_settings()
+ consumer = Consumer({
+ 'bootstrap.servers': settings.KAFKA_BOOTSTRAP_SERVERS,
+ 'group.id': GroupId.DLQ_MANAGER,
+ 'enable.auto.commit': False,
+ 'auto.offset.reset': 'earliest',
+ 'client.id': 'dlq-manager-consumer'
+ })
+ producer = Producer({
+ 'bootstrap.servers': settings.KAFKA_BOOTSTRAP_SERVERS,
+ 'client.id': 'dlq-manager-producer',
+ 'acks': 'all',
+ 'enable.idempotence': True,
+ 'compression.type': 'gzip',
+ 'batch.size': 16384,
+ 'linger.ms': 10
+ })
if default_retry_policy is None:
default_retry_policy = RetryPolicy(
topic="default",
strategy=RetryStrategy.EXPONENTIAL_BACKOFF
)
-
return DLQManager(
database=database,
+ consumer=consumer,
+ producer=producer,
dlq_topic=dlq_topic,
retry_topic_suffix=retry_topic_suffix,
default_retry_policy=default_retry_policy,
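
`create_dlq_manager` now owns the confluent-kafka client configuration and hands ready-made `Consumer`/`Producer` instances to `DLQManager`, which keeps the manager easy to unit-test with fakes. A rough wiring sketch at application startup, assuming a Motor client and that the caller handles start/stop (names are illustrative):

    from motor.motor_asyncio import AsyncIOMotorClient

    from app.dlq.manager import DLQManager, create_dlq_manager

    async def start_dlq(mongo_url: str, db_name: str) -> DLQManager:
        # The factory builds the Kafka clients; tests can instead construct
        # DLQManager directly and inject fake consumer/producer objects.
        database = AsyncIOMotorClient(mongo_url)[db_name]
        manager = create_dlq_manager(database=database)
        await manager.start()
        return manager
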
diff --git a/backend/app/dlq/models.py b/backend/app/dlq/models.py
index 4525a52c..a960f2ab 100644
--- a/backend/app/dlq/models.py
+++ b/backend/app/dlq/models.py
@@ -1,12 +1,8 @@
-import json
-from collections.abc import Mapping
from dataclasses import dataclass, field
from datetime import datetime, timedelta, timezone
-
-from confluent_kafka import Message
+from typing import Any
from app.core.utils import StringEnum
-from app.events.schema.schema_registry import SchemaRegistryManager
from app.infrastructure.kafka.events import BaseEvent
@@ -63,7 +59,7 @@ class DLQMessage:
producer_id: str # ID of the producer that sent to DLQ
# Optional fields
- event_id: str | None = None
+ event_id: str = ""
created_at: datetime | None = None # When added to DLQ (UTC)
last_updated: datetime | None = None # Last status change (UTC)
next_retry_at: datetime | None = None # Next scheduled retry (UTC)
@@ -89,200 +85,23 @@ def age_seconds(self) -> float:
"""Get message age in seconds since failure."""
return (datetime.now(timezone.utc) - self.failed_at).total_seconds()
- @property
- def age(self) -> timedelta:
- """Get message age as timedelta."""
- return datetime.now(timezone.utc) - self.failed_at
-
@property
def event_type(self) -> str:
"""Get event type from the event."""
return str(self.event.event_type)
- def to_dict(self) -> dict[str, object]:
- """Convert to MongoDB document."""
- doc: dict[str, object] = {
- DLQFields.EVENT: self.event.to_dict(),
- DLQFields.ORIGINAL_TOPIC: self.original_topic,
- DLQFields.ERROR: self.error,
- DLQFields.RETRY_COUNT: self.retry_count,
- DLQFields.FAILED_AT: self.failed_at,
- DLQFields.STATUS: self.status,
- DLQFields.PRODUCER_ID: self.producer_id,
- }
-
- # Add optional fields only if present
- if self.event_id:
- doc[DLQFields.EVENT_ID] = self.event_id
- if self.created_at:
- doc[DLQFields.CREATED_AT] = self.created_at
- if self.last_updated:
- doc[DLQFields.LAST_UPDATED] = self.last_updated
- if self.next_retry_at:
- doc[DLQFields.NEXT_RETRY_AT] = self.next_retry_at
- if self.retried_at:
- doc[DLQFields.RETRIED_AT] = self.retried_at
- if self.discarded_at:
- doc[DLQFields.DISCARDED_AT] = self.discarded_at
- if self.discard_reason:
- doc[DLQFields.DISCARD_REASON] = self.discard_reason
- if self.dlq_offset is not None:
- doc[DLQFields.DLQ_OFFSET] = self.dlq_offset
- if self.dlq_partition is not None:
- doc[DLQFields.DLQ_PARTITION] = self.dlq_partition
- if self.last_error:
- doc[DLQFields.LAST_ERROR] = self.last_error
-
- return doc
-
- @classmethod
- def from_dict(cls, data: Mapping[str, object]) -> "DLQMessage":
- """Create from MongoDB document."""
-
- # Get schema registry for deserialization
- schema_registry = SchemaRegistryManager()
-
- # Helper for datetime conversion
- def parse_datetime(value: object) -> datetime | None:
- if value is None:
- return None
- if isinstance(value, datetime):
- return value if value.tzinfo else value.replace(tzinfo=timezone.utc)
- if isinstance(value, str):
- return datetime.fromisoformat(value).replace(tzinfo=timezone.utc)
- raise ValueError(f"Cannot parse datetime from {type(value).__name__}")
-
- # Parse required failed_at field
- failed_at_raw = data.get(DLQFields.FAILED_AT)
- if failed_at_raw is None:
- raise ValueError("Missing required field: failed_at")
- failed_at = parse_datetime(failed_at_raw)
- if failed_at is None:
- raise ValueError("Invalid failed_at value")
-
- # Parse event data
- event_data = data.get(DLQFields.EVENT)
- if not isinstance(event_data, dict):
- raise ValueError("Missing or invalid event data")
-
- # Deserialize event
- event = schema_registry.deserialize_json(event_data)
-
- # Parse status
- status_raw = data.get(DLQFields.STATUS, DLQMessageStatus.PENDING)
- status = DLQMessageStatus(str(status_raw))
-
- # Extract values with proper types
- retry_count_value: int = data.get(DLQFields.RETRY_COUNT, 0) # type: ignore[assignment]
- dlq_offset_value: int | None = data.get(DLQFields.DLQ_OFFSET) # type: ignore[assignment]
- dlq_partition_value: int | None = data.get(DLQFields.DLQ_PARTITION) # type: ignore[assignment]
-
- # Create DLQMessage
- return cls(
- event=event,
- original_topic=str(data.get(DLQFields.ORIGINAL_TOPIC, "")),
- error=str(data.get(DLQFields.ERROR, "")),
- retry_count=retry_count_value,
- failed_at=failed_at,
- status=status,
- producer_id=str(data.get(DLQFields.PRODUCER_ID, "unknown")),
- event_id=str(data.get(DLQFields.EVENT_ID, "")) or None,
- created_at=parse_datetime(data.get(DLQFields.CREATED_AT)),
- last_updated=parse_datetime(data.get(DLQFields.LAST_UPDATED)),
- next_retry_at=parse_datetime(data.get(DLQFields.NEXT_RETRY_AT)),
- retried_at=parse_datetime(data.get(DLQFields.RETRIED_AT)),
- discarded_at=parse_datetime(data.get(DLQFields.DISCARDED_AT)),
- discard_reason=str(data.get(DLQFields.DISCARD_REASON, "")) or None,
- dlq_offset=dlq_offset_value,
- dlq_partition=dlq_partition_value,
- last_error=str(data.get(DLQFields.LAST_ERROR, "")) or None,
- )
-
- @classmethod
- def from_kafka_message(cls, message: Message, schema_registry: SchemaRegistryManager) -> "DLQMessage":
- # Parse message value
- record_value = message.value()
- if record_value is None:
- raise ValueError("Message has no value")
-
- data = json.loads(record_value.decode('utf-8'))
-
- # Parse event from the data
- event_data = data.get("event", {})
- event = schema_registry.deserialize_json(event_data)
-
- # Parse headers
- headers = {}
- msg_headers = message.headers()
- if msg_headers:
- for key, value in msg_headers:
- headers[key] = value.decode('utf-8') if value else ""
-
- # Parse failed_at
- failed_at_str = data.get("failed_at")
- if failed_at_str:
- failed_at = datetime.fromisoformat(failed_at_str).replace(tzinfo=timezone.utc)
- else:
- failed_at = datetime.now(timezone.utc)
-
- # Get offset and partition with type assertions
- offset: int = message.offset() # type: ignore[assignment]
- partition: int = message.partition() # type: ignore[assignment]
-
- return cls(
- event=event,
- original_topic=data.get("original_topic", "unknown"),
- error=data.get("error", "Unknown error"),
- retry_count=data.get("retry_count", 0),
- failed_at=failed_at,
- status=DLQMessageStatus.PENDING,
- producer_id=data.get("producer_id", "unknown"),
- headers=headers,
- dlq_offset=offset if offset >= 0 else None,
- dlq_partition=partition if partition >= 0 else None,
- )
-
- @classmethod
- def from_failed_event(
- cls,
- event: BaseEvent,
- original_topic: str,
- error: str,
- producer_id: str,
- retry_count: int = 0
- ) -> "DLQMessage":
- """Create from a failed event."""
- return cls(
- event=event,
- original_topic=original_topic,
- error=error,
- retry_count=retry_count,
- failed_at=datetime.now(timezone.utc),
- status=DLQMessageStatus.PENDING,
- producer_id=producer_id,
- )
-
- def to_response_dict(self) -> dict[str, object]:
- """Convert to API response format."""
- return {
- "event_id": self.event_id,
- "event_type": self.event_type,
- "event": self.event.to_dict(),
- "original_topic": self.original_topic,
- "error": self.error,
- "retry_count": self.retry_count,
- "failed_at": self.failed_at,
- "status": self.status,
- "age_seconds": self.age_seconds,
- "producer_id": self.producer_id,
- "dlq_offset": self.dlq_offset,
- "dlq_partition": self.dlq_partition,
- "last_error": self.last_error,
- "next_retry_at": self.next_retry_at,
- "retried_at": self.retried_at,
- "discarded_at": self.discarded_at,
- "discard_reason": self.discard_reason,
- }
+
+@dataclass
+class DLQMessageUpdate:
+ """Strongly-typed update descriptor for DLQ message status changes."""
+ status: DLQMessageStatus
+ next_retry_at: datetime | None = None
+ retried_at: datetime | None = None
+ discarded_at: datetime | None = None
+ retry_count: int | None = None
+ discard_reason: str | None = None
+ last_error: str | None = None
+ extra: dict[str, Any] = field(default_factory=dict)
@dataclass
@@ -292,19 +111,6 @@ class DLQMessageFilter:
topic: str | None = None
event_type: str | None = None
- def to_query(self) -> dict[str, object]:
- """Convert to MongoDB query."""
- query: dict[str, object] = {}
-
- if self.status:
- query[DLQFields.STATUS] = self.status
- if self.topic:
- query[DLQFields.ORIGINAL_TOPIC] = self.topic
- if self.event_type:
- query[DLQFields.EVENT_TYPE] = self.event_type
-
- return query
-
@dataclass
class RetryPolicy:
@@ -347,18 +153,6 @@ def get_next_retry_time(self, message: DLQMessage) -> datetime:
return datetime.now(timezone.utc) + timedelta(seconds=delay)
- def to_dict(self) -> dict[str, object]:
- """Convert to dictionary."""
- return {
- "topic": self.topic,
- "strategy": self.strategy,
- "max_retries": self.max_retries,
- "base_delay_seconds": self.base_delay_seconds,
- "max_delay_seconds": self.max_delay_seconds,
- "retry_multiplier": self.retry_multiplier,
- "jitter_factor": self.jitter_factor,
- }
-
# Statistics models
@dataclass
@@ -368,14 +162,6 @@ class TopicStatistic:
count: int
avg_retry_count: float
- def to_dict(self) -> dict[str, object]:
- """Convert to dictionary."""
- return {
- "topic": self.topic,
- "count": self.count,
- "avg_retry_count": self.avg_retry_count,
- }
-
@dataclass
class EventTypeStatistic:
@@ -383,13 +169,6 @@ class EventTypeStatistic:
event_type: str
count: int
- def to_dict(self) -> dict[str, object]:
- """Convert to dictionary."""
- return {
- "event_type": self.event_type,
- "count": self.count,
- }
-
@dataclass
class AgeStatistics:
@@ -398,14 +177,6 @@ class AgeStatistics:
max_age_seconds: float
avg_age_seconds: float
- def to_dict(self) -> dict[str, object]:
- """Convert to dictionary."""
- return {
- "min_age": self.min_age_seconds,
- "max_age": self.max_age_seconds,
- "avg_age": self.avg_age_seconds,
- }
-
@dataclass
class DLQStatistics:
@@ -416,16 +187,6 @@ class DLQStatistics:
age_stats: AgeStatistics
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
- def to_dict(self) -> dict[str, object]:
- """Convert to dictionary."""
- return {
- "by_status": self.by_status,
- "by_topic": self.by_topic,
- "by_event_type": self.by_event_type,
- "age_stats": self.age_stats,
- "timestamp": self.timestamp,
- }
-
@dataclass
class DLQRetryResult:
@@ -434,16 +195,6 @@ class DLQRetryResult:
status: str # "success" or "failed"
error: str | None = None
- def to_dict(self) -> dict[str, object]:
- """Convert to dictionary."""
- result: dict[str, object] = {
- "event_id": self.event_id,
- "status": self.status,
- }
- if self.error:
- result["error"] = self.error
- return result
-
@dataclass
class DLQBatchRetryResult:
@@ -453,15 +204,6 @@ class DLQBatchRetryResult:
failed: int
details: list[DLQRetryResult]
- def to_dict(self) -> dict[str, object]:
- """Convert to dictionary."""
- return {
- "total": self.total,
- "successful": self.successful,
- "failed": self.failed,
- "details": [d.to_dict() for d in self.details],
- }
-
@dataclass
class DLQMessageListResult:
@@ -471,15 +213,6 @@ class DLQMessageListResult:
offset: int
limit: int
- def to_dict(self) -> dict[str, object]:
- """Convert to dictionary."""
- return {
- "messages": [msg.to_response_dict() for msg in self.messages],
- "total": self.total,
- "offset": self.offset,
- "limit": self.limit,
- }
-
@dataclass
class DLQTopicSummary:
@@ -491,15 +224,3 @@ class DLQTopicSummary:
newest_message: datetime
avg_retry_count: float
max_retry_count: int
-
- def to_dict(self) -> dict[str, object]:
- """Convert to dictionary."""
- return {
- "topic": self.topic,
- "total_messages": self.total_messages,
- "status_breakdown": self.status_breakdown,
- "oldest_message": self.oldest_message,
- "newest_message": self.newest_message,
- "avg_retry_count": self.avg_retry_count,
- "max_retry_count": self.max_retry_count,
- }
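Note on the removed helpers above: Mongo/API-shaped serialization of DLQ models moves to the infrastructure mappers (see `DLQMapper` later in this diff), while `DLQMessageUpdate` stays a plain descriptor. A minimal sketch of how a repository could turn it into a MongoDB `$set` payload; the `build_update_document` helper and the `last_updated` bookkeeping field are illustrative assumptions, not part of this diff.

```python
# Hypothetical repository-side helper; only DLQMessageUpdate/DLQMessageStatus
# come from this diff, the rest is illustrative.
from dataclasses import asdict
from datetime import datetime, timezone

from app.dlq.models import DLQMessageStatus, DLQMessageUpdate


def build_update_document(update: DLQMessageUpdate) -> dict[str, object]:
    """Build a MongoDB $set payload, skipping fields that were left as None."""
    data = {k: v for k, v in asdict(update).items() if v is not None and k != "extra"}
    data.update(update.extra)  # merge free-form extras last
    data["last_updated"] = datetime.now(timezone.utc)  # assumed bookkeeping field
    return {"$set": data}


doc = build_update_document(
    DLQMessageUpdate(
        status=DLQMessageStatus.PENDING,
        retry_count=2,
        next_retry_at=datetime.now(timezone.utc),
    )
)
```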
diff --git a/backend/app/domain/admin/__init__.py b/backend/app/domain/admin/__init__.py
index 19cb68be..a419a035 100644
--- a/backend/app/domain/admin/__init__.py
+++ b/backend/app/domain/admin/__init__.py
@@ -1 +1,48 @@
-"""Admin domain models"""
+from .overview_models import (
+ AdminUserOverviewDomain,
+ DerivedCountsDomain,
+ RateLimitSummaryDomain,
+)
+from .replay_models import (
+ ReplayQuery,
+ ReplaySession,
+ ReplaySessionData,
+ ReplaySessionFields,
+ ReplaySessionStatusDetail,
+ ReplaySessionStatusInfo,
+)
+from .settings_models import (
+ AuditAction,
+ AuditLogEntry,
+ AuditLogFields,
+ ExecutionLimits,
+ LogLevel,
+ MonitoringSettings,
+ SecuritySettings,
+ SettingsFields,
+ SystemSettings,
+)
+
+__all__ = [
+ # Overview
+ "AdminUserOverviewDomain",
+ "DerivedCountsDomain",
+ "RateLimitSummaryDomain",
+ # Settings
+ "SettingsFields",
+ "AuditLogFields",
+ "AuditAction",
+ "LogLevel",
+ "ExecutionLimits",
+ "SecuritySettings",
+ "MonitoringSettings",
+ "SystemSettings",
+ "AuditLogEntry",
+ # Replay
+ "ReplayQuery",
+ "ReplaySession",
+ "ReplaySessionData",
+ "ReplaySessionFields",
+ "ReplaySessionStatusDetail",
+ "ReplaySessionStatusInfo",
+]
diff --git a/backend/app/domain/admin/overview_models.py b/backend/app/domain/admin/overview_models.py
index f9352950..a208c953 100644
--- a/backend/app/domain/admin/overview_models.py
+++ b/backend/app/domain/admin/overview_models.py
@@ -3,8 +3,8 @@
from dataclasses import dataclass, field
from typing import List
-from app.domain.admin.user_models import User as DomainAdminUser
from app.domain.events import Event, EventStatistics
+from app.domain.user import User as DomainAdminUser
@dataclass
@@ -30,4 +30,3 @@ class AdminUserOverviewDomain:
derived_counts: DerivedCountsDomain
rate_limit_summary: RateLimitSummaryDomain
recent_events: List[Event] = field(default_factory=list)
-
diff --git a/backend/app/domain/admin/replay_models.py b/backend/app/domain/admin/replay_models.py
index ddd313f5..18479867 100644
--- a/backend/app/domain/admin/replay_models.py
+++ b/backend/app/domain/admin/replay_models.py
@@ -3,7 +3,8 @@
from typing import Any
from app.core.utils import StringEnum
-from app.domain.events.event_models import EventSummary, ReplaySessionStatus
+from app.domain.enums.replay import ReplayStatus
+from app.domain.events.event_models import EventSummary
class ReplaySessionFields(StringEnum):
@@ -28,7 +29,7 @@ class ReplaySessionFields(StringEnum):
@dataclass
class ReplaySession:
session_id: str
- status: ReplaySessionStatus
+ status: ReplayStatus
total_events: int
correlation_id: str
created_at: datetime
@@ -54,12 +55,12 @@ def progress_percentage(self) -> float:
@property
def is_completed(self) -> bool:
"""Check if session is completed."""
- return self.status in [ReplaySessionStatus.COMPLETED, ReplaySessionStatus.FAILED, ReplaySessionStatus.CANCELLED]
+ return self.status in [ReplayStatus.COMPLETED, ReplayStatus.FAILED, ReplayStatus.CANCELLED]
@property
def is_running(self) -> bool:
"""Check if session is running."""
- return self.status == ReplaySessionStatus.RUNNING
+ return self.status == ReplayStatus.RUNNING
def update_progress(self, replayed: int, failed: int = 0, skipped: int = 0) -> "ReplaySession":
# Create new instance with updated values
@@ -74,7 +75,7 @@ def update_progress(self, replayed: int, failed: int = 0, skipped: int = 0) -> "
if new_session.replayed_events >= new_session.total_events:
new_session = replace(
new_session,
- status=ReplaySessionStatus.COMPLETED,
+ status=ReplayStatus.COMPLETED,
completed_at=datetime.now(timezone.utc)
)
@@ -91,7 +92,7 @@ class ReplaySessionStatusDetail:
@dataclass
class ReplaySessionStatusInfo:
session_id: str
- status: ReplaySessionStatus
+ status: ReplayStatus
total_events: int
replayed_events: int
failed_events: int
diff --git a/backend/app/domain/admin/replay_updates.py b/backend/app/domain/admin/replay_updates.py
new file mode 100644
index 00000000..ec45d6bf
--- /dev/null
+++ b/backend/app/domain/admin/replay_updates.py
@@ -0,0 +1,56 @@
+"""Domain models for replay session updates."""
+
+from dataclasses import dataclass
+from datetime import datetime
+
+from app.domain.enums.replay import ReplayStatus
+
+
+@dataclass
+class ReplaySessionUpdate:
+ """Domain model for replay session updates."""
+
+ status: ReplayStatus | None = None
+ total_events: int | None = None
+ replayed_events: int | None = None
+ failed_events: int | None = None
+ skipped_events: int | None = None
+ correlation_id: str | None = None
+ started_at: datetime | None = None
+ completed_at: datetime | None = None
+ error: str | None = None
+ target_service: str | None = None
+ dry_run: bool | None = None
+
+ def to_dict(self) -> dict[str, object]:
+ """Convert to dictionary, excluding None values."""
+ result: dict[str, object] = {}
+
+ if self.status is not None:
+ result["status"] = self.status.value if hasattr(self.status, 'value') else self.status
+ if self.total_events is not None:
+ result["total_events"] = self.total_events
+ if self.replayed_events is not None:
+ result["replayed_events"] = self.replayed_events
+ if self.failed_events is not None:
+ result["failed_events"] = self.failed_events
+ if self.skipped_events is not None:
+ result["skipped_events"] = self.skipped_events
+ if self.correlation_id is not None:
+ result["correlation_id"] = self.correlation_id
+ if self.started_at is not None:
+ result["started_at"] = self.started_at
+ if self.completed_at is not None:
+ result["completed_at"] = self.completed_at
+ if self.error is not None:
+ result["error"] = self.error
+ if self.target_service is not None:
+ result["target_service"] = self.target_service
+ if self.dry_run is not None:
+ result["dry_run"] = self.dry_run
+
+ return result
+
+ def has_updates(self) -> bool:
+ """Check if there are any updates to apply."""
+ return bool(self.to_dict())
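A quick usage sketch for the new `ReplaySessionUpdate`, assuming a Motor-style async collection; the collection handle and filter key are assumptions for illustration, the models come from this diff.

```python
# Sketch: applying a ReplaySessionUpdate through a Motor-style async collection.
# The collection/filter shape is an assumption; the models are from the diff.
from datetime import datetime, timezone

from app.domain.admin.replay_updates import ReplaySessionUpdate
from app.domain.enums.replay import ReplayStatus


async def apply_session_update(collection, session_id: str, update: ReplaySessionUpdate) -> bool:
    if not update.has_updates():
        return False  # nothing was set, skip the round trip
    result = await collection.update_one(
        {"session_id": session_id},
        {"$set": update.to_dict()},  # to_dict() already drops None fields
    )
    return result.modified_count > 0


update = ReplaySessionUpdate(
    status=ReplayStatus.COMPLETED,
    completed_at=datetime.now(timezone.utc),
    replayed_events=120,
)
```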
diff --git a/backend/app/domain/enums/__init__.py b/backend/app/domain/enums/__init__.py
index 1e1871a1..907a0b8a 100644
--- a/backend/app/domain/enums/__init__.py
+++ b/backend/app/domain/enums/__init__.py
@@ -3,9 +3,8 @@
from app.domain.enums.health import AlertSeverity, AlertStatus, ComponentStatus
from app.domain.enums.notification import (
NotificationChannel,
- NotificationPriority,
+ NotificationSeverity,
NotificationStatus,
- NotificationType,
)
from app.domain.enums.saga import SagaState
from app.domain.enums.user import UserRole
@@ -23,9 +22,8 @@
"ComponentStatus",
# Notification
"NotificationChannel",
- "NotificationPriority",
+ "NotificationSeverity",
"NotificationStatus",
- "NotificationType",
# Saga
"SagaState",
# User
diff --git a/backend/app/domain/enums/kafka.py b/backend/app/domain/enums/kafka.py
index 60baec99..036d9cc5 100644
--- a/backend/app/domain/enums/kafka.py
+++ b/backend/app/domain/enums/kafka.py
@@ -27,6 +27,9 @@ class KafkaTopic(StringEnum):
USER_EVENTS = "user_events"
USER_NOTIFICATIONS = "user_notifications"
USER_SETTINGS_EVENTS = "user_settings_events"
+ USER_SETTINGS_THEME_EVENTS = "user_settings_theme_events"
+ USER_SETTINGS_NOTIFICATION_EVENTS = "user_settings_notification_events"
+ USER_SETTINGS_EDITOR_EVENTS = "user_settings_editor_events"
# Script topics
SCRIPT_EVENTS = "script_events"
diff --git a/backend/app/domain/enums/notification.py b/backend/app/domain/enums/notification.py
index d701c12a..08576814 100644
--- a/backend/app/domain/enums/notification.py
+++ b/backend/app/domain/enums/notification.py
@@ -8,8 +8,8 @@ class NotificationChannel(StringEnum):
SLACK = "slack"
-class NotificationPriority(StringEnum):
- """Notification priority levels."""
+class NotificationSeverity(StringEnum):
+ """Notification severity levels."""
LOW = "low"
MEDIUM = "medium"
HIGH = "high"
@@ -21,24 +21,11 @@ class NotificationStatus(StringEnum):
PENDING = "pending"
QUEUED = "queued"
SENDING = "sending"
- SENT = "sent"
DELIVERED = "delivered"
FAILED = "failed"
+ SKIPPED = "skipped"
READ = "read"
CLICKED = "clicked"
-class NotificationType(StringEnum):
- """Types of notifications."""
- EXECUTION_COMPLETED = "execution_completed"
- EXECUTION_FAILED = "execution_failed"
- EXECUTION_TIMEOUT = "execution_timeout"
- SYSTEM_UPDATE = "system_update"
- SYSTEM_ALERT = "system_alert"
- SECURITY_ALERT = "security_alert"
- RESOURCE_LIMIT = "resource_limit"
- QUOTA_WARNING = "quota_warning"
- ACCOUNT_UPDATE = "account_update"
- SETTINGS_CHANGED = "settings_changed"
- MAINTENANCE = "maintenance"
- CUSTOM = "custom"
+# SystemNotificationLevel removed in unified model (use NotificationSeverity + tags)
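With `NotificationType` removed, callers that used to branch on a type now express the same intent with a severity plus tags, as the closing comment says. A hedged illustration of how a few legacy type values could be translated; the tag strings and severity choices below are examples only, not defined anywhere in this diff.

```python
# Illustrative mapping of a few removed NotificationType values onto the
# unified severity + tags model. Tag names are made up for this example.
from app.domain.enums.notification import NotificationSeverity

LEGACY_TYPE_MAPPING: dict[str, tuple[NotificationSeverity, list[str]]] = {
    "execution_completed": (NotificationSeverity.LOW, ["execution"]),
    "execution_failed": (NotificationSeverity.HIGH, ["execution", "failure"]),
    "security_alert": (NotificationSeverity.HIGH, ["security"]),
}

severity, tags = LEGACY_TYPE_MAPPING["execution_failed"]
```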
diff --git a/backend/app/domain/enums/replay.py b/backend/app/domain/enums/replay.py
index 1c3de12d..50d4f92e 100644
--- a/backend/app/domain/enums/replay.py
+++ b/backend/app/domain/enums/replay.py
@@ -10,6 +10,9 @@ class ReplayType(StringEnum):
class ReplayStatus(StringEnum):
+ # Unified replay lifecycle across admin + services
+ # "scheduled" retained for admin flows (alias of initial state semantics)
+ SCHEDULED = "scheduled"
CREATED = "created"
RUNNING = "running"
PAUSED = "paused"
diff --git a/backend/app/domain/events/__init__.py b/backend/app/domain/events/__init__.py
index 94af7e43..c9be24dd 100644
--- a/backend/app/domain/events/__init__.py
+++ b/backend/app/domain/events/__init__.py
@@ -1,5 +1,3 @@
-"""Domain models for event store."""
-
from app.domain.events.event_models import (
ArchivedEvent,
Event,
diff --git a/backend/app/domain/events/event_models.py b/backend/app/domain/events/event_models.py
index 14c5a39e..072f1d57 100644
--- a/backend/app/domain/events/event_models.py
+++ b/backend/app/domain/events/event_models.py
@@ -61,14 +61,20 @@ class CollectionNames(StringEnum):
EVENT_STORE = "event_store"
REPLAY_SESSIONS = "replay_sessions"
EVENTS_ARCHIVE = "events_archive"
+ RESOURCE_ALLOCATIONS = "resource_allocations"
+ USERS = "users"
+ EXECUTIONS = "executions"
+ EXECUTION_RESULTS = "execution_results"
+ SAVED_SCRIPTS = "saved_scripts"
+ NOTIFICATIONS = "notifications"
+ NOTIFICATION_SUBSCRIPTIONS = "notification_subscriptions"
+ USER_SETTINGS = "user_settings"
+ USER_SETTINGS_SNAPSHOTS = "user_settings_snapshots"
+ SAGAS = "sagas"
+ DLQ_MESSAGES = "dlq_messages"
-class ReplaySessionStatus(StringEnum):
- SCHEDULED = "scheduled"
- RUNNING = "running"
- COMPLETED = "completed"
- FAILED = "failed"
- CANCELLED = "cancelled"
+
@dataclass
@@ -88,7 +94,7 @@ class Event:
@property
def correlation_id(self) -> str | None:
- return self.metadata.correlation_id if self.metadata else None
+ return self.metadata.correlation_id
@dataclass
@@ -99,15 +105,6 @@ class EventSummary:
timestamp: datetime
aggregate_id: str | None = None
- @classmethod
- def from_event(cls, event: Event) -> "EventSummary":
- return cls(
- event_id=event.event_id,
- event_type=event.event_type,
- timestamp=event.timestamp,
- aggregate_id=event.aggregate_id
- )
-
@dataclass
class EventFilter:
@@ -123,37 +120,6 @@ class EventFilter:
text_search: str | None = None
status: str | None = None
- def to_query(self) -> MongoQuery:
- """Build MongoDB query from filter."""
- query: MongoQuery = {}
-
- if self.event_types:
- query[EventFields.EVENT_TYPE] = {"$in": self.event_types}
- if self.aggregate_id:
- query[EventFields.AGGREGATE_ID] = self.aggregate_id
- if self.correlation_id:
- query[EventFields.METADATA_CORRELATION_ID] = self.correlation_id
- if self.user_id:
- query[EventFields.METADATA_USER_ID] = self.user_id
- if self.service_name:
- query[EventFields.METADATA_SERVICE_NAME] = self.service_name
- if self.status:
- query[EventFields.STATUS] = self.status
-
- if self.start_time or self.end_time:
- time_query: dict[str, Any] = {}
- if self.start_time:
- time_query["$gte"] = self.start_time
- if self.end_time:
- time_query["$lte"] = self.end_time
- query[EventFields.TIMESTAMP] = time_query
-
- search = self.text_search or self.search_text
- if search:
- query["$text"] = {"$search": search}
-
- return query
-
@dataclass
class EventQuery:
@@ -286,33 +252,6 @@ class EventExportRow:
status: str
error: str
- def to_csv_dict(self) -> dict[str, str]:
- return {
- "Event ID": self.event_id,
- "Event Type": self.event_type,
- "Timestamp": self.timestamp,
- "Correlation ID": self.correlation_id,
- "Aggregate ID": self.aggregate_id,
- "User ID": self.user_id,
- "Service": self.service,
- "Status": self.status,
- "Error": self.error
- }
-
- @classmethod
- def from_event(cls, event: Event) -> "EventExportRow":
- return cls(
- event_id=event.event_id,
- event_type=event.event_type,
- timestamp=event.timestamp.isoformat(),
- correlation_id=event.metadata.correlation_id or "",
- aggregate_id=event.aggregate_id or "",
- user_id=event.metadata.user_id or "",
- service=event.metadata.service_name,
- status=event.status or "",
- error=event.error or ""
- )
-
@dataclass
class EventAggregationResult:
@@ -320,6 +259,3 @@ class EventAggregationResult:
results: list[dict[str, Any]]
pipeline: list[dict[str, Any]]
execution_time_ms: float | None = None
-
- def to_list(self) -> list[dict[str, Any]]:
- return self.results
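`EventFilter.to_query()` and the other Mongo-shaped helpers leave the domain layer here; the mappers package exported later in this diff (e.g. `EventFilterMapper`) is where that logic is expected to live now. Below is a sketch reconstructed from the removed method, with the caveat that the real mapper's location and signature may differ.

```python
# Reconstruction of the removed EventFilter.to_query() as a standalone function;
# the actual EventFilterMapper API in the codebase may differ.
from typing import Any

from app.domain.events.event_models import EventFields, EventFilter


def event_filter_to_query(f: EventFilter) -> dict[str, Any]:
    query: dict[str, Any] = {}
    if f.event_types:
        query[EventFields.EVENT_TYPE] = {"$in": f.event_types}
    if f.aggregate_id:
        query[EventFields.AGGREGATE_ID] = f.aggregate_id
    if f.start_time or f.end_time:
        time_query: dict[str, Any] = {}
        if f.start_time:
            time_query["$gte"] = f.start_time
        if f.end_time:
            time_query["$lte"] = f.end_time
        query[EventFields.TIMESTAMP] = time_query
    if f.text_search:
        query["$text"] = {"$search": f.text_search}
    return query
```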
diff --git a/backend/app/domain/execution/__init__.py b/backend/app/domain/execution/__init__.py
new file mode 100644
index 00000000..5ecff136
--- /dev/null
+++ b/backend/app/domain/execution/__init__.py
@@ -0,0 +1,21 @@
+from .exceptions import (
+ EventPublishError,
+ ExecutionNotFoundError,
+ ExecutionServiceError,
+ RuntimeNotSupportedError,
+)
+from .models import (
+ DomainExecution,
+ ExecutionResultDomain,
+ ResourceUsageDomain,
+)
+
+__all__ = [
+ "DomainExecution",
+ "ExecutionResultDomain",
+ "ResourceUsageDomain",
+ "ExecutionServiceError",
+ "RuntimeNotSupportedError",
+ "EventPublishError",
+ "ExecutionNotFoundError",
+]
diff --git a/backend/app/domain/execution/models.py b/backend/app/domain/execution/models.py
index 08a2071c..1442d3c2 100644
--- a/backend/app/domain/execution/models.py
+++ b/backend/app/domain/execution/models.py
@@ -14,8 +14,8 @@ class DomainExecution:
execution_id: str = field(default_factory=lambda: str(uuid4()))
script: str = ""
status: ExecutionStatus = ExecutionStatus.QUEUED
- output: Optional[str] = None
- errors: Optional[str] = None
+ stdout: Optional[str] = None
+ stderr: Optional[str] = None
lang: str = "python"
lang_version: str = "3.11"
created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
diff --git a/backend/app/domain/idempotency/__init__.py b/backend/app/domain/idempotency/__init__.py
new file mode 100644
index 00000000..4e995ecc
--- /dev/null
+++ b/backend/app/domain/idempotency/__init__.py
@@ -0,0 +1,12 @@
+from .models import (
+ IdempotencyRecord,
+ IdempotencyStats,
+ IdempotencyStatus,
+)
+
+__all__ = [
+ "IdempotencyStatus",
+ "IdempotencyRecord",
+ "IdempotencyStats",
+]
+
diff --git a/backend/app/domain/idempotency/models.py b/backend/app/domain/idempotency/models.py
new file mode 100644
index 00000000..f3001c8f
--- /dev/null
+++ b/backend/app/domain/idempotency/models.py
@@ -0,0 +1,35 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime
+from typing import Dict, Optional
+
+from app.core.utils import StringEnum
+
+
+class IdempotencyStatus(StringEnum):
+ PROCESSING = "processing"
+ COMPLETED = "completed"
+ FAILED = "failed"
+ EXPIRED = "expired"
+
+
+@dataclass
+class IdempotencyRecord:
+ key: str
+ status: IdempotencyStatus
+ event_type: str
+ event_id: str
+ created_at: datetime
+ ttl_seconds: int
+ completed_at: Optional[datetime] = None
+ processing_duration_ms: Optional[int] = None
+ error: Optional[str] = None
+ result_json: Optional[str] = None
+
+
+@dataclass
+class IdempotencyStats:
+ total_keys: int
+ status_counts: Dict[IdempotencyStatus, int]
+ prefix: str
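A small usage sketch for the new idempotency models; the key format and event names are placeholders, everything else matches the dataclasses above.

```python
# Sketch: recording a completed event under the new idempotency models.
# Key format and event names are assumptions for illustration.
from datetime import datetime, timezone

from app.domain.idempotency import IdempotencyRecord, IdempotencyStatus

record = IdempotencyRecord(
    key="execution:abc123:ExecutionRequestedEvent",
    status=IdempotencyStatus.COMPLETED,
    event_type="ExecutionRequestedEvent",
    event_id="abc123",
    created_at=datetime.now(timezone.utc),
    ttl_seconds=3600,
    completed_at=datetime.now(timezone.utc),
    processing_duration_ms=42,
)
```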
diff --git a/backend/app/domain/notification/__init__.py b/backend/app/domain/notification/__init__.py
new file mode 100644
index 00000000..bf8fba98
--- /dev/null
+++ b/backend/app/domain/notification/__init__.py
@@ -0,0 +1,11 @@
+from .models import (
+ DomainNotification,
+ DomainNotificationListResult,
+ DomainNotificationSubscription,
+)
+
+__all__ = [
+ "DomainNotification",
+ "DomainNotificationSubscription",
+ "DomainNotificationListResult",
+]
diff --git a/backend/app/domain/notification/models.py b/backend/app/domain/notification/models.py
index 274fc218..f46c1bc8 100644
--- a/backend/app/domain/notification/models.py
+++ b/backend/app/domain/notification/models.py
@@ -7,35 +7,23 @@
from app.domain.enums.notification import (
NotificationChannel,
- NotificationPriority,
+ NotificationSeverity,
NotificationStatus,
- NotificationType,
)
-@dataclass
-class DomainNotificationTemplate:
- notification_type: NotificationType
- channels: list[NotificationChannel]
- priority: NotificationPriority = NotificationPriority.MEDIUM
- subject_template: str = ""
- body_template: str = ""
- action_url_template: str | None = None
- metadata: dict[str, Any] = field(default_factory=dict)
-
-
@dataclass
class DomainNotification:
notification_id: str = field(default_factory=lambda: str(uuid4()))
user_id: str = ""
- notification_type: NotificationType = NotificationType.SYSTEM_UPDATE
channel: NotificationChannel = NotificationChannel.IN_APP
- priority: NotificationPriority = NotificationPriority.MEDIUM
+ severity: NotificationSeverity = NotificationSeverity.MEDIUM
status: NotificationStatus = NotificationStatus.PENDING
subject: str = ""
body: str = ""
action_url: str | None = None
+ tags: list[str] = field(default_factory=list)
created_at: datetime = field(default_factory=lambda: datetime.now(UTC))
scheduled_for: datetime | None = None
@@ -49,9 +37,6 @@ class DomainNotification:
max_retries: int = 3
error_message: str | None = None
- correlation_id: str | None = None
- related_entity_id: str | None = None
- related_entity_type: str | None = None
metadata: dict[str, Any] = field(default_factory=dict)
webhook_url: str | None = None
@@ -63,7 +48,9 @@ class DomainNotificationSubscription:
user_id: str
channel: NotificationChannel
enabled: bool = True
- notification_types: list[NotificationType] = field(default_factory=list)
+ severities: list[NotificationSeverity] = field(default_factory=list)
+ include_tags: list[str] = field(default_factory=list)
+ exclude_tags: list[str] = field(default_factory=list)
webhook_url: str | None = None
slack_webhook: str | None = None
@@ -77,29 +64,8 @@ class DomainNotificationSubscription:
updated_at: datetime = field(default_factory=lambda: datetime.now(UTC))
-@dataclass
-class DomainNotificationRule:
- rule_id: str = field(default_factory=lambda: str(uuid4()))
- name: str = ""
- description: str | None = None
- enabled: bool = True
- event_types: list[str] = field(default_factory=list)
- conditions: dict[str, Any] = field(default_factory=dict)
- notification_type: NotificationType = NotificationType.SYSTEM_UPDATE
- channels: list[NotificationChannel] = field(default_factory=list)
- priority: NotificationPriority = NotificationPriority.MEDIUM
- template_id: str | None = None
- throttle_minutes: int | None = None
- max_per_hour: int | None = None
- max_per_day: int | None = None
- created_at: datetime = field(default_factory=lambda: datetime.now(UTC))
- updated_at: datetime = field(default_factory=lambda: datetime.now(UTC))
- created_by: str | None = None
-
-
@dataclass
class DomainNotificationListResult:
notifications: list[DomainNotification]
total: int
unread_count: int
-
diff --git a/backend/app/domain/rate_limit/__init__.py b/backend/app/domain/rate_limit/__init__.py
index 8c7e1813..44c8e3e8 100644
--- a/backend/app/domain/rate_limit/__init__.py
+++ b/backend/app/domain/rate_limit/__init__.py
@@ -5,6 +5,7 @@
RateLimitRule,
RateLimitStatus,
UserRateLimit,
+ UserRateLimitSummary,
)
__all__ = [
@@ -13,5 +14,6 @@
"RateLimitConfig",
"RateLimitRule",
"RateLimitStatus",
- "UserRateLimit"
+ "UserRateLimit",
+ "UserRateLimitSummary",
]
diff --git a/backend/app/domain/rate_limit/rate_limit_models.py b/backend/app/domain/rate_limit/rate_limit_models.py
index 00ceb862..15246d5d 100644
--- a/backend/app/domain/rate_limit/rate_limit_models.py
+++ b/backend/app/domain/rate_limit/rate_limit_models.py
@@ -1,3 +1,4 @@
+import re
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Dict, List, Optional
@@ -32,6 +33,8 @@ class RateLimitRule:
algorithm: RateLimitAlgorithm = RateLimitAlgorithm.SLIDING_WINDOW
priority: int = 0
enabled: bool = True
+ # Internal cache for matching speed; not serialized
+ compiled_pattern: Optional[re.Pattern[str]] = field(default=None, repr=False, compare=False)
@dataclass
@@ -131,3 +134,16 @@ class RateLimitStatus:
retry_after: Optional[int] = None
matched_rule: Optional[str] = None
algorithm: RateLimitAlgorithm = RateLimitAlgorithm.SLIDING_WINDOW
+
+
+@dataclass
+class UserRateLimitSummary:
+ """Summary view for a user's rate limit configuration.
+
+ Always present for callers; reflects defaults when no override exists.
+ """
+ user_id: str
+ has_custom_limits: bool
+ bypass_rate_limit: bool
+ global_multiplier: float
+ rules_count: int
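The new `compiled_pattern` slot caches a compiled regex on the rule so matching does not recompile on every request. A minimal sketch of the lazy-compile pattern; the raw pattern string is passed in explicitly because the attribute that stores it is not visible in this hunk.

```python
# Sketch of lazy compilation against the new compiled_pattern cache. The raw
# pattern is passed in explicitly; the field that holds it is not shown here.
import re

from app.domain.rate_limit import RateLimitRule


def rule_matches(rule: RateLimitRule, path: str, raw_pattern: str) -> bool:
    """Compile the rule's regex once, then reuse the cached pattern."""
    if rule.compiled_pattern is None:
        rule.compiled_pattern = re.compile(raw_pattern)
    return rule.compiled_pattern.match(path) is not None
```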
diff --git a/backend/app/domain/replay/__init__.py b/backend/app/domain/replay/__init__.py
new file mode 100644
index 00000000..10acf809
--- /dev/null
+++ b/backend/app/domain/replay/__init__.py
@@ -0,0 +1,16 @@
+from .models import (
+ CleanupResult,
+ ReplayConfig,
+ ReplayFilter,
+ ReplayOperationResult,
+ ReplaySessionState,
+)
+
+__all__ = [
+ "ReplayFilter",
+ "ReplayConfig",
+ "ReplaySessionState",
+ "ReplayOperationResult",
+ "CleanupResult",
+]
+
diff --git a/backend/app/domain/replay/models.py b/backend/app/domain/replay/models.py
index e18013b7..52bbc8cb 100644
--- a/backend/app/domain/replay/models.py
+++ b/backend/app/domain/replay/models.py
@@ -2,16 +2,17 @@
from datetime import datetime, timezone
from typing import Any, Dict, List
+from pydantic import BaseModel, Field, PrivateAttr
+
from app.domain.enums.events import EventType
from app.domain.enums.replay import ReplayStatus, ReplayTarget, ReplayType
-from pydantic import BaseModel, Field, PrivateAttr
class ReplayFilter(BaseModel):
execution_id: str | None = None
event_types: List[EventType] | None = None
- start_time: float | None = None
- end_time: float | None = None
+ start_time: datetime | None = None
+ end_time: datetime | None = None
user_id: str | None = None
service_name: str | None = None
custom_query: Dict[str, Any] | None = None
diff --git a/backend/app/domain/saved_script/__init__.py b/backend/app/domain/saved_script/__init__.py
new file mode 100644
index 00000000..f1ded779
--- /dev/null
+++ b/backend/app/domain/saved_script/__init__.py
@@ -0,0 +1,12 @@
+from .models import (
+ DomainSavedScript,
+ DomainSavedScriptCreate,
+ DomainSavedScriptUpdate,
+)
+
+__all__ = [
+ "DomainSavedScript",
+ "DomainSavedScriptCreate",
+ "DomainSavedScriptUpdate",
+]
+
diff --git a/backend/app/domain/saved_script/models.py b/backend/app/domain/saved_script/models.py
index bf00592c..ba819cbd 100644
--- a/backend/app/domain/saved_script/models.py
+++ b/backend/app/domain/saved_script/models.py
@@ -1,6 +1,6 @@
from __future__ import annotations
-from dataclasses import KW_ONLY, dataclass, field
+from dataclasses import dataclass, field
from datetime import datetime, timezone
@@ -8,21 +8,23 @@
class DomainSavedScriptBase:
name: str
script: str
- _: KW_ONLY
- lang: str = "python"
- lang_version: str = "3.11"
- description: str | None = None
@dataclass
class DomainSavedScriptCreate(DomainSavedScriptBase):
- pass
+ lang: str = "python"
+ lang_version: str = "3.11"
+ description: str | None = None
@dataclass
class DomainSavedScript(DomainSavedScriptBase):
script_id: str
user_id: str
+    # Fields with defaults must come after the required (non-default) fields
+ lang: str = "python"
+ lang_version: str = "3.11"
+ description: str | None = None
created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
updated_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
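The dropped `KW_ONLY` marker is why the defaulted fields move below the required ones: without it, a dataclass raises `TypeError` at class-definition time if a non-default field follows one with a default. With the ordering in this diff, keyword construction is unaffected:

```python
# With the field order above, only the declaration order changed; keyword
# construction still works. Values here are placeholders.
from app.domain.saved_script import DomainSavedScript

script = DomainSavedScript(
    name="hello",
    script="print('hi')",
    script_id="s1",
    user_id="u1",
    lang="python",
    lang_version="3.11",
)
```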
diff --git a/backend/app/domain/sse/__init__.py b/backend/app/domain/sse/__init__.py
new file mode 100644
index 00000000..faa2c31c
--- /dev/null
+++ b/backend/app/domain/sse/__init__.py
@@ -0,0 +1,12 @@
+from .models import (
+ SSEEventDomain,
+ SSEExecutionStatusDomain,
+ SSEHealthDomain,
+)
+
+__all__ = [
+ "SSEHealthDomain",
+ "SSEExecutionStatusDomain",
+ "SSEEventDomain",
+]
+
diff --git a/backend/app/domain/user/__init__.py b/backend/app/domain/user/__init__.py
new file mode 100644
index 00000000..e81c436d
--- /dev/null
+++ b/backend/app/domain/user/__init__.py
@@ -0,0 +1,40 @@
+from app.domain.enums.user import UserRole
+
+from .settings_models import (
+ CachedSettings,
+ DomainEditorSettings,
+ DomainNotificationSettings,
+ DomainSettingsEvent,
+ DomainSettingsHistoryEntry,
+ DomainUserSettings,
+ DomainUserSettingsUpdate,
+)
+from .user_models import (
+ PasswordReset,
+ User,
+ UserCreation,
+ UserFields,
+ UserFilterType,
+ UserListResult,
+ UserSearchFilter,
+ UserUpdate,
+)
+
+__all__ = [
+ "User",
+ "UserUpdate",
+ "UserListResult",
+ "UserCreation",
+ "PasswordReset",
+ "UserFields",
+ "UserFilterType",
+ "UserSearchFilter",
+ "UserRole",
+ "DomainNotificationSettings",
+ "DomainEditorSettings",
+ "DomainUserSettings",
+ "DomainUserSettingsUpdate",
+ "DomainSettingsEvent",
+ "DomainSettingsHistoryEntry",
+ "CachedSettings",
+]
diff --git a/backend/app/domain/user/settings_models.py b/backend/app/domain/user/settings_models.py
index 157b9fdc..f0e72f42 100644
--- a/backend/app/domain/user/settings_models.py
+++ b/backend/app/domain/user/settings_models.py
@@ -6,6 +6,7 @@
from app.domain.enums.common import Theme
from app.domain.enums.events import EventType
+from app.domain.enums.notification import NotificationChannel
@dataclass
@@ -14,7 +15,7 @@ class DomainNotificationSettings:
execution_failed: bool = True
system_updates: bool = True
security_alerts: bool = True
- channels: List[Any] = field(default_factory=list)
+ channels: List[NotificationChannel] = field(default_factory=list)
@dataclass
diff --git a/backend/app/domain/admin/user_models.py b/backend/app/domain/user/user_models.py
similarity index 84%
rename from backend/app/domain/admin/user_models.py
rename to backend/app/domain/user/user_models.py
index c8117287..9cc95b79 100644
--- a/backend/app/domain/admin/user_models.py
+++ b/backend/app/domain/user/user_models.py
@@ -1,7 +1,7 @@
import re
from dataclasses import dataclass
from datetime import datetime
-from typing import Any, Dict, List
+from typing import List
from app.core.utils import StringEnum
from app.domain.enums.user import UserRole
@@ -35,20 +35,6 @@ class UserSearchFilter:
search_text: str | None = None
role: UserRole | None = None
- def to_query(self) -> Dict[str, Any]:
- query: Dict[str, Any] = {}
-
- if self.search_text:
- query["$or"] = [
- {UserFields.USERNAME.value: {"$regex": self.search_text, "$options": "i"}},
- {UserFields.EMAIL.value: {"$regex": self.search_text, "$options": "i"}}
- ]
-
- if self.role:
- query[UserFields.ROLE] = self.role
-
- return query
-
@dataclass
class User:
diff --git a/backend/app/events/admin_utils.py b/backend/app/events/admin_utils.py
index c4a129af..4d0ce63f 100644
--- a/backend/app/events/admin_utils.py
+++ b/backend/app/events/admin_utils.py
@@ -1,4 +1,3 @@
-"""Minimal Kafka admin utilities using native AdminClient."""
import asyncio
from typing import Dict, List
diff --git a/backend/app/events/core/__init__.py b/backend/app/events/core/__init__.py
index e69de29b..f0957882 100644
--- a/backend/app/events/core/__init__.py
+++ b/backend/app/events/core/__init__.py
@@ -0,0 +1,32 @@
+from .consumer import UnifiedConsumer
+from .dispatcher import EventDispatcher
+from .dlq_handler import (
+ create_dlq_error_handler,
+ create_immediate_dlq_handler,
+)
+from .producer import UnifiedProducer
+from .types import (
+ ConsumerConfig,
+ ConsumerMetrics,
+ ConsumerState,
+ ProducerConfig,
+ ProducerMetrics,
+ ProducerState,
+)
+
+__all__ = [
+ # Types
+ "ProducerState",
+ "ConsumerState",
+ "ProducerConfig",
+ "ConsumerConfig",
+ "ProducerMetrics",
+ "ConsumerMetrics",
+ # Core components
+ "UnifiedProducer",
+ "UnifiedConsumer",
+ "EventDispatcher",
+ # Helpers
+ "create_dlq_error_handler",
+ "create_immediate_dlq_handler",
+]
diff --git a/backend/app/events/core/consumer.py b/backend/app/events/core/consumer.py
index ccd97ee1..89d89cd5 100644
--- a/backend/app/events/core/consumer.py
+++ b/backend/app/events/core/consumer.py
@@ -5,15 +5,19 @@
from confluent_kafka import OFFSET_BEGINNING, OFFSET_END, Consumer, Message, TopicPartition
from confluent_kafka.error import KafkaError
+from opentelemetry.trace import SpanKind
from app.core.logging import logger
from app.core.metrics.context import get_event_metrics
+from app.core.tracing import EventAttributes
+from app.core.tracing.utils import extract_trace_context, get_tracer
from app.domain.enums.kafka import KafkaTopic
-from app.events.core.dispatcher import EventDispatcher
-from app.events.core.types import ConsumerConfig, ConsumerMetrics, ConsumerState
from app.events.schema.schema_registry import SchemaRegistryManager
from app.infrastructure.kafka.events.base import BaseEvent
+from .dispatcher import EventDispatcher
+from .types import ConsumerConfig, ConsumerMetrics, ConsumerState
+
class UnifiedConsumer:
def __init__(
@@ -80,15 +84,15 @@ async def _consume_loop(self) -> None:
logger.info(f"Consumer loop started for group {self._config.group_id}")
poll_count = 0
message_count = 0
-
+
while self._running and self._consumer:
poll_count += 1
if poll_count % 100 == 0: # Log every 100 polls
logger.debug(f"Consumer loop active: polls={poll_count}, messages={message_count}")
-
+
msg = await asyncio.to_thread(self._consumer.poll, timeout=0.1)
- if msg:
+ if msg is not None:
error = msg.error()
if error:
if error.code() != KafkaError._PARTITION_EOF:
@@ -122,10 +126,34 @@ async def _process_message(self, message: Message) -> None:
event = self._schema_registry.deserialize_event(raw_value, topic)
logger.info(f"Deserialized event: type={event.event_type}, id={event.event_id}")
+ # Extract trace context from Kafka headers and start a consumer span
+ header_list = message.headers() or []
+ headers: dict[str, str] = {}
+ for k, v in header_list:
+ headers[str(k)] = v.decode("utf-8") if isinstance(v, (bytes, bytearray)) else (v or "")
+ ctx = extract_trace_context(headers)
+ tracer = get_tracer()
+
# Dispatch event through EventDispatcher
try:
logger.debug(f"Dispatching {event.event_type} to handlers")
- await self._dispatcher.dispatch(event)
+ partition_val = message.partition()
+ offset_val = message.offset()
+ part_attr = partition_val if partition_val is not None else -1
+ off_attr = offset_val if offset_val is not None else -1
+ with tracer.start_as_current_span(
+ name="kafka.consume",
+ context=ctx,
+ kind=SpanKind.CONSUMER,
+ attributes={
+ EventAttributes.KAFKA_TOPIC: topic,
+ EventAttributes.KAFKA_PARTITION: part_attr,
+ EventAttributes.KAFKA_OFFSET: off_attr,
+ EventAttributes.EVENT_TYPE: event.event_type,
+ EventAttributes.EVENT_ID: event.event_id,
+ },
+ ):
+ await self._dispatcher.dispatch(event)
logger.debug(f"Successfully dispatched {event.event_type}")
# Update metrics on successful dispatch
self._metrics.messages_consumed += 1
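The consumer now restores the producer's trace context from Kafka headers before opening a `kafka.consume` span, so dispatched handlers stay linked to the originating request. The diff goes through the project's tracing helpers; the sketch below shows the same idea with only the public OpenTelemetry API, on the assumption that those helpers are thin wrappers around it.

```python
# Same idea using only the public OpenTelemetry API (the diff uses
# app.core.tracing.utils instead). Header decoding mirrors the diff.
from opentelemetry import propagate, trace
from opentelemetry.trace import SpanKind


def consume_with_trace(header_list: list[tuple[str, bytes | None]], topic: str) -> None:
    headers = {
        str(k): (v.decode("utf-8") if isinstance(v, (bytes, bytearray)) else (v or ""))
        for k, v in header_list
    }
    ctx = propagate.extract(headers)  # reads traceparent/tracestate if present
    tracer = trace.get_tracer("kafka.consumer")
    with tracer.start_as_current_span(
        "kafka.consume",
        context=ctx,
        kind=SpanKind.CONSUMER,
        attributes={"messaging.destination": topic},
    ):
        ...  # deserialize and dispatch the event here
```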
diff --git a/backend/app/events/core/dispatcher.py b/backend/app/events/core/dispatcher.py
index bf270edf..5cc0e1e3 100644
--- a/backend/app/events/core/dispatcher.py
+++ b/backend/app/events/core/dispatcher.py
@@ -6,6 +6,7 @@
from app.core.logging import logger
from app.domain.enums.events import EventType
from app.infrastructure.kafka.events.base import BaseEvent
+from app.infrastructure.kafka.mappings import get_event_class_for_type
T = TypeVar('T', bound=BaseEvent)
@@ -152,7 +153,6 @@ def get_topics_for_registered_handlers(self) -> set[str]:
topics = set()
for event_type in self._handlers.keys():
# Find event class for this type
- from app.infrastructure.kafka.mappings import get_event_class_for_type
event_class = get_event_class_for_type(event_type)
if event_class and hasattr(event_class, 'topic'):
topics.add(str(event_class.topic))
diff --git a/backend/app/events/core/dlq_handler.py b/backend/app/events/core/dlq_handler.py
index 337571b1..50ab0001 100644
--- a/backend/app/events/core/dlq_handler.py
+++ b/backend/app/events/core/dlq_handler.py
@@ -1,9 +1,10 @@
from typing import Awaitable, Callable
from app.core.logging import logger
-from app.events.core.producer import UnifiedProducer
from app.infrastructure.kafka.events.base import BaseEvent
+from .producer import UnifiedProducer
+
def create_dlq_error_handler(
producer: UnifiedProducer,
@@ -93,10 +94,8 @@ async def handle_error_immediate_dlq(error: Exception, event: BaseEvent) -> None
error: The exception that occurred
event: The event that failed processing
"""
- event_id = event.event_id or "unknown"
-
logger.error(
- f"Critical error processing event {event_id} ({event.event_type}): {error}. "
+ f"Critical error processing event {event.event_id} ({event.event_type}): {error}. "
f"Sending immediately to DLQ.",
exc_info=True
)
diff --git a/backend/app/events/core/producer.py b/backend/app/events/core/producer.py
index a8cd5705..b5b88f60 100644
--- a/backend/app/events/core/producer.py
+++ b/backend/app/events/core/producer.py
@@ -9,11 +9,12 @@
from app.core.logging import logger
from app.core.metrics.context import get_event_metrics
-from app.dlq.models import DLQMessage
from app.domain.enums.kafka import KafkaTopic
-from app.events.core.types import ProducerConfig, ProducerMetrics, ProducerState
from app.events.schema.schema_registry import SchemaRegistryManager
from app.infrastructure.kafka.events import BaseEvent
+from app.infrastructure.mappers.dlq_mapper import DLQMapper
+
+from .types import ProducerConfig, ProducerMetrics, ProducerState
DeliveryCallback: TypeAlias = Callable[[KafkaError | None, Message], None]
StatsCallback: TypeAlias = Callable[[dict[str, Any]], None]
@@ -230,7 +231,7 @@ async def send_to_dlq(
producer_id = f"{socket.gethostname()}-{task_name}"
# Create DLQ message
- dlq_message = DLQMessage.from_failed_event(
+ dlq_message = DLQMapper.from_failed_event(
event=original_event,
original_topic=original_topic,
error=str(error),
diff --git a/backend/app/events/event_store.py b/backend/app/events/event_store.py
index 5b838e66..a0040d59 100644
--- a/backend/app/events/event_store.py
+++ b/backend/app/events/event_store.py
@@ -9,6 +9,8 @@
from app.core.logging import logger
from app.core.metrics.context import get_event_metrics
+from app.core.tracing import EventAttributes
+from app.core.tracing.utils import add_span_attributes
from app.domain.enums.events import EventType
from app.events.schema.schema_registry import SchemaRegistryManager
from app.infrastructure.kafka.events.base import BaseEvent
@@ -79,6 +81,14 @@ async def store_event(self, event: BaseEvent) -> bool:
doc["stored_at"] = datetime.now(timezone.utc)
await self.collection.insert_one(doc)
+ add_span_attributes(
+ **{
+ str(EventAttributes.EVENT_TYPE): str(event.event_type),
+ str(EventAttributes.EVENT_ID): event.event_id,
+ str(EventAttributes.EXECUTION_ID): event.aggregate_id or "",
+ }
+ )
+
duration = asyncio.get_event_loop().time() - start
self.metrics.record_event_store_duration(duration, "store_single", self.collection_name)
self.metrics.record_event_stored(event.event_type, self.collection_name)
@@ -122,6 +132,7 @@ async def store_batch(self, events: List[BaseEvent]) -> Dict[str, int]:
duration = asyncio.get_event_loop().time() - start
self.metrics.record_event_store_duration(duration, "store_batch", self.collection_name)
+ add_span_attributes(**{"events.batch.count": len(events)})
if results["stored"] > 0:
for event in events:
self.metrics.record_event_stored(event.event_type, self.collection_name)
diff --git a/backend/app/events/event_store_consumer.py b/backend/app/events/event_store_consumer.py
index 8b3a570a..70a2ee3f 100644
--- a/backend/app/events/event_store_consumer.py
+++ b/backend/app/events/event_store_consumer.py
@@ -1,13 +1,12 @@
import asyncio
+from opentelemetry.trace import SpanKind
+
from app.core.logging import logger
-from app.db.schema.schema_manager import SchemaManager
+from app.core.tracing.utils import trace_span
from app.domain.enums.events import EventType
from app.domain.enums.kafka import GroupId, KafkaTopic
-from app.events.core.consumer import ConsumerConfig, UnifiedConsumer
-from app.events.core.dispatcher import EventDispatcher
-from app.events.core.dlq_handler import create_dlq_error_handler
-from app.events.core.producer import UnifiedProducer
+from app.events.core import ConsumerConfig, EventDispatcher, UnifiedConsumer, UnifiedProducer, create_dlq_error_handler
from app.events.event_store import EventStore
from app.events.schema.schema_registry import SchemaRegistryManager
from app.infrastructure.kafka.events.base import BaseEvent
@@ -47,8 +46,6 @@ async def start(self) -> None:
if self._running:
return
- await SchemaManager(self.event_store.db).apply_all()
-
settings = get_settings()
config = ConsumerConfig(
bootstrap_servers=settings.KAFKA_BOOTSTRAP_SERVERS,
@@ -149,8 +146,12 @@ async def _flush_batch(self) -> None:
self._last_batch_time = asyncio.get_event_loop().time()
logger.info(f"Event store flushing batch of {len(batch)} events")
-
- results = await self.event_store.store_batch(batch)
+ with trace_span(
+ name="event_store.flush_batch",
+ kind=SpanKind.CONSUMER,
+ attributes={"events.batch.count": len(batch)},
+ ):
+ results = await self.event_store.store_batch(batch)
logger.info(
f"Stored event batch: total={results['total']}, "
diff --git a/backend/app/events/schema/schema_registry.py b/backend/app/events/schema/schema_registry.py
index f5dd837f..4d6a8ce2 100644
--- a/backend/app/events/schema/schema_registry.py
+++ b/backend/app/events/schema/schema_registry.py
@@ -4,7 +4,7 @@
from typing import Any, Dict, Type, TypeVar
import httpx
-from confluent_kafka.schema_registry import Schema, SchemaRegistryClient
+from confluent_kafka.schema_registry import Schema, SchemaRegistryClient, record_subject_name_strategy
from confluent_kafka.schema_registry.avro import AvroDeserializer, AvroSerializer
from confluent_kafka.serialization import MessageField, SerializationContext
@@ -118,7 +118,12 @@ def serialize_event(self, event: BaseEvent) -> bytes:
subject = f"{event.__class__.__name__}-value"
if subject not in self._serializers:
schema_str = json.dumps(event.__class__.avro_schema(namespace=self.namespace))
- self._serializers[subject] = AvroSerializer(self.client, schema_str)
+ # Use record_subject_name_strategy to ensure subject is based on record name, not topic
+ self._serializers[subject] = AvroSerializer(
+ self.client,
+ schema_str,
+ conf={'subject.name.strategy': record_subject_name_strategy}
+ )
# Prepare payload dict (exclude event_type: schema id implies the concrete record)
# Don't use mode="json" as it converts datetime to string, breaking Avro timestamp-micros
diff --git a/backend/app/infrastructure/kafka/events/__init__.py b/backend/app/infrastructure/kafka/events/__init__.py
index 96be2983..6954a4a6 100644
--- a/backend/app/infrastructure/kafka/events/__init__.py
+++ b/backend/app/infrastructure/kafka/events/__init__.py
@@ -97,7 +97,6 @@
"UserDeletedEvent",
"UserSettingsUpdatedEvent",
"UserThemeChangedEvent",
- "UserLanguageChangedEvent",
"UserNotificationSettingsUpdatedEvent",
"UserEditorSettingsUpdatedEvent",
# Notification
diff --git a/backend/app/infrastructure/kafka/events/execution.py b/backend/app/infrastructure/kafka/events/execution.py
index 411f7474..7c891697 100644
--- a/backend/app/infrastructure/kafka/events/execution.py
+++ b/backend/app/infrastructure/kafka/events/execution.py
@@ -6,7 +6,7 @@
from app.domain.enums.events import EventType
from app.domain.enums.kafka import KafkaTopic
from app.domain.enums.storage import ExecutionErrorType
-from app.domain.execution.models import ResourceUsageDomain
+from app.domain.execution import ResourceUsageDomain
from app.infrastructure.kafka.events.base import BaseEvent
diff --git a/backend/app/infrastructure/kafka/events/notification.py b/backend/app/infrastructure/kafka/events/notification.py
index b8be4966..1659a0ed 100644
--- a/backend/app/infrastructure/kafka/events/notification.py
+++ b/backend/app/infrastructure/kafka/events/notification.py
@@ -1,10 +1,8 @@
-"""Notification-related Kafka events."""
-
from typing import ClassVar, Literal
from app.domain.enums.events import EventType
from app.domain.enums.kafka import KafkaTopic
-from app.domain.enums.notification import NotificationChannel, NotificationPriority
+from app.domain.enums.notification import NotificationChannel, NotificationSeverity
from app.infrastructure.kafka.events.base import BaseEvent
@@ -13,9 +11,10 @@ class NotificationCreatedEvent(BaseEvent):
topic: ClassVar[KafkaTopic] = KafkaTopic.NOTIFICATION_EVENTS
notification_id: str
user_id: str
- title: str
- message: str
- priority: NotificationPriority
+ subject: str
+ body: str
+ severity: NotificationSeverity
+ tags: list[str]
channels: list[NotificationChannel]
diff --git a/backend/app/infrastructure/kafka/events/pod.py b/backend/app/infrastructure/kafka/events/pod.py
index 625c1562..b8a6138a 100644
--- a/backend/app/infrastructure/kafka/events/pod.py
+++ b/backend/app/infrastructure/kafka/events/pod.py
@@ -1,5 +1,3 @@
-"""Pod lifecycle Kafka events."""
-
from typing import ClassVar, Literal
from app.domain.enums.events import EventType
diff --git a/backend/app/infrastructure/kafka/events/saga.py b/backend/app/infrastructure/kafka/events/saga.py
index fc2c4a3c..fb3b2133 100644
--- a/backend/app/infrastructure/kafka/events/saga.py
+++ b/backend/app/infrastructure/kafka/events/saga.py
@@ -1,5 +1,3 @@
-"""Saga-related Kafka events."""
-
from datetime import datetime
from typing import ClassVar, Literal
diff --git a/backend/app/infrastructure/kafka/events/user.py b/backend/app/infrastructure/kafka/events/user.py
index d6adc32c..6378bb1f 100644
--- a/backend/app/infrastructure/kafka/events/user.py
+++ b/backend/app/infrastructure/kafka/events/user.py
@@ -1,5 +1,3 @@
-"""User-related Kafka events."""
-
from typing import ClassVar, Literal
from app.domain.enums.auth import LoginMethod, SettingsType
@@ -58,7 +56,7 @@ class UserSettingsUpdatedEvent(BaseEvent):
class UserThemeChangedEvent(BaseEvent):
event_type: Literal[EventType.USER_THEME_CHANGED] = EventType.USER_THEME_CHANGED
- topic: ClassVar[KafkaTopic] = KafkaTopic.USER_SETTINGS_EVENTS
+ topic: ClassVar[KafkaTopic] = KafkaTopic.USER_SETTINGS_THEME_EVENTS
user_id: str
old_theme: str
new_theme: str
@@ -66,13 +64,14 @@ class UserThemeChangedEvent(BaseEvent):
class UserNotificationSettingsUpdatedEvent(BaseEvent):
event_type: Literal[EventType.USER_NOTIFICATION_SETTINGS_UPDATED] = EventType.USER_NOTIFICATION_SETTINGS_UPDATED
- topic: ClassVar[KafkaTopic] = KafkaTopic.USER_SETTINGS_EVENTS
+ topic: ClassVar[KafkaTopic] = KafkaTopic.USER_SETTINGS_NOTIFICATION_EVENTS
user_id: str
settings: dict[str, bool]
+ channels: list[str] | None = None
class UserEditorSettingsUpdatedEvent(BaseEvent):
event_type: Literal[EventType.USER_EDITOR_SETTINGS_UPDATED] = EventType.USER_EDITOR_SETTINGS_UPDATED
- topic: ClassVar[KafkaTopic] = KafkaTopic.USER_SETTINGS_EVENTS
+ topic: ClassVar[KafkaTopic] = KafkaTopic.USER_SETTINGS_EDITOR_EVENTS
user_id: str
settings: dict[str, str | int | bool]
diff --git a/backend/app/infrastructure/kafka/topics.py b/backend/app/infrastructure/kafka/topics.py
index c64c878b..0fae304a 100644
--- a/backend/app/infrastructure/kafka/topics.py
+++ b/backend/app/infrastructure/kafka/topics.py
@@ -1,5 +1,3 @@
-"""Kafka topic configuration and utilities."""
-
from typing import Any
from app.domain.enums.kafka import KafkaTopic
@@ -132,6 +130,30 @@ def get_topic_configs() -> dict[KafkaTopic, dict[str, Any]]:
"compression.type": "gzip",
}
},
+ KafkaTopic.USER_SETTINGS_THEME_EVENTS: {
+ "num_partitions": 3,
+ "replication_factor": 1,
+ "config": {
+ "retention.ms": "2592000000", # 30 days
+ "compression.type": "gzip",
+ }
+ },
+ KafkaTopic.USER_SETTINGS_NOTIFICATION_EVENTS: {
+ "num_partitions": 3,
+ "replication_factor": 1,
+ "config": {
+ "retention.ms": "2592000000", # 30 days
+ "compression.type": "gzip",
+ }
+ },
+ KafkaTopic.USER_SETTINGS_EDITOR_EVENTS: {
+ "num_partitions": 3,
+ "replication_factor": 1,
+ "config": {
+ "retention.ms": "2592000000", # 30 days
+ "compression.type": "gzip",
+ }
+ },
# Script topics
KafkaTopic.SCRIPT_EVENTS: {
diff --git a/backend/app/infrastructure/mappers/__init__.py b/backend/app/infrastructure/mappers/__init__.py
index e69de29b..ce001bc0 100644
--- a/backend/app/infrastructure/mappers/__init__.py
+++ b/backend/app/infrastructure/mappers/__init__.py
@@ -0,0 +1,101 @@
+from .admin_mapper import (
+ AuditLogMapper,
+ SettingsMapper,
+ UserListResultMapper,
+ UserMapper,
+)
+from .admin_overview_api_mapper import AdminOverviewApiMapper
+from .event_mapper import (
+ ArchivedEventMapper,
+ EventBrowseResultMapper,
+ EventDetailMapper,
+ EventExportRowMapper,
+ EventFilterMapper,
+ EventListResultMapper,
+ EventMapper,
+ EventProjectionMapper,
+ EventReplayInfoMapper,
+ EventStatisticsMapper,
+ EventSummaryMapper,
+)
+from .execution_api_mapper import ExecutionApiMapper
+from .notification_api_mapper import NotificationApiMapper
+from .notification_mapper import NotificationMapper
+from .rate_limit_mapper import (
+ RateLimitConfigMapper,
+ RateLimitRuleMapper,
+ RateLimitStatusMapper,
+ UserRateLimitMapper,
+)
+from .replay_api_mapper import ReplayApiMapper
+from .replay_mapper import ReplayApiMapper as AdminReplayApiMapper
+from .replay_mapper import (
+ ReplayQueryMapper,
+ ReplaySessionDataMapper,
+ ReplaySessionMapper,
+ ReplayStateMapper,
+)
+from .saga_mapper import (
+ SagaEventMapper,
+ SagaFilterMapper,
+ SagaInstanceMapper,
+ SagaMapper,
+ SagaResponseMapper,
+)
+from .saved_script_api_mapper import SavedScriptApiMapper
+from .saved_script_mapper import SavedScriptMapper
+from .sse_mapper import SSEMapper
+from .user_settings_api_mapper import UserSettingsApiMapper
+from .user_settings_mapper import UserSettingsMapper
+
+__all__ = [
+ # Admin
+ "UserMapper",
+ "UserListResultMapper",
+ "SettingsMapper",
+ "AuditLogMapper",
+ "AdminOverviewApiMapper",
+ # Events
+ "EventMapper",
+ "EventSummaryMapper",
+ "EventDetailMapper",
+ "EventListResultMapper",
+ "EventBrowseResultMapper",
+ "EventStatisticsMapper",
+ "EventProjectionMapper",
+ "ArchivedEventMapper",
+ "EventExportRowMapper",
+ "EventFilterMapper",
+ "EventReplayInfoMapper",
+ # Execution
+ "ExecutionApiMapper",
+ # Notification
+ "NotificationApiMapper",
+ "NotificationMapper",
+ # Rate limit
+ "RateLimitRuleMapper",
+ "UserRateLimitMapper",
+ "RateLimitConfigMapper",
+ "RateLimitStatusMapper",
+ # Replay
+ "ReplayApiMapper",
+ "AdminReplayApiMapper",
+ "ReplaySessionMapper",
+ "ReplayQueryMapper",
+ "ReplaySessionDataMapper",
+ "ReplayStateMapper",
+ # Saved scripts
+ "SavedScriptApiMapper",
+ "SavedScriptMapper",
+ # SSE
+ "SSEMapper",
+ # User settings
+ "UserSettingsApiMapper",
+ "UserSettingsMapper",
+ # Saga
+ "SagaMapper",
+ "SagaFilterMapper",
+ "SagaResponseMapper",
+ "SagaEventMapper",
+ "SagaInstanceMapper",
+]
diff --git a/backend/app/infrastructure/mappers/admin_mapper.py b/backend/app/infrastructure/mappers/admin_mapper.py
index 12cbfc28..10c9c008 100644
--- a/backend/app/infrastructure/mappers/admin_mapper.py
+++ b/backend/app/infrastructure/mappers/admin_mapper.py
@@ -2,7 +2,7 @@
from datetime import datetime, timezone
from typing import Any, Dict
-from app.domain.admin.settings_models import (
+from app.domain.admin import (
AuditAction,
AuditLogEntry,
AuditLogFields,
@@ -13,14 +13,15 @@
SettingsFields,
SystemSettings,
)
-from app.domain.admin.user_models import (
+from app.domain.user import (
User as DomainAdminUser,
)
-from app.domain.admin.user_models import (
+from app.domain.user import (
UserCreation,
UserFields,
UserListResult,
UserRole,
+ UserSearchFilter,
UserUpdate,
)
from app.schemas_pydantic.user import User as ServiceUser
@@ -42,18 +43,18 @@ def to_mongo_document(user: DomainAdminUser) -> Dict[str, Any]:
UserFields.CREATED_AT: user.created_at,
UserFields.UPDATED_AT: user.updated_at
}
-
+
@staticmethod
def from_mongo_document(data: Dict[str, Any]) -> DomainAdminUser:
required_fields = [UserFields.USER_ID, UserFields.USERNAME, UserFields.EMAIL]
for field in required_fields:
if field not in data or not data[field]:
raise ValueError(f"Missing required field: {field}")
-
+
email = data[UserFields.EMAIL]
if not EMAIL_PATTERN.match(email):
raise ValueError(f"Invalid email format: {email}")
-
+
return DomainAdminUser(
user_id=data[UserFields.USER_ID],
username=data[UserFields.USERNAME],
@@ -65,12 +66,12 @@ def from_mongo_document(data: Dict[str, Any]) -> DomainAdminUser:
created_at=data.get(UserFields.CREATED_AT, datetime.now(timezone.utc)),
updated_at=data.get(UserFields.UPDATED_AT, datetime.now(timezone.utc))
)
-
+
@staticmethod
def to_response_dict(user: DomainAdminUser) -> Dict[str, Any]:
created_at_ts = user.created_at.timestamp() if user.created_at else 0.0
updated_at_ts = user.updated_at.timestamp() if user.updated_at else 0.0
-
+
return {
"user_id": user.user_id,
"username": user.username,
@@ -96,11 +97,11 @@ def from_pydantic_service_user(user: ServiceUser) -> DomainAdminUser:
created_at=user.created_at or datetime.now(timezone.utc),
updated_at=user.updated_at or datetime.now(timezone.utc),
)
-
+
@staticmethod
def to_update_dict(update: UserUpdate) -> Dict[str, Any]:
update_dict: Dict[str, Any] = {}
-
+
if update.username is not None:
update_dict[UserFields.USERNAME] = update.username
if update.email is not None:
@@ -111,9 +112,21 @@ def to_update_dict(update: UserUpdate) -> Dict[str, Any]:
update_dict[UserFields.ROLE] = update.role.value
if update.is_active is not None:
update_dict[UserFields.IS_ACTIVE] = update.is_active
-
+
return update_dict
-
+
+ @staticmethod
+ def search_filter_to_query(f: UserSearchFilter) -> Dict[str, Any]:
+ query: Dict[str, Any] = {}
+ if f.search_text:
+ query["$or"] = [
+ {UserFields.USERNAME.value: {"$regex": f.search_text, "$options": "i"}},
+ {UserFields.EMAIL.value: {"$regex": f.search_text, "$options": "i"}},
+ ]
+ if f.role:
+ query[UserFields.ROLE] = f.role
+ return query
+
@staticmethod
def user_creation_to_dict(creation: UserCreation) -> Dict[str, Any]:
return {
@@ -148,7 +161,7 @@ def execution_limits_to_dict(limits: ExecutionLimits) -> dict[str, int]:
"max_cpu_cores": limits.max_cpu_cores,
"max_concurrent_executions": limits.max_concurrent_executions
}
-
+
@staticmethod
def execution_limits_from_dict(data: dict[str, Any] | None) -> ExecutionLimits:
if not data:
@@ -159,7 +172,7 @@ def execution_limits_from_dict(data: dict[str, Any] | None) -> ExecutionLimits:
max_cpu_cores=data.get("max_cpu_cores", 2),
max_concurrent_executions=data.get("max_concurrent_executions", 10)
)
-
+
@staticmethod
def security_settings_to_dict(settings: SecuritySettings) -> dict[str, int]:
return {
@@ -168,7 +181,7 @@ def security_settings_to_dict(settings: SecuritySettings) -> dict[str, int]:
"max_login_attempts": settings.max_login_attempts,
"lockout_duration_minutes": settings.lockout_duration_minutes
}
-
+
@staticmethod
def security_settings_from_dict(data: dict[str, Any] | None) -> SecuritySettings:
if not data:
@@ -179,7 +192,7 @@ def security_settings_from_dict(data: dict[str, Any] | None) -> SecuritySettings
max_login_attempts=data.get("max_login_attempts", 5),
lockout_duration_minutes=data.get("lockout_duration_minutes", 15)
)
-
+
@staticmethod
def monitoring_settings_to_dict(settings: MonitoringSettings) -> dict[str, Any]:
return {
@@ -188,7 +201,7 @@ def monitoring_settings_to_dict(settings: MonitoringSettings) -> dict[str, Any]:
"enable_tracing": settings.enable_tracing,
"sampling_rate": settings.sampling_rate
}
-
+
@staticmethod
def monitoring_settings_from_dict(data: dict[str, Any] | None) -> MonitoringSettings:
if not data:
@@ -199,7 +212,7 @@ def monitoring_settings_from_dict(data: dict[str, Any] | None) -> MonitoringSett
enable_tracing=data.get("enable_tracing", True),
sampling_rate=data.get("sampling_rate", 0.1)
)
-
+
@staticmethod
def system_settings_to_dict(settings: SystemSettings) -> dict[str, Any]:
mapper = SettingsMapper()
@@ -210,7 +223,7 @@ def system_settings_to_dict(settings: SystemSettings) -> dict[str, Any]:
SettingsFields.CREATED_AT: settings.created_at,
SettingsFields.UPDATED_AT: settings.updated_at
}
-
+
@staticmethod
def system_settings_from_dict(data: dict[str, Any] | None) -> SystemSettings:
if not data:
@@ -223,7 +236,7 @@ def system_settings_from_dict(data: dict[str, Any] | None) -> SystemSettings:
created_at=data.get(SettingsFields.CREATED_AT, datetime.now(timezone.utc)),
updated_at=data.get(SettingsFields.UPDATED_AT, datetime.now(timezone.utc))
)
-
+
@staticmethod
def system_settings_to_pydantic_dict(settings: SystemSettings) -> dict[str, Any]:
mapper = SettingsMapper()
@@ -232,7 +245,7 @@ def system_settings_to_pydantic_dict(settings: SystemSettings) -> dict[str, Any]
"security_settings": mapper.security_settings_to_dict(settings.security_settings),
"monitoring_settings": mapper.monitoring_settings_to_dict(settings.monitoring_settings)
}
-
+
@staticmethod
def system_settings_from_pydantic(data: dict[str, Any]) -> SystemSettings:
mapper = SettingsMapper()
@@ -254,7 +267,7 @@ def to_dict(entry: AuditLogEntry) -> dict[str, Any]:
AuditLogFields.CHANGES: entry.changes,
"reason": entry.reason # reason is not in the enum but used as additional field
}
-
+
@staticmethod
def from_dict(data: dict[str, Any]) -> AuditLogEntry:
return AuditLogEntry(
diff --git a/backend/app/infrastructure/mappers/admin_overview_api_mapper.py b/backend/app/infrastructure/mappers/admin_overview_api_mapper.py
index 230950a8..a624ad84 100644
--- a/backend/app/infrastructure/mappers/admin_overview_api_mapper.py
+++ b/backend/app/infrastructure/mappers/admin_overview_api_mapper.py
@@ -2,11 +2,7 @@
from typing import Any, Dict, List
-from app.domain.admin.overview_models import (
- AdminUserOverviewDomain,
-)
-from app.infrastructure.mappers.admin_mapper import UserMapper
-from app.infrastructure.mappers.event_mapper import EventMapper, EventStatisticsMapper
+from app.domain.admin import AdminUserOverviewDomain
from app.schemas_pydantic.admin_user_overview import (
AdminUserOverview,
DerivedCounts,
@@ -15,6 +11,9 @@
from app.schemas_pydantic.events import EventStatistics as EventStatisticsSchema
from app.schemas_pydantic.user import UserResponse
+from .admin_mapper import UserMapper
+from .event_mapper import EventMapper, EventStatisticsMapper
+
class AdminOverviewApiMapper:
def __init__(self) -> None:
@@ -46,4 +45,3 @@ def to_response(self, d: AdminUserOverviewDomain) -> AdminUserOverview:
rate_limit_summary=rl,
recent_events=recent_events,
)
-
diff --git a/backend/app/infrastructure/mappers/dlq_mapper.py b/backend/app/infrastructure/mappers/dlq_mapper.py
new file mode 100644
index 00000000..3f9d3b22
--- /dev/null
+++ b/backend/app/infrastructure/mappers/dlq_mapper.py
@@ -0,0 +1,239 @@
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+from typing import Mapping
+
+from confluent_kafka import Message
+
+from app.dlq.models import (
+ DLQBatchRetryResult,
+ DLQFields,
+ DLQMessage,
+ DLQMessageFilter,
+ DLQMessageStatus,
+ DLQMessageUpdate,
+ DLQRetryResult,
+)
+from app.events.schema.schema_registry import SchemaRegistryManager
+from app.infrastructure.kafka.events import BaseEvent
+
+
+class DLQMapper:
+ """Mongo/Kafka โ DLQMessage conversions."""
+
+ @staticmethod
+ def to_mongo_document(message: DLQMessage) -> dict[str, object]:
+ doc: dict[str, object] = {
+ DLQFields.EVENT: message.event.to_dict(),
+ DLQFields.ORIGINAL_TOPIC: message.original_topic,
+ DLQFields.ERROR: message.error,
+ DLQFields.RETRY_COUNT: message.retry_count,
+ DLQFields.FAILED_AT: message.failed_at,
+ DLQFields.STATUS: message.status,
+ DLQFields.PRODUCER_ID: message.producer_id,
+ }
+ if message.event_id:
+ doc[DLQFields.EVENT_ID] = message.event_id
+ if message.created_at:
+ doc[DLQFields.CREATED_AT] = message.created_at
+ if message.last_updated:
+ doc[DLQFields.LAST_UPDATED] = message.last_updated
+ if message.next_retry_at:
+ doc[DLQFields.NEXT_RETRY_AT] = message.next_retry_at
+ if message.retried_at:
+ doc[DLQFields.RETRIED_AT] = message.retried_at
+ if message.discarded_at:
+ doc[DLQFields.DISCARDED_AT] = message.discarded_at
+ if message.discard_reason:
+ doc[DLQFields.DISCARD_REASON] = message.discard_reason
+ if message.dlq_offset is not None:
+ doc[DLQFields.DLQ_OFFSET] = message.dlq_offset
+ if message.dlq_partition is not None:
+ doc[DLQFields.DLQ_PARTITION] = message.dlq_partition
+ if message.last_error:
+ doc[DLQFields.LAST_ERROR] = message.last_error
+ return doc
+
+ @staticmethod
+ def from_mongo_document(data: Mapping[str, object]) -> DLQMessage:
+ schema_registry = SchemaRegistryManager()
+
+ def parse_dt(value: object) -> datetime | None:
+ if value is None:
+ return None
+ if isinstance(value, datetime):
+ return value if value.tzinfo else value.replace(tzinfo=timezone.utc)
+ if isinstance(value, str):
+ return datetime.fromisoformat(value).replace(tzinfo=timezone.utc)
+ raise ValueError("Invalid datetime type")
+
+ failed_at_raw = data.get(DLQFields.FAILED_AT)
+ if failed_at_raw is None:
+ raise ValueError("Missing failed_at")
+ failed_at = parse_dt(failed_at_raw)
+ if failed_at is None:
+ raise ValueError("Invalid failed_at value")
+
+ event_data = data.get(DLQFields.EVENT)
+ if not isinstance(event_data, dict):
+ raise ValueError("Missing or invalid event data")
+ event = schema_registry.deserialize_json(event_data)
+
+ status_raw = data.get(DLQFields.STATUS, DLQMessageStatus.PENDING)
+ status = DLQMessageStatus(str(status_raw))
+
+ retry_count_value: int = data.get(DLQFields.RETRY_COUNT, 0) # type: ignore[assignment]
+ dlq_offset_value: int | None = data.get(DLQFields.DLQ_OFFSET) # type: ignore[assignment]
+ dlq_partition_value: int | None = data.get(DLQFields.DLQ_PARTITION) # type: ignore[assignment]
+
+ return DLQMessage(
+ event=event,
+ original_topic=str(data.get(DLQFields.ORIGINAL_TOPIC, "")),
+ error=str(data.get(DLQFields.ERROR, "")),
+ retry_count=retry_count_value,
+ failed_at=failed_at,
+ status=status,
+ producer_id=str(data.get(DLQFields.PRODUCER_ID, "unknown")),
+ event_id=str(data.get(DLQFields.EVENT_ID, "") or event.event_id),
+ created_at=parse_dt(data.get(DLQFields.CREATED_AT)),
+ last_updated=parse_dt(data.get(DLQFields.LAST_UPDATED)),
+ next_retry_at=parse_dt(data.get(DLQFields.NEXT_RETRY_AT)),
+ retried_at=parse_dt(data.get(DLQFields.RETRIED_AT)),
+ discarded_at=parse_dt(data.get(DLQFields.DISCARDED_AT)),
+ discard_reason=str(data.get(DLQFields.DISCARD_REASON, "")) or None,
+ dlq_offset=dlq_offset_value,
+ dlq_partition=dlq_partition_value,
+ last_error=str(data.get(DLQFields.LAST_ERROR, "")) or None,
+ )
+
+ @staticmethod
+ def from_kafka_message(message: Message, schema_registry: SchemaRegistryManager) -> DLQMessage:
+ record_value = message.value()
+ if record_value is None:
+ raise ValueError("Message has no value")
+
+ data = json.loads(record_value.decode("utf-8"))
+ event_data = data.get("event", {})
+ event = schema_registry.deserialize_json(event_data)
+
+ headers: dict[str, str] = {}
+ msg_headers = message.headers()
+ if msg_headers:
+ for key, value in msg_headers:
+ headers[key] = value.decode("utf-8") if value else ""
+
+ failed_at_str = data.get("failed_at")
+ failed_at = (
+ datetime.fromisoformat(failed_at_str).replace(tzinfo=timezone.utc)
+ if failed_at_str
+ else datetime.now(timezone.utc)
+ )
+
+ offset: int = message.offset() # type: ignore[assignment]
+ partition: int = message.partition() # type: ignore[assignment]
+
+ return DLQMessage(
+ event=event,
+ original_topic=data.get("original_topic", "unknown"),
+ error=data.get("error", "Unknown error"),
+ retry_count=data.get("retry_count", 0),
+ failed_at=failed_at,
+ status=DLQMessageStatus.PENDING,
+ producer_id=data.get("producer_id", "unknown"),
+ event_id=event.event_id,
+ headers=headers,
+ dlq_offset=offset if offset >= 0 else None,
+ dlq_partition=partition if partition >= 0 else None,
+ )
+
+ @staticmethod
+ def to_response_dict(message: DLQMessage) -> dict[str, object]:
+ return {
+ "event_id": message.event_id,
+ "event_type": message.event_type,
+ "event": message.event.to_dict(),
+ "original_topic": message.original_topic,
+ "error": message.error,
+ "retry_count": message.retry_count,
+ "failed_at": message.failed_at,
+ "status": message.status,
+ "age_seconds": message.age_seconds,
+ "producer_id": message.producer_id,
+ "dlq_offset": message.dlq_offset,
+ "dlq_partition": message.dlq_partition,
+ "last_error": message.last_error,
+ "next_retry_at": message.next_retry_at,
+ "retried_at": message.retried_at,
+ "discarded_at": message.discarded_at,
+ "discard_reason": message.discard_reason,
+ }
+
+ @staticmethod
+ def retry_result_to_dict(result: DLQRetryResult) -> dict[str, object]:
+ d: dict[str, object] = {"event_id": result.event_id, "status": result.status}
+ if result.error:
+ d["error"] = result.error
+ return d
+
+ @staticmethod
+ def batch_retry_result_to_dict(result: DLQBatchRetryResult) -> dict[str, object]:
+ return {
+ "total": result.total,
+ "successful": result.successful,
+ "failed": result.failed,
+ "details": [DLQMapper.retry_result_to_dict(d) for d in result.details],
+ }
+
+ # Domain construction and updates
+ @staticmethod
+ def from_failed_event(
+ event: BaseEvent,
+ original_topic: str,
+ error: str,
+ producer_id: str,
+ retry_count: int = 0,
+ ) -> DLQMessage:
+ return DLQMessage(
+ event=event,
+ original_topic=original_topic,
+ error=error,
+ retry_count=retry_count,
+ failed_at=datetime.now(timezone.utc),
+ status=DLQMessageStatus.PENDING,
+ producer_id=producer_id,
+ )
+
+ @staticmethod
+ def update_to_mongo(update: DLQMessageUpdate) -> dict[str, object]:
+ now = datetime.now(timezone.utc)
+ doc: dict[str, object] = {
+ str(DLQFields.STATUS): update.status,
+ str(DLQFields.LAST_UPDATED): now,
+ }
+ if update.next_retry_at is not None:
+ doc[str(DLQFields.NEXT_RETRY_AT)] = update.next_retry_at
+ if update.retried_at is not None:
+ doc[str(DLQFields.RETRIED_AT)] = update.retried_at
+ if update.discarded_at is not None:
+ doc[str(DLQFields.DISCARDED_AT)] = update.discarded_at
+ if update.retry_count is not None:
+ doc[str(DLQFields.RETRY_COUNT)] = update.retry_count
+ if update.discard_reason is not None:
+ doc[str(DLQFields.DISCARD_REASON)] = update.discard_reason
+ if update.last_error is not None:
+ doc[str(DLQFields.LAST_ERROR)] = update.last_error
+ if update.extra:
+ doc.update(update.extra)
+ return doc
+
+ @staticmethod
+ def filter_to_query(f: DLQMessageFilter) -> dict[str, object]:
+ query: dict[str, object] = {}
+ if f.status:
+ query[DLQFields.STATUS] = f.status
+ if f.topic:
+ query[DLQFields.ORIGINAL_TOPIC] = f.topic
+ if f.event_type:
+ query[DLQFields.EVENT_TYPE] = f.event_type
+ return query
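
A minimal sketch of how the new `DLQMapper` might be used when an event fails to publish. The topic name, producer id, and collection name are hypothetical; `event` is assumed to be an existing `BaseEvent` instance.

```python
from app.infrastructure.kafka.events import BaseEvent
from app.infrastructure.mappers.dlq_mapper import DLQMapper


async def park_failed_event(db, event: BaseEvent, error: str) -> None:
    # Wrap the failed event; status defaults to PENDING and failed_at to now.
    dlq_message = DLQMapper.from_failed_event(
        event=event,
        original_topic="execution-events",   # hypothetical topic name
        error=error,
        producer_id="execution-worker-1",    # hypothetical producer id
    )
    # Optional fields are only written when present, keeping documents compact.
    await db.dlq_messages.insert_one(DLQMapper.to_mongo_document(dlq_message))
```
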
diff --git a/backend/app/infrastructure/mappers/event_mapper.py b/backend/app/infrastructure/mappers/event_mapper.py
index dbbcc5e6..d1b7b9d4 100644
--- a/backend/app/infrastructure/mappers/event_mapper.py
+++ b/backend/app/infrastructure/mappers/event_mapper.py
@@ -8,6 +8,7 @@
EventDetail,
EventExportRow,
EventFields,
+ EventFilter,
EventListResult,
EventProjection,
EventReplayInfo,
@@ -16,6 +17,7 @@
HourlyEventCount,
)
from app.infrastructure.kafka.events.metadata import EventMetadata
+from app.schemas_pydantic.admin_events import EventFilter as AdminEventFilter
class EventMapper:
@@ -292,6 +294,72 @@ def to_dict(row: EventExportRow) -> dict[str, str]:
"Error": row.error
}
+ @staticmethod
+ def from_event(event: Event) -> EventExportRow:
+ return EventExportRow(
+ event_id=event.event_id,
+ event_type=event.event_type,
+ timestamp=event.timestamp.isoformat(),
+ correlation_id=event.metadata.correlation_id or "",
+ aggregate_id=event.aggregate_id or "",
+ user_id=event.metadata.user_id or "",
+ service=event.metadata.service_name,
+ status=event.status or "",
+ error=event.error or "",
+ )
+
+
+class EventFilterMapper:
+ """Converts EventFilter domain model into MongoDB queries."""
+
+ @staticmethod
+ def to_mongo_query(flt: EventFilter) -> dict[str, Any]:
+ query: dict[str, Any] = {}
+
+ if flt.event_types:
+ query[EventFields.EVENT_TYPE] = {"$in": flt.event_types}
+ if flt.aggregate_id:
+ query[EventFields.AGGREGATE_ID] = flt.aggregate_id
+ if flt.correlation_id:
+ query[EventFields.METADATA_CORRELATION_ID] = flt.correlation_id
+ if flt.user_id:
+ query[EventFields.METADATA_USER_ID] = flt.user_id
+ if flt.service_name:
+ query[EventFields.METADATA_SERVICE_NAME] = flt.service_name
+ if getattr(flt, "status", None):
+ query[EventFields.STATUS] = flt.status
+
+ if flt.start_time or flt.end_time:
+ time_query: dict[str, Any] = {}
+ if flt.start_time:
+ time_query["$gte"] = flt.start_time
+ if flt.end_time:
+ time_query["$lte"] = flt.end_time
+ query[EventFields.TIMESTAMP] = time_query
+
+ search = getattr(flt, "text_search", None) or getattr(flt, "search_text", None)
+ if search:
+ query["$text"] = {"$search": search}
+
+ return query
+
+ @staticmethod
+ def from_admin_pydantic(pflt: AdminEventFilter) -> EventFilter:
+ ev_types: list[str] | None = None
+ if pflt.event_types is not None:
+ ev_types = [str(et) for et in pflt.event_types]
+ return EventFilter(
+ event_types=ev_types,
+ aggregate_id=pflt.aggregate_id,
+ correlation_id=pflt.correlation_id,
+ user_id=pflt.user_id,
+ service_name=pflt.service_name,
+ start_time=pflt.start_time,
+ end_time=pflt.end_time,
+ search_text=pflt.search_text,
+ text_search=pflt.search_text,
+ )
+
class EventReplayInfoMapper:
"""Handles EventReplayInfo serialization."""
diff --git a/backend/app/infrastructure/mappers/execution_api_mapper.py b/backend/app/infrastructure/mappers/execution_api_mapper.py
index 204a137e..2f6f7ff9 100644
--- a/backend/app/infrastructure/mappers/execution_api_mapper.py
+++ b/backend/app/infrastructure/mappers/execution_api_mapper.py
@@ -4,7 +4,7 @@
from app.domain.enums.common import ErrorType
from app.domain.enums.storage import ExecutionErrorType
-from app.domain.execution.models import DomainExecution, ResourceUsageDomain
+from app.domain.execution import DomainExecution, ResourceUsageDomain
from app.schemas_pydantic.execution import ExecutionResponse, ExecutionResult
from app.schemas_pydantic.execution import ResourceUsage as ResourceUsageSchema
@@ -33,8 +33,8 @@ def _map_error(t: Optional[ExecutionErrorType]) -> Optional[ErrorType]:
return ExecutionResult(
execution_id=e.execution_id,
status=e.status,
- output=e.output,
- errors=e.errors,
+ stdout=e.stdout,
+ stderr=e.stderr,
lang=e.lang,
lang_version=e.lang_version,
resource_usage=ru,
diff --git a/backend/app/infrastructure/mappers/notification_api_mapper.py b/backend/app/infrastructure/mappers/notification_api_mapper.py
index 596f0174..166ee14c 100644
--- a/backend/app/infrastructure/mappers/notification_api_mapper.py
+++ b/backend/app/infrastructure/mappers/notification_api_mapper.py
@@ -2,7 +2,7 @@
from typing import Dict, List
-from app.domain.notification.models import (
+from app.domain.notification import (
DomainNotification,
DomainNotificationListResult,
DomainNotificationSubscription,
@@ -20,7 +20,6 @@ class NotificationApiMapper:
def to_response(n: DomainNotification) -> NotificationResponse:
return NotificationResponse(
notification_id=n.notification_id,
- notification_type=n.notification_type,
channel=n.channel,
status=n.status,
subject=n.subject,
@@ -28,7 +27,8 @@ def to_response(n: DomainNotification) -> NotificationResponse:
action_url=n.action_url,
created_at=n.created_at,
read_at=n.read_at,
- priority=n.priority.value if hasattr(n.priority, "value") else str(n.priority),
+ severity=n.severity,
+ tags=n.tags,
)
@staticmethod
@@ -45,7 +45,9 @@ def subscription_to_pydantic(s: DomainNotificationSubscription) -> NotificationS
user_id=s.user_id,
channel=s.channel,
enabled=s.enabled,
- notification_types=s.notification_types,
+ severities=s.severities,
+ include_tags=s.include_tags,
+ exclude_tags=s.exclude_tags,
webhook_url=s.webhook_url,
slack_webhook=s.slack_webhook,
quiet_hours_enabled=s.quiet_hours_enabled,
@@ -63,4 +65,3 @@ def subscriptions_dict_to_response(subs: Dict[str, DomainNotificationSubscriptio
NotificationApiMapper.subscription_to_pydantic(s) for s in subs.values()
]
return SubscriptionsResponse(subscriptions=py_subs)
-
diff --git a/backend/app/infrastructure/mappers/notification_mapper.py b/backend/app/infrastructure/mappers/notification_mapper.py
new file mode 100644
index 00000000..8edc32c3
--- /dev/null
+++ b/backend/app/infrastructure/mappers/notification_mapper.py
@@ -0,0 +1,38 @@
+from dataclasses import asdict, fields
+
+from app.domain.notification import (
+ DomainNotification,
+ DomainNotificationSubscription,
+)
+
+
+class NotificationMapper:
+ """Map Notification domain models to/from MongoDB documents."""
+
+ # DomainNotification
+ @staticmethod
+ def to_mongo_document(notification: DomainNotification) -> dict:
+ return asdict(notification)
+
+ @staticmethod
+ def to_update_dict(notification: DomainNotification) -> dict:
+ doc = asdict(notification)
+ doc.pop("notification_id", None)
+ return doc
+
+ @staticmethod
+ def from_mongo_document(doc: dict) -> DomainNotification:
+ allowed = {f.name for f in fields(DomainNotification)}
+ filtered = {k: v for k, v in doc.items() if k in allowed}
+ return DomainNotification(**filtered)
+
+ # DomainNotificationSubscription
+ @staticmethod
+ def subscription_to_mongo_document(subscription: DomainNotificationSubscription) -> dict:
+ return asdict(subscription)
+
+ @staticmethod
+ def subscription_from_mongo_document(doc: dict) -> DomainNotificationSubscription:
+ allowed = {f.name for f in fields(DomainNotificationSubscription)}
+ filtered = {k: v for k, v in doc.items() if k in allowed}
+ return DomainNotificationSubscription(**filtered)
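
The new `NotificationMapper` follows a filter-by-dataclass-fields pattern, so a small sketch of reading a raw driver document:

```python
from app.domain.notification import DomainNotification
from app.infrastructure.mappers.notification_mapper import NotificationMapper


def load_notification(raw_doc: dict) -> DomainNotification:
    # Keys that are not DomainNotification fields (e.g. Mongo's "_id") are
    # silently dropped, so raw driver documents can be passed straight through.
    return NotificationMapper.from_mongo_document(raw_doc)
```

`to_update_dict` likewise strips `notification_id`, so a `$set` built from it can never overwrite the identifier.
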
diff --git a/backend/app/infrastructure/mappers/replay_api_mapper.py b/backend/app/infrastructure/mappers/replay_api_mapper.py
index a334792a..1b3a842f 100644
--- a/backend/app/infrastructure/mappers/replay_api_mapper.py
+++ b/backend/app/infrastructure/mappers/replay_api_mapper.py
@@ -1,7 +1,7 @@
from __future__ import annotations
from app.domain.enums.replay import ReplayStatus
-from app.domain.replay.models import ReplayConfig, ReplayFilter, ReplaySessionState
+from app.domain.replay import ReplayConfig, ReplayFilter, ReplaySessionState
from app.schemas_pydantic.replay import CleanupResponse, ReplayRequest, ReplayResponse, SessionSummary
from app.schemas_pydantic.replay_models import (
ReplayConfigSchema,
@@ -90,8 +90,8 @@ def request_to_filter(req: ReplayRequest) -> ReplayFilter:
return ReplayFilter(
execution_id=req.execution_id,
event_types=req.event_types,
- start_time=req.start_time.timestamp() if req.start_time else None,
- end_time=req.end_time.timestamp() if req.end_time else None,
+ start_time=req.start_time if req.start_time else None,
+ end_time=req.end_time if req.end_time else None,
user_id=req.user_id,
service_name=req.service_name,
)
diff --git a/backend/app/infrastructure/mappers/replay_mapper.py b/backend/app/infrastructure/mappers/replay_mapper.py
index 6faf7a78..d903f393 100644
--- a/backend/app/infrastructure/mappers/replay_mapper.py
+++ b/backend/app/infrastructure/mappers/replay_mapper.py
@@ -1,16 +1,19 @@
from datetime import datetime, timezone
from typing import Any
-from app.domain.admin.replay_models import (
+from app.domain.admin import (
ReplayQuery,
ReplaySession,
ReplaySessionData,
ReplaySessionFields,
- ReplaySessionStatus,
ReplaySessionStatusDetail,
ReplaySessionStatusInfo,
)
+from app.domain.enums.replay import ReplayStatus
from app.domain.events.event_models import EventFields
+from app.domain.replay import ReplayConfig as DomainReplayConfig
+from app.domain.replay import ReplaySessionState
+from app.schemas_pydantic.admin_events import EventReplayRequest
class ReplaySessionMapper:
@@ -29,7 +32,7 @@ def to_dict(session: ReplaySession) -> dict[str, Any]:
ReplaySessionFields.DRY_RUN: session.dry_run,
"triggered_executions": session.triggered_executions
}
-
+
if session.started_at:
doc[ReplaySessionFields.STARTED_AT] = session.started_at
if session.completed_at:
@@ -40,15 +43,15 @@ def to_dict(session: ReplaySession) -> dict[str, Any]:
doc[ReplaySessionFields.CREATED_BY] = session.created_by
if session.target_service:
doc[ReplaySessionFields.TARGET_SERVICE] = session.target_service
-
+
return doc
-
+
@staticmethod
def from_dict(data: dict[str, Any]) -> ReplaySession:
return ReplaySession(
session_id=data.get(ReplaySessionFields.SESSION_ID, ""),
type=data.get(ReplaySessionFields.TYPE, "replay_session"),
- status=ReplaySessionStatus(data.get(ReplaySessionFields.STATUS, ReplaySessionStatus.SCHEDULED)),
+ status=ReplayStatus(data.get(ReplaySessionFields.STATUS, ReplayStatus.SCHEDULED)),
total_events=data.get(ReplaySessionFields.TOTAL_EVENTS, 0),
replayed_events=data.get(ReplaySessionFields.REPLAYED_EVENTS, 0),
failed_events=data.get(ReplaySessionFields.FAILED_EVENTS, 0),
@@ -63,7 +66,7 @@ def from_dict(data: dict[str, Any]) -> ReplaySession:
dry_run=data.get(ReplaySessionFields.DRY_RUN, False),
triggered_executions=data.get("triggered_executions", [])
)
-
+
@staticmethod
def status_detail_to_dict(detail: ReplaySessionStatusDetail) -> dict[str, Any]:
result = {
@@ -81,10 +84,10 @@ def status_detail_to_dict(detail: ReplaySessionStatusDetail) -> dict[str, Any]:
"progress_percentage": detail.session.progress_percentage,
"execution_results": detail.execution_results
}
-
+
if detail.estimated_completion:
result["estimated_completion"] = detail.estimated_completion
-
+
return result
@staticmethod
@@ -126,16 +129,16 @@ class ReplayQueryMapper:
@staticmethod
def to_mongodb_query(query: ReplayQuery) -> dict[str, Any]:
mongo_query: dict[str, Any] = {}
-
+
if query.event_ids:
mongo_query[EventFields.EVENT_ID] = {"$in": query.event_ids}
-
+
if query.correlation_id:
mongo_query[EventFields.METADATA_CORRELATION_ID] = query.correlation_id
-
+
if query.aggregate_id:
mongo_query[EventFields.AGGREGATE_ID] = query.aggregate_id
-
+
if query.start_time or query.end_time:
time_query = {}
if query.start_time:
@@ -143,7 +146,7 @@ def to_mongodb_query(query: ReplayQuery) -> dict[str, Any]:
if query.end_time:
time_query["$lte"] = query.end_time
mongo_query[EventFields.TIMESTAMP] = time_query
-
+
return mongo_query
@@ -156,7 +159,7 @@ def to_dict(data: ReplaySessionData) -> dict[str, Any]:
"replay_correlation_id": data.replay_correlation_id,
"query": data.query
}
-
+
if data.dry_run and data.events_preview:
result["events_preview"] = [
{
@@ -167,5 +170,68 @@ def to_dict(data: ReplaySessionData) -> dict[str, Any]:
}
for e in data.events_preview
]
-
+
return result
+
+
+class ReplayApiMapper:
+ """API-level mapper for converting replay requests to domain queries."""
+
+ @staticmethod
+ def request_to_query(req: EventReplayRequest) -> ReplayQuery:
+ return ReplayQuery(
+ event_ids=req.event_ids,
+ correlation_id=req.correlation_id,
+ aggregate_id=req.aggregate_id,
+ start_time=req.start_time,
+ end_time=req.end_time,
+ )
+
+
+class ReplayStateMapper:
+ """Mapper for service-level replay session state (domain.replay.models).
+
+ Moves all domain↔Mongo conversion out of the repository.
+ Assumes datetimes are stored as datetimes (no epoch/ISO fallback logic).
+ """
+
+ @staticmethod
+ def to_mongo_document(session: ReplaySessionState | Any) -> dict[str, Any]: # noqa: ANN401
+ cfg = session.config
+ # Both DomainReplayConfig and schema config are Pydantic models; use model_dump
+ cfg_dict = cfg.model_dump()
+ return {
+ "session_id": session.session_id,
+ "status": session.status,
+ "total_events": getattr(session, "total_events", 0),
+ "replayed_events": getattr(session, "replayed_events", 0),
+ "failed_events": getattr(session, "failed_events", 0),
+ "skipped_events": getattr(session, "skipped_events", 0),
+ "created_at": session.created_at,
+ "started_at": getattr(session, "started_at", None),
+ "completed_at": getattr(session, "completed_at", None),
+ "last_event_at": getattr(session, "last_event_at", None),
+ "errors": getattr(session, "errors", []),
+ "config": cfg_dict,
+ }
+
+ @staticmethod
+ def from_mongo_document(doc: dict[str, Any]) -> ReplaySessionState:
+ cfg_dict = doc.get("config", {})
+ cfg = DomainReplayConfig(**cfg_dict)
+ raw_status = doc.get("status", ReplayStatus.SCHEDULED)
+ status = raw_status if isinstance(raw_status, ReplayStatus) else ReplayStatus(str(raw_status))
+
+ return ReplaySessionState(
+ session_id=doc.get("session_id", ""),
+ config=cfg,
+ status=status,
+ total_events=doc.get("total_events", 0),
+ replayed_events=doc.get("replayed_events", 0),
+ failed_events=doc.get("failed_events", 0),
+ skipped_events=doc.get("skipped_events", 0),
+ started_at=doc.get("started_at"),
+ completed_at=doc.get("completed_at"),
+ last_event_at=doc.get("last_event_at"),
+ errors=doc.get("errors", []),
+ )
diff --git a/backend/app/infrastructure/mappers/saved_script_api_mapper.py b/backend/app/infrastructure/mappers/saved_script_api_mapper.py
index 2ef4ba5d..c759e494 100644
--- a/backend/app/infrastructure/mappers/saved_script_api_mapper.py
+++ b/backend/app/infrastructure/mappers/saved_script_api_mapper.py
@@ -2,7 +2,7 @@
from typing import List
-from app.domain.saved_script.models import (
+from app.domain.saved_script import (
DomainSavedScript,
DomainSavedScriptCreate,
DomainSavedScriptUpdate,
@@ -50,4 +50,3 @@ def to_response(s: DomainSavedScript) -> SavedScriptResponse:
@staticmethod
def list_to_response(items: List[DomainSavedScript]) -> List[SavedScriptResponse]:
return [SavedScriptApiMapper.to_response(i) for i in items]
-
diff --git a/backend/app/infrastructure/mappers/saved_script_mapper.py b/backend/app/infrastructure/mappers/saved_script_mapper.py
new file mode 100644
index 00000000..5d4ff774
--- /dev/null
+++ b/backend/app/infrastructure/mappers/saved_script_mapper.py
@@ -0,0 +1,54 @@
+from dataclasses import asdict, fields
+from datetime import datetime, timezone
+from typing import Any
+from uuid import uuid4
+
+from app.domain.saved_script import (
+ DomainSavedScript,
+ DomainSavedScriptCreate,
+ DomainSavedScriptUpdate,
+)
+
+
+class SavedScriptMapper:
+ """Mapper for Saved Script domain models to/from MongoDB docs."""
+
+ @staticmethod
+ def to_insert_document(create: DomainSavedScriptCreate, user_id: str) -> dict[str, Any]:
+ now = datetime.now(timezone.utc)
+ return {
+ "script_id": str(uuid4()),
+ "user_id": user_id,
+ "name": create.name,
+ "script": create.script,
+ "lang": create.lang,
+ "lang_version": create.lang_version,
+ "description": create.description,
+ "created_at": now,
+ "updated_at": now,
+ }
+
+ @staticmethod
+ def to_update_dict(update: DomainSavedScriptUpdate) -> dict[str, Any]:
+ # Convert to dict and drop None fields; keep updated_at
+ raw = asdict(update)
+ return {k: v for k, v in raw.items() if v is not None}
+
+ @staticmethod
+ def from_mongo_document(doc: dict[str, Any]) -> DomainSavedScript:
+ allowed = {f.name for f in fields(DomainSavedScript)}
+ filtered = {k: v for k, v in doc.items() if k in allowed}
+ # Coerce required fields to str where applicable for safety
+ if "script_id" in filtered:
+ filtered["script_id"] = str(filtered["script_id"])
+ if "user_id" in filtered:
+ filtered["user_id"] = str(filtered["user_id"])
+ if "name" in filtered:
+ filtered["name"] = str(filtered["name"])
+ if "script" in filtered:
+ filtered["script"] = str(filtered["script"])
+ if "lang" in filtered:
+ filtered["lang"] = str(filtered["lang"])
+ if "lang_version" in filtered:
+ filtered["lang_version"] = str(filtered["lang_version"])
+ return DomainSavedScript(**filtered) # dataclass defaults cover missing timestamps
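
A short sketch of building an insert document with the new `SavedScriptMapper`, assuming `DomainSavedScriptCreate` exposes the fields read above as constructor arguments; the user id and script content are hypothetical.

```python
from app.domain.saved_script import DomainSavedScriptCreate
from app.infrastructure.mappers.saved_script_mapper import SavedScriptMapper

create = DomainSavedScriptCreate(
    name="hello",
    script="print('hello')",
    lang="python",
    lang_version="3.11",
    description="Example script",  # hypothetical values throughout
)
doc = SavedScriptMapper.to_insert_document(create, user_id="user-123")
# doc carries a freshly generated script_id plus created_at/updated_at timestamps.
```
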
diff --git a/backend/app/infrastructure/mappers/sse_mapper.py b/backend/app/infrastructure/mappers/sse_mapper.py
new file mode 100644
index 00000000..85f1145e
--- /dev/null
+++ b/backend/app/infrastructure/mappers/sse_mapper.py
@@ -0,0 +1,47 @@
+from datetime import datetime, timezone
+from typing import Any, Dict
+
+from app.domain.enums.execution import ExecutionStatus
+from app.domain.execution import DomainExecution, ResourceUsageDomain
+from app.domain.sse import SSEEventDomain, SSEExecutionStatusDomain
+
+
+class SSEMapper:
+ """Mapper for SSE-related domain models and MongoDB documents."""
+
+ # Execution status (lightweight)
+ @staticmethod
+ def to_execution_status(execution_id: str, status: str) -> SSEExecutionStatusDomain:
+ return SSEExecutionStatusDomain(
+ execution_id=execution_id,
+ status=status,
+ timestamp=datetime.now(timezone.utc).isoformat(),
+ )
+
+ # Execution events
+ @staticmethod
+ def event_from_mongo_document(doc: Dict[str, Any]) -> SSEEventDomain:
+ return SSEEventDomain(
+ aggregate_id=str(doc.get("aggregate_id", "")),
+ timestamp=doc.get("timestamp"),
+ )
+
+ # Executions
+ @staticmethod
+ def execution_from_mongo_document(doc: Dict[str, Any]) -> DomainExecution:
+ sv = doc.get("status")
+ return DomainExecution(
+ execution_id=str(doc.get("execution_id")),
+ script=str(doc.get("script", "")),
+ status=ExecutionStatus(str(sv)),
+ stdout=doc.get("stdout"),
+ stderr=doc.get("stderr"),
+ lang=str(doc.get("lang", "python")),
+ lang_version=str(doc.get("lang_version", "3.11")),
+ created_at=doc.get("created_at", datetime.now(timezone.utc)),
+ updated_at=doc.get("updated_at", datetime.now(timezone.utc)),
+ resource_usage=ResourceUsageDomain.from_dict(doc.get("resource_usage") or {}),
+ user_id=doc.get("user_id"),
+ exit_code=doc.get("exit_code"),
+ error_type=doc.get("error_type"),
+ )
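
A sketch of hydrating an execution document for SSE streaming; the document values, including the `"completed"` status string, are hypothetical examples.

```python
from app.infrastructure.mappers.sse_mapper import SSEMapper

raw_doc = {
    "execution_id": "exec-1",
    "script": "print('hi')",
    "status": "completed",  # hypothetical ExecutionStatus value
    "stdout": "hi\n",
    "lang": "python",
    "lang_version": "3.11",
}
execution = SSEMapper.execution_from_mongo_document(raw_doc)
# Missing optional keys fall back to defaults (empty resource usage, timestamps of now).
```
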
diff --git a/backend/app/infrastructure/mappers/user_settings_mapper.py b/backend/app/infrastructure/mappers/user_settings_mapper.py
new file mode 100644
index 00000000..79813e69
--- /dev/null
+++ b/backend/app/infrastructure/mappers/user_settings_mapper.py
@@ -0,0 +1,93 @@
+from datetime import datetime, timezone
+from typing import Any
+
+from app.domain.enums import Theme
+from app.domain.enums.events import EventType
+from app.domain.enums.notification import NotificationChannel
+from app.domain.user.settings_models import (
+ DomainEditorSettings,
+ DomainNotificationSettings,
+ DomainSettingsEvent,
+ DomainUserSettings,
+)
+
+
+class UserSettingsMapper:
+ """Map user settings snapshot/event documents to domain and back."""
+
+ @staticmethod
+ def from_snapshot_document(doc: dict[str, Any]) -> DomainUserSettings:
+ notifications = doc.get("notifications", {})
+ editor = doc.get("editor", {})
+ theme = Theme(doc.get("theme", Theme.AUTO))
+
+ # Coerce channels to NotificationChannel list
+ channels_raw = notifications.get("channels", [])
+ channels: list[NotificationChannel] = [NotificationChannel(c) for c in channels_raw]
+
+ return DomainUserSettings(
+ user_id=str(doc.get("user_id")),
+ theme=theme,
+ timezone=doc.get("timezone", "UTC"),
+ date_format=doc.get("date_format", "YYYY-MM-DD"),
+ time_format=doc.get("time_format", "24h"),
+ notifications=DomainNotificationSettings(
+ execution_completed=notifications.get("execution_completed", True),
+ execution_failed=notifications.get("execution_failed", True),
+ system_updates=notifications.get("system_updates", True),
+ security_alerts=notifications.get("security_alerts", True),
+ channels=channels,
+ ),
+ editor=DomainEditorSettings(
+ theme=editor.get("theme", "one-dark"),
+ font_size=editor.get("font_size", 14),
+ tab_size=editor.get("tab_size", 4),
+ use_tabs=editor.get("use_tabs", False),
+ word_wrap=editor.get("word_wrap", True),
+ show_line_numbers=editor.get("show_line_numbers", True),
+ ),
+ custom_settings=doc.get("custom_settings", {}),
+ version=doc.get("version", 1),
+ created_at=doc.get("created_at", datetime.now(timezone.utc)),
+ updated_at=doc.get("updated_at", datetime.now(timezone.utc)),
+ )
+
+ @staticmethod
+ def to_snapshot_document(settings: DomainUserSettings) -> dict[str, Any]:
+ return {
+ "user_id": settings.user_id,
+ "theme": str(settings.theme),
+ "timezone": settings.timezone,
+ "date_format": settings.date_format,
+ "time_format": settings.time_format,
+ "notifications": {
+ "execution_completed": settings.notifications.execution_completed,
+ "execution_failed": settings.notifications.execution_failed,
+ "system_updates": settings.notifications.system_updates,
+ "security_alerts": settings.notifications.security_alerts,
+ "channels": [str(c) for c in settings.notifications.channels],
+ },
+ "editor": {
+ "theme": settings.editor.theme,
+ "font_size": settings.editor.font_size,
+ "tab_size": settings.editor.tab_size,
+ "use_tabs": settings.editor.use_tabs,
+ "word_wrap": settings.editor.word_wrap,
+ "show_line_numbers": settings.editor.show_line_numbers,
+ },
+ "custom_settings": settings.custom_settings,
+ "version": settings.version,
+ "created_at": settings.created_at,
+ "updated_at": settings.updated_at,
+ }
+
+ @staticmethod
+ def event_from_mongo_document(doc: dict[str, Any]) -> DomainSettingsEvent:
+ et_parsed: EventType = EventType(str(doc.get("event_type")))
+
+ return DomainSettingsEvent(
+ event_type=et_parsed,
+ timestamp=doc.get("timestamp"), # type: ignore[arg-type]
+ payload=doc.get("payload", {}),
+ correlation_id=(doc.get("metadata", {}) or {}).get("correlation_id"),
+ )
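
Because every field read by `from_snapshot_document` has a default, a minimal snapshot hydrates into a fully populated settings object. A quick sketch:

```python
from app.infrastructure.mappers.user_settings_mapper import UserSettingsMapper

settings = UserSettingsMapper.from_snapshot_document({"user_id": "user-123"})
snapshot = UserSettingsMapper.to_snapshot_document(settings)
# snapshot now contains the default theme, notification and editor sections.
```
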
diff --git a/backend/app/main.py b/backend/app/main.py
index d74f02d6..510c2c40 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -1,13 +1,14 @@
import uvicorn
+from dishka.integrations.fastapi import setup_dishka
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.api.routes import (
- alertmanager,
auth,
dlq,
events,
execution,
+ grafana_alerts,
health,
notifications,
replay,
@@ -25,13 +26,18 @@
from app.api.routes.admin import (
users_router as admin_users_router,
)
+from app.core.container import create_app_container
from app.core.correlation import CorrelationMiddleware
from app.core.dishka_lifespan import lifespan
from app.core.exceptions import configure_exception_handlers
from app.core.logging import logger
-from app.core.middlewares.cache import CacheControlMiddleware
-from app.core.middlewares.metrics import setup_metrics
-from app.core.middlewares.request_size_limit import RequestSizeLimitMiddleware
+from app.core.middlewares import (
+ CacheControlMiddleware,
+ MetricsMiddleware,
+ RateLimitMiddleware,
+ RequestSizeLimitMiddleware,
+ setup_metrics,
+)
from app.settings import get_settings
@@ -45,19 +51,18 @@ def create_app() -> FastAPI:
docs_url=None,
redoc_url=None,
)
-
- from dishka.integrations.fastapi import setup_dishka
- from app.core.container import create_app_container
container = create_app_container()
setup_dishka(container, app)
+ setup_metrics(app)
+ app.add_middleware(MetricsMiddleware)
+ if settings.RATE_LIMIT_ENABLED:
+ app.add_middleware(RateLimitMiddleware)
+
app.add_middleware(CorrelationMiddleware)
app.add_middleware(RequestSizeLimitMiddleware)
app.add_middleware(CacheControlMiddleware)
-
- # Note: Rate limiting is now handled by our custom middleware injected via Dishka
- logger.info(f"RATE LIMITING [TESTING={settings.TESTING}] enabled with Redis-based dynamic limits")
app.add_middleware(
CORSMiddleware,
@@ -102,7 +107,7 @@ def create_app() -> FastAPI:
app.include_router(user_settings.router, prefix=settings.API_V1_STR)
app.include_router(notifications.router, prefix=settings.API_V1_STR)
app.include_router(saga.router, prefix=settings.API_V1_STR)
- app.include_router(alertmanager.router, prefix=settings.API_V1_STR)
+ app.include_router(grafana_alerts.router, prefix=settings.API_V1_STR)
# No additional testing-only routes here
@@ -111,10 +116,6 @@ def create_app() -> FastAPI:
configure_exception_handlers(app)
logger.info("Exception handlers configured")
- # Set up OpenTelemetry metrics (after other middleware to avoid conflicts)
- setup_metrics(app)
- logger.info("OpenTelemetry metrics configured")
-
return app
diff --git a/backend/app/schemas_pydantic/admin_events.py b/backend/app/schemas_pydantic/admin_events.py
index 34035b87..894c0aff 100644
--- a/backend/app/schemas_pydantic/admin_events.py
+++ b/backend/app/schemas_pydantic/admin_events.py
@@ -3,10 +3,12 @@
from pydantic import BaseModel, Field
+from app.domain.enums.events import EventType
+
class EventFilter(BaseModel):
"""Filter criteria for browsing events"""
- event_types: List[str] | None = None
+ event_types: List[EventType] | None = None
aggregate_id: str | None = None
correlation_id: str | None = None
user_id: str | None = None
diff --git a/backend/app/schemas_pydantic/alertmanager.py b/backend/app/schemas_pydantic/alertmanager.py
deleted file mode 100644
index 8f4538f7..00000000
--- a/backend/app/schemas_pydantic/alertmanager.py
+++ /dev/null
@@ -1,43 +0,0 @@
-from datetime import datetime
-from typing import Dict, List
-
-from pydantic import BaseModel, Field
-
-from app.domain.enums.health import AlertStatus
-
-
-class Alert(BaseModel):
- status: AlertStatus
- labels: Dict[str, str]
- annotations: Dict[str, str]
- starts_at: datetime = Field(alias="startsAt")
- ends_at: datetime | None = Field(alias="endsAt", default=None)
- generator_url: str = Field(alias="generatorURL")
- fingerprint: str
-
- class Config:
- populate_by_name = True
-
-
-class AlertmanagerWebhook(BaseModel):
- version: str
- group_key: str = Field(alias="groupKey")
- truncated_alerts: int = Field(alias="truncatedAlerts", default=0)
- status: AlertStatus
- receiver: str
- group_labels: Dict[str, str] = Field(alias="groupLabels")
- common_labels: Dict[str, str] = Field(alias="commonLabels")
- common_annotations: Dict[str, str] = Field(alias="commonAnnotations")
- external_url: str = Field(alias="externalURL")
- alerts: List[Alert]
-
- class Config:
- populate_by_name = True
-
-
-class AlertResponse(BaseModel):
- """Response after processing alerts"""
- message: str
- alerts_received: int
- alerts_processed: int
- errors: List[str] = Field(default_factory=list)
diff --git a/backend/app/schemas_pydantic/dlq.py b/backend/app/schemas_pydantic/dlq.py
index b04d11a1..690b6c35 100644
--- a/backend/app/schemas_pydantic/dlq.py
+++ b/backend/app/schemas_pydantic/dlq.py
@@ -3,7 +3,7 @@
from pydantic import BaseModel
-from app.dlq.models import DLQMessageStatus, RetryStrategy
+from app.dlq import DLQMessageStatus, RetryStrategy
class DLQStats(BaseModel):
diff --git a/backend/app/schemas_pydantic/execution.py b/backend/app/schemas_pydantic/execution.py
index 6d7fbba2..fb513201 100644
--- a/backend/app/schemas_pydantic/execution.py
+++ b/backend/app/schemas_pydantic/execution.py
@@ -6,14 +6,15 @@
from app.domain.enums.common import ErrorType
from app.domain.enums.execution import ExecutionStatus
+from app.settings import get_settings
class ExecutionBase(BaseModel):
"""Base model for execution data."""
script: str = Field(..., max_length=50000, description="Script content (max 50,000 characters)")
status: ExecutionStatus = ExecutionStatus.QUEUED
- output: str | None = None
- errors: str | None = None
+ stdout: str | None = None
+ stderr: str | None = None
lang: str = "python"
lang_version: str = "3.11"
@@ -41,8 +42,8 @@ class ExecutionInDB(ExecutionBase):
class ExecutionUpdate(BaseModel):
"""Model for updating an execution."""
status: ExecutionStatus | None = None
- output: str | None = None
- errors: str | None = None
+ stdout: str | None = None
+ stderr: str | None = None
resource_usage: dict | None = None
exit_code: int | None = None
error_type: ErrorType | None = None
@@ -76,8 +77,6 @@ class ExecutionRequest(BaseModel):
@model_validator(mode="after")
def validate_runtime_supported(self) -> "ExecutionRequest": # noqa: D401
- from app.settings import get_settings
-
settings = get_settings()
runtimes = settings.SUPPORTED_RUNTIMES or {}
if self.lang not in runtimes:
@@ -104,8 +103,8 @@ class ExecutionResult(BaseModel):
"""Model for execution result."""
execution_id: str
status: ExecutionStatus
- output: str | None = None
- errors: str | None = None
+ stdout: str | None = None
+ stderr: str | None = None
lang: str
lang_version: str
resource_usage: ResourceUsage | None = None
diff --git a/backend/app/schemas_pydantic/grafana.py b/backend/app/schemas_pydantic/grafana.py
new file mode 100644
index 00000000..3a4eb45d
--- /dev/null
+++ b/backend/app/schemas_pydantic/grafana.py
@@ -0,0 +1,28 @@
+from __future__ import annotations
+
+from typing import Dict, List, Optional
+
+from pydantic import BaseModel, Field
+
+
+class GrafanaAlertItem(BaseModel):
+ status: Optional[str] = None
+ labels: Dict[str, str] = Field(default_factory=dict)
+ annotations: Dict[str, str] = Field(default_factory=dict)
+ valueString: Optional[str] = None
+
+
+class GrafanaWebhook(BaseModel):
+ status: Optional[str] = None
+ receiver: Optional[str] = None
+ alerts: List[GrafanaAlertItem] = Field(default_factory=list)
+ groupLabels: Dict[str, str] = Field(default_factory=dict)
+ commonLabels: Dict[str, str] = Field(default_factory=dict)
+ commonAnnotations: Dict[str, str] = Field(default_factory=dict)
+
+
+class AlertResponse(BaseModel):
+ message: str
+ alerts_received: int
+ alerts_processed: int
+ errors: List[str] = Field(default_factory=list)
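
A sketch of validating an incoming Grafana webhook body with the new schema; the payload is a hypothetical, trimmed example of Grafana's unified-alerting format, and Pydantic's default behaviour ignores any extra keys.

```python
from app.schemas_pydantic.grafana import GrafanaWebhook

payload = {
    "status": "firing",
    "receiver": "integr8scode",  # hypothetical receiver name
    "alerts": [
        {
            "status": "firing",
            "labels": {"alertname": "HighErrorRate", "severity": "critical"},
            "annotations": {"summary": "Error rate above threshold"},
        }
    ],
    "commonLabels": {"alertname": "HighErrorRate"},
}
webhook = GrafanaWebhook.model_validate(payload)
assert webhook.alerts[0].labels["severity"] == "critical"
```
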
diff --git a/backend/app/schemas_pydantic/notification.py b/backend/app/schemas_pydantic/notification.py
index 2739d77b..d208ca71 100644
--- a/backend/app/schemas_pydantic/notification.py
+++ b/backend/app/schemas_pydantic/notification.py
@@ -6,40 +6,26 @@
from app.domain.enums.notification import (
NotificationChannel,
- NotificationPriority,
+ NotificationSeverity,
NotificationStatus,
- NotificationType,
)
-
-class NotificationTemplate(BaseModel):
- """Notification template for different types"""
- notification_type: NotificationType
- channels: list[NotificationChannel]
- priority: NotificationPriority = NotificationPriority.MEDIUM
- subject_template: str
- body_template: str
- action_url_template: str | None = None
- metadata: dict[str, Any] = Field(default_factory=dict)
-
- model_config = ConfigDict(
- from_attributes=True
- )
+# Templates are removed in the unified model
class Notification(BaseModel):
"""Individual notification instance"""
notification_id: str = Field(default_factory=lambda: str(uuid4()))
user_id: str
- notification_type: NotificationType
channel: NotificationChannel
- priority: NotificationPriority = NotificationPriority.MEDIUM
+ severity: NotificationSeverity = NotificationSeverity.MEDIUM
status: NotificationStatus = NotificationStatus.PENDING
# Content
subject: str
body: str
action_url: str | None = None
+ tags: list[str] = Field(default_factory=list)
# Tracking
created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
@@ -56,9 +42,7 @@ class Notification(BaseModel):
error_message: str | None = None
# Context
- correlation_id: str | None = None
- related_entity_id: str | None = None
- related_entity_type: str | None = None
+ # Removed correlation_id and related_entity_*; use tags/metadata for correlation
metadata: dict[str, Any] = Field(default_factory=dict)
# Webhook specific
@@ -99,50 +83,16 @@ def validate_notifications(cls, v: list[Notification]) -> list[Notification]:
)
-class NotificationRule(BaseModel):
- """Rule for automatic notification generation"""
- rule_id: str = Field(default_factory=lambda: str(uuid4()))
- name: str
- description: str | None = None
- enabled: bool = True
-
- # Trigger conditions
- event_types: list[str]
- conditions: dict[str, Any] = Field(default_factory=dict)
-
- # Actions
- notification_type: NotificationType
- channels: list[NotificationChannel]
- priority: NotificationPriority = NotificationPriority.MEDIUM
- template_id: str | None = None
-
- # Throttling
- throttle_minutes: int | None = None
- max_per_hour: int | None = None
- max_per_day: int | None = None
-
- # Metadata
- created_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
- updated_at: datetime = Field(default_factory=lambda: datetime.now(UTC))
- created_by: str | None = None
-
- model_config = ConfigDict(
- from_attributes=True
- )
-
- @field_validator("event_types")
- @classmethod
- def validate_event_types(cls, v: list[str]) -> list[str]:
- if not v:
- raise ValueError("At least one event type must be specified")
- return v
+# Notification rules are removed in the unified model
class NotificationSubscription(BaseModel):
"""User subscription preferences for notifications"""
user_id: str
channel: NotificationChannel
- notification_types: list[NotificationType]
+ severities: list[NotificationSeverity] = Field(default_factory=list)
+ include_tags: list[str] = Field(default_factory=list)
+ exclude_tags: list[str] = Field(default_factory=list)
enabled: bool = True
# Channel-specific settings
@@ -170,7 +120,8 @@ class NotificationStats(BaseModel):
"""Statistics for notification delivery"""
user_id: str | None = None
channel: NotificationChannel | None = None
- notification_type: NotificationType | None = None
+ tags: list[str] | None = None
+ severity: NotificationSeverity | None = None
# Time range
start_date: datetime
@@ -200,7 +151,6 @@ class NotificationStats(BaseModel):
class NotificationResponse(BaseModel):
"""Response schema for notification endpoints"""
notification_id: str
- notification_type: NotificationType
channel: NotificationChannel
status: NotificationStatus
subject: str
@@ -208,7 +158,8 @@ class NotificationResponse(BaseModel):
action_url: str | None
created_at: datetime
read_at: datetime | None
- priority: str
+ severity: NotificationSeverity
+ tags: list[str]
model_config = ConfigDict(
from_attributes=True
@@ -229,7 +180,9 @@ class NotificationListResponse(BaseModel):
class SubscriptionUpdate(BaseModel):
"""Request schema for updating notification subscriptions"""
enabled: bool
- notification_types: list[NotificationType]
+ severities: list[NotificationSeverity] = Field(default_factory=list)
+ include_tags: list[str] = Field(default_factory=list)
+ exclude_tags: list[str] = Field(default_factory=list)
webhook_url: str | None = None
slack_webhook: str | None = None
quiet_hours_enabled: bool = False
@@ -243,14 +196,7 @@ class SubscriptionUpdate(BaseModel):
)
-class TestNotificationRequest(BaseModel):
- """Request schema for sending test notifications"""
- notification_type: NotificationType
- channel: NotificationChannel
-
- model_config = ConfigDict(
- from_attributes=True
- )
+# TestNotificationRequest is removed in the unified model; use the Notification schema directly for test endpoints
class SubscriptionsResponse(BaseModel):
diff --git a/backend/app/schemas_pydantic/replay_models.py b/backend/app/schemas_pydantic/replay_models.py
index f041a86a..34ad991d 100644
--- a/backend/app/schemas_pydantic/replay_models.py
+++ b/backend/app/schemas_pydantic/replay_models.py
@@ -5,15 +5,15 @@
from pydantic import BaseModel, Field, field_validator, model_validator
from app.domain.enums.replay import ReplayStatus, ReplayTarget, ReplayType
-from app.domain.replay.models import ReplayConfig as DomainReplayConfig
-from app.domain.replay.models import ReplayFilter as DomainReplayFilter
+from app.domain.replay import ReplayConfig as DomainReplayConfig
+from app.domain.replay import ReplayFilter as DomainReplayFilter
class ReplayFilterSchema(BaseModel):
execution_id: str | None = None
event_types: List[str] | None = None
- start_time: float | None = None
- end_time: float | None = None
+ start_time: datetime | None = None
+ end_time: datetime | None = None
user_id: str | None = None
service_name: str | None = None
custom_query: Dict[str, Any] | None = None
diff --git a/backend/app/schemas_pydantic/sse.py b/backend/app/schemas_pydantic/sse.py
index f774948e..f2cc044c 100644
--- a/backend/app/schemas_pydantic/sse.py
+++ b/backend/app/schemas_pydantic/sse.py
@@ -30,8 +30,8 @@ class ExecutionStreamEvent(BaseModel):
execution_id: str = Field(description="Execution ID")
status: str | None = Field(None, description="Execution status")
payload: Dict[str, Any] = Field(default_factory=dict, description="Event payload")
- output: str | None = Field(None, description="Execution output")
- errors: str | None = Field(None, description="Execution errors")
+ stdout: str | None = Field(None, description="Execution stdout")
+ stderr: str | None = Field(None, description="Execution stderr")
class NotificationStreamEvent(BaseModel):
diff --git a/backend/app/schemas_pydantic/user_settings.py b/backend/app/schemas_pydantic/user_settings.py
index f211cf4d..b2224432 100644
--- a/backend/app/schemas_pydantic/user_settings.py
+++ b/backend/app/schemas_pydantic/user_settings.py
@@ -1,7 +1,7 @@
from datetime import datetime, timezone
from typing import Any, Dict, List
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
from app.domain.enums.common import Theme
from app.domain.enums.events import EventType
@@ -31,6 +31,20 @@ class EditorSettings(BaseModel):
bracket_matching: bool = True
highlight_active_line: bool = True
default_language: str = "python"
+
+ @field_validator("font_size")
+ @classmethod
+ def validate_font_size(cls, v: int) -> int:
+ if v < 8 or v > 32:
+ raise ValueError("Font size must be between 8 and 32")
+ return v
+
+ @field_validator("tab_size")
+ @classmethod
+ def validate_tab_size(cls, v: int) -> int:
+ if v not in (2, 4, 8):
+ raise ValueError("Tab size must be 2, 4, or 8")
+ return v
class UserSettings(BaseModel):
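
A sketch of the new validators in action, assuming the remaining `EditorSettings` fields keep their defaults; the printed message text is approximate.

```python
from pydantic import ValidationError

from app.schemas_pydantic.user_settings import EditorSettings

try:
    EditorSettings(font_size=64)  # outside the 8-32 range
except ValidationError as exc:
    print(exc.errors()[0]["msg"])  # roughly: "Value error, Font size must be between 8 and 32"
```
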
diff --git a/backend/app/services/admin/__init__.py b/backend/app/services/admin/__init__.py
new file mode 100644
index 00000000..2762e2cb
--- /dev/null
+++ b/backend/app/services/admin/__init__.py
@@ -0,0 +1,9 @@
+from .admin_events_service import AdminEventsService
+from .admin_settings_service import AdminSettingsService
+from .admin_user_service import AdminUserService
+
+__all__ = [
+ "AdminUserService",
+ "AdminSettingsService",
+ "AdminEventsService",
+]
diff --git a/backend/app/services/admin/admin_events_service.py b/backend/app/services/admin/admin_events_service.py
new file mode 100644
index 00000000..7f9cfe82
--- /dev/null
+++ b/backend/app/services/admin/admin_events_service.py
@@ -0,0 +1,250 @@
+import csv
+import json
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from io import StringIO
+from typing import Any, Dict, List
+
+from app.core.logging import logger
+from app.db.repositories.admin import AdminEventsRepository
+from app.domain.admin import (
+ ReplayQuery,
+ ReplaySessionStatusDetail,
+)
+from app.domain.admin.replay_updates import ReplaySessionUpdate
+from app.domain.enums.replay import ReplayStatus, ReplayTarget, ReplayType
+from app.domain.events.event_models import (
+ EventBrowseResult,
+ EventDetail,
+ EventExportRow,
+ EventFilter,
+ EventStatistics,
+)
+from app.domain.replay import ReplayConfig, ReplayFilter
+from app.infrastructure.mappers import EventExportRowMapper, EventMapper
+from app.services.replay_service import ReplayService
+
+
+class AdminReplayResult:
+ def __init__(
+ self,
+ *,
+ dry_run: bool,
+ total_events: int,
+ replay_correlation_id: str,
+ status: str,
+ session_id: str | None = None,
+ events_preview: List[Dict[str, Any]] | None = None,
+ ) -> None:
+ self.dry_run = dry_run
+ self.total_events = total_events
+ self.replay_correlation_id = replay_correlation_id
+ self.status = status
+ self.session_id = session_id
+ self.events_preview = events_preview
+
+
+@dataclass
+class ExportResult:
+ filename: str
+ content: str
+ media_type: str
+
+
+class AdminEventsService:
+ def __init__(self, repository: AdminEventsRepository, replay_service: ReplayService) -> None:
+ self._repo = repository
+ self._replay_service = replay_service
+
+ async def browse_events(
+ self,
+ *,
+ filter: EventFilter,
+ skip: int,
+ limit: int,
+ sort_by: str,
+ sort_order: int,
+ ) -> EventBrowseResult:
+ return await self._repo.browse_events(
+ filter=filter, skip=skip, limit=limit, sort_by=sort_by, sort_order=sort_order
+ )
+
+ async def get_event_detail(self, event_id: str) -> EventDetail | None:
+ return await self._repo.get_event_detail(event_id)
+
+ async def get_event_stats(self, *, hours: int) -> EventStatistics:
+ return await self._repo.get_event_stats(hours=hours)
+
+ async def prepare_or_schedule_replay(
+ self,
+ *,
+ replay_query: ReplayQuery,
+ dry_run: bool,
+ replay_correlation_id: str,
+ target_service: str | None,
+ ) -> AdminReplayResult:
+ query = self._repo.build_replay_query(replay_query)
+ if not query:
+ raise ValueError("Must specify at least one filter for replay")
+
+ # Prepare and optionally preview
+ logger.info("Preparing replay session", extra={
+ "dry_run": dry_run,
+ "replay_correlation_id": replay_correlation_id,
+ })
+ session_data = await self._repo.prepare_replay_session(
+ query=query,
+ dry_run=dry_run,
+ replay_correlation_id=replay_correlation_id,
+ max_events=1000,
+ )
+
+ if dry_run:
+ # Map previews into lightweight summary dicts for the response
+ previews = [
+ {
+ "event_id": e.event_id,
+ "event_type": e.event_type,
+ "timestamp": e.timestamp,
+ "aggregate_id": e.aggregate_id,
+ }
+ for e in session_data.events_preview
+ ]
+ result = AdminReplayResult(
+ dry_run=True,
+ total_events=session_data.total_events,
+ replay_correlation_id=replay_correlation_id,
+ status="Preview",
+ events_preview=previews,
+ )
+ logger.info("Replay dry-run prepared", extra={
+ "total_events": result.total_events,
+ "replay_correlation_id": result.replay_correlation_id,
+ })
+ return result
+
+ # Build config for actual replay and create session via replay service
+ replay_filter = ReplayFilter(custom_query=query)
+ config = ReplayConfig(
+ replay_type=ReplayType.QUERY,
+ target=ReplayTarget.KAFKA if target_service else ReplayTarget.TEST,
+ filter=replay_filter,
+ speed_multiplier=1.0,
+ preserve_timestamps=False,
+ batch_size=100,
+ max_events=1000,
+ skip_errors=True,
+ )
+
+ op = await self._replay_service.create_session_from_config(config)
+ session_id = op.session_id
+
+ # Persist additional metadata to the admin replay session record
+ session_update = ReplaySessionUpdate(
+ total_events=session_data.total_events,
+ correlation_id=replay_correlation_id,
+ status=ReplayStatus.SCHEDULED,
+ )
+ await self._repo.update_replay_session(
+ session_id=session_id,
+ updates=session_update,
+ )
+
+ result = AdminReplayResult(
+ dry_run=False,
+ total_events=session_data.total_events,
+ replay_correlation_id=replay_correlation_id,
+ session_id=session_id,
+ status="Replay scheduled",
+ )
+ logger.info("Replay scheduled", extra={
+ "session_id": result.session_id,
+ "total_events": result.total_events,
+ "replay_correlation_id": result.replay_correlation_id,
+ })
+ return result
+
+ async def start_replay_session(self, session_id: str) -> None:
+ await self._replay_service.start_session(session_id)
+
+ async def get_replay_status(self, session_id: str) -> ReplaySessionStatusDetail | None:
+ status = await self._repo.get_replay_status_with_progress(session_id)
+ return status
+
+ async def export_events_csv(self, filter: EventFilter) -> List[EventExportRow]:
+ rows = await self._repo.export_events_csv(filter)
+ return rows
+
+ async def export_events_csv_content(self, *, filter: EventFilter, limit: int) -> ExportResult:
+ rows = await self._repo.export_events_csv(filter)
+ output = StringIO()
+ writer = csv.DictWriter(output, fieldnames=[
+ "Event ID", "Event Type", "Timestamp", "Correlation ID",
+ "Aggregate ID", "User ID", "Service", "Status", "Error",
+ ])
+ writer.writeheader()
+ row_mapper = EventExportRowMapper()
+ for row in rows[:limit]:
+ writer.writerow(row_mapper.to_dict(row))
+ output.seek(0)
+ filename = f"events_export_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}.csv"
+ logger.info("Exported events CSV", extra={
+ "row_count": len(rows),
+ "filename": filename,
+ })
+ return ExportResult(filename=filename, content=output.getvalue(), media_type="text/csv")
+
+ async def export_events_json_content(self, *, filter: EventFilter, limit: int) -> ExportResult:
+ result = await self._repo.browse_events(
+ filter=filter, skip=0, limit=limit, sort_by="timestamp", sort_order=-1
+ )
+ event_mapper = EventMapper()
+ events_data: list[dict[str, Any]] = []
+ for event in result.events:
+ event_dict = event_mapper.to_dict(event)
+ for field in ["timestamp", "created_at", "updated_at", "stored_at", "ttl_expires_at"]:
+ if field in event_dict and isinstance(event_dict[field], datetime):
+ event_dict[field] = event_dict[field].isoformat()
+ events_data.append(event_dict)
+
+ export_data: dict[str, Any] = {
+ "export_metadata": {
+ "exported_at": datetime.now(timezone.utc).isoformat(),
+ "total_events": len(events_data),
+ "filters_applied": {
+ "event_types": filter.event_types,
+ "aggregate_id": filter.aggregate_id,
+ "correlation_id": filter.correlation_id,
+ "user_id": filter.user_id,
+ "service_name": filter.service_name,
+ "start_time": filter.start_time.isoformat() if filter.start_time else None,
+ "end_time": filter.end_time.isoformat() if filter.end_time else None,
+ },
+ "export_limit": limit,
+ },
+ "events": events_data,
+ }
+ json_content = json.dumps(export_data, indent=2, default=str)
+ filename = f"events_export_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}.json"
+ logger.info("Exported events JSON", extra={
+ "event_count": len(events_data),
+ "filename": filename,
+ })
+ return ExportResult(filename=filename, content=json_content, media_type="application/json")
+
+ async def delete_event(self, *, event_id: str, deleted_by: str) -> bool:
+ # Load event for archival; archive then delete
+ logger.warning("Admin attempting to delete event", extra={"event_id": event_id, "deleted_by": deleted_by})
+ detail = await self._repo.get_event_detail(event_id)
+ if not detail:
+ return False
+ await self._repo.archive_event(detail.event, deleted_by)
+ deleted = await self._repo.delete_event(event_id)
+ if deleted:
+ logger.info("Event deleted", extra={
+ "event_id": event_id,
+ "event_type": detail.event.event_type,
+ "correlation_id": detail.event.correlation_id,
+ "deleted_by": deleted_by,
+ })
+ return deleted
diff --git a/backend/app/services/admin/admin_settings_service.py b/backend/app/services/admin/admin_settings_service.py
new file mode 100644
index 00000000..f71b9d0b
--- /dev/null
+++ b/backend/app/services/admin/admin_settings_service.py
@@ -0,0 +1,43 @@
+from app.core.logging import logger
+from app.db.repositories.admin.admin_settings_repository import AdminSettingsRepository
+from app.domain.admin import SystemSettings
+
+
+class AdminSettingsService:
+ def __init__(self, repository: AdminSettingsRepository):
+ self._repo = repository
+
+ async def get_system_settings(self, admin_username: str) -> SystemSettings:
+ logger.info(
+ "Admin retrieving system settings",
+ extra={"admin_username": admin_username},
+ )
+ settings = await self._repo.get_system_settings()
+ return settings
+
+ async def update_system_settings(
+ self,
+ settings: SystemSettings,
+ updated_by: str,
+ user_id: str,
+ ) -> SystemSettings:
+ logger.info(
+ "Admin updating system settings",
+ extra={"admin_username": updated_by},
+ )
+ updated = await self._repo.update_system_settings(
+ settings=settings, updated_by=updated_by, user_id=user_id
+ )
+ logger.info("System settings updated successfully")
+ return updated
+
+ async def reset_system_settings(self, username: str, user_id: str) -> SystemSettings:
+ # Reset with an audit entry, then return the fresh defaults persisted by the follow-up get
+ logger.info(
+ "Admin resetting system settings to defaults",
+ extra={"admin_username": username},
+ )
+ await self._repo.reset_system_settings(username=username, user_id=user_id)
+ settings = await self._repo.get_system_settings()
+ logger.info("System settings reset to defaults")
+ return settings
diff --git a/backend/app/services/admin/admin_user_service.py b/backend/app/services/admin/admin_user_service.py
new file mode 100644
index 00000000..7cc55cc2
--- /dev/null
+++ b/backend/app/services/admin/admin_user_service.py
@@ -0,0 +1,224 @@
+from datetime import datetime, timedelta, timezone
+from uuid import uuid4
+
+from app.core.logging import logger
+from app.core.security import SecurityService
+from app.db.repositories.admin.admin_user_repository import AdminUserRepository
+from app.domain.admin import AdminUserOverviewDomain, DerivedCountsDomain, RateLimitSummaryDomain
+from app.domain.enums.events import EventType
+from app.domain.enums.execution import ExecutionStatus
+from app.domain.enums.user import UserRole
+from app.domain.rate_limit import UserRateLimit
+from app.domain.user import PasswordReset, User, UserListResult, UserUpdate
+from app.infrastructure.mappers import UserRateLimitMapper
+from app.schemas_pydantic.user import UserCreate
+from app.services.event_service import EventService
+from app.services.execution_service import ExecutionService
+from app.services.rate_limit_service import RateLimitService
+
+
+class AdminUserService:
+ def __init__(
+ self,
+ user_repository: AdminUserRepository,
+ event_service: EventService,
+ execution_service: ExecutionService,
+ rate_limit_service: RateLimitService,
+ ) -> None:
+ self._users = user_repository
+ self._events = event_service
+ self._executions = execution_service
+ self._rate_limits = rate_limit_service
+
+ async def get_user_overview(self, user_id: str, hours: int = 24) -> AdminUserOverviewDomain:
+ logger.info("Admin getting user overview",
+ extra={"target_user_id": user_id, "hours": hours})
+ user = await self._users.get_user_by_id(user_id)
+ if not user:
+ raise ValueError("User not found")
+
+ now = datetime.now(timezone.utc)
+ start = now - timedelta(hours=hours)
+ stats_domain = await self._events.get_event_statistics(
+ user_id=user_id,
+ user_role=UserRole.ADMIN,
+ start_time=start,
+ end_time=now,
+ include_all_users=False,
+ )
+ exec_stats = await self._executions.get_execution_stats(
+ user_id=user_id,
+ time_range=(start, now)
+ )
+ by_status = exec_stats.get("by_status", {}) or {}
+
+ def _count(status: ExecutionStatus) -> int:
+ return int(by_status.get(status, 0) or by_status.get(status.value, 0) or 0)
+
+ succeeded = _count(ExecutionStatus.COMPLETED)
+ failed = _count(ExecutionStatus.FAILED)
+ timeout = _count(ExecutionStatus.TIMEOUT)
+ cancelled = _count(ExecutionStatus.CANCELLED)
+ derived = DerivedCountsDomain(
+ succeeded=succeeded,
+ failed=failed,
+ timeout=timeout,
+ cancelled=cancelled,
+ terminal_total=succeeded + failed + timeout + cancelled,
+ )
+
+ rl = await self._rate_limits.get_user_rate_limit(user_id)
+ rl_summary = RateLimitSummaryDomain(
+ bypass_rate_limit=rl.bypass_rate_limit if rl else False,
+ global_multiplier=rl.global_multiplier if rl else 1.0,
+ has_custom_limits=bool(rl.rules) if rl else False,
+ )
+
+ # Recent execution-related events (last 10)
+ event_types: list[EventType] = [
+ EventType.EXECUTION_REQUESTED,
+ EventType.EXECUTION_STARTED,
+ EventType.EXECUTION_COMPLETED,
+ EventType.EXECUTION_FAILED,
+ EventType.EXECUTION_TIMEOUT,
+ EventType.EXECUTION_CANCELLED,
+ ]
+ recent_result = await self._events.get_user_events_paginated(
+ user_id=user_id,
+ event_types=[str(et) for et in event_types],
+ start_time=start,
+ end_time=now,
+ limit=10,
+ skip=0,
+ sort_order="desc",
+ )
+ recent_events = recent_result.events
+
+ return AdminUserOverviewDomain(
+ user=user,
+ stats=stats_domain,
+ derived_counts=derived,
+ rate_limit_summary=rl_summary,
+ recent_events=recent_events,
+ )
+
+ async def list_users(self,
+ *,
+ admin_username: str,
+ limit: int,
+ offset: int,
+ search: str | None,
+ role: UserRole | None) -> UserListResult:
+ logger.info(
+ "Admin listing users",
+ extra={
+ "admin_username": admin_username,
+ "limit": limit,
+ "offset": offset,
+ "search": search,
+ "role": role,
+ },
+ )
+
+ return await self._users.list_users(limit=limit, offset=offset, search=search, role=role)
+
+ async def create_user(self, *, admin_username: str, user_data: UserCreate) -> User:
+ """Create a new user and return domain user."""
+ logger.info(
+ "Admin creating new user", extra={"admin_username": admin_username, "new_username": user_data.username}
+ )
+        # Ensure the username does not already exist
+ search_result = await self._users.list_users(limit=1, offset=0, search=user_data.username)
+ for user in search_result.users:
+ if user.username == user_data.username:
+ raise ValueError("Username already exists")
+
+ security = SecurityService()
+ hashed_password = security.get_password_hash(user_data.password)
+
+        user_id = str(uuid4())  # uuid4 is imported at module level
+ now = datetime.now(timezone.utc)
+ user_doc = {
+ "user_id": user_id,
+ "username": user_data.username,
+ "email": user_data.email,
+ "hashed_password": hashed_password,
+ "role": getattr(user_data, "role", UserRole.USER),
+ "is_active": getattr(user_data, "is_active", True),
+ "is_superuser": False,
+ "created_at": now,
+ "updated_at": now,
+ }
+ await self._users.users_collection.insert_one(user_doc)
+ logger.info("User created successfully",
+ extra={"new_username": user_data.username, "admin_username": admin_username})
+ # Return fresh domain user
+ created = await self._users.get_user_by_id(user_id)
+ if not created:
+ raise ValueError("Failed to fetch created user")
+ return created
+
+ async def get_user(self, *, admin_username: str, user_id: str) -> User | None:
+ logger.info("Admin getting user details",
+ extra={"admin_username": admin_username, "target_user_id": user_id})
+ return await self._users.get_user_by_id(user_id)
+
+ async def update_user(self, *, admin_username: str, user_id: str, update: UserUpdate) -> User | None:
+ logger.info(
+ "Admin updating user",
+ extra={"admin_username": admin_username, "target_user_id": user_id},
+ )
+ return await self._users.update_user(user_id, update)
+
+ async def delete_user(self, *, admin_username: str, user_id: str, cascade: bool) -> dict[str, int]:
+ logger.info(
+ "Admin deleting user",
+ extra={"admin_username": admin_username, "target_user_id": user_id, "cascade": cascade},
+ )
+ # Reset rate limits prior to deletion
+ await self._rate_limits.reset_user_limits(user_id)
+ deleted_counts = await self._users.delete_user(user_id, cascade=cascade)
+ if deleted_counts.get("user", 0) > 0:
+ logger.info("User deleted successfully", extra={"target_user_id": user_id})
+ return deleted_counts
+
+ async def reset_user_password(self, *, admin_username: str, user_id: str, new_password: str) -> bool:
+ logger.info("Admin resetting user password",
+ extra={"admin_username": admin_username, "target_user_id": user_id})
+ pr = PasswordReset(user_id=user_id, new_password=new_password)
+ ok = await self._users.reset_user_password(pr)
+ if ok:
+ logger.info("User password reset successfully", extra={"target_user_id": user_id})
+ return ok
+
+ async def get_user_rate_limits(self, *, admin_username: str, user_id: str) -> dict:
+ logger.info("Admin getting user rate limits",
+ extra={"admin_username": admin_username, "target_user_id": user_id})
+ user_limit = await self._rate_limits.get_user_rate_limit(user_id)
+ usage_stats = await self._rate_limits.get_usage_stats(user_id)
+ rate_limit_mapper = UserRateLimitMapper()
+ return {
+ "user_id": user_id,
+ "rate_limit_config": rate_limit_mapper.to_dict(user_limit) if user_limit else None,
+ "current_usage": usage_stats,
+ }
+
+ async def update_user_rate_limits(self,
+ *,
+ admin_username: str,
+ user_id: str,
+ config: UserRateLimit) -> dict[str, object]:
+ mapper = UserRateLimitMapper()
+ logger.info(
+ "Admin updating user rate limits",
+ extra={"admin_username": admin_username, "target_user_id": user_id, "config": mapper.to_dict(config)},
+ )
+ config.user_id = user_id
+ await self._rate_limits.update_user_rate_limit(user_id, config)
+ return {"message": "Rate limits updated successfully", "config": mapper.to_dict(config)}
+
+ async def reset_user_rate_limits(self, *, admin_username: str, user_id: str) -> bool:
+ logger.info("Admin resetting user rate limits",
+ extra={"admin_username": admin_username, "target_user_id": user_id})
+ await self._rate_limits.reset_user_limits(user_id)
+ return True
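
Note on the `by_status` fallback in `get_user_overview` above: execution stats may be keyed by enum members or by their string values, and the nested `get` calls tolerate both. A minimal sketch of that behaviour with a local stand-in enum (not the project's `ExecutionStatus`):

```python
# Stand-in sketch of the nested get() fallback; Status is a local enum,
# not the project's ExecutionStatus.
from enum import Enum


class Status(Enum):
    COMPLETED = "completed"
    FAILED = "failed"


def count(by_status: dict, status: Status) -> int:
    # Accept counts keyed by enum members or by their string values, defaulting to 0.
    return int(by_status.get(status, 0) or by_status.get(status.value, 0) or 0)


print(count({Status.COMPLETED: 7}, Status.COMPLETED))  # 7 (enum-keyed)
print(count({"completed": 7}, Status.COMPLETED))       # 7 (string-keyed)
print(count({}, Status.FAILED))                        # 0 (missing key)
```
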
diff --git a/backend/app/services/admin_user_service.py b/backend/app/services/admin_user_service.py
deleted file mode 100644
index 47075d3f..00000000
--- a/backend/app/services/admin_user_service.py
+++ /dev/null
@@ -1,102 +0,0 @@
-from __future__ import annotations
-
-from datetime import datetime, timedelta, timezone
-
-from app.db.repositories.admin.admin_user_repository import AdminUserRepository
-from app.domain.admin.overview_models import (
- AdminUserOverviewDomain,
- DerivedCountsDomain,
- RateLimitSummaryDomain,
-)
-from app.domain.enums.events import EventType
-from app.domain.enums.execution import ExecutionStatus
-from app.domain.enums.user import UserRole
-from app.services.event_service import EventService
-from app.services.execution_service import ExecutionService
-from app.services.rate_limit_service import RateLimitService
-
-
-class AdminUserService:
- def __init__(
- self,
- user_repository: AdminUserRepository,
- event_service: EventService,
- execution_service: ExecutionService,
- rate_limit_service: RateLimitService,
- ) -> None:
- self._users = user_repository
- self._events = event_service
- self._executions = execution_service
- self._rate_limits = rate_limit_service
- # Service operates purely on domain types
-
- async def get_user_overview(self, user_id: str, hours: int = 24) -> AdminUserOverviewDomain:
- user = await self._users.get_user_by_id(user_id)
- if not user:
- raise ValueError("User not found")
-
- now = datetime.now(timezone.utc)
- start = now - timedelta(hours=hours)
- stats_domain = await self._events.get_event_statistics(
- user_id=user_id,
- user_role=UserRole.ADMIN,
- start_time=start,
- end_time=now,
- include_all_users=False,
- )
- exec_stats = await self._executions.get_execution_stats(
- user_id=user_id,
- time_range=(start, now)
- )
- by_status = exec_stats.get("by_status", {}) or {}
-
- def _count(status: ExecutionStatus) -> int:
- return int(by_status.get(status, 0) or by_status.get(status.value, 0) or 0)
-
- succeeded = _count(ExecutionStatus.COMPLETED)
- failed = _count(ExecutionStatus.FAILED)
- timeout = _count(ExecutionStatus.TIMEOUT)
- cancelled = _count(ExecutionStatus.CANCELLED)
- derived = DerivedCountsDomain(
- succeeded=succeeded,
- failed=failed,
- timeout=timeout,
- cancelled=cancelled,
- terminal_total=succeeded + failed + timeout + cancelled,
- )
-
- # Rate limit summary (must reflect current state; let errors bubble)
- rl = await self._rate_limits.get_user_rate_limit(user_id)
- rl_summary = RateLimitSummaryDomain(
- bypass_rate_limit=rl.bypass_rate_limit if rl else False,
- global_multiplier=rl.global_multiplier if rl else 1.0,
- has_custom_limits=bool(rl.rules) if rl else False,
- )
-
- # Recent execution-related events (last 10)
- event_types = [
- EventType.EXECUTION_REQUESTED,
- EventType.EXECUTION_STARTED,
- EventType.EXECUTION_COMPLETED,
- EventType.EXECUTION_FAILED,
- EventType.EXECUTION_TIMEOUT,
- EventType.EXECUTION_CANCELLED,
- ]
- recent_result = await self._events.get_user_events_paginated(
- user_id=user_id,
- event_types=[str(et) for et in event_types],
- start_time=start,
- end_time=now,
- limit=10,
- skip=0,
- sort_order="desc",
- )
- recent_events = recent_result.events
-
- return AdminUserOverviewDomain(
- user=user,
- stats=stats_domain,
- derived_counts=derived,
- rate_limit_summary=rl_summary,
- recent_events=recent_events,
- )
diff --git a/backend/app/services/auth_service.py b/backend/app/services/auth_service.py
new file mode 100644
index 00000000..3c1a4cc9
--- /dev/null
+++ b/backend/app/services/auth_service.py
@@ -0,0 +1,45 @@
+from fastapi import HTTPException, Request, status
+
+from app.core.logging import logger
+from app.core.security import security_service
+from app.db.repositories.user_repository import UserRepository
+from app.domain.enums.user import UserRole
+from app.schemas_pydantic.user import UserResponse
+
+
+class AuthService:
+ def __init__(self, user_repo: UserRepository):
+ self.user_repo = user_repo
+
+ async def get_current_user(self, request: Request) -> UserResponse:
+ token = request.cookies.get("access_token")
+ if not token:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Not authenticated",
+ headers={"WWW-Authenticate": "Bearer"},
+ )
+
+ user = await security_service.get_current_user(token, self.user_repo)
+
+ return UserResponse(
+ user_id=user.user_id,
+ username=user.username,
+ email=user.email,
+ role=user.role,
+ is_superuser=user.is_superuser,
+ created_at=user.created_at,
+ updated_at=user.updated_at,
+ )
+
+ async def get_admin(self, request: Request) -> UserResponse:
+ user = await self.get_current_user(request)
+ if user.role != UserRole.ADMIN:
+ logger.warning(
+ f"Admin access denied for user: {user.username} (role: {user.role})"
+ )
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail="Admin access required",
+ )
+ return user
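
The new `AuthService` is cookie-based: it reads `access_token` from the request, delegates verification to `security_service`, and `get_admin` adds a role check on top. A hedged sketch of exposing it as FastAPI dependencies; the `get_auth_service` factory, the `app.state.user_repository` attribute, and the route path are illustrative assumptions, not wiring taken from this PR:

```python
# Hedged sketch of exposing AuthService as FastAPI dependencies. The
# get_auth_service factory, the app.state.user_repository attribute and the
# route path are illustrative assumptions, not wiring taken from this PR.
from fastapi import Depends, FastAPI, Request

from app.db.repositories.user_repository import UserRepository
from app.schemas_pydantic.user import UserResponse
from app.services.auth_service import AuthService

app = FastAPI()


def get_auth_service(request: Request) -> AuthService:
    # Assumes a UserRepository was attached to app.state at startup.
    repo: UserRepository = request.app.state.user_repository
    return AuthService(repo)


async def require_admin(
    request: Request, auth: AuthService = Depends(get_auth_service)
) -> UserResponse:
    # Raises 401 if the access_token cookie is missing/invalid, 403 if not admin.
    return await auth.get_admin(request)


@app.get("/api/v1/admin/ping")  # illustrative route
async def admin_ping(user: UserResponse = Depends(require_admin)) -> dict[str, str]:
    return {"admin": user.username}
```
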
diff --git a/backend/app/services/coordinator/__init__.py b/backend/app/services/coordinator/__init__.py
index 9fa9d8f5..b3890c9d 100644
--- a/backend/app/services/coordinator/__init__.py
+++ b/backend/app/services/coordinator/__init__.py
@@ -1,5 +1,3 @@
-"""ExecutionCoordinator service for managing execution queue and scheduling"""
-
from app.services.coordinator.coordinator import ExecutionCoordinator
from app.services.coordinator.queue_manager import QueueManager, QueuePriority
from app.services.coordinator.resource_manager import ResourceAllocation, ResourceManager
diff --git a/backend/app/services/coordinator/coordinator.py b/backend/app/services/coordinator/coordinator.py
index ca2dd7ee..b827d15e 100644
--- a/backend/app/services/coordinator/coordinator.py
+++ b/backend/app/services/coordinator/coordinator.py
@@ -3,7 +3,9 @@
import time
from collections.abc import Coroutine
from typing import Any, TypeAlias
+from uuid import uuid4
+import redis.asyncio as redis
from motor.motor_asyncio import AsyncIOMotorClient
from app.core.logging import logger
@@ -13,10 +15,8 @@
from app.domain.enums.events import EventType
from app.domain.enums.kafka import KafkaTopic
from app.domain.enums.storage import ExecutionErrorType
-from app.domain.execution.models import ResourceUsageDomain
-from app.events.core.consumer import ConsumerConfig, UnifiedConsumer
-from app.events.core.dispatcher import EventDispatcher
-from app.events.core.producer import ProducerConfig, UnifiedProducer
+from app.domain.execution import ResourceUsageDomain
+from app.events.core import ConsumerConfig, EventDispatcher, ProducerConfig, UnifiedConsumer, UnifiedProducer
from app.events.event_store import EventStore, create_event_store
from app.events.schema.schema_registry import (
SchemaRegistryManager,
@@ -32,10 +32,13 @@
ExecutionRequestedEvent,
)
from app.infrastructure.kafka.events.metadata import EventMetadata
+from app.infrastructure.kafka.events.saga import CreatePodCommandEvent
from app.services.coordinator.queue_manager import QueueManager, QueuePriority
from app.services.coordinator.resource_manager import ResourceAllocation, ResourceManager
-from app.services.idempotency import IdempotencyManager, create_idempotency_manager
+from app.services.idempotency import IdempotencyManager
+from app.services.idempotency.idempotency_manager import IdempotencyConfig, create_idempotency_manager
from app.services.idempotency.middleware import IdempotentConsumerWrapper
+from app.services.idempotency.redis_repository import RedisIdempotencyRepository
from app.settings import get_settings
EventHandler: TypeAlias = Coroutine[Any, Any, None]
@@ -431,10 +434,6 @@ async def _publish_execution_started(
request: ExecutionRequestedEvent
) -> None:
"""Send CreatePodCommandEvent to k8s-worker via SAGA_COMMANDS topic"""
- from uuid import uuid4
-
- from app.infrastructure.kafka.events.saga import CreatePodCommandEvent
-
metadata = await self._build_command_metadata(request)
create_pod_cmd = CreatePodCommandEvent(
@@ -575,7 +574,19 @@ async def run_coordinator() -> None:
# Build repositories and idempotency manager
exec_repo = ExecutionRepository(database)
- idem_manager = create_idempotency_manager(database)
+ r = redis.Redis(
+ host=settings.REDIS_HOST,
+ port=settings.REDIS_PORT,
+ db=settings.REDIS_DB,
+ password=settings.REDIS_PASSWORD,
+ ssl=settings.REDIS_SSL,
+ max_connections=settings.REDIS_MAX_CONNECTIONS,
+ decode_responses=settings.REDIS_DECODE_RESPONSES,
+ socket_connect_timeout=5,
+ socket_timeout=5,
+ )
+ idem_repo = RedisIdempotencyRepository(r, key_prefix="idempotency")
+ idem_manager = create_idempotency_manager(repository=idem_repo, config=IdempotencyConfig())
await idem_manager.initialize()
coordinator = ExecutionCoordinator(
diff --git a/backend/app/services/event_bus.py b/backend/app/services/event_bus.py
index a16ebb32..90666cef 100644
--- a/backend/app/services/event_bus.py
+++ b/backend/app/services/event_bus.py
@@ -1,10 +1,9 @@
import asyncio
import fnmatch
import json
-from contextlib import asynccontextmanager
from dataclasses import dataclass, field
from datetime import datetime, timezone
-from typing import Any, AsyncGenerator, Callable, Optional
+from typing import Any, Callable, Optional
from uuid import uuid4
from confluent_kafka import Consumer, KafkaError, Producer
@@ -58,7 +57,6 @@ async def start(self) -> None:
self._running = True
logger.info("Event bus started with Kafka backing")
-
async def _initialize_kafka(self) -> None:
"""Initialize Kafka producer and consumer."""
# Producer setup
@@ -78,7 +76,7 @@ async def _initialize_kafka(self) -> None:
'client.id': f'event-bus-consumer-{uuid4()}'
})
self.consumer.subscribe([self._topic])
-
+
# Store the executor function for sync operations
loop = asyncio.get_event_loop()
self._executor = loop.run_in_executor
@@ -131,10 +129,10 @@ async def publish(self, event_type: str, data: dict[str, Any]) -> None:
# Serialize and send message asynchronously
value = json.dumps(event).encode('utf-8')
key = event_type.encode('utf-8') if event_type else None
-
+
# Use executor to avoid blocking
if self._executor:
- await self._executor(None, self.producer.produce, self._topic, value=value, key=key)
+ await self._executor(None, self.producer.produce, self._topic, value, key)
# Poll to handle delivery callbacks
await self._executor(None, self.producer.poll, 0)
else:
@@ -273,10 +271,10 @@ async def _kafka_listener(self) -> None:
# Fallback to sync operation if executor not available
await asyncio.sleep(0.1)
continue
-
+
if msg is None:
continue
-
+
if msg.error():
if msg.error().code() != KafkaError._PARTITION_EOF:
logger.error(f"Consumer error: {msg.error()}")
@@ -336,15 +334,6 @@ async def close(self) -> None:
await self._event_bus.stop()
self._event_bus = None
- @asynccontextmanager
- async def event_bus_context(self) -> AsyncGenerator[EventBus, None]:
- """Context manager for event bus lifecycle."""
- bus = await self.get_event_bus()
- try:
- yield bus
- finally:
- await self.close()
-
async def get_event_bus(request: Request) -> EventBus:
manager: EventBusManager = request.app.state.event_bus_manager
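
The positional `produce(self._topic, value, key)` call above is deliberate: `loop.run_in_executor` forwards only positional arguments to the callable. A small sketch of the two equivalent spellings, using a stand-in `produce` function rather than `confluent_kafka.Producer`:

```python
# Stand-in for confluent_kafka.Producer.produce; prints instead of producing.
import asyncio
import functools


def produce(topic: str, value: bytes = b"", key: bytes | None = None) -> None:
    print(topic, value, key)


async def main() -> None:
    loop = asyncio.get_running_loop()
    # Positional form (what the event bus now does):
    await loop.run_in_executor(None, produce, "events", b"payload", b"key")
    # Keyword form via functools.partial, since run_in_executor takes no **kwargs:
    await loop.run_in_executor(
        None, functools.partial(produce, "events", value=b"payload", key=b"key")
    )


asyncio.run(main())
```
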
diff --git a/backend/app/services/event_replay/__init__.py b/backend/app/services/event_replay/__init__.py
index e7d194e8..82e67bc5 100644
--- a/backend/app/services/event_replay/__init__.py
+++ b/backend/app/services/event_replay/__init__.py
@@ -1,5 +1,5 @@
from app.domain.enums.replay import ReplayStatus, ReplayTarget, ReplayType
-from app.domain.replay.models import ReplayConfig, ReplayFilter
+from app.domain.replay import ReplayConfig, ReplayFilter
from app.schemas_pydantic.replay_models import ReplaySession
from app.services.event_replay.replay_service import EventReplayService
diff --git a/backend/app/services/event_replay/replay_service.py b/backend/app/services/event_replay/replay_service.py
index e4df82ae..ab043b2e 100644
--- a/backend/app/services/event_replay/replay_service.py
+++ b/backend/app/services/event_replay/replay_service.py
@@ -5,13 +5,16 @@
from typing import Any, AsyncIterator, Callable, Dict, List
from uuid import uuid4
+from opentelemetry.trace import SpanKind
+
from app.core.logging import logger
from app.core.metrics import ReplayMetrics
-from app.core.tracing import SpanKind, trace_span
+from app.core.tracing.utils import trace_span
from app.db.repositories.replay_repository import ReplayRepository
+from app.domain.admin.replay_updates import ReplaySessionUpdate
from app.domain.enums.replay import ReplayStatus, ReplayTarget
-from app.domain.replay.models import ReplayConfig, ReplaySessionState
-from app.events.core.producer import UnifiedProducer
+from app.domain.replay import ReplayConfig, ReplaySessionState
+from app.events.core import UnifiedProducer
from app.events.event_store import EventStore
from app.infrastructure.kafka.events.base import BaseEvent
@@ -254,23 +257,32 @@ async def _process_batch(
session: ReplaySessionState,
batch: List[BaseEvent]
) -> None:
- for event in batch:
- if session.status != ReplayStatus.RUNNING:
- break
+ with trace_span(
+ name="event_replay.process_batch",
+ kind=SpanKind.INTERNAL,
+ attributes={
+ "replay.session_id": str(session.session_id),
+ "replay.batch.count": len(batch),
+ "replay.target": session.config.target,
+ },
+ ):
+ for event in batch:
+ if session.status != ReplayStatus.RUNNING:
+ break
- # Apply delay before external I/O
- await self._apply_replay_delay(session, event)
- try:
- success = await self._replay_event(session, event)
- except Exception as e:
- await self._handle_replay_error(session, event, e)
- if not session.config.skip_errors:
- raise
- continue
+ # Apply delay before external I/O
+ await self._apply_replay_delay(session, event)
+ try:
+ success = await self._replay_event(session, event)
+ except Exception as e:
+ await self._handle_replay_error(session, event, e)
+ if not session.config.skip_errors:
+ raise
+ continue
- self._update_replay_metrics(session, event, success)
- session.last_event_at = event.timestamp
- await self._update_session_in_db(session)
+ self._update_replay_metrics(session, event, success)
+ session.last_event_at = event.timestamp
+ await self._update_session_in_db(session)
async def _replay_event(
self,
@@ -408,16 +420,18 @@ async def cleanup_old_sessions(
async def _update_session_in_db(self, session: ReplaySessionState) -> None:
"""Update session progress in the database."""
try:
+ session_update = ReplaySessionUpdate(
+ status=session.status,
+ replayed_events=session.replayed_events,
+ failed_events=session.failed_events,
+ skipped_events=session.skipped_events,
+ completed_at=session.completed_at,
+ )
+ # Note: last_event_at is not in ReplaySessionUpdate
+ # If needed, add it to the domain model
await self._repository.update_replay_session(
session_id=session.session_id,
- updates={
- "status": session.status,
- "replayed_events": session.replayed_events,
- "failed_events": session.failed_events,
- "skipped_events": session.skipped_events,
- "completed_at": session.completed_at,
- "last_event_at": session.last_event_at
- }
+ updates=session_update
)
except Exception as e:
logger.error(f"Failed to update session in database: {e}")
diff --git a/backend/app/services/event_service.py b/backend/app/services/event_service.py
index b752c2dc..bcb64516 100644
--- a/backend/app/services/event_service.py
+++ b/backend/app/services/event_service.py
@@ -3,7 +3,6 @@
from pymongo import ASCENDING, DESCENDING
-from app.core.logging import logger
from app.db.repositories.event_repository import EventRepository
from app.domain.enums.user import UserRole
from app.domain.events import (
@@ -14,18 +13,25 @@
EventReplayInfo,
EventStatistics,
)
+from app.infrastructure.mappers import EventFilterMapper
class EventService:
def __init__(self, repository: EventRepository):
self.repository = repository
+ def _build_user_filter(self, user_id: str, user_role: UserRole) -> dict[str, object]:
+        """Build a user filter for the given role; admins get an empty dict (no restriction)."""
+ if user_role == UserRole.ADMIN:
+ return {}
+ return {"metadata.user_id": user_id}
+
async def get_execution_events(
- self,
- execution_id: str,
- user_id: str,
- user_role: UserRole,
- include_system_events: bool = False,
+ self,
+ execution_id: str,
+ user_id: str,
+ user_role: UserRole,
+ include_system_events: bool = False,
) -> List[Event] | None:
events = await self.repository.get_events_by_aggregate(aggregate_id=execution_id, limit=1000)
if not events:
@@ -46,14 +52,14 @@ async def get_execution_events(
return events
async def get_user_events_paginated(
- self,
- user_id: str,
- event_types: List[str] | None = None,
- start_time: datetime | None = None,
- end_time: datetime | None = None,
- limit: int = 100,
- skip: int = 0,
- sort_order: str = "desc",
+ self,
+ user_id: str,
+ event_types: List[str] | None = None,
+ start_time: datetime | None = None,
+ end_time: datetime | None = None,
+ limit: int = 100,
+ skip: int = 0,
+ sort_order: str = "desc",
) -> EventListResult:
return await self.repository.get_user_events_paginated(
user_id=user_id,
@@ -66,20 +72,20 @@ async def get_user_events_paginated(
)
async def query_events_advanced(
- self,
- user_id: str,
- user_role: UserRole,
- filters: EventFilter,
- sort_by: str = "timestamp",
- sort_order: Any = "desc",
- limit: int = 100,
- skip: int = 0,
+ self,
+ user_id: str,
+ user_role: UserRole,
+ filters: EventFilter,
+ sort_by: str = "timestamp",
+ sort_order: Any = "desc",
+ limit: int = 100,
+ skip: int = 0,
) -> EventListResult | None:
# Access control
if filters.user_id and filters.user_id != user_id and user_role != UserRole.ADMIN:
return None
- query = filters.to_query()
+ query = EventFilterMapper.to_mongo_query(filters)
if not filters.user_id and user_role != UserRole.ADMIN:
query["metadata.user_id"] = user_id
@@ -95,10 +101,8 @@ async def query_events_advanced(
direction = DESCENDING if str(sort_order).lower() == "desc" else ASCENDING
# Pagination and sorting from request
- # Cast to dict[str, object] for repository compatibility
- query_obj: dict[str, object] = query # type: ignore[assignment]
return await self.repository.query_events_generic(
- query=query_obj,
+ query=query, # type: ignore[assignment]
sort_field=sort_field,
sort_direction=direction,
skip=skip,
@@ -106,12 +110,12 @@ async def query_events_advanced(
)
async def get_events_by_correlation(
- self,
- correlation_id: str,
- user_id: str,
- user_role: UserRole,
- include_all_users: bool = False,
- limit: int = 100,
+ self,
+ correlation_id: str,
+ user_id: str,
+ user_role: UserRole,
+ include_all_users: bool = False,
+ limit: int = 100,
) -> List[Event]:
events = await self.repository.get_events_by_correlation(correlation_id=correlation_id, limit=limit)
if not include_all_users or user_role != UserRole.ADMIN:
@@ -119,16 +123,14 @@ async def get_events_by_correlation(
return events
async def get_event_statistics(
- self,
- user_id: str,
- user_role: UserRole,
- start_time: datetime | None = None,
- end_time: datetime | None = None,
- include_all_users: bool = False,
+ self,
+ user_id: str,
+ user_role: UserRole,
+ start_time: datetime | None = None,
+ end_time: datetime | None = None,
+ include_all_users: bool = False,
) -> EventStatistics:
- match: dict[str, Any] | None = None
- if not include_all_users or user_role != UserRole.ADMIN:
- match = {"metadata.user_id": user_id}
+ match = {} if include_all_users else self._build_user_filter(user_id, user_role)
return await self.repository.get_event_statistics_filtered(
match=match,
start_time=start_time,
@@ -136,29 +138,29 @@ async def get_event_statistics(
)
async def get_event(
- self,
- event_id: str,
- user_id: str,
- user_role: UserRole,
+ self,
+ event_id: str,
+ user_id: str,
+ user_role: UserRole,
) -> Event | None:
event = await self.repository.get_event(event_id)
if not event:
return None
- event_user_id = event.metadata.user_id if event.metadata else None
- if event_user_id and event_user_id != user_id and user_role != UserRole.ADMIN:
+ event_user_id = event.metadata.user_id
+ if event_user_id != user_id and user_role != UserRole.ADMIN:
return None
return event
async def aggregate_events(
- self,
- user_id: str,
- user_role: UserRole,
- pipeline: List[Dict[str, Any]],
- limit: int = 100,
+ self,
+ user_id: str,
+ user_role: UserRole,
+ pipeline: List[Dict[str, Any]],
+ limit: int = 100,
) -> EventAggregationResult:
- user_filter = {"metadata.user_id": user_id}
+ user_filter = self._build_user_filter(user_id, user_role)
new_pipeline = list(pipeline)
- if user_role != UserRole.ADMIN:
+ if user_filter:
if new_pipeline and "$match" in new_pipeline[0]:
new_pipeline[0]["$match"] = {"$and": [new_pipeline[0]["$match"], user_filter]}
else:
@@ -166,38 +168,34 @@ async def aggregate_events(
return await self.repository.aggregate_events(new_pipeline, limit=limit)
async def list_event_types(
- self,
- user_id: str,
- user_role: UserRole,
+ self,
+ user_id: str,
+ user_role: UserRole,
) -> List[str]:
- match: dict[str, object] | None = None if user_role == UserRole.ADMIN else {"metadata.user_id": user_id}
+ match = self._build_user_filter(user_id, user_role)
return await self.repository.list_event_types(match=match)
async def delete_event_with_archival(
- self,
- event_id: str,
- deleted_by: str,
- deletion_reason: str = "Admin deletion via API",
+ self,
+ event_id: str,
+ deleted_by: str,
+ deletion_reason: str = "Admin deletion via API",
) -> Event | None:
- try:
- return await self.repository.delete_event_with_archival(
- event_id=event_id,
- deleted_by=deleted_by,
- deletion_reason=deletion_reason,
- )
- except Exception as e:
- logger.error(f"Failed to delete event {event_id}: {e}")
- return None
+ return await self.repository.delete_event_with_archival(
+ event_id=event_id,
+ deleted_by=deleted_by,
+ deletion_reason=deletion_reason,
+ )
async def get_aggregate_replay_info(self, aggregate_id: str) -> EventReplayInfo | None:
return await self.repository.get_aggregate_replay_info(aggregate_id)
async def get_events_by_aggregate(
- self,
- aggregate_id: str,
- event_types: List[str] | None = None,
- limit: int = 100,
- ) -> List[Event]:
+ self,
+ aggregate_id: str,
+ event_types: List[str] | None = None,
+ limit: int = 100,
+ ) -> list[Event]:
return await self.repository.get_events_by_aggregate(
aggregate_id=aggregate_id,
event_types=event_types,
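
To make the admin/user scoping in `aggregate_events` concrete, here is a worked example of the `$match` merge using plain dictionaries as local stand-ins; the empty filter an admin gets leaves the pipeline untouched:

```python
# Local stand-ins mirroring the merge logic in aggregate_events above.
user_filter = {"metadata.user_id": "user-123"}  # empty dict for admins
pipeline = [
    {"$match": {"event_type": "execution_completed"}},
    {"$group": {"_id": "$event_type", "count": {"$sum": 1}}},
]

new_pipeline = list(pipeline)
if user_filter and new_pipeline and "$match" in new_pipeline[0]:
    # Combine the caller's $match with the per-user filter.
    new_pipeline[0]["$match"] = {"$and": [new_pipeline[0]["$match"], user_filter]}

print(new_pipeline[0])
# {'$match': {'$and': [{'event_type': 'execution_completed'},
#                      {'metadata.user_id': 'user-123'}]}}
```
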
diff --git a/backend/app/services/execution_service.py b/backend/app/services/execution_service.py
index 8b0352d9..ae9e7ff2 100644
--- a/backend/app/services/execution_service.py
+++ b/backend/app/services/execution_service.py
@@ -1,7 +1,7 @@
-from contextlib import suppress
+from contextlib import contextmanager
from datetime import datetime
from time import time
-from typing import Any, TypeAlias
+from typing import Any, Generator, TypeAlias
from app.core.correlation import CorrelationContext
from app.core.exceptions import IntegrationException, ServiceError
@@ -10,8 +10,8 @@
from app.db.repositories.execution_repository import ExecutionRepository
from app.domain.enums.events import EventType
from app.domain.enums.execution import ExecutionStatus
-from app.domain.execution.models import DomainExecution
-from app.events.core.producer import UnifiedProducer
+from app.domain.execution import DomainExecution, ExecutionResultDomain, ResourceUsageDomain
+from app.events.core import UnifiedProducer
from app.events.event_store import EventStore
from app.infrastructure.kafka.events.base import BaseEvent
from app.infrastructure.kafka.events.execution import (
@@ -61,6 +61,15 @@ def __init__(
self.settings = settings
self.metrics = get_execution_metrics()
+ @contextmanager
+ def _track_active_execution(self) -> Generator[None, None, None]: # noqa: D401
+ """Increment active executions on enter and decrement on exit."""
+ self.metrics.increment_active_executions()
+ try:
+ yield
+ finally:
+ self.metrics.decrement_active_executions()
+
async def get_k8s_resource_limits(self) -> dict[str, Any]:
return {
"cpu_limit": self.settings.K8S_POD_CPU_LIMIT,
@@ -146,14 +155,9 @@ async def execute_script(
}
)
- # Track metrics
- self.metrics.increment_active_executions()
- created_execution: DomainExecution | None = None
-
- # Runtime selection relies on API schema validation
runtime_cfg = RUNTIME_REGISTRY[lang][lang_version]
- try:
+ with self._track_active_execution():
# Create execution record
created_execution = await self.execution_repo.create_execution(
DomainExecution(
@@ -196,18 +200,16 @@ async def execute_script(
metadata=metadata,
)
- with suppress(Exception):
- await self.event_store.store_event(event)
-
# Publish to Kafka; on failure, mark error and raise
try:
await self.producer.produce(event_to_produce=event)
- except Exception as e:
+ except Exception as e: # pragma: no cover - mapped behavior
self.metrics.record_script_execution(ExecutionStatus.ERROR, lang_and_version)
self.metrics.record_error(type(e).__name__)
- if created_execution:
- await self._update_execution_error(created_execution.execution_id,
- f"Failed to submit execution: {str(e)}")
+ await self._update_execution_error(
+ created_execution.execution_id,
+ f"Failed to submit execution: {str(e)}",
+ )
raise IntegrationException(status_code=500, detail="Failed to submit execution request") from e
# Success metrics and return
@@ -223,34 +225,22 @@ async def execute_script(
}
)
return created_execution
- finally:
- self.metrics.decrement_active_executions()
async def _update_execution_error(
self,
execution_id: str,
error_message: str
) -> None:
- """
- Update execution status to error.
-
- Args:
- execution_id: Execution identifier.
- error_message: Error message to set.
- """
- try:
- await self.execution_repo.update_execution(
- execution_id,
- {
- "status": ExecutionStatus.ERROR,
- "errors": error_message,
- }
- )
- except Exception as update_error:
- logger.error(
- f"Failed to update execution status: {update_error}",
- extra={"execution_id": execution_id}
- )
+ result = ExecutionResultDomain(
+ execution_id=execution_id,
+ status=ExecutionStatus.ERROR,
+ exit_code=-1,
+ stdout="",
+ stderr=error_message,
+ resource_usage=ResourceUsageDomain(0.0, 0, 0, 0),
+ metadata={},
+ )
+ await self.execution_repo.write_terminal_result(result)
async def get_execution_result(self, execution_id: str) -> DomainExecution:
"""
@@ -287,8 +277,8 @@ async def get_execution_result(self, execution_id: str) -> DomainExecution:
"status": execution.status,
"lang": execution.lang,
"lang_version": execution.lang_version,
- "has_output": bool(execution.output),
- "has_errors": bool(execution.errors),
+ "has_output": bool(execution.stdout),
+ "has_errors": bool(execution.stderr),
"resource_usage": execution.resource_usage,
}
)
@@ -464,7 +454,6 @@ async def delete_execution(self, execution_id: str) -> bool:
extra={"execution_id": execution_id}
)
- # Publish deletion event
await self._publish_deletion_event(execution_id)
return True
@@ -476,44 +465,27 @@ async def _publish_deletion_event(self, execution_id: str) -> None:
Args:
execution_id: UUID of deleted execution.
"""
- try:
- metadata = self._create_event_metadata()
-
- # Create proper cancellation event instead of raw dict
- event = ExecutionCancelledEvent(
- execution_id=execution_id,
- reason="user_requested",
- cancelled_by=metadata.user_id,
- metadata=metadata
- )
-
- # Store in event store
- with suppress(Exception):
- await self.event_store.store_event(event)
+ metadata = self._create_event_metadata()
- await self.producer.produce(
- event_to_produce=event,
- key=execution_id
- )
+ event = ExecutionCancelledEvent(
+ execution_id=execution_id,
+ reason="user_requested",
+ cancelled_by=metadata.user_id,
+ metadata=metadata
+ )
- logger.info(
- "Published cancellation event",
- extra={
- "execution_id": execution_id,
- "event_id": str(event.event_id),
- }
- )
+ await self.producer.produce(
+ event_to_produce=event,
+ key=execution_id
+ )
- except Exception as e:
- # Log but don't fail the deletion
- logger.error(
- "Failed to publish deletion event",
- extra={
- "execution_id": execution_id,
- "error": str(e)
- },
- exc_info=True
- )
+ logger.info(
+ "Published cancellation event",
+ extra={
+ "execution_id": execution_id,
+ "event_id": str(event.event_id),
+ }
+ )
async def get_execution_stats(
self,
diff --git a/backend/app/services/grafana_alert_processor.py b/backend/app/services/grafana_alert_processor.py
new file mode 100644
index 00000000..5689157e
--- /dev/null
+++ b/backend/app/services/grafana_alert_processor.py
@@ -0,0 +1,168 @@
+"""Grafana alert processing service."""
+
+from typing import Any
+
+from app.core.logging import logger
+from app.domain.enums.notification import NotificationSeverity
+from app.schemas_pydantic.grafana import GrafanaAlertItem, GrafanaWebhook
+from app.services.notification_service import NotificationService
+
+
+class GrafanaAlertProcessor:
+ """Processes Grafana alerts with reduced complexity."""
+
+ SEVERITY_MAPPING = {
+ "critical": NotificationSeverity.HIGH,
+ "error": NotificationSeverity.HIGH,
+ "warning": NotificationSeverity.MEDIUM,
+ "info": NotificationSeverity.LOW,
+ }
+
+ RESOLVED_STATUSES = {"ok", "resolved"}
+ DEFAULT_SEVERITY = "warning"
+ DEFAULT_TITLE = "Grafana Alert"
+ DEFAULT_MESSAGE = "Alert triggered"
+
+ def __init__(self, notification_service: NotificationService) -> None:
+ """Initialize the processor with required services."""
+ self.notification_service = notification_service
+ logger.info("GrafanaAlertProcessor initialized")
+
+ @classmethod
+ def extract_severity(cls, alert: GrafanaAlertItem, webhook: GrafanaWebhook) -> str:
+ """Extract severity from alert or webhook labels."""
+ alert_severity = (alert.labels or {}).get("severity")
+ webhook_severity = (webhook.commonLabels or {}).get("severity")
+ return (alert_severity or webhook_severity or cls.DEFAULT_SEVERITY).lower()
+
+ @classmethod
+ def map_severity(cls, severity_str: str, alert_status: str | None) -> NotificationSeverity:
+ """Map string severity to enum, considering alert status."""
+ if alert_status and alert_status.lower() in cls.RESOLVED_STATUSES:
+ return NotificationSeverity.LOW
+ return cls.SEVERITY_MAPPING.get(severity_str, NotificationSeverity.MEDIUM)
+
+ @classmethod
+ def extract_title(cls, alert: GrafanaAlertItem) -> str:
+ """Extract title from alert labels or annotations."""
+ return (
+ (alert.labels or {}).get("alertname")
+ or (alert.annotations or {}).get("title")
+ or cls.DEFAULT_TITLE
+ )
+
+ @classmethod
+ def build_message(cls, alert: GrafanaAlertItem) -> str:
+ """Build notification message from alert annotations."""
+ annotations = alert.annotations or {}
+ summary = annotations.get("summary")
+ description = annotations.get("description")
+
+ parts = [p for p in [summary, description] if p]
+ if parts:
+ return "\n\n".join(parts)
+ return summary or description or cls.DEFAULT_MESSAGE
+
+ @classmethod
+ def build_metadata(
+ cls,
+ alert: GrafanaAlertItem,
+ webhook: GrafanaWebhook,
+ severity: str
+ ) -> dict[str, Any]:
+ """Build metadata dictionary for the notification."""
+ return {
+ "grafana_status": alert.status or webhook.status,
+ "severity": severity,
+ **(webhook.commonLabels or {}),
+ **(alert.labels or {}),
+ }
+
+ async def process_single_alert(
+ self,
+ alert: GrafanaAlertItem,
+ webhook: GrafanaWebhook,
+ correlation_id: str,
+ ) -> tuple[bool, str | None]:
+ """Process a single Grafana alert.
+
+ Args:
+ alert: The Grafana alert to process
+ webhook: The webhook payload containing common data
+ correlation_id: Correlation ID for tracing
+
+ Returns:
+ Tuple of (success, error_message)
+ """
+ try:
+ severity_str = self.extract_severity(alert, webhook)
+ severity = self.map_severity(severity_str, alert.status)
+ title = self.extract_title(alert)
+ message = self.build_message(alert)
+ metadata = self.build_metadata(alert, webhook, severity_str)
+
+ await self.notification_service.create_system_notification(
+ title=title,
+ message=message,
+ severity=severity,
+ tags=["external_alert", "grafana", "entity:external_alert"],
+ metadata=metadata,
+ )
+ return True, None
+
+ except Exception as e:
+ error_msg = f"Failed to process Grafana alert: {e}"
+ logger.error(
+ error_msg,
+ extra={"correlation_id": correlation_id},
+ exc_info=True
+ )
+ return False, error_msg
+
+ async def process_webhook(
+ self,
+ webhook_payload: GrafanaWebhook,
+ correlation_id: str
+ ) -> tuple[int, list[str]]:
+ """Process all alerts in a Grafana webhook.
+
+ Args:
+ webhook_payload: The Grafana webhook payload
+ correlation_id: Correlation ID for tracing
+
+ Returns:
+ Tuple of (processed_count, errors)
+ """
+ alerts = webhook_payload.alerts or []
+ errors: list[str] = []
+ processed_count = 0
+
+ logger.info(
+ "Processing Grafana webhook",
+ extra={
+ "correlation_id": correlation_id,
+ "status": webhook_payload.status,
+ "alerts_count": len(alerts),
+ },
+ )
+
+ for alert in alerts:
+ success, error_msg = await self.process_single_alert(
+ alert, webhook_payload, correlation_id
+ )
+ if success:
+ processed_count += 1
+ elif error_msg:
+ errors.append(error_msg)
+
+ logger.info(
+ "Grafana webhook processing completed",
+ extra={
+ "correlation_id": correlation_id,
+ "alerts_received": len(alerts),
+ "alerts_processed": processed_count,
+ "errors_count": len(errors),
+ },
+ )
+
+ return processed_count, errors
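
Since `map_severity` and the other extractors are classmethods, the mapping can be exercised without wiring a `NotificationService`. A few illustrative checks against the table above:

```python
from app.domain.enums.notification import NotificationSeverity
from app.services.grafana_alert_processor import GrafanaAlertProcessor

assert GrafanaAlertProcessor.map_severity("critical", "firing") == NotificationSeverity.HIGH
assert GrafanaAlertProcessor.map_severity("warning", None) == NotificationSeverity.MEDIUM
# Resolved alerts are downgraded regardless of the labelled severity:
assert GrafanaAlertProcessor.map_severity("critical", "resolved") == NotificationSeverity.LOW
# Unknown labels fall back to MEDIUM:
assert GrafanaAlertProcessor.map_severity("unknown", "firing") == NotificationSeverity.MEDIUM
```
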
diff --git a/backend/app/services/idempotency/__init__.py b/backend/app/services/idempotency/__init__.py
index 2210a6f6..7ce275ed 100644
--- a/backend/app/services/idempotency/__init__.py
+++ b/backend/app/services/idempotency/__init__.py
@@ -1,11 +1,9 @@
-"""Idempotency services for event processing"""
-
+from app.domain.idempotency import IdempotencyStatus
from app.services.idempotency.idempotency_manager import (
IdempotencyConfig,
IdempotencyKeyStrategy,
IdempotencyManager,
IdempotencyResult,
- IdempotencyStatus,
create_idempotency_manager,
)
from app.services.idempotency.middleware import IdempotentConsumerWrapper, IdempotentEventHandler, idempotent_handler
diff --git a/backend/app/services/idempotency/idempotency_manager.py b/backend/app/services/idempotency/idempotency_manager.py
index 33b54b23..ec26f6bf 100644
--- a/backend/app/services/idempotency/idempotency_manager.py
+++ b/backend/app/services/idempotency/idempotency_manager.py
@@ -2,34 +2,26 @@
import hashlib
import json
from datetime import datetime, timedelta, timezone
-from typing import cast
+from typing import Protocol
-from motor.motor_asyncio import AsyncIOMotorDatabase
from pydantic import BaseModel
from pymongo.errors import DuplicateKeyError
from app.core.logging import logger
from app.core.metrics.context import get_database_metrics
-from app.core.utils import StringEnum
-from app.db.repositories.idempotency_repository import IdempotencyRepository
+from app.domain.idempotency import IdempotencyRecord, IdempotencyStats, IdempotencyStatus
from app.infrastructure.kafka.events import BaseEvent
-class IdempotencyStatus(StringEnum):
- PROCESSING = "processing"
- COMPLETED = "completed"
- FAILED = "failed"
- EXPIRED = "expired"
-
-
class IdempotencyResult(BaseModel):
is_duplicate: bool
status: IdempotencyStatus
created_at: datetime
- result: object | None = None
- error: str | None = None
completed_at: datetime | None = None
processing_duration_ms: int | None = None
+ error: str | None = None
+ has_cached_result: bool = False
+ key: str
class IdempotencyConfig(BaseModel):
@@ -65,11 +57,20 @@ def custom(event: BaseEvent, custom_key: str) -> str:
return f"{event.event_type}:{custom_key}"
+class IdempotencyRepoProtocol(Protocol):
+ async def find_by_key(self, key: str) -> IdempotencyRecord | None: ...
+ async def insert_processing(self, record: IdempotencyRecord) -> None: ...
+ async def update_record(self, record: IdempotencyRecord) -> int: ...
+ async def delete_key(self, key: str) -> int: ...
+ async def aggregate_status_counts(self, key_prefix: str) -> dict[str, int]: ...
+ async def health_check(self) -> None: ...
+
+
class IdempotencyManager:
- def __init__(self, config: IdempotencyConfig, repository: IdempotencyRepository) -> None:
+ def __init__(self, config: IdempotencyConfig, repository: IdempotencyRepoProtocol) -> None:
self.config = config
self.metrics = get_database_metrics()
- self._repo = repository
+ self._repo: IdempotencyRepoProtocol = repository
self._stats_update_task: asyncio.Task[None] | None = None
async def initialize(self) -> None:
@@ -103,6 +104,8 @@ def _generate_key(
raise ValueError(f"Invalid key strategy: {key_strategy}")
return f"{self.config.key_prefix}:{key}"
+
+
async def check_and_reserve(
self,
event: BaseEvent,
@@ -124,102 +127,97 @@ async def check_and_reserve(
async def _handle_existing_key(
self,
- existing: dict[str, object],
+ existing: IdempotencyRecord,
full_key: str,
event_type: str,
) -> IdempotencyResult:
- sv0 = existing.get("status")
- st0 = sv0 if isinstance(sv0, IdempotencyStatus) else IdempotencyStatus(str(sv0))
- if st0 == IdempotencyStatus.PROCESSING:
+ status = existing.status
+ if status == IdempotencyStatus.PROCESSING:
return await self._handle_processing_key(existing, full_key, event_type)
self.metrics.record_idempotency_duplicate_blocked(event_type)
- status = st0
- created_at_raw = cast(datetime | None, existing.get("created_at"))
- created_at = self._ensure_timezone_aware(created_at_raw or datetime.now(timezone.utc))
+ created_at = existing.created_at or datetime.now(timezone.utc)
return IdempotencyResult(
is_duplicate=True,
status=status,
- result=existing.get("result"),
- error=cast(str | None, existing.get("error")),
created_at=created_at,
- completed_at=cast(datetime | None, existing.get("completed_at")),
- processing_duration_ms=cast(int | None, existing.get("processing_duration_ms"))
+ completed_at=existing.completed_at,
+ processing_duration_ms=existing.processing_duration_ms,
+ error=existing.error,
+ has_cached_result=existing.result_json is not None,
+ key=full_key,
)
async def _handle_processing_key(
self,
- existing: dict[str, object],
+ existing: IdempotencyRecord,
full_key: str,
event_type: str,
) -> IdempotencyResult:
- created_at = self._ensure_timezone_aware(cast(datetime, existing["created_at"]))
+ created_at = existing.created_at
now = datetime.now(timezone.utc)
if now - created_at > timedelta(seconds=self.config.processing_timeout_seconds):
logger.warning(f"Idempotency key {full_key} processing timeout, allowing retry")
- await self._repo.update_set(full_key, {"created_at": now, "status": IdempotencyStatus.PROCESSING})
- return IdempotencyResult(is_duplicate=False, status=IdempotencyStatus.PROCESSING, created_at=now)
+ existing.created_at = now
+ existing.status = IdempotencyStatus.PROCESSING
+ await self._repo.update_record(existing)
+ return IdempotencyResult(is_duplicate=False, status=IdempotencyStatus.PROCESSING, created_at=now,
+ key=full_key)
self.metrics.record_idempotency_duplicate_blocked(event_type)
- return IdempotencyResult(is_duplicate=True, status=IdempotencyStatus.PROCESSING, created_at=created_at)
+ return IdempotencyResult(is_duplicate=True, status=IdempotencyStatus.PROCESSING, created_at=created_at,
+ has_cached_result=existing.result_json is not None, key=full_key)
async def _create_new_key(self, full_key: str, event: BaseEvent, ttl: int) -> IdempotencyResult:
created_at = datetime.now(timezone.utc)
try:
- await self._repo.insert_processing(
+ record = IdempotencyRecord(
key=full_key,
+ status=IdempotencyStatus.PROCESSING,
event_type=event.event_type,
event_id=str(event.event_id),
created_at=created_at,
ttl_seconds=ttl,
)
- return IdempotencyResult(is_duplicate=False, status=IdempotencyStatus.PROCESSING, created_at=created_at)
+ await self._repo.insert_processing(record)
+ return IdempotencyResult(is_duplicate=False, status=IdempotencyStatus.PROCESSING, created_at=created_at,
+ key=full_key)
except DuplicateKeyError:
             # Race: someone inserted the same key concurrently; treat as existing
existing = await self._repo.find_by_key(full_key)
if existing:
return await self._handle_existing_key(existing, full_key, event.event_type)
# If for some reason it's still not found, allow processing
- return IdempotencyResult(is_duplicate=False, status=IdempotencyStatus.PROCESSING, created_at=created_at)
-
- def _ensure_timezone_aware(self, dt: datetime) -> datetime:
- if dt.tzinfo is None:
- return dt.replace(tzinfo=timezone.utc)
- return dt
+ return IdempotencyResult(is_duplicate=False, status=IdempotencyStatus.PROCESSING, created_at=created_at,
+ key=full_key)
async def _update_key_status(
self,
full_key: str,
- existing: dict[str, object],
+ existing: IdempotencyRecord,
status: IdempotencyStatus,
- result: object | None = None,
+ cached_json: str | None = None,
error: str | None = None,
) -> bool:
- created_at = self._ensure_timezone_aware(cast(datetime, existing["created_at"]))
+ created_at = existing.created_at
completed_at = datetime.now(timezone.utc)
duration_ms = int((completed_at - created_at).total_seconds() * 1000)
-
- update_fields: dict[str, object] = {
- "status": status,
- "completed_at": completed_at,
- "processing_duration_ms": duration_ms,
- }
+ existing.status = status
+ existing.completed_at = completed_at
+ existing.processing_duration_ms = duration_ms
if error:
- update_fields["error"] = error
- if result is not None and self.config.enable_result_caching:
- result_json = json.dumps(result) if not isinstance(result, str) else result
- if len(result_json.encode()) <= self.config.max_result_size_bytes:
- update_fields["result"] = result
+ existing.error = error
+ if cached_json is not None and self.config.enable_result_caching:
+ if len(cached_json.encode()) <= self.config.max_result_size_bytes:
+ existing.result_json = cached_json
else:
logger.warning(f"Result too large to cache for key {full_key}")
- modified = await self._repo.update_set(full_key, update_fields)
- return modified > 0
+ return (await self._repo.update_record(existing)) > 0
async def mark_completed(
self,
event: BaseEvent,
- result: object | None = None,
key_strategy: str = "event_based",
custom_key: str | None = None,
fields: set[str] | None = None
@@ -233,7 +231,8 @@ async def mark_completed(
if not existing:
logger.warning(f"Idempotency key {full_key} not found when marking completed")
return False
- return await self._update_key_status(full_key, existing, IdempotencyStatus.COMPLETED, result=result)
+        # mark_completed does not accept a cached result; use mark_completed_with_json for cached payloads
+ return await self._update_key_status(full_key, existing, IdempotencyStatus.COMPLETED, cached_json=None)
async def mark_failed(
self,
@@ -248,7 +247,30 @@ async def mark_failed(
if not existing:
logger.warning(f"Idempotency key {full_key} not found when marking failed")
return False
- return await self._update_key_status(full_key, existing, IdempotencyStatus.FAILED, error=error)
+ return await self._update_key_status(full_key, existing, IdempotencyStatus.FAILED, cached_json=None,
+ error=error)
+
+ async def mark_completed_with_json(
+ self,
+ event: BaseEvent,
+ cached_json: str,
+ key_strategy: str = "event_based",
+ custom_key: str | None = None,
+ fields: set[str] | None = None
+ ) -> bool:
+ full_key = self._generate_key(event, key_strategy, custom_key, fields)
+ existing = await self._repo.find_by_key(full_key)
+ if not existing:
+ logger.warning(f"Idempotency key {full_key} not found when marking completed with cache")
+ return False
+ return await self._update_key_status(full_key, existing, IdempotencyStatus.COMPLETED, cached_json=cached_json)
+
+ async def get_cached_json(self, event: BaseEvent, key_strategy: str, custom_key: str | None,
+ fields: set[str] | None = None) -> str:
+ full_key = self._generate_key(event, key_strategy, custom_key, fields)
+ existing = await self._repo.find_by_key(full_key)
+ assert existing and existing.result_json is not None, "Invariant: cached result must exist when requested"
+ return existing.result_json
async def remove(
self,
@@ -265,24 +287,21 @@ async def remove(
logger.error(f"Failed to remove idempotency key: {e}")
return False
- async def get_stats(self) -> dict[str, object]:
+ async def get_stats(self) -> IdempotencyStats:
counts_raw = await self._repo.aggregate_status_counts(self.config.key_prefix)
- status_counts = {
+ status_counts: dict[IdempotencyStatus, int] = {
IdempotencyStatus.PROCESSING: counts_raw.get(IdempotencyStatus.PROCESSING, 0),
IdempotencyStatus.COMPLETED: counts_raw.get(IdempotencyStatus.COMPLETED, 0),
IdempotencyStatus.FAILED: counts_raw.get(IdempotencyStatus.FAILED, 0),
}
- return {"total_keys": sum(status_counts.values()),
- "status_counts": status_counts,
- "prefix": self.config.key_prefix}
+ total = sum(status_counts.values())
+ return IdempotencyStats(total_keys=total, status_counts=status_counts, prefix=self.config.key_prefix)
async def _update_stats_loop(self) -> None:
while True:
try:
stats = await self.get_stats()
- from typing import cast
- total_keys = cast(int, stats.get("total_keys", 0))
- self.metrics.update_idempotency_keys_active(total_keys, self.config.key_prefix)
+ self.metrics.update_idempotency_keys_active(stats.total_keys, self.config.key_prefix)
await asyncio.sleep(60)
except asyncio.CancelledError:
break
@@ -292,9 +311,8 @@ async def _update_stats_loop(self) -> None:
def create_idempotency_manager(
- database: AsyncIOMotorDatabase, config: IdempotencyConfig | None = None
+ *,
+ repository: IdempotencyRepoProtocol,
+ config: IdempotencyConfig | None = None,
) -> IdempotencyManager:
- if config is None:
- config = IdempotencyConfig()
- repository = IdempotencyRepository(database, collection_name=config.collection_name)
- return IdempotencyManager(config, repository)
+ return IdempotencyManager(config or IdempotencyConfig(), repository)
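
Because the manager is now typed against `IdempotencyRepoProtocol` rather than a concrete Mongo repository, anything implementing those six coroutines can back it. A minimal in-memory sketch (for tests or local runs), assuming only the `IdempotencyRecord` fields used above and deliberately ignoring TTL expiry:

```python
# Minimal in-memory implementation of IdempotencyRepoProtocol; a sketch for tests,
# not the PR's Redis or Mongo repositories. Only the record fields the manager
# uses (key, status, result_json, ...) are relied upon; TTL expiry is ignored.
from pymongo.errors import DuplicateKeyError

from app.domain.idempotency import IdempotencyRecord


class InMemoryIdempotencyRepository:
    def __init__(self) -> None:
        self._records: dict[str, IdempotencyRecord] = {}

    async def find_by_key(self, key: str) -> IdempotencyRecord | None:
        return self._records.get(key)

    async def insert_processing(self, record: IdempotencyRecord) -> None:
        # Mirror the uniqueness guarantee the manager relies on for race handling.
        if record.key in self._records:
            raise DuplicateKeyError("duplicate idempotency key")
        self._records[record.key] = record

    async def update_record(self, record: IdempotencyRecord) -> int:
        if record.key not in self._records:
            return 0
        self._records[record.key] = record
        return 1

    async def delete_key(self, key: str) -> int:
        return 1 if self._records.pop(key, None) is not None else 0

    async def aggregate_status_counts(self, key_prefix: str) -> dict[str, int]:
        counts: dict[str, int] = {}
        for record in self._records.values():
            if record.key.startswith(key_prefix):
                # Keyed by the plain status string value.
                counts[record.status.value] = counts.get(record.status.value, 0) + 1
        return counts

    async def health_check(self) -> None:
        return None
```

Passing this to `create_idempotency_manager(repository=InMemoryIdempotencyRepository(), config=IdempotencyConfig())` then yields a manager with no external dependencies.
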
diff --git a/backend/app/services/idempotency/middleware.py b/backend/app/services/idempotency/middleware.py
index e18cfae9..465c8f9a 100644
--- a/backend/app/services/idempotency/middleware.py
+++ b/backend/app/services/idempotency/middleware.py
@@ -6,8 +6,7 @@
from app.core.logging import logger
from app.domain.enums.events import EventType
from app.domain.enums.kafka import KafkaTopic
-from app.events.core.consumer import UnifiedConsumer
-from app.events.core.dispatcher import EventDispatcher
+from app.events.core import EventDispatcher, UnifiedConsumer
from app.infrastructure.kafka.events.base import BaseEvent
from app.services.idempotency.idempotency_manager import IdempotencyManager
@@ -78,7 +77,6 @@ async def __call__(self, event: BaseEvent) -> None:
# Mark as completed
await self.idempotency_manager.mark_completed(
event=event,
- result=None, # Handlers return None
key_strategy=self.key_strategy,
custom_key=custom_key,
fields=self.fields
diff --git a/backend/app/services/idempotency/redis_repository.py b/backend/app/services/idempotency/redis_repository.py
new file mode 100644
index 00000000..ac144778
--- /dev/null
+++ b/backend/app/services/idempotency/redis_repository.py
@@ -0,0 +1,141 @@
+from __future__ import annotations
+
+import json
+from datetime import datetime, timezone
+from typing import Any, Dict
+
+import redis.asyncio as redis
+from pymongo.errors import DuplicateKeyError
+
+from app.domain.idempotency import IdempotencyRecord, IdempotencyStatus
+
+
+def _iso(dt: datetime) -> str:
+ return dt.astimezone(timezone.utc).isoformat()
+
+
+def _json_default(obj: Any) -> str:
+ if isinstance(obj, datetime):
+ return _iso(obj)
+ return str(obj)
+
+
+def _parse_iso_datetime(v: str | None) -> datetime | None:
+ if not v:
+ return None
+ try:
+ return datetime.fromisoformat(v.replace("Z", "+00:00"))
+ except Exception:
+ return None
+
+
+class RedisIdempotencyRepository:
+ """Redis-backed repository compatible with IdempotencyManager expectations.
+
+ Key shape:
@@ -308,4 +306,4 @@
.relative {
z-index: 40;
}
-
\ No newline at end of file
+
diff --git a/frontend/src/lib/auth-init.js b/frontend/src/lib/auth-init.js
new file mode 100644
index 00000000..cbbc1388
--- /dev/null
+++ b/frontend/src/lib/auth-init.js
@@ -0,0 +1,190 @@
+import { get } from 'svelte/store';
+import { isAuthenticated, username, userId, userRole, userEmail, csrfToken, verifyAuth } from '../stores/auth.js';
+import { loadUserSettings } from './user-settings.js';
+
+/**
+ * Authentication initialization service
+ * This runs before any components mount to ensure auth state is ready
+ */
+export class AuthInitializer {
+ static initialized = false;
+ static initPromise = null;
+
+ /**
+ * Initialize authentication state from localStorage and verify with backend
+ * This should be called once at app startup
+ */
+ static async initialize() {
+ // If already initialized or initializing, return the existing promise
+ if (this.initialized) {
+ return true;
+ }
+
+ if (this.initPromise) {
+ return this.initPromise;
+ }
+
+ // Create initialization promise
+ this.initPromise = this._performInitialization();
+
+ try {
+ const result = await this.initPromise;
+ this.initialized = true;
+ return result;
+ } catch (error) {
+ console.error('Auth initialization failed:', error);
+ this.initialized = false;
+ throw error;
+ } finally {
+ this.initPromise = null;
+ }
+ }
+
+ static async _performInitialization() {
+ console.log('[AuthInit] Starting authentication initialization...');
+
+ // Check if we have persisted auth state
+ const persistedAuth = this._getPersistedAuth();
+
+ if (persistedAuth) {
+ return await this._handlePersistedAuth(persistedAuth);
+ }
+
+ return await this._handleNoPersistedAuth();
+ }
+
+ static async _handlePersistedAuth(persistedAuth) {
+ console.log('[AuthInit] Found persisted auth state, verifying with backend...');
+
+ // Set stores immediately to avoid UI flicker
+ this._setAuthStores(persistedAuth);
+
+ try {
+ const isValid = await verifyAuth(true); // Force refresh
+
+ if (!isValid) {
+ console.log('[AuthInit] Authentication invalid, clearing state');
+ this._clearAuth();
+ return false;
+ }
+
+ console.log('[AuthInit] Authentication verified successfully');
+ await this._loadUserSettingsSafely();
+ return true;
+
+ } catch (error) {
+ console.error('[AuthInit] Verification failed:', error);
+ return this._handleVerificationError(persistedAuth);
+ }
+ }
+
+ static async _handleNoPersistedAuth() {
+ console.log('[AuthInit] No persisted auth state found');
+
+ try {
+ const isValid = await verifyAuth();
+ console.log('[AuthInit] Backend verification result:', isValid);
+
+ if (isValid) {
+ await this._loadUserSettingsSafely();
+ }
+
+ return isValid;
+ } catch (error) {
+ console.error('[AuthInit] Backend verification failed:', error);
+ this._clearAuth();
+ return false;
+ }
+ }
+
+ static _setAuthStores(authData) {
+ isAuthenticated.set(true);
+ username.set(authData.username);
+ userId.set(authData.userId);
+ userRole.set(authData.userRole);
+ userEmail.set(authData.userEmail);
+ csrfToken.set(authData.csrfToken);
+ }
+
+ static async _loadUserSettingsSafely() {
+ try {
+ await loadUserSettings();
+ console.log('[AuthInit] User settings loaded');
+ } catch (error) {
+ console.warn('[AuthInit] Failed to load user settings:', error);
+ // Continue even if settings fail to load
+ }
+ }
+
+ static _handleVerificationError(persistedAuth) {
+ // On network error, keep the persisted state if it's recent
+ if (this._isRecentAuth(persistedAuth)) {
+ console.log('[AuthInit] Network error but auth is recent, keeping state');
+ return true;
+ }
+
+ console.log('[AuthInit] Network error and auth is stale, clearing state');
+ this._clearAuth();
+ return false;
+ }
+
+ static _getPersistedAuth() {
+ try {
+ const authData = localStorage.getItem('authState');
+ if (!authData) return null;
+
+ const parsed = JSON.parse(authData);
+
+ // Check if auth data is still fresh (24 hours)
+ if (Date.now() - parsed.timestamp > 24 * 60 * 60 * 1000) {
+ localStorage.removeItem('authState');
+ return null;
+ }
+
+ return parsed;
+ } catch (e) {
+ console.error('[AuthInit] Failed to parse persisted auth:', e);
+ return null;
+ }
+ }
+
+ static _isRecentAuth(authData) {
+ // Consider auth recent if less than 5 minutes old
+ return authData && (Date.now() - authData.timestamp < 5 * 60 * 1000);
+ }
+
+ static _clearAuth() {
+ isAuthenticated.set(false);
+ username.set(null);
+ userId.set(null);
+ userRole.set(null);
+ userEmail.set(null);
+ csrfToken.set(null);
+ localStorage.removeItem('authState');
+ }
+
+ /**
+ * Check if user is authenticated (after initialization)
+ */
+ static isAuthenticated() {
+ if (!this.initialized) {
+ console.warn('[AuthInit] Checking auth before initialization');
+ return false;
+ }
+ return get(isAuthenticated);
+ }
+
+ /**
+ * Wait for initialization to complete
+ */
+ static async waitForInit() {
+ if (this.initialized) return true;
+ if (this.initPromise) return this.initPromise;
+ return this.initialize();
+ }
+}
+
+// Export singleton instance methods for convenience
+export const initializeAuth = () => AuthInitializer.initialize();
+export const waitForAuth = () => AuthInitializer.waitForInit();
+export const checkAuth = () => AuthInitializer.isAuthenticated();
\ No newline at end of file
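
To show how this initializer is intended to be consumed, here is a minimal bootstrap sketch. The `main.js` entry point, the `App` component, the `#app` mount target, and the Svelte 4-style constructor are assumptions for illustration and are not part of this diff.

```js
// main.js (hypothetical entry point, not part of this diff)
import App from './App.svelte';
import { initializeAuth } from './lib/auth-init.js';

async function start() {
  try {
    // Restore persisted state, verify it with the backend, and load settings
    // before any component mounts, so route guards never see a half-initialized store.
    await initializeAuth();
  } catch (error) {
    // A failed initialization leaves the auth stores cleared; render the
    // logged-out UI rather than blocking the app.
    console.error('Auth initialization failed:', error);
  }

  // Svelte 4-style mount; adjust for the project's actual Svelte version.
  new App({ target: document.getElementById('app') });
}

start();
```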
diff --git a/frontend/src/lib/auth-utils.js b/frontend/src/lib/auth-utils.js
new file mode 100644
index 00000000..95eb6fe6
--- /dev/null
+++ b/frontend/src/lib/auth-utils.js
@@ -0,0 +1,8 @@
+import { clearCache } from './settings-cache.js';
+
+/**
+ * Clear the settings cache (e.g., on logout)
+ */
+export function clearSettingsCache() {
+ clearCache();
+}
\ No newline at end of file
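
A short sketch of where this helper fits, assuming a cookie-based logout endpoint at `/api/v1/auth/logout` (an assumption; the real logout flow is not shown in this diff):

```js
// Illustrative logout flow (endpoint path and helper name are assumptions,
// not part of this diff): clear the settings cache alongside the server session.
import { clearSettingsCache } from './auth-utils.js';

export async function logout() {
  await fetch('/api/v1/auth/logout', {
    method: 'POST',
    credentials: 'include'  // send the httpOnly session cookie
  });
  clearSettingsCache();     // drop cached user settings before the next login
}
```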
diff --git a/frontend/src/lib/eventStreamClient.js b/frontend/src/lib/eventStreamClient.js
new file mode 100644
index 00000000..1da519ff
--- /dev/null
+++ b/frontend/src/lib/eventStreamClient.js
@@ -0,0 +1,265 @@
+export class EventStreamClient {
+ constructor(url, options = {}) {
+ this.url = url;
+ this.options = {
+ withCredentials: true,
+ reconnectDelay: 1000,
+ maxReconnectDelay: 30000,
+ reconnectDelayMultiplier: 1.5,
+ maxReconnectAttempts: 10, // give up after 10 consecutive failed attempts (null = retry indefinitely)
+ heartbeatTimeout: 20000, // 20 seconds (considering 10s heartbeat interval)
+ onOpen: () => {},
+ onError: () => {},
+ onClose: () => {},
+ onMessage: () => {},
+ onReconnect: () => {},
+ ...options
+ };
+
+ this.eventSource = null;
+ this.reconnectAttempts = 0;
+ this.reconnectDelay = this.options.reconnectDelay;
+ this.reconnectTimer = null;
+ this.heartbeatTimer = null;
+ this.lastHeartbeat = Date.now();
+ this.connectionState = 'disconnected'; // disconnected, connecting, connected
+ this.eventHandlers = new Map();
+ this.closed = false;
+ }
+
+ /**
+ * Connect to the event stream
+ */
+ connect() {
+ if (this.closed) {
+ console.warn('EventStreamClient: Cannot connect after close()');
+ return;
+ }
+
+ if (this.eventSource && this.eventSource.readyState !== EventSource.CLOSED) {
+ console.warn('EventStreamClient: Already connected');
+ return;
+ }
+
+ this.connectionState = 'connecting';
+ this._createEventSource();
+ }
+
+ /**
+ * Close the connection and cleanup
+ */
+ close() {
+ this.closed = true;
+ this.connectionState = 'disconnected';
+
+ if (this.reconnectTimer) {
+ clearTimeout(this.reconnectTimer);
+ this.reconnectTimer = null;
+ }
+
+ if (this.heartbeatTimer) {
+ clearTimeout(this.heartbeatTimer);
+ this.heartbeatTimer = null;
+ }
+
+ if (this.eventSource) {
+ this.eventSource.close();
+ this.eventSource = null;
+ }
+
+ this.options.onClose();
+ }
+
+ /**
+ * Add event listener for specific event types
+ */
+ addEventListener(eventType, handler) {
+ if (!this.eventHandlers.has(eventType)) {
+ this.eventHandlers.set(eventType, new Set());
+ }
+ this.eventHandlers.get(eventType).add(handler);
+
+ // Add to current EventSource if connected
+ if (this.eventSource && this.eventSource.readyState !== EventSource.CLOSED) {
+ this.eventSource.addEventListener(eventType, handler);
+ }
+ }
+
+ /**
+ * Remove event listener
+ */
+ removeEventListener(eventType, handler) {
+ if (this.eventHandlers.has(eventType)) {
+ this.eventHandlers.get(eventType).delete(handler);
+
+ if (this.eventHandlers.get(eventType).size === 0) {
+ this.eventHandlers.delete(eventType);
+ }
+ }
+
+ // Remove from current EventSource if connected
+ if (this.eventSource) {
+ this.eventSource.removeEventListener(eventType, handler);
+ }
+ }
+
+ /**
+ * Get current connection state
+ */
+ getState() {
+ return this.connectionState;
+ }
+
+ /**
+ * Create and setup EventSource
+ */
+ _createEventSource() {
+ try {
+ // No need to add token - using httpOnly cookies
+ this.eventSource = new EventSource(this.url, {
+ withCredentials: this.options.withCredentials
+ });
+
+ // Setup event handlers
+ this.eventSource.onopen = (event) => {
+ console.log('EventStreamClient: Connection opened');
+ this.connectionState = 'connected';
+ this.reconnectAttempts = 0;
+ this.reconnectDelay = this.options.reconnectDelay;
+ this.lastHeartbeat = Date.now();
+ this._startHeartbeatMonitor();
+ this.options.onOpen(event);
+ };
+
+ this.eventSource.onerror = (event) => {
+ console.error('EventStreamClient: Connection error', event);
+ this.connectionState = 'disconnected';
+ this.options.onError(event);
+
+ if (this.eventSource.readyState === EventSource.CLOSED) {
+ this._handleDisconnection();
+ }
+ };
+
+ this.eventSource.onmessage = (event) => {
+ this.options.onMessage(event);
+ };
+
+ // Re-attach all registered event handlers
+ for (const [eventType, handlers] of this.eventHandlers) {
+ for (const handler of handlers) {
+ this.eventSource.addEventListener(eventType, handler);
+ }
+ }
+
+ // Handle heartbeat events
+ this.eventSource.addEventListener('heartbeat', (event) => {
+ this.lastHeartbeat = Date.now();
+ console.debug('EventStreamClient: Heartbeat received');
+ });
+
+ } catch (error) {
+ console.error('EventStreamClient: Failed to create EventSource', error);
+ this.connectionState = 'disconnected';
+ this._handleDisconnection();
+ }
+ }
+
+ /**
+ * Handle disconnection and reconnection logic
+ */
+ _handleDisconnection() {
+ if (this.closed) {
+ return;
+ }
+
+ if (this.heartbeatTimer) {
+ clearTimeout(this.heartbeatTimer);
+ this.heartbeatTimer = null;
+ }
+
+ if (this.eventSource) {
+ this.eventSource.close();
+ this.eventSource = null;
+ }
+
+ // Check if we should attempt reconnection
+ if (this.options.maxReconnectAttempts !== null &&
+ this.reconnectAttempts >= this.options.maxReconnectAttempts) {
+ console.error('EventStreamClient: Max reconnection attempts reached');
+ this.close();
+ return;
+ }
+
+ // Schedule reconnection
+ this.reconnectAttempts++;
+ console.log(`EventStreamClient: Reconnecting in ${this.reconnectDelay}ms (attempt ${this.reconnectAttempts})`);
+
+ this.options.onReconnect(this.reconnectAttempts);
+
+ this.reconnectTimer = setTimeout(() => {
+ this.connect();
+ }, this.reconnectDelay);
+
+ // Increase delay for next attempt
+ this.reconnectDelay = Math.min(
+ this.reconnectDelay * this.options.reconnectDelayMultiplier,
+ this.options.maxReconnectDelay
+ );
+ }
+
+ /**
+ * Monitor heartbeat to detect stale connections
+ */
+ _startHeartbeatMonitor() {
+ if (this.heartbeatTimer) {
+ clearTimeout(this.heartbeatTimer);
+ }
+
+ this.heartbeatTimer = setTimeout(() => {
+ const timeSinceLastHeartbeat = Date.now() - this.lastHeartbeat;
+
+ if (timeSinceLastHeartbeat > this.options.heartbeatTimeout) {
+ console.warn('EventStreamClient: Heartbeat timeout, reconnecting...');
+ this._handleDisconnection();
+ } else {
+ // Continue monitoring
+ this._startHeartbeatMonitor();
+ }
+ }, this.options.heartbeatTimeout);
+ }
+}
+
+/**
+ * Create an EventStreamClient for execution updates
+ */
+export function createExecutionEventStream(executionId, handlers = {}) {
+ const url = `/api/v1/events/executions/${executionId}`;
+
+ const client = new EventStreamClient(url, {
+ onOpen: handlers.onOpen || (() => console.log('Execution event stream connected')),
+ onError: handlers.onError || ((error) => console.error('Execution event stream error:', error)),
+ onClose: handlers.onClose || (() => console.log('Execution event stream closed')),
+ onMessage: handlers.onMessage || ((event) => console.log('Execution event:', event)),
+ onReconnect: handlers.onReconnect || ((attempt) => console.log(`Reconnecting... (attempt ${attempt})`))
+ });
+
+ // Add specific event handlers
+ if (handlers.onStatus) {
+ client.addEventListener('status', handlers.onStatus);
+ }
+
+ if (handlers.onLog) {
+ client.addEventListener('log', handlers.onLog);
+ }
+
+ if (handlers.onComplete) {
+ client.addEventListener('complete', handlers.onComplete);
+ }
+
+ if (handlers.onConnected) {
+ client.addEventListener('connected', handlers.onConnected);
+ }
+
+ return client;
+}
\ No newline at end of file
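
For reference, a minimal sketch of how a component could consume this factory; the execution id and the JSON shape of the `status` payload are assumptions for illustration, not part of this diff:

```js
import { createExecutionEventStream } from './lib/eventStreamClient.js';

const executionId = 'abc123'; // placeholder id

const client = createExecutionEventStream(executionId, {
  onConnected: () => console.log('Subscribed to execution updates'),
  onStatus: (event) => {
    // Assumes the 'status' event carries a JSON payload with a status field.
    const data = JSON.parse(event.data);
    console.log('Execution status:', data.status);
  },
  onComplete: (event) => {
    console.log('Execution finished:', event.data);
    client.close(); // stop heartbeat monitoring and reconnection once done
  },
  onReconnect: (attempt) => console.warn(`SSE reconnect attempt ${attempt}`)
});

client.connect();
```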
diff --git a/frontend/src/lib/fetch-utils.js b/frontend/src/lib/fetch-utils.js
new file mode 100644
index 00000000..196f714d
--- /dev/null
+++ b/frontend/src/lib/fetch-utils.js
@@ -0,0 +1,90 @@
+import { backOff } from 'exponential-backoff';
+
+/**
+ * Check if an error should trigger a retry
+ */
+export function shouldRetry(error) {
+ // Check if error exists
+ if (!error) {
+ return false;
+ }
+
+ // Network errors
+ if (error.name === 'TypeError' && error.message?.includes('fetch')) {
+ return true;
+ }
+
+ // Timeout and abort errors should not be retried
+ if (error.name === 'TimeoutError' || error.name === 'AbortError') {
+ return false;
+ }
+
+ // If it's a Response object, check status codes
+ if (error instanceof Response) {
+ const status = error.status;
+ return status >= 500 || status === 408 || status === 429;
+ }
+
+ return false;
+}
+
+/**
+ * Base fetch with retry logic using exponential-backoff
+ * @param {string} url - The URL to fetch
+ * @param {Object} options - Fetch options
+ * @param {Object} retryOptions - Retry configuration
+ * @returns {Promise<Response>}
{@html sanitizeOutput(ansiConverter.toHtml(result.output || ''))}
+ {@html sanitizeOutput(ansiConverter.toHtml(result.stdout || ''))}
{@html sanitizeOutput(ansiConverter.toHtml(result.errors || ''))}
+ {@html sanitizeOutput(ansiConverter.toHtml(result.stderr || ''))}
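
The rest of `fetch-utils.js` (including the retry wrapper its JSDoc describes) is cut off above. As a hedged sketch of how the exported `shouldRetry` predicate pairs with `backOff` from `exponential-backoff`, an illustrative wrapper might look like the following; it is not the file's actual implementation:

```js
import { backOff } from 'exponential-backoff';
import { shouldRetry } from './fetch-utils.js';

// Illustrative only: throw non-OK responses so shouldRetry() can inspect
// their status codes (5xx, 408, 429), and let backOff handle the delays.
export async function fetchWithBackoff(url, options = {}, retryOptions = {}) {
  return backOff(async () => {
    const response = await fetch(url, options);
    if (!response.ok) {
      throw response; // shouldRetry() recognizes Response instances
    }
    return response;
  }, {
    numOfAttempts: retryOptions.numOfAttempts ?? 3,
    startingDelay: retryOptions.startingDelay ?? 500,
    timeMultiple: 2,
    retry: (error) => shouldRetry(error)
  });
}
```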