diff --git a/backend/app/core/providers.py b/backend/app/core/providers.py index 1daf530d..6c1f8eed 100644 --- a/backend/app/core/providers.py +++ b/backend/app/core/providers.py @@ -46,7 +46,7 @@ from app.events.core import ProducerConfig, UnifiedProducer from app.events.event_store import EventStore, create_event_store from app.events.event_store_consumer import EventStoreConsumer, create_event_store_consumer -from app.events.schema.schema_registry import SchemaRegistryManager, create_schema_registry_manager +from app.events.schema.schema_registry import SchemaRegistryManager from app.infrastructure.kafka.topics import get_all_topics from app.services.admin import AdminEventsService, AdminSettingsService, AdminUserService from app.services.auth_service import AuthService @@ -192,7 +192,7 @@ class EventProvider(Provider): @provide def get_schema_registry(self, settings: Settings, logger: logging.Logger) -> SchemaRegistryManager: - return create_schema_registry_manager(settings, logger) + return SchemaRegistryManager(settings, logger) @provide async def get_event_store(self, schema_registry: SchemaRegistryManager, logger: logging.Logger) -> EventStore: diff --git a/backend/app/dlq/manager.py b/backend/app/dlq/manager.py index 2fad0295..e5167324 100644 --- a/backend/app/dlq/manager.py +++ b/backend/app/dlq/manager.py @@ -4,7 +4,8 @@ from datetime import datetime, timezone from typing import Any, Awaitable, Callable -from confluent_kafka import Consumer, KafkaError, Message, Producer +from aiokafka import AIOKafkaConsumer, AIOKafkaProducer +from aiokafka.errors import KafkaError from opentelemetry.trace import SpanKind from app.core.lifecycle import LifecycleEnabled @@ -30,8 +31,8 @@ class DLQManager(LifecycleEnabled): def __init__( self, settings: Settings, - consumer: Consumer, - producer: Producer, + consumer: AIOKafkaConsumer, + producer: AIOKafkaProducer, schema_registry: SchemaRegistryManager, logger: logging.Logger, dlq_topic: KafkaTopic = KafkaTopic.DEAD_LETTER_QUEUE, @@ -48,8 +49,8 @@ def __init__( self.default_retry_policy = default_retry_policy or RetryPolicy( topic="default", strategy=RetryStrategy.EXPONENTIAL_BACKOFF ) - self.consumer: Consumer = consumer - self.producer: Producer = producer + self.consumer: AIOKafkaConsumer = consumer + self.producer: AIOKafkaProducer = producer self._process_task: asyncio.Task[None] | None = None self._monitor_task: asyncio.Task[None] | None = None @@ -116,13 +117,13 @@ def _message_to_doc(self, message: DLQMessage) -> DLQMessageDocument: headers=message.headers, ) - def _kafka_msg_to_message(self, msg: Message) -> DLQMessage: - """Parse Kafka message into DLQMessage.""" - raw_bytes = msg.value() + def _kafka_msg_to_message(self, msg: Any) -> DLQMessage: + """Parse Kafka ConsumerRecord into DLQMessage.""" + raw_bytes = msg.value raw: str = raw_bytes.decode("utf-8") if isinstance(raw_bytes, (bytes, bytearray)) else str(raw_bytes or "") data: dict[str, Any] = json.loads(raw) if raw else {} - headers_list = msg.headers() or [] + headers_list = msg.headers or [] headers: dict[str, str] = {} for k, v in headers_list: headers[str(k)] = v.decode("utf-8") if isinstance(v, (bytes, bytearray)) else (v or "") @@ -141,15 +142,16 @@ def _kafka_msg_to_message(self, msg: Message) -> DLQMessage: else datetime.now(timezone.utc), status=DLQMessageStatus(data.get("status", DLQMessageStatus.PENDING)), producer_id=data.get("producer_id", headers.get("producer_id", "unknown")), - dlq_offset=msg.offset(), - dlq_partition=msg.partition(), + dlq_offset=msg.offset, + 
dlq_partition=msg.partition, headers=headers, ) async def _on_start(self) -> None: """Start DLQ manager.""" - topic_name = f"{self.settings.KAFKA_TOPIC_PREFIX}{self.dlq_topic}" - self.consumer.subscribe([topic_name]) + # Start producer and consumer + await self.producer.start() + await self.consumer.start() # Start processing tasks self._process_task = asyncio.create_task(self._process_messages()) @@ -169,8 +171,8 @@ async def _on_stop(self) -> None: pass # Stop Kafka clients - self.consumer.close() - self.producer.flush(10) + await self.consumer.stop() + await self.producer.stop() self.logger.info("DLQ Manager stopped") @@ -181,9 +183,6 @@ async def _process_messages(self) -> None: if msg is None: continue - if not await self._validate_message(msg): - continue - start_time = asyncio.get_running_loop().time() dlq_message = self._kafka_msg_to_message(msg) @@ -195,23 +194,19 @@ async def _process_messages(self) -> None: self.logger.error(f"Error in DLQ processing loop: {e}") await asyncio.sleep(5) - async def _poll_message(self) -> Message | None: - """Poll for a message from Kafka.""" - return await asyncio.to_thread(self.consumer.poll, timeout=1.0) - - async def _validate_message(self, msg: Message) -> bool: - """Validate the Kafka message.""" - if msg.error(): - error = msg.error() - if error and error.code() == KafkaError._PARTITION_EOF: - return False - self.logger.error(f"Consumer error: {error}") - return False - return True - - def _extract_headers(self, msg: Message) -> dict[str, str]: - """Extract headers from Kafka message.""" - headers_list = msg.headers() or [] + async def _poll_message(self) -> Any | None: + """Poll for a message from Kafka using async getone().""" + try: + return await asyncio.wait_for(self.consumer.getone(), timeout=1.0) + except asyncio.TimeoutError: + return None + except KafkaError as e: + self.logger.error(f"Consumer error: {e}") + return None + + def _extract_headers(self, msg: Any) -> dict[str, str]: + """Extract headers from Kafka ConsumerRecord.""" + headers_list = msg.headers or [] headers: dict[str, str] = {} for k, v in headers_list: headers[str(k)] = v.decode("utf-8") if isinstance(v, (bytes, bytearray)) else (v or "") @@ -222,7 +217,7 @@ async def _record_message_metrics(self, dlq_message: DLQMessage) -> None: self.metrics.record_dlq_message_received(dlq_message.original_topic, dlq_message.event_type) self.metrics.record_dlq_message_age(dlq_message.age_seconds) - async def _process_message_with_tracing(self, msg: Message, dlq_message: DLQMessage) -> None: + async def _process_message_with_tracing(self, msg: Any, dlq_message: DLQMessage) -> None: """Process message with distributed tracing.""" headers = self._extract_headers(msg) ctx = extract_trace_context(headers) @@ -242,7 +237,7 @@ async def _process_message_with_tracing(self, msg: Message, dlq_message: DLQMess async def _commit_and_record_duration(self, start_time: float) -> None: """Commit offset and record processing duration.""" - await asyncio.to_thread(self.consumer.commit, asynchronous=False) + await self.consumer.commit() duration = asyncio.get_running_loop().time() - start_time self.metrics.record_dlq_processing_duration(duration, "process") @@ -324,13 +319,13 @@ async def _retry_message(self, message: DLQMessage) -> None: "dlq_retry_timestamp": datetime.now(timezone.utc).isoformat(), } hdrs = inject_trace_context(hdrs) - kafka_headers: list[tuple[str, str | bytes]] = [(k, v.encode()) for k, v in hdrs.items()] + kafka_headers: list[tuple[str, bytes]] = [(k, v.encode()) for k, v in 
hdrs.items()] # Get the original event event = message.event - await asyncio.to_thread( - self.producer.produce, + # Send to retry topic + await self.producer.send_and_wait( topic=retry_topic, value=json.dumps(event.to_dict()).encode(), key=message.event_id.encode(), @@ -338,17 +333,13 @@ async def _retry_message(self, message: DLQMessage) -> None: ) # Send to original topic - await asyncio.to_thread( - self.producer.produce, + await self.producer.send_and_wait( topic=message.original_topic, value=json.dumps(event.to_dict()).encode(), key=message.event_id.encode(), headers=kafka_headers, ) - # Flush to ensure messages are sent - await asyncio.to_thread(self.producer.flush, timeout=5) - # Update metrics self.metrics.record_dlq_message_retried(message.original_topic, message.event_type, "success") @@ -521,25 +512,22 @@ def create_dlq_manager( retry_topic_suffix: str = "-retry", default_retry_policy: RetryPolicy | None = None, ) -> DLQManager: - consumer = Consumer( - { - "bootstrap.servers": settings.KAFKA_BOOTSTRAP_SERVERS, - "group.id": f"{GroupId.DLQ_MANAGER}.{settings.KAFKA_GROUP_SUFFIX}", - "enable.auto.commit": False, - "auto.offset.reset": "earliest", - "client.id": "dlq-manager-consumer", - } + topic_name = f"{settings.KAFKA_TOPIC_PREFIX}{dlq_topic}" + consumer = AIOKafkaConsumer( + topic_name, + bootstrap_servers=settings.KAFKA_BOOTSTRAP_SERVERS, + group_id=f"{GroupId.DLQ_MANAGER}.{settings.KAFKA_GROUP_SUFFIX}", + enable_auto_commit=False, + auto_offset_reset="earliest", + client_id="dlq-manager-consumer", ) - producer = Producer( - { - "bootstrap.servers": settings.KAFKA_BOOTSTRAP_SERVERS, - "client.id": "dlq-manager-producer", - "acks": "all", - "enable.idempotence": True, - "compression.type": "gzip", - "batch.size": 16384, - "linger.ms": 10, - } + producer = AIOKafkaProducer( + bootstrap_servers=settings.KAFKA_BOOTSTRAP_SERVERS, + client_id="dlq-manager-producer", + acks="all", + compression_type="gzip", + max_batch_size=16384, + linger_ms=10, ) if default_retry_policy is None: default_retry_policy = RetryPolicy(topic="default", strategy=RetryStrategy.EXPONENTIAL_BACKOFF) diff --git a/backend/app/events/admin_utils.py b/backend/app/events/admin_utils.py deleted file mode 100644 index 5e133291..00000000 --- a/backend/app/events/admin_utils.py +++ /dev/null @@ -1,69 +0,0 @@ -import asyncio -import logging -from typing import Dict, List - -from confluent_kafka.admin import AdminClient, NewTopic - -from app.settings import Settings - - -class AdminUtils: - """Minimal admin utilities using native AdminClient.""" - - def __init__(self, settings: Settings, logger: logging.Logger): - self.logger = logger - self._admin = AdminClient( - { - "bootstrap.servers": settings.KAFKA_BOOTSTRAP_SERVERS, - "client.id": "integr8scode-admin", - } - ) - - @property - def admin_client(self) -> AdminClient: - """Get the native AdminClient instance.""" - return self._admin - - async def check_topic_exists(self, topic: str) -> bool: - """Check if topic exists.""" - try: - loop = asyncio.get_running_loop() - metadata = await loop.run_in_executor(None, lambda: self._admin.list_topics(timeout=5.0)) - return topic in metadata.topics - except Exception as e: - self.logger.error(f"Failed to check topic {topic}: {e}") - return False - - async def create_topic(self, topic: str, num_partitions: int = 1, replication_factor: int = 1) -> bool: - """Create a single topic.""" - try: - new_topic = NewTopic(topic, num_partitions=num_partitions, replication_factor=replication_factor) - loop = asyncio.get_running_loop() - 
futures = await loop.run_in_executor( - None, lambda: self._admin.create_topics([new_topic], operation_timeout=30.0) - ) - await loop.run_in_executor(None, lambda: futures[topic].result(timeout=30.0)) - self.logger.info(f"Topic {topic} created successfully") - return True - except Exception as e: - self.logger.error(f"Failed to create topic {topic}: {e}") - return False - - async def ensure_topics_exist(self, topics: List[tuple[str, int]]) -> Dict[str, bool]: - """Ensure topics exist, creating them if necessary.""" - results = {} - for topic, partitions in topics: - if await self.check_topic_exists(topic): - results[topic] = True - else: - results[topic] = await self.create_topic(topic, partitions) - return results - - def get_admin_client(self) -> AdminClient: - """Get the native AdminClient for direct operations.""" - return self._admin - - -def create_admin_utils(settings: Settings, logger: logging.Logger) -> AdminUtils: - """Create admin utilities.""" - return AdminUtils(settings, logger) diff --git a/backend/app/events/consumer_group_monitor.py b/backend/app/events/consumer_group_monitor.py index 21338dd9..6301d2ee 100644 --- a/backend/app/events/consumer_group_monitor.py +++ b/backend/app/events/consumer_group_monitor.py @@ -1,14 +1,16 @@ import asyncio import logging -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import datetime, timezone -from typing import Any, Dict, List, cast +from typing import Any -from confluent_kafka import Consumer, ConsumerGroupState, KafkaError, TopicPartition -from confluent_kafka.admin import ConsumerGroupDescription +from aiokafka import AIOKafkaConsumer, TopicPartition +from aiokafka.admin import AIOKafkaAdminClient +from aiokafka.protocol.api import Response +from aiokafka.protocol.group import MemberAssignment +from aiokafka.structs import OffsetAndMetadata from app.core.utils import StringEnum -from app.events.admin_utils import AdminUtils from app.settings import Settings @@ -21,6 +23,18 @@ class ConsumerGroupHealth(StringEnum): UNKNOWN = "unknown" +# Known consumer group states from Kafka protocol +class ConsumerGroupState(StringEnum): + """Consumer group states from Kafka protocol.""" + + STABLE = "Stable" + PREPARING_REBALANCE = "PreparingRebalance" + COMPLETING_REBALANCE = "CompletingRebalance" + EMPTY = "Empty" + DEAD = "Dead" + UNKNOWN = "Unknown" + + @dataclass(slots=True) class ConsumerGroupMember: """Information about a consumer group member.""" @@ -28,7 +42,7 @@ class ConsumerGroupMember: member_id: str client_id: str host: str - assigned_partitions: List[str] # topic:partition format + assigned_partitions: list[str] # topic:partition format @dataclass(slots=True) @@ -36,41 +50,86 @@ class ConsumerGroupStatus: """Comprehensive consumer group status information.""" group_id: str - state: str + state: ConsumerGroupState protocol: str protocol_type: str coordinator: str - members: List[ConsumerGroupMember] + members: list[ConsumerGroupMember] # Health metrics member_count: int assigned_partitions: int - partition_distribution: Dict[str, int] # member_id -> partition count + partition_distribution: dict[str, int] # member_id -> partition count # Lag information (if available) total_lag: int = 0 - partition_lags: Dict[str, int] | None = None # topic:partition -> lag + partition_lags: dict[str, int] = field(default_factory=dict) # topic:partition -> lag # Health assessment health: ConsumerGroupHealth = ConsumerGroupHealth.UNKNOWN health_message: str = "" - timestamp: datetime | None = None + 
timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc)) + + +@dataclass(slots=True) +class DescribedGroup: + """Parsed group from DescribeGroupsResponse.""" + + error_code: int + group_id: str + state: str + protocol_type: str + protocol: str + members: list[dict[str, Any]] + + +def _parse_describe_groups_response(response: Response) -> list[DescribedGroup]: + """Parse DescribeGroupsResponse into typed DescribedGroup objects.""" + obj = response.to_object() + groups_data: list[dict[str, Any]] = obj["groups"] + + result: list[DescribedGroup] = [] + for g in groups_data: + result.append( + DescribedGroup( + error_code=g["error_code"], + group_id=g["group"], + state=g["state"], + protocol_type=g["protocol_type"], + protocol=g["protocol"], + members=g["members"], + ) + ) + return result + - def __post_init__(self) -> None: - if self.timestamp is None: - self.timestamp = datetime.now(timezone.utc) +def _parse_member_assignment(assignment_bytes: bytes) -> list[tuple[str, list[int]]]: + """Parse member_assignment bytes to list of (topic, partitions).""" + if not assignment_bytes: + return [] - if self.partition_lags is None: - self.partition_lags = {} + try: + assignment = MemberAssignment.decode(assignment_bytes) + return [(topic, list(partitions)) for topic, partitions in assignment.assignment] + except Exception: + return [] + + +def _state_from_string(state_str: str) -> ConsumerGroupState: + """Convert state string to ConsumerGroupState enum.""" + try: + return ConsumerGroupState(state_str) + except ValueError: + return ConsumerGroupState.UNKNOWN class NativeConsumerGroupMonitor: """ - Enhanced consumer group monitoring using confluent-kafka native operations. + Enhanced consumer group monitoring using aiokafka. Provides detailed consumer group health monitoring, lag tracking, and - rebalancing detection using AdminClient's native capabilities. + rebalancing detection using AIOKafkaAdminClient's native capabilities. 
""" def __init__( @@ -78,88 +137,112 @@ def __init__( settings: Settings, logger: logging.Logger, client_id: str = "integr8scode-consumer-group-monitor", - request_timeout_ms: int = 30000, # Health thresholds - max_rebalance_time_seconds: int = 300, # 5 minutes critical_lag_threshold: int = 10000, warning_lag_threshold: int = 1000, min_members_threshold: int = 1, ): self.logger = logger - self.bootstrap_servers = settings.KAFKA_BOOTSTRAP_SERVERS + self._bootstrap_servers = settings.KAFKA_BOOTSTRAP_SERVERS + self._client_id = client_id - self.admin_client = AdminUtils(settings=settings, logger=logger) + self._admin: AIOKafkaAdminClient | None = None # Health thresholds - self.max_rebalance_time = max_rebalance_time_seconds self.critical_lag_threshold = critical_lag_threshold self.warning_lag_threshold = warning_lag_threshold self.min_members_threshold = min_members_threshold # Monitoring state - self._group_status_cache: Dict[str, ConsumerGroupStatus] = {} + self._group_status_cache: dict[str, ConsumerGroupStatus] = {} self._cache_ttl_seconds = 30 + async def _get_admin(self) -> AIOKafkaAdminClient: + """Get or create the admin client.""" + if self._admin is None: + self._admin = AIOKafkaAdminClient( + bootstrap_servers=self._bootstrap_servers, + client_id=self._client_id, + ) + await self._admin.start() + return self._admin + + async def close(self) -> None: + """Close the admin client.""" + if self._admin is not None: + await self._admin.close() + self._admin = None + async def get_consumer_group_status( - self, group_id: str, timeout: float = 30.0, include_lag: bool = True + self, group_id: str, include_lag: bool = True ) -> ConsumerGroupStatus: """Get comprehensive status for a consumer group.""" try: # Check cache first - if group_id in self._group_status_cache: - cached = self._group_status_cache[group_id] - if cached.timestamp is not None: - cache_age = (datetime.now(timezone.utc) - cached.timestamp).total_seconds() - if cache_age < self._cache_ttl_seconds: - return cached + cached = self._group_status_cache.get(group_id) + if cached is not None: + cache_age = (datetime.now(timezone.utc) - cached.timestamp).total_seconds() + if cache_age < self._cache_ttl_seconds: + return cached # Get group description from AdminClient - group_desc = await self._describe_consumer_group(group_id, timeout) + described_group = await self._describe_consumer_group(group_id) # Build member information - members = [] - partition_distribution = {} + members: list[ConsumerGroupMember] = [] + partition_distribution: dict[str, int] = {} total_assigned_partitions = 0 - for member in group_desc.members: - # Parse assigned partitions - assigned_partitions = [] - if member.assignment and hasattr(member.assignment, "topic_partitions"): - for tp in member.assignment.topic_partitions: - assigned_partitions.append(f"{tp.topic}:{tp.partition}") + for member_data in described_group.members: + member_id: str = member_data["member_id"] + client_id: str = member_data["client_id"] + client_host: str = member_data["client_host"] + assignment_bytes: bytes = member_data["member_assignment"] + + # Parse assigned partitions from assignment bytes + assigned_partitions: list[str] = [] + topic_partitions = _parse_member_assignment(assignment_bytes) + for topic, partitions in topic_partitions: + for partition in partitions: + assigned_partitions.append(f"{topic}:{partition}") members.append( ConsumerGroupMember( - member_id=member.member_id, - client_id=member.client_id, - host=member.host, + member_id=member_id, + 
client_id=client_id, + host=client_host, assigned_partitions=assigned_partitions, ) ) - partition_distribution[member.member_id] = len(assigned_partitions) + partition_distribution[member_id] = len(assigned_partitions) total_assigned_partitions += len(assigned_partitions) # Get coordinator information - coordinator = f"{group_desc.coordinator.host}:{group_desc.coordinator.port}" + admin = await self._get_admin() + coordinator_id = await admin.find_coordinator(group_id) + coordinator = f"node:{coordinator_id}" + + # Parse state + state = _state_from_string(described_group.state) # Get lag information if requested total_lag = 0 - partition_lags = {} - if include_lag and group_desc.state == ConsumerGroupState.STABLE: + partition_lags: dict[str, int] = {} + if include_lag and state == ConsumerGroupState.STABLE: try: - lag_info = await self._get_consumer_group_lag(group_id, timeout) - total_lag = lag_info.get("total_lag", 0) - partition_lags = lag_info.get("partition_lags", {}) + lag_info = await self._get_consumer_group_lag(group_id) + total_lag = lag_info["total_lag"] + partition_lags = lag_info["partition_lags"] except Exception as e: self.logger.warning(f"Failed to get lag info for group {group_id}: {e}") # Create status object status = ConsumerGroupStatus( group_id=group_id, - state=group_desc.state.name if group_desc.state else "UNKNOWN", - protocol=getattr(group_desc, "protocol", "unknown"), - protocol_type=getattr(group_desc, "protocol_type", "unknown"), + state=state, + protocol=described_group.protocol, + protocol_type=described_group.protocol_type, coordinator=coordinator, members=members, member_count=len(members), @@ -183,7 +266,7 @@ async def get_consumer_group_status( # Return minimal status with error return ConsumerGroupStatus( group_id=group_id, - state="ERROR", + state=ConsumerGroupState.UNKNOWN, protocol="unknown", protocol_type="unknown", coordinator="unknown", @@ -196,23 +279,26 @@ async def get_consumer_group_status( ) async def get_multiple_group_status( - self, group_ids: List[str], timeout: float = 30.0, include_lag: bool = True - ) -> Dict[str, ConsumerGroupStatus]: + self, group_ids: list[str], include_lag: bool = True + ) -> dict[str, ConsumerGroupStatus]: """Get status for multiple consumer groups efficiently.""" - results = {} + results: dict[str, ConsumerGroupStatus] = {} # Process groups concurrently - tasks = [self.get_consumer_group_status(group_id, timeout, include_lag) for group_id in group_ids] + tasks = [self.get_consumer_group_status(group_id, include_lag) for group_id in group_ids] try: statuses = await asyncio.gather(*tasks, return_exceptions=True) for group_id, status in zip(group_ids, statuses, strict=False): - if isinstance(status, Exception): + if isinstance(status, ConsumerGroupStatus): + results[group_id] = status + else: + # status is BaseException self.logger.error(f"Failed to get status for group {group_id}: {status}") results[group_id] = ConsumerGroupStatus( group_id=group_id, - state="ERROR", + state=ConsumerGroupState.UNKNOWN, protocol="unknown", protocol_type="unknown", coordinator="unknown", @@ -223,152 +309,102 @@ async def get_multiple_group_status( health=ConsumerGroupHealth.UNHEALTHY, health_message=str(status), ) - elif isinstance(status, ConsumerGroupStatus): - results[group_id] = status except Exception as e: self.logger.error(f"Failed to get multiple group status: {e}") # Return error status for all groups for group_id in group_ids: - results[group_id] = ConsumerGroupStatus( - group_id=group_id, - state="ERROR", - 
protocol="unknown", - protocol_type="unknown", - coordinator="unknown", - members=[], - member_count=0, - assigned_partitions=0, - partition_distribution={}, - health=ConsumerGroupHealth.UNHEALTHY, - health_message=str(e), - ) + if group_id not in results: + results[group_id] = ConsumerGroupStatus( + group_id=group_id, + state=ConsumerGroupState.UNKNOWN, + protocol="unknown", + protocol_type="unknown", + coordinator="unknown", + members=[], + member_count=0, + assigned_partitions=0, + partition_distribution={}, + health=ConsumerGroupHealth.UNHEALTHY, + health_message=str(e), + ) return results - async def list_consumer_groups(self, timeout: float = 10.0) -> List[str]: + async def list_consumer_groups(self) -> list[str]: """List all consumer groups in the cluster.""" try: - # Use native AdminClient to list consumer groups - admin = self.admin_client.admin_client - - # List consumer groups (sync operation) - result = await asyncio.to_thread(admin.list_consumer_groups, request_timeout=timeout) - - # Extract group IDs from result - # ListConsumerGroupsResult has .valid and .errors attributes - group_ids = [] - if hasattr(result, "valid"): - # result.valid contains a list of ConsumerGroupListing objects - group_ids = [group_listing.group_id for group_listing in result.valid] - - # Log any errors that occurred - if hasattr(result, "errors") and result.errors: - for error in result.errors: - self.logger.warning(f"Error listing some consumer groups: {error}") - - return group_ids - + admin = await self._get_admin() + # Returns list of tuples: (group_id, protocol_type) + groups: list[tuple[Any, ...]] = await admin.list_consumer_groups() + return [str(g[0]) for g in groups] except Exception as e: self.logger.error(f"Failed to list consumer groups: {e}") return [] - async def _describe_consumer_group(self, group_id: str, timeout: float) -> ConsumerGroupDescription: + async def _describe_consumer_group(self, group_id: str) -> DescribedGroup: """Describe a single consumer group using native AdminClient.""" - try: - admin = self.admin_client.admin_client - - # Describe consumer group (sync operation) - future_map = admin.describe_consumer_groups([group_id], request_timeout=timeout) + admin = await self._get_admin() + responses: list[Response] = await admin.describe_consumer_groups([group_id]) - if group_id not in future_map: - raise ValueError(f"Group {group_id} not found in describe result") + if not responses: + raise ValueError(f"No response for group {group_id}") - future = future_map[group_id] - # Cast future.result to proper type to help mypy - result_func = cast(Any, future.result) - group_desc: ConsumerGroupDescription = await asyncio.to_thread(result_func, timeout=timeout) + # Parse the response + groups = _parse_describe_groups_response(responses[0]) - return group_desc + # Find our group in the response + for group in groups: + if group.group_id == group_id: + if group.error_code != 0: + raise ValueError(f"Error describing group {group_id}: error_code={group.error_code}") + return group - except Exception as e: - if hasattr(e, "args") and e.args and isinstance(e.args[0], KafkaError): - kafka_err = e.args[0] - self.logger.error( - f"Kafka error describing group {group_id}: " - f"code={kafka_err.code()}, " - f"name={kafka_err.name()}, " - f"message={kafka_err}" - ) - raise ValueError(f"Failed to describe group {group_id}: {kafka_err}") - raise ValueError(f"Failed to describe group {group_id}: {e}") + raise ValueError(f"Group {group_id} not found in response") - async def 
_get_consumer_group_lag(self, group_id: str, timeout: float) -> Dict[str, Any]: + async def _get_consumer_group_lag(self, group_id: str) -> dict[str, Any]: """Get consumer group lag information.""" try: - # Create a temporary consumer to get lag info - consumer_config = { - "bootstrap.servers": self.bootstrap_servers, - "group.id": f"{group_id}-lag-monitor-{datetime.now().timestamp()}", - "enable.auto.commit": False, - "auto.offset.reset": "earliest", - } + admin = await self._get_admin() - consumer = Consumer(consumer_config) + # Get committed offsets for the group + offsets: dict[TopicPartition, OffsetAndMetadata] = await admin.list_consumer_group_offsets(group_id) - try: - # Get group metadata to find assigned topics - group_desc = await self._describe_consumer_group(group_id, timeout) - - # Extract topics from member assignments - topics = set() - for member in group_desc.members: - if member.assignment and hasattr(member.assignment, "topic_partitions"): - for tp in member.assignment.topic_partitions: - topics.add(tp.topic) + if not offsets: + return {"total_lag": 0, "partition_lags": {}} - if not topics: - return {"total_lag": 0, "partition_lags": {}} + # Create a temporary consumer to get high watermarks + consumer = AIOKafkaConsumer( + bootstrap_servers=self._bootstrap_servers, + group_id=f"{group_id}-lag-monitor-{datetime.now().timestamp()}", + enable_auto_commit=False, + auto_offset_reset="earliest", + ) - # Get topic metadata to find all partitions - metadata = await asyncio.to_thread(consumer.list_topics, timeout=timeout) + try: + await consumer.start() total_lag = 0 - partition_lags = {} - - for topic in topics: - if topic not in metadata.topics: - continue - - topic_metadata = metadata.topics[topic] + partition_lags: dict[str, int] = {} - for partition_id in topic_metadata.partitions.keys(): - try: - # Get high water mark - low, high = await asyncio.to_thread( - consumer.get_watermark_offsets, TopicPartition(topic, partition_id), timeout=timeout - ) + # Get end offsets for all partitions + tps = list(offsets.keys()) + if tps: + end_offsets: dict[TopicPartition, int] = await consumer.end_offsets(tps) - # Get committed offset for the group - committed = await asyncio.to_thread( - consumer.committed, [TopicPartition(topic, partition_id)], timeout=timeout - ) + for tp, offset_meta in offsets.items(): + committed_offset = offset_meta.offset + high = end_offsets.get(tp, 0) - if committed and len(committed) > 0: - committed_offset = committed[0].offset - if committed_offset >= 0: # Valid offset - lag = max(0, high - committed_offset) - partition_lags[f"{topic}:{partition_id}"] = lag - total_lag += lag - - except Exception as e: - self.logger.debug(f"Failed to get lag for {topic}:{partition_id}: {e}") - continue + if committed_offset >= 0: + lag = max(0, high - committed_offset) + partition_lags[f"{tp.topic}:{tp.partition}"] = lag + total_lag += lag return {"total_lag": total_lag, "partition_lags": partition_lags} finally: - consumer.close() + await consumer.stop() except Exception as e: self.logger.warning(f"Failed to get consumer group lag for {group_id}: {e}") @@ -377,17 +413,23 @@ async def _get_consumer_group_lag(self, group_id: str, timeout: float) -> Dict[s def _assess_group_health(self, status: ConsumerGroupStatus) -> tuple[ConsumerGroupHealth, str]: """Assess the health of a consumer group based on its status.""" - # Check for critical issues - if status.state == "ERROR": - return ConsumerGroupHealth.UNHEALTHY, "Group is in error state" + # Check for error/unknown state + if 
status.state == ConsumerGroupState.UNKNOWN: + return ConsumerGroupHealth.UNHEALTHY, "Group is in unknown state" + + if status.state == ConsumerGroupState.DEAD: + return ConsumerGroupHealth.UNHEALTHY, "Group is dead" if status.member_count < self.min_members_threshold: return ConsumerGroupHealth.UNHEALTHY, f"Insufficient members: {status.member_count}" # Check for rebalancing issues - if status.state in ("REBALANCING", "COMPLETING_REBALANCE"): - # This could be normal, but we'll mark as degraded - return ConsumerGroupHealth.DEGRADED, f"Group is rebalancing: {status.state}" + if status.state in (ConsumerGroupState.PREPARING_REBALANCE, ConsumerGroupState.COMPLETING_REBALANCE): + return ConsumerGroupHealth.DEGRADED, f"Group is rebalancing: {status.state.value}" + + # Check for empty group + if status.state == ConsumerGroupState.EMPTY: + return ConsumerGroupHealth.DEGRADED, "Group is empty (no active members)" # Check lag if available if status.total_lag >= self.critical_lag_threshold: @@ -398,32 +440,33 @@ def _assess_group_health(self, status: ConsumerGroupStatus) -> tuple[ConsumerGro # Check partition distribution if status.partition_distribution: - max_partitions = max(status.partition_distribution.values()) - min_partitions = min(status.partition_distribution.values()) + values = list(status.partition_distribution.values()) + max_partitions = max(values) + min_partitions = min(values) # Warn if partition distribution is very uneven if max_partitions > 0 and (max_partitions - min_partitions) > max_partitions * 0.5: return ConsumerGroupHealth.DEGRADED, "Uneven partition distribution" # Check if group is stable and consuming - if status.state == "STABLE" and status.assigned_partitions > 0: + if status.state == ConsumerGroupState.STABLE and status.assigned_partitions > 0: return ConsumerGroupHealth.HEALTHY, f"Group is stable with {status.member_count} members" # Default case - return ConsumerGroupHealth.UNKNOWN, f"Group state: {status.state}" + return ConsumerGroupHealth.UNKNOWN, f"Group state: {status.state.value}" - def get_health_summary(self, status: ConsumerGroupStatus) -> Dict[str, Any]: + def get_health_summary(self, status: ConsumerGroupStatus) -> dict[str, Any]: """Get a health summary for a consumer group.""" return { "group_id": status.group_id, - "health": status.health, + "health": status.health.value, "health_message": status.health_message, - "state": status.state, + "state": status.state.value, "members": status.member_count, "assigned_partitions": status.assigned_partitions, "total_lag": status.total_lag, "coordinator": status.coordinator, - "timestamp": status.timestamp.isoformat() if status.timestamp else None, + "timestamp": status.timestamp.isoformat(), "partition_distribution": status.partition_distribution, } diff --git a/backend/app/events/core/consumer.py b/backend/app/events/core/consumer.py index 3d0a1355..0e63fb59 100644 --- a/backend/app/events/core/consumer.py +++ b/backend/app/events/core/consumer.py @@ -1,12 +1,11 @@ import asyncio -import json import logging from collections.abc import Awaitable, Callable from datetime import datetime, timezone from typing import Any -from confluent_kafka import OFFSET_BEGINNING, OFFSET_END, Consumer, Message, TopicPartition -from confluent_kafka.error import KafkaError +from aiokafka import AIOKafkaConsumer, TopicPartition +from aiokafka.errors import KafkaError from opentelemetry.trace import SpanKind from app.core.metrics.context import get_event_metrics @@ -36,7 +35,7 @@ def __init__( self._schema_registry = schema_registry 
self._dispatcher = event_dispatcher self._stats_callback = stats_callback - self._consumer: Consumer | None = None + self._consumer: AIOKafkaConsumer | None = None self._state = ConsumerState.STOPPED self._running = False self._metrics = ConsumerMetrics() @@ -48,13 +47,23 @@ def __init__( async def start(self, topics: list[KafkaTopic]) -> None: self._state = self._state if self._state != ConsumerState.STOPPED else ConsumerState.STARTING - consumer_config = self._config.to_consumer_config() - if self._stats_callback: - consumer_config["stats_cb"] = self._handle_stats - - self._consumer = Consumer(consumer_config) topic_strings = [f"{self._topic_prefix}{str(topic)}" for topic in topics] - self._consumer.subscribe(topic_strings) + + self._consumer = AIOKafkaConsumer( + *topic_strings, + bootstrap_servers=self._config.bootstrap_servers, + group_id=self._config.group_id, + client_id=self._config.client_id, + auto_offset_reset=self._config.auto_offset_reset, + enable_auto_commit=self._config.enable_auto_commit, + session_timeout_ms=self._config.session_timeout_ms, + heartbeat_interval_ms=self._config.heartbeat_interval_ms, + max_poll_interval_ms=self._config.max_poll_interval_ms, + fetch_min_bytes=self._config.fetch_min_bytes, + fetch_max_wait_ms=self._config.fetch_max_wait_ms, + ) + + await self._consumer.start() self._running = True self._consume_task = asyncio.create_task(self._consume_loop()) @@ -81,7 +90,7 @@ async def stop(self) -> None: async def _cleanup(self) -> None: if self._consumer: - self._consumer.close() + await self._consumer.stop() self._consumer = None async def _consume_loop(self) -> None: @@ -94,47 +103,52 @@ async def _consume_loop(self) -> None: if poll_count % 100 == 0: # Log every 100 polls self.logger.debug(f"Consumer loop active: polls={poll_count}, messages={message_count}") - msg = await asyncio.to_thread(self._consumer.poll, timeout=0.1) - - if msg is not None: - error = msg.error() - if error: - if error.code() != KafkaError._PARTITION_EOF: - self.logger.error(f"Consumer error: {error}") - self._metrics.processing_errors += 1 - else: - message_count += 1 - self.logger.debug( - f"Message received from topic {msg.topic()}, partition {msg.partition()}, offset {msg.offset()}" - ) - await self._process_message(msg) - if not self._config.enable_auto_commit: - await asyncio.to_thread(self._consumer.commit, msg) - else: + try: + # Use getone() with timeout for single message consumption + msg = await asyncio.wait_for( + self._consumer.getone(), + timeout=0.1 + ) + + message_count += 1 + self.logger.debug( + f"Message received from topic {msg.topic}, partition {msg.partition}, offset {msg.offset}" + ) + await self._process_message(msg) + if not self._config.enable_auto_commit: + await self._consumer.commit() + + except asyncio.TimeoutError: + # No message available within timeout, continue polling await asyncio.sleep(0.01) + except KafkaError as e: + self.logger.error(f"Consumer error: {e}") + self._metrics.processing_errors += 1 self.logger.warning( f"Consumer loop ended for group {self._config.group_id}: " f"running={self._running}, consumer={self._consumer is not None}" ) - async def _process_message(self, message: Message) -> None: - topic = message.topic() + async def _process_message(self, message: Any) -> None: + """Process a ConsumerRecord from aiokafka.""" + topic = message.topic if not topic: self.logger.warning("Message with no topic received") return - raw_value = message.value() + raw_value = message.value if not raw_value: self.logger.warning(f"Empty message 
from topic {topic}") return self.logger.debug(f"Deserializing message from topic {topic}, size={len(raw_value)} bytes") - event = self._schema_registry.deserialize_event(raw_value, topic) + event = await self._schema_registry.deserialize_event(raw_value, topic) self.logger.info(f"Deserialized event: type={event.event_type}, id={event.event_id}") # Extract trace context from Kafka headers and start a consumer span - header_list = message.headers() or [] + # aiokafka headers are list of tuples: [(key, value), ...] + header_list = message.headers or [] headers: dict[str, str] = {} for k, v in header_list: headers[str(k)] = v.decode("utf-8") if isinstance(v, (bytes, bytearray)) else (v or "") @@ -144,8 +158,8 @@ async def _process_message(self, message: Message) -> None: # Dispatch event through EventDispatcher try: self.logger.debug(f"Dispatching {event.event_type} to handlers") - partition_val = message.partition() - offset_val = message.offset() + partition_val = message.partition + offset_val = message.offset part_attr = partition_val if partition_val is not None else -1 off_attr = offset_val if offset_val is not None else -1 with tracer.start_as_current_span( @@ -181,23 +195,6 @@ async def _process_message(self, message: Message) -> None: def register_error_callback(self, callback: Callable[[Exception, BaseEvent], Awaitable[None]]) -> None: self._error_callback = callback - def _handle_stats(self, stats_json: str) -> None: - stats = json.loads(stats_json) - - self._metrics.messages_consumed = stats.get("rxmsgs", 0) - self._metrics.bytes_consumed = stats.get("rxmsg_bytes", 0) - - topics = stats.get("topics", {}) - self._metrics.consumer_lag = sum( - partition_stats.get("consumer_lag", 0) - for topic_stats in topics.values() - for partition_stats in topic_stats.get("partitions", {}).values() - if partition_stats.get("consumer_lag", 0) >= 0 - ) - - self._metrics.last_updated = datetime.now(timezone.utc) - self._stats_callback and self._stats_callback(stats) - @property def state(self) -> ConsumerState: return self._state @@ -211,7 +208,7 @@ def is_running(self) -> bool: return self._state == ConsumerState.RUNNING @property - def consumer(self) -> Consumer | None: + def consumer(self) -> AIOKafkaConsumer | None: return self._consumer def get_status(self) -> ConsumerStatus: @@ -232,24 +229,30 @@ def get_status(self) -> ConsumerStatus: ) async def seek_to_beginning(self) -> None: - self._seek_all_partitions(OFFSET_BEGINNING) + """Seek all assigned partitions to the beginning.""" + if not self._consumer: + self.logger.warning("Cannot seek: consumer not initialized") + return - async def seek_to_end(self) -> None: - self._seek_all_partitions(OFFSET_END) + assignment = self._consumer.assignment() + if assignment: + await self._consumer.seek_to_beginning(*assignment) - def _seek_all_partitions(self, offset_type: int) -> None: + async def seek_to_end(self) -> None: + """Seek all assigned partitions to the end.""" if not self._consumer: self.logger.warning("Cannot seek: consumer not initialized") return assignment = self._consumer.assignment() - for partition in assignment: - new_partition = TopicPartition(partition.topic, partition.partition, offset_type) - self._consumer.seek(new_partition) + if assignment: + await self._consumer.seek_to_end(*assignment) async def seek_to_offset(self, topic: str, partition: int, offset: int) -> None: + """Seek a specific partition to a specific offset.""" if not self._consumer: self.logger.warning("Cannot seek to offset: consumer not initialized") return - 
self._consumer.seek(TopicPartition(topic, partition, offset)) + tp = TopicPartition(topic, partition) + self._consumer.seek(tp, offset) diff --git a/backend/app/events/core/producer.py b/backend/app/events/core/producer.py index d3f68b83..7241bda0 100644 --- a/backend/app/events/core/producer.py +++ b/backend/app/events/core/producer.py @@ -2,12 +2,11 @@ import json import logging import socket -import threading from datetime import datetime, timezone from typing import Any, Callable, TypeAlias -from confluent_kafka import Message, Producer -from confluent_kafka.error import KafkaError +from aiokafka import AIOKafkaProducer +from aiokafka.errors import KafkaError from app.core.lifecycle import LifecycleEnabled from app.core.metrics.context import get_event_metrics @@ -19,33 +18,29 @@ from .types import ProducerConfig, ProducerMetrics, ProducerState -# Global lock to serialize Producer initialization (workaround for librdkafka race condition) -# See: https://github.com/confluentinc/confluent-kafka-python/issues/1797 -_producer_init_lock = threading.Lock() - -DeliveryCallback: TypeAlias = Callable[[KafkaError | None, Message], None] StatsCallback: TypeAlias = Callable[[dict[str, Any]], None] class UnifiedProducer(LifecycleEnabled): + """Fully async Kafka producer using aiokafka.""" + def __init__( - self, - config: ProducerConfig, - schema_registry_manager: SchemaRegistryManager, - logger: logging.Logger, - settings: Settings, - stats_callback: StatsCallback | None = None, + self, + config: ProducerConfig, + schema_registry_manager: SchemaRegistryManager, + logger: logging.Logger, + settings: Settings, + stats_callback: StatsCallback | None = None, ): super().__init__() self._config = config self._schema_registry = schema_registry_manager self.logger = logger - self._producer: Producer | None = None + self._producer: AIOKafkaProducer | None = None self._stats_callback = stats_callback self._state = ProducerState.STOPPED self._metrics = ProducerMetrics() - self._event_metrics = get_event_metrics() # Singleton for Kafka metrics - self._poll_task: asyncio.Task[None] | None = None + self._event_metrics = get_event_metrics() self._topic_prefix = settings.KAFKA_TOPIC_PREFIX @property @@ -61,68 +56,25 @@ def metrics(self) -> ProducerMetrics: return self._metrics @property - def producer(self) -> Producer | None: + def producer(self) -> AIOKafkaProducer | None: return self._producer - def _handle_delivery(self, error: KafkaError | None, message: Message) -> None: - if error: - self._metrics.messages_failed += 1 - self._metrics.last_error = str(error) - self._metrics.last_error_time = datetime.now(timezone.utc) - # Record Kafka production error - topic = message.topic() if message else None - self._event_metrics.record_kafka_production_error( - topic=topic if topic is not None else "unknown", error_type=str(error.code()) - ) - self.logger.error(f"Message delivery failed: {error}") - else: - self._metrics.messages_sent += 1 - message_value = message.value() - if message_value: - self._metrics.bytes_sent += len(message_value) - self.logger.debug(f"Message delivered to {message.topic()}[{message.partition()}]@{message.offset()}") - - def _handle_stats(self, stats_json: str) -> None: - try: - stats = json.loads(stats_json) - self._metrics.queue_size = stats.get("msg_cnt", 0) - - topics = stats.get("topics", {}) - total_messages = 0 - total_latency = 0 - - for topic_stats in topics.values(): - partitions = topic_stats.get("partitions", {}) - for partition_stats in partitions.values(): - msg_cnt = 
partition_stats.get("msgq_cnt", 0) - total_messages += msg_cnt - latency = partition_stats.get("rtt", {}).get("avg", 0) - if latency > 0 and msg_cnt > 0: - total_latency += latency * msg_cnt - - if total_messages > 0: - self._metrics.avg_latency_ms = total_latency / total_messages - - if self._stats_callback: - self._stats_callback(stats) - except Exception as e: - self.logger.error(f"Error parsing producer stats: {e}") - async def _on_start(self) -> None: """Start the Kafka producer.""" self._state = ProducerState.STARTING self.logger.info("Starting producer...") - producer_config = self._config.to_producer_config() - producer_config["stats_cb"] = self._handle_stats - producer_config["statistics.interval.ms"] = 30000 + self._producer = AIOKafkaProducer( + bootstrap_servers=self._config.bootstrap_servers, + client_id=self._config.client_id, + acks=self._config.acks, + compression_type=self._config.compression_type, + max_batch_size=self._config.batch_size, + linger_ms=self._config.linger_ms, + ) - # Serialize Producer initialization to prevent librdkafka race condition - with _producer_init_lock: - self._producer = Producer(producer_config) - self._poll_task = asyncio.create_task(self._poll_loop()) + await self._producer.start() self._state = ProducerState.RUNNING - self.logger.info(f"Producer started: {self._config.bootstrap_servers}") def get_status(self) -> dict[str, Any]: @@ -151,36 +103,21 @@ async def _on_stop(self) -> None: self._state = ProducerState.STOPPING self.logger.info("Stopping producer...") - if self._poll_task: - self._poll_task.cancel() - await asyncio.gather(self._poll_task, return_exceptions=True) - self._poll_task = None - if self._producer: - self._producer.flush(timeout=10.0) + await self._producer.stop() self._producer = None self._state = ProducerState.STOPPED self.logger.info("Producer stopped") - async def _poll_loop(self) -> None: - self.logger.info("Started producer poll loop") - - while self.is_running and self._producer: - self._producer.poll(timeout=0.1) - await asyncio.sleep(0.01) - - self.logger.info("Producer poll loop ended") - async def produce( - self, event_to_produce: BaseEvent, key: str | None = None, headers: dict[str, str] | None = None + self, event_to_produce: BaseEvent, key: str | None = None, headers: dict[str, str] | None = None ) -> None: """ Produce a message to Kafka. Args: event_to_produce: Message value (BaseEvent) - N.B. 
each instance of BaseEvent has .topic classvar, returning type of KafkaTopic key: Message key headers: Message headers """ @@ -188,25 +125,43 @@ async def produce( self.logger.error("Producer not running") return - # Serialize value - serialized_value = self._schema_registry.serialize_event(event_to_produce) + try: + # Serialize value using async schema registry + serialized_value = await self._schema_registry.serialize_event(event_to_produce) + + topic = f"{self._topic_prefix}{str(event_to_produce.topic)}" - topic = f"{self._topic_prefix}{str(event_to_produce.topic)}" - self._producer.produce( - topic=topic, - value=serialized_value, - key=key.encode() if isinstance(key, str) else key, - headers=[(k, v.encode()) for k, v in headers.items()] if headers else None, - callback=self._handle_delivery, - ) + # Convert headers to list of tuples format + header_list = [(k, v.encode()) for k, v in headers.items()] if headers else None + + await self._producer.send_and_wait( + topic=topic, + value=serialized_value, + key=key.encode() if isinstance(key, str) else key, + headers=header_list, + ) + + # Update metrics on success + self._metrics.messages_sent += 1 + self._metrics.bytes_sent += len(serialized_value) + + # Record Kafka metrics + self._event_metrics.record_kafka_message_produced(topic) - # Record Kafka metrics - self._event_metrics.record_kafka_message_produced(topic) + self.logger.debug(f"Message [{event_to_produce}] sent to topic: {topic}") - self.logger.debug(f"Message [{event_to_produce}] queued for topic: {topic}") + except KafkaError as e: + self._metrics.messages_failed += 1 + self._metrics.last_error = str(e) + self._metrics.last_error_time = datetime.now(timezone.utc) + self._event_metrics.record_kafka_production_error( + topic=f"{self._topic_prefix}{str(event_to_produce.topic)}", error_type=type(e).__name__ + ) + self.logger.error(f"Failed to produce message: {e}") + raise async def send_to_dlq( - self, original_event: BaseEvent, original_topic: str, error: Exception, retry_count: int = 0 + self, original_event: BaseEvent, original_topic: str, error: Exception, retry_count: int = 0 ) -> None: """ Send a failed event to the Dead Letter Queue. 
@@ -256,9 +211,11 @@ async def send_to_dlq( # Serialize as JSON (DLQ uses JSON format for flexibility) serialized_value = json.dumps(dlq_event_data).encode("utf-8") + dlq_topic = f"{self._topic_prefix}{str(KafkaTopic.DEAD_LETTER_QUEUE)}" + # Send to DLQ topic - self._producer.produce( - topic=f"{self._topic_prefix}{str(KafkaTopic.DEAD_LETTER_QUEUE)}", + await self._producer.send_and_wait( + topic=dlq_topic, value=serialized_value, key=original_event.event_id.encode() if original_event.event_id else None, headers=[ @@ -266,13 +223,10 @@ async def send_to_dlq( ("error_type", type(error).__name__.encode()), ("retry_count", str(retry_count).encode()), ], - callback=self._handle_delivery, ) # Record metrics - self._event_metrics.record_kafka_message_produced( - f"{self._topic_prefix}{str(KafkaTopic.DEAD_LETTER_QUEUE)}" - ) + self._event_metrics.record_kafka_message_produced(dlq_topic) self._metrics.messages_sent += 1 self.logger.warning( diff --git a/backend/app/events/schema/schema_registry.py b/backend/app/events/schema/schema_registry.py index fbbead06..b36392b2 100644 --- a/backend/app/events/schema/schema_registry.py +++ b/backend/app/events/schema/schema_registry.py @@ -1,13 +1,10 @@ -import json import logging import struct from functools import lru_cache from typing import Any, Dict, Type, TypeVar -import httpx -from confluent_kafka.schema_registry import Schema, SchemaRegistryClient, record_subject_name_strategy -from confluent_kafka.schema_registry.avro import AvroDeserializer, AvroSerializer -from confluent_kafka.serialization import MessageField, SerializationContext +from schema_registry.client import AsyncSchemaRegistryClient, schema +from schema_registry.serializers import AsyncAvroMessageSerializer # type: ignore[attr-defined] from app.domain.enums.events import EventType from app.infrastructure.kafka.events.base import BaseEvent @@ -40,65 +37,63 @@ def _get_all_event_classes() -> list[Type[BaseEvent]]: @lru_cache(maxsize=1) def _get_event_type_to_class_mapping() -> Dict[EventType, Type[BaseEvent]]: """ - EventType enum → event class, inferred from the default of the `event_type` field on each subclass. + EventType enum -> event class, inferred from the default of the `event_type` field on each subclass. """ mapping: Dict[EventType, Type[BaseEvent]] = {} for subclass in _get_all_event_classes(): f = subclass.model_fields.get("event_type") if f is not None and f.default is not None: - mapping[f.default] = subclass # default is EventType thanks to Literal[…] + mapping[f.default] = subclass # default is EventType thanks to Literal[...] return mapping class SchemaRegistryManager: - """Schema registry manager for Avro serialization with Confluent wire format.""" + """Schema registry manager for Avro serialization with Confluent wire format. - def __init__(self, settings: Settings, logger: logging.Logger, schema_registry_url: str | None = None): + Uses aiokafka-compatible python-schema-registry-client for fully async operations. + """ + + def __init__(self, settings: Settings, logger: logging.Logger): self.logger = logger - self.url = schema_registry_url or settings.SCHEMA_REGISTRY_URL self.namespace = "com.integr8scode.events" - # Optional per-session/worker subject prefix for tests/local isolation - # e.g., "test..." 
-> subjects become "test.x.y.ExecutionRequestedEvent-value" self.subject_prefix = settings.SCHEMA_SUBJECT_PREFIX - config = {"url": self.url} - if settings.SCHEMA_REGISTRY_AUTH: - config["basic.auth.user.info"] = settings.SCHEMA_REGISTRY_AUTH - self.client = SchemaRegistryClient(config) + parts = settings.SCHEMA_REGISTRY_AUTH.split(":", 1) + auth: tuple[str, str] | None = (parts[0], parts[1]) if len(parts) == 2 else None + self._client = AsyncSchemaRegistryClient(url=settings.SCHEMA_REGISTRY_URL, auth=auth) # type: ignore[arg-type] + self._serializer = AsyncAvroMessageSerializer(self._client) - # Caches - self._serializers: Dict[str, AvroSerializer] = {} # subject -> serializer - self._deserializer: AvroDeserializer | None = None # single, returns dict - self._schema_id_cache: Dict[Type[BaseEvent], int] = {} # class -> schema id - self._id_to_class_cache: Dict[int, Type[BaseEvent]] = {} # schema id -> class - self._initialized = False + # Caches: class <-> schema_id (library caches schema_string -> id, we need class -> id) + self._schema_id_cache: Dict[Type[BaseEvent], int] = {} + self._id_to_class_cache: Dict[int, Type[BaseEvent]] = {} - def register_schema(self, subject: str, event_class: Type[BaseEvent]) -> int: - avro_schema = event_class.avro_schema(namespace=self.namespace) - schema_str = json.dumps(avro_schema) - - schema_id: int = self.client.register_schema(subject, Schema(schema_str, "AVRO")) + async def register_schema(self, subject: str, event_class: Type[BaseEvent]) -> int: + """Register schema and return schema ID.""" + avro_schema = schema.AvroSchema(event_class.avro_schema(namespace=self.namespace)) + schema_id: int = await self._client.register(subject, avro_schema) self._schema_id_cache[event_class] = schema_id self._id_to_class_cache[schema_id] = event_class self.logger.info(f"Registered schema for {event_class.__name__}: ID {schema_id}") return schema_id - def _get_schema_id(self, event_class: Type[BaseEvent]) -> int: + async def _get_schema_id(self, event_class: Type[BaseEvent]) -> int: """Get or register schema ID for event class.""" if event_class in self._schema_id_cache: return self._schema_id_cache[event_class] - # Use event class name in subject with optional prefix for test isolation subject = f"{self.subject_prefix}{event_class.__name__}-value" - return self.register_schema(subject, event_class) + return await self.register_schema(subject, event_class) - def _get_event_class_by_id(self, schema_id: int) -> Type[BaseEvent] | None: - """Get event class by schema ID, via cache or registry lookup of the writer schema name.""" + async def _get_event_class_by_id(self, schema_id: int) -> Type[BaseEvent] | None: + """Get event class by schema ID, via cache or registry lookup.""" if schema_id in self._id_to_class_cache: return self._id_to_class_cache[schema_id] - schema = self.client.get_schema(schema_id) - schema_dict = json.loads(str(schema.schema_str)) + schema_obj = await self._client.get_by_id(schema_id) + if schema_obj is None: + return None + # Parse schema to get class name - raw_schema is already a dict + schema_dict = schema_obj.raw_schema class_name = schema_dict.get("name") if class_name: cls = _get_event_class_mapping().get(class_name) @@ -109,25 +104,15 @@ def _get_event_class_by_id(self, schema_id: int) -> Type[BaseEvent] | None: return None - def serialize_event(self, event: BaseEvent) -> bytes: + async def serialize_event(self, event: BaseEvent) -> bytes: """ Serialize event to Confluent wire format. 
- AvroSerializer already emits: [0x00][4-byte schema id][Avro binary]. (No manual packing) + Format: [0x00][4-byte schema id][Avro binary] """ - # Ensure schema is registered & id cached (keeps id<->class mapping warm) - self._get_schema_id(event.__class__) - - # Subject-key for serializer cache (include optional prefix for isolation) - subject_key = f"{self.subject_prefix}{event.__class__.__name__}-value" - if subject_key not in self._serializers: - schema_str = json.dumps(event.__class__.avro_schema(namespace=self.namespace)) - # Use record_subject_name_strategy to ensure subject is based on record name, not topic - self._serializers[subject_key] = AvroSerializer( - self.client, schema_str, conf={"subject.name.strategy": record_subject_name_strategy} - ) + subject = f"{self.subject_prefix}{event.__class__.__name__}-value" + avro_schema = schema.AvroSchema(event.__class__.avro_schema(namespace=self.namespace)) # Prepare payload dict (exclude event_type: schema id implies the concrete record) - # Don't use mode="json" as it converts datetime to string, breaking Avro timestamp-micros payload: dict[str, Any] = event.model_dump(mode="python", by_alias=False, exclude_unset=False) payload.pop("event_type", None) @@ -135,18 +120,11 @@ def serialize_event(self, event: BaseEvent) -> bytes: if "timestamp" in payload and payload["timestamp"] is not None: payload["timestamp"] = int(payload["timestamp"].timestamp() * 1_000_000) - ctx = SerializationContext(str(event.topic), MessageField.VALUE) - data = self._serializers[subject_key](payload, ctx) # returns framed bytes (magic+id+payload) - if data is None: - raise ValueError("Serialization returned None") - return data + return await self._serializer.encode_record_with_schema(subject, avro_schema, payload) - def deserialize_event(self, data: bytes, topic: str) -> BaseEvent: + async def deserialize_event(self, data: bytes, topic: str) -> BaseEvent: """ Deserialize from Confluent wire format to a concrete BaseEvent subclass. 
- - Parse header to get schema id → resolve event class - - Use a single AvroDeserializer (no from_dict) to get a dict - - Hydrate Pydantic model and restore constant event_type, if omitted from payload """ if not data or len(data) < 5: raise ValueError("Invalid message: too short for wire format") @@ -154,23 +132,20 @@ def deserialize_event(self, data: bytes, topic: str) -> BaseEvent: if data[0:1] != MAGIC_BYTE: raise ValueError(f"Unknown magic byte: {data[0]:#x}") + # Extract schema ID from wire format schema_id = struct.unpack(">I", data[1:5])[0] - event_class = self._get_event_class_by_id(schema_id) + event_class = await self._get_event_class_by_id(schema_id) if not event_class: raise ValueError(f"Unknown schema ID: {schema_id}") - if self._deserializer is None: - self._deserializer = AvroDeserializer(self.client) # returns dict when no from_dict is provided - - ctx = SerializationContext(topic or "unknown", MessageField.VALUE) - obj = self._deserializer(data, ctx) + # Decode the message + obj = await self._serializer.decode_message(data) if not isinstance(obj, dict): raise ValueError(f"Deserialization returned {type(obj)}, expected dict") # Restore constant event_type if schema/payload doesn't include it f = event_class.model_fields.get("event_type") if f is not None and f.default is not None and "event_type" not in obj: - # f.default is already the EventType enum which is what we want obj["event_type"] = f.default return event_class.model_validate(obj) @@ -194,7 +169,7 @@ def deserialize_json(self, data: dict[str, Any]) -> BaseEvent: async def set_compatibility(self, subject: str, mode: str) -> None: """ - Set compatibility for a subject via REST API. + Set compatibility for a subject. Valid: BACKWARD, FORWARD, FULL, NONE, BACKWARD_TRANSITIVE, FORWARD_TRANSITIVE, FULL_TRANSITIVE """ valid_modes = { @@ -209,32 +184,18 @@ async def set_compatibility(self, subject: str, mode: str) -> None: if mode not in valid_modes: raise ValueError(f"Invalid compatibility mode: {mode}") - url = f"{self.url}/config/{subject}" - async with httpx.AsyncClient() as client: - response = await client.put(url, json={"compatibility": mode}) - response.raise_for_status() + await self._client.update_compatibility(level=mode, subject=subject) self.logger.info(f"Set {subject} compatibility to {mode}") async def initialize_schemas(self) -> None: """Initialize all event schemas in the registry (set compat + register).""" - if self._initialized: - return - for event_class in _get_all_event_classes(): - # Use event class name with optional prefix for per-run isolation in tests subject = f"{self.subject_prefix}{event_class.__name__}-value" await self.set_compatibility(subject, "FORWARD") - self.register_schema(subject, event_class) + await self.register_schema(subject, event_class) - self._initialized = True self.logger.info(f"Initialized {len(_get_all_event_classes())} event schemas") -def create_schema_registry_manager( - settings: Settings, logger: logging.Logger, schema_registry_url: str | None = None -) -> SchemaRegistryManager: - return SchemaRegistryManager(settings, logger, schema_registry_url) - - async def initialize_event_schemas(registry: SchemaRegistryManager) -> None: await registry.initialize_schemas() diff --git a/backend/app/services/event_bus.py b/backend/app/services/event_bus.py index 50a361b2..35098cd0 100644 --- a/backend/app/services/event_bus.py +++ b/backend/app/services/event_bus.py @@ -7,7 +7,8 @@ from typing import Any, Callable, Optional from uuid import uuid4 -from confluent_kafka import 
Consumer, KafkaError, Producer +from aiokafka import AIOKafkaConsumer, AIOKafkaProducer +from aiokafka.errors import KafkaError from fastapi import Request from pydantic import BaseModel, ConfigDict @@ -39,7 +40,12 @@ class Subscription: class EventBus(LifecycleEnabled): """ - Hybrid event bus with Kafka backing and local in-memory distribution. + Distributed event bus for cross-instance communication via Kafka. + + Publishers send events to Kafka. Subscribers receive events from OTHER instances + only - self-published messages are filtered out. This design means: + - Publishers should update their own state directly before calling publish() + - Handlers only run for events from other instances (cache invalidation, etc.) Supports pattern-based subscriptions using wildcards: - execution.* - matches all execution events @@ -52,14 +58,14 @@ def __init__(self, settings: Settings, logger: logging.Logger) -> None: self.logger = logger self.settings = settings self.metrics = get_connection_metrics() - self.producer: Optional[Producer] = None - self.consumer: Optional[Consumer] = None + self.producer: Optional[AIOKafkaProducer] = None + self.consumer: Optional[AIOKafkaConsumer] = None self._subscriptions: dict[str, Subscription] = {} # id -> Subscription self._pattern_index: dict[str, set[str]] = {} # pattern -> set of subscription ids self._consumer_task: Optional[asyncio.Task[None]] = None self._lock = asyncio.Lock() self._topic = f"{self.settings.KAFKA_TOPIC_PREFIX}{KafkaTopic.EVENT_BUS_STREAM}" - self._executor: Optional[Callable[..., Any]] = None # Will store the executor function + self._instance_id = str(uuid4()) # Unique ID for filtering self-published messages async def _on_start(self) -> None: """Start the event bus with Kafka backing.""" @@ -70,30 +76,24 @@ async def _on_start(self) -> None: async def _initialize_kafka(self) -> None: """Initialize Kafka producer and consumer.""" # Producer setup - self.producer = Producer( - { - "bootstrap.servers": self.settings.KAFKA_BOOTSTRAP_SERVERS, - "client.id": f"event-bus-producer-{uuid4()}", - "linger.ms": 10, - "batch.size": 16384, - } + self.producer = AIOKafkaProducer( + bootstrap_servers=self.settings.KAFKA_BOOTSTRAP_SERVERS, + client_id=f"event-bus-producer-{uuid4()}", + linger_ms=10, + max_batch_size=16384, ) + await self.producer.start() # Consumer setup - self.consumer = Consumer( - { - "bootstrap.servers": self.settings.KAFKA_BOOTSTRAP_SERVERS, - "group.id": f"event-bus-{uuid4()}", - "auto.offset.reset": "latest", - "enable.auto.commit": True, - "client.id": f"event-bus-consumer-{uuid4()}", - } + self.consumer = AIOKafkaConsumer( + self._topic, + bootstrap_servers=self.settings.KAFKA_BOOTSTRAP_SERVERS, + group_id=f"event-bus-{uuid4()}", + auto_offset_reset="latest", + enable_auto_commit=True, + client_id=f"event-bus-consumer-{uuid4()}", ) - self.consumer.subscribe([self._topic]) - - # Store the executor function for sync operations - loop = asyncio.get_running_loop() - self._executor = loop.run_in_executor + await self.consumer.start() async def _on_stop(self) -> None: """Stop the event bus and clean up resources.""" @@ -107,12 +107,11 @@ async def _on_stop(self) -> None: # Stop Kafka components if self.consumer: - self.consumer.close() + await self.consumer.stop() self.consumer = None if self.producer: - # Flush any pending messages - self.producer.flush(timeout=5) + await self.producer.stop() self.producer = None # Clear subscriptions @@ -124,7 +123,10 @@ async def _on_stop(self) -> None: async def publish(self, event_type: str, 
data: dict[str, Any]) -> None: """ - Publish an event to Kafka and local subscribers. + Publish an event to Kafka for cross-instance distribution. + + Local handlers receive events only from OTHER instances via the Kafka listener. + Publishers should update their own state directly before calling publish(). Args: event_type: Event type (e.g., "execution.123.started") @@ -132,28 +134,21 @@ async def publish(self, event_type: str, data: dict[str, Any]) -> None: """ event = self._create_event(event_type, data) - # Publish to Kafka for distributed handling if self.producer: try: - # Serialize and send message asynchronously value = event.model_dump_json().encode("utf-8") key = event_type.encode("utf-8") if event_type else None - - # Use executor to avoid blocking - if self._executor: - await self._executor(None, self.producer.produce, self._topic, value, key) - # Poll to handle delivery callbacks - await self._executor(None, self.producer.poll, 0) - else: - # Fallback to sync operation if executor not available - self.producer.produce(self._topic, value=value, key=key) - self.producer.poll(0) + headers = [("source_instance", self._instance_id.encode("utf-8"))] + + await self.producer.send_and_wait( + topic=self._topic, + value=value, + key=key, + headers=headers, + ) except Exception as e: self.logger.error(f"Failed to publish to Kafka: {e}") - # Publish to local subscribers for immediate handling - await self._distribute_event(event_type, event) - def _create_event(self, event_type: str, data: dict[str, Any]) -> EventBusEvent: """Create a standardized event object.""" return EventBusEvent( @@ -262,7 +257,7 @@ async def _invoke_handler(self, handler: Callable[[EventBusEvent], Any], event: await asyncio.to_thread(handler, event) async def _kafka_listener(self) -> None: - """Listen for Kafka messages and distribute to local subscribers.""" + """Listen for Kafka messages from OTHER instances and distribute to local subscribers.""" if not self.consumer: return @@ -270,30 +265,28 @@ async def _kafka_listener(self) -> None: try: while self.is_running: - # Poll for messages with small timeout - if self._executor: - msg = await self._executor(None, self.consumer.poll, 0.1) - else: - # Fallback to sync operation if executor not available - await asyncio.sleep(0.1) - continue - - if msg is None: + try: + msg = await asyncio.wait_for(self.consumer.getone(), timeout=0.1) + + # Skip messages from this instance - publisher handles its own state + headers = dict(msg.headers) if msg.headers else {} + source = headers.get("source_instance", b"").decode("utf-8") + if source == self._instance_id: + continue + + try: + event_dict = json.loads(msg.value.decode("utf-8")) + event = EventBusEvent.model_validate(event_dict) + await self._distribute_event(event.event_type, event) + except Exception as e: + self.logger.error(f"Error processing Kafka message: {e}") + + except asyncio.TimeoutError: continue - - if msg.error(): - if msg.error().code() != KafkaError._PARTITION_EOF: - self.logger.error(f"Consumer error: {msg.error()}") + except KafkaError as e: + self.logger.error(f"Consumer error: {e}") continue - try: - # Deserialize message - Pydantic parses timestamp string to datetime - event_dict = json.loads(msg.value().decode("utf-8")) - event = EventBusEvent.model_validate(event_dict) - await self._distribute_event(event.event_type, event) - except Exception as e: - self.logger.error(f"Error processing Kafka message: {e}") - except asyncio.CancelledError: self.logger.info("Kafka listener cancelled") except Exception 
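The publish/listen pair above implements self-filtering through a `source_instance` header. A minimal sketch of that rule in isolation; the helper names are illustrative, and the header shape follows the list-of-`(str, bytes)` tuples aiokafka uses:

```python
from uuid import uuid4

INSTANCE_ID = str(uuid4())


def make_headers(instance_id: str) -> list[tuple[str, bytes]]:
    # Headers attached on send so consumers can recognise the publisher.
    return [("source_instance", instance_id.encode("utf-8"))]


def is_own_message(headers: list[tuple[str, bytes]] | None, instance_id: str) -> bool:
    # Consumers skip anything they produced themselves; the publisher already
    # updated its own state before calling publish().
    decoded = {k: v.decode("utf-8") for k, v in (headers or [])}
    return decoded.get("source_instance") == instance_id


assert is_own_message(make_headers(INSTANCE_ID), INSTANCE_ID) is True
assert is_own_message(make_headers(str(uuid4())), INSTANCE_ID) is False
```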
as e: diff --git a/backend/app/services/idempotency/middleware.py b/backend/app/services/idempotency/middleware.py index fe6e3a9e..7a5ff9cb 100644 --- a/backend/app/services/idempotency/middleware.py +++ b/backend/app/services/idempotency/middleware.py @@ -235,7 +235,7 @@ async def async_handler(message: Any) -> Any: try: # Deserialize using schema registry if available - event = self.consumer._schema_registry.deserialize_event(raw_value, message.topic()) + event = await self.consumer._schema_registry.deserialize_event(raw_value, message.topic) if not event: self.logger.error(f"Failed to deserialize event for {event_type}") return None diff --git a/backend/app/services/user_settings_service.py b/backend/app/services/user_settings_service.py index 20074aa4..75817055 100644 --- a/backend/app/services/user_settings_service.py +++ b/backend/app/services/user_settings_service.py @@ -54,7 +54,11 @@ async def get_user_settings(self, user_id: str) -> DomainUserSettings: return await self.get_user_settings_fresh(user_id) async def initialize(self, event_bus_manager: EventBusManager) -> None: - """Subscribe to settings update events for cache invalidation.""" + """Subscribe to settings update events for cross-instance cache invalidation. + + Note: EventBus filters out self-published messages, so this handler only + runs for events from OTHER instances. + """ self._event_bus_manager = event_bus_manager bus = await event_bus_manager.get_event_bus() diff --git a/backend/app/settings.py b/backend/app/settings.py index 89bcad33..e0ef1b00 100644 --- a/backend/app/settings.py +++ b/backend/app/settings.py @@ -51,7 +51,7 @@ class Settings(BaseSettings): KAFKA_BOOTSTRAP_SERVERS: str = "kafka:29092" KAFKA_GROUP_SUFFIX: str = "suff" # Suffix to append to consumer group IDs for test/parallel isolation SCHEMA_REGISTRY_URL: str = "http://schema-registry:8081" - SCHEMA_REGISTRY_AUTH: str | None = None # Format: "username:password" + SCHEMA_REGISTRY_AUTH: str = "" # Format: "username:password" ENABLE_EVENT_STREAMING: bool = False EVENT_RETENTION_DAYS: int = 30 KAFKA_TOPIC_PREFIX: str = "pref" diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 6726f5db..cd8dfeba 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -23,7 +23,8 @@ dependencies = [ "charset-normalizer==3.4.0", "click==8.1.7", "ConfigArgParse==1.7.1", - "confluent-kafka==2.6.1", + "aiokafka==0.13.0", + "python-schema-registry-client==2.6.1", "contourpy==1.3.3", "cycler==0.12.1", "Deprecated==1.2.14", @@ -148,7 +149,6 @@ dev = [ "pytest-xdist==3.6.1", "ruff==0.14.10", "types-cachetools==6.2.0.20250827", - "types-confluent-kafka==1.3.6", ] # Ruff configuration diff --git a/backend/scripts/create_topics.py b/backend/scripts/create_topics.py index de69c55f..c7695a0c 100755 --- a/backend/scripts/create_topics.py +++ b/backend/scripts/create_topics.py @@ -6,13 +6,12 @@ import asyncio import os import sys -from typing import List +from aiokafka.admin import AIOKafkaAdminClient, NewTopic +from aiokafka.errors import TopicAlreadyExistsError from app.core.logging import setup_logger from app.infrastructure.kafka.topics import get_all_topics, get_topic_configs from app.settings import Settings -from confluent_kafka import KafkaException -from confluent_kafka.admin import AdminClient, NewTopic logger = setup_logger(os.environ.get("LOG_LEVEL", "INFO")) @@ -21,20 +20,19 @@ async def create_topics(settings: Settings) -> None: """Create all required Kafka topics using provided settings.""" # Create admin client - admin_client = 
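With `SCHEMA_REGISTRY_AUTH` now defaulting to an empty string, the `split(":", 1)` parsing in `SchemaRegistryManager` resolves to "no auth" for the default value. A tiny sketch of that behaviour, using a hypothetical helper name:

```python
def parse_basic_auth(raw: str) -> tuple[str, str] | None:
    # Mirrors the split(":", 1) logic: "username:password" -> tuple, anything else -> None.
    parts = raw.split(":", 1)
    return (parts[0], parts[1]) if len(parts) == 2 else None


assert parse_basic_auth("") is None                                  # default: no auth
assert parse_basic_auth("user:p@ss:word") == ("user", "p@ss:word")   # only the first ":" splits
```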
AdminClient( - { - "bootstrap.servers": settings.KAFKA_BOOTSTRAP_SERVERS, - "client.id": "topic-creator", - } + admin_client = AIOKafkaAdminClient( + bootstrap_servers=settings.KAFKA_BOOTSTRAP_SERVERS, + client_id="topic-creator", ) try: + await admin_client.start() logger.info(f"Connected to Kafka brokers: {settings.KAFKA_BOOTSTRAP_SERVERS}") # Get existing topics - metadata = admin_client.list_topics(timeout=10) - existing_topics = set(metadata.topics.keys()) - logger.info(f"Existing topics: {existing_topics}") + existing_topics: list[str] = await admin_client.list_topics() + existing_topics_set = set(existing_topics) + logger.info(f"Existing topics: {existing_topics_set}") # Get all required topics and their configs all_topics = get_all_topics() @@ -43,12 +41,12 @@ async def create_topics(settings: Settings) -> None: logger.info(f"Total required topics: {len(all_topics)} (prefix: '{topic_prefix}')") # Create topics - topics_to_create: List[NewTopic] = [] + topics_to_create: list[NewTopic] = [] for topic in all_topics: # Apply topic prefix for consistency with consumers/producers topic_name = f"{topic_prefix}{topic}" - if topic_name not in existing_topics: + if topic_name not in existing_topics_set: # Get config from topic_configs config = topic_configs.get( topic, @@ -63,10 +61,10 @@ async def create_topics(settings: Settings) -> None: ) new_topic = NewTopic( - topic=topic_name, + name=topic_name, num_partitions=config.get("num_partitions", 3), replication_factor=config.get("replication_factor", 1), - config=config.get("config", {}), + topic_configs=config.get("config", {}), ) topics_to_create.append(new_topic) logger.info(f"Will create topic: {topic_name}") @@ -75,14 +73,11 @@ async def create_topics(settings: Settings) -> None: if topics_to_create: try: - fs = admin_client.create_topics(topics_to_create) - # Wait for operations to complete - for topic_name, future in fs.items(): - try: - future.result() # The result itself is None - logger.info(f"Successfully created topic: {topic_name}") - except KafkaException as e: - logger.warning(f"Failed to create topic {topic_name}: {e}") + await admin_client.create_topics(topics_to_create) + for topic in topics_to_create: + logger.info(f"Successfully created topic: {topic.name}") + except TopicAlreadyExistsError as e: + logger.warning(f"Some topics already exist: {e}") except Exception as e: logger.error(f"Error creating topics: {e}") raise @@ -90,15 +85,14 @@ async def create_topics(settings: Settings) -> None: logger.info("All topics already exist") # List final topics - final_metadata = admin_client.list_topics(timeout=10) - final_topics = set(final_metadata.topics.keys()) + final_topics: list[str] = await admin_client.list_topics() logger.info(f"Final topics count: {len(final_topics)}") for topic_name in sorted(final_topics): if not topic_name.startswith("__"): # Skip internal topics logger.info(f" - {topic_name}") finally: - pass # AdminClient doesn't need explicit cleanup + await admin_client.close() async def main() -> None: diff --git a/backend/tests/integration/dlq/test_dlq_manager.py b/backend/tests/integration/dlq/test_dlq_manager.py index 6615f248..091d2f08 100644 --- a/backend/tests/integration/dlq/test_dlq_manager.py +++ b/backend/tests/integration/dlq/test_dlq_manager.py @@ -4,13 +4,13 @@ from datetime import datetime, timezone import pytest +from aiokafka import AIOKafkaProducer from app.core.database_context import Database from app.db.docs import DLQMessageDocument from app.dlq.manager import create_dlq_manager from 
app.domain.enums.kafka import KafkaTopic -from app.events.schema.schema_registry import create_schema_registry_manager +from app.events.schema.schema_registry import SchemaRegistryManager from app.settings import Settings -from confluent_kafka import Producer from tests.helpers import make_execution_requested_event from tests.helpers.eventually import eventually @@ -25,7 +25,7 @@ @pytest.mark.asyncio async def test_dlq_manager_persists_in_mongo(db: Database, test_settings: Settings) -> None: - schema_registry = create_schema_registry_manager(test_settings, _test_logger) + schema_registry = SchemaRegistryManager(test_settings, _test_logger) manager = create_dlq_manager(settings=test_settings, schema_registry=schema_registry, logger=_test_logger) # Use prefix from test_settings to match what the manager uses @@ -42,14 +42,17 @@ async def test_dlq_manager_persists_in_mongo(db: Database, test_settings: Settin "producer_id": "tests", } - # Produce to DLQ topic - producer = Producer({"bootstrap.servers": "localhost:9092"}) - producer.produce( - topic=f"{prefix}{str(KafkaTopic.DEAD_LETTER_QUEUE)}", - key=ev.event_id.encode(), - value=json.dumps(payload).encode(), - ) - producer.flush(5) + # Produce to DLQ topic using aiokafka + producer = AIOKafkaProducer(bootstrap_servers="localhost:9092") + await producer.start() + try: + await producer.send_and_wait( + topic=f"{prefix}{str(KafkaTopic.DEAD_LETTER_QUEUE)}", + key=ev.event_id.encode(), + value=json.dumps(payload).encode(), + ) + finally: + await producer.stop() # Run the manager briefly to consume and persist async with manager: diff --git a/backend/tests/integration/events/test_admin_utils.py b/backend/tests/integration/events/test_admin_utils.py deleted file mode 100644 index db03ac86..00000000 --- a/backend/tests/integration/events/test_admin_utils.py +++ /dev/null @@ -1,21 +0,0 @@ -import logging - -import pytest -from app.events.admin_utils import AdminUtils -from app.settings import Settings - -_test_logger = logging.getLogger("test.events.admin_utils") - - -@pytest.mark.kafka -@pytest.mark.asyncio -async def test_admin_utils_real_topic_checks(test_settings: Settings) -> None: - topic = f"{test_settings.KAFKA_TOPIC_PREFIX}adminutils.{test_settings.KAFKA_GROUP_SUFFIX}" - au = AdminUtils(settings=test_settings, logger=_test_logger) - - # Ensure topic exists (idempotent) - res = await au.ensure_topics_exist([(topic, 1)]) - assert res.get(topic) in (True, False) # Some clusters may report exists - - exists = await au.check_topic_exists(topic) - assert exists is True diff --git a/backend/tests/integration/events/test_consumer_group_monitor_real.py b/backend/tests/integration/events/test_consumer_group_monitor_real.py index 457ac87c..adcead33 100644 --- a/backend/tests/integration/events/test_consumer_group_monitor_real.py +++ b/backend/tests/integration/events/test_consumer_group_monitor_real.py @@ -4,6 +4,7 @@ import pytest from app.events.consumer_group_monitor import ( ConsumerGroupHealth, + ConsumerGroupState, ConsumerGroupStatus, NativeConsumerGroupMonitor, ) @@ -19,21 +20,21 @@ async def test_consumer_group_status_error_path_and_summary(test_settings: Setti monitor = NativeConsumerGroupMonitor(settings=test_settings, logger=_test_logger) # Non-existent group triggers error-handling path and returns minimal status gid = f"does-not-exist-{uuid4().hex[:8]}" - status = await monitor.get_consumer_group_status(gid, timeout=5.0, include_lag=False) + status = await monitor.get_consumer_group_status(gid, include_lag=False) assert status.group_id == 
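The start/try/finally sequence around `AIOKafkaProducer` in this test could also be expressed as an async context manager. An illustrative helper, not part of this patch, assuming only that start/stop ordering matters:

```python
from contextlib import asynccontextmanager
from typing import AsyncIterator

from aiokafka import AIOKafkaProducer


@asynccontextmanager
async def kafka_producer(bootstrap_servers: str) -> AsyncIterator[AIOKafkaProducer]:
    # Guarantees producer.stop() runs even if the body raises.
    producer = AIOKafkaProducer(bootstrap_servers=bootstrap_servers)
    await producer.start()
    try:
        yield producer
    finally:
        await producer.stop()


# Usage sketch (hypothetical topic/payload):
# async with kafka_producer("localhost:9092") as producer:
#     await producer.send_and_wait(topic="some-topic", key=b"k", value=b"{}")
```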
gid # Some clusters report non-existent groups as DEAD/UNKNOWN rather than raising - assert status.state in ("ERROR", "DEAD", "UNKNOWN") + assert status.state in (ConsumerGroupState.DEAD, ConsumerGroupState.UNKNOWN) assert status.health is ConsumerGroupHealth.UNHEALTHY summary = monitor.get_health_summary(status) - assert summary["group_id"] == gid and summary["health"] == ConsumerGroupHealth.UNHEALTHY + assert summary["group_id"] == gid and summary["health"] == ConsumerGroupHealth.UNHEALTHY.value def test_assess_group_health_branches(test_settings: Settings) -> None: m = NativeConsumerGroupMonitor(settings=test_settings, logger=_test_logger) - # Error state + # Unknown state (triggers unhealthy) s = ConsumerGroupStatus( group_id="g", - state="ERROR", + state=ConsumerGroupState.UNKNOWN, protocol="p", protocol_type="ptype", coordinator="c", @@ -44,21 +45,36 @@ def test_assess_group_health_branches(test_settings: Settings) -> None: total_lag=0, ) h, msg = m._assess_group_health(s) # noqa: SLF001 - assert h is ConsumerGroupHealth.UNHEALTHY and "error" in msg.lower() + assert h is ConsumerGroupHealth.UNHEALTHY and "unknown" in msg.lower() + + # Dead state + s.state = ConsumerGroupState.DEAD + h, msg = m._assess_group_health(s) # noqa: SLF001 + assert h is ConsumerGroupHealth.UNHEALTHY and "dead" in msg.lower() # Insufficient members - s.state = "STABLE" + s.state = ConsumerGroupState.STABLE h, _ = m._assess_group_health(s) # noqa: SLF001 assert h is ConsumerGroupHealth.UNHEALTHY - # Rebalancing + # Rebalancing (preparing) s.member_count = 1 - s.state = "REBALANCING" + s.state = ConsumerGroupState.PREPARING_REBALANCE + h, _ = m._assess_group_health(s) # noqa: SLF001 + assert h is ConsumerGroupHealth.DEGRADED + + # Rebalancing (completing) + s.state = ConsumerGroupState.COMPLETING_REBALANCE + h, _ = m._assess_group_health(s) # noqa: SLF001 + assert h is ConsumerGroupHealth.DEGRADED + + # Empty group + s.state = ConsumerGroupState.EMPTY h, _ = m._assess_group_health(s) # noqa: SLF001 assert h is ConsumerGroupHealth.DEGRADED # Critical lag - s.state = "STABLE" + s.state = ConsumerGroupState.STABLE s.total_lag = m.critical_lag_threshold + 1 h, _ = m._assess_group_health(s) # noqa: SLF001 assert h is ConsumerGroupHealth.UNHEALTHY @@ -85,6 +101,6 @@ def test_assess_group_health_branches(test_settings: Settings) -> None: async def test_multiple_group_status_mixed_errors(test_settings: Settings) -> None: m = NativeConsumerGroupMonitor(settings=test_settings, logger=_test_logger) gids = [f"none-{uuid4().hex[:6]}", f"none-{uuid4().hex[:6]}"] - res = await m.get_multiple_group_status(gids, timeout=5.0, include_lag=False) + res = await m.get_multiple_group_status(gids, include_lag=False) assert set(res.keys()) == set(gids) assert all(v.health is ConsumerGroupHealth.UNHEALTHY for v in res.values()) diff --git a/backend/tests/integration/events/test_schema_registry_real.py b/backend/tests/integration/events/test_schema_registry_real.py index 273c7706..90647f0d 100644 --- a/backend/tests/integration/events/test_schema_registry_real.py +++ b/backend/tests/integration/events/test_schema_registry_real.py @@ -11,7 +11,8 @@ _test_logger = logging.getLogger("test.events.schema_registry_real") -def test_serialize_and_deserialize_event_real_registry(test_settings: Settings) -> None: +@pytest.mark.asyncio +async def test_serialize_and_deserialize_event_real_registry(test_settings: Settings) -> None: # Uses real Schema Registry configured via env (SCHEMA_REGISTRY_URL) m = SchemaRegistryManager(settings=test_settings, 
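A simplified, self-contained sketch of the health rules these test branches exercise. The enums and the lag threshold default below are local stand-ins, not the actual `NativeConsumerGroupMonitor._assess_group_health` implementation:

```python
from enum import Enum, auto


class State(Enum):
    STABLE = auto()
    EMPTY = auto()
    DEAD = auto()
    UNKNOWN = auto()
    PREPARING_REBALANCE = auto()
    COMPLETING_REBALANCE = auto()


class Health(Enum):
    HEALTHY = auto()
    DEGRADED = auto()
    UNHEALTHY = auto()


def assess(state: State, member_count: int, total_lag: int, critical_lag_threshold: int = 10_000) -> Health:
    # Dead/unknown groups are unhealthy; rebalancing or empty groups are degraded;
    # otherwise missing members or excessive lag mark the group unhealthy.
    if state in (State.DEAD, State.UNKNOWN):
        return Health.UNHEALTHY
    if state in (State.PREPARING_REBALANCE, State.COMPLETING_REBALANCE, State.EMPTY):
        return Health.DEGRADED
    if member_count < 1:
        return Health.UNHEALTHY
    if total_lag > critical_lag_threshold:
        return Health.UNHEALTHY
    return Health.HEALTHY


assert assess(State.STABLE, member_count=2, total_lag=0) is Health.HEALTHY
assert assess(State.COMPLETING_REBALANCE, member_count=1, total_lag=0) is Health.DEGRADED
assert assess(State.STABLE, member_count=1, total_lag=50_000) is Health.UNHEALTHY
```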
logger=_test_logger) ev = PodCreatedEvent( @@ -20,7 +21,7 @@ def test_serialize_and_deserialize_event_real_registry(test_settings: Settings) namespace="n", metadata=AvroEventMetadata(service_name="s", service_version="1"), ) - data = m.serialize_event(ev) - obj = m.deserialize_event(data, topic=str(ev.topic)) + data = await m.serialize_event(ev) + obj = await m.deserialize_event(data, topic=str(ev.topic)) assert isinstance(obj, PodCreatedEvent) assert obj.namespace == "n" diff --git a/backend/tests/integration/events/test_schema_registry_roundtrip.py b/backend/tests/integration/events/test_schema_registry_roundtrip.py index 914982d0..00cc2784 100644 --- a/backend/tests/integration/events/test_schema_registry_roundtrip.py +++ b/backend/tests/integration/events/test_schema_registry_roundtrip.py @@ -17,16 +17,17 @@ async def test_schema_registry_serialize_deserialize_roundtrip(scope: AsyncConta reg: SchemaRegistryManager = await scope.get(SchemaRegistryManager) # Schema registration happens lazily in serialize_event ev = make_execution_requested_event(execution_id="e-rt") - data = reg.serialize_event(ev) + data = await reg.serialize_event(ev) assert data.startswith(MAGIC_BYTE) - back = reg.deserialize_event(data, topic=str(ev.topic)) + back = await reg.deserialize_event(data, topic=str(ev.topic)) assert back.event_id == ev.event_id and getattr(back, "execution_id", None) == ev.execution_id # initialize_schemas should be a no-op if already initialized; call to exercise path await reg.initialize_schemas() -def test_schema_registry_deserialize_invalid_header(test_settings: Settings) -> None: +@pytest.mark.asyncio +async def test_schema_registry_deserialize_invalid_header(test_settings: Settings) -> None: reg = SchemaRegistryManager(settings=test_settings, logger=_test_logger) with pytest.raises(ValueError): - reg.deserialize_event(b"\x01\x00\x00\x00\x01", topic="t") # wrong magic byte + await reg.deserialize_event(b"\x01\x00\x00\x00\x01", topic="t") # wrong magic byte diff --git a/backend/tests/integration/services/coordinator/test_execution_coordinator.py b/backend/tests/integration/services/coordinator/test_execution_coordinator.py index 7b4cbcce..c00ff263 100644 --- a/backend/tests/integration/services/coordinator/test_execution_coordinator.py +++ b/backend/tests/integration/services/coordinator/test_execution_coordinator.py @@ -2,6 +2,7 @@ from app.services.coordinator.coordinator import ExecutionCoordinator from dishka import AsyncContainer from tests.helpers import make_execution_requested_event +from tests.helpers.eventually import eventually pytestmark = pytest.mark.integration @@ -11,15 +12,10 @@ async def test_handle_requested_and_schedule(scope: AsyncContainer) -> None: coord: ExecutionCoordinator = await scope.get(ExecutionCoordinator) ev = make_execution_requested_event(execution_id="e-real-1") - # Directly route requested event (no Kafka consumer) await coord._handle_execution_requested(ev) # noqa: SLF001 - pos = await coord.queue_manager.get_queue_position("e-real-1") - assert pos is not None + # Coordinator's background loop schedules executions automatically + async def is_active() -> None: + assert "e-real-1" in coord._active_executions # noqa: SLF001 - # Schedule one execution from queue - next_ev = await coord.queue_manager.get_next_execution() - assert next_ev is not None and next_ev.execution_id == "e-real-1" - await coord._schedule_execution(next_ev) # noqa: SLF001 - # Should be tracked as active - assert "e-real-1" in coord._active_executions # noqa: SLF001 + await 
eventually(is_active, timeout=2.0, interval=0.05) diff --git a/backend/tests/unit/events/core/test_producer.py b/backend/tests/unit/events/core/test_producer.py deleted file mode 100644 index ba825dee..00000000 --- a/backend/tests/unit/events/core/test_producer.py +++ /dev/null @@ -1,22 +0,0 @@ -import json -import logging - -import pytest -from app.events.core import ProducerMetrics, UnifiedProducer - -pytestmark = pytest.mark.unit - -_test_logger = logging.getLogger("test.events.core.producer") - - -def test_producer_handle_stats_path() -> None: - """Directly run stats parsing to cover branch logic; avoid relying on timing.""" - m = ProducerMetrics() - p = object.__new__(UnifiedProducer) # bypass __init__ safely for method call - # Inject required attributes for _handle_stats (including logger for exception handler) - p._metrics = m - p._stats_callback = None - p.logger = _test_logger - payload = json.dumps({"msg_cnt": 1, "topics": {"t": {"partitions": {"0": {"msgq_cnt": 2, "rtt": {"avg": 5}}}}}}) - UnifiedProducer._handle_stats(p, payload) - assert m.queue_size == 1 and m.avg_latency_ms > 0 diff --git a/backend/uv.lock b/backend/uv.lock index ed11e615..1206ff4a 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -100,6 +100,37 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b4/63/278a98c715ae467624eafe375542d8ba9b4383a016df8fdefe0ae28382a7/aiohttp-3.13.3-cp314-cp314t-win_amd64.whl", hash = "sha256:44531a36aa2264a1860089ffd4dce7baf875ee5a6079d5fb42e261c704ef7344", size = 499694, upload-time = "2026-01-03T17:32:24.546Z" }, ] +[[package]] +name = "aiokafka" +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "async-timeout" }, + { name = "packaging" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/87/18/d3a4f8f9ad099fc59217b8cdf66eeecde3a9ef3bb31fe676e431a3b0010f/aiokafka-0.13.0.tar.gz", hash = "sha256:7d634af3c8d694a37a6c8535c54f01a740e74cccf7cc189ecc4a3d64e31ce122", size = 598580, upload-time = "2026-01-02T13:55:18.911Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/60/17/715ac23b4f8df3ff8d7c0a6f1c5fd3a179a8a675205be62d1d1bb27dffa2/aiokafka-0.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:231ecc0038c2736118f1c95149550dbbdf7b7a12069f70c005764fa1824c35d4", size = 346168, upload-time = "2026-01-02T13:54:49.128Z" }, + { url = "https://files.pythonhosted.org/packages/00/26/71c6f4cce2c710c6ffa18b9e294384157f46b0491d5b020de300802d167e/aiokafka-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2e2817593cab4c71c1d3b265b2446da91121a467ff7477c65f0f39a80047bc28", size = 349037, upload-time = "2026-01-02T13:54:50.48Z" }, + { url = "https://files.pythonhosted.org/packages/82/18/7b86418a4d3dc1303e89c0391942258ead31c02309e90eb631f3081eec1d/aiokafka-0.13.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b80e0aa1c811a9a12edb0b94445a0638d61a345932f785d47901d28b8aad86c8", size = 1140066, upload-time = "2026-01-02T13:54:52.33Z" }, + { url = "https://files.pythonhosted.org/packages/f9/51/45e46b4407d39b950c8493e19498aeeb5af4fc461fb54fa0247da16bfd75/aiokafka-0.13.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:79672c456bd1642769e74fc2db1c34f23b15500e978fd38411662e8ca07590ad", size = 1130088, upload-time = "2026-01-02T13:54:53.786Z" }, + { url = 
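The coordinator test above now waits on `tests.helpers.eventually` instead of driving the queue manually. A hedged sketch of what such a polling helper looks like; the signature and the retry-on-AssertionError behaviour are inferred only from the call site:

```python
import asyncio
from typing import Awaitable, Callable


async def eventually(check: Callable[[], Awaitable[None]], timeout: float = 2.0, interval: float = 0.05) -> None:
    """Re-run an async assertion until it passes or the timeout elapses."""
    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        try:
            await check()
            return
        except AssertionError:
            if loop.time() >= deadline:
                raise  # surface the last failure once time is up
            await asyncio.sleep(interval)


# Usage sketch, matching the test above:
# await eventually(is_active, timeout=2.0, interval=0.05)
```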
"https://files.pythonhosted.org/packages/49/7f/6a66f6fd6fb73e15bd34f574e38703ba36d3f9256c80e7aba007bd8a9256/aiokafka-0.13.0-cp312-cp312-win32.whl", hash = "sha256:00bb4e3d5a237b8618883eb1dd8c08d671db91d3e8e33ac98b04edf64225658c", size = 309581, upload-time = "2026-01-02T13:54:55.444Z" }, + { url = "https://files.pythonhosted.org/packages/d3/e0/a2d5a8912699dd0fee28e6fb780358c63c7a4727517fffc110cb7e43f874/aiokafka-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:0f0cccdf2fd16927fbe077279524950676fbffa7b102d6b117041b3461b5d927", size = 329327, upload-time = "2026-01-02T13:54:56.981Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f6/a74c49759233e98b61182ba3d49d5ac9c8de0643651892acba2704fba1cc/aiokafka-0.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:39d71c40cff733221a6b2afff4beeac5dacbd119fb99eec5198af59115264a1a", size = 343733, upload-time = "2026-01-02T13:54:58.536Z" }, + { url = "https://files.pythonhosted.org/packages/cf/52/4f7e80eee2c69cd8b047c18145469bf0dc27542a5dca3f96ff81ade575b0/aiokafka-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:faa2f5f3d0d2283a0c1a149748cc7e3a3862ef327fa5762e2461088eedde230a", size = 346258, upload-time = "2026-01-02T13:55:00.947Z" }, + { url = "https://files.pythonhosted.org/packages/81/9b/d2766bb3b0bad53eb25a88e51a884be4b77a1706053ad717b893b4daea4b/aiokafka-0.13.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b890d535e55f5073f939585bef5301634df669e97832fda77aa743498f008662", size = 1114744, upload-time = "2026-01-02T13:55:02.475Z" }, + { url = "https://files.pythonhosted.org/packages/8f/00/12e0a39cd4809149a09b4a52b629abc9bf80e7b8bad9950040b1adae99fc/aiokafka-0.13.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e22eb8a1475b9c0f45b553b6e2dcaf4ec3c0014bf4e389e00a0a0ec85d0e3bdc", size = 1105676, upload-time = "2026-01-02T13:55:04.036Z" }, + { url = "https://files.pythonhosted.org/packages/38/4a/0bc91e90faf55533fe6468461c2dd31c22b0e1d274b9386f341cca3f7eb7/aiokafka-0.13.0-cp313-cp313-win32.whl", hash = "sha256:ae507c7b09e882484f709f2e7172b3a4f75afffcd896d00517feb35c619495bb", size = 308257, upload-time = "2026-01-02T13:55:05.873Z" }, + { url = "https://files.pythonhosted.org/packages/23/63/5433d1aa10c4fb4cf85bd73013263c36d7da4604b0c77ed4d1ad42fae70c/aiokafka-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:fec1a7e3458365a72809edaa2b990f65ca39b01a2a579f879ac4da6c9b2dbc5c", size = 326968, upload-time = "2026-01-02T13:55:07.351Z" }, + { url = "https://files.pythonhosted.org/packages/3c/cc/45b04c3a5fd3d2d5f444889ecceb80b2f78d6d66aa45e3042767e55579e2/aiokafka-0.13.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:9a403785f7092c72906c37f7618f7b16a4219eba8ed0bdda90fba410a7dd50b5", size = 344503, upload-time = "2026-01-02T13:55:08.723Z" }, + { url = "https://files.pythonhosted.org/packages/76/df/0b76fe3b93558ae71b856940e384909c4c2c7a1c330423003191e4ba7782/aiokafka-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:256807326831b7eee253ea1017bd2b19ab1c2298ce6b20a87fde97c253c572bc", size = 347621, upload-time = "2026-01-02T13:55:10.147Z" }, + { url = "https://files.pythonhosted.org/packages/34/1a/d59932f98fd3c106e2a7c8d4d5ebd8df25403436dfc27b3031918a37385e/aiokafka-0.13.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:64d90f91291da265d7f25296ba68fc6275684eebd6d1cf05a1b2abe6c2ba3543", size = 1111410, upload-time = "2026-01-02T13:55:11.763Z" }, + { url = 
"https://files.pythonhosted.org/packages/7e/04/fbf3e34ab3bc21e6e760c3fcd089375052fccc04eb8745459a82a58a647b/aiokafka-0.13.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b5a33cc043c8d199bcf101359d86f2d31fd54f4b157ac12028bdc34e3e1cf74a", size = 1094799, upload-time = "2026-01-02T13:55:13.795Z" }, + { url = "https://files.pythonhosted.org/packages/85/10/509f709fd3b7c3e568a5b8044be0e80a1504f8da6ddc72c128b21e270913/aiokafka-0.13.0-cp314-cp314-win32.whl", hash = "sha256:538950384b539ba2333d35a853f09214c0409e818e5d5f366ef759eea50bae9c", size = 311553, upload-time = "2026-01-02T13:55:15.928Z" }, + { url = "https://files.pythonhosted.org/packages/2b/18/424d6a4eb6f4835a371c1e2cfafce800540b33d957c6638795d911f98973/aiokafka-0.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:c906dd42daadd14b4506a2e6c62dfef3d4919b5953d32ae5e5f0d99efd103c89", size = 330648, upload-time = "2026-01-02T13:55:17.421Z" }, +] + [[package]] name = "aiosignal" version = "1.4.0" @@ -403,24 +434,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/31/28/d28211d29bcc3620b1fece85a65ce5bb22f18670a03cd28ea4b75ede270c/configargparse-1.7.1-py3-none-any.whl", hash = "sha256:8b586a31f9d873abd1ca527ffbe58863c99f36d896e2829779803125e83be4b6", size = 25607, upload-time = "2025-05-23T14:26:15.923Z" }, ] -[[package]] -name = "confluent-kafka" -version = "2.6.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/13/89/928e6886ba33f5847692430d003dc33f81a6e595d25f9935d6185ea073d3/confluent_kafka-2.6.1.tar.gz", hash = "sha256:71fdab0f65ccdb00eff0f14ccde65723ebbd20392723f8c1e87e5d8ab29e50cf", size = 160019, upload-time = "2024-11-18T18:53:02.559Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/60/1d/ce01ed336e2724410e083970528aacdf448757d87607be183d8a745ba1bd/confluent_kafka-2.6.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a5062f24f3af27f6edd63cb71a663144de09dd3249647c80c4aec9655a0ea1dd", size = 3441316, upload-time = "2024-11-18T18:51:27.137Z" }, - { url = "https://files.pythonhosted.org/packages/bd/87/1854e3fca1aa02c28ca69b2df844523854c020d2cf4757e40f5f70060c59/confluent_kafka-2.6.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a21b712b17362fd0602faa87c4363bd604f271ca62076bcf2d83ca91d2bd62c2", size = 2967136, upload-time = "2024-11-18T18:51:29.997Z" }, - { url = "https://files.pythonhosted.org/packages/d3/a0/598aff8b2249f6f73471257898ba44188fa5096cb95fa89468cc6cb33518/confluent_kafka-2.6.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:0c2b63f7f764dc504296a8a474a3f09e408a456626cf938635eb31f59cac59fa", size = 15282347, upload-time = "2024-11-18T18:51:36.539Z" }, - { url = "https://files.pythonhosted.org/packages/05/44/b387e65fa2a9ef73adbc10fb2e81d0daed9a76bd5492a15a7321a69809c9/confluent_kafka-2.6.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:298aa323eb8507862ccfa619367667b41ab76ddf3657e8a45dafe4b270208257", size = 3954077, upload-time = "2024-11-18T18:51:40.57Z" }, - { url = "https://files.pythonhosted.org/packages/0e/68/d7618aa37c343d2b918704aff5b05bda7d2329c3493163247e4d5cb75a1e/confluent_kafka-2.6.1-cp312-cp312-win_amd64.whl", hash = "sha256:b17da915fc35b1bef49d599f685656f65f379094dbbc7aafc5ede1843cc72699", size = 3889112, upload-time = "2024-11-18T18:51:43.573Z" }, - { url = "https://files.pythonhosted.org/packages/8c/cf/b5d0124b143a98c7bc0ecd638f12a05db9c1ba173349d0bf22e23ac559a4/confluent_kafka-2.6.1-cp313-cp313-macosx_13_0_arm64.whl", hash = 
"sha256:2584812d8834932e194929544d09325dd09039223aced7ab09095ffd4e96a1ca", size = 2968528, upload-time = "2024-11-18T18:51:46.158Z" }, - { url = "https://files.pythonhosted.org/packages/7d/fc/4e3e5c492111d8d4444888f0503a8cebdfca70fbe64e63be8cea41de88d1/confluent_kafka-2.6.1-cp313-cp313-macosx_13_0_x86_64.whl", hash = "sha256:90853bbf466c57e0295076549b9b3b9cb6ab9447965977f3cde7f0790c39b5d6", size = 3441632, upload-time = "2024-11-18T18:51:48.642Z" }, - { url = "https://files.pythonhosted.org/packages/1f/5b/d4963bf090a8ae0712d83163f43a018297216a2cb4d0c28a422c2a0c254b/confluent_kafka-2.6.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:8c38bf716dbf99bd0e84ba867417e397c4d544d37a530b28df6cadd227a57f98", size = 15281866, upload-time = "2024-11-18T18:51:55.536Z" }, - { url = "https://files.pythonhosted.org/packages/ab/71/bd85b6b0e30227d722649a180de3459d3360cc01380f370651ff18d4dba1/confluent_kafka-2.6.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:efc94016d3b438f0af2ddd3f5a8fb293616fda296644f341161033b24282b23d", size = 3953827, upload-time = "2024-11-18T18:51:59.301Z" }, - { url = "https://files.pythonhosted.org/packages/b6/eb/042c083e0d0a5016559dda61fb060e1c9be4fe540c95898c35a9fd540762/confluent_kafka-2.6.1-cp313-cp313-win_amd64.whl", hash = "sha256:150735910da42c44f7fa21ad96e3ae0f52df34066f163d3f544001a64ea776d2", size = 3948805, upload-time = "2024-11-18T18:52:02.158Z" }, -] - [[package]] name = "contourpy" version = "1.3.3" @@ -1005,6 +1018,7 @@ source = { editable = "." } dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiohttp" }, + { name = "aiokafka" }, { name = "aiosignal" }, { name = "aiosmtplib" }, { name = "annotated-doc" }, @@ -1023,7 +1037,6 @@ dependencies = [ { name = "charset-normalizer" }, { name = "click" }, { name = "configargparse" }, - { name = "confluent-kafka" }, { name = "contourpy" }, { name = "cycler" }, { name = "deprecated" }, @@ -1097,6 +1110,7 @@ dependencies = [ { name = "python-dateutil" }, { name = "python-json-logger" }, { name = "python-multipart" }, + { name = "python-schema-registry-client" }, { name = "pyyaml" }, { name = "pyzmq" }, { name = "redis" }, @@ -1140,13 +1154,13 @@ dev = [ { name = "pytest-xdist" }, { name = "ruff" }, { name = "types-cachetools" }, - { name = "types-confluent-kafka" }, ] [package.metadata] requires-dist = [ { name = "aiohappyeyeballs", specifier = "==2.6.1" }, { name = "aiohttp", specifier = "==3.13.3" }, + { name = "aiokafka", specifier = "==0.13.0" }, { name = "aiosignal", specifier = "==1.4.0" }, { name = "aiosmtplib", specifier = "==3.0.2" }, { name = "annotated-doc", specifier = "==0.0.4" }, @@ -1165,7 +1179,6 @@ requires-dist = [ { name = "charset-normalizer", specifier = "==3.4.0" }, { name = "click", specifier = "==8.1.7" }, { name = "configargparse", specifier = "==1.7.1" }, - { name = "confluent-kafka", specifier = "==2.6.1" }, { name = "contourpy", specifier = "==1.3.3" }, { name = "cycler", specifier = "==0.12.1" }, { name = "deprecated", specifier = "==1.2.14" }, @@ -1239,6 +1252,7 @@ requires-dist = [ { name = "python-dateutil", specifier = "==2.9.0.post0" }, { name = "python-json-logger", specifier = "==2.0.7" }, { name = "python-multipart", specifier = "==0.0.18" }, + { name = "python-schema-registry-client", specifier = "==2.6.1" }, { name = "pyyaml", specifier = "==6.0.2" }, { name = "pyzmq", specifier = "==26.2.0" }, { name = "redis", specifier = "==7.1.0" }, @@ -1282,7 +1296,6 @@ dev = [ { name = "pytest-xdist", specifier = "==3.6.1" }, { name = "ruff", specifier = "==0.14.10" }, { 
name = "types-cachetools", specifier = "==6.2.0.20250827" }, - { name = "types-confluent-kafka", specifier = "==1.3.6" }, ] [[package]] @@ -1306,6 +1319,33 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] +[[package]] +name = "jsonschema" +version = "4.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "jsonschema-specifications" }, + { name = "referencing" }, + { name = "rpds-py" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b3/fc/e067678238fa451312d4c62bf6e6cf5ec56375422aee02f9cb5f909b3047/jsonschema-4.26.0.tar.gz", hash = "sha256:0c26707e2efad8aa1bfc5b7ce170f3fccc2e4918ff85989ba9ffa9facb2be326", size = 366583, upload-time = "2026-01-07T13:41:07.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/69/90/f63fb5873511e014207a475e2bb4e8b2e570d655b00ac19a9a0ca0a385ee/jsonschema-4.26.0-py3-none-any.whl", hash = "sha256:d489f15263b8d200f8387e64b4c3a75f06629559fb73deb8fdfb525f2dab50ce", size = 90630, upload-time = "2026-01-07T13:41:05.306Z" }, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "referencing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/74/a633ee74eb36c44aa6d1095e7cc5569bebf04342ee146178e2d36600708b/jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d", size = 32855, upload-time = "2025-09-08T01:34:59.186Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/45/1a4ed80516f02155c51f51e8cedb3c1902296743db0bbc66608a0db2814f/jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe", size = 18437, upload-time = "2025-09-08T01:34:57.871Z" }, +] + [[package]] name = "kiwisolver" version = "1.4.9" @@ -2540,6 +2580,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/13/6b/b60f47101ba2cac66b4a83246630e68ae9bbe2e614cbae5f4465f46dee13/python_multipart-0.0.18-py3-none-any.whl", hash = "sha256:efe91480f485f6a361427a541db4796f9e1591afc0fb8e7a4ba06bfbc6708996", size = 24389, upload-time = "2024-11-28T19:16:00.947Z" }, ] +[[package]] +name = "python-schema-registry-client" +version = "2.6.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "fastavro" }, + { name = "httpx" }, + { name = "jsonschema" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/00/4c/3b10063174780ee1ad97bca6c100cf9634aaba9559f03a588d721403567b/python_schema_registry_client-2.6.1.tar.gz", hash = "sha256:017fd45a36a4517d9c87c03c992393cce2c437c5ffa8fe1c9dfde1664caa89c9", size = 21360, upload-time = "2025-04-04T15:07:51.143Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/c1/abd18fc3c23dbe09321fcd812091320d4dc954046f95cb431ef2926cb11c/python_schema_registry_client-2.6.1-py3-none-any.whl", hash = "sha256:05950ca8f9a3409247514bef3fdb421839d6e1ae544b32dfd3b7b16237673303", size = 23095, upload-time = "2025-04-04T15:07:49.592Z" }, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -2619,6 +2674,20 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/89/f0/8956f8a86b20d7bb9d6ac0187cf4cd54d8065bc9a1a09eb8011d4d326596/redis-7.1.0-py3-none-any.whl", hash = "sha256:23c52b208f92b56103e17c5d06bdc1a6c2c0b3106583985a76a18f83b265de2b", size = 354159, upload-time = "2025-11-19T15:54:38.064Z" }, ] +[[package]] +name = "referencing" +version = "0.37.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "attrs" }, + { name = "rpds-py" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/22/f5/df4e9027acead3ecc63e50fe1e36aca1523e1719559c499951bb4b53188f/referencing-0.37.0.tar.gz", hash = "sha256:44aefc3142c5b842538163acb373e24cce6632bd54bdb01b21ad5863489f50d8", size = 78036, upload-time = "2025-10-13T15:30:48.871Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/58/ca301544e1fa93ed4f80d724bf5b194f6e4b945841c5bfd555878eea9fcb/referencing-0.37.0-py3-none-any.whl", hash = "sha256:381329a9f99628c9069361716891d34ad94af76e461dcb0335825aecc7692231", size = 26766, upload-time = "2025-10-13T15:30:47.625Z" }, +] + [[package]] name = "regex" version = "2025.8.29" @@ -2710,6 +2779,87 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/25/7a/b0178788f8dc6cafce37a212c99565fa1fe7872c70c6c9c1e1a372d9d88f/rich-14.2.0-py3-none-any.whl", hash = "sha256:76bc51fe2e57d2b1be1f96c524b890b816e334ab4c1e45888799bfaab0021edd", size = 243393, upload-time = "2025-10-09T14:16:51.245Z" }, ] +[[package]] +name = "rpds-py" +version = "0.30.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/af/3f2f423103f1113b36230496629986e0ef7e199d2aa8392452b484b38ced/rpds_py-0.30.0.tar.gz", hash = "sha256:dd8ff7cf90014af0c0f787eea34794ebf6415242ee1d6fa91eaba725cc441e84", size = 69469, upload-time = "2025-11-30T20:24:38.837Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/e7/98a2f4ac921d82f33e03f3835f5bf3a4a40aa1bfdc57975e74a97b2b4bdd/rpds_py-0.30.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a161f20d9a43006833cd7068375a94d035714d73a172b681d8881820600abfad", size = 375086, upload-time = "2025-11-30T20:22:17.93Z" }, + { url = "https://files.pythonhosted.org/packages/4d/a1/bca7fd3d452b272e13335db8d6b0b3ecde0f90ad6f16f3328c6fb150c889/rpds_py-0.30.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6abc8880d9d036ecaafe709079969f56e876fcf107f7a8e9920ba6d5a3878d05", size = 359053, upload-time = "2025-11-30T20:22:19.297Z" }, + { url = "https://files.pythonhosted.org/packages/65/1c/ae157e83a6357eceff62ba7e52113e3ec4834a84cfe07fa4b0757a7d105f/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ca28829ae5f5d569bb62a79512c842a03a12576375d5ece7d2cadf8abe96ec28", size = 390763, upload-time = "2025-11-30T20:22:21.661Z" }, + { url = "https://files.pythonhosted.org/packages/d4/36/eb2eb8515e2ad24c0bd43c3ee9cd74c33f7ca6430755ccdb240fd3144c44/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1010ed9524c73b94d15919ca4d41d8780980e1765babf85f9a2f90d247153dd", size = 408951, upload-time = "2025-11-30T20:22:23.408Z" }, + { url = "https://files.pythonhosted.org/packages/d6/65/ad8dc1784a331fabbd740ef6f71ce2198c7ed0890dab595adb9ea2d775a1/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f8d1736cfb49381ba528cd5baa46f82fdc65c06e843dab24dd70b63d09121b3f", size = 514622, upload-time = "2025-11-30T20:22:25.16Z" }, + { url = 
"https://files.pythonhosted.org/packages/63/8e/0cfa7ae158e15e143fe03993b5bcd743a59f541f5952e1546b1ac1b5fd45/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d948b135c4693daff7bc2dcfc4ec57237a29bd37e60c2fabf5aff2bbacf3e2f1", size = 414492, upload-time = "2025-11-30T20:22:26.505Z" }, + { url = "https://files.pythonhosted.org/packages/60/1b/6f8f29f3f995c7ffdde46a626ddccd7c63aefc0efae881dc13b6e5d5bb16/rpds_py-0.30.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47f236970bccb2233267d89173d3ad2703cd36a0e2a6e92d0560d333871a3d23", size = 394080, upload-time = "2025-11-30T20:22:27.934Z" }, + { url = "https://files.pythonhosted.org/packages/6d/d5/a266341051a7a3ca2f4b750a3aa4abc986378431fc2da508c5034d081b70/rpds_py-0.30.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:2e6ecb5a5bcacf59c3f912155044479af1d0b6681280048b338b28e364aca1f6", size = 408680, upload-time = "2025-11-30T20:22:29.341Z" }, + { url = "https://files.pythonhosted.org/packages/10/3b/71b725851df9ab7a7a4e33cf36d241933da66040d195a84781f49c50490c/rpds_py-0.30.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a8fa71a2e078c527c3e9dc9fc5a98c9db40bcc8a92b4e8858e36d329f8684b51", size = 423589, upload-time = "2025-11-30T20:22:31.469Z" }, + { url = "https://files.pythonhosted.org/packages/00/2b/e59e58c544dc9bd8bd8384ecdb8ea91f6727f0e37a7131baeff8d6f51661/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73c67f2db7bc334e518d097c6d1e6fed021bbc9b7d678d6cc433478365d1d5f5", size = 573289, upload-time = "2025-11-30T20:22:32.997Z" }, + { url = "https://files.pythonhosted.org/packages/da/3e/a18e6f5b460893172a7d6a680e86d3b6bc87a54c1f0b03446a3c8c7b588f/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:5ba103fb455be00f3b1c2076c9d4264bfcb037c976167a6047ed82f23153f02e", size = 599737, upload-time = "2025-11-30T20:22:34.419Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e2/714694e4b87b85a18e2c243614974413c60aa107fd815b8cbc42b873d1d7/rpds_py-0.30.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee9c752c0364588353e627da8a7e808a66873672bcb5f52890c33fd965b394", size = 563120, upload-time = "2025-11-30T20:22:35.903Z" }, + { url = "https://files.pythonhosted.org/packages/6f/ab/d5d5e3bcedb0a77f4f613706b750e50a5a3ba1c15ccd3665ecc636c968fd/rpds_py-0.30.0-cp312-cp312-win32.whl", hash = "sha256:1ab5b83dbcf55acc8b08fc62b796ef672c457b17dbd7820a11d6c52c06839bdf", size = 223782, upload-time = "2025-11-30T20:22:37.271Z" }, + { url = "https://files.pythonhosted.org/packages/39/3b/f786af9957306fdc38a74cef405b7b93180f481fb48453a114bb6465744a/rpds_py-0.30.0-cp312-cp312-win_amd64.whl", hash = "sha256:a090322ca841abd453d43456ac34db46e8b05fd9b3b4ac0c78bcde8b089f959b", size = 240463, upload-time = "2025-11-30T20:22:39.021Z" }, + { url = "https://files.pythonhosted.org/packages/f3/d2/b91dc748126c1559042cfe41990deb92c4ee3e2b415f6b5234969ffaf0cc/rpds_py-0.30.0-cp312-cp312-win_arm64.whl", hash = "sha256:669b1805bd639dd2989b281be2cfd951c6121b65e729d9b843e9639ef1fd555e", size = 230868, upload-time = "2025-11-30T20:22:40.493Z" }, + { url = "https://files.pythonhosted.org/packages/ed/dc/d61221eb88ff410de3c49143407f6f3147acf2538c86f2ab7ce65ae7d5f9/rpds_py-0.30.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:f83424d738204d9770830d35290ff3273fbb02b41f919870479fab14b9d303b2", size = 374887, upload-time = "2025-11-30T20:22:41.812Z" }, + { url = 
"https://files.pythonhosted.org/packages/fd/32/55fb50ae104061dbc564ef15cc43c013dc4a9f4527a1f4d99baddf56fe5f/rpds_py-0.30.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e7536cd91353c5273434b4e003cbda89034d67e7710eab8761fd918ec6c69cf8", size = 358904, upload-time = "2025-11-30T20:22:43.479Z" }, + { url = "https://files.pythonhosted.org/packages/58/70/faed8186300e3b9bdd138d0273109784eea2396c68458ed580f885dfe7ad/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2771c6c15973347f50fece41fc447c054b7ac2ae0502388ce3b6738cd366e3d4", size = 389945, upload-time = "2025-11-30T20:22:44.819Z" }, + { url = "https://files.pythonhosted.org/packages/bd/a8/073cac3ed2c6387df38f71296d002ab43496a96b92c823e76f46b8af0543/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a59119fc6e3f460315fe9d08149f8102aa322299deaa5cab5b40092345c2136", size = 407783, upload-time = "2025-11-30T20:22:46.103Z" }, + { url = "https://files.pythonhosted.org/packages/77/57/5999eb8c58671f1c11eba084115e77a8899d6e694d2a18f69f0ba471ec8b/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:76fec018282b4ead0364022e3c54b60bf368b9d926877957a8624b58419169b7", size = 515021, upload-time = "2025-11-30T20:22:47.458Z" }, + { url = "https://files.pythonhosted.org/packages/e0/af/5ab4833eadc36c0a8ed2bc5c0de0493c04f6c06de223170bd0798ff98ced/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:692bef75a5525db97318e8cd061542b5a79812d711ea03dbc1f6f8dbb0c5f0d2", size = 414589, upload-time = "2025-11-30T20:22:48.872Z" }, + { url = "https://files.pythonhosted.org/packages/b7/de/f7192e12b21b9e9a68a6d0f249b4af3fdcdff8418be0767a627564afa1f1/rpds_py-0.30.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9027da1ce107104c50c81383cae773ef5c24d296dd11c99e2629dbd7967a20c6", size = 394025, upload-time = "2025-11-30T20:22:50.196Z" }, + { url = "https://files.pythonhosted.org/packages/91/c4/fc70cd0249496493500e7cc2de87504f5aa6509de1e88623431fec76d4b6/rpds_py-0.30.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:9cf69cdda1f5968a30a359aba2f7f9aa648a9ce4b580d6826437f2b291cfc86e", size = 408895, upload-time = "2025-11-30T20:22:51.87Z" }, + { url = "https://files.pythonhosted.org/packages/58/95/d9275b05ab96556fefff73a385813eb66032e4c99f411d0795372d9abcea/rpds_py-0.30.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a4796a717bf12b9da9d3ad002519a86063dcac8988b030e405704ef7d74d2d9d", size = 422799, upload-time = "2025-11-30T20:22:53.341Z" }, + { url = "https://files.pythonhosted.org/packages/06/c1/3088fc04b6624eb12a57eb814f0d4997a44b0d208d6cace713033ff1a6ba/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5d4c2aa7c50ad4728a094ebd5eb46c452e9cb7edbfdb18f9e1221f597a73e1e7", size = 572731, upload-time = "2025-11-30T20:22:54.778Z" }, + { url = "https://files.pythonhosted.org/packages/d8/42/c612a833183b39774e8ac8fecae81263a68b9583ee343db33ab571a7ce55/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ba81a9203d07805435eb06f536d95a266c21e5b2dfbf6517748ca40c98d19e31", size = 599027, upload-time = "2025-11-30T20:22:56.212Z" }, + { url = "https://files.pythonhosted.org/packages/5f/60/525a50f45b01d70005403ae0e25f43c0384369ad24ffe46e8d9068b50086/rpds_py-0.30.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:945dccface01af02675628334f7cf49c2af4c1c904748efc5cf7bbdf0b579f95", size = 563020, upload-time = "2025-11-30T20:22:58.2Z" }, 
+ { url = "https://files.pythonhosted.org/packages/0b/5d/47c4655e9bcd5ca907148535c10e7d489044243cc9941c16ed7cd53be91d/rpds_py-0.30.0-cp313-cp313-win32.whl", hash = "sha256:b40fb160a2db369a194cb27943582b38f79fc4887291417685f3ad693c5a1d5d", size = 223139, upload-time = "2025-11-30T20:23:00.209Z" }, + { url = "https://files.pythonhosted.org/packages/f2/e1/485132437d20aa4d3e1d8b3fb5a5e65aa8139f1e097080c2a8443201742c/rpds_py-0.30.0-cp313-cp313-win_amd64.whl", hash = "sha256:806f36b1b605e2d6a72716f321f20036b9489d29c51c91f4dd29a3e3afb73b15", size = 240224, upload-time = "2025-11-30T20:23:02.008Z" }, + { url = "https://files.pythonhosted.org/packages/24/95/ffd128ed1146a153d928617b0ef673960130be0009c77d8fbf0abe306713/rpds_py-0.30.0-cp313-cp313-win_arm64.whl", hash = "sha256:d96c2086587c7c30d44f31f42eae4eac89b60dabbac18c7669be3700f13c3ce1", size = 230645, upload-time = "2025-11-30T20:23:03.43Z" }, + { url = "https://files.pythonhosted.org/packages/ff/1b/b10de890a0def2a319a2626334a7f0ae388215eb60914dbac8a3bae54435/rpds_py-0.30.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:eb0b93f2e5c2189ee831ee43f156ed34e2a89a78a66b98cadad955972548be5a", size = 364443, upload-time = "2025-11-30T20:23:04.878Z" }, + { url = "https://files.pythonhosted.org/packages/0d/bf/27e39f5971dc4f305a4fb9c672ca06f290f7c4e261c568f3dea16a410d47/rpds_py-0.30.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:922e10f31f303c7c920da8981051ff6d8c1a56207dbdf330d9047f6d30b70e5e", size = 353375, upload-time = "2025-11-30T20:23:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/40/58/442ada3bba6e8e6615fc00483135c14a7538d2ffac30e2d933ccf6852232/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cdc62c8286ba9bf7f47befdcea13ea0e26bf294bda99758fd90535cbaf408000", size = 383850, upload-time = "2025-11-30T20:23:07.825Z" }, + { url = "https://files.pythonhosted.org/packages/14/14/f59b0127409a33c6ef6f5c1ebd5ad8e32d7861c9c7adfa9a624fc3889f6c/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:47f9a91efc418b54fb8190a6b4aa7813a23fb79c51f4bb84e418f5476c38b8db", size = 392812, upload-time = "2025-11-30T20:23:09.228Z" }, + { url = "https://files.pythonhosted.org/packages/b3/66/e0be3e162ac299b3a22527e8913767d869e6cc75c46bd844aa43fb81ab62/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1f3587eb9b17f3789ad50824084fa6f81921bbf9a795826570bda82cb3ed91f2", size = 517841, upload-time = "2025-11-30T20:23:11.186Z" }, + { url = "https://files.pythonhosted.org/packages/3d/55/fa3b9cf31d0c963ecf1ba777f7cf4b2a2c976795ac430d24a1f43d25a6ba/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:39c02563fc592411c2c61d26b6c5fe1e51eaa44a75aa2c8735ca88b0d9599daa", size = 408149, upload-time = "2025-11-30T20:23:12.864Z" }, + { url = "https://files.pythonhosted.org/packages/60/ca/780cf3b1a32b18c0f05c441958d3758f02544f1d613abf9488cd78876378/rpds_py-0.30.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51a1234d8febafdfd33a42d97da7a43f5dcb120c1060e352a3fbc0c6d36e2083", size = 383843, upload-time = "2025-11-30T20:23:14.638Z" }, + { url = "https://files.pythonhosted.org/packages/82/86/d5f2e04f2aa6247c613da0c1dd87fcd08fa17107e858193566048a1e2f0a/rpds_py-0.30.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:eb2c4071ab598733724c08221091e8d80e89064cd472819285a9ab0f24bcedb9", size = 396507, upload-time = "2025-11-30T20:23:16.105Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/9a/453255d2f769fe44e07ea9785c8347edaf867f7026872e76c1ad9f7bed92/rpds_py-0.30.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bdfdb946967d816e6adf9a3d8201bfad269c67efe6cefd7093ef959683c8de0", size = 414949, upload-time = "2025-11-30T20:23:17.539Z" }, + { url = "https://files.pythonhosted.org/packages/a3/31/622a86cdc0c45d6df0e9ccb6becdba5074735e7033c20e401a6d9d0e2ca0/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c77afbd5f5250bf27bf516c7c4a016813eb2d3e116139aed0096940c5982da94", size = 565790, upload-time = "2025-11-30T20:23:19.029Z" }, + { url = "https://files.pythonhosted.org/packages/1c/5d/15bbf0fb4a3f58a3b1c67855ec1efcc4ceaef4e86644665fff03e1b66d8d/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:61046904275472a76c8c90c9ccee9013d70a6d0f73eecefd38c1ae7c39045a08", size = 590217, upload-time = "2025-11-30T20:23:20.885Z" }, + { url = "https://files.pythonhosted.org/packages/6d/61/21b8c41f68e60c8cc3b2e25644f0e3681926020f11d06ab0b78e3c6bbff1/rpds_py-0.30.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4c5f36a861bc4b7da6516dbdf302c55313afa09b81931e8280361a4f6c9a2d27", size = 555806, upload-time = "2025-11-30T20:23:22.488Z" }, + { url = "https://files.pythonhosted.org/packages/f9/39/7e067bb06c31de48de3eb200f9fc7c58982a4d3db44b07e73963e10d3be9/rpds_py-0.30.0-cp313-cp313t-win32.whl", hash = "sha256:3d4a69de7a3e50ffc214ae16d79d8fbb0922972da0356dcf4d0fdca2878559c6", size = 211341, upload-time = "2025-11-30T20:23:24.449Z" }, + { url = "https://files.pythonhosted.org/packages/0a/4d/222ef0b46443cf4cf46764d9c630f3fe4abaa7245be9417e56e9f52b8f65/rpds_py-0.30.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f14fc5df50a716f7ece6a80b6c78bb35ea2ca47c499e422aa4463455dd96d56d", size = 225768, upload-time = "2025-11-30T20:23:25.908Z" }, + { url = "https://files.pythonhosted.org/packages/86/81/dad16382ebbd3d0e0328776d8fd7ca94220e4fa0798d1dc5e7da48cb3201/rpds_py-0.30.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:68f19c879420aa08f61203801423f6cd5ac5f0ac4ac82a2368a9fcd6a9a075e0", size = 362099, upload-time = "2025-11-30T20:23:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/2b/60/19f7884db5d5603edf3c6bce35408f45ad3e97e10007df0e17dd57af18f8/rpds_py-0.30.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ec7c4490c672c1a0389d319b3a9cfcd098dcdc4783991553c332a15acf7249be", size = 353192, upload-time = "2025-11-30T20:23:29.151Z" }, + { url = "https://files.pythonhosted.org/packages/bf/c4/76eb0e1e72d1a9c4703c69607cec123c29028bff28ce41588792417098ac/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f251c812357a3fed308d684a5079ddfb9d933860fc6de89f2b7ab00da481e65f", size = 384080, upload-time = "2025-11-30T20:23:30.785Z" }, + { url = "https://files.pythonhosted.org/packages/72/87/87ea665e92f3298d1b26d78814721dc39ed8d2c74b86e83348d6b48a6f31/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ac98b175585ecf4c0348fd7b29c3864bda53b805c773cbf7bfdaffc8070c976f", size = 394841, upload-time = "2025-11-30T20:23:32.209Z" }, + { url = "https://files.pythonhosted.org/packages/77/ad/7783a89ca0587c15dcbf139b4a8364a872a25f861bdb88ed99f9b0dec985/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3e62880792319dbeb7eb866547f2e35973289e7d5696c6e295476448f5b63c87", size = 516670, upload-time = "2025-11-30T20:23:33.742Z" }, + { url = 
"https://files.pythonhosted.org/packages/5b/3c/2882bdac942bd2172f3da574eab16f309ae10a3925644e969536553cb4ee/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4e7fc54e0900ab35d041b0601431b0a0eb495f0851a0639b6ef90f7741b39a18", size = 408005, upload-time = "2025-11-30T20:23:35.253Z" }, + { url = "https://files.pythonhosted.org/packages/ce/81/9a91c0111ce1758c92516a3e44776920b579d9a7c09b2b06b642d4de3f0f/rpds_py-0.30.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47e77dc9822d3ad616c3d5759ea5631a75e5809d5a28707744ef79d7a1bcfcad", size = 382112, upload-time = "2025-11-30T20:23:36.842Z" }, + { url = "https://files.pythonhosted.org/packages/cf/8e/1da49d4a107027e5fbc64daeab96a0706361a2918da10cb41769244b805d/rpds_py-0.30.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:b4dc1a6ff022ff85ecafef7979a2c6eb423430e05f1165d6688234e62ba99a07", size = 399049, upload-time = "2025-11-30T20:23:38.343Z" }, + { url = "https://files.pythonhosted.org/packages/df/5a/7ee239b1aa48a127570ec03becbb29c9d5a9eb092febbd1699d567cae859/rpds_py-0.30.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4559c972db3a360808309e06a74628b95eaccbf961c335c8fe0d590cf587456f", size = 415661, upload-time = "2025-11-30T20:23:40.263Z" }, + { url = "https://files.pythonhosted.org/packages/70/ea/caa143cf6b772f823bc7929a45da1fa83569ee49b11d18d0ada7f5ee6fd6/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:0ed177ed9bded28f8deb6ab40c183cd1192aa0de40c12f38be4d59cd33cb5c65", size = 565606, upload-time = "2025-11-30T20:23:42.186Z" }, + { url = "https://files.pythonhosted.org/packages/64/91/ac20ba2d69303f961ad8cf55bf7dbdb4763f627291ba3d0d7d67333cced9/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:ad1fa8db769b76ea911cb4e10f049d80bf518c104f15b3edb2371cc65375c46f", size = 591126, upload-time = "2025-11-30T20:23:44.086Z" }, + { url = "https://files.pythonhosted.org/packages/21/20/7ff5f3c8b00c8a95f75985128c26ba44503fb35b8e0259d812766ea966c7/rpds_py-0.30.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:46e83c697b1f1c72b50e5ee5adb4353eef7406fb3f2043d64c33f20ad1c2fc53", size = 553371, upload-time = "2025-11-30T20:23:46.004Z" }, + { url = "https://files.pythonhosted.org/packages/72/c7/81dadd7b27c8ee391c132a6b192111ca58d866577ce2d9b0ca157552cce0/rpds_py-0.30.0-cp314-cp314-win32.whl", hash = "sha256:ee454b2a007d57363c2dfd5b6ca4a5d7e2c518938f8ed3b706e37e5d470801ed", size = 215298, upload-time = "2025-11-30T20:23:47.696Z" }, + { url = "https://files.pythonhosted.org/packages/3e/d2/1aaac33287e8cfb07aab2e6b8ac1deca62f6f65411344f1433c55e6f3eb8/rpds_py-0.30.0-cp314-cp314-win_amd64.whl", hash = "sha256:95f0802447ac2d10bcc69f6dc28fe95fdf17940367b21d34e34c737870758950", size = 228604, upload-time = "2025-11-30T20:23:49.501Z" }, + { url = "https://files.pythonhosted.org/packages/e8/95/ab005315818cc519ad074cb7784dae60d939163108bd2b394e60dc7b5461/rpds_py-0.30.0-cp314-cp314-win_arm64.whl", hash = "sha256:613aa4771c99f03346e54c3f038e4cc574ac09a3ddfb0e8878487335e96dead6", size = 222391, upload-time = "2025-11-30T20:23:50.96Z" }, + { url = "https://files.pythonhosted.org/packages/9e/68/154fe0194d83b973cdedcdcc88947a2752411165930182ae41d983dcefa6/rpds_py-0.30.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:7e6ecfcb62edfd632e56983964e6884851786443739dbfe3582947e87274f7cb", size = 364868, upload-time = "2025-11-30T20:23:52.494Z" }, + { url = 
"https://files.pythonhosted.org/packages/83/69/8bbc8b07ec854d92a8b75668c24d2abcb1719ebf890f5604c61c9369a16f/rpds_py-0.30.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:a1d0bc22a7cdc173fedebb73ef81e07faef93692b8c1ad3733b67e31e1b6e1b8", size = 353747, upload-time = "2025-11-30T20:23:54.036Z" }, + { url = "https://files.pythonhosted.org/packages/ab/00/ba2e50183dbd9abcce9497fa5149c62b4ff3e22d338a30d690f9af970561/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d08f00679177226c4cb8c5265012eea897c8ca3b93f429e546600c971bcbae7", size = 383795, upload-time = "2025-11-30T20:23:55.556Z" }, + { url = "https://files.pythonhosted.org/packages/05/6f/86f0272b84926bcb0e4c972262f54223e8ecc556b3224d281e6598fc9268/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5965af57d5848192c13534f90f9dd16464f3c37aaf166cc1da1cae1fd5a34898", size = 393330, upload-time = "2025-11-30T20:23:57.033Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e9/0e02bb2e6dc63d212641da45df2b0bf29699d01715913e0d0f017ee29438/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9a4e86e34e9ab6b667c27f3211ca48f73dba7cd3d90f8d5b11be56e5dbc3fb4e", size = 518194, upload-time = "2025-11-30T20:23:58.637Z" }, + { url = "https://files.pythonhosted.org/packages/ee/ca/be7bca14cf21513bdf9c0606aba17d1f389ea2b6987035eb4f62bd923f25/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5d3e6b26f2c785d65cc25ef1e5267ccbe1b069c5c21b8cc724efee290554419", size = 408340, upload-time = "2025-11-30T20:24:00.2Z" }, + { url = "https://files.pythonhosted.org/packages/c2/c7/736e00ebf39ed81d75544c0da6ef7b0998f8201b369acf842f9a90dc8fce/rpds_py-0.30.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:626a7433c34566535b6e56a1b39a7b17ba961e97ce3b80ec62e6f1312c025551", size = 383765, upload-time = "2025-11-30T20:24:01.759Z" }, + { url = "https://files.pythonhosted.org/packages/4a/3f/da50dfde9956aaf365c4adc9533b100008ed31aea635f2b8d7b627e25b49/rpds_py-0.30.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:acd7eb3f4471577b9b5a41baf02a978e8bdeb08b4b355273994f8b87032000a8", size = 396834, upload-time = "2025-11-30T20:24:03.687Z" }, + { url = "https://files.pythonhosted.org/packages/4e/00/34bcc2565b6020eab2623349efbdec810676ad571995911f1abdae62a3a0/rpds_py-0.30.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fe5fa731a1fa8a0a56b0977413f8cacac1768dad38d16b3a296712709476fbd5", size = 415470, upload-time = "2025-11-30T20:24:05.232Z" }, + { url = "https://files.pythonhosted.org/packages/8c/28/882e72b5b3e6f718d5453bd4d0d9cf8df36fddeb4ddbbab17869d5868616/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:74a3243a411126362712ee1524dfc90c650a503502f135d54d1b352bd01f2404", size = 565630, upload-time = "2025-11-30T20:24:06.878Z" }, + { url = "https://files.pythonhosted.org/packages/3b/97/04a65539c17692de5b85c6e293520fd01317fd878ea1995f0367d4532fb1/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:3e8eeb0544f2eb0d2581774be4c3410356eba189529a6b3e36bbbf9696175856", size = 591148, upload-time = "2025-11-30T20:24:08.445Z" }, + { url = "https://files.pythonhosted.org/packages/85/70/92482ccffb96f5441aab93e26c4d66489eb599efdcf96fad90c14bbfb976/rpds_py-0.30.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:dbd936cde57abfee19ab3213cf9c26be06d60750e60a8e4dd85d1ab12c8b1f40", size = 556030, upload-time = 
"2025-11-30T20:24:10.956Z" }, + { url = "https://files.pythonhosted.org/packages/20/53/7c7e784abfa500a2b6b583b147ee4bb5a2b3747a9166bab52fec4b5b5e7d/rpds_py-0.30.0-cp314-cp314t-win32.whl", hash = "sha256:dc824125c72246d924f7f796b4f63c1e9dc810c7d9e2355864b3c3a73d59ade0", size = 211570, upload-time = "2025-11-30T20:24:12.735Z" }, + { url = "https://files.pythonhosted.org/packages/d0/02/fa464cdfbe6b26e0600b62c528b72d8608f5cc49f96b8d6e38c95d60c676/rpds_py-0.30.0-cp314-cp314t-win_amd64.whl", hash = "sha256:27f4b0e92de5bfbc6f86e43959e6edd1425c33b5e69aab0984a72047f2bcf1e3", size = 226532, upload-time = "2025-11-30T20:24:14.634Z" }, +] + [[package]] name = "rsa" version = "4.9" @@ -2864,15 +3014,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/80/9c/be9d24c246ff825385b4a139409ae7e2d36ea2294f12681951a6b9f905f7/types_cachetools-6.2.0.20250827-py3-none-any.whl", hash = "sha256:96ae5abcb5ea1e1f1faf811a2ff8b2ce7e6d820fc42c4fcb4b332b2da485de16", size = 8940, upload-time = "2025-08-27T02:56:49.645Z" }, ] -[[package]] -name = "types-confluent-kafka" -version = "1.3.6" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/38/31/eb48322a20c2527c2d9ccbfcd7f4023a05f9368d58128ef569aa62f6410b/types_confluent_kafka-1.3.6.tar.gz", hash = "sha256:dfefc985df06a1912b023f0a91c0abe6179c4f79ad54e7ebc3e141c03c01b90b", size = 22977, upload-time = "2025-07-18T19:42:23.102Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/e4/3194d0f979c6b1efd6924e4601af78541467cc6eaf635e14a3e304246043/types_confluent_kafka-1.3.6-py3-none-any.whl", hash = "sha256:f2748272d74f311411dd1b8b2c7bcfbc107270fc152815e4e786ed528f43a980", size = 35679, upload-time = "2025-07-18T19:42:21.999Z" }, -] - [[package]] name = "typing-extensions" version = "4.12.2" diff --git a/docs/architecture/event-bus.md b/docs/architecture/event-bus.md new file mode 100644 index 00000000..3041b22f --- /dev/null +++ b/docs/architecture/event-bus.md @@ -0,0 +1,167 @@ +# Event bus + +This document explains how the EventBus provides cross-instance communication for services that need to react to changes +happening on other instances. If you've wondered how cache invalidation works across multiple backend replicas, this is +where that question gets answered. + +## The problem + +When running multiple instances of the backend (horizontal scaling), each instance has its own in-memory cache. When +Instance A updates a user's settings, Instance B's cache becomes stale. Without coordination, Instance B would return +outdated data until its cache TTL expires. + +```mermaid +graph LR + subgraph "Instance A" + A1[Update settings] --> A2[Update local cache] + end + + subgraph "Instance B" + B1[Stale cache] --> B2[Returns old data] + end + + A2 -.->|"No communication"| B1 +``` + +The EventBus solves this by providing a Kafka-backed pub/sub mechanism for cross-instance events. + +## Architecture + +The EventBus uses Kafka as a message broker. When a service publishes an event, it goes to Kafka. Each instance has a +Kafka listener that receives events from other instances and distributes them to local subscribers. 
+ +```mermaid +graph TB + subgraph "Instance A" + PA[Publisher] --> KP[Kafka Producer] + KCA[Kafka Consumer] --> HA[Local Handlers] + end + + subgraph "Kafka" + T[event-bus-stream topic] + end + + subgraph "Instance B" + PB[Publisher] --> KPB[Kafka Producer] + KCB[Kafka Consumer] --> HB[Local Handlers] + end + + KP --> T + KPB --> T + T --> KCA + T --> KCB +``` + +The key insight is that publishers handle their own state changes directly. They don't need to receive their own events +back from Kafka. The EventBus filters out self-published messages so handlers only run for events from other instances. + +## Self-filtering mechanism + +Each EventBus instance has a unique ID. When publishing to Kafka, this ID is included as a message header: + +```python +headers = [("source_instance", self._instance_id.encode("utf-8"))] +await self.producer.send_and_wait(topic=self._topic, value=value, headers=headers) +``` + +The Kafka listener checks this header and skips messages from itself: + +```python +headers = dict(msg.headers) if msg.headers else {} +source = headers.get("source_instance", b"").decode("utf-8") +if source == self._instance_id: + continue # Skip self-published messages +``` + +This design means: + +1. Publishers update their own state before calling `publish()` +2. The `publish()` call tells other instances about the change +3. Handlers only run for events from other instances + +## Usage pattern + +Services that need cross-instance communication follow this pattern: + +```python +class MyService: + async def initialize(self, event_bus_manager: EventBusManager) -> None: + bus = await event_bus_manager.get_event_bus() + + async def _handle(evt: EventBusEvent) -> None: + # This only runs for events from OTHER instances + await self.invalidate_cache(evt.payload["id"]) + + await bus.subscribe("my.event.*", _handle) + + async def update_something(self, id: str, data: dict) -> None: + # 1. Update local state + self._cache[id] = data + + # 2. 
Notify other instances + bus = await self._event_bus_manager.get_event_bus() + await bus.publish("my.event.updated", {"id": id}) +``` + +## Pattern matching + +Subscriptions support wildcard patterns using `fnmatch` syntax: + +| Pattern | Matches | +|--------------------------|----------------------------------| +| `execution.*` | All execution events | +| `execution.123.*` | All events for execution 123 | +| `*.completed` | All completed events | +| `user.settings.updated*` | Settings updates with any suffix | + +## Flow diagram + +Here's what happens when Instance A updates user settings: + +```mermaid +sequenceDiagram + participant API as Instance A + participant Cache as Local Cache A + participant Kafka + participant ListenerB as Instance B Listener + participant CacheB as Local Cache B + + API->>Cache: _add_to_cache(user_id, settings) + API->>Kafka: publish("user.settings.updated", {user_id}) + Note over Kafka: Message includes source_instance header + + Kafka->>API: Listener receives message + API->>API: source == self, SKIP + + Kafka->>ListenerB: Listener receives message + ListenerB->>ListenerB: source != self, PROCESS + ListenerB->>CacheB: invalidate_cache(user_id) +``` + +## EventBusManager + +The `EventBusManager` provides singleton access to the EventBus with proper lifecycle management: + +```python +async def get_event_bus(self) -> EventBus: + async with self._lock: + if self._event_bus is None: + self._event_bus = EventBus(self.settings, self.logger) + await self._event_bus.__aenter__() + return self._event_bus +``` + +Services receive the manager via dependency injection and call `get_event_bus()` when needed. + +## Key files + +| File | Purpose | +|------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------| +| [`services/event_bus.py`](https://github.com/HardMax71/Integr8sCode/blob/main/backend/app/services/event_bus.py) | EventBus and EventBusManager implementation | +| [`services/user_settings_service.py`](https://github.com/HardMax71/Integr8sCode/blob/main/backend/app/services/user_settings_service.py) | Example usage for cache invalidation | + +## Related docs + +- [User Settings Events](user-settings-events.md) — event sourcing with cache invalidation via EventBus +- [Event System Design](event-system-design.md) — domain events vs integration events +- [Kafka Topics](kafka-topic-architecture.md) — topic naming and partitioning diff --git a/docs/architecture/user-settings-events.md b/docs/architecture/user-settings-events.md index 53ff8dbb..857fda7d 100644 --- a/docs/architecture/user-settings-events.md +++ b/docs/architecture/user-settings-events.md @@ -20,7 +20,7 @@ contain the new values in Avro-compatible form. 
The service uses Pydantic's `TypeAdapter` for dict-based operations without reflection or branching: ```python ---8<-- "backend/app/services/user_settings_service.py:22:24" +--8<-- "backend/app/services/user_settings_service.py:22:23" ``` ### Updating settings @@ -28,7 +28,7 @@ The service uses Pydantic's `TypeAdapter` for dict-based operations without refl The `update_user_settings` method merges changes into current settings, publishes an event, and manages snapshots: ```python ---8<-- "backend/app/services/user_settings_service.py:88:120" +--8<-- "backend/app/services/user_settings_service.py:91:118" ``` ### Applying events @@ -36,7 +36,7 @@ The `update_user_settings` method merges changes into current settings, publishe When reconstructing settings from events, `_apply_event` merges each event's changes: ```python ---8<-- "backend/app/services/user_settings_service.py:243:254" +--8<-- "backend/app/services/user_settings_service.py:212:223" ``` The `validate_python` call handles nested dict-to-dataclass conversion, enum parsing, and type coercion automatically. @@ -63,23 +63,25 @@ while preserving full event history for auditing. Settings are cached with TTL to avoid repeated reconstruction: ```python ---8<-- "backend/app/services/user_settings_service.py:34:40" +--8<-- "backend/app/services/user_settings_service.py:33:40" ``` -Cache invalidation happens via event bus subscription: +Cache invalidation happens via [EventBus](event-bus.md) subscription. The EventBus filters out self-published messages, +so the handler only runs for events from other instances: ```python ---8<-- "backend/app/services/user_settings_service.py:58:68" +--8<-- "backend/app/services/user_settings_service.py:56:70" ``` -After each update, the service publishes to the event bus, triggering cache invalidation across instances. +After each update, the service updates its local cache directly, then publishes to the event bus to trigger cache +invalidation on other instances. 
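+
+In rough pseudocode, the write path described above looks like this; the body is a sketch, not the real method. Only
+`_add_to_cache` and the `user.settings.updated` event name come from the documented flow, and `_apply_update` is a
+hypothetical stand-in for the merge-and-publish logic shown earlier:
+
+```python
+async def update_user_settings(self, user_id: str, changes: dict) -> None:
+    updated = await self._apply_update(user_id, changes)  # merge changes, emit UserSettingsUpdatedEvent
+    self._add_to_cache(user_id, updated)                  # 1. this instance is consistent immediately
+    bus = await self._event_bus_manager.get_event_bus()
+    await bus.publish("user.settings.updated", {"user_id": user_id})  # 2. other instances invalidate their caches
+```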
## Settings history The `get_settings_history` method returns a list of changes extracted from events: ```python ---8<-- "backend/app/services/user_settings_service.py:171:189" +--8<-- "backend/app/services/user_settings_service.py:167:184" ``` ## Key files @@ -87,6 +89,12 @@ The `get_settings_history` method returns a list of changes extracted from event | File | Purpose | |--------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------------------------------------------------------| | [`services/user_settings_service.py`](https://github.com/HardMax71/Integr8sCode/blob/main/backend/app/services/user_settings_service.py) | Settings service with caching and event sourcing | +| [`services/event_bus.py`](https://github.com/HardMax71/Integr8sCode/blob/main/backend/app/services/event_bus.py) | Cross-instance event distribution | | [`domain/user/settings_models.py`](https://github.com/HardMax71/Integr8sCode/blob/main/backend/app/domain/user/settings_models.py) | `DomainUserSettings`, `DomainUserSettingsUpdate` dataclasses | | [`infrastructure/kafka/events/user.py`](https://github.com/HardMax71/Integr8sCode/blob/main/backend/app/infrastructure/kafka/events/user.py) | `UserSettingsUpdatedEvent` definition | | [`db/repositories/user_settings_repository.py`](https://github.com/HardMax71/Integr8sCode/blob/main/backend/app/db/repositories/user_settings_repository.py) | Snapshot and event queries | + +## Related docs + +- [Event Bus](event-bus.md) — cross-instance communication with self-filtering +- [Pydantic Dataclasses](pydantic-dataclasses.md) — TypeAdapter and dict-to-model conversion diff --git a/docs/components/schema-manager.md b/docs/components/schema-manager.md index 9b22fc15..c435b807 100644 --- a/docs/components/schema-manager.md +++ b/docs/components/schema-manager.md @@ -20,7 +20,7 @@ The `SchemaRegistryManager` class in `app/events/schema/schema_registry.py` hand Each event class (subclass of `BaseEvent`) generates its own Avro schema from Pydantic model definitions. The manager registers these schemas with subjects named after the class (like `ExecutionRequestedEvent-value`) and sets FORWARD compatibility, meaning new schemas can add fields but not remove required ones. This allows producers to be upgraded before consumers without breaking deserialization. -Serialization uses the Confluent wire format: a magic byte, four-byte schema id, then the Avro binary payload. The manager caches serializers per subject and maintains a bidirectional cache between schema ids and Python classes. When deserializing, it reads the schema id from the message header, looks up the corresponding event class, deserializes the Avro payload to a dict, and hydrates the Pydantic model. +Serialization uses the Confluent wire format: a magic byte, four-byte schema id, then the Avro binary payload. The underlying `python-schema-registry-client` library handles schema registration caching internally. The manager maintains a bidirectional cache between schema ids and Python event classes for deserialization. When deserializing, it reads the schema id from the message header, looks up the corresponding event class, deserializes the Avro payload to a dict, and hydrates the Pydantic model. For test isolation, the manager supports an optional `SCHEMA_SUBJECT_PREFIX` environment variable. 
Setting this to something like `test.session123.` prefixes all subject names, preventing test runs from polluting production schemas or interfering with each other. diff --git a/docs/reference/environment-variables.md b/docs/reference/environment-variables.md index c1888210..c4c054a5 100644 --- a/docs/reference/environment-variables.md +++ b/docs/reference/environment-variables.md @@ -47,7 +47,7 @@ Complete reference of all environment variables used by the Integr8sCode backend |----------------------------|-------------------------------|-------------------------------------| | `KAFKA_BOOTSTRAP_SERVERS` | `kafka:29092` | Kafka broker addresses | | `SCHEMA_REGISTRY_URL` | `http://schema-registry:8081` | Schema Registry URL | -| `SCHEMA_REGISTRY_AUTH` | *none* | Registry auth (`username:password`) | +| `SCHEMA_REGISTRY_AUTH` | `""` | Registry auth (`username:password`) | | `ENABLE_EVENT_STREAMING` | `false` | Enable Kafka event streaming | | `EVENT_RETENTION_DAYS` | `30` | Event retention period | | `KAFKA_TOPIC_PREFIX` | `pref` | Topic name prefix | diff --git a/mkdocs.yml b/mkdocs.yml index eae6b2ec..5140f5e5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -119,6 +119,7 @@ nav: - Model Conversion: architecture/model-conversion.md - Event Storage: architecture/event-storage.md - Event System Design: architecture/event-system-design.md + - Event Bus: architecture/event-bus.md - User Settings Events: architecture/user-settings-events.md - Frontend Build: architecture/frontend-build.md - Svelte 5 Migration: architecture/svelte5-migration.md