
Commit c9fe654

feat: Implement robust replication and task-aware queue lifecycle (#351)
This commit introduces a comprehensive overhaul of the event replication and queue lifecycle management system. It ensures data consistency for all request types, enhances robustness in distributed environments, and adds support for new operational patterns.

Previously, the system suffered from several issues: blocking requests could lead to data loss, the replication mechanism was susceptible to race conditions, and the queue lifecycle was not fully aligned with the task's state, preventing patterns like fire-and-forget.

This change implements three major architectural improvements (a sketch of the new `TaskStateProvider` contract follows this list):

1. **Fix Data Loss in Blocking Requests**
   * Corrects a critical bug where blocking `onMessageSend` calls would stop processing events after returning the first one.
   * The `ResultAggregator` now ensures all subsequent events from the agent are consumed in the background, guaranteeing the final task state is correctly persisted in the `TaskStore`.

2. **Authoritative State & Transaction-Aware Replication**
   * Introduces a `TaskStateProvider` interface to decouple queue management from the persistence layer, allowing the `ReplicatedQueueManager` to check the authoritative state of a task from a shared database.
   * Replaces time-based cleanup delays with a robust, transaction-aware "poison pill" (`QueueClosedEvent`). This event is now broadcast via a CDI observer only after the final task state is successfully committed to the database, eliminating race conditions in distributed cleanup.

3. **Task-Aware Queue Lifecycle & `ThreadLocal` Removal**
   * Refactors the `EventQueue` to use a polymorphic `EventQueueItem`, which cleanly distinguishes between local and replicated events. This allows for the complete removal of the `isHandlingReplicatedEvent` `ThreadLocal`, simplifying the concurrency model.
   * The `MainQueue` lifecycle is now strictly tied to the task's finalization state. As a core guarantee, a `MainQueue` will remain open for a task as long as the task is not in a final state. This holds true even if all consumers disconnect, enabling reliable fire-and-forget operations and late resubscriptions, while queues for finalized tasks are correctly garbage collected.
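For orientation, the `TaskStateProvider` seam introduced in point 2 reduces to a single authoritative-state lookup. The following is a minimal sketch of what that contract could look like, inferred from the `taskStateProvider.isTaskActive(taskId)` usage documented in the replicated queue manager README; it is an illustration, not the committed source.

```java
// Hedged sketch of the TaskStateProvider contract. The method name mirrors
// the taskStateProvider.isTaskActive(taskId) call described in the README;
// the javadoc wording is an interpretation, not the actual file contents.
public interface TaskStateProvider {

    /**
     * Returns true while the authoritative store considers the task active:
     * either the task is not yet in a final state, or it finalized so
     * recently that late replicated events are still within the grace period.
     */
    boolean isTaskActive(String taskId);
}
```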
1 parent 8e83576 commit c9fe654

48 files changed (+2637, -488 lines)


.gitignore

Lines changed: 1 addition & 0 deletions
```diff
@@ -47,3 +47,4 @@ nbproject/

 # Private Claude config
 .claude/
+.serena/
```

client/transport/jsonrpc/src/main/java/io/a2a/client/transport/jsonrpc/JSONRPCTransport.java

Lines changed: 4 additions & 2 deletions
```diff
@@ -140,7 +140,8 @@ public void sendMessageStreaming(MessageSendParams request, Consumer<StreamingEv
                 msg -> sseEventListener.onMessage(msg, ref.get()),
                 throwable -> sseEventListener.onError(throwable, ref.get()),
                 () -> {
-                    // We don't need to do anything special on completion
+                    // Signal normal stream completion to error handler (null error means success)
+                    sseEventListener.onComplete();
                 }));
         } catch (IOException e) {
             throw new A2AClientException("Failed to send streaming message request: " + e, e);
@@ -318,7 +319,8 @@ public void resubscribe(TaskIdParams request, Consumer<StreamingEventKind> event
                 msg -> sseEventListener.onMessage(msg, ref.get()),
                 throwable -> sseEventListener.onError(throwable, ref.get()),
                 () -> {
-                    // We don't need to do anything special on completion
+                    // Signal normal stream completion to error handler (null error means success)
+                    sseEventListener.onComplete();
                 }));
         } catch (IOException e) {
             throw new A2AClientException("Failed to send task resubscription request: " + e, e);
```

client/transport/jsonrpc/src/main/java/io/a2a/client/transport/jsonrpc/sse/SSEEventListener.java

Lines changed: 19 additions & 0 deletions
```diff
@@ -16,6 +16,7 @@ public class SSEEventListener {
     private static final Logger log = Logger.getLogger(SSEEventListener.class.getName());
     private final Consumer<StreamingEventKind> eventHandler;
     private final Consumer<Throwable> errorHandler;
+    private volatile boolean completed = false;

     public SSEEventListener(Consumer<StreamingEventKind> eventHandler,
                             Consumer<Throwable> errorHandler) {
@@ -38,6 +39,24 @@ public void onError(Throwable throwable, Future<Void> future) {
         future.cancel(true); // close SSE channel
     }

+    public void onComplete() {
+        // Idempotent: only signal completion once, even if called multiple times
+        if (completed) {
+            log.fine("SSEEventListener.onComplete() called again - ignoring (already completed)");
+            return;
+        }
+        completed = true;
+
+        // Signal normal stream completion (null error means successful completion)
+        log.fine("SSEEventListener.onComplete() called - signaling successful stream completion");
+        if (errorHandler != null) {
+            log.fine("Calling errorHandler.accept(null) to signal successful completion");
+            errorHandler.accept(null);
+        } else {
+            log.warning("errorHandler is null, cannot signal completion");
+        }
+    }
+
     private void handleMessage(JsonNode jsonNode, Future<Void> future) {
         try {
             if (jsonNode.has("error")) {
```
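Given the null-means-success convention above, a client-supplied error handler is expected to treat a `null` throwable as normal completion. A hypothetical handler (not part of this commit) would branch roughly like this:

```java
import java.util.function.Consumer;

// Hypothetical client-side handler, shown only to illustrate the
// convention introduced above: onComplete() invokes the error handler
// with null, so null must be read as "stream finished normally".
public class CompletionAwareErrorHandler {
    static final Consumer<Throwable> ERROR_HANDLER = throwable -> {
        if (throwable == null) {
            System.out.println("SSE stream completed normally");
        } else {
            System.err.println("SSE stream failed: " + throwable);
        }
    };
}
```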

extras/common/pom.xml

Lines changed: 21 additions & 0 deletions
```xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <parent>
        <groupId>io.github.a2asdk</groupId>
        <artifactId>a2a-java-sdk-parent</artifactId>
        <version>0.3.0.Beta3-SNAPSHOT</version>
        <relativePath>../../pom.xml</relativePath>
    </parent>

    <artifactId>a2a-java-extras-common</artifactId>
    <name>A2A Java SDK :: Extras :: Common</name>
    <description>Common classes shared across extras modules</description>

    <dependencies>
        <!-- Minimal dependencies - just what's needed for event classes -->
    </dependencies>
</project>
```
extras/common/src/main/java/io/a2a/extras/common/events/TaskFinalizedEvent.java

Lines changed: 26 additions & 0 deletions

```java
package io.a2a.extras.common.events;

/**
 * CDI event fired when a task reaches a final state and is successfully persisted to the database.
 * This event is fired AFTER the database transaction commits, making it safe for downstream
 * components to assume the task is durably stored.
 *
 * <p>Used by the replicated queue manager to send poison pill events after ensuring
 * the final task state is committed to the database, eliminating race conditions.
 */
public class TaskFinalizedEvent {
    private final String taskId;

    public TaskFinalizedEvent(String taskId) {
        this.taskId = taskId;
    }

    public String getTaskId() {
        return taskId;
    }

    @Override
    public String toString() {
        return "TaskFinalizedEvent{taskId='" + taskId + "'}";
    }
}
```

extras/queue-manager-replicated/README.md

Lines changed: 180 additions & 0 deletions
## Production Considerations

### Kafka Partitioning Strategy

**Critical for scalability and correctness**: How you partition your Kafka topic significantly impacts system performance and behavior.

#### Simple Approach: Single Partition

The simplest configuration uses a single partition for the replicated events topic:

```bash
kafka-topics.sh --create --topic replicated-events --bootstrap-server localhost:9092 --partitions 1 --replication-factor 1
```

**Advantages**:
- Guarantees global event ordering
- Simpler to reason about and debug
- Suitable for development, testing, and low-throughput production systems

**Disadvantages**:
- Limited scalability (single partition bottleneck)
- Cannot parallelize consumption across multiple consumer instances
- All events are processed sequentially

**When to use**: Development environments, integration tests, production systems with low event volumes (<1000 events/sec), or when strict global ordering is required.

#### Recommended Approach: Partition by Task ID

For production systems with higher throughput, partition events by `taskId`:

```properties
# Configure the producer to use taskId as the partition key
mp.messaging.outgoing.replicated-events-out.key.serializer=org.apache.kafka.common.serialization.StringSerializer
mp.messaging.outgoing.replicated-events-out.value.serializer=org.apache.kafka.common.serialization.StringSerializer
```

```bash
# Create the topic with multiple partitions
kafka-topics.sh --create --topic replicated-events --bootstrap-server localhost:9092 --partitions 10 --replication-factor 3
```

The `ReactiveMessagingReplicationStrategy` already sends the `taskId` as the Kafka message key, so Kafka will automatically partition by task ID using its default partitioner.
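For reference, sending a keyed record through SmallRye Reactive Messaging looks roughly like the sketch below. This is not the actual `ReactiveMessagingReplicationStrategy` source; the class name and `String` payload are assumptions, while the `replicated-events-out` channel matches the configuration above.

```java
import io.smallrye.reactive.messaging.kafka.Record;
import jakarta.enterprise.context.ApplicationScoped;
import jakarta.inject.Inject;
import org.eclipse.microprofile.reactive.messaging.Channel;
import org.eclipse.microprofile.reactive.messaging.Emitter;

// Illustrative sketch: emitting a Kafka record whose key is the taskId,
// so the default partitioner keeps all events for one task in order on
// a single partition. Class name and payload type are assumptions.
@ApplicationScoped
public class KeyedReplicationSender {

    @Inject
    @Channel("replicated-events-out")
    Emitter<Record<String, String>> emitter;

    public void send(String taskId, String serializedEvent) {
        // Record.of(key, value): taskId becomes the Kafka message key
        emitter.send(Record.of(taskId, serializedEvent));
    }
}
```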

**Advantages**:
- **Horizontal scalability**: Different tasks can be processed in parallel across partitions
- **Per-task ordering guarantee**: All events for a single task go to the same partition and maintain order
- **Consumer parallelism**: Multiple consumer instances can process different partitions concurrently

**Disadvantages**:
- No global ordering across all tasks
- More complex to debug (events spread across partitions)
- Requires proper consumer group configuration

**When to use**: Production systems with medium to high throughput, systems that need to scale horizontally, distributed deployments with multiple A2A instances.

#### Consumer Group Configuration

When using multiple partitions, ensure all A2A instances belong to the same consumer group:

```properties
mp.messaging.incoming.replicated-events-in.group.id=a2a-instance-group
```

This ensures that:
- Each partition is consumed by exactly one instance
- Events for the same task always go to the same instance (partition affinity)
- The system can scale horizontally by adding more instances (up to the number of partitions)

**Rule of thumb**: Number of partitions ≥ number of A2A instances for optimal distribution.

### Transaction-Aware Queue Cleanup ("Poison Pill")

When a task reaches a final state (COMPLETED, FAILED, CANCELED), all nodes in the cluster must terminate their event consumers for that task. This is achieved through a special "poison pill" event (`QueueClosedEvent`) that is replicated to all nodes.

#### How It Works

The poison pill mechanism uses **transaction-aware CDI events** to ensure the poison pill is only sent AFTER the final task state is durably committed to the database:

1. **Task Finalization**: When `JpaDatabaseTaskStore.save()` persists a task with a final state, it fires a `TaskFinalizedEvent` CDI event
2. **Transaction Coordination**: The CDI observer is configured with `@Observes(during = TransactionPhase.AFTER_SUCCESS)`, which delays event delivery until AFTER the JPA transaction commits
3. **Poison Pill Delivery**: `ReplicatedQueueManager.onTaskFinalized()` receives the event and sends `QueueClosedEvent` via the replication strategy
4. **Cluster-Wide Termination**: All nodes receive the `QueueClosedEvent`, recognize it as final, and gracefully terminate their event consumers

**Key Architecture Decision**: We use JPA transaction lifecycle hooks instead of time-based delays for poison pill delivery because this:
- **Eliminates race conditions**: No time window where the poison pill might arrive before the database commit
- **Provides deterministic cleanup**: Queue termination happens immediately after transaction commit, without delay-based tuning
- **Keeps things simple**: No need to monitor consumer lag or configure delays for cleanup timing
- **Is reliable**: Works correctly regardless of network latency or database performance

**Note**: While the poison pill mechanism eliminates delays for cleanup, the system still uses a configurable grace period (`a2a.replication.grace-period-seconds`, default 15s) in `JpaDatabaseTaskStore.isTaskActive()` to handle late-arriving replicated events. This grace period prevents queue recreation for tasks that were recently finalized, accommodating Kafka consumer lag and network delays. See the Grace Period Configuration section below for details.

#### Code Flow

**JpaDatabaseTaskStore** (fires the CDI event):
```java
@Inject
Event<TaskFinalizedEvent> taskFinalizedEvent;

public void save(Task task) {
    // ... persist task to database ...

    // Fire CDI event if the task reached a final state
    if (task.getStatus().state().isFinal()) {
        taskFinalizedEvent.fire(new TaskFinalizedEvent(task.getId()));
    }
    // Transaction commits here (end of method)
}
```

**ReplicatedQueueManager** (observes the event and sends the poison pill):
```java
public void onTaskFinalized(@Observes(during = TransactionPhase.AFTER_SUCCESS) TaskFinalizedEvent event) {
    String taskId = event.getTaskId();
    LOGGER.debug("Task {} finalized - sending poison pill after transaction commit", taskId);

    // Send QueueClosedEvent to all nodes via replication
    QueueClosedEvent closedEvent = new QueueClosedEvent(taskId);
    replicationStrategy.send(taskId, closedEvent);
}
```

#### Configuration

No configuration is required for the poison pill mechanism; it works automatically when:
1. Using `JpaDatabaseTaskStore` for task persistence
2. Using `ReplicatedQueueManager` for event replication
3. Both modules are present in your application

#### Monitoring

Enable debug logging to monitor poison pill delivery:

```properties
quarkus.log.category."io.a2a.extras.queuemanager.replicated".level=DEBUG
quarkus.log.category."io.a2a.extras.taskstore.database.jpa".level=DEBUG
```

You should see log entries like:
```
Task abc-123 is in final state, firing TaskFinalizedEvent
Task abc-123 finalized - sending poison pill (QueueClosedEvent) after transaction commit
```

#### Grace Period Configuration

While the poison pill mechanism provides deterministic cleanup timing, the system uses a configurable **grace period** to handle late-arriving replicated events. This is separate from the poison pill mechanism and serves a different purpose.

**Purpose**: The grace period prevents queue recreation for tasks that were recently finalized. When a replicated event arrives after a task is finalized, the system checks whether the task is still within the grace period before creating a new queue.

**Configuration**:
```properties
# Grace period for handling late-arriving events (default: 15 seconds)
a2a.replication.grace-period-seconds=15
```

**How It Works** (see the sketch after this list):
1. When a task is finalized, `JpaDatabaseTaskStore` records the `finalizedAt` timestamp
2. When a replicated event arrives, `ReplicatedQueueManager.onReplicatedEvent()` calls `taskStateProvider.isTaskActive(taskId)`
3. `JpaDatabaseTaskStore.isTaskActive()` returns `true` if:
   - the task is not in a final state, OR
   - the task is final but within the grace period (`now < finalizedAt + gracePeriodSeconds`)
4. If `isTaskActive()` returns `false`, the replicated event is skipped (no queue is created)
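Put together, the grace-period check amounts to a timestamp comparison. Below is a minimal sketch of that decision, assuming a hypothetical `TaskSnapshot` lookup; the real `JpaDatabaseTaskStore` queries the shared database instead.

```java
import java.time.Instant;

// Minimal sketch of the isTaskActive() decision described above.
// TaskSnapshot and findTask(...) are hypothetical stand-ins for the
// real JPA entity and query in JpaDatabaseTaskStore.
public class GracePeriodCheckSketch {

    // Mirrors a2a.replication.grace-period-seconds (default 15)
    private final long gracePeriodSeconds = 15;

    public boolean isTaskActive(String taskId) {
        TaskSnapshot task = findTask(taskId); // hypothetical DB lookup
        if (task == null) {
            return false; // unknown task: skip the event, create no queue
        }
        if (!task.finalState()) {
            return true; // not final yet: the queue must stay open
        }
        // Final task: stay "active" only within the grace period so that
        // late-arriving replicated events do not recreate the queue.
        Instant cutoff = task.finalizedAt().plusSeconds(gracePeriodSeconds);
        return Instant.now().isBefore(cutoff);
    }

    // Hypothetical snapshot of the persisted task state.
    record TaskSnapshot(boolean finalState, Instant finalizedAt) {}

    private TaskSnapshot findTask(String taskId) {
        // Placeholder: a real implementation queries the shared database.
        return null;
    }
}
```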

**When to Adjust**:
- **Increase** the grace period if you observe warnings about skipped events for inactive tasks in high-latency networks
- **Decrease** the grace period to reduce memory usage in systems with very low latency and high task turnover
- The **default (15s)** is suitable for most deployments with typical Kafka consumer lag

**Monitoring**:
```properties
quarkus.log.category."io.a2a.extras.queuemanager.replicated".level=DEBUG
```

Watch for:
```
Skipping replicated event for inactive task abc-123   # Event arrived too late
```

**Important**: This grace period is for **late event handling**, not cleanup timing. The poison pill mechanism handles cleanup deterministically without delays.

## Advanced Topics

### Custom Replication Strategies

extras/queue-manager-replicated/core/pom.xml

Lines changed: 5 additions & 0 deletions
```diff
@@ -20,6 +20,11 @@
             <artifactId>a2a-java-sdk-server-common</artifactId>
             <version>${project.version}</version>
         </dependency>
+        <dependency>
+            <groupId>io.github.a2asdk</groupId>
+            <artifactId>a2a-java-extras-common</artifactId>
+            <version>${project.version}</version>
+        </dependency>
         <dependency>
             <groupId>io.quarkus</groupId>
             <artifactId>quarkus-core</artifactId>
```
