From 954b18af7c6ed8d229bf85e21cc6f5c84f667a00 Mon Sep 17 00:00:00 2001 From: Markus Ecker Date: Tue, 23 Sep 2025 16:11:02 +0200 Subject: [PATCH 1/3] wip --- docs/docs.json | 13 ++ docs/drafts/activity-events.mdx | 219 +++++++++++++++++++ docs/drafts/generative-ui.mdx | 317 ++++++++++++++++++++++++++++ docs/drafts/interrupts.mdx | 239 +++++++++++++++++++++ docs/drafts/meta-events.mdx | 245 +++++++++++++++++++++ docs/drafts/multimodal-messages.mdx | 280 ++++++++++++++++++++++++ docs/drafts/overview.mdx | 84 ++++++++ docs/drafts/reasoning.mdx | 269 +++++++++++++++++++++++ docs/drafts/serialization.mdx | 255 ++++++++++++++++++++++ 9 files changed, 1921 insertions(+) create mode 100644 docs/drafts/activity-events.mdx create mode 100644 docs/drafts/generative-ui.mdx create mode 100644 docs/drafts/interrupts.mdx create mode 100644 docs/drafts/meta-events.mdx create mode 100644 docs/drafts/multimodal-messages.mdx create mode 100644 docs/drafts/overview.mdx create mode 100644 docs/drafts/reasoning.mdx create mode 100644 docs/drafts/serialization.mdx diff --git a/docs/docs.json b/docs/docs.json index 5ed000574..e6c6a9c4d 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -45,6 +45,19 @@ "concepts/tools" ] }, + { + "group": "Draft Changes", + "pages": [ + "drafts/overview", + "drafts/activity-events", + "drafts/reasoning", + "drafts/serialization", + "drafts/multimodal-messages", + "drafts/interrupts", + "drafts/generative-ui", + "drafts/meta-events" + ] + }, { "group": "Tutorials", "pages": ["tutorials/cursor", "tutorials/debugging"] diff --git a/docs/drafts/activity-events.mdx b/docs/drafts/activity-events.mdx new file mode 100644 index 000000000..618e8932d --- /dev/null +++ b/docs/drafts/activity-events.mdx @@ -0,0 +1,219 @@ +--- +title: Activity Events +description: Proposal for representing ongoing agent progress between chat messages +--- + +# Activity Events Proposal + +## Summary + +### Problem Statement +Users want to render "activity" updates inline with chat, not just at run start or end. Currently, there's no standardized way to represent ongoing agent progress between chat messages. + +### Motivation +AG-UI is extended with **ActivityEvents** and **ActivityMessages** to represent ongoing agent progress in between chat messages. This allows frameworks to surface fine-grained activity updates chronologically, giving users immediate visibility into what an agent is doing without waiting for the next message or run boundary. + +## Status + +- **Status**: Draft +- **Author(s)**: AG-UI Team + +## Background + +Users want real-time visibility into agent activities as they happen. Consider this example UI: + +``` ++------------------------------------------------------------+ +| I will search the internet for relevant information | <- TextMessage ++------------------------------------------------------------+ ++------------------------------------------------------------+ +| ✓ checking reddit | <- ActivityMessage +| searching X.com... 
| ++------------------------------------------------------------+ +``` + +### Use Cases + +- **Workflows**: Step-by-step progress through workflow execution +- **Planning**: Intermediate planning or tool use visibility +- **Custom frameworks**: Signals representing ongoing work in any agent system + +## Challenges + +- **Flexibility**: Must handle arbitrary activity data from different frameworks +- **Serializability**: Events must be replayable and rehydrated for session recovery +- **Extensibility**: Developers should define custom renderers per activity type, with a generic fallback +- **Chronology**: Activities must interleave naturally with chat and run events + +## Detailed Specification + +### Overview + +This proposal introduces two new concepts to the AG-UI protocol: +1. **ActivityEvent**: A new event type in the event stream +2. **ActivityMessage**: A new message type alongside TextMessage, ToolMessage, etc. + +Frameworks may emit ActivityEvents, and frontends can render them inline with chat. + +### New Event: ActivityEvent + +```typescript +type ActivityEvent = BaseEvent & { + type: EventType.ACTIVITY + /** + * Unique identifier for the ActivityMessage this event belongs to. + */ + messageId: string + /** + * Activity type, e.g. "PLAN", "SEARCH", "SCRAPE" + */ + activityType: string + /** + * Snapshot of the full activity state (optional). + */ + snapshot?: Record + /** + * Patch to apply to the prior snapshot (optional). + * Follows JSON Patch semantics. + */ + patch?: Record +} +``` + +#### Example Events + +Initial activity snapshot: +```json +{ + "id": "evt_001", + "ts": 1714064100000, + "type": "ACTIVITY", + "messageId": "msg_789", + "activityType": "PLAN", + "snapshot": { + "tasks": ["check reddit", "search X.com"] + } +} +``` + +Incremental update via patch: +```json +{ + "id": "evt_002", + "ts": 1714064120000, + "type": "ACTIVITY", + "messageId": "msg_789", + "activityType": "PLAN", + "patch": { + "op": "replace", + "path": "/tasks/0", + "value": "✓ check reddit" + } +} +``` + +### New Message: ActivityMessage + +```typescript +type ActivityMessage = { + id: string + role: "activity" + activityType: string + /** + * Finalized activity content as of compaction. + */ + content: Record +} +``` + +### Rendering Strategy + +- **Generic renderer**: Displays raw snapshot/patch as JSON or formatted text +- **Custom renderer**: Developers can register a renderer per `activityType`: + - `"PLAN"` → Interactive checklist component + - `"SEARCH"` → Live status with progress indicators + - `"WORKFLOW"` → Step-by-step workflow visualization + +## Implementation Considerations + +### Client SDK Changes + +TypeScript SDK additions: +- New `ActivityEvent` type in `@ag-ui/core` +- New `ActivityMessage` type in message unions +- Activity renderer registry in `@ag-ui/client` + +Python SDK additions: +- New `ActivityEvent` class in `ag_ui.core.events` +- New `ActivityMessage` class in message types +- Activity serialization/deserialization support + +### Integration Impact + +- **Planning Frameworks**: Can emit ActivityEvents during planning or tool execution phases +- **Workflow Systems**: Can surface step-by-step workflow progress as ActivityEvents +- **Other frameworks**: May emit ActivityEvents freely; AG-UI will serialize them like other events + +## Breaking Changes + +None. This is an additive change that maintains backward compatibility. 
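+
+## Example: Reducing Activity Events on the Client
+
+To make the snapshot/patch semantics concrete, the sketch below shows how a client could fold a stream of ActivityEvents into per-message activity state. The event shape mirrors the specification above; the reducer itself, its local type definitions, and its support for only `add`/`replace` operations are simplifications for illustration (a real client would delegate to a full JSON Patch implementation).
+
+```typescript
+// Local mirror of the proposed event shape (see specification above).
+type JsonPatchOp = { op: "add" | "replace"; path: string; value: unknown }
+
+type ActivityEvent = {
+  type: "ACTIVITY"
+  messageId: string
+  activityType: string
+  snapshot?: Record<string, any>
+  patch?: JsonPatchOp
+}
+
+// Accumulated, renderable activity state keyed by messageId.
+type ActivityState = Map<string, { activityType: string; content: Record<string, any> }>
+
+// Apply a single "add"/"replace" operation; real clients should use a JSON Patch library.
+function applyActivityPatch(target: Record<string, any>, patch: JsonPatchOp): void {
+  const segments = patch.path.split("/").slice(1) // "/tasks/0" -> ["tasks", "0"]
+  const last = segments.pop()!
+  let node: any = target
+  for (const segment of segments) {
+    node = node[segment]
+  }
+  node[last] = patch.value
+}
+
+// Fold a stream of ActivityEvents into the latest state per ActivityMessage.
+function reduceActivityEvents(events: ActivityEvent[]): ActivityState {
+  const state: ActivityState = new Map()
+  for (const event of events) {
+    const entry = state.get(event.messageId) ?? { activityType: event.activityType, content: {} }
+    if (event.snapshot) {
+      entry.content = structuredClone(event.snapshot) // a snapshot replaces prior state
+    }
+    if (event.patch) {
+      applyActivityPatch(entry.content, event.patch) // a patch mutates the prior snapshot
+    }
+    state.set(event.messageId, entry)
+  }
+  return state
+}
+```
+
+A renderer registered for an `activityType` can subscribe to this state and re-render whenever the entry for its `messageId` changes; the generic fallback renderer can simply display the `content` object.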
+ +## Examples and Use Cases + +### Example 1: Web Search Activity + +```typescript +// Agent emits search activity +agent.emitActivity({ + messageId: "msg_123", + activityType: "SEARCH", + snapshot: { + sources: [ + { name: "Reddit", status: "pending" }, + { name: "X.com", status: "pending" }, + { name: "Google", status: "pending" } + ] + } +}) + +// Update as search progresses +agent.emitActivity({ + messageId: "msg_123", + activityType: "SEARCH", + patch: { + op: "replace", + path: "/sources/0/status", + value: "complete" + } +}) +``` + +### Use Case: Multi-Step Workflow Visibility + +A data analysis agent performing multiple steps: +1. Loading dataset → ActivityEvent shows progress bar +2. Cleaning data → ActivityEvent shows rows processed +3. Running analysis → ActivityEvent shows current computation +4. Generating report → ActivityEvent shows sections completed + +Each step appears inline with chat, giving users real-time feedback. + +## Testing Strategy + +- Unit tests for ActivityEvent serialization/deserialization +- Integration tests with mock frameworks emitting ActivityEvents +- E2E tests in AG-UI Dojo demonstrating activity rendering +- Performance benchmarks for high-frequency activity updates + +## Open Questions + +1. Should we enforce a schema for common activity types (e.g., standardized "PLAN" format)? +2. How should activities behave across run boundaries? +3. Should there be a maximum number of patches before requiring a new snapshot? +4. How do activities interact with message editing/regeneration? + +## References + +- [JSON Patch RFC 6902](https://tools.ietf.org/html/rfc6902) +- [AG-UI Events Documentation](/concepts/events) +- [AG-UI Messages Documentation](/concepts/messages) \ No newline at end of file diff --git a/docs/drafts/generative-ui.mdx b/docs/drafts/generative-ui.mdx new file mode 100644 index 000000000..48da045fe --- /dev/null +++ b/docs/drafts/generative-ui.mdx @@ -0,0 +1,317 @@ +--- +title: Generative User Interfaces +description: AI-generated interfaces without custom tool renderers +--- + +# Generative User Interfaces + +## Summary + +### Problem Statement +Currently, creating custom user interfaces for agent interactions requires programmers to define specific tool renderers. This limits the flexibility and adaptability of agent-driven applications. + +### Motivation +This draft describes an AG-UI extension that addresses **generative user interfaces**—interfaces produced directly by artificial intelligence without requiring a programmer to define custom tool renderers. The key idea is to leverage our ability to send client-side tools to the agent, thereby enabling this capability across all agent frameworks supported by AG-UI. + +## Status + +- **Status**: Draft +- **Author(s)**: AG-UI Team + +## Challenges and Limitations + +### Tool Description Length +OpenAI enforces a limit of 1024 characters for tool descriptions. Gemini and Anthropic impose no such limit. + +### Arguments JSON Schema Constraints +Classes, nesting, `$ref`, and `oneOf` are not reliably supported across LLM providers. + +### Context Window Considerations +Injecting a large UI description language into an agent may reduce its performance. Agents dedicated solely to UI generation perform better than agents combining UI generation with other tasks. + +## Detailed Specification + +### Two-Step Generation Process + +```mermaid +flowchart TD + A[Agent needs UI] --> B["Step 1: What?
<br/>Agent calls generateUserInterface<br/>(description, data, output)"]
+  B --> C["Step 2: How?<br/>Secondary generator builds actual UI<br/>
(JSON Schema, React, etc.)"] + C --> D[Rendered UI shown to user] + D --> E[Validated user input returned to Agent] +``` + +### Step 1: What to Generate? + +Inject a lightweight tool into the agent: + +**Tool Definition:** +- **Name:** `generateUserInterface` +- **Arguments:** + - **description**: A high-level description of the UI (e.g., *"A form for entering the user's address"*) + - **data**: Arbitrary pre-populated data for the generated UI + - **output**: A description or schema of the data the agent expects the user to submit back (fields, required/optional, types, constraints) + +**Example Tool Call:** +```json +{ + "tool": "generateUserInterface", + "arguments": { + "description": "A form that collects a user's shipping address.", + "data": { + "firstName": "Ada", + "lastName": "Lovelace", + "city": "London" + }, + "output": { + "type": "object", + "required": ["firstName", "lastName", "street", "city", "postalCode", "country"], + "properties": { + "firstName": { "type": "string", "title": "First Name" }, + "lastName": { "type": "string", "title": "Last Name" }, + "street": { "type": "string", "title": "Street Address" }, + "city": { "type": "string", "title": "City" }, + "postalCode":{ "type": "string", "title": "Postal Code" }, + "country": { + "type": "string", + "title": "Country", + "enum": ["GB", "US", "DE", "AT"] + } + } + } + } +} +``` + +### Step 2: How to Generate? + +Delegate UI generation to a secondary LLM or agent: + +- The CopilotKit user stays in control: Can make their own generators, add custom libraries, include additional prompts etc. +- On tool invocation, the secondary model consumes `description`, `data`, and `output` to generate the user interface +- This model is focused solely on UI generation, ensuring maximum fidelity and consistency +- The generation method can be swapped as needed (e.g., JSON, HTML, or other renderable formats) +- The UI format description is not subject to structural or length constraints, allowing arbitrarily complex specifications + +## Implementation Examples + +### Example Output: UISchemaGenerator + +```json +{ + "jsonSchema": { + "title": "Shipping Address", + "type": "object", + "required": ["firstName", "lastName", "street", "city", "postalCode", "country"], + "properties": { + "firstName": { "type": "string", "title": "First name" }, + "lastName": { "type": "string", "title": "Last name" }, + "street": { "type": "string", "title": "Street address" }, + "city": { "type": "string", "title": "City" }, + "postalCode": { "type": "string", "title": "Postal code" }, + "country": { + "type": "string", + "title": "Country", + "enum": ["GB", "US", "DE", "AT"] + } + } + }, + "uiSchema": { + "type": "VerticalLayout", + "elements": [ + { + "type": "Group", + "label": "Personal Information", + "elements": [ + { "type": "Control", "scope": "#/properties/firstName" }, + { "type": "Control", "scope": "#/properties/lastName" } + ] + }, + { + "type": "Group", + "label": "Address", + "elements": [ + { "type": "Control", "scope": "#/properties/street" }, + { "type": "Control", "scope": "#/properties/city" }, + { "type": "Control", "scope": "#/properties/postalCode" }, + { "type": "Control", "scope": "#/properties/country" } + ] + } + ] + }, + "initialData": { + "firstName": "Ada", + "lastName": "Lovelace", + "city": "London", + "country": "GB" + } +} +``` + +### Example Output: ReactFormHookGenerator + +```tsx +import React from "react"; +import { useForm } from "react-hook-form"; +import { z } from "zod"; +import { zodResolver } from 
"@hookform/resolvers/zod"; + +// ----- Schema (contract) ----- +const AddressSchema = z.object({ + firstName: z.string().min(1, "Required"), + lastName: z.string().min(1, "Required"), + street: z.string().min(1, "Required"), + city: z.string().min(1, "Required"), + postalCode: z.string().regex(/^[A-Za-z0-9\\-\\s]{3,10}$/, "3–10 chars"), + country: z.enum(["GB", "US", "DE", "AT", "FR", "IT", "ES"]) +}); +export type Address = z.infer; + +type Props = { + initialData?: Partial
; + meta?: { title?: string; submitLabel?: string }; + respond: (data: Address) => void; // <-- called on successful submit +}; + +const COUNTRIES: Address["country"][] = ["GB", "US", "DE", "AT", "FR", "IT", "ES"]; + +export default function AddressForm({ initialData, meta, respond }: Props) { + const { register, handleSubmit, formState: { errors } } = useForm
({ + resolver: zodResolver(AddressSchema), + defaultValues: { + firstName: "", + lastName: "", + street: "", + city: "", + postalCode: "", + country: "GB", + ...initialData + } + }); + + const onSubmit = (data: Address) => { + // Guaranteed to match AddressSchema + respond(data); + }; + + return ( +
+    <form onSubmit={handleSubmit(onSubmit)}>
+      {meta?.title &&
+        <h2>
+          {meta.title}
+        </h2>
+      }
+
+      {/* Section: Personal Information */}
+      <fieldset>
+        <legend>Personal Information</legend>
+
+        <div>
+          <label htmlFor="firstName">First name</label>
+          <input id="firstName" {...register("firstName")} />
+          {errors.firstName && <span>{errors.firstName.message}</span>}
+        </div>
+
+        <div>
+          <label htmlFor="lastName">Last name</label>
+          <input id="lastName" {...register("lastName")} />
+          {errors.lastName && <span>{errors.lastName.message}</span>}
+        </div>
+      </fieldset>
+
+      {/* Section: Address */}
+      <fieldset>
+        <legend>Address</legend>
+
+        <div>
+          <label htmlFor="street">Street address</label>
+          <input id="street" {...register("street")} />
+          {errors.street && <span>{errors.street.message}</span>}
+        </div>
+
+        <div>
+          <label htmlFor="city">City</label>
+          <input id="city" {...register("city")} />
+          {errors.city && <span>{errors.city.message}</span>}
+        </div>
+
+        <div>
+          <label htmlFor="postalCode">Postal code</label>
+          <input id="postalCode" {...register("postalCode")} />
+          {errors.postalCode && <span>{errors.postalCode.message}</span>}
+        </div>
+
+        <div>
+          <label htmlFor="country">Country</label>
+          <select id="country" {...register("country")}>
+            {COUNTRIES.map((c) => (
+              <option key={c} value={c}>{c}</option>
+            ))}
+          </select>
+          {errors.country && <span>{errors.country.message}</span>}
+        </div>
+      </fieldset>
+
+      <button type="submit">{meta?.submitLabel ?? "Submit"}</button>
+    </form>
+ ); +} +``` + +## Implementation Considerations + +### Client SDK Changes + +TypeScript SDK additions: +- New `generateUserInterface` tool type +- UI generator registry for pluggable generators +- Validation layer for generated UI schemas +- Response handler for user-submitted data + +Python SDK additions: +- Support for UI generation tool invocation +- Schema validation utilities +- Serialization for UI definitions + +### Integration Impact + +- All AG-UI integrations can leverage this capability without modification +- Frameworks emit standard tool calls; client handles UI generation +- Backward compatible with existing tool-based UI approaches + +## Breaking Changes + +None. This is an additive change that maintains backward compatibility. + +## Use Cases + +### Dynamic Forms +Agents can generate forms on-the-fly based on conversation context without pre-defined schemas. + +### Data Visualization +Generate charts, graphs, or tables appropriate to the data being discussed. + +### Interactive Workflows +Create multi-step wizards or guided processes tailored to user needs. + +### Adaptive Interfaces +Generate different UI layouts based on user preferences or device capabilities. + +## Testing Strategy + +- Unit tests for tool injection and invocation +- Integration tests with multiple UI generators +- E2E tests demonstrating various UI types +- Performance benchmarks comparing single vs. two-step generation +- Cross-provider compatibility testing + +## Open Questions + +1. Should we standardize a set of common UI generators? +2. How to handle UI state management across regenerations? +3. Should generators support streaming UI updates? +4. What's the optimal size/capability for the secondary generation model? +5. How to ensure generated UIs are accessible and follow best practices? + +## References + +- [AG-UI Tools Documentation](/concepts/tools) +- [JSON Schema](https://json-schema.org/) +- [React Hook Form](https://react-hook-form.com/) +- [JSON Forms](https://jsonforms.io/) \ No newline at end of file diff --git a/docs/drafts/interrupts.mdx b/docs/drafts/interrupts.mdx new file mode 100644 index 000000000..f49e6386e --- /dev/null +++ b/docs/drafts/interrupts.mdx @@ -0,0 +1,239 @@ +--- +title: Interrupt-Aware Run Lifecycle +description: Native support for human-in-the-loop pauses and interrupts +--- + +# Interrupt-Aware Run Lifecycle Proposal + +## Summary + +### Problem Statement +Agents often need to pause execution to request human approval, gather additional input, or confirm potentially risky actions. Currently, there's no standardized way to handle these interruptions across different agent frameworks. + +### Motivation +Support **human-in-the-loop pauses** (and related mechanisms) natively in AG-UI and CopilotKit. This enables compatibility with various framework interrupts, workflow suspend/resume, and other framework-specific pause mechanisms. 
+ +## Status + +- **Status**: Draft +- **Author(s)**: AG-UI Team + +## Overview + +This proposal introduces a standardized interrupt/resume pattern: + +```mermaid +sequenceDiagram + participant Agent + participant Client as Client App + + Agent-->>Client: RUN_FINISHED { outcome: "interrupt", interrupt:{ id, reason, payload }} + Client-->>Agent: RunAgentInput.resume { threadId, interruptId, payload } + Agent-->>Client: RUN_FINISHED { outcome: "success", result } +``` + +## Detailed Specification + +### Updates to RUN_FINISHED Event + +```typescript +type RunFinishedOutcome = "success" | "interrupt"; + +type RunFinished = { + type: "RUN_FINISHED"; + + // ... existing fields + + outcome?: RunFinishedOutcome; // optional for back-compat (see rules below) + + // Present when outcome === "success" (or when outcome omitted and interrupt is absent) + result?: any; + + // Present when outcome === "interrupt" (or when outcome omitted and interrupt is present) + interrupt?: { + id?: string; // id can be set when needed + reason?: string; // e.g. "human_approval" | "upload_required" | "policy_hold" + payload?: any; // arbitrary JSON for UI (forms, proposals, diffs, etc.) + }; +}; +``` + +When a run finishes with `outcome == "interrupt"`, the agent indicates that on the next run, a value needs to be provided to continue. + +### Updates to RunAgentInput + +```typescript +type RunAgentInput = { + // ... existing fields + + // NEW: resume channel for continuing a suspension + resume?: { + interruptId?: string; // echo back if one was provided + payload?: any; // arbitrary JSON: approvals, edits, files-as-refs, etc. + }; +}; +``` + +### Contract Rules + +- Resume requests **must** use the same `threadId` +- When given in the `interrupt`, the `interruptId` must be provided via `RunAgentInput` +- Agents should handle missing or invalid resume payloads gracefully + +## Implementation Examples + +### Minimal Interrupt/Resume + +**Agent sends interrupt:** +```json +{ + "type": "RUN_FINISHED", + "threadId": "t1", + "runId": "r1", + "outcome": "interrupt", + "interrupt": { + "id": "int-abc123", + "reason": "human_approval", + "payload": { + "proposal": { + "tool": "sendEmail", + "args": { "to": "a@b.com", "subject": "Hi", "body": "…" } + } + } + } +} +``` + +**User responds:** +```json +{ + "threadId": "t1", + "runId": "r2", + "resume": { + "interruptId": "int-abc123", + "payload": { "approved": true } + } +} +``` + +### Complex Approval Flow + +**Agent requests approval with context:** +```json +{ + "type": "RUN_FINISHED", + "threadId": "thread-456", + "runId": "run-789", + "outcome": "interrupt", + "interrupt": { + "id": "approval-001", + "reason": "database_modification", + "payload": { + "action": "DELETE", + "table": "users", + "affectedRows": 42, + "query": "DELETE FROM users WHERE last_login < '2023-01-01'", + "rollbackPlan": "Restore from backup snapshot-2025-01-23", + "riskLevel": "high" + } + } +} +``` + +**User approves with modifications:** +```json +{ + "threadId": "thread-456", + "runId": "run-790", + "resume": { + "interruptId": "approval-001", + "payload": { + "approved": true, + "modifications": { + "batchSize": 10, + "dryRun": true + } + } + } +} +``` + +## Use Cases + +### Human Approval +Agents pause before executing sensitive operations (sending emails, making purchases, deleting data). + +### Information Gathering +Agent requests additional context or files from the user mid-execution. 
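+
+For example, a client handling an `upload_required` pause might turn the interrupt into the next run's input as sketched below. The event and input shapes are abbreviated to the fields used here, and `collectFilesFromUser` is a hypothetical stand-in for application UI code.
+
+```typescript
+// Abbreviated shapes; see the specification above for the full definitions.
+type InterruptInfo = { id?: string; reason?: string; payload?: any }
+
+type RunFinishedEvent = {
+  type: "RUN_FINISHED"
+  threadId: string
+  runId: string
+  outcome?: "success" | "interrupt"
+  result?: any
+  interrupt?: InterruptInfo
+}
+
+type RunAgentInput = {
+  threadId: string
+  runId: string
+  resume?: { interruptId?: string; payload?: any }
+}
+
+// Build the follow-up run that answers an "upload_required" interrupt.
+async function buildResumeInput(
+  finished: RunFinishedEvent,
+  collectFilesFromUser: (prompt: any) => Promise<string[]> // hypothetical UI hook
+): Promise<RunAgentInput | null> {
+  if (finished.outcome !== "interrupt" || !finished.interrupt) {
+    return null // nothing to resume
+  }
+  const fileIds = await collectFilesFromUser(finished.interrupt.payload)
+  return {
+    threadId: finished.threadId, // resume must reuse the same thread
+    runId: crypto.randomUUID(), // a new run id for the continuation
+    resume: {
+      interruptId: finished.interrupt.id, // echo the interrupt id back
+      payload: { fileIds },
+    },
+  }
+}
+```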
+ +### Policy Enforcement +Automatic pauses triggered by organizational policies or compliance requirements. + +### Multi-Step Wizards +Complex workflows where each step requires user confirmation or input. + +### Error Recovery +Agent pauses when encountering an error, allowing user to provide guidance. + +## Implementation Considerations + +### Client SDK Changes + +TypeScript SDK: +- Extended `RunFinishedEvent` type with outcome and interrupt fields +- Updated `RunAgentInput` with resume field +- Helper methods for interrupt handling + +Python SDK: +- Extended `RunFinishedEvent` class +- Updated `RunAgentInput` with resume support +- Interrupt state management utilities + +### Framework Integration + +**Planning Frameworks:** +- Map framework interrupts to AG-UI interrupt events +- Handle resume payloads in execution continuation + +**Workflow Systems:** +- Convert workflow suspensions to AG-UI interrupts +- Resume workflow execution with provided payload + +**Custom Frameworks:** +- Provide interrupt/resume adapter interface +- Documentation for integration patterns + +### UI Considerations + +- Standard components for common interrupt reasons +- Customizable interrupt UI based on payload +- Clear indication of pending interrupts +- History of interrupt/resume actions + +## Breaking Changes + +None. This is an additive change with backward compatibility: +- Omitting `outcome` field maintains existing behavior +- Clients not supporting interrupts can treat them as regular run completions + +## Testing Strategy + +- Unit tests for interrupt/resume serialization +- Integration tests with multiple frameworks +- E2E tests demonstrating various interrupt scenarios +- State consistency tests across interrupt boundaries +- Performance tests for rapid interrupt/resume cycles + +## Open Questions + +1. Should we define standard interrupt reasons (enum vs. free string)? +2. How to handle timeout for unanswered interrupts? +3. Should interrupts support priority levels? +4. Can multiple interrupts be pending simultaneously? +5. How to handle interrupt cancellation? +6. Should we support batch approval of multiple interrupts? + +## References + +- [AG-UI Events Documentation](/concepts/events) +- [AG-UI State Management](/concepts/state) \ No newline at end of file diff --git a/docs/drafts/meta-events.mdx b/docs/drafts/meta-events.mdx new file mode 100644 index 000000000..7cd0cf339 --- /dev/null +++ b/docs/drafts/meta-events.mdx @@ -0,0 +1,245 @@ +--- +title: Meta Events +description: Annotations and signals independent of agent runs +--- + +# Meta Events Proposal + +## Summary + +### Problem Statement +Currently, AG-UI events are tightly coupled to agent runs. There's no standardized way to attach user feedback, annotations, or external signals to the event stream that are independent of the agent's execution lifecycle. + +### Motivation +AG-UI is extended with **MetaEvents**, a new class of events that can occur at any point in the event stream, independent of agent runs. MetaEvents provide a way to attach annotations, signals, or feedback to a serialized stream. They may originate from users, clients, or external systems rather than from agents. Examples include reactions such as thumbs up/down on a message. 
+ +## Status + +- **Status**: Draft +- **Author(s)**: AG-UI Team + +## Detailed Specification + +### Overview + +This proposal introduces: +- A new **MetaEvent** type for side-band annotations +- Events that can appear anywhere in the stream +- Support for user feedback, tags, and external annotations +- Extensible payload structure for application-specific data + +## New Type: MetaEvent + +```typescript +type MetaEvent = BaseEvent & { + type: EventType.META + /** + * Application-defined type of the meta event. + * Examples: "thumbs_up", "thumbs_down", "tag", "note" + */ + metaType: string + + /** + * Application-defined payload. + * May reference other entities (e.g., messageId) or contain freeform data. + */ + payload: Record +} +``` + +### Key Characteristics + +- **Run-independent**: MetaEvents are not tied to any specific run lifecycle +- **Position-flexible**: Can appear before, between, or after runs +- **Origin-diverse**: May come from users, clients, or external systems +- **Extensible**: Applications define their own metaType values and payload schemas + +## Implementation Examples + +### User Feedback + +**Thumbs Up:** +```json +{ + "id": "evt_123", + "ts": 1714063982000, + "type": "META", + "metaType": "thumbs_up", + "payload": { + "messageId": "msg_456", + "userId": "user_789" + } +} +``` + +**Thumbs Down with Reason:** +```json +{ + "id": "evt_124", + "ts": 1714063985000, + "type": "META", + "metaType": "thumbs_down", + "payload": { + "messageId": "msg_456", + "userId": "user_789", + "reason": "inaccurate", + "comment": "The calculation seems incorrect" + } +} +``` + +### Annotations + +**User Note:** +```json +{ + "id": "evt_789", + "ts": 1714064001000, + "type": "META", + "metaType": "note", + "payload": { + "text": "Important question to revisit", + "relatedRunId": "run_001", + "author": "user_123" + } +} +``` + +**Tag Assignment:** +```json +{ + "id": "evt_890", + "ts": 1714064100000, + "type": "META", + "metaType": "tag", + "payload": { + "tags": ["important", "follow-up"], + "threadId": "thread_001" + } +} +``` + +### External System Events + +**Analytics Event:** +```json +{ + "id": "evt_901", + "ts": 1714064200000, + "type": "META", + "metaType": "analytics", + "payload": { + "event": "conversation_shared", + "properties": { + "shareMethod": "link", + "recipientCount": 3 + } + } +} +``` + +**Moderation Flag:** +```json +{ + "id": "evt_902", + "ts": 1714064300000, + "type": "META", + "metaType": "moderation", + "payload": { + "action": "flag", + "messageId": "msg_999", + "category": "inappropriate_content", + "confidence": 0.95 + } +} +``` + +## Common Meta Event Types + +While applications can define their own types, these are commonly used: + +| MetaType | Description | Typical Payload | +| --- | --- | --- | +| `thumbs_up` | Positive feedback | `{ messageId, userId }` | +| `thumbs_down` | Negative feedback | `{ messageId, userId, reason? }` | +| `note` | User annotation | `{ text, relatedId?, author }` | +| `tag` | Categorization | `{ tags[], targetId }` | +| `bookmark` | Save for later | `{ messageId, userId }` | +| `copy` | Content copied | `{ messageId, content }` | +| `share` | Content shared | `{ messageId, method }` | +| `rating` | Numeric rating | `{ messageId, rating, maxRating }` | + +## Use Cases + +### User Feedback Collection +Capture user reactions to agent responses for quality improvement. + +### Conversation Annotation +Allow users to add notes, tags, or bookmarks to important parts of conversations. 
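+
+As a sketch, a client could attach such an annotation by appending a MetaEvent to the persisted stream without touching any run. The helper below mirrors the `MetaEvent` type above; the payload fields are illustrative only.
+
+```typescript
+// Local mirror of the proposed MetaEvent shape.
+type MetaEvent = {
+  id: string
+  ts: number
+  type: "META"
+  metaType: string
+  payload: Record<string, any>
+}
+
+// Create a "note" annotation authored by a user, optionally linked to a run.
+function createNoteEvent(text: string, author: string, relatedRunId?: string): MetaEvent {
+  return {
+    id: crypto.randomUUID(),
+    ts: Date.now(),
+    type: "META",
+    metaType: "note",
+    payload: { text, author, ...(relatedRunId ? { relatedRunId } : {}) },
+  }
+}
+
+// When rendering chat, meta events can be filtered out or overlaid separately.
+const isMetaEvent = (event: { type: string }): event is MetaEvent => event.type === "META"
+```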
+ +### Analytics and Tracking +Record user interactions and behaviors without affecting agent execution. + +### Content Moderation +Flag or mark content for review by external moderation systems. + +### Collaborative Features +Enable multiple users to annotate or comment on shared conversations. + +### Audit Trail +Create a complete record of all interactions, not just agent responses. + +## Implementation Considerations + +### Client SDK Changes + +TypeScript SDK: +- New `MetaEvent` type in `@ag-ui/core` +- Helper functions for common meta event types +- MetaEvent filtering and querying utilities + +Python SDK: +- `MetaEvent` class implementation +- Meta event builders for common types +- Event stream filtering capabilities + +### Storage Implications + +- MetaEvents should be persisted alongside regular events +- Consider separate indexing for efficient querying +- Support filtering streams to include/exclude meta events + +### Security Considerations + +- Validate metaType against allowed values if restricting +- Consider permissions for who can create certain meta event types +- Sanitize payload content to prevent injection attacks + +## Breaking Changes + +None. This is an additive change: +- New event type doesn't affect existing event processing +- Clients not handling MetaEvents can safely ignore them +- No changes to existing event schemas + +## Testing Strategy + +- Unit tests for MetaEvent creation and validation +- Integration tests with mixed event streams +- Performance tests with high-volume meta events +- Security tests for payload validation + +## Open Questions + +1. Should we define a standard set of metaType values? +2. How to handle MetaEvent permissions and authorization? +3. Should MetaEvents support references to multiple entities? +4. What's the maximum payload size for MetaEvents? +5. Should we support MetaEvent versioning? +6. How to handle MetaEvent conflicts (e.g., multiple ratings)? + +## References + +- [AG-UI Events Documentation](/concepts/events) +- [Event Sourcing](https://martinfowler.com/eaaDev/EventSourcing.html) +- [CQRS Pattern](https://martinfowler.com/bliki/CQRS.html) \ No newline at end of file diff --git a/docs/drafts/multimodal-messages.mdx b/docs/drafts/multimodal-messages.mdx new file mode 100644 index 000000000..5915d63d1 --- /dev/null +++ b/docs/drafts/multimodal-messages.mdx @@ -0,0 +1,280 @@ +--- +title: Multi-modal Messages +description: Support for multimodal input messages including text, images, audio, and files +--- + +# Multi-modal Messages Proposal + +## Summary + +### Problem Statement +Current AG-UI protocol only supports text-based user messages. As LLMs increasingly support multimodal inputs (images, audio, files), the protocol needs to evolve to handle these richer input types. + +### Motivation +Evolve AG-UI to support **multimodal input messages** without breaking existing apps. Inputs may include text, images, audio, and files. 
+ +## Status + +- **Status**: Draft +- **Author(s)**: AG-UI Team + +## Detailed Specification + +### Overview + +Extend the `UserMessage` `content` property to be either a string or an array of `InputContent`: + +```typescript +interface TextInputContent { + type: "text" + text: string +} + +interface BinaryInputContent { + type: "binary" + mimeType: string + id?: string + url?: string + data?: string + filename?: string +} + +type InputContent = TextInputContent | BinaryInputContent; + +type UserMessage = { + id: string + role: "user" + content: string | InputContent[] + name?: string +} +``` + +### InputContent Types + +#### TextInputContent + +Represents text content within a multimodal message. + +```typescript +interface TextInputContent { + type: "text" + text: string +} +``` + +| Property | Type | Description | +| --- | --- | --- | +| `type` | `"text"` | Identifies this as text content | +| `text` | `string` | The text content | + +#### BinaryInputContent + +Represents binary content such as images, audio, or files. + +```typescript +interface BinaryInputContent { + type: "binary" + mimeType: string + id?: string + url?: string + data?: string + filename?: string +} +``` + +| Property | Type | Description | +| --- | --- | --- | +| `type` | `"binary"` | Identifies this as binary content | +| `mimeType` | `string` | MIME type of the content (e.g., "image/jpeg", "audio/wav") | +| `id` | `string?` | Optional identifier for content reference | +| `url` | `string?` | Optional URL to fetch the content | +| `data` | `string?` | Optional base64-encoded content | +| `filename` | `string?` | Optional filename for the content | + +### Content Delivery Methods + +Binary content can be provided through multiple methods: + +1. **Inline Data**: Base64-encoded in the `data` field +2. **URL Reference**: External URL in the `url` field +3. **ID Reference**: Reference to pre-uploaded content via `id` field + +At least one of `data`, `url`, or `id` must be provided for binary content. + +## Implementation Examples + +### Simple Text Message (Backward Compatible) + +```json +{ + "id": "msg-001", + "role": "user", + "content": "What's in this image?" +} +``` + +### Image with Text + +```json +{ + "id": "msg-002", + "role": "user", + "content": [ + { + "type": "text", + "text": "What's in this image?" + }, + { + "type": "binary", + "mimeType": "image/jpeg", + "data": "base64-encoded-image-data..." + } + ] +} +``` + +### Multiple Images with Question + +```json +{ + "id": "msg-003", + "role": "user", + "content": [ + { + "type": "text", + "text": "What are the differences between these images?" 
+ }, + { + "type": "binary", + "mimeType": "image/png", + "url": "https://example.com/image1.png" + }, + { + "type": "binary", + "mimeType": "image/png", + "url": "https://example.com/image2.png" + } + ] +} +``` + +### Audio Transcription Request + +```json +{ + "id": "msg-004", + "role": "user", + "content": [ + { + "type": "text", + "text": "Please transcribe this audio recording" + }, + { + "type": "binary", + "mimeType": "audio/wav", + "filename": "meeting-recording.wav", + "id": "audio-upload-123" + } + ] +} +``` + +### Document Analysis + +```json +{ + "id": "msg-005", + "role": "user", + "content": [ + { + "type": "text", + "text": "Summarize the key points from this PDF" + }, + { + "type": "binary", + "mimeType": "application/pdf", + "filename": "quarterly-report.pdf", + "url": "https://example.com/reports/q4-2024.pdf" + } + ] +} +``` + +## Implementation Considerations + +### Client SDK Changes + +TypeScript SDK: +- Extended `UserMessage` type in `@ag-ui/core` +- Content validation utilities +- Helper methods for constructing multimodal messages +- Binary content encoding/decoding utilities + +Python SDK: +- Extended `UserMessage` class +- Content type validation +- Multimodal message builders +- Binary content handling utilities + +### Framework Integration + +Frameworks need to: +- Parse multimodal user messages +- Forward content to LLM providers that support multimodal inputs +- Handle fallbacks for models that don't support certain content types +- Manage content upload/storage for binary data + +### Security Considerations + +- Validate MIME types against allowed list +- Implement size limits for binary content +- Sanitize URLs to prevent SSRF attacks +- Consider content scanning for malicious files + +## Breaking Changes + +None. This is a backward-compatible change: +- String content continues to work as before +- Clients not supporting multimodal content can treat array content as text-only +- Frameworks can gracefully degrade for unsupported content types + +## Use Cases + +### Visual Question Answering +Users can upload images and ask questions about them. + +### Document Processing +Upload PDFs, Word documents, or spreadsheets for analysis. + +### Audio Transcription and Analysis +Process voice recordings, podcasts, or meeting audio. + +### Multi-document Comparison +Compare multiple images, documents, or mixed media. + +### Screenshot Analysis +Share screenshots for UI/UX feedback or debugging assistance. + +## Testing Strategy + +- Unit tests for content type validation +- Integration tests with multimodal LLMs +- Backward compatibility tests with string content +- Performance tests for large binary payloads +- Security tests for content validation and sanitization + +## Open Questions + +1. Should we define a maximum size for inline base64 content? +2. How to handle content type negotiation with different LLM providers? +3. Should we support streaming for large binary content? +4. What MIME types should be officially supported? +5. How to handle content expiration for URL references? +6. Should we support content compression? 
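+
+## Example: Constructing a Multimodal Message
+
+The sketch below shows one way a browser client could build a `UserMessage` from a prompt plus a `File` object, using inline base64 delivery. The helper name is illustrative and the types are restated locally; for large content a production client would prefer the `url` or `id` delivery methods.
+
+```typescript
+// Types restated from the proposal above.
+interface TextInputContent { type: "text"; text: string }
+interface BinaryInputContent {
+  type: "binary"
+  mimeType: string
+  id?: string
+  url?: string
+  data?: string
+  filename?: string
+}
+type InputContent = TextInputContent | BinaryInputContent
+type UserMessage = { id: string; role: "user"; content: string | InputContent[]; name?: string }
+
+// Build a multimodal user message with the file embedded as base64 data.
+async function userMessageWithFile(prompt: string, file: File): Promise<UserMessage> {
+  const bytes = new Uint8Array(await file.arrayBuffer())
+  let binary = ""
+  for (const byte of bytes) binary += String.fromCharCode(byte) // acceptable for small files
+  return {
+    id: crypto.randomUUID(),
+    role: "user",
+    content: [
+      { type: "text", text: prompt },
+      {
+        type: "binary",
+        mimeType: file.type || "application/octet-stream",
+        filename: file.name,
+        data: btoa(binary), // inline base64; large files should use url or id instead
+      },
+    ],
+  }
+}
+```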
+ +## References + +- [OpenAI Vision API](https://platform.openai.com/docs/guides/vision) +- [Anthropic Vision](https://docs.anthropic.com/en/docs/vision) +- [MIME Types](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types) +- [Data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs) \ No newline at end of file diff --git a/docs/drafts/overview.mdx b/docs/drafts/overview.mdx new file mode 100644 index 000000000..a8c307bc5 --- /dev/null +++ b/docs/drafts/overview.mdx @@ -0,0 +1,84 @@ +--- +title: Overview +description: Draft changes being considered for the AG-UI protocol +--- + +# Overview + +This section contains draft changes being considered for the AG-UI protocol. These proposals are under internal review and may be modified or withdrawn before implementation. + +## Current Drafts + + + + Represent ongoing agent progress between chat messages with fine-grained activity updates + + + Support for LLM reasoning visibility and continuity with encrypted content + + + Stream serialization for chat history restoration and event compaction + + + Support for multimodal input messages including images, audio, and files + + + Native support for agent pauses requiring human approval or input + + + AI-generated interfaces without requiring custom tool renderers + + + Annotations and signals independent of agent runs + + + +## Status Definitions + +- **Draft** - Initial proposal under consideration +- **Under Review** - Active development and testing +- **Accepted** - Approved for implementation +- **Implemented** - Merged into the main protocol specification +- **Withdrawn** - Proposal has been withdrawn or superseded \ No newline at end of file diff --git a/docs/drafts/reasoning.mdx b/docs/drafts/reasoning.mdx new file mode 100644 index 000000000..b1983dbd6 --- /dev/null +++ b/docs/drafts/reasoning.mdx @@ -0,0 +1,269 @@ +--- +title: Reasoning +description: Support for LLM reasoning visibility and continuity +--- + +# Reasoning Proposal + +## Summary + +### Problem Statement +LLMs increasingly use chain-of-thought reasoning to improve response quality, but there's no standardized way to surface reasoning signals while maintaining privacy and state continuity across turns. + +### Motivation +AG-UI should support **LLM reasoning** without breaking existing apps. + +- **Reasoning visibility & continuity**: We must surface reasoning signals (e.g., **reasoning summaries**) and support **encrypted reasoning items** for state carry-over across turns—especially under `store:false`/ZDR—*without exposing raw chain-of-thought*. +- **Backwards compatibility**: Existing AG-UI clients must keep working unchanged. New capabilities should be non-breaking. + +## Status + +- **Status**: Draft +- **Author(s)**: AG-UI Team + +## Detailed Specification + +### Overview + +This proposal introduces: +- New events for reasoning lifecycle management +- A new `ReasoningMessage` type for message history +- Support for encrypted reasoning content + +## New Reasoning Events + +These events represent the lifecycle of reasoning messages in a conversation. + +### ReasoningStartEvent + +Marks the start of reasoning. 
+ +```typescript +type ReasoningStartEvent = BaseEvent & { + type: EventType.REASONING_START + messageId: string + encryptedContent?: string +} +``` + +| Property | Type | Description | +| --- | --- | --- | +| `messageId` | `string` | Unique identifier of this reasoning | +| `encryptedContent` | `string?` | Optionally the encrypted content | + +### ReasoningMessageStartEvent + +Signals the start of a reasoning message. + +```typescript +type ReasoningMessageStartEvent = BaseEvent & { + type: EventType.REASONING_MESSAGE_START + messageId: string + role: "assistant" +} +``` + +| Property | Type | Description | +| --- | --- | --- | +| `messageId` | `string` | Unique identifier of the message | +| `role` | `"assistant"` | Role of the reasoning message | + +### ReasoningMessageContentEvent + +Represents a chunk of content in a streaming reasoning message. + +```typescript +type ReasoningMessageContentEvent = BaseEvent & { + type: EventType.REASONING_MESSAGE_CONTENT + messageId: string + delta: string // Non-empty string +} +``` + +| Property | Type | Description | +| --- | --- | --- | +| `messageId` | `string` | Matches the ID from ReasoningMessageStartEvent | +| `delta` | `string` | Reasoning content chunk (non-empty) | + +### ReasoningMessageEndEvent + +Signals the end of a reasoning message. + +```typescript +type ReasoningMessageEndEvent = BaseEvent & { + type: EventType.REASONING_MESSAGE_END + messageId: string +} +``` + +| Property | Type | Description | +| --- | --- | --- | +| `messageId` | `string` | Matches the ID from ReasoningMessageStartEvent | + +### ReasoningMessageChunkEvent + +A convenience event to auto start/close reasoning messages. + +```typescript +type ReasoningMessageChunkEvent = BaseEvent & { + type: EventType.REASONING_MESSAGE_CHUNK + messageId?: string + delta?: string +} +``` + +| Property | Type | Description | +| --- | --- | --- | +| `messageId` | `string?` | Message ID (first event must be non-empty) | +| `delta` | `string?` | Reasoning content chunk | + +### ReasoningEndEvent + +Marks the end of reasoning. 
+ +```typescript +type ReasoningEndEvent = BaseEvent & { + type: EventType.REASONING_END + messageId: string +} +``` + +| Property | Type | Description | +| --- | --- | --- | +| `messageId` | `string` | Unique identifier of this reasoning | + +## New ReasoningMessage Type + +```typescript +type ReasoningMessage = { + id: string + role: "reasoning" + content: string[] + encryptedContent?: string +} +``` + +## Removed Events + +These events have never been publicly documented and will be removed: +- `THINKING_TEXT_MESSAGE_START` +- `THINKING_TEXT_MESSAGE_CONTENT` +- `THINKING_TEXT_MESSAGE_END` + +## Implementation Considerations + +### Client SDK Changes + +TypeScript SDK: +- New event types in `@ag-ui/core` +- ReasoningMessage type in message unions +- Reasoning event handlers in subscriber +- Support for encrypted content handling + +Python SDK: +- New event classes in `ag_ui.core.events` +- ReasoningMessage class +- Encryption/decryption utilities + +### Privacy and Security + +- **Encrypted reasoning**: Support for encrypted reasoning content that clients cannot decrypt +- **State continuity**: Encrypted reasoning items can be passed across turns without exposing content +- **ZDR compliance**: Works with `store:false` and zero data retention policies + +### Backward Compatibility + +- Clients not handling reasoning events continue to work +- Reasoning messages are optional in message history +- No changes required to existing integrations + +## Use Cases + +### Chain-of-Thought Visibility +Show users that the model is "thinking" without exposing internal reasoning. + +### Reasoning Summaries +Provide high-level summaries of reasoning process for transparency. + +### State Continuity +Maintain reasoning context across conversation turns without storing raw content. + +### Compliance and Privacy +Meet data retention requirements while preserving reasoning capabilities. + +## Examples + +### Basic Reasoning Flow + +```typescript +// Agent emits reasoning start +{ + "type": "REASONING_START", + "messageId": "reasoning-001", + "encryptedContent": "encrypted-blob-xyz" +} + +// Stream reasoning content (visible to client) +{ + "type": "REASONING_MESSAGE_START", + "messageId": "msg-123", + "role": "assistant" +} + +{ + "type": "REASONING_MESSAGE_CONTENT", + "messageId": "msg-123", + "delta": "Let me think through this step by step..." +} + +{ + "type": "REASONING_MESSAGE_END", + "messageId": "msg-123" +} + +// End reasoning +{ + "type": "REASONING_END", + "messageId": "reasoning-001" +} +``` + +### Convenience Event Usage + +```typescript +// Using chunk event for simpler implementation +{ + "type": "REASONING_MESSAGE_CHUNK", + "messageId": "msg-456", + "delta": "Analyzing the requirements..." +} + +// Auto-closes on next non-reasoning event or empty chunk +{ + "type": "REASONING_MESSAGE_CHUNK", + "messageId": "msg-456", + "delta": "" +} +``` + +## Testing Strategy + +- Unit tests for new event types +- Integration tests with reasoning-capable models +- Backward compatibility tests with existing clients +- Encryption/decryption roundtrip tests +- Performance tests for reasoning event streaming + +## Open Questions + +1. Should reasoning messages be included in `MESSAGES_SNAPSHOT` by default? +2. How to handle partial reasoning in case of interruptions? +3. Should we support different reasoning visibility levels (none, summary, full)? +4. What's the maximum size for encrypted reasoning content? +5. Should reasoning events support metadata (e.g., reasoning type, complexity)? 
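+
+## Example: Folding Reasoning Events into Messages
+
+The sketch below shows how a client could reduce the streamed reasoning message events into `ReasoningMessage` entries for the message history. The shapes are abbreviated, the surrounding `REASONING_START`/`REASONING_END` events are ignored for brevity, and collecting each message's text into a single `content` entry is just one possible mapping (see the open questions above).
+
+```typescript
+// Abbreviated event shapes; see the specification above.
+type ReasoningStreamEvent =
+  | { type: "REASONING_MESSAGE_START"; messageId: string; role: "assistant" }
+  | { type: "REASONING_MESSAGE_CONTENT"; messageId: string; delta: string }
+  | { type: "REASONING_MESSAGE_END"; messageId: string }
+
+type ReasoningMessage = {
+  id: string
+  role: "reasoning"
+  content: string[]
+  encryptedContent?: string
+}
+
+// Accumulate deltas per messageId and emit a ReasoningMessage on END.
+function reduceReasoningEvents(events: ReasoningStreamEvent[]): ReasoningMessage[] {
+  const open = new Map<string, string>()
+  const messages: ReasoningMessage[] = []
+  for (const event of events) {
+    switch (event.type) {
+      case "REASONING_MESSAGE_START":
+        open.set(event.messageId, "")
+        break
+      case "REASONING_MESSAGE_CONTENT":
+        open.set(event.messageId, (open.get(event.messageId) ?? "") + event.delta)
+        break
+      case "REASONING_MESSAGE_END":
+        messages.push({ id: event.messageId, role: "reasoning", content: [open.get(event.messageId) ?? ""] })
+        open.delete(event.messageId)
+        break
+    }
+  }
+  return messages
+}
+```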
+ +## References + +- [AG-UI Events Documentation](/concepts/events) +- [AG-UI Messages Documentation](/concepts/messages) +- [Chain-of-Thought Prompting](https://arxiv.org/abs/2201.11903) \ No newline at end of file diff --git a/docs/drafts/serialization.mdx b/docs/drafts/serialization.mdx new file mode 100644 index 000000000..bbb22e196 --- /dev/null +++ b/docs/drafts/serialization.mdx @@ -0,0 +1,255 @@ +--- +title: Serialization +description: Stream serialization for chat history restoration and event compaction +--- + +# Serialization Proposal + +## Summary + +### Problem Statement +Currently, there's no standardized way to serialize and restore AG-UI event streams, making it difficult to reload chat history, attach to running agents, or implement branching/time travel features. + +### Motivation +AG-UI adds **stream serialization** to reload chat history and attach to active agents, enabling restoration and interaction with live state. A standardized `compactEvents(events: BaseEvent[]): BaseEvent[]` reduces already-streamed events and normalizes inputs. Additionally, `RunStartedEvent` gains `parentRunId` for branching/time travel and an `input` field carrying the exact `AgentInput` sent to the agent (which may omit messages already present in history). + +## Status + +- **Status**: Draft +- **Author(s)**: AG-UI Team + +## Detailed Specification + +### Overview + +This proposal introduces three key capabilities: +1. **Stream serialization** - Serialize/deserialize event streams for persistence and restoration +2. **Event compaction** - Reduce event volume while preserving semantic meaning +3. **Run lineage tracking** - Enable branching and time travel with parent run references + +## Proposed Changes + +### Stream Serialization + +Support serializing/deserializing the event stream so chat history can be reloaded and sessions can attach to running agents/live state. + +### Event Compaction + +Introduce `compactEvents(events: BaseEvent[]): BaseEvent[]` to: +- Reduce the number of already-streamed events +- **Normalize** `RunStartedEvent.input` so it contains only the messages that were not already sent/recorded earlier in the thread + +```typescript +// Event compaction API +declare function compactEvents(events: BaseEvent[]): BaseEvent[] +``` + +### Run Lineage and Input Capture + +Extend `RunStartedEvent` with: +- `parentRunId?: string` to enable branching/time travel +- `input?: AgentInput` containing the agent input exactly as sent + - `input.messages` **may omit** messages already present in history + - `compactEvents` **normalizes** this field to a minimal form + +## Updated Types + +```typescript +type RunStartedEvent = BaseEvent & { + type: EventType.RUN_STARTED + threadId: string + runId: string + /** + * Optional lineage pointer for branching/time travel. + * If present, refers to a prior run within the same thread. + */ + parentRunId?: string + /** + * The exact AgentInput payload that was sent to the agent for this run. + * May omit messages already present in history; compactEvents() will normalize. 
+ */ + input?: AgentInput +} +``` + +## Event Compaction Rules + +The `compactEvents` function applies these transformations: + +### Message Events +- Consecutive `TEXT_MESSAGE_CONTENT` events with same `messageId` → single event with concatenated content +- Complete message sequences (START + CONTENT + END) → single snapshot event +- Tool call sequences → compacted tool invocation records + +### State Events +- Multiple `STATE_DELTA` events → single `STATE_SNAPSHOT` with final state +- Redundant state updates → removed if superseded by later snapshots + +### Run Input Normalization +- Messages in `RunStartedEvent.input` that exist in prior events → removed +- Only new/incremental messages retained in normalized form + +## Implementation Examples + +### Basic Serialization + +```typescript +// Serialize event stream +const events: BaseEvent[] = [...]; // Full event history +const serialized = JSON.stringify(events); + +// Store to database, file, etc. +await storage.save(threadId, serialized); + +// Later: deserialize and restore +const restored = JSON.parse(await storage.load(threadId)); +const compacted = compactEvents(restored); +``` + +### Event Compaction Example + +**Before compaction:** +```typescript +[ + { type: "TEXT_MESSAGE_START", messageId: "msg1", role: "user" }, + { type: "TEXT_MESSAGE_CONTENT", messageId: "msg1", delta: "Hello " }, + { type: "TEXT_MESSAGE_CONTENT", messageId: "msg1", delta: "world" }, + { type: "TEXT_MESSAGE_END", messageId: "msg1" }, + { type: "STATE_DELTA", patch: { op: "add", path: "/foo", value: 1 } }, + { type: "STATE_DELTA", patch: { op: "replace", path: "/foo", value: 2 } }, +] +``` + +**After compaction:** +```typescript +[ + { + type: "MESSAGES_SNAPSHOT", + messages: [{ id: "msg1", role: "user", content: "Hello world" }] + }, + { + type: "STATE_SNAPSHOT", + state: { foo: 2 } + } +] +``` + +### Branching with Parent Run ID + +```typescript +// Original run +{ + type: "RUN_STARTED", + threadId: "thread1", + runId: "run1", + input: { messages: ["Tell me about Paris"] } +} + +// Branch from run1 +{ + type: "RUN_STARTED", + threadId: "thread1", + runId: "run2", + parentRunId: "run1", // Points to parent + input: { messages: ["Actually, tell me about London instead"] } +} +``` + +### Normalized Input Example + +```typescript +// First run includes full message +{ + type: "RUN_STARTED", + runId: "run1", + input: { + messages: [ + { id: "msg1", role: "user", content: "Hello" } + ] + } +} + +// Second run omits already-present message +{ + type: "RUN_STARTED", + runId: "run2", + input: { + messages: [ + { id: "msg2", role: "user", content: "How are you?" } + ] + // msg1 omitted as it's already in history + } +} +``` + +## Use Cases + +### Session Restoration +Reload a previous chat session with full history and state. + +### Live Agent Attachment +Connect to an already-running agent and receive ongoing events. + +### Branching Conversations +Create alternative conversation branches from any point in history. + +### Time Travel Debugging +Navigate to any point in conversation history for debugging. + +### Efficient Storage +Compact events before long-term storage to reduce size. 
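+
+As a sketch of the first compaction rule above (merging consecutive text deltas), a simplified pass could look like the following; the real `compactEvents` would additionally fold complete sequences into snapshots and normalize `RunStartedEvent.input`. The `BaseEvent` shape is simplified here.
+
+```typescript
+// Simplified shapes for illustration only.
+type BaseEvent = { type: string; [key: string]: any }
+
+type TextContentEvent = BaseEvent & {
+  type: "TEXT_MESSAGE_CONTENT"
+  messageId: string
+  delta: string
+}
+
+const isTextContent = (e: BaseEvent): e is TextContentEvent => e.type === "TEXT_MESSAGE_CONTENT"
+
+// Merge runs of TEXT_MESSAGE_CONTENT events that share a messageId.
+function mergeTextDeltas(events: BaseEvent[]): BaseEvent[] {
+  const out: BaseEvent[] = []
+  for (const event of events) {
+    const prev = out[out.length - 1]
+    if (isTextContent(event) && prev !== undefined && isTextContent(prev) && prev.messageId === event.messageId) {
+      // Concatenate into the previous event instead of appending a new one.
+      out[out.length - 1] = { ...prev, delta: prev.delta + event.delta }
+    } else {
+      out.push(event)
+    }
+  }
+  return out
+}
+```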
+ +## Implementation Considerations + +### Client SDK Changes + +TypeScript SDK: +- `compactEvents` function implementation +- Serialization/deserialization utilities +- Branch management helpers +- Storage adapter interfaces + +Python SDK: +- Event compaction algorithm +- Serialization utilities +- Parent run tracking +- Storage abstractions + +### Storage Considerations + +- Support for various storage backends (memory, database, file) +- Incremental storage for append-only events +- Compression support for serialized streams +- Indexing strategies for quick access + +## Breaking Changes + +None. This is an additive change: +- Existing events remain valid +- New fields are optional +- Compaction is opt-in +- Backward compatible with existing streams + +## Testing Strategy + +- Unit tests for compaction algorithm +- Round-trip serialization tests +- Branch/merge scenario tests +- Performance benchmarks for large event streams +- Storage adapter integration tests + +## Open Questions + +1. Should compaction be automatic or explicit? +2. What's the optimal compaction strategy for different use cases? +3. How to handle conflicts in branched conversations? +4. Should we support custom compaction rules? +5. How to handle event versioning for long-term storage? +6. Should we provide built-in storage adapters? + +## References + +- [Event Sourcing](https://martinfowler.com/eaaDev/EventSourcing.html) +- [AG-UI Events Documentation](/concepts/events) +- [AG-UI State Management](/concepts/state) +- [JSON Patch RFC 6902](https://tools.ietf.org/html/rfc6902) \ No newline at end of file From 1f4500fa59e794ad8c9e43dde5b4a30967ba2c25 Mon Sep 17 00:00:00 2001 From: Markus Ecker Date: Tue, 23 Sep 2025 16:17:01 +0200 Subject: [PATCH 2/3] wip --- docs/drafts/activity-events.mdx | 13 +------------ docs/drafts/generative-ui.mdx | 14 +------------- docs/drafts/interrupts.mdx | 17 +---------------- docs/drafts/meta-events.mdx | 18 +----------------- docs/drafts/multimodal-messages.mdx | 18 +----------------- docs/drafts/reasoning.mdx | 11 +---------- docs/drafts/serialization.mdx | 19 +------------------ 7 files changed, 7 insertions(+), 103 deletions(-) diff --git a/docs/drafts/activity-events.mdx b/docs/drafts/activity-events.mdx index 618e8932d..95afa3063 100644 --- a/docs/drafts/activity-events.mdx +++ b/docs/drafts/activity-events.mdx @@ -16,7 +16,7 @@ AG-UI is extended with **ActivityEvents** and **ActivityMessages** to represent ## Status - **Status**: Draft -- **Author(s)**: AG-UI Team +- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) ## Background @@ -154,10 +154,6 @@ Python SDK additions: - **Workflow Systems**: Can surface step-by-step workflow progress as ActivityEvents - **Other frameworks**: May emit ActivityEvents freely; AG-UI will serialize them like other events -## Breaking Changes - -None. This is an additive change that maintains backward compatibility. - ## Examples and Use Cases ### Example 1: Web Search Activity @@ -205,13 +201,6 @@ Each step appears inline with chat, giving users real-time feedback. - E2E tests in AG-UI Dojo demonstrating activity rendering - Performance benchmarks for high-frequency activity updates -## Open Questions - -1. Should we enforce a schema for common activity types (e.g., standardized "PLAN" format)? -2. How should activities behave across run boundaries? -3. Should there be a maximum number of patches before requiring a new snapshot? -4. How do activities interact with message editing/regeneration? 
- ## References - [JSON Patch RFC 6902](https://tools.ietf.org/html/rfc6902) diff --git a/docs/drafts/generative-ui.mdx b/docs/drafts/generative-ui.mdx index 48da045fe..a9a0daaa8 100644 --- a/docs/drafts/generative-ui.mdx +++ b/docs/drafts/generative-ui.mdx @@ -16,7 +16,7 @@ This draft describes an AG-UI extension that addresses **generative user interfa ## Status - **Status**: Draft -- **Author(s)**: AG-UI Team +- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) ## Challenges and Limitations @@ -275,10 +275,6 @@ Python SDK additions: - Frameworks emit standard tool calls; client handles UI generation - Backward compatible with existing tool-based UI approaches -## Breaking Changes - -None. This is an additive change that maintains backward compatibility. - ## Use Cases ### Dynamic Forms @@ -301,14 +297,6 @@ Generate different UI layouts based on user preferences or device capabilities. - Performance benchmarks comparing single vs. two-step generation - Cross-provider compatibility testing -## Open Questions - -1. Should we standardize a set of common UI generators? -2. How to handle UI state management across regenerations? -3. Should generators support streaming UI updates? -4. What's the optimal size/capability for the secondary generation model? -5. How to ensure generated UIs are accessible and follow best practices? - ## References - [AG-UI Tools Documentation](/concepts/tools) diff --git a/docs/drafts/interrupts.mdx b/docs/drafts/interrupts.mdx index f49e6386e..518d4c2ca 100644 --- a/docs/drafts/interrupts.mdx +++ b/docs/drafts/interrupts.mdx @@ -16,7 +16,7 @@ Support **human-in-the-loop pauses** (and related mechanisms) natively in AG-UI ## Status - **Status**: Draft -- **Author(s)**: AG-UI Team +- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) ## Overview @@ -210,12 +210,6 @@ Python SDK: - Clear indication of pending interrupts - History of interrupt/resume actions -## Breaking Changes - -None. This is an additive change with backward compatibility: -- Omitting `outcome` field maintains existing behavior -- Clients not supporting interrupts can treat them as regular run completions - ## Testing Strategy - Unit tests for interrupt/resume serialization @@ -224,15 +218,6 @@ None. This is an additive change with backward compatibility: - State consistency tests across interrupt boundaries - Performance tests for rapid interrupt/resume cycles -## Open Questions - -1. Should we define standard interrupt reasons (enum vs. free string)? -2. How to handle timeout for unanswered interrupts? -3. Should interrupts support priority levels? -4. Can multiple interrupts be pending simultaneously? -5. How to handle interrupt cancellation? -6. Should we support batch approval of multiple interrupts? - ## References - [AG-UI Events Documentation](/concepts/events) diff --git a/docs/drafts/meta-events.mdx b/docs/drafts/meta-events.mdx index 7cd0cf339..87f31fd15 100644 --- a/docs/drafts/meta-events.mdx +++ b/docs/drafts/meta-events.mdx @@ -16,7 +16,7 @@ AG-UI is extended with **MetaEvents**, a new class of events that can occur at a ## Status - **Status**: Draft -- **Author(s)**: AG-UI Team +- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) ## Detailed Specification @@ -215,13 +215,6 @@ Python SDK: - Consider permissions for who can create certain meta event types - Sanitize payload content to prevent injection attacks -## Breaking Changes - -None. 
This is an additive change: -- New event type doesn't affect existing event processing -- Clients not handling MetaEvents can safely ignore them -- No changes to existing event schemas - ## Testing Strategy - Unit tests for MetaEvent creation and validation @@ -229,15 +222,6 @@ None. This is an additive change: - Performance tests with high-volume meta events - Security tests for payload validation -## Open Questions - -1. Should we define a standard set of metaType values? -2. How to handle MetaEvent permissions and authorization? -3. Should MetaEvents support references to multiple entities? -4. What's the maximum payload size for MetaEvents? -5. Should we support MetaEvent versioning? -6. How to handle MetaEvent conflicts (e.g., multiple ratings)? - ## References - [AG-UI Events Documentation](/concepts/events) diff --git a/docs/drafts/multimodal-messages.mdx b/docs/drafts/multimodal-messages.mdx index 5915d63d1..f1ef8398a 100644 --- a/docs/drafts/multimodal-messages.mdx +++ b/docs/drafts/multimodal-messages.mdx @@ -16,7 +16,7 @@ Evolve AG-UI to support **multimodal input messages** without breaking existing ## Status - **Status**: Draft -- **Author(s)**: AG-UI Team +- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) ## Detailed Specification @@ -231,13 +231,6 @@ Frameworks need to: - Sanitize URLs to prevent SSRF attacks - Consider content scanning for malicious files -## Breaking Changes - -None. This is a backward-compatible change: -- String content continues to work as before -- Clients not supporting multimodal content can treat array content as text-only -- Frameworks can gracefully degrade for unsupported content types - ## Use Cases ### Visual Question Answering @@ -263,15 +256,6 @@ Share screenshots for UI/UX feedback or debugging assistance. - Performance tests for large binary payloads - Security tests for content validation and sanitization -## Open Questions - -1. Should we define a maximum size for inline base64 content? -2. How to handle content type negotiation with different LLM providers? -3. Should we support streaming for large binary content? -4. What MIME types should be officially supported? -5. How to handle content expiration for URL references? -6. Should we support content compression? - ## References - [OpenAI Vision API](https://platform.openai.com/docs/guides/vision) diff --git a/docs/drafts/reasoning.mdx b/docs/drafts/reasoning.mdx index b1983dbd6..b19d12e28 100644 --- a/docs/drafts/reasoning.mdx +++ b/docs/drafts/reasoning.mdx @@ -19,7 +19,7 @@ AG-UI should support **LLM reasoning** without breaking existing apps. ## Status - **Status**: Draft -- **Author(s)**: AG-UI Team +- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) ## Detailed Specification @@ -254,16 +254,7 @@ Meet data retention requirements while preserving reasoning capabilities. - Encryption/decryption roundtrip tests - Performance tests for reasoning event streaming -## Open Questions - -1. Should reasoning messages be included in `MESSAGES_SNAPSHOT` by default? -2. How to handle partial reasoning in case of interruptions? -3. Should we support different reasoning visibility levels (none, summary, full)? -4. What's the maximum size for encrypted reasoning content? -5. Should reasoning events support metadata (e.g., reasoning type, complexity)? 
- ## References - [AG-UI Events Documentation](/concepts/events) - [AG-UI Messages Documentation](/concepts/messages) -- [Chain-of-Thought Prompting](https://arxiv.org/abs/2201.11903) \ No newline at end of file diff --git a/docs/drafts/serialization.mdx b/docs/drafts/serialization.mdx index bbb22e196..1b20281ba 100644 --- a/docs/drafts/serialization.mdx +++ b/docs/drafts/serialization.mdx @@ -16,7 +16,7 @@ AG-UI adds **stream serialization** to reload chat history and attach to active ## Status - **Status**: Draft -- **Author(s)**: AG-UI Team +- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) ## Detailed Specification @@ -222,14 +222,6 @@ Python SDK: - Compression support for serialized streams - Indexing strategies for quick access -## Breaking Changes - -None. This is an additive change: -- Existing events remain valid -- New fields are optional -- Compaction is opt-in -- Backward compatible with existing streams - ## Testing Strategy - Unit tests for compaction algorithm @@ -238,15 +230,6 @@ None. This is an additive change: - Performance benchmarks for large event streams - Storage adapter integration tests -## Open Questions - -1. Should compaction be automatic or explicit? -2. What's the optimal compaction strategy for different use cases? -3. How to handle conflicts in branched conversations? -4. Should we support custom compaction rules? -5. How to handle event versioning for long-term storage? -6. Should we provide built-in storage adapters? - ## References - [Event Sourcing](https://martinfowler.com/eaaDev/EventSourcing.html) From ea49443b1ebc363958aa9878b4c18b41033f8e48 Mon Sep 17 00:00:00 2001 From: Markus Ecker Date: Tue, 23 Sep 2025 16:31:40 +0200 Subject: [PATCH 3/3] wip --- docs/drafts/activity-events.mdx | 59 ++++++++---- docs/drafts/generative-ui.mdx | 143 ++++++++++++++++++++-------- docs/drafts/interrupts.mdx | 67 +++++++++---- docs/drafts/meta-events.mdx | 69 ++++++++------ docs/drafts/multimodal-messages.mdx | 60 +++++++----- docs/drafts/reasoning.mdx | 69 +++++++++----- docs/drafts/serialization.mdx | 116 ++++++++++++++++++---- 7 files changed, 405 insertions(+), 178 deletions(-) diff --git a/docs/drafts/activity-events.mdx b/docs/drafts/activity-events.mdx index 95afa3063..97c115281 100644 --- a/docs/drafts/activity-events.mdx +++ b/docs/drafts/activity-events.mdx @@ -1,6 +1,7 @@ --- title: Activity Events -description: Proposal for representing ongoing agent progress between chat messages +description: + Proposal for representing ongoing agent progress between chat messages --- # Activity Events Proposal @@ -8,19 +9,28 @@ description: Proposal for representing ongoing agent progress between chat messa ## Summary ### Problem Statement -Users want to render "activity" updates inline with chat, not just at run start or end. Currently, there's no standardized way to represent ongoing agent progress between chat messages. + +Users want to render "activity" updates inline with chat, not just at run start +or end. Currently, there's no standardized way to represent ongoing agent +progress between chat messages. ### Motivation -AG-UI is extended with **ActivityEvents** and **ActivityMessages** to represent ongoing agent progress in between chat messages. This allows frameworks to surface fine-grained activity updates chronologically, giving users immediate visibility into what an agent is doing without waiting for the next message or run boundary. 
+ +AG-UI is extended with **ActivityEvents** and **ActivityMessages** to represent +ongoing agent progress in between chat messages. This allows frameworks to +surface fine-grained activity updates chronologically, giving users immediate +visibility into what an agent is doing without waiting for the next message or +run boundary. ## Status - **Status**: Draft -- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) +- **Author(s)**: Markus Ecker (mail@mme.xyz) ## Background -Users want real-time visibility into agent activities as they happen. Consider this example UI: +Users want real-time visibility into agent activities as they happen. Consider +this example UI: ``` +------------------------------------------------------------+ @@ -41,8 +51,10 @@ Users want real-time visibility into agent activities as they happen. Consider t ## Challenges - **Flexibility**: Must handle arbitrary activity data from different frameworks -- **Serializability**: Events must be replayable and rehydrated for session recovery -- **Extensibility**: Developers should define custom renderers per activity type, with a generic fallback +- **Serializability**: Events must be replayable and rehydrated for session + recovery +- **Extensibility**: Developers should define custom renderers per activity + type, with a generic fallback - **Chronology**: Activities must interleave naturally with chat and run events ## Detailed Specification @@ -50,10 +62,13 @@ Users want real-time visibility into agent activities as they happen. Consider t ### Overview This proposal introduces two new concepts to the AG-UI protocol: + 1. **ActivityEvent**: A new event type in the event stream -2. **ActivityMessage**: A new message type alongside TextMessage, ToolMessage, etc. +2. **ActivityMessage**: A new message type alongside TextMessage, ToolMessage, + etc. -Frameworks may emit ActivityEvents, and frontends can render them inline with chat. +Frameworks may emit ActivityEvents, and frontends can render them inline with +chat. 
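+
+As a sketch of the intended client-side handling, a frontend could fold the
+event stream into a renderable ActivityMessage roughly like this, using the
+event and message shapes specified below. The reducer and the `fast-json-patch`
+dependency are illustrative choices, not part of this proposal.
+
+```typescript
+import { applyOperation, type Operation } from "fast-json-patch"
+
+// Loose local shapes mirroring the ActivityEvent / ActivityMessage below.
+type ActivityEvent = {
+  messageId: string
+  activityType: string
+  snapshot?: Record<string, any>
+  patch?: Record<string, any>
+}
+
+type ActivityMessage = {
+  id: string
+  role: "activity"
+  activityType: string
+  content: Record<string, any>
+}
+
+// Fold all ActivityEvents of one message into the latest renderable state.
+function reduceActivity(events: ActivityEvent[]): ActivityMessage | undefined {
+  let message: ActivityMessage | undefined
+  for (const event of events) {
+    if (event.snapshot) {
+      // A snapshot replaces the accumulated content entirely.
+      message = {
+        id: event.messageId,
+        role: "activity",
+        activityType: event.activityType,
+        content: event.snapshot,
+      }
+    } else if (event.patch && message) {
+      // A patch applies on top of the prior snapshot (JSON Patch semantics).
+      message = {
+        ...message,
+        content: applyOperation(message.content, event.patch as Operation)
+          .newDocument,
+      }
+    }
+  }
+  return message
+}
+```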
### New Event: ActivityEvent @@ -83,6 +98,7 @@ type ActivityEvent = BaseEvent & { #### Example Events Initial activity snapshot: + ```json { "id": "evt_001", @@ -97,6 +113,7 @@ Initial activity snapshot: ``` Incremental update via patch: + ```json { "id": "evt_002", @@ -139,20 +156,25 @@ type ActivityMessage = { ### Client SDK Changes TypeScript SDK additions: + - New `ActivityEvent` type in `@ag-ui/core` - New `ActivityMessage` type in message unions - Activity renderer registry in `@ag-ui/client` Python SDK additions: + - New `ActivityEvent` class in `ag_ui.core.events` - New `ActivityMessage` class in message types - Activity serialization/deserialization support ### Integration Impact -- **Planning Frameworks**: Can emit ActivityEvents during planning or tool execution phases -- **Workflow Systems**: Can surface step-by-step workflow progress as ActivityEvents -- **Other frameworks**: May emit ActivityEvents freely; AG-UI will serialize them like other events +- **Planning Frameworks**: Can emit ActivityEvents during planning or tool + execution phases +- **Workflow Systems**: Can surface step-by-step workflow progress as + ActivityEvents +- **Other frameworks**: May emit ActivityEvents freely; AG-UI will serialize + them like other events ## Examples and Use Cases @@ -167,9 +189,9 @@ agent.emitActivity({ sources: [ { name: "Reddit", status: "pending" }, { name: "X.com", status: "pending" }, - { name: "Google", status: "pending" } - ] - } + { name: "Google", status: "pending" }, + ], + }, }) // Update as search progresses @@ -179,14 +201,15 @@ agent.emitActivity({ patch: { op: "replace", path: "/sources/0/status", - value: "complete" - } + value: "complete", + }, }) ``` ### Use Case: Multi-Step Workflow Visibility A data analysis agent performing multiple steps: + 1. Loading dataset → ActivityEvent shows progress bar 2. Cleaning data → ActivityEvent shows rows processed 3. Running analysis → ActivityEvent shows current computation @@ -205,4 +228,4 @@ Each step appears inline with chat, giving users real-time feedback. - [JSON Patch RFC 6902](https://tools.ietf.org/html/rfc6902) - [AG-UI Events Documentation](/concepts/events) -- [AG-UI Messages Documentation](/concepts/messages) \ No newline at end of file +- [AG-UI Messages Documentation](/concepts/messages) diff --git a/docs/drafts/generative-ui.mdx b/docs/drafts/generative-ui.mdx index a9a0daaa8..9a62f5857 100644 --- a/docs/drafts/generative-ui.mdx +++ b/docs/drafts/generative-ui.mdx @@ -8,26 +8,41 @@ description: AI-generated interfaces without custom tool renderers ## Summary ### Problem Statement -Currently, creating custom user interfaces for agent interactions requires programmers to define specific tool renderers. This limits the flexibility and adaptability of agent-driven applications. + +Currently, creating custom user interfaces for agent interactions requires +programmers to define specific tool renderers. This limits the flexibility and +adaptability of agent-driven applications. ### Motivation -This draft describes an AG-UI extension that addresses **generative user interfaces**—interfaces produced directly by artificial intelligence without requiring a programmer to define custom tool renderers. The key idea is to leverage our ability to send client-side tools to the agent, thereby enabling this capability across all agent frameworks supported by AG-UI. 
+ +This draft describes an AG-UI extension that addresses **generative user +interfaces**—interfaces produced directly by artificial intelligence without +requiring a programmer to define custom tool renderers. The key idea is to +leverage our ability to send client-side tools to the agent, thereby enabling +this capability across all agent frameworks supported by AG-UI. ## Status - **Status**: Draft -- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) +- **Author(s)**: Markus Ecker (mail@mme.xyz) ## Challenges and Limitations ### Tool Description Length -OpenAI enforces a limit of 1024 characters for tool descriptions. Gemini and Anthropic impose no such limit. + +OpenAI enforces a limit of 1024 characters for tool descriptions. Gemini and +Anthropic impose no such limit. ### Arguments JSON Schema Constraints -Classes, nesting, `$ref`, and `oneOf` are not reliably supported across LLM providers. + +Classes, nesting, `$ref`, and `oneOf` are not reliably supported across LLM +providers. ### Context Window Considerations -Injecting a large UI description language into an agent may reduce its performance. Agents dedicated solely to UI generation perform better than agents combining UI generation with other tasks. + +Injecting a large UI description language into an agent may reduce its +performance. Agents dedicated solely to UI generation perform better than agents +combining UI generation with other tasks. ## Detailed Specification @@ -46,13 +61,17 @@ flowchart TD Inject a lightweight tool into the agent: **Tool Definition:** + - **Name:** `generateUserInterface` - **Arguments:** - - **description**: A high-level description of the UI (e.g., *"A form for entering the user's address"*) + - **description**: A high-level description of the UI (e.g., _"A form for + entering the user's address"_) - **data**: Arbitrary pre-populated data for the generated UI - - **output**: A description or schema of the data the agent expects the user to submit back (fields, required/optional, types, constraints) + - **output**: A description or schema of the data the agent expects the user + to submit back (fields, required/optional, types, constraints) **Example Tool Call:** + ```json { "tool": "generateUserInterface", @@ -65,13 +84,20 @@ Inject a lightweight tool into the agent: }, "output": { "type": "object", - "required": ["firstName", "lastName", "street", "city", "postalCode", "country"], + "required": [ + "firstName", + "lastName", + "street", + "city", + "postalCode", + "country" + ], "properties": { "firstName": { "type": "string", "title": "First Name" }, - "lastName": { "type": "string", "title": "Last Name" }, - "street": { "type": "string", "title": "Street Address" }, - "city": { "type": "string", "title": "City" }, - "postalCode":{ "type": "string", "title": "Postal Code" }, + "lastName": { "type": "string", "title": "Last Name" }, + "street": { "type": "string", "title": "Street Address" }, + "city": { "type": "string", "title": "City" }, + "postalCode": { "type": "string", "title": "Postal Code" }, "country": { "type": "string", "title": "Country", @@ -87,11 +113,16 @@ Inject a lightweight tool into the agent: Delegate UI generation to a secondary LLM or agent: -- The CopilotKit user stays in control: Can make their own generators, add custom libraries, include additional prompts etc. 
-- On tool invocation, the secondary model consumes `description`, `data`, and `output` to generate the user interface -- This model is focused solely on UI generation, ensuring maximum fidelity and consistency -- The generation method can be swapped as needed (e.g., JSON, HTML, or other renderable formats) -- The UI format description is not subject to structural or length constraints, allowing arbitrarily complex specifications +- The CopilotKit user stays in control: Can make their own generators, add + custom libraries, include additional prompts etc. +- On tool invocation, the secondary model consumes `description`, `data`, and + `output` to generate the user interface +- This model is focused solely on UI generation, ensuring maximum fidelity and + consistency +- The generation method can be swapped as needed (e.g., JSON, HTML, or other + renderable formats) +- The UI format description is not subject to structural or length constraints, + allowing arbitrarily complex specifications ## Implementation Examples @@ -102,7 +133,14 @@ Delegate UI generation to a secondary LLM or agent: "jsonSchema": { "title": "Shipping Address", "type": "object", - "required": ["firstName", "lastName", "street", "city", "postalCode", "country"], + "required": [ + "firstName", + "lastName", + "street", + "city", + "postalCode", + "country" + ], "properties": { "firstName": { "type": "string", "title": "First name" }, "lastName": { "type": "string", "title": "Last name" }, @@ -151,10 +189,10 @@ Delegate UI generation to a secondary LLM or agent: ### Example Output: ReactFormHookGenerator ```tsx -import React from "react"; -import { useForm } from "react-hook-form"; -import { z } from "zod"; -import { zodResolver } from "@hookform/resolvers/zod"; +import React from "react" +import { useForm } from "react-hook-form" +import { z } from "zod" +import { zodResolver } from "@hookform/resolvers/zod" // ----- Schema (contract) ----- const AddressSchema = z.object({ @@ -163,20 +201,32 @@ const AddressSchema = z.object({ street: z.string().min(1, "Required"), city: z.string().min(1, "Required"), postalCode: z.string().regex(/^[A-Za-z0-9\\-\\s]{3,10}$/, "3–10 chars"), - country: z.enum(["GB", "US", "DE", "AT", "FR", "IT", "ES"]) -}); -export type Address = z.infer; + country: z.enum(["GB", "US", "DE", "AT", "FR", "IT", "ES"]), +}) +export type Address = z.infer type Props = { - initialData?: Partial
<Address>;
-  meta?: { title?: string; submitLabel?: string };
-  respond: (data: Address) => void; // <-- called on successful submit
-};
+  initialData?: Partial<Address>
+  meta?: { title?: string; submitLabel?: string }
+  respond: (data: Address) => void // <-- called on successful submit
+}
 
-const COUNTRIES: Address["country"][] = ["GB", "US", "DE", "AT", "FR", "IT", "ES"];
+const COUNTRIES: Address["country"][] = [
+  "GB",
+  "US",
+  "DE",
+  "AT",
+  "FR",
+  "IT",
+  "ES",
+]
 
 export default function AddressForm({ initialData, meta, respond }: Props) {
-  const { register, handleSubmit, formState: { errors } } = useForm<Address>({
+  const {
+    register,
+    handleSubmit,
+    formState: { errors },
+  } = useForm<Address>({
     resolver: zodResolver(AddressSchema),
     defaultValues: {
       firstName: "",
@@ -185,14 +235,14 @@ export default function AddressForm({ initialData, meta, respond }: Props) {
       city: "",
       postalCode: "",
       country: "GB",
-      ...initialData
-    }
-  });
+      ...initialData,
+    },
+  })
 
   const onSubmit = (data: Address) => {
     // Guaranteed to match AddressSchema
-    respond(data);
-  };
+    respond(data)
+  }
 
   return (
     <form onSubmit={handleSubmit(onSubmit)}>
@@ -240,7 +290,11 @@ export default function AddressForm({ initialData, meta, respond }: Props) {
{errors.country && {errors.country.message}}
@@ -250,7 +304,7 @@ export default function AddressForm({ initialData, meta, respond }: Props) {
- ); + ) } ``` @@ -259,12 +313,14 @@ export default function AddressForm({ initialData, meta, respond }: Props) { ### Client SDK Changes TypeScript SDK additions: + - New `generateUserInterface` tool type - UI generator registry for pluggable generators - Validation layer for generated UI schemas - Response handler for user-submitted data Python SDK additions: + - Support for UI generation tool invocation - Schema validation utilities - Serialization for UI definitions @@ -278,15 +334,20 @@ Python SDK additions: ## Use Cases ### Dynamic Forms -Agents can generate forms on-the-fly based on conversation context without pre-defined schemas. + +Agents can generate forms on-the-fly based on conversation context without +pre-defined schemas. ### Data Visualization + Generate charts, graphs, or tables appropriate to the data being discussed. ### Interactive Workflows + Create multi-step wizards or guided processes tailored to user needs. ### Adaptive Interfaces + Generate different UI layouts based on user preferences or device capabilities. ## Testing Strategy @@ -302,4 +363,4 @@ Generate different UI layouts based on user preferences or device capabilities. - [AG-UI Tools Documentation](/concepts/tools) - [JSON Schema](https://json-schema.org/) - [React Hook Form](https://react-hook-form.com/) -- [JSON Forms](https://jsonforms.io/) \ No newline at end of file +- [JSON Forms](https://jsonforms.io/) diff --git a/docs/drafts/interrupts.mdx b/docs/drafts/interrupts.mdx index 518d4c2ca..108a669c0 100644 --- a/docs/drafts/interrupts.mdx +++ b/docs/drafts/interrupts.mdx @@ -8,15 +8,22 @@ description: Native support for human-in-the-loop pauses and interrupts ## Summary ### Problem Statement -Agents often need to pause execution to request human approval, gather additional input, or confirm potentially risky actions. Currently, there's no standardized way to handle these interruptions across different agent frameworks. + +Agents often need to pause execution to request human approval, gather +additional input, or confirm potentially risky actions. Currently, there's no +standardized way to handle these interruptions across different agent +frameworks. ### Motivation -Support **human-in-the-loop pauses** (and related mechanisms) natively in AG-UI and CopilotKit. This enables compatibility with various framework interrupts, workflow suspend/resume, and other framework-specific pause mechanisms. + +Support **human-in-the-loop pauses** (and related mechanisms) natively in AG-UI +and CopilotKit. This enables compatibility with various framework interrupts, +workflow suspend/resume, and other framework-specific pause mechanisms. ## Status - **Status**: Draft -- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) +- **Author(s)**: Markus Ecker (mail@mme.xyz) ## Overview @@ -37,28 +44,29 @@ sequenceDiagram ### Updates to RUN_FINISHED Event ```typescript -type RunFinishedOutcome = "success" | "interrupt"; +type RunFinishedOutcome = "success" | "interrupt" type RunFinished = { - type: "RUN_FINISHED"; + type: "RUN_FINISHED" // ... existing fields - outcome?: RunFinishedOutcome; // optional for back-compat (see rules below) + outcome?: RunFinishedOutcome // optional for back-compat (see rules below) // Present when outcome === "success" (or when outcome omitted and interrupt is absent) - result?: any; + result?: any // Present when outcome === "interrupt" (or when outcome omitted and interrupt is present) interrupt?: { - id?: string; // id can be set when needed - reason?: string; // e.g. 
"human_approval" | "upload_required" | "policy_hold" - payload?: any; // arbitrary JSON for UI (forms, proposals, diffs, etc.) - }; -}; + id?: string // id can be set when needed + reason?: string // e.g. "human_approval" | "upload_required" | "policy_hold" + payload?: any // arbitrary JSON for UI (forms, proposals, diffs, etc.) + } +} ``` -When a run finishes with `outcome == "interrupt"`, the agent indicates that on the next run, a value needs to be provided to continue. +When a run finishes with `outcome == "interrupt"`, the agent indicates that on +the next run, a value needs to be provided to continue. ### Updates to RunAgentInput @@ -68,16 +76,17 @@ type RunAgentInput = { // NEW: resume channel for continuing a suspension resume?: { - interruptId?: string; // echo back if one was provided - payload?: any; // arbitrary JSON: approvals, edits, files-as-refs, etc. - }; -}; + interruptId?: string // echo back if one was provided + payload?: any // arbitrary JSON: approvals, edits, files-as-refs, etc. + } +} ``` ### Contract Rules - Resume requests **must** use the same `threadId` -- When given in the `interrupt`, the `interruptId` must be provided via `RunAgentInput` +- When given in the `interrupt`, the `interruptId` must be provided via + `RunAgentInput` - Agents should handle missing or invalid resume payloads gracefully ## Implementation Examples @@ -85,6 +94,7 @@ type RunAgentInput = { ### Minimal Interrupt/Resume **Agent sends interrupt:** + ```json { "type": "RUN_FINISHED", @@ -105,6 +115,7 @@ type RunAgentInput = { ``` **User responds:** + ```json { "threadId": "t1", @@ -119,6 +130,7 @@ type RunAgentInput = { ### Complex Approval Flow **Agent requests approval with context:** + ```json { "type": "RUN_FINISHED", @@ -141,6 +153,7 @@ type RunAgentInput = { ``` **User approves with modifications:** + ```json { "threadId": "thread-456", @@ -161,18 +174,25 @@ type RunAgentInput = { ## Use Cases ### Human Approval -Agents pause before executing sensitive operations (sending emails, making purchases, deleting data). + +Agents pause before executing sensitive operations (sending emails, making +purchases, deleting data). ### Information Gathering + Agent requests additional context or files from the user mid-execution. ### Policy Enforcement -Automatic pauses triggered by organizational policies or compliance requirements. + +Automatic pauses triggered by organizational policies or compliance +requirements. ### Multi-Step Wizards + Complex workflows where each step requires user confirmation or input. ### Error Recovery + Agent pauses when encountering an error, allowing user to provide guidance. ## Implementation Considerations @@ -180,11 +200,13 @@ Agent pauses when encountering an error, allowing user to provide guidance. 
### Client SDK Changes TypeScript SDK: + - Extended `RunFinishedEvent` type with outcome and interrupt fields - Updated `RunAgentInput` with resume field - Helper methods for interrupt handling Python SDK: + - Extended `RunFinishedEvent` class - Updated `RunAgentInput` with resume support - Interrupt state management utilities @@ -192,14 +214,17 @@ Python SDK: ### Framework Integration **Planning Frameworks:** + - Map framework interrupts to AG-UI interrupt events - Handle resume payloads in execution continuation **Workflow Systems:** + - Convert workflow suspensions to AG-UI interrupts - Resume workflow execution with provided payload **Custom Frameworks:** + - Provide interrupt/resume adapter interface - Documentation for integration patterns @@ -221,4 +246,4 @@ Python SDK: ## References - [AG-UI Events Documentation](/concepts/events) -- [AG-UI State Management](/concepts/state) \ No newline at end of file +- [AG-UI State Management](/concepts/state) diff --git a/docs/drafts/meta-events.mdx b/docs/drafts/meta-events.mdx index 87f31fd15..738bb6d74 100644 --- a/docs/drafts/meta-events.mdx +++ b/docs/drafts/meta-events.mdx @@ -8,21 +8,30 @@ description: Annotations and signals independent of agent runs ## Summary ### Problem Statement -Currently, AG-UI events are tightly coupled to agent runs. There's no standardized way to attach user feedback, annotations, or external signals to the event stream that are independent of the agent's execution lifecycle. + +Currently, AG-UI events are tightly coupled to agent runs. There's no +standardized way to attach user feedback, annotations, or external signals to +the event stream that are independent of the agent's execution lifecycle. ### Motivation -AG-UI is extended with **MetaEvents**, a new class of events that can occur at any point in the event stream, independent of agent runs. MetaEvents provide a way to attach annotations, signals, or feedback to a serialized stream. They may originate from users, clients, or external systems rather than from agents. Examples include reactions such as thumbs up/down on a message. + +AG-UI is extended with **MetaEvents**, a new class of events that can occur at +any point in the event stream, independent of agent runs. MetaEvents provide a +way to attach annotations, signals, or feedback to a serialized stream. They may +originate from users, clients, or external systems rather than from agents. +Examples include reactions such as thumbs up/down on a message. 
## Status - **Status**: Draft -- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) +- **Author(s)**: Markus Ecker (mail@mme.xyz) ## Detailed Specification ### Overview This proposal introduces: + - A new **MetaEvent** type for side-band annotations - Events that can appear anywhere in the stream - Support for user feedback, tags, and external annotations @@ -52,13 +61,15 @@ type MetaEvent = BaseEvent & { - **Run-independent**: MetaEvents are not tied to any specific run lifecycle - **Position-flexible**: Can appear before, between, or after runs - **Origin-diverse**: May come from users, clients, or external systems -- **Extensible**: Applications define their own metaType values and payload schemas +- **Extensible**: Applications define their own metaType values and payload + schemas ## Implementation Examples ### User Feedback **Thumbs Up:** + ```json { "id": "evt_123", @@ -73,6 +84,7 @@ type MetaEvent = BaseEvent & { ``` **Thumbs Down with Reason:** + ```json { "id": "evt_124", @@ -91,6 +103,7 @@ type MetaEvent = BaseEvent & { ### Annotations **User Note:** + ```json { "id": "evt_789", @@ -106,6 +119,7 @@ type MetaEvent = BaseEvent & { ``` **Tag Assignment:** + ```json { "id": "evt_890", @@ -122,6 +136,7 @@ type MetaEvent = BaseEvent & { ### External System Events **Analytics Event:** + ```json { "id": "evt_901", @@ -139,6 +154,7 @@ type MetaEvent = BaseEvent & { ``` **Moderation Flag:** + ```json { "id": "evt_902", @@ -158,35 +174,42 @@ type MetaEvent = BaseEvent & { While applications can define their own types, these are commonly used: -| MetaType | Description | Typical Payload | -| --- | --- | --- | -| `thumbs_up` | Positive feedback | `{ messageId, userId }` | -| `thumbs_down` | Negative feedback | `{ messageId, userId, reason? }` | -| `note` | User annotation | `{ text, relatedId?, author }` | -| `tag` | Categorization | `{ tags[], targetId }` | -| `bookmark` | Save for later | `{ messageId, userId }` | -| `copy` | Content copied | `{ messageId, content }` | -| `share` | Content shared | `{ messageId, method }` | -| `rating` | Numeric rating | `{ messageId, rating, maxRating }` | +| MetaType | Description | Typical Payload | +| ------------- | ----------------- | ---------------------------------- | +| `thumbs_up` | Positive feedback | `{ messageId, userId }` | +| `thumbs_down` | Negative feedback | `{ messageId, userId, reason? }` | +| `note` | User annotation | `{ text, relatedId?, author }` | +| `tag` | Categorization | `{ tags[], targetId }` | +| `bookmark` | Save for later | `{ messageId, userId }` | +| `copy` | Content copied | `{ messageId, content }` | +| `share` | Content shared | `{ messageId, method }` | +| `rating` | Numeric rating | `{ messageId, rating, maxRating }` | ## Use Cases ### User Feedback Collection + Capture user reactions to agent responses for quality improvement. ### Conversation Annotation -Allow users to add notes, tags, or bookmarks to important parts of conversations. + +Allow users to add notes, tags, or bookmarks to important parts of +conversations. ### Analytics and Tracking + Record user interactions and behaviors without affecting agent execution. ### Content Moderation + Flag or mark content for review by external moderation systems. ### Collaborative Features + Enable multiple users to annotate or comment on shared conversations. ### Audit Trail + Create a complete record of all interactions, not just agent responses. 
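+
+As a sketch of how these use cases could look in application code, a client
+might emit and query meta events like this. Helper names are illustrative, not
+part of this proposal.
+
+```typescript
+// Shape mirrors the MetaEvent examples above.
+type MetaEvent = {
+  id: string
+  ts: number
+  type: "META"
+  metaType: string
+  relatedId?: string
+  payload?: Record<string, any>
+}
+
+// Illustrative helper: record a thumbs-up reaction for a message.
+function thumbsUp(messageId: string, userId: string): MetaEvent {
+  return {
+    id: `evt_${crypto.randomUUID()}`,
+    ts: Date.now(),
+    type: "META",
+    metaType: "thumbs_up",
+    relatedId: messageId,
+    payload: { messageId, userId },
+  }
+}
+
+// Illustrative helper: collect all feedback attached to a given message.
+function feedbackFor(stream: Array<{ type: string }>, messageId: string) {
+  return stream.filter(
+    (event): event is MetaEvent =>
+      event.type === "META" && (event as MetaEvent).relatedId === messageId
+  )
+}
+```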
## Implementation Considerations @@ -194,27 +217,17 @@ Create a complete record of all interactions, not just agent responses. ### Client SDK Changes TypeScript SDK: + - New `MetaEvent` type in `@ag-ui/core` - Helper functions for common meta event types - MetaEvent filtering and querying utilities Python SDK: + - `MetaEvent` class implementation - Meta event builders for common types - Event stream filtering capabilities -### Storage Implications - -- MetaEvents should be persisted alongside regular events -- Consider separate indexing for efficient querying -- Support filtering streams to include/exclude meta events - -### Security Considerations - -- Validate metaType against allowed values if restricting -- Consider permissions for who can create certain meta event types -- Sanitize payload content to prevent injection attacks - ## Testing Strategy - Unit tests for MetaEvent creation and validation @@ -226,4 +239,4 @@ Python SDK: - [AG-UI Events Documentation](/concepts/events) - [Event Sourcing](https://martinfowler.com/eaaDev/EventSourcing.html) -- [CQRS Pattern](https://martinfowler.com/bliki/CQRS.html) \ No newline at end of file +- [CQRS Pattern](https://martinfowler.com/bliki/CQRS.html) diff --git a/docs/drafts/multimodal-messages.mdx b/docs/drafts/multimodal-messages.mdx index f1ef8398a..969032fc9 100644 --- a/docs/drafts/multimodal-messages.mdx +++ b/docs/drafts/multimodal-messages.mdx @@ -1,6 +1,7 @@ --- title: Multi-modal Messages -description: Support for multimodal input messages including text, images, audio, and files +description: + Support for multimodal input messages including text, images, audio, and files --- # Multi-modal Messages Proposal @@ -8,21 +9,27 @@ description: Support for multimodal input messages including text, images, audio ## Summary ### Problem Statement -Current AG-UI protocol only supports text-based user messages. As LLMs increasingly support multimodal inputs (images, audio, files), the protocol needs to evolve to handle these richer input types. + +Current AG-UI protocol only supports text-based user messages. As LLMs +increasingly support multimodal inputs (images, audio, files), the protocol +needs to evolve to handle these richer input types. ### Motivation -Evolve AG-UI to support **multimodal input messages** without breaking existing apps. Inputs may include text, images, audio, and files. + +Evolve AG-UI to support **multimodal input messages** without breaking existing +apps. Inputs may include text, images, audio, and files. 
## Status - **Status**: Draft -- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) +- **Author(s)**: Markus Ecker (mail@mme.xyz) ## Detailed Specification ### Overview -Extend the `UserMessage` `content` property to be either a string or an array of `InputContent`: +Extend the `UserMessage` `content` property to be either a string or an array of +`InputContent`: ```typescript interface TextInputContent { @@ -39,7 +46,7 @@ interface BinaryInputContent { filename?: string } -type InputContent = TextInputContent | BinaryInputContent; +type InputContent = TextInputContent | BinaryInputContent type UserMessage = { id: string @@ -62,10 +69,10 @@ interface TextInputContent { } ``` -| Property | Type | Description | -| --- | --- | --- | -| `type` | `"text"` | Identifies this as text content | -| `text` | `string` | The text content | +| Property | Type | Description | +| -------- | -------- | ------------------------------- | +| `type` | `"text"` | Identifies this as text content | +| `text` | `string` | The text content | #### BinaryInputContent @@ -82,14 +89,14 @@ interface BinaryInputContent { } ``` -| Property | Type | Description | -| --- | --- | --- | -| `type` | `"binary"` | Identifies this as binary content | -| `mimeType` | `string` | MIME type of the content (e.g., "image/jpeg", "audio/wav") | -| `id` | `string?` | Optional identifier for content reference | -| `url` | `string?` | Optional URL to fetch the content | -| `data` | `string?` | Optional base64-encoded content | -| `filename` | `string?` | Optional filename for the content | +| Property | Type | Description | +| ---------- | ---------- | ---------------------------------------------------------- | +| `type` | `"binary"` | Identifies this as binary content | +| `mimeType` | `string` | MIME type of the content (e.g., "image/jpeg", "audio/wav") | +| `id` | `string?` | Optional identifier for content reference | +| `url` | `string?` | Optional URL to fetch the content | +| `data` | `string?` | Optional base64-encoded content | +| `filename` | `string?` | Optional filename for the content | ### Content Delivery Methods @@ -205,12 +212,14 @@ At least one of `data`, `url`, or `id` must be provided for binary content. ### Client SDK Changes TypeScript SDK: + - Extended `UserMessage` type in `@ag-ui/core` - Content validation utilities - Helper methods for constructing multimodal messages - Binary content encoding/decoding utilities Python SDK: + - Extended `UserMessage` class - Content type validation - Multimodal message builders @@ -219,33 +228,32 @@ Python SDK: ### Framework Integration Frameworks need to: + - Parse multimodal user messages - Forward content to LLM providers that support multimodal inputs - Handle fallbacks for models that don't support certain content types - Manage content upload/storage for binary data -### Security Considerations - -- Validate MIME types against allowed list -- Implement size limits for binary content -- Sanitize URLs to prevent SSRF attacks -- Consider content scanning for malicious files - ## Use Cases ### Visual Question Answering + Users can upload images and ask questions about them. ### Document Processing + Upload PDFs, Word documents, or spreadsheets for analysis. ### Audio Transcription and Analysis + Process voice recordings, podcasts, or meeting audio. ### Multi-document Comparison + Compare multiple images, documents, or mixed media. ### Screenshot Analysis + Share screenshots for UI/UX feedback or debugging assistance. 
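+
+To illustrate the shape on the wire, here is a minimal sketch of building such
+a message on the client, assuming the extended `UserMessage` content union from
+this proposal. The `encodeBase64` helper is a placeholder.
+
+```typescript
+// Placeholder: read a File and return its base64-encoded bytes.
+declare function encodeBase64(file: File): Promise<string>
+
+// Build a user message that pairs a question with an inline screenshot.
+async function buildScreenshotQuestion(file: File) {
+  return {
+    id: `msg_${crypto.randomUUID()}`,
+    role: "user" as const,
+    content: [
+      { type: "text" as const, text: "What is wrong with this layout?" },
+      {
+        type: "binary" as const,
+        mimeType: file.type || "image/png",
+        // Inline delivery; a `url` or `id` reference would also be valid.
+        data: await encodeBase64(file),
+        filename: file.name,
+      },
+    ],
+  }
+}
+```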
## Testing Strategy @@ -261,4 +269,4 @@ Share screenshots for UI/UX feedback or debugging assistance. - [OpenAI Vision API](https://platform.openai.com/docs/guides/vision) - [Anthropic Vision](https://docs.anthropic.com/en/docs/vision) - [MIME Types](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types) -- [Data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs) \ No newline at end of file +- [Data URLs](https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs) diff --git a/docs/drafts/reasoning.mdx b/docs/drafts/reasoning.mdx index b19d12e28..31df060e1 100644 --- a/docs/drafts/reasoning.mdx +++ b/docs/drafts/reasoning.mdx @@ -8,24 +8,33 @@ description: Support for LLM reasoning visibility and continuity ## Summary ### Problem Statement -LLMs increasingly use chain-of-thought reasoning to improve response quality, but there's no standardized way to surface reasoning signals while maintaining privacy and state continuity across turns. + +LLMs increasingly use chain-of-thought reasoning to improve response quality, +but there's no standardized way to surface reasoning signals while maintaining +privacy and state continuity across turns. ### Motivation + AG-UI should support **LLM reasoning** without breaking existing apps. -- **Reasoning visibility & continuity**: We must surface reasoning signals (e.g., **reasoning summaries**) and support **encrypted reasoning items** for state carry-over across turns—especially under `store:false`/ZDR—*without exposing raw chain-of-thought*. -- **Backwards compatibility**: Existing AG-UI clients must keep working unchanged. New capabilities should be non-breaking. +- **Reasoning visibility & continuity**: We must surface reasoning signals + (e.g., **reasoning summaries**) and support **encrypted reasoning items** for + state carry-over across turns—especially under `store:false`/ZDR—_without + exposing raw chain-of-thought_. +- **Backwards compatibility**: Existing AG-UI clients must keep working + unchanged. New capabilities should be non-breaking. 
## Status - **Status**: Draft -- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) +- **Author(s)**: Markus Ecker (mail@mme.xyz) ## Detailed Specification ### Overview This proposal introduces: + - New events for reasoning lifecycle management - A new `ReasoningMessage` type for message history - Support for encrypted reasoning content @@ -46,10 +55,10 @@ type ReasoningStartEvent = BaseEvent & { } ``` -| Property | Type | Description | -| --- | --- | --- | -| `messageId` | `string` | Unique identifier of this reasoning | -| `encryptedContent` | `string?` | Optionally the encrypted content | +| Property | Type | Description | +| ------------------ | --------- | ----------------------------------- | +| `messageId` | `string` | Unique identifier of this reasoning | +| `encryptedContent` | `string?` | Optionally the encrypted content | ### ReasoningMessageStartEvent @@ -63,10 +72,10 @@ type ReasoningMessageStartEvent = BaseEvent & { } ``` -| Property | Type | Description | -| --- | --- | --- | -| `messageId` | `string` | Unique identifier of the message | -| `role` | `"assistant"` | Role of the reasoning message | +| Property | Type | Description | +| ----------- | ------------- | -------------------------------- | +| `messageId` | `string` | Unique identifier of the message | +| `role` | `"assistant"` | Role of the reasoning message | ### ReasoningMessageContentEvent @@ -80,10 +89,10 @@ type ReasoningMessageContentEvent = BaseEvent & { } ``` -| Property | Type | Description | -| --- | --- | --- | +| Property | Type | Description | +| ----------- | -------- | ---------------------------------------------- | | `messageId` | `string` | Matches the ID from ReasoningMessageStartEvent | -| `delta` | `string` | Reasoning content chunk (non-empty) | +| `delta` | `string` | Reasoning content chunk (non-empty) | ### ReasoningMessageEndEvent @@ -96,8 +105,8 @@ type ReasoningMessageEndEvent = BaseEvent & { } ``` -| Property | Type | Description | -| --- | --- | --- | +| Property | Type | Description | +| ----------- | -------- | ---------------------------------------------- | | `messageId` | `string` | Matches the ID from ReasoningMessageStartEvent | ### ReasoningMessageChunkEvent @@ -112,10 +121,10 @@ type ReasoningMessageChunkEvent = BaseEvent & { } ``` -| Property | Type | Description | -| --- | --- | --- | +| Property | Type | Description | +| ----------- | --------- | ------------------------------------------ | | `messageId` | `string?` | Message ID (first event must be non-empty) | -| `delta` | `string?` | Reasoning content chunk | +| `delta` | `string?` | Reasoning content chunk | ### ReasoningEndEvent @@ -128,8 +137,8 @@ type ReasoningEndEvent = BaseEvent & { } ``` -| Property | Type | Description | -| --- | --- | --- | +| Property | Type | Description | +| ----------- | -------- | ----------------------------------- | | `messageId` | `string` | Unique identifier of this reasoning | ## New ReasoningMessage Type @@ -146,6 +155,7 @@ type ReasoningMessage = { ## Removed Events These events have never been publicly documented and will be removed: + - `THINKING_TEXT_MESSAGE_START` - `THINKING_TEXT_MESSAGE_CONTENT` - `THINKING_TEXT_MESSAGE_END` @@ -155,20 +165,24 @@ These events have never been publicly documented and will be removed: ### Client SDK Changes TypeScript SDK: + - New event types in `@ag-ui/core` - ReasoningMessage type in message unions - Reasoning event handlers in subscriber - Support for encrypted content handling Python SDK: + - New event classes in `ag_ui.core.events` - 
ReasoningMessage class - Encryption/decryption utilities ### Privacy and Security -- **Encrypted reasoning**: Support for encrypted reasoning content that clients cannot decrypt -- **State continuity**: Encrypted reasoning items can be passed across turns without exposing content +- **Encrypted reasoning**: Support for encrypted reasoning content that clients + cannot decrypt +- **State continuity**: Encrypted reasoning items can be passed across turns + without exposing content - **ZDR compliance**: Works with `store:false` and zero data retention policies ### Backward Compatibility @@ -180,15 +194,20 @@ Python SDK: ## Use Cases ### Chain-of-Thought Visibility + Show users that the model is "thinking" without exposing internal reasoning. ### Reasoning Summaries + Provide high-level summaries of reasoning process for transparency. ### State Continuity -Maintain reasoning context across conversation turns without storing raw content. + +Maintain reasoning context across conversation turns without storing raw +content. ### Compliance and Privacy + Meet data retention requirements while preserving reasoning capabilities. ## Examples diff --git a/docs/drafts/serialization.mdx b/docs/drafts/serialization.mdx index 1b20281ba..68a1be6ee 100644 --- a/docs/drafts/serialization.mdx +++ b/docs/drafts/serialization.mdx @@ -1,6 +1,7 @@ --- title: Serialization -description: Stream serialization for chat history restoration and event compaction +description: + Stream serialization for chat history restoration and event compaction --- # Serialization Proposal @@ -8,36 +9,83 @@ description: Stream serialization for chat history restoration and event compact ## Summary ### Problem Statement -Currently, there's no standardized way to serialize and restore AG-UI event streams, making it difficult to reload chat history, attach to running agents, or implement branching/time travel features. + +Currently, there's no standardized way to serialize and restore AG-UI event +streams, making it difficult to reload chat history, attach to running agents, +or implement branching/time travel features. ### Motivation -AG-UI adds **stream serialization** to reload chat history and attach to active agents, enabling restoration and interaction with live state. A standardized `compactEvents(events: BaseEvent[]): BaseEvent[]` reduces already-streamed events and normalizes inputs. Additionally, `RunStartedEvent` gains `parentRunId` for branching/time travel and an `input` field carrying the exact `AgentInput` sent to the agent (which may omit messages already present in history). + +AG-UI adds **stream serialization** to reload chat history and attach to active +agents, enabling restoration and interaction with live state. A standardized +`compactEvents(events: BaseEvent[]): BaseEvent[]` reduces already-streamed +events and normalizes inputs. Additionally, `RunStartedEvent` gains +`parentRunId` for branching/time travel and an `input` field carrying the exact +`AgentInput` sent to the agent (which may omit messages already present in +history). ## Status - **Status**: Draft -- **Author(s)**: Markus Ecker (markus.ecker@gmail.com) +- **Author(s)**: Markus Ecker (mail@mme.xyz) ## Detailed Specification ### Overview This proposal introduces three key capabilities: -1. **Stream serialization** - Serialize/deserialize event streams for persistence and restoration + +1. **Stream serialization** - Serialize/deserialize event streams for + persistence and restoration 2. 
**Event compaction** - Reduce event volume while preserving semantic meaning -3. **Run lineage tracking** - Enable branching and time travel with parent run references +3. **Run lineage tracking** - Enable branching and time travel with parent run + references + +### Git-like Branching Model + +The `parentRunId` field enables a git-like branching structure where the entire +conversation history can be stored as a continuous serialized stream, with each +run forming nodes in a directed acyclic graph: + +```mermaid +gitGraph + commit id: "run1" + commit id: "run2" + branch alternative-path + checkout alternative-path + commit id: "run3 (parent run2)" + commit id: "run4" + checkout main + commit id: "run5 (parent run2)" + commit id: "run6" + checkout alternative-path +``` + +**Key Benefits of This Append-Only Architecture:** + +- **Immutable History**: Events are never modified or deleted, only appended +- **Serializable Stream**: The entire DAG can be stored as a single continuous + stream of events +- **Multiple Branches**: Different conversation paths coexist in the same + serialized log +- **Time Travel**: Navigate to any point in any branch without data loss +- **Efficient Storage**: Compaction reduces redundancy while preserving the full + graph structure ## Proposed Changes ### Stream Serialization -Support serializing/deserializing the event stream so chat history can be reloaded and sessions can attach to running agents/live state. +Support serializing/deserializing the event stream so chat history can be +reloaded and sessions can attach to running agents/live state. ### Event Compaction Introduce `compactEvents(events: BaseEvent[]): BaseEvent[]` to: + - Reduce the number of already-streamed events -- **Normalize** `RunStartedEvent.input` so it contains only the messages that were not already sent/recorded earlier in the thread +- **Normalize** `RunStartedEvent.input` so it contains only the messages that + were not already sent/recorded earlier in the thread ```typescript // Event compaction API @@ -47,7 +95,10 @@ declare function compactEvents(events: BaseEvent[]): BaseEvent[] ### Run Lineage and Input Capture Extend `RunStartedEvent` with: -- `parentRunId?: string` to enable branching/time travel + +- `parentRunId?: string` to enable branching/time travel - similar to git + commits, this creates an append-only log where each run points to its parent, + forming a directed acyclic graph of conversation branches - `input?: AgentInput` containing the agent input exactly as sent - `input.messages` **may omit** messages already present in history - `compactEvents` **normalizes** this field to a minimal form @@ -62,6 +113,8 @@ type RunStartedEvent = BaseEvent & { /** * Optional lineage pointer for branching/time travel. * If present, refers to a prior run within the same thread. + * Creates a git-like append-only log where runs form a DAG (directed acyclic graph), + * enabling conversation branching without mutating existing history. 
*/ parentRunId?: string /** @@ -77,15 +130,19 @@ type RunStartedEvent = BaseEvent & { The `compactEvents` function applies these transformations: ### Message Events -- Consecutive `TEXT_MESSAGE_CONTENT` events with same `messageId` → single event with concatenated content + +- Consecutive `TEXT_MESSAGE_CONTENT` events with same `messageId` → single event + with concatenated content - Complete message sequences (START + CONTENT + END) → single snapshot event - Tool call sequences → compacted tool invocation records ### State Events + - Multiple `STATE_DELTA` events → single `STATE_SNAPSHOT` with final state - Redundant state updates → removed if superseded by later snapshots ### Run Input Normalization + - Messages in `RunStartedEvent.input` that exist in prior events → removed - Only new/incremental messages retained in normalized form @@ -109,8 +166,9 @@ const compacted = compactEvents(restored); ### Event Compaction Example **Before compaction:** + ```typescript -[ +;[ { type: "TEXT_MESSAGE_START", messageId: "msg1", role: "user" }, { type: "TEXT_MESSAGE_CONTENT", messageId: "msg1", delta: "Hello " }, { type: "TEXT_MESSAGE_CONTENT", messageId: "msg1", delta: "world" }, @@ -121,23 +179,29 @@ const compacted = compactEvents(restored); ``` **After compaction:** + ```typescript -[ +;[ { type: "MESSAGES_SNAPSHOT", - messages: [{ id: "msg1", role: "user", content: "Hello world" }] + messages: [{ id: "msg1", role: "user", content: "Hello world" }], }, { type: "STATE_SNAPSHOT", - state: { foo: 2 } - } + state: { foo: 2 }, + }, ] ``` ### Branching with Parent Run ID +The `parentRunId` field creates a git-like branching model where the event +stream becomes an immutable, append-only log. Each run can branch from any +previous run, creating alternative conversation paths without modifying the +original history. + ```typescript -// Original run +// Original run (like a git commit) { type: "RUN_STARTED", threadId: "thread1", @@ -145,16 +209,23 @@ const compacted = compactEvents(restored); input: { messages: ["Tell me about Paris"] } } -// Branch from run1 +// Branch from run1 (like creating a git branch from a specific commit) { type: "RUN_STARTED", threadId: "thread1", runId: "run2", - parentRunId: "run1", // Points to parent + parentRunId: "run1", // Points to parent, creating a new branch input: { messages: ["Actually, tell me about London instead"] } } ``` +This append-only structure ensures that: + +- No existing events are ever modified or deleted +- Multiple branches can coexist in the same event stream +- You can always trace back the full lineage of any conversation branch +- Time travel and undo operations are possible without data loss + ### Normalized Input Example ```typescript @@ -185,18 +256,23 @@ const compacted = compactEvents(restored); ## Use Cases ### Session Restoration + Reload a previous chat session with full history and state. ### Live Agent Attachment + Connect to an already-running agent and receive ongoing events. ### Branching Conversations + Create alternative conversation branches from any point in history. ### Time Travel Debugging + Navigate to any point in conversation history for debugging. ### Efficient Storage + Compact events before long-term storage to reduce size. ## Implementation Considerations @@ -204,12 +280,14 @@ Compact events before long-term storage to reduce size. 
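+As a non-normative starting point for the SDK work below, one of the compaction
+rules above (merging consecutive text deltas) could be implemented roughly like
+this; event shapes are loosely typed here for brevity:
+
+```typescript
+// Loose event shape for illustration; real code would use @ag-ui/core types.
+type AnyEvent = {
+  type: string
+  messageId?: string
+  delta?: string
+  [key: string]: any
+}
+
+// Merge consecutive TEXT_MESSAGE_CONTENT events that share a messageId.
+function mergeTextDeltas(events: AnyEvent[]): AnyEvent[] {
+  const out: AnyEvent[] = []
+  for (const event of events) {
+    const prev = out[out.length - 1]
+    if (
+      prev !== undefined &&
+      prev.type === "TEXT_MESSAGE_CONTENT" &&
+      event.type === "TEXT_MESSAGE_CONTENT" &&
+      prev.messageId === event.messageId
+    ) {
+      // Fold the delta into the previous event instead of appending a new one.
+      out[out.length - 1] = {
+        ...prev,
+        delta: (prev.delta ?? "") + (event.delta ?? ""),
+      }
+    } else {
+      out.push(event)
+    }
+  }
+  return out
+}
+```
+
+A full `compactEvents` would chain passes like this with the message and state
+snapshot folding and the input normalization described earlier.
+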
### Client SDK Changes TypeScript SDK: + - `compactEvents` function implementation - Serialization/deserialization utilities - Branch management helpers - Storage adapter interfaces Python SDK: + - Event compaction algorithm - Serialization utilities - Parent run tracking @@ -235,4 +313,4 @@ Python SDK: - [Event Sourcing](https://martinfowler.com/eaaDev/EventSourcing.html) - [AG-UI Events Documentation](/concepts/events) - [AG-UI State Management](/concepts/state) -- [JSON Patch RFC 6902](https://tools.ietf.org/html/rfc6902) \ No newline at end of file +- [JSON Patch RFC 6902](https://tools.ietf.org/html/rfc6902)