Merged

23 commits
- `9675a14`: implemented heygen avatars (d3xvn, Oct 27, 2025)
- `4f3a6e4`: add lip-sync support by forwarding agent audio to heygen (d3xvn, Oct 28, 2025)
- `680f5d7`: switch avatar example to use gemini realtime for better lip-sync testing (d3xvn, Oct 28, 2025)
- `6eb638f`: WIP: audio track approach for lip-sync (audio flows but no lip movement) (d3xvn, Oct 29, 2025)
- `7f2983a`: Clean up HeyGen implementation and fix duplicate text sending (d3xvn, Oct 30, 2025)
- `aefdeda`: Merge main into feat/heygen - add vogent plugin alongside heygen (d3xvn, Nov 3, 2025)
- `96f1cc9`: PR cleanup (d3xvn, Nov 3, 2025)
- `c14b98c`: Auto-attach processors to agent (no more manual set_agent calls) (d3xvn, Nov 3, 2025)
- `6188ed3`: fixed audio duplication and sluggishness (d3xvn, Nov 3, 2025)
- `74aa6ff`: Fix video aspect ratio stretching - add letterboxing (d3xvn, Nov 3, 2025)
- `f54c372`: fixed and simplified both implementations (d3xvn, Nov 4, 2025)
- `a94b181`: Merge main into feat/heygen - added moondream plugin (d3xvn, Nov 4, 2025)
- `fad9f49`: Fix ruff linting - remove unused imports (d3xvn, Nov 4, 2025)
- `f03c81d`: Fix HeyGen plugin tests - import paths and mocking (d3xvn, Nov 4, 2025)
- `a5be206`: Fix mypy type errors in HeyGen plugin (d3xvn, Nov 4, 2025)
- `d6d66bf`: Allow reattaching to new HeyGen video tracks on renegotiation (d3xvn, Nov 4, 2025)
- `f7a2f37`: Migrate quality to enum (Nash0x7E2, Nov 4, 2025)
- `0b4894a`: Ruff and Mypy (Nash0x7E2, Nov 4, 2025)
- `30322b2`: Merge branch 'main' into feat/heygen (Nash0x7E2, Nov 4, 2025)
- `4bafa66`: More ruff issues (Nash0x7E2, Nov 4, 2025)
- `f5a1aaa`: Fix broken method sigs (Nash0x7E2, Nov 4, 2025)
- `3f5e203`: Unused var (Nash0x7E2, Nov 4, 2025)
- `12cad15`: final ruff error (Nash0x7E2, Nov 4, 2025)
2 changes: 2 additions & 0 deletions agents-core/pyproject.toml
@@ -42,6 +42,7 @@ deepgram = ["vision-agents-plugins-deepgram"]
 elevenlabs = ["vision-agents-plugins-elevenlabs"]
 gemini = ["vision-agents-plugins-gemini"]
 getstream = ["vision-agents-plugins-getstream"]
+heygen = ["vision-agents-plugins-heygen"]
 kokoro = ["vision-agents-plugins-kokoro"]
 krisp = ["vision-agents-plugins-krisp"]
 moonshine = ["vision-agents-plugins-moonshine"]
@@ -57,6 +58,7 @@ all-plugins = [
     "vision-agents-plugins-elevenlabs",
     "vision-agents-plugins-gemini",
     "vision-agents-plugins-getstream",
+    "vision-agents-plugins-heygen",
     "vision-agents-plugins-kokoro",
     "vision-agents-plugins-krisp",
     "vision-agents-plugins-moonshine",
15 changes: 14 additions & 1 deletion agents-core/vision_agents/core/agents/agents.py
@@ -215,6 +215,11 @@ def __init__(
 
         self.llm._attach_agent(self)
 
+        # Attach processors that need agent reference
+        for processor in self.processors:
+            if hasattr(processor, '_attach_agent'):
+                processor._attach_agent(self)
+
         self.events.subscribe(self._on_vad_audio)
         self.events.subscribe(self._on_agent_say)
         # Initialize state variables
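The auto-attach hook in the hunk above relies on duck typing: any processor that exposes a `_attach_agent` method receives the agent reference at construction time, while other processors are skipped. A minimal standalone sketch of the pattern (class names here are illustrative, not the framework's real types):

```python
class AvatarProcessor:
    """Processor that wants a back-reference to its agent (illustrative)."""
    def _attach_agent(self, agent):
        self.agent = agent

class StatelessProcessor:
    """No _attach_agent hook; the agent simply skips it."""

class Agent:
    def __init__(self, processors):
        self.processors = processors
        # Attach processors that need an agent reference, as in the diff
        for processor in self.processors:
            if hasattr(processor, "_attach_agent"):
                processor._attach_agent(self)

avatar = AvatarProcessor()
plain = StatelessProcessor()
agent = Agent([avatar, plain])
print(avatar.agent is agent)  # True
```

This is what removes the need for manual `set_agent` calls noted in commit `c14b98c`.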
@@ -1167,10 +1172,13 @@ def publish_audio(self) -> bool:
         """Whether the agent should publish an outbound audio track.
 
         Returns:
-            True if TTS is configured or when in Realtime mode.
+            True if TTS is configured, when in Realtime mode, or if there are audio publishers.
         """
         if self.tts is not None or self.realtime_mode:
             return True
+        # Also publish audio if there are audio publishers (e.g., HeyGen avatar)
+        if self.audio_publishers:
+            return True
         return False
 
     @property
@@ -1296,6 +1304,11 @@ def _prepare_rtc(self):
         if self.realtime_mode and isinstance(self.llm, Realtime):
             self._audio_track = self.llm.output_track
             self.logger.info("🎵 Using Realtime provider output track for audio")
+        elif self.audio_publishers:
+            # Get the first audio publisher to create the track
+            audio_publisher = self.audio_publishers[0]
+            self._audio_track = audio_publisher.publish_audio_track()
+            self.logger.info("🎵 Audio track initialized from audio publisher")
         else:
             # Default to WebRTC-friendly format unless configured differently
             framerate = 48000
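The branch added above slots audio publishers between the Realtime output track and the default WebRTC track when the agent picks its outbound audio source. A simplified standalone sketch of that precedence (function and class names here are hypothetical stand-ins, not the framework's API):

```python
def select_audio_track(realtime_track, audio_publishers, make_default_track):
    # Precedence mirrored from _prepare_rtc above, simplified:
    # Realtime output > first audio publisher > default WebRTC track.
    if realtime_track is not None:
        return realtime_track
    if audio_publishers:
        return audio_publishers[0].publish_audio_track()
    return make_default_track()

class FakePublisher:
    """Stand-in for an audio publisher such as the HeyGen avatar."""
    def publish_audio_track(self):
        return "avatar-audio-track"

print(select_audio_track(None, [FakePublisher()], lambda: "default-track"))
print(select_audio_track("realtime-track", [FakePublisher()], lambda: "default-track"))
print(select_audio_track(None, [], lambda: "default-track"))
```

Together with the `publish_audio` change, this is what lets an avatar publish the call's audio even when no TTS is configured on the agent itself.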
1 change: 0 additions & 1 deletion aiortc
Submodule aiortc deleted from f84800
4 changes: 3 additions & 1 deletion plugins/aws/example/uv.lock


189 changes: 189 additions & 0 deletions plugins/heygen/README.md
@@ -0,0 +1,189 @@
# HeyGen Avatar Plugin for Vision Agents

Add realistic avatar video to your AI agents using HeyGen's streaming avatar API.

## Features

- 🎭 **Realistic Avatars**: Use HeyGen's high-quality avatars with natural movements
- 🎤 **Automatic Lip-Sync**: Avatar automatically syncs with audio from any TTS provider
- 🚀 **WebRTC Streaming**: Low-latency real-time video streaming via WebRTC
- 🔌 **Easy Integration**: Works seamlessly with Vision Agents framework
- 🎨 **Customizable**: Configure avatar, quality, resolution, and more

## Installation

```bash
pip install vision-agents-plugins-heygen
```

Or with uv:

```bash
uv pip install vision-agents-plugins-heygen
```

## Quick Start

```python
import asyncio
from uuid import uuid4
from dotenv import load_dotenv

from vision_agents.core import User, Agent
from vision_agents.plugins import cartesia, deepgram, getstream, gemini, heygen
from vision_agents.plugins.heygen import VideoQuality

load_dotenv()

async def start_avatar_agent():
    agent = Agent(
        edge=getstream.Edge(),
        agent_user=User(name="AI Assistant with Avatar", id="agent"),
        instructions="You're a friendly AI assistant.",
        llm=gemini.LLM("gemini-2.0-flash"),
        tts=cartesia.TTS(),
        stt=deepgram.STT(),
        # Add HeyGen avatar
        processors=[
            heygen.AvatarPublisher(
                avatar_id="default",
                quality=VideoQuality.HIGH,
            )
        ],
    )

    call = agent.edge.client.video.call("default", str(uuid4()))

    with await agent.join(call):
        await agent.edge.open_demo(call)
        await agent.simple_response("Hello! I'm your AI assistant with an avatar.")
        await agent.finish()

if __name__ == "__main__":
    asyncio.run(start_avatar_agent())
```

## Configuration

### Environment Variables

Set your HeyGen API key:

```bash
HEYGEN_API_KEY=your_heygen_api_key_here
```

### AvatarPublisher Options

```python
from vision_agents.plugins.heygen import VideoQuality

heygen.AvatarPublisher(
    avatar_id="default",        # HeyGen avatar ID
    quality=VideoQuality.HIGH,  # VideoQuality.LOW, VideoQuality.MEDIUM, or VideoQuality.HIGH
    resolution=(1920, 1080),    # Output resolution (width, height)
    api_key=None,               # Optional: override the HEYGEN_API_KEY env var
)
```

## Usage Examples

### With Realtime LLM

```python
from uuid import uuid4

from vision_agents.core import Agent, User
from vision_agents.plugins import gemini, heygen, getstream

agent = Agent(
    edge=getstream.Edge(),
    agent_user=User(name="Realtime Avatar AI"),
    instructions="Be conversational and responsive.",
    llm=gemini.Realtime(fps=2),  # No separate TTS needed
    processors=[
        heygen.AvatarPublisher(avatar_id="professional_presenter")
    ],
)

call = agent.edge.client.video.call("default", str(uuid4()))

with await agent.join(call):
    await agent.finish()
```

### With Multiple Processors

```python
from vision_agents.core import Agent, User
from vision_agents.plugins import gemini, getstream, heygen, ultralytics

agent = Agent(
    edge=getstream.Edge(),
    agent_user=User(name="Fitness Coach"),
    instructions="Analyze user poses and provide feedback.",
    llm=gemini.Realtime(fps=3),
    processors=[
        # Process incoming user video
        ultralytics.YOLOPoseProcessor(model_path="yolo11n-pose.pt"),
        # Publish avatar video
        heygen.AvatarPublisher(avatar_id="fitness_trainer"),
    ],
)
```

## How It Works

1. **Connection**: Establishes WebRTC connection to HeyGen's streaming API
2. **Audio Input**: Receives audio from your TTS provider or Realtime LLM
3. **Avatar Generation**: HeyGen generates avatar video with lip-sync
4. **Video Streaming**: Streams avatar video to call participants via GetStream Edge
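The audio leg of steps 2 and 3 can be sketched as a simple forwarding loop. The sender below is a stand-in for the plugin's HeyGen session, not its real API:

```python
import asyncio

async def forward_audio(frames, send_to_avatar):
    # Step 2 above: each PCM frame from the TTS provider (or Realtime LLM)
    # is forwarded to the avatar session; HeyGen then generates the
    # lip-synced avatar video that gets streamed back (steps 3 and 4).
    for frame in frames:
        await send_to_avatar(frame)

sent = []

async def fake_send(frame):
    # Stand-in for the plugin's actual send path into HeyGen's WebRTC session
    sent.append(frame)

asyncio.run(forward_audio([b"\x00\x01", b"\x02\x03"], fake_send))
print(len(sent))  # 2
```

Because the avatar consumes whatever audio the agent produces, any TTS provider (or a Realtime LLM) works without avatar-specific configuration.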

## Requirements

- Python 3.10+
- HeyGen API key (get one at [heygen.com](https://heygen.com))
- GetStream account for video calls
- TTS provider (Cartesia, ElevenLabs, etc.) or Realtime LLM

## Troubleshooting

### Connection Issues

If you experience connection problems:

1. Check that your HeyGen API key is valid
2. Ensure you have network access to HeyGen's servers
3. Check firewall settings for WebRTC traffic

### Video Quality

To optimize video quality:

- Use `quality=VideoQuality.HIGH` for best results
- Increase resolution if bandwidth allows
- Ensure stable internet connection

## API Reference

### AvatarPublisher

Main class for publishing HeyGen avatar video.

**Methods:**
- `publish_video_track()`: Returns the video track for streaming
- `state()`: Returns current state information
- `close()`: Cleans up resources
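A typical call order for these methods can be shown against a stand-in class. `FakeAvatarPublisher` below only mirrors the method names listed above so the lifecycle is runnable without a HeyGen session; the real `AvatarPublisher` manages a WebRTC connection and returns an actual video track:

```python
class FakeAvatarPublisher:
    """Stand-in mirroring the AvatarPublisher surface listed above."""
    def __init__(self):
        self._closed = False

    def publish_video_track(self):
        # The real class returns a streamable video track object
        return "video-track"

    def state(self):
        return {"closed": self._closed}

    def close(self):
        self._closed = True

pub = FakeAvatarPublisher()
track = pub.publish_video_track()
print(pub.state())  # {'closed': False}
pub.close()
print(pub.state())  # {'closed': True}
```

In normal use you never call these directly; the `Agent` invokes them when the publisher is passed in `processors=[...]`.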

## License

MIT

## Links

- [Documentation](https://visionagents.ai/)
- [GitHub](https://github.com/GetStream/Vision-Agents)
- [HeyGen API Docs](https://docs.heygen.com/docs/streaming-api)
