diff --git a/.env.example b/.env.example
index 21bd47eb..c27a7b57 100644
--- a/.env.example
+++ b/.env.example
@@ -25,3 +25,7 @@ CARTESIA_API_KEY=your_cartesia_api_key_here
 
 # Anthropic API credentials
 ANTHROPIC_API_KEY=your_anthropic_api_key_here
+
+# Baseten API credentials
+BASETEN_API_KEY=your_baseten_api_key_here
+BASETEN_BASE_URL=your_baseten_base_url_here
diff --git a/agents-core/vision_agents/core/agents/agents.py b/agents-core/vision_agents/core/agents/agents.py
index 545840bd..6d8ae358 100644
--- a/agents-core/vision_agents/core/agents/agents.py
+++ b/agents-core/vision_agents/core/agents/agents.py
@@ -551,6 +551,8 @@ async def join(self, call: Call) -> "AgentSessionContextManager":
         # wait for conversation creation coro at the very end of the join flow
         self.conversation = await create_conversation_coro
 
+        # Provide conversation to the LLM so it can access the chat history.
+        self.llm.set_conversation(self.conversation)
         return AgentSessionContextManager(self, self._connection)
 
     async def finish(self):
diff --git a/agents-core/vision_agents/core/llm/llm.py b/agents-core/vision_agents/core/llm/llm.py
index 1871bcd7..3c25ffc4 100644
--- a/agents-core/vision_agents/core/llm/llm.py
+++ b/agents-core/vision_agents/core/llm/llm.py
@@ -26,7 +26,7 @@ from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import Participant
 from getstream.video.rtc import AudioStreamTrack, PcmData
 from vision_agents.core.processors import Processor
-from vision_agents.core.utils.utils import parse_instructions
+from vision_agents.core.utils.utils import Instructions, parse_instructions
 from vision_agents.core.events.manager import EventManager
 from .function_registry import FunctionRegistry
 from .llm_types import ToolSchema, NormalizedToolCallItem
@@ -50,7 +50,6 @@ class LLM(abc.ABC):
     before_response_listener: BeforeCb
     after_response_listener: AfterCb
     agent: Optional["Agent"]
-    _conversation: Optional["Conversation"]
     function_registry: FunctionRegistry
 
     def __init__(self):
@@ -59,6 +58,9 @@ def __init__(self):
         self.events = EventManager()
         self.events.register_events_from_module(events)
         self.function_registry = FunctionRegistry()
+        self.instructions: Optional[str] = None
+        self.parsed_instructions: Optional[Instructions] = None
+        self._conversation: Optional[Conversation] = None
 
     async def warmup(self) -> None:
         """
@@ -187,9 +189,20 @@ def _attach_agent(self, agent: Agent):
         Attach agent to the llm
         """
         self.agent = agent
-        self._conversation = agent.conversation
         self._set_instructions(agent.instructions)
 
+    def set_conversation(self, conversation: Conversation):
+        """
+        Provide the Conversation object to the LLM to access the chat history.
+        To be called by the Agent after it joins the call.
+
+        Args:
+            conversation: a Conversation object
+
+        Returns:
+        """
+        self._conversation = conversation
+
     def _set_instructions(self, instructions: str):
         self.instructions = instructions
diff --git a/plugins/anthropic/tests/test_anthropic_llm.py b/plugins/anthropic/tests/test_anthropic_llm.py
index 7bb9918d..7791b474 100644
--- a/plugins/anthropic/tests/test_anthropic_llm.py
+++ b/plugins/anthropic/tests/test_anthropic_llm.py
@@ -18,7 +18,7 @@ class TestClaudeLLM:
     async def llm(self) -> ClaudeLLM:
         """Test ClaudeLLM initialization with a provided client."""
         llm = ClaudeLLM(model="claude-sonnet-4-20250514")
-        llm._conversation = InMemoryConversation("be friendly", [])
+        llm.set_conversation(InMemoryConversation("be friendly", []))
         return llm
 
     @pytest.mark.asyncio
@@ -58,7 +58,7 @@ async def test_native_api(self, llm: ClaudeLLM):
     @pytest.mark.integration
     async def test_stream(self, llm: ClaudeLLM):
         streamingWorks = False
-        
+
         @llm.events.subscribe
         async def passed(event: LLMResponseChunkEvent):
             nonlocal streamingWorks
@@ -70,7 +70,6 @@ async def passed(event: LLMResponseChunkEvent):
 
         assert streamingWorks
 
-
     @pytest.mark.integration
     async def test_memory(self, llm: ClaudeLLM):
         await llm.simple_response(
diff --git a/plugins/aws/tests/test_aws.py b/plugins/aws/tests/test_aws.py
index 47d9df9c..1c31ed25 100644
--- a/plugins/aws/tests/test_aws.py
+++ b/plugins/aws/tests/test_aws.py
@@ -35,7 +35,7 @@ def assert_response_successful(self, response):
     async def llm(self) -> BedrockLLM:
         """Test BedrockLLM initialization with a provided client."""
         llm = BedrockLLM(model="qwen.qwen3-32b-v1:0", region_name="us-east-1")
-        llm._conversation = InMemoryConversation("be friendly", [])
+        llm.set_conversation(InMemoryConversation("be friendly", []))
         return llm
 
     @pytest.mark.asyncio
diff --git a/plugins/baseten/README.md b/plugins/baseten/README.md
new file mode 100644
index 00000000..92409564
--- /dev/null
+++ b/plugins/baseten/README.md
@@ -0,0 +1,117 @@
+# Qwen3-VL hosted on Baseten
+Qwen3-VL is the latest open-source vision-language model (VLM) from Alibaba. This plugin lets developers run the model hosted on [Baseten](https://www.baseten.co/) with Vision Agents. The model accepts text and video and responds with text, vocalised with the TTS service of your choice.
+
+## Features
+
+- **Video understanding**: Automatically buffers and forwards video frames to Baseten-hosted VLM models
+- **Streaming responses**: Supports streaming text responses with real-time chunk events
+- **Frame buffering**: Configurable frame rate and buffer duration for optimal performance
+- **Event-driven**: Emits LLM events (chunks, completion, errors) for integration with other components
+
+## Installation
+
+```bash
+uv add vision-agents[baseten]
+```
+
+## Quick Start
+
+```python
+from vision_agents.core import Agent, User
+from vision_agents.plugins import baseten, getstream, deepgram, elevenlabs, vogent
+
+async def create_agent(**kwargs) -> Agent:
+    # Initialize the Baseten VLM
+    llm = baseten.VLM(model="qwen3vl")
+
+    # Create an agent with video understanding capabilities
+    agent = Agent(
+        edge=getstream.Edge(),
+        agent_user=User(name="Video Assistant", id="agent"),
+        instructions="You're a helpful video AI assistant. Analyze the video frames and respond to user questions about what you see.",
+        llm=llm,
+        stt=deepgram.STT(),
+        tts=elevenlabs.TTS(),
+        turn_detection=vogent.TurnDetection(),
+        processors=[],
+    )
+    return agent
+
+async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None:
+    await agent.create_user()
+    call = await agent.create_call(call_type, call_id)
+
+    with await agent.join(call):
+        # The agent will automatically process video frames and respond to user input
+        await agent.finish()
+```
+
+## Configuration
+
+### Environment Variables
+
+- **`BASETEN_API_KEY`**: Your Baseten API key (required)
+- **`BASETEN_BASE_URL`**: The base URL for your Baseten API endpoint (required)
+
+### Initialization Parameters
+
+```python
+baseten.VLM(
+    model: str,                            # Baseten model name (e.g., "qwen3vl")
+    api_key: Optional[str] = None,         # API key (defaults to BASETEN_API_KEY env var)
+    base_url: Optional[str] = None,        # Base URL (defaults to BASETEN_BASE_URL env var)
+    fps: int = 1,                          # Frames per second to process (default: 1)
+    frame_buffer_seconds: int = 10,        # Seconds of video to buffer (default: 10)
+    client: Optional[AsyncOpenAI] = None,  # Custom OpenAI client (optional)
+)
+```
+
+### Parameters
+
+- **`model`**: The name of the Baseten-hosted model to use. Must be a vision-capable model.
+- **`api_key`**: Your Baseten API key. If not provided, reads from `BASETEN_API_KEY` environment variable.
+- **`base_url`**: The base URL for Baseten API. If not provided, reads from `BASETEN_BASE_URL` environment variable.
+- **`fps`**: Number of video frames per second to capture and send to the model. Lower values reduce API costs but may miss fast-moving content. Default is 1 fps.
+- **`frame_buffer_seconds`**: How many seconds of video to buffer. Total buffer size = `fps * frame_buffer_seconds`. Default is 10 seconds.
+- **`client`**: Optional pre-configured `AsyncOpenAI` client. If provided, `api_key` and `base_url` are ignored.
+
+## How It Works
+
+1. **Video Frame Buffering**: The plugin automatically subscribes to video tracks when the agent joins a call. It buffers frames at the specified FPS for the configured duration.
+
+2. **Frame Processing**: When responding to user input, the plugin:
+   - Converts buffered video frames to JPEG format
+   - Resizes frames to fit within 800x600 (maintaining aspect ratio)
+   - Encodes frames as base64 data URLs
+
+3. **API Request**: Sends the conversation history (including system instructions) along with all buffered frames to the Baseten model.
+
+4. **Streaming Response**: Processes the streaming response and emits events for each chunk and completion.
+
+## Events
+
+The plugin emits the following events:
+
+- **`LLMResponseChunkEvent`**: Emitted for each text chunk in the streaming response
+- **`LLMResponseCompletedEvent`**: Emitted when the response stream completes
+- **`LLMErrorEvent`**: Emitted if an API request fails
+
+## Requirements
+
+- Python 3.10+
+- `openai>=2.5.0`
+- `vision-agents` (core framework)
+- Baseten API key and base URL
+
+## Notes
+
+- **Frame Rate**: The default FPS of 1 is optimized for VLM use cases. Higher FPS values will increase API costs and latency.
+- **Frame Size**: Frames are automatically resized to fit within 800x600 pixels while maintaining aspect ratio to optimize API payload size.
+- **Buffer Duration**: The 10-second default buffer provides context for the model while keeping memory usage reasonable.
+- **Tool Calling**: Tool/function calling support is not yet implemented (see TODOs in code).
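+
+## Example: Subscribing to Response Events
+
+As a quick illustration of the events listed above, the sketch below subscribes to the chunk and completion events on a `baseten.VLM` instance. It is a minimal, standalone example (the handler names are placeholders), not part of the plugin itself.
+
+```python
+from vision_agents.core.llm.events import (
+    LLMResponseChunkEvent,
+    LLMResponseCompletedEvent,
+)
+from vision_agents.plugins import baseten
+
+llm = baseten.VLM(model="qwen3vl")
+
+@llm.events.subscribe
+async def on_chunk(event: LLMResponseChunkEvent):
+    # Each chunk carries the incremental text delta of the streaming response.
+    print(event.delta, end="", flush=True)
+
+@llm.events.subscribe
+async def on_completed(event: LLMResponseCompletedEvent):
+    # The completed event carries the full response text.
+    print(f"\nCompleted response ({len(event.text)} characters)")
+```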
+
+## Troubleshooting
+
+- **No video processing**: Ensure the agent has joined a call with video tracks available. The plugin automatically subscribes to video when tracks are added.
+- **API errors**: Verify your `BASETEN_API_KEY` and `BASETEN_BASE_URL` are set correctly and the model name is valid.
+- **High latency**: Consider reducing `fps` or `frame_buffer_seconds` to decrease the number of frames sent per request.
diff --git a/plugins/baseten/example/README.md b/plugins/baseten/example/README.md
new file mode 100644
index 00000000..69c2bf0a
--- /dev/null
+++ b/plugins/baseten/example/README.md
@@ -0,0 +1 @@
+Please see the root plugin README.
\ No newline at end of file
diff --git a/plugins/baseten/example/__init__.py b/plugins/baseten/example/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/plugins/baseten/example/pyproject.toml b/plugins/baseten/example/pyproject.toml
new file mode 100644
index 00000000..d34229b4
--- /dev/null
+++ b/plugins/baseten/example/pyproject.toml
@@ -0,0 +1,21 @@
+[project]
+name = "qwen3-vl-example"
+version = "0.1.0"
+description = "Example using Qwen3 VL hosted on Baseten with Vision Agents"
+requires-python = ">=3.10"
+dependencies = [
+    "vision-agents",
+    "vision-agents-plugins-baseten",
+    "vision-agents-plugins-getstream",
+    "vision-agents-plugins-deepgram",
+    "vision-agents-plugins-elevenlabs",
+    "python-dotenv",
+]
+
+[tool.uv.sources]
+vision-agents = { workspace = true }
+vision-agents-plugins-baseten = { workspace = true }
+vision-agents-plugins-elevenlabs = { workspace = true }
+vision-agents-plugins-getstream = { workspace = true }
+vision-agents-plugins-deepgram = { workspace = true }
+
diff --git a/plugins/baseten/example/qwen_vl_example.py b/plugins/baseten/example/qwen_vl_example.py
new file mode 100644
index 00000000..7db371f2
--- /dev/null
+++ b/plugins/baseten/example/qwen_vl_example.py
@@ -0,0 +1,46 @@
+import asyncio
+
+from dotenv import load_dotenv
+
+from vision_agents.core import Agent, User, cli
+from vision_agents.core.agents import AgentLauncher
+from vision_agents.plugins import baseten, getstream, deepgram, elevenlabs
+from vision_agents.core.events import CallSessionParticipantJoinedEvent
+
+
+load_dotenv()
+
+
+async def create_agent(**kwargs) -> Agent:
+    # Initialize the Baseten VLM
+    llm = baseten.VLM(model="qwen3vl")
+
+    # Create an agent with video understanding capabilities
+    agent = Agent(
+        edge=getstream.Edge(),
+        agent_user=User(name="Video Assistant", id="agent"),
+        instructions="You're a helpful video AI assistant. Analyze the video frames and respond to user questions about what you see.",
+        llm=llm,
+        stt=deepgram.STT(),
+        tts=elevenlabs.TTS(),
+        processors=[],
+    )
+    return agent
+
+async def join_call(agent: Agent, call_type: str, call_id: str, **kwargs) -> None:
+    await agent.create_user()
+    call = await agent.create_call(call_type, call_id)
+
+    @agent.events.subscribe
+    async def on_participant_joined(event: CallSessionParticipantJoinedEvent):
+        if event.participant.user.id != "agent":
+            await asyncio.sleep(2)
+            await agent.simple_response("Describe what you currently see")
+
+    with await agent.join(call):
+        await agent.edge.open_demo(call)
+        # The agent will automatically process video frames and respond to user input
+        await agent.finish()
+
+if __name__ == "__main__":
+    cli(AgentLauncher(create_agent=create_agent, join_call=join_call))
\ No newline at end of file
diff --git a/plugins/baseten/py.typed b/plugins/baseten/py.typed
new file mode 100644
index 00000000..e69de29b
diff --git a/plugins/baseten/pyproject.toml b/plugins/baseten/pyproject.toml
new file mode 100644
index 00000000..35af0d8c
--- /dev/null
+++ b/plugins/baseten/pyproject.toml
@@ -0,0 +1,36 @@
+[build-system]
+requires = ["hatchling", "hatch-vcs"]
+build-backend = "hatchling.build"
+
+[project]
+name = "vision-agents-plugins-baseten"
+dynamic = ["version"]
+description = "Baseten plugin for vision agents"
+readme = "README.md"
+requires-python = ">=3.10"
+license = "MIT"
+dependencies = [
+    "vision-agents",
+    "openai>=2.5.0",
+]
+
+[project.urls]
+Documentation = "https://visionagents.ai/"
+Website = "https://visionagents.ai/"
+Source = "https://github.com/GetStream/Vision-Agents"
+
+[tool.hatch.version]
+source = "vcs"
+raw-options = { root = "..", search_parent_directories = true, fallback_version = "0.0.0" }
+
+[tool.hatch.build.targets.wheel]
+packages = ["."]
+
+[tool.uv.sources]
+vision-agents = { workspace = true }
+
+[dependency-groups]
+dev = [
+    "pytest>=8.4.1",
+    "pytest-asyncio>=1.0.0",
+]
diff --git a/plugins/baseten/vision_agents/plugins/baseten/__init__.py b/plugins/baseten/vision_agents/plugins/baseten/__init__.py
new file mode 100644
index 00000000..19a94314
--- /dev/null
+++ b/plugins/baseten/vision_agents/plugins/baseten/__init__.py
@@ -0,0 +1,4 @@
+from .baseten_vlm import BasetenVLM as VLM
+
+
+__all__ = ["VLM"]
diff --git a/plugins/baseten/vision_agents/plugins/baseten/baseten_vlm.py b/plugins/baseten/vision_agents/plugins/baseten/baseten_vlm.py
new file mode 100644
index 00000000..7e84acc1
--- /dev/null
+++ b/plugins/baseten/vision_agents/plugins/baseten/baseten_vlm.py
@@ -0,0 +1,307 @@
+import base64
+import io
+import logging
+import os
+from collections import deque
+from typing import Iterator, Optional, cast
+
+import av
+from aiortc.mediastreams import MediaStreamTrack, VideoStreamTrack
+from getstream.video.rtc.pb.stream.video.sfu.models.models_pb2 import Participant
+from openai import AsyncOpenAI, AsyncStream
+from openai.types.chat import ChatCompletionChunk
+from PIL.Image import Resampling
+from vision_agents.core.llm.events import (
+    LLMResponseChunkEvent,
+    LLMResponseCompletedEvent,
+)
+from vision_agents.core.llm.llm import LLMResponseEvent, VideoLLM
+from vision_agents.core.processors import Processor
+from vision_agents.core.utils.video_forwarder import VideoForwarder
+
+from . import events
+
+logger = logging.getLogger(__name__)
+
+
+PLUGIN_NAME = "baseten_vlm"
+
+
+class BasetenVLM(VideoLLM):
+    """
+    TODO: Docs
+
+    Examples:
+
+        from vision_agents.plugins import baseten
+        llm = baseten.VLM(model="qwen3vl")
+
+    """
+
+    def __init__(
+        self,
+        model: str,
+        api_key: Optional[str] = None,
+        base_url: Optional[str] = None,
+        fps: int = 1,
+        frame_buffer_seconds: int = 10,
+        client: Optional[AsyncOpenAI] = None,
+    ):
+        """
+        Initialize the BasetenVLM class.
+
+        Args:
+            model (str): The Baseten-hosted model to use.
+            api_key: optional API key. By default, loads from the BASETEN_API_KEY environment variable.
+            base_url: optional base URL. By default, loads from the BASETEN_BASE_URL environment variable.
+            fps: the number of video frames per second to handle.
+            frame_buffer_seconds: the number of seconds to buffer for the model's input.
+                Total buffer size = fps * frame_buffer_seconds.
+            client: optional `AsyncOpenAI` client. By default, creates a new client object.
+        """
+        super().__init__()
+        self.model = model
+        self.events.register_events_from_module(events)
+
+        api_key = api_key or os.getenv("BASETEN_API_KEY")
+        base_url = base_url or os.getenv("BASETEN_BASE_URL")
+        if client is not None:
+            self._client = client
+        elif not api_key:
+            raise ValueError("api_key must be provided")
+        elif not base_url:
+            raise ValueError("base_url must be provided")
+        else:
+            self._client = AsyncOpenAI(api_key=api_key, base_url=base_url)
+
+        self._fps = fps
+        self._video_forwarder: Optional[VideoForwarder] = None
+
+        # Buffer latest 10s of the video track to forward it to the model
+        # together with the user transcripts
+        self._frame_buffer: deque[av.VideoFrame] = deque(
+            maxlen=fps * frame_buffer_seconds
+        )
+        self._frame_width = 800
+        self._frame_height = 600
+
+    async def simple_response(
+        self,
+        text: str,
+        processors: Optional[list[Processor]] = None,
+        participant: Optional[Participant] = None,
+    ) -> LLMResponseEvent:
+        """
+        simple_response is a standardized way to create an LLM response.
+
+        This method is also called every time a new STT transcript is received.
+
+        Args:
+            text: The text to respond to.
+            processors: list of processors (which contain state) about the video/voice AI.
+            participant: the Participant object, optional.
+
+        Examples:
+
+            llm.simple_response("say hi to the user, be nice")
+        """
+
+        # TODO: Clean up `_build_enhanced_instructions` and use that. These should probably be compiled at the agent.
+
+        if self._conversation is None:
+            # The agent hasn't joined the call yet.
+            logger.warning(
+                "Cannot create an LLM response - the conversation has not been initialized yet."
+            )
+            return LLMResponseEvent(original=None, text="")
+
+        messages: list[dict] = []
+        # Add Agent's instructions as system prompt.
+        if self.instructions:
+            messages.append(
+                {
+                    "role": "system",
+                    "content": self.instructions,
+                }
+            )
+
+        # TODO: Do we need to limit how many messages we send?
+        # Add all messages from the conversation to the prompt
+        for message in self._conversation.messages:
+            messages.append(
+                {
+                    "role": message.role,
+                    "content": message.content,
+                }
+            )
+
+        # Attach the latest buffered frames to the request
+        frames_data = []
+        for frame_bytes in self._get_frames_bytes():
+            frame_b64 = base64.b64encode(frame_bytes).decode("utf-8")
+            frame_msg = {
+                "type": "image_url",
+                "image_url": {"url": f"data:image/jpeg;base64,{frame_b64}"},
+            }
+            frames_data.append(frame_msg)
+
+        logger.debug(
+            f'Forwarding {len(frames_data)} frames to the Baseten model "{self.model}"'
+        )
+
+        messages.append(
+            {
+                "role": "user",
+                "content": frames_data,
+            }
+        )
+
+        # TODO: Maybe move it to a method, too much code
+        try:
+            response = await self._client.chat.completions.create(  # type: ignore[arg-type]
+                messages=messages,  # type: ignore[arg-type]
+                model=self.model,
+                stream=True,
+            )
+        except Exception as e:
+            # Send an error event if the request failed
+            logger.exception(
+                f'Failed to get a response from the Baseten model "{self.model}"'
+            )
+            self.events.send(
+                events.LLMErrorEvent(
+                    plugin_name=PLUGIN_NAME,
+                    error_message=str(e),
+                    event_data=e,
+                )
+            )
+            return LLMResponseEvent(original=None, text="")
+
+        i = 0
+        llm_response_event: LLMResponseEvent[Optional[ChatCompletionChunk]] = (
+            LLMResponseEvent(original=None, text="")
+        )
+        text_chunks: list[str] = []
+        total_text = ""
+        async for chunk in cast(AsyncStream[ChatCompletionChunk], response):
+            if not chunk.choices:
+                continue
+
+            choice = chunk.choices[0]
+            content = choice.delta.content
+            finish_reason = choice.finish_reason
+
+            if content:
+                text_chunks.append(content)
+                # Emit delta events for each response chunk.
+                self.events.send(
+                    LLMResponseChunkEvent(
+                        plugin_name=PLUGIN_NAME,
+                        content_index=None,
+                        item_id=chunk.id,
+                        output_index=0,
+                        sequence_number=i,
+                        delta=content,
+                    )
+                )
+
+            elif finish_reason:
+                # Emit the completion event when the response stream is finished.
+                total_text = "".join(text_chunks)
+                self.events.send(
+                    LLMResponseCompletedEvent(
+                        plugin_name=PLUGIN_NAME,
+                        original=chunk,
+                        text=total_text,
+                        item_id=chunk.id,
+                    )
+                )
+
+                llm_response_event = LLMResponseEvent(original=chunk, text=total_text)
+            i += 1
+
+        return llm_response_event
+
+    async def watch_video_track(
+        self,
+        track: MediaStreamTrack,
+        shared_forwarder: Optional[VideoForwarder] = None,
+    ) -> None:
+        """
+        Set up video forwarding and start buffering video frames.
+        This method is called by the `Agent`.
+
+        Args:
+            track: instance of VideoStreamTrack.
+            shared_forwarder: a shared VideoForwarder instance if present. Defaults to None.
+
+        Returns: None
+        """
+
+        if self._video_forwarder is not None and shared_forwarder is None:
+            logger.warning("Video forwarder already running, stopping the previous one")
+            await self._video_forwarder.stop()
+            self._video_forwarder = None
+            logger.info("Stopped video forwarding")
+
+        logger.info("🎥 BasetenVLM subscribing to VideoForwarder")
+        if not shared_forwarder:
+            self._video_forwarder = VideoForwarder(
+                cast(VideoStreamTrack, track),
+                max_buffer=10,
+                fps=1.0,  # Low FPS for VLM
+                name="baseten_vlm_forwarder",
+            )
+            await self._video_forwarder.start()
+        else:
+            self._video_forwarder = shared_forwarder
+
+        # Start buffering video frames
+        await self._video_forwarder.start_event_consumer(self._frame_buffer.append)
+
+    def _get_frames_bytes(self) -> Iterator[bytes]:
+        """
+        Iterate over all buffered video frames.
+        """
+        for frame in self._frame_buffer:
+            yield _frame_to_jpeg_bytes(
+                frame=frame,
+                target_width=self._frame_width,
+                target_height=self._frame_height,
+                quality=85,
+            )
+
+
+# TODO: Move it to some core utils
+def _frame_to_jpeg_bytes(
+    frame: av.VideoFrame, target_width: int, target_height: int, quality: int = 85
+) -> bytes:
+    """
+    Convert a frame to JPEG bytes with resizing.
+
+    Args:
+        frame: an instance of `av.VideoFrame`
+        target_width: target width in pixels
+        target_height: target height in pixels
+        quality: JPEG quality. Default is 85.
+
+    Returns: frame as JPEG bytes.
+
+    """
+    # Convert frame to a PIL image
+    img = frame.to_image()
+
+    # Calculate scaling to maintain aspect ratio
+    src_width, src_height = img.size
+    # Calculate scale factor (fit within target dimensions)
+    scale = min(target_width / src_width, target_height / src_height)
+    new_width = int(src_width * scale)
+    new_height = int(src_height * scale)
+
+    # Resize with aspect ratio maintained
+    resized = img.resize((new_width, new_height), Resampling.LANCZOS)
+
+    # Save as JPEG with quality control
+    buf = io.BytesIO()
+    resized.save(buf, "JPEG", quality=quality, optimize=True)
+    return buf.getvalue()
diff --git a/plugins/baseten/vision_agents/plugins/baseten/events.py b/plugins/baseten/vision_agents/plugins/baseten/events.py
new file mode 100644
index 00000000..a521661e
--- /dev/null
+++ b/plugins/baseten/vision_agents/plugins/baseten/events.py
@@ -0,0 +1,12 @@
+from dataclasses import dataclass, field
+from vision_agents.core.events import PluginBaseEvent
+from typing import Optional, Any
+
+
+@dataclass
+class LLMErrorEvent(PluginBaseEvent):
+    """Event emitted when an LLM encounters an error."""
+
+    type: str = field(default="plugin.llm.error", init=False)
+    error_message: Optional[str] = None
+    event_data: Optional[Any] = None
diff --git a/plugins/gemini/tests/test_gemini_llm.py b/plugins/gemini/tests/test_gemini_llm.py
index 7210fff2..f683a56e 100644
--- a/plugins/gemini/tests/test_gemini_llm.py
+++ b/plugins/gemini/tests/test_gemini_llm.py
@@ -14,9 +14,7 @@
 load_dotenv()
 
-
 class TestGeminiLLM:
-
     def test_message(self):
         messages = GeminiLLM._normalize_message("say hi")
         assert isinstance(messages[0], Message)
@@ -32,7 +30,7 @@ def test_advanced_message(self):
     @pytest.fixture
     async def llm(self) -> GeminiLLM:
         llm = GeminiLLM(model="gemini-2.0-flash-exp")
-        llm._conversation = InMemoryConversation("be friendly", [])
+        llm.set_conversation(InMemoryConversation("be friendly", []))
         return llm
 
     @pytest.mark.integration
@@ -51,14 +49,14 @@ async def test_native_api(self, llm: GeminiLLM):
     @pytest.mark.integration
     async def test_stream(self, llm: GeminiLLM):
         streamingWorks = False
-        
+
         @llm.events.subscribe
         async def passed(event: LLMResponseChunkEvent):
             nonlocal streamingWorks
             streamingWorks = True
-        
+
         await llm.simple_response("Explain magma to a 5 year old")
-        
+
         # Wait for all events in queue to be processed
         await llm.events.wait()
@@ -67,7 +65,9 @@ async def passed(event: LLMResponseChunkEvent):
     @pytest.mark.integration
     async def test_memory(self, llm: GeminiLLM):
         await llm.simple_response(text="There are 2 dogs in the room")
-        response = await llm.simple_response(text="How many paws are there in the room?")
+        response = await llm.simple_response(
+            text="How many paws are there in the room?"
+        )
 
         assert "8" in response.text or "eight" in response.text
 
@@ -82,7 +82,7 @@ async def test_native_memory(self, llm: GeminiLLM):
     @pytest.mark.integration
     async def test_instruction_following(self):
         llm = GeminiLLM(model="gemini-2.0-flash-exp")
-        llm._conversation = InMemoryConversation("be friendly", [])
+        llm.set_conversation(InMemoryConversation("be friendly", []))
 
         llm._set_instructions("only reply in 2 letter country shortcuts")
 
@@ -165,11 +165,11 @@ async def handle_error_event(event: events.GeminiErrorEvent):
                 )
                 chunk_item_ids.add(chunk_event.item_id)
                 total_delta_text += chunk_event.delta
-                
+
                 # Validate content_index: should be sequential (0, 1, 2, ...) or None
                 if chunk_event.content_index is not None:
                     content_indices.append(chunk_event.content_index)
-        
+
         # Verify content_index sequencing if any are provided
         if content_indices:
             # Should be sequential starting from 0
diff --git a/plugins/openrouter/tests/test_openrouter_llm.py b/plugins/openrouter/tests/test_openrouter_llm.py
index 77c586cc..2962d36c 100644
--- a/plugins/openrouter/tests/test_openrouter_llm.py
+++ b/plugins/openrouter/tests/test_openrouter_llm.py
@@ -62,9 +62,9 @@ async def llm(self) -> LLM:
         """Fixture for OpenRouter LLM with z-ai/glm-4.6 model."""
         if not os.environ.get("OPENROUTER_API_KEY"):
             pytest.skip("OPENROUTER_API_KEY environment variable not set")
-        
+
         llm = LLM(model="anthropic/claude-haiku-4.5")
-        llm._conversation = InMemoryConversation("be friendly", [])
+        llm.set_conversation(InMemoryConversation("be friendly", []))
         return llm
 
     @pytest.mark.integration
@@ -114,7 +114,7 @@ async def test_memory(self, llm: LLM):
         response = await llm.simple_response(
             text="How many paws are there in the room?",
         )
-        
+
         self.assert_response_successful(response)
         assert "8" in response.text or "eight" in response.text.lower(), (
             f"Expected '8' or 'eight' in response, got: {response.text}"
         )
@@ -129,7 +129,7 @@ async def test_native_memory(self, llm: LLM):
         response = await llm.create_response(
             input="How many paws are there in the room?",
         )
-        
+
         self.assert_response_successful(response)
         assert "8" in response.text or "eight" in response.text.lower(), (
             f"Expected '8' or 'eight' in response, got: {response.text}"
         )
@@ -153,4 +153,3 @@ async def test_instruction_following(self):
         assert "nl" in response.text.lower(), (
             f"Expected 'NL' in response, got: {response.text}"
         )
-
diff --git a/pyproject.toml b/pyproject.toml
index 6eef8063..9a8a73a5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,7 @@ build-backend = "hatchling.build"
 vision-agents = { workspace = true }
 vision-agents-plugins-anthropic = { workspace = true }
 vision-agents-plugins-aws = { workspace = true }
+vision-agents-plugins-baseten = { workspace = true }
 vision-agents-plugins-cartesia = { workspace = true }
 vision-agents-plugins-deepgram = { workspace = true }
 vision-agents-plugins-elevenlabs = { workspace = true }
@@ -37,6 +38,7 @@ members = [
     "agents-core",
     "plugins/anthropic",
     "plugins/aws",
+    "plugins/baseten",
     "plugins/cartesia",
     "plugins/deepgram",
     "plugins/elevenlabs",
diff --git a/uv.lock b/uv.lock
index 9a8bfe28..0d0c374a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-revision = 2
+revision = 3
 requires-python = ">=3.12"
 resolution-markers = [
     "python_full_version >= '3.13' and sys_platform == 'win32'",
@@ -13,6 +13,7 @@ members = [
     "vision-agents",
     "vision-agents-plugins-anthropic",
     "vision-agents-plugins-aws",
+    "vision-agents-plugins-baseten",
     "vision-agents-plugins-cartesia",
     "vision-agents-plugins-deepgram",
     "vision-agents-plugins-elevenlabs",
@@ -5532,6 +5533,32 @@ dev = [
     { name = "pytest-asyncio", specifier = ">=1.0.0" },
 ]
 
+[[package]]
+name = "vision-agents-plugins-baseten"
+source = { editable = "plugins/baseten" }
+dependencies = [
+    { name = "openai" },
+    { name = "vision-agents" },
+]
+
+[package.dev-dependencies]
+dev = [
+    { name = "pytest" },
+    { name = "pytest-asyncio" },
+]
+
+[package.metadata]
+requires-dist = [
+    { name = "openai", specifier = ">=2.5.0" },
+    { name = "vision-agents", editable = "agents-core" },
+]
+
+[package.metadata.requires-dev]
+dev = [
+    { name = "pytest", specifier = ">=8.4.1" },
+    { name = "pytest-asyncio", specifier = ">=1.0.0" },
+]
+
 [[package]]
 name = "vision-agents-plugins-cartesia"
 source = { editable = "plugins/cartesia" }